fix(kb): 完善知识库入库和重建索引
This commit is contained in:
@@ -10,8 +10,8 @@ from django.core.files.uploadedfile import UploadedFile
|
||||
|
||||
from review_agent.models import KnowledgeBaseDocument
|
||||
from review_agent.regulatory_review.services.rag_citation import RagIndexUnavailable, retrieve_citations
|
||||
from review_agent.regulatory_review.services.rag_embedding import DeterministicEmbeddingProvider
|
||||
from review_agent.regulatory_review.services.rag_index import chunk_text, extract_text_from_path
|
||||
from review_agent.regulatory_review.services.rag_embedding import get_embedding_provider
|
||||
from review_agent.regulatory_review.services.rag_index import chunk_text, extract_text_from_path, is_excluded_source_path
|
||||
from review_agent.regulatory_review.services.rule_loader import DEFAULT_RULE_PATH, compute_file_sha256, load_rule_file
|
||||
|
||||
|
||||
@@ -78,6 +78,8 @@ def list_source_documents(source_dir: Path) -> list[dict[str, Any]]:
|
||||
continue
|
||||
suffix = path.suffix.lower()
|
||||
relative_path = str(path.relative_to(source_dir))
|
||||
if is_excluded_source_path(relative_path):
|
||||
continue
|
||||
indexed_chunk_count = source_chunk_counts.get(relative_path, 0)
|
||||
documents.append(
|
||||
{
|
||||
@@ -101,7 +103,7 @@ def search_knowledge_base(query: str, *, n_results: int = 3) -> dict[str, Any]:
|
||||
try:
|
||||
results = retrieve_citations(
|
||||
normalized,
|
||||
embedding_provider=DeterministicEmbeddingProvider(),
|
||||
embedding_provider=get_embedding_provider(),
|
||||
n_results=n_results,
|
||||
)
|
||||
except RagIndexUnavailable as exc:
|
||||
@@ -210,7 +212,7 @@ def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
||||
return 0
|
||||
collection = _load_chroma_collection()
|
||||
texts = [chunk.text for chunk in chunks]
|
||||
embeddings = DeterministicEmbeddingProvider()(texts)
|
||||
embeddings = get_embedding_provider()(texts)
|
||||
ids = [
|
||||
hashlib.sha256(f"managed:{document.pk}:{chunk.metadata['chunk_index']}".encode("utf-8")).hexdigest()
|
||||
for chunk in chunks
|
||||
|
||||
Reference in New Issue
Block a user