refactor(rag): 梳理文档入库与检索服务结构
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from agent_core.orchestrator import build_messages, run_agent
|
||||
from agent_core.rag.ingest import ingest_document
|
||||
from agent_core.rag.ingest import _split_text, ingest_document
|
||||
from agent_core.rag.retriever import retrieve
|
||||
|
||||
|
||||
@@ -221,3 +221,30 @@ def test_run_agent_uses_retrieved_document_chunks(tmp_path):
|
||||
|
||||
assert result.references[0]["source"] == "sop.md"
|
||||
assert "隔离现场" in result.references[0]["content"]
|
||||
|
||||
|
||||
def test_rag_split_text_keeps_overlap_and_non_empty_chunks():
    """Check the chunk list produced for a 20-character input.

    With ``chunk_size=8`` and ``overlap=3`` the splitter is expected to
    return three 8-character chunks followed by one 5-character chunk,
    i.e. no empty trailing chunk.
    """
    sample_text = "A" * 20
    expected_chunks = ["AAAAAAAA", "AAAAAAAA", "AAAAAAAA", "AAAAA"]

    assert _split_text(sample_text, chunk_size=8, overlap=3) == expected_chunks
|
||||
|
||||
|
||||
def test_retrieve_returns_empty_when_query_has_no_overlap(tmp_path):
    """Retrieval yields an empty list when the query is unrelated to the stored text.

    A single document is ingested into a store file under ``tmp_path``;
    the same scenario, collection and store are then queried with text
    that does not appear in that document.
    """
    # Arguments shared by the ingest call and the retrieve call.
    shared_kwargs = {
        "scenario_id": "knowledge_qa",
        "collection": "knowledge_qa",
        "store_path": tmp_path / "rag_store.json",
    }

    ingest_document(
        source_file="rules.md",
        text="这里描述的是报销流程和审批链。",
        **shared_kwargs,
    )

    retrieved = retrieve(query="设备点检", top_k=3, **shared_kwargs)

    assert retrieved == []
|
||||
|
||||
@@ -3,7 +3,7 @@ from django.urls import reverse
|
||||
|
||||
from apps.documents.forms import DocumentUploadForm
|
||||
from apps.documents.models import UploadedDocument
|
||||
from apps.documents.services import extract_text
|
||||
from apps.documents.services import extract_text, index_document
|
||||
|
||||
|
||||
def test_upload_txt_document_creates_uploaded_record(client, db):
|
||||
@@ -128,3 +128,20 @@ def test_index_failure_message_is_visible_on_document_list(client, db, monkeypat
|
||||
assert response.status_code == 200
|
||||
assert "文档入库失败,请检查错误原因后重试" in content
|
||||
assert "模拟入库失败" in content
|
||||
|
||||
|
||||
def test_index_document_marks_failed_when_extracted_text_is_empty(db, monkeypatch):
    """Indexing a document whose extracted text is blank marks it as failed.

    ``extract_text`` in ``apps.documents.services`` is replaced with a stub
    that returns a whitespace-only string, so the outcome depends only on
    how ``index_document`` treats blank content.
    """

    def _blank_text(target):
        # Stand-in for the real extractor: always reports blank content.
        return " "

    monkeypatch.setattr("apps.documents.services.extract_text", _blank_text)

    record_fields = {
        "scenario_id": "knowledge_qa",
        "original_name": "empty.md",
        "file_type": "md",
        "size": 0,
        "status": "uploaded",
    }
    document = UploadedDocument.objects.create(**record_fields)

    updated_document = index_document(document)

    assert updated_document.status == UploadedDocument.STATUS_FAILED
    assert "文档内容为空" in updated_document.error_message
|
||||
|
||||
Reference in New Issue
Block a user