refactor(rag): 梳理文档入库与检索服务结构

This commit is contained in:
2026-05-30 00:44:52 +08:00
parent f68b44f325
commit ccfe5eb667
6 changed files with 284 additions and 103 deletions

View File

@@ -1,5 +1,5 @@
from agent_core.orchestrator import build_messages, run_agent
from agent_core.rag.ingest import ingest_document
from agent_core.rag.ingest import _split_text, ingest_document
from agent_core.rag.retriever import retrieve
@@ -221,3 +221,30 @@ def test_run_agent_uses_retrieved_document_chunks(tmp_path):
assert result.references[0]["source"] == "sop.md"
assert "隔离现场" in result.references[0]["content"]
def test_rag_split_text_keeps_overlap_and_non_empty_chunks():
    """Check the chunk list ``_split_text`` produces for a 20-character input.

    With ``chunk_size=8`` and ``overlap=3`` the expected result is three
    full 8-character chunks followed by a 5-character tail, and no empty
    trailing chunk.
    """
    text = "A" * 20
    # NOTE(review): four chunks is consistent with a stride of
    # chunk_size - overlap = 5 (starts at 0, 5, 10, 15) -- confirm against
    # the _split_text implementation.
    expected = ["AAAAAAAA", "AAAAAAAA", "AAAAAAAA", "AAAAA"]

    assert _split_text(text, chunk_size=8, overlap=3) == expected
def test_retrieve_returns_empty_when_query_has_no_overlap(tmp_path):
    """Check that ``retrieve`` yields ``[]`` for a query unrelated to the store.

    A single document about reimbursement and approval is ingested into a
    store file under ``tmp_path``; the query (about equipment inspection)
    shares no characters with that text, and the result must be an empty
    list.
    """
    # Arguments common to both the ingest and the retrieve call, so that
    # both operate on the same scenario, collection and store file.
    shared_args = {
        "scenario_id": "knowledge_qa",
        "collection": "knowledge_qa",
        "store_path": tmp_path / "rag_store.json",
    }

    ingest_document(
        source_file="rules.md",
        text="这里描述的是报销流程和审批链。",
        **shared_args,
    )

    hits = retrieve(query="设备点检", top_k=3, **shared_args)

    assert hits == []