refactor(rag): 梳理文档入库与检索服务结构
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from agent_core.orchestrator import build_messages, run_agent
|
||||
from agent_core.rag.ingest import ingest_document
|
||||
from agent_core.rag.ingest import _split_text, ingest_document
|
||||
from agent_core.rag.retriever import retrieve
|
||||
|
||||
|
||||
@@ -221,3 +221,30 @@ def test_run_agent_uses_retrieved_document_chunks(tmp_path):
|
||||
|
||||
assert result.references[0]["source"] == "sop.md"
|
||||
assert "隔离现场" in result.references[0]["content"]
|
||||
|
||||
|
||||
def test_rag_split_text_keeps_overlap_and_non_empty_chunks():
    """Check how a 20-character string is split with chunk_size=8, overlap=3.

    The expected pieces have lengths 8, 8, 8 and 5: their combined length
    exceeds the input length, and the final piece is short but not empty.
    """
    expected = ["AAAAAAAA", "AAAAAAAA", "AAAAAAAA", "AAAAA"]

    pieces = _split_text("A" * 20, chunk_size=8, overlap=3)

    assert pieces == expected
|
||||
|
||||
|
||||
def test_retrieve_returns_empty_when_query_has_no_overlap(tmp_path):
    """Retrieval returns an empty list when the query matches nothing stored.

    A single document is ingested into a store file under ``tmp_path``; the
    query text then shares no wording with that document, so the result is
    expected to be exactly ``[]``.
    """
    # Keyword arguments that both the ingest and the retrieve call share.
    shared_kwargs = {
        "scenario_id": "knowledge_qa",
        "collection": "knowledge_qa",
        "store_path": tmp_path / "rag_store.json",
    }

    ingest_document(
        source_file="rules.md",
        text="这里描述的是报销流程和审批链。",
        **shared_kwargs,
    )

    hits = retrieve(query="设备点检", top_k=3, **shared_kwargs)

    assert hits == []
|
||||
|
||||
Reference in New Issue
Block a user