refactor(rag): 梳理文档入库与检索服务结构

This commit is contained in:
2026-05-30 00:44:52 +08:00
parent f68b44f325
commit ccfe5eb667
6 changed files with 284 additions and 103 deletions

View File

@@ -3,7 +3,7 @@ from django.urls import reverse
from apps.documents.forms import DocumentUploadForm
from apps.documents.models import UploadedDocument
from apps.documents.services import extract_text
from apps.documents.services import extract_text, index_document
def test_upload_txt_document_creates_uploaded_record(client, db):
@@ -128,3 +128,20 @@ def test_index_failure_message_is_visible_on_document_list(client, db, monkeypat
assert response.status_code == 200
assert "文档入库失败,请检查错误原因后重试" in content
assert "模拟入库失败" in content
def test_index_document_marks_failed_when_extracted_text_is_empty(db, monkeypatch):
    """Check that indexing a document with blank extracted text ends in failure.

    ``extract_text`` is patched on ``apps.documents.services`` to return a
    single space. ``index_document`` is then expected to hand back a document
    whose status is ``UploadedDocument.STATUS_FAILED`` and whose error message
    contains the expected empty-content text.
    """
    record_fields = {
        "scenario_id": "knowledge_qa",
        "original_name": "empty.md",
        "file_type": "md",
        "size": 0,
        "status": "uploaded",
    }
    blank_upload = UploadedDocument.objects.create(**record_fields)

    def _blank_text(target):
        # Stand-in extractor: returns a lone space whatever the target is.
        return " "

    monkeypatch.setattr("apps.documents.services.extract_text", _blank_text)

    result = index_document(blank_upload)

    assert result.status == UploadedDocument.STATUS_FAILED
    assert "文档内容为空" in result.error_message