feat(documents): 支持文档上传与本地RAG入库
This commit is contained in:
82
tests/test_documents.py
Normal file
82
tests/test_documents.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.urls import reverse
|
||||
|
||||
from apps.documents.models import UploadedDocument
|
||||
from apps.documents.services import extract_text
|
||||
|
||||
|
||||
def test_upload_txt_document_creates_uploaded_record(client, db):
    """Upload a plain-text file and check the single record that results.

    The upload endpoint is expected to answer with a redirect (302) and to
    leave exactly one ``UploadedDocument`` whose status, file type and
    scenario match what was submitted.
    """
    upload = SimpleUploadedFile(
        "rules.txt",
        "hello".encode("utf-8"),
        content_type="text/plain",
    )
    form_data = {"scenario_id": "knowledge_qa", "file": upload}

    response = client.post(reverse("documents:upload"), form_data)
    assert response.status_code == 302

    # .get() with no filters doubles as a check that exactly one row exists.
    saved = UploadedDocument.objects.get()
    assert saved.status == "uploaded"
    assert saved.file_type == "txt"
    assert saved.scenario_id == "knowledge_qa"
|
||||
|
||||
|
||||
def test_upload_accepts_pdf_and_docx_documents(client, db):
    """Upload one PDF and one DOCX file and check both file types are stored.

    Each upload must be answered with a redirect (302); afterwards the set of
    stored ``file_type`` values must be exactly ``{"pdf", "docx"}``.
    """
    samples = {
        "policy.pdf": b"%PDF-1.4\nplain policy text",
        "contract.docx": b"fake-docx-body",
    }
    upload_url = reverse("documents:upload")

    for name, body in samples.items():
        upload = SimpleUploadedFile(name, body)
        response = client.post(
            upload_url,
            {"scenario_id": "knowledge_qa", "file": upload},
        )
        assert response.status_code == 302

    stored_types = set(UploadedDocument.objects.values_list("file_type", flat=True))
    assert stored_types == {"pdf", "docx"}
|
||||
|
||||
|
||||
def test_index_document_updates_status_to_indexed(client, db):
    """Check that POSTing to ``documents:index`` marks a document as indexed.

    A Markdown document row is created in the ``uploaded`` state and its
    content is written through the model's file field. The index endpoint is
    then called for that document. The test expects a redirect (302) and,
    after reloading the row, ``status == "indexed"`` with an empty
    ``error_message``.
    """
    content = b"# rule"
    document = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="rules.md",
        file="knowledge_qa/rules.md",
        file_type="md",
        # Keep the recorded size in step with the bytes actually stored.
        size=len(content),
        status="uploaded",
    )
    # FieldFile.save() is documented to take a django.core.files.File, so hand
    # it the upload wrapper itself rather than its underlying raw stream.
    document.file.save("rules.md", SimpleUploadedFile("rules.md", content))

    response = client.post(reverse("documents:index", args=[document.id]))

    assert response.status_code == 302
    # The view updates the database row; reload to observe its changes.
    document.refresh_from_db()
    assert document.status == "indexed"
    assert document.error_message == ""
|
||||
|
||||
|
||||
def test_extract_text_supports_pdf_and_docx_plain_text_fallback(db):
    """Check ``extract_text`` returns the readable text of PDF and DOCX uploads.

    The stored payloads are plain bytes rather than well-formed PDF/DOCX
    files; the test only requires that the text written into each file shows
    up in what ``extract_text`` returns for it.
    """
    fixtures = [
        ("policy.pdf", "pdf", b"%PDF-1.4\nSafety policy"),
        ("contract.docx", "docx", b"Contract clause review"),
    ]

    stored = []
    for name, kind, body in fixtures:
        record = UploadedDocument.objects.create(
            scenario_id="knowledge_qa",
            original_name=name,
            file_type=kind,
            size=10,
            status="uploaded",
        )
        record.file.save(name, SimpleUploadedFile(name, body))
        stored.append(record)

    pdf_record, docx_record = stored
    assert "Safety policy" in extract_text(pdf_record)
    assert "Contract clause review" in extract_text(docx_record)
|
||||
Reference in New Issue
Block a user