Files
DEMO-AGENT/tests/test_documents.py

148 lines
4.8 KiB
Python

from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse
from apps.documents.forms import DocumentUploadForm
from apps.documents.models import UploadedDocument
from apps.documents.services import extract_text, index_document
def test_upload_txt_document_creates_uploaded_record(client, db):
    """Posting a .txt file to the upload view stores one ``UploadedDocument``.

    The view must answer with a redirect (302), and the single stored record
    must carry the ``uploaded`` status, the ``txt`` file type and the
    submitted scenario id.
    """
    upload = SimpleUploadedFile("rules.txt", b"hello", content_type="text/plain")
    form_data = {"scenario_id": "knowledge_qa", "file": upload}

    response = client.post(reverse("documents:upload"), form_data)

    assert response.status_code == 302
    # An unfiltered ``get()`` also proves that exactly one row exists.
    record = UploadedDocument.objects.get()
    assert record.status == "uploaded"
    assert record.file_type == "txt"
    assert record.scenario_id == "knowledge_qa"
def test_upload_redirect_shows_success_message(client, db):
    """Following the post-upload redirect renders the upload success message."""
    form_data = {
        "scenario_id": "knowledge_qa",
        "file": SimpleUploadedFile("notice.txt", b"hello", content_type="text/plain"),
    }

    # ``follow=True`` makes the test client load the redirect target, so the
    # final response is the rendered page rather than the 302 itself.
    response = client.post(reverse("documents:upload"), form_data, follow=True)

    assert response.status_code == 200
    page = response.content.decode("utf-8")
    assert "文件已上传,可继续执行入库" in page
def test_upload_accepts_pdf_and_docx_documents(client, db):
    """Uploading a PDF and a DOCX each redirects and records both file types."""
    samples = {
        "policy.pdf": b"%PDF-1.4\nplain policy text",
        "contract.docx": b"fake-docx-body",
    }
    upload_url = reverse("documents:upload")

    for filename, payload in samples.items():
        upload = SimpleUploadedFile(filename, payload)
        response = client.post(upload_url, {"scenario_id": "knowledge_qa", "file": upload})
        assert response.status_code == 302

    # Checked once after both uploads: each extension must have been recorded.
    stored_types = set(UploadedDocument.objects.values_list("file_type", flat=True))
    assert stored_types == {"pdf", "docx"}
def test_index_document_updates_status_to_indexed(client, db):
    """Posting to the index view moves an uploaded document to ``indexed``.

    A Markdown document is created and given real file content, the index
    endpoint is called for it, and the reloaded record must report the
    ``indexed`` status with an empty error message.
    """
    document = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="rules.md",
        file="knowledge_qa/rules.md",
        file_type="md",
        size=5,
        status="uploaded",
    )
    # Pass the SimpleUploadedFile itself (a django File subclass) rather than
    # its underlying raw stream: FieldFile.save documents ``content`` as a
    # File instance, and the other tests in this module save files this way.
    document.file.save("rules.md", SimpleUploadedFile("rules.md", b"# rule"))

    response = client.post(reverse("documents:index", args=[document.id]))

    assert response.status_code == 302
    # The view changes the row in the database; reload before asserting.
    document.refresh_from_db()
    assert document.status == "indexed"
    assert document.error_message == ""
def test_extract_text_supports_pdf_and_docx_plain_text_fallback(db):
    """``extract_text`` returns the text stored in PDF and DOCX uploads.

    Both payloads are short byte strings containing plain text; the test
    checks that this text appears in the extracted output for each type.
    """

    def stored_document(name, file_type, body):
        # Create the database row first, then attach the file content to it.
        record = UploadedDocument.objects.create(
            scenario_id="knowledge_qa",
            original_name=name,
            file_type=file_type,
            size=10,
            status="uploaded",
        )
        record.file.save(name, SimpleUploadedFile(name, body))
        return record

    pdf_document = stored_document("policy.pdf", "pdf", b"%PDF-1.4\nSafety policy")
    docx_document = stored_document("contract.docx", "docx", b"Contract clause review")

    assert "Safety policy" in extract_text(pdf_document)
    assert "Contract clause review" in extract_text(docx_document)
def test_document_upload_form_builds_scenario_choices():
    """An unbound upload form lists the expected ``scenario_id`` choice values."""
    scenario_field = DocumentUploadForm().fields["scenario_id"]
    offered_values = [key for key, _text in scenario_field.choices]

    for expected in ("knowledge_qa", "quality_analysis"):
        assert expected in offered_values
def test_index_failure_message_is_visible_on_document_list(client, db, monkeypatch):
    """A failed indexing run shows the failure message and the stored error.

    The view's ``index_document`` is replaced with a stub that marks the
    document as failed, so the test only checks what the followed redirect
    renders.
    """
    broken = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="broken.md",
        file_type="md",
        size=5,
        status="uploaded",
    )

    def fake_index_document(target_document):
        # Stand-in for the real service: record a failure and persist it.
        target_document.status = UploadedDocument.STATUS_FAILED
        target_document.error_message = "模拟入库失败"
        target_document.save(update_fields=["status", "error_message", "updated_at"])
        return target_document

    # Patch the name as looked up by the view module.
    monkeypatch.setattr("apps.documents.views.index_document", fake_index_document)

    index_url = reverse("documents:index", args=[broken.id])
    response = client.post(index_url, follow=True)
    page = response.content.decode("utf-8")

    assert response.status_code == 200
    assert "文档入库失败,请检查错误原因后重试" in page
    assert "模拟入库失败" in page
def test_index_document_marks_failed_when_extracted_text_is_empty(db, monkeypatch):
    """``index_document`` marks a document failed when its text is only whitespace."""
    empty_doc = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="empty.md",
        file_type="md",
        size=0,
        status="uploaded",
    )

    def whitespace_only(target):
        # Stub extraction result: a single space, i.e. no usable content.
        return " "

    monkeypatch.setattr("apps.documents.services.extract_text", whitespace_only)

    result = index_document(empty_doc)

    assert result.status == UploadedDocument.STATUS_FAILED
    assert "文档内容为空" in result.error_message