"""Tests for the document upload view, the index view and ``extract_text``."""

from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse

from apps.documents.models import UploadedDocument
from apps.documents.services import extract_text


def test_upload_txt_document_creates_uploaded_record(client, db):
    """Posting a ``.txt`` file to the upload view stores one document row.

    The view is expected to redirect (302) and the single resulting
    ``UploadedDocument`` must carry status ``"uploaded"``, file type
    ``"txt"`` and the scenario id sent in the form.
    """
    file = SimpleUploadedFile("rules.txt", b"hello", content_type="text/plain")

    response = client.post(
        reverse("documents:upload"),
        {"scenario_id": "knowledge_qa", "file": file},
    )

    assert response.status_code == 302
    # ``get()`` without arguments also asserts that exactly one row exists.
    document = UploadedDocument.objects.get()
    assert document.status == "uploaded"
    assert document.file_type == "txt"
    assert document.scenario_id == "knowledge_qa"


def test_upload_accepts_pdf_and_docx_documents(client, db):
    """The upload view accepts ``.pdf`` and ``.docx`` files.

    Each upload must redirect (302); afterwards the stored ``file_type``
    values must be exactly ``{"pdf", "docx"}``.
    """
    cases = [
        ("policy.pdf", b"%PDF-1.4\nplain policy text"),
        ("contract.docx", b"fake-docx-body"),
    ]

    for filename, payload in cases:
        file = SimpleUploadedFile(filename, payload)
        response = client.post(
            reverse("documents:upload"),
            {"scenario_id": "knowledge_qa", "file": file},
        )
        # Include the filename so a failure identifies the offending case.
        assert response.status_code == 302, f"upload of {filename} was rejected"

    stored_types = set(UploadedDocument.objects.values_list("file_type", flat=True))
    assert stored_types == {"pdf", "docx"}


def test_index_document_updates_status_to_indexed(client, db):
    """Posting to the index view marks an uploaded document as indexed.

    After the redirect (302) the reloaded document must have status
    ``"indexed"`` and an empty ``error_message``.
    """
    document = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="rules.md",
        file="knowledge_qa/rules.md",
        file_type="md",
        size=5,
        status="uploaded",
    )
    # Write real content through the field's storage. ``FieldFile.save``
    # expects a ``django.core.files.File`` instance, so pass the
    # ``SimpleUploadedFile`` itself rather than its underlying raw stream.
    document.file.save("rules.md", SimpleUploadedFile("rules.md", b"# rule"))

    response = client.post(reverse("documents:index", args=[document.id]))

    assert response.status_code == 302
    # The view changes the row in the database; reload before asserting.
    document.refresh_from_db()
    assert document.status == "indexed"
    assert document.error_message == ""


def test_extract_text_supports_pdf_and_docx_plain_text_fallback(db):
    """``extract_text`` returns the text of pdf/docx documents with plain payloads.

    Neither payload is a well-formed PDF or DOCX file; the test only
    requires that the known phrases appear in the extracted text.
    """
    pdf_document = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="policy.pdf",
        file_type="pdf",
        size=10,
        status="uploaded",
    )
    pdf_document.file.save(
        "policy.pdf",
        SimpleUploadedFile("policy.pdf", b"%PDF-1.4\nSafety policy"),
    )

    docx_document = UploadedDocument.objects.create(
        scenario_id="knowledge_qa",
        original_name="contract.docx",
        file_type="docx",
        size=10,
        status="uploaded",
    )
    docx_document.file.save(
        "contract.docx",
        SimpleUploadedFile("contract.docx", b"Contract clause review"),
    )

    assert "Safety policy" in extract_text(pdf_document)
    assert "Contract clause review" in extract_text(docx_document)