def build_exception_items(batches, documents) -> list[dict]:
    """
    Collect the exception notices to display on the submission-package page.

    Only notices backed by real records are produced, so the page no longer
    needs static demo text. Three sources are considered:
    - documents whose processing failed
    - documents flagged for manual review
    - batches whose import status is "review required"

    Args:
        batches: Iterable of batch records; each is read for
            ``import_status``, ``batch_id``, ``product_name`` and
            ``exception_count``.
        documents: Iterable of document records; each is read for ``status``,
            ``original_name``, ``error_message``, ``needs_manual_review``,
            ``file_type``, ``page_count_confidence``, ``chapter_code`` and
            ``chapter_match_status``.

    Returns:
        A list of dicts with the keys ``level``, ``title`` and ``detail``.
        Document notices come first (in input order), followed by batch
        notices (in input order).
    """
    notices = []

    for doc in documents:
        if doc.status == UploadedDocument.STATUS_FAILED:
            # A failed document is reported as a failure only, even when it is
            # also flagged for manual review.
            failure_title = f"文档处理失败:{doc.original_name}"
            failure_detail = doc.error_message or "文档处理异常,请重新上传或稍后重试。"
            notices.append(
                {"level": "失败", "title": failure_title, "detail": failure_detail}
            )
        elif doc.needs_manual_review:
            reasons = []

            # Only DOCX files whose page count is not marked "exact" get the
            # page-count reason.
            is_docx = doc.file_type.lower() == "docx"
            if is_docx and doc.page_count_confidence != "exact":
                reasons.append("页数为估算值,建议人工确认")

            # The chapter counts as resolved only with a non-empty code AND a
            # "matched" status.
            chapter_resolved = doc.chapter_code and doc.chapter_match_status == "matched"
            if not chapter_resolved:
                reasons.append("章节点未识别,建议人工确认归类")

            review_title = f"文档待人工复核:{doc.original_name}"
            # Fall back to a generic hint when no specific reason applies.
            review_detail = ";".join(reasons) or "资料存在待确认项,建议人工复核。"
            notices.append(
                {"level": "待确认", "title": review_title, "detail": review_detail}
            )

    # Batch-level notices: only batches still waiting for review are listed.
    notices.extend(
        {
            "level": "待确认",
            "title": f"资料包待复核:{pkg.batch_id}",
            "detail": (
                f"{pkg.product_name or '未识别产品名称'} 当前存在 "
                f"{pkg.exception_count} 项异常,请进入关联会话或处理历史继续复核。"
            ),
        }
        for pkg in batches
        if pkg.import_status == SubmissionBatch.STATUS_REVIEW_REQUIRED
    )

    return notices
build_exception_items, + import_submission_batch, + index_document, +) def document_list(request): @@ -18,14 +23,21 @@ def document_list(request): batches = batches.filter( Q(product_name__icontains=keyword) | Q(batch_id__icontains=keyword) ) - documents = UploadedDocument.objects.all() + batches = list(batches) + documents = list(UploadedDocument.objects.all()) status_counts = { - "pending": batches.filter(import_status=SubmissionBatch.STATUS_PENDING).count(), - "completed": batches.filter(import_status=SubmissionBatch.STATUS_COMPLETED).count(), - "review_required": batches.filter( - import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED - ).count(), - "total": batches.count(), + "pending": sum( + 1 for batch in batches if batch.import_status == SubmissionBatch.STATUS_PENDING + ), + "completed": sum( + 1 for batch in batches if batch.import_status == SubmissionBatch.STATUS_COMPLETED + ), + "review_required": sum( + 1 + for batch in batches + if batch.import_status == SubmissionBatch.STATUS_REVIEW_REQUIRED + ), + "total": len(batches), } processing_pipeline = [ {"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"}, @@ -34,11 +46,6 @@ def document_list(request): {"title": "章节点归类", "detail": "基于文件名、标题和正文线索识别 CH 节点。"}, {"title": "切片与索引入库", "detail": "生成知识切片,供 RAG、规则定位和审计引用使用。"}, ] - exception_items = [ - {"level": "待确认", "title": "CH1.2 监管信息目录.docx", "detail": "目录页码与正文页数存在偏差,建议人工复核。"}, - {"level": "低可信度", "title": "目标产品说明书.docx", "detail": "Word 页数为估算值,表格抽取质量良好。"}, - {"level": "失败", "title": "沟通记录扫描件.pdf", "detail": "疑似扫描件,需补做 OCR 或重新上传清晰版。"}, - ] return render( request, "documents/document_list.html", @@ -49,7 +56,7 @@ def document_list(request): "keyword": keyword, "status_counts": status_counts, "processing_pipeline": processing_pipeline, - "exception_items": exception_items, + "exception_items": build_exception_items(batches, documents), }, ) diff --git a/tests/test_documents.py b/tests/test_documents.py index 83c8be2..f2fb3b6 100644 --- a/tests/test_documents.py +++ 
def test_document_list_shows_batch_level_exception_items(client, db):
    """A batch in the review-required state produces a batch-level notice."""
    batch_fields = {
        "batch_id": "SUB-20260604-101",
        "product_name": "甲型流感病毒抗原检测试剂盒",
        "workflow_type": "registration",
        "conversation_id": "conv-101",
        "file_count": 4,
        "page_count": 28,
        "import_status": SubmissionBatch.STATUS_REVIEW_REQUIRED,
        "exception_count": 2,
    }
    SubmissionBatch.objects.create(**batch_fields)

    page = client.get(reverse("documents:list"))

    html = page.content.decode("utf-8")
    assert page.status_code == 200
    assert "资料包待复核:SUB-20260604-101" in html
    assert "甲型流感病毒抗原检测试剂盒 当前存在 2 项异常" in html


def test_document_list_shows_manual_review_document_exception_items(client, db):
    """A DOCX flagged for manual review with an estimated page count is listed."""
    owning_batch = SubmissionBatch.objects.create(
        batch_id="SUB-20260604-102",
        product_name="乙型流感病毒抗原检测试剂盒",
        workflow_type="registration",
        conversation_id="conv-102",
        file_count=1,
        page_count=9,
        import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED,
        exception_count=1,
    )
    # The chapter is matched, so only the page-count reason is expected.
    document_fields = {
        "batch": owning_batch,
        "scenario_id": "document_review",
        "original_name": "CH1-产品说明书.docx",
        "file": "documents/20260604/manual-review.docx",
        "file_type": "docx",
        "size": 128,
        "page_count": 9,
        "page_count_confidence": "estimated",
        "chapter_code": "CH1",
        "chapter_match_status": "matched",
        "needs_manual_review": True,
        "status": UploadedDocument.STATUS_UPLOADED,
    }
    UploadedDocument.objects.create(**document_fields)

    page = client.get(reverse("documents:list"))

    html = page.content.decode("utf-8")
    assert page.status_code == 200
    assert "文档待人工复核:CH1-产品说明书.docx" in html
    assert "页数为估算值,建议人工确认" in html


def test_document_list_shows_failed_document_exception_items(client, db):
    """A failed document is listed together with its stored error message."""
    # The batch is completed, so the failure notice must come from the
    # document itself.
    owning_batch = SubmissionBatch.objects.create(
        batch_id="SUB-20260604-103",
        product_name="呼吸道病原体多重核酸检测试剂盒",
        workflow_type="registration",
        conversation_id="conv-103",
        file_count=1,
        page_count=5,
        import_status=SubmissionBatch.STATUS_COMPLETED,
        exception_count=0,
    )
    document_fields = {
        "batch": owning_batch,
        "scenario_id": "document_review",
        "original_name": "沟通记录扫描件.pdf",
        "file": "documents/20260604/failed.pdf",
        "file_type": "pdf",
        "size": 256,
        "page_count": 5,
        "chapter_match_status": "unknown",
        "status": UploadedDocument.STATUS_FAILED,
        "error_message": "OCR 识别失败,请重新上传清晰版。",
    }
    UploadedDocument.objects.create(**document_fields)

    page = client.get(reverse("documents:list"))

    html = page.content.decode("utf-8")
    assert page.status_code == 200
    assert "文档处理失败:沟通记录扫描件.pdf" in html
    assert "OCR 识别失败,请重新上传清晰版。" in html