feat: 动态生成资料包异常提示

2026-06-04 03:50:12 +08:00
parent dc86fc0e58
commit 7c0dfe14d5
3 changed files with 160 additions and 14 deletions
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -247,6 +247,59 @@ def build_batch_rows(batches) -> list[dict]:
    return rows


+def build_exception_items(batches, documents) -> list[dict]:
+    """
+    Collect the exception notices that the submission-package page should surface.
+
+    Only real exception sources are returned, so the page stops showing static demo text:
+    - batches whose import status is "review required"
+    - documents flagged for manual review
+    - documents whose processing failed
+    """
+    items = []
+
+    for document in documents:
+        if document.status == UploadedDocument.STATUS_FAILED:
+            items.append(
+                {
+                    "level": "失败",
+                    "title": f"文档处理失败：{document.original_name}",
+                    "detail": document.error_message or "文档处理异常，请重新上传或稍后重试。",
+                }
+            )
+            continue  # a failed document is reported once; skip the manual-review check
+
+        if document.needs_manual_review:
+            review_reasons = []
+            if document.file_type.lower() == "docx" and document.page_count_confidence != "exact":
+                review_reasons.append("页数为估算值，建议人工确认")
+            if not document.chapter_code or document.chapter_match_status != "matched":
+                review_reasons.append("章节点未识别，建议人工确认归类")
+            items.append(
+                {
+                    "level": "待确认",
+                    "title": f"文档待人工复核：{document.original_name}",
+                    "detail": "；".join(review_reasons) or "资料存在待确认项，建议人工复核。",  # generic hint when no reason matched
+                }
+            )
+
+    for batch in batches:
+        if batch.import_status != SubmissionBatch.STATUS_REVIEW_REQUIRED:
+            continue  # only batches awaiting review produce a notice
+        items.append(
+            {
+                "level": "待确认",
+                "title": f"资料包待复核：{batch.batch_id}",
+                "detail": (
+                    f"{batch.product_name or '未识别产品名称'} 当前存在 "
+                    f"{batch.exception_count} 项异常，请进入关联会话或处理历史继续复核。"
+                ),
+            }
+        )
+
+    return items  # document notices come first, then batch notices
+
+
 def extract_text(document: UploadedDocument) -> str:
    """
    根据文档类型选择合适的文本抽取策略。
--- a/apps/documents/views.py
+++ b/apps/documents/views.py
@@ -7,7 +7,12 @@ from apps.scenarios.services import list_scenarios

 from .forms import DocumentUploadForm
 from .models import SubmissionBatch, UploadedDocument
-from .services import build_batch_rows, import_submission_batch, index_document
+from .services import (
+    build_batch_rows,
+    build_exception_items,
+    import_submission_batch,
+    index_document,
+)


 def document_list(request):
@@ -18,14 +23,21 @@ def document_list(request):
        batches = batches.filter(
            Q(product_name__icontains=keyword) | Q(batch_id__icontains=keyword)
        )
-    documents = UploadedDocument.objects.all()
+    batches = list(batches)
+    documents = list(UploadedDocument.objects.all())
    status_counts = {
-        "pending": batches.filter(import_status=SubmissionBatch.STATUS_PENDING).count(),
-        "completed": batches.filter(import_status=SubmissionBatch.STATUS_COMPLETED).count(),
-        "review_required": batches.filter(
-            import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED
-        ).count(),
-        "total": batches.count(),
+        "pending": sum(
+            1 for batch in batches if batch.import_status == SubmissionBatch.STATUS_PENDING
+        ),
+        "completed": sum(
+            1 for batch in batches if batch.import_status == SubmissionBatch.STATUS_COMPLETED
+        ),
+        "review_required": sum(
+            1
+            for batch in batches
+            if batch.import_status == SubmissionBatch.STATUS_REVIEW_REQUIRED
+        ),
+        "total": len(batches),
    }
    processing_pipeline = [
        {"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"},
@@ -34,11 +46,6 @@ def document_list(request):
        {"title": "章节点归类", "detail": "基于文件名、标题和正文线索识别 CH 节点。"},
        {"title": "切片与索引入库", "detail": "生成知识切片，供 RAG、规则定位和审计引用使用。"},
    ]
-    exception_items = [
-        {"level": "待确认", "title": "CH1.2 监管信息目录.docx", "detail": "目录页码与正文页数存在偏差，建议人工复核。"},
-        {"level": "低可信度", "title": "目标产品说明书.docx", "detail": "Word 页数为估算值，表格抽取质量良好。"},
-        {"level": "失败", "title": "沟通记录扫描件.pdf", "detail": "疑似扫描件，需补做 OCR 或重新上传清晰版。"},
-    ]
    return render(
        request,
        "documents/document_list.html",
@@ -49,7 +56,7 @@ def document_list(request):
            "keyword": keyword,
            "status_counts": status_counts,
            "processing_pipeline": processing_pipeline,
-            "exception_items": exception_items,
+            "exception_items": build_exception_items(batches, documents),
        },
    )

--- a/tests/test_documents.py
+++ b/tests/test_documents.py
@@ -641,3 +641,89 @@ def test_document_list_shows_export_history_links_and_processing_pipeline(client
    assert reverse("audit:list") in content
    assert "查看导出记录" in content
    assert f"{reverse('audit:list')}?keyword=SUB-20260604-012" in content
+
+
+def test_document_list_shows_batch_level_exception_items(client, db):
+    SubmissionBatch.objects.create(  # a review-required batch with no documents attached
+        batch_id="SUB-20260604-101",
+        product_name="甲型流感病毒抗原检测试剂盒",
+        workflow_type="registration",
+        conversation_id="conv-101",
+        file_count=4,
+        page_count=28,
+        import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED,
+        exception_count=2,
+    )
+
+    response = client.get(reverse("documents:list"))  # list page requested without a keyword
+
+    content = response.content.decode("utf-8")
+    assert response.status_code == 200
+    assert "资料包待复核：SUB-20260604-101" in content  # batch-level notice title
+    assert "甲型流感病毒抗原检测试剂盒 当前存在 2 项异常" in content  # detail carries product name and count
+
+
+def test_document_list_shows_manual_review_document_exception_items(client, db):
+    batch = SubmissionBatch.objects.create(
+        batch_id="SUB-20260604-102",
+        product_name="乙型流感病毒抗原检测试剂盒",
+        workflow_type="registration",
+        conversation_id="conv-102",
+        file_count=1,
+        page_count=9,
+        import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED,
+        exception_count=1,
+    )
+    UploadedDocument.objects.create(
+        batch=batch,
+        scenario_id="document_review",
+        original_name="CH1-产品说明书.docx",
+        file="documents/20260604/manual-review.docx",
+        file_type="docx",
+        size=128,
+        page_count=9,
+        page_count_confidence="estimated",  # not "exact", so a docx gets the page-count reason
+        chapter_code="CH1",
+        chapter_match_status="matched",  # chapter is matched, so no chapter reason is expected
+        needs_manual_review=True,
+        status=UploadedDocument.STATUS_UPLOADED,
+    )
+
+    response = client.get(reverse("documents:list"))  # list page requested without a keyword
+
+    content = response.content.decode("utf-8")
+    assert response.status_code == 200
+    assert "文档待人工复核：CH1-产品说明书.docx" in content  # document-level notice title
+    assert "页数为估算值，建议人工确认" in content  # estimated page-count reason in the detail
+
+
+def test_document_list_shows_failed_document_exception_items(client, db):
+    batch = SubmissionBatch.objects.create(
+        batch_id="SUB-20260604-103",
+        product_name="呼吸道病原体多重核酸检测试剂盒",
+        workflow_type="registration",
+        conversation_id="conv-103",
+        file_count=1,
+        page_count=5,
+        import_status=SubmissionBatch.STATUS_COMPLETED,  # batch itself is not under review
+        exception_count=0,
+    )
+    UploadedDocument.objects.create(
+        batch=batch,
+        scenario_id="document_review",
+        original_name="沟通记录扫描件.pdf",
+        file="documents/20260604/failed.pdf",
+        file_type="pdf",
+        size=256,
+        page_count=5,
+        chapter_match_status="unknown",
+        status=UploadedDocument.STATUS_FAILED,
+        error_message="OCR 识别失败，请重新上传清晰版。",  # asserted below to appear verbatim on the page
+    )
+
+    response = client.get(reverse("documents:list"))  # list page requested without a keyword
+
+    content = response.content.decode("utf-8")
+    assert response.status_code == 200
+    assert "文档处理失败：沟通记录扫描件.pdf" in content  # failed-document notice title
+    assert "OCR 识别失败，请重新上传清晰版。" in content  # stored error message used as the detail