feat: 支持会话内补传资料并保持绑定

2026-06-04 01:51:48 +08:00
parent 1e18fd2be9
commit 96f710ea13
6 changed files with 278 additions and 53 deletions
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -53,51 +53,14 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
        workflow_type="registration",
        import_status=SubmissionBatch.STATUS_PROCESSING,
    )
-    documents = []
-    candidates = []
-    chapter_summary = {}
-    total_pages = 0
-    warnings = []
-
-    expanded_result = _expand_uploaded_files(uploaded_files)
-    expanded_files = expanded_result["files"]
-    warnings.extend(expanded_result["warnings"])
-    for uploaded_item in expanded_files:
-        uploaded_file = uploaded_item["uploaded_file"]
-        relative_path = uploaded_item["relative_path"]
-        document = create_uploaded_document(
-            scenario_id,
-            uploaded_file,
-            batch=batch,
-            relative_path=relative_path,
-        )
-        text = extract_text(document)
-        page_count = _estimate_page_count(text)
-        document.page_count = page_count
-        document.page_count_confidence = "estimated"
-        document.document_role = _detect_document_role(document.relative_path)
-        document.chapter_code = _detect_chapter_code(document.relative_path, text)
-        document.chapter_match_status = "matched" if document.chapter_code else "unknown"
-        document.needs_manual_review = not bool(document.chapter_code)
-        document.save(
-            update_fields=[
-                "page_count",
-                "page_count_confidence",
-                "document_role",
-                "chapter_code",
-                "chapter_match_status",
-                "needs_manual_review",
-                "updated_at",
-            ]
-        )
-        documents.append(document)
-        total_pages += page_count
-        chapter_key = document.chapter_code or "UNCLASSIFIED"
-        chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
-        candidates.extend(_extract_product_candidates(document.relative_path, text))
-
-    product_name, product_warnings = _select_product_name(candidates)
-    warnings.extend(product_warnings)
+    ingest_result = _ingest_files_into_batch(
+        batch=batch,
+        scenario_id=scenario_id,
+        uploaded_files=uploaded_files,
+    )
+    documents = ingest_result["documents"]
+    warnings = ingest_result["warnings"]
+    product_name = ingest_result["product_name"]
    conversation = create_conversation_for_batch(batch.batch_id, product_name)

    if not documents:
@@ -106,11 +69,8 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
    batch.product_name = product_name
    batch.conversation_id = conversation.conversation_id
    batch.file_count = len(documents)
-    batch.page_count = total_pages
-    batch.chapter_summary = [
-        {"chapter_code": chapter_code, "document_count": count}
-        for chapter_code, count in sorted(chapter_summary.items())
-    ]
+    batch.page_count = ingest_result["page_count"]
+    batch.chapter_summary = ingest_result["chapter_summary"]
    batch.exception_count = len(warnings)
    if not documents:
        batch.import_status = SubmissionBatch.STATUS_FAILED
@@ -155,6 +115,89 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
    }


+def append_documents_to_batch(
+    scenario_id: str,
+    batch: SubmissionBatch,
+    uploaded_files: list,
+) -> dict:
+    """
+    Append supplementary files to an existing batch without rebinding its conversation.
+
+    Only the Documents-side records are updated here:
+    - the new files are attached to the batch that was passed in
+    - ``batch.conversation_id`` is never reassigned
+    - if the batch has no product name yet, one selected from the new files is stored
+    - if the new files name a different product, a warning is recorded and the
+      batch is marked as requiring review
+
+    Returns a dict with ``batch_id``, ``conversation_id``, ``product_name`` and a
+    ``registration_overview_report`` that lists every document of the batch.
+    """
+    outcome = _ingest_files_into_batch(
+        batch=batch,
+        scenario_id=scenario_id,
+        uploaded_files=uploaded_files,
+        keep_existing_product_name=True,
+    )
+    issues = [*outcome["warnings"]]
+    batch_documents = list(batch.documents.order_by("id"))
+
+    # Status precedence: empty batch -> failed, any warning -> review, otherwise completed.
+    if batch_documents:
+        if issues:
+            batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
+        else:
+            batch.import_status = SubmissionBatch.STATUS_COMPLETED
+    else:
+        issues.append("未发现可导入的支持文件，请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
+        batch.import_status = SubmissionBatch.STATUS_FAILED
+
+    batch.product_name = outcome["product_name"]
+    batch.file_count = len(batch_documents)
+    batch.page_count = outcome["page_count"]
+    batch.chapter_summary = outcome["chapter_summary"]
+    batch.exception_count = len(issues)
+    persisted_fields = [
+        "product_name",
+        "file_count",
+        "page_count",
+        "chapter_summary",
+        "exception_count",
+        "import_status",
+        "updated_at",
+    ]
+    batch.save(update_fields=persisted_fields)
+
+    # Mirror the batch's product name onto the bound conversation, if one exists.
+    linked_conversation = None
+    if batch.conversation_id:
+        # Function-scope import; presumably avoids a circular import with apps.chat - TODO confirm.
+        from apps.chat.models import Conversation
+
+        linked_conversation = Conversation.objects.filter(
+            conversation_id=batch.conversation_id
+        ).first()
+    if linked_conversation:
+        linked_conversation.product_name = batch.product_name
+        # The title is only overwritten when there is a non-empty product name.
+        if batch.product_name:
+            linked_conversation.title = batch.product_name
+        linked_conversation.save(update_fields=["product_name", "title", "updated_at"])
+
+    document_rows = []
+    for item in batch_documents:
+        document_rows.append(
+            {
+                "document_id": item.id,
+                "original_name": item.original_name,
+                "chapter_code": item.chapter_code,
+                "page_count": item.page_count,
+                "document_role": item.document_role,
+            }
+        )
+
+    overview_report = {
+        "batch_id": batch.batch_id,
+        "product_name": batch.product_name,
+        "file_count": batch.file_count,
+        "total_page_count": batch.page_count,
+        "chapter_summary": batch.chapter_summary,
+        "documents": document_rows,
+        "warnings": issues,
+    }
+    return {
+        "batch_id": batch.batch_id,
+        "conversation_id": batch.conversation_id,
+        "product_name": batch.product_name,
+        "registration_overview_report": overview_report,
+    }
+
+
 def extract_text(document: UploadedDocument) -> str:
    """
    根据文档类型选择合适的文本抽取策略。
@@ -248,6 +291,87 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
    return {"files": expanded_files, "warnings": warnings}


+def _ingest_files_into_batch(
+    *,
+    batch: SubmissionBatch,
+    scenario_id: str,
+    uploaded_files: list,
+    keep_existing_product_name: bool = False,
+) -> dict:
+    """
+    Store uploaded files as documents of ``batch`` and recompute the batch totals.
+
+    Each expanded upload is saved through ``create_uploaded_document``; its text is
+    extracted and the derived page count, document role and chapter code are
+    persisted on the document. Page and chapter totals are then computed over
+    every document attached to the batch, not only the newly created ones.
+
+    Args:
+        batch: Batch the new documents are attached to.
+        scenario_id: Forwarded to ``create_uploaded_document``.
+        uploaded_files: Raw uploads; passed through ``_expand_uploaded_files`` first.
+        keep_existing_product_name: When true and the batch already has a product
+            name, that name is kept and differing candidate names found in the new
+            files only produce a warning. When true and the batch has no name, a
+            name selected from the new files is used if one is found.
+
+    Returns:
+        dict with the keys ``documents`` (all batch documents when
+        ``keep_existing_product_name`` is true, otherwise only the new ones),
+        ``new_documents``, ``warnings``, ``product_name``, ``page_count`` and
+        ``chapter_summary`` (a list of ``{"chapter_code", "document_count"}``
+        entries sorted by chapter code).
+    """
+    expanded_result = _expand_uploaded_files(uploaded_files)
+    expanded_files = expanded_result["files"]
+    # Copy so the warnings list owned by the expansion result is never mutated.
+    warnings = list(expanded_result["warnings"])
+    new_documents = []
+    new_candidates = []
+
+    for uploaded_item in expanded_files:
+        uploaded_file = uploaded_item["uploaded_file"]
+        relative_path = uploaded_item["relative_path"]
+        document = create_uploaded_document(
+            scenario_id,
+            uploaded_file,
+            batch=batch,
+            relative_path=relative_path,
+        )
+        text = extract_text(document)
+        page_count = _estimate_page_count(text)
+        document.page_count = page_count
+        document.page_count_confidence = "estimated"
+        document.document_role = _detect_document_role(document.relative_path)
+        document.chapter_code = _detect_chapter_code(document.relative_path, text)
+        document.chapter_match_status = "matched" if document.chapter_code else "unknown"
+        # Documents without a detected chapter are flagged for manual review.
+        document.needs_manual_review = not bool(document.chapter_code)
+        document.save(
+            update_fields=[
+                "page_count",
+                "page_count_confidence",
+                "document_role",
+                "chapter_code",
+                "chapter_match_status",
+                "needs_manual_review",
+                "updated_at",
+            ]
+        )
+        new_documents.append(document)
+        new_candidates.extend(_extract_product_candidates(document.relative_path, text))
+
+    # Totals are rebuilt from the database so previously stored documents count too.
+    all_documents = list(batch.documents.order_by("id"))
+    chapter_summary = {}
+    total_pages = 0
+    for document in all_documents:
+        # NOTE(review): `or 0` guards against a stored document whose page_count
+        # was never set; confirm whether the field is nullable.
+        total_pages += document.page_count or 0
+        chapter_key = document.chapter_code or "UNCLASSIFIED"
+        chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
+
+    product_name = batch.product_name
+    if keep_existing_product_name and batch.product_name:
+        # The existing name wins; differing candidates are only reported.
+        conflict_names = {
+            item["product_name"] for item in new_candidates if item["product_name"] != batch.product_name
+        }
+        if conflict_names:
+            warnings.append(
+                "新增文件与当前资料包产品名称不一致："
+                + " / ".join([batch.product_name, *sorted(conflict_names)])
+            )
+    else:
+        product_name, product_warnings = _select_product_name(new_candidates)
+        warnings.extend(product_warnings)
+        if keep_existing_product_name and not product_name:
+            product_name = batch.product_name
+
+    return {
+        "documents": all_documents if keep_existing_product_name else new_documents,
+        "new_documents": new_documents,
+        "warnings": warnings,
+        "product_name": product_name,
+        # Always the total over the whole batch (the former conditional had two identical branches).
+        "page_count": total_pages,
+        "chapter_summary": [
+            {"chapter_code": chapter_code, "document_count": count}
+            for chapter_code, count in sorted(chapter_summary.items())
+        ],
+    }
+
+
 def _extract_zip_entries(uploaded_file) -> dict:
    archive_bytes = uploaded_file.read()
    uploaded_file.seek(0)