feat: 支持会话内补传资料并保持绑定

2026-06-04 01:51:48 +08:00
parent 1e18fd2be9
commit 96f710ea13
6 changed files with 278 additions and 53 deletions
--- a/apps/chat/forms.py
+++ b/apps/chat/forms.py
@@ -1,4 +1,7 @@
 from django import forms
 from pathlib import Path
 from apps.documents.forms import MultipleFileField, SUPPORTED_EXTENSIONS
 class ChatForm(forms.Form):
@@ -38,3 +41,23 @@ class ChatForm(forms.Form):
    def clean_document_ids(self):
        """Return the submitted ``document_ids`` converted to integers."""
        # Both the view and Agent Core work with integer document IDs, so the
        # conversion happens once here, at the form layer.
        raw_ids = self.cleaned_data.get("document_ids", [])
        return list(map(int, raw_ids))
 class ConversationUploadForm(forms.Form):
    """Form for the upload area shown inside a conversation.

    It only collects additional files for the batch that is already bound to
    the conversation; it does not modify the conversation binding.
    """

    # Multi-file input.
    files = MultipleFileField(label="补充文件或资料包", required=False)
    # Single-file input, kept for compatibility (see its label).
    file = forms.FileField(label="兼容单文件上传", required=False)

    def clean(self):
        """Merge both inputs into ``cleaned_data["uploaded_files"]``.

        Raises:
            forms.ValidationError: when nothing was uploaded, or when any file
                has a suffix that is not in ``SUPPORTED_EXTENSIONS``.
        """
        cleaned_data = super().clean()
        collected = [*(cleaned_data.get("files") or [])]
        single_upload = cleaned_data.get("file")
        if single_upload:
            collected.append(single_upload)
        if not collected:
            raise forms.ValidationError("请至少上传一个文件或资料包。")
        # The suffix comparison is case-insensitive.
        has_unsupported = any(
            Path(item.name).suffix.lower() not in SUPPORTED_EXTENSIONS
            for item in collected
        )
        if has_unsupported:
            raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")
        cleaned_data["uploaded_files"] = collected
        return cleaned_data
--- a/apps/chat/urls.py
+++ b/apps/chat/urls.py
@@ -9,4 +9,5 @@ app_name = "chat"
 urlpatterns = [
    # Chat index view.
    path("", views.index, name="index"),
    # Page of a single conversation, addressed by its conversation_id.
    path("<str:conversation_id>/", views.detail, name="detail"),
    # Upload endpoint for adding files to the conversation's batch.
    path("<str:conversation_id>/upload/", views.upload_documents, name="upload-documents"),
 ]
--- a/apps/chat/views.py
+++ b/apps/chat/views.py
@@ -1,14 +1,17 @@
 from django.contrib import messages
 from django.utils import timezone
 from django.shortcuts import get_object_or_404, redirect, render
 from django.urls import reverse
 from django.views.decorators.http import require_POST
 from agent_core.orchestrator import run_agent
 from agent_core.results import AgentResult
 from apps.audit.services import create_audit_log, create_notification_record
 from apps.documents.models import SubmissionBatch, UploadedDocument
 from apps.documents.services import append_documents_to_batch
 from apps.scenarios.services import get_scenario
-from .forms import ChatForm
+from .forms import ChatForm, ConversationUploadForm
 from .models import Conversation
@@ -37,6 +40,7 @@ def detail(request, conversation_id: str):
    batch = SubmissionBatch.objects.filter(batch_id=conversation.batch_id).first()
    documents = UploadedDocument.objects.filter(batch=batch)
    form = ChatForm(request.POST or None, documents=documents)
    upload_form = ConversationUploadForm()
    result = None
    audit_log = None
    active_node = None
@@ -97,10 +101,35 @@ def detail(request, conversation_id: str):
            "node_results": conversation.node_results,
            "active_node": active_node,
            "workspace_summary": workspace_summary,
            "upload_form": upload_form,
        },
    )
@require_POST
def upload_documents(request, conversation_id: str):
    """Append uploaded files to the batch bound to a conversation.

    Looks up the conversation and its submission batch (404 when either is
    missing), validates ``ConversationUploadForm`` and, when it is valid,
    passes the files to ``append_documents_to_batch``. The outcome is reported
    through the Django messages framework, and the request is always
    redirected back to the conversation detail page.
    """
    conversation = get_object_or_404(Conversation, conversation_id=conversation_id)
    batch = get_object_or_404(SubmissionBatch, batch_id=conversation.batch_id)
    upload_form = ConversationUploadForm(request.POST, request.FILES)
    if upload_form.is_valid():
        # NOTE(review): the scenario id is hard-coded here; presumably it
        # should follow the batch's own scenario - confirm with the caller.
        result = append_documents_to_batch(
            "document_review",
            batch,
            upload_form.cleaned_data["uploaded_files"],
        )
        warning_count = len(result["registration_overview_report"]["warnings"])
        message = "资料已补充到当前资料包。"
        if warning_count:
            message += f" 当前有 {warning_count} 条待复核提示。"
        messages.success(request, message)
    else:
        # Form-level errors come first, then per-field errors, so that a
        # rejected individual file is not hidden behind the generic message.
        error_texts = [str(error) for error in upload_form.non_field_errors()]
        for field_name in upload_form.fields:
            error_texts.extend(
                str(error) for error in upload_form.errors.get(field_name, [])
            )
        if error_texts:
            messages.error(request, "补充资料失败：" + " ".join(error_texts))
        else:
            messages.error(request, "补充资料失败。")
    return redirect("chat:detail", conversation_id=conversation.conversation_id)
 def _persist_notification_records(result: AgentResult, *, web_detail_url: str = "") -> None:
    payload = result.notification_payload or {}
    owners = payload.get("owners") or []
--- a/apps/documents/services.py
+++ b/apps/documents/services.py
@@ -53,51 +53,14 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
        workflow_type="registration",
        import_status=SubmissionBatch.STATUS_PROCESSING,
    )
-    documents = []
+    ingest_result = _ingest_files_into_batch(
    candidates = []
    chapter_summary = {}
    total_pages = 0
    warnings = []
    expanded_result = _expand_uploaded_files(uploaded_files)
    expanded_files = expanded_result["files"]
    warnings.extend(expanded_result["warnings"])
    for uploaded_item in expanded_files:
        uploaded_file = uploaded_item["uploaded_file"]
        relative_path = uploaded_item["relative_path"]
        document = create_uploaded_document(
            scenario_id,
            uploaded_file,
        batch=batch,
-            relative_path=relative_path,
+        scenario_id=scenario_id,
        uploaded_files=uploaded_files,
    )
-        text = extract_text(document)
+    documents = ingest_result["documents"]
-        page_count = _estimate_page_count(text)
+    warnings = ingest_result["warnings"]
-        document.page_count = page_count
+    product_name = ingest_result["product_name"]
        document.page_count_confidence = "estimated"
        document.document_role = _detect_document_role(document.relative_path)
        document.chapter_code = _detect_chapter_code(document.relative_path, text)
        document.chapter_match_status = "matched" if document.chapter_code else "unknown"
        document.needs_manual_review = not bool(document.chapter_code)
        document.save(
            update_fields=[
                "page_count",
                "page_count_confidence",
                "document_role",
                "chapter_code",
                "chapter_match_status",
                "needs_manual_review",
                "updated_at",
            ]
        )
        documents.append(document)
        total_pages += page_count
        chapter_key = document.chapter_code or "UNCLASSIFIED"
        chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1
        candidates.extend(_extract_product_candidates(document.relative_path, text))
    product_name, product_warnings = _select_product_name(candidates)
    warnings.extend(product_warnings)
    conversation = create_conversation_for_batch(batch.batch_id, product_name)
    if not documents:
@@ -106,11 +69,8 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
    batch.product_name = product_name
    batch.conversation_id = conversation.conversation_id
    batch.file_count = len(documents)
-    batch.page_count = total_pages
+    batch.page_count = ingest_result["page_count"]
-    batch.chapter_summary = [
+    batch.chapter_summary = ingest_result["chapter_summary"]
        {"chapter_code": chapter_code, "document_count": count}
        for chapter_code, count in sorted(chapter_summary.items())
    ]
    batch.exception_count = len(warnings)
    if not documents:
        batch.import_status = SubmissionBatch.STATUS_FAILED
@@ -155,6 +115,89 @@ def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
    }
 def append_documents_to_batch(
    scenario_id: str,
    batch: SubmissionBatch,
    uploaded_files: list,
 ) -> dict:
    """
    Append more files to an existing submission batch while keeping its
    conversation binding unchanged.

    This service only updates Documents-side data:
    - new files keep belonging to the original batch
    - conversation_id stays the same
    - an empty product name may be filled in from the newly added files
    - a product name in the new files that conflicts with the existing one
      turns the batch into "review required"

    Returns a dict with ``batch_id``, ``conversation_id``, ``product_name`` and
    a ``registration_overview_report`` that lists every document of the batch
    together with the warnings collected during this call.
    """
    ingest_result = _ingest_files_into_batch(
        batch=batch,
        scenario_id=scenario_id,
        uploaded_files=uploaded_files,
        keep_existing_product_name=True,
    )
    warnings = [*ingest_result["warnings"]]
    batch_documents = list(batch.documents.order_by("id"))

    # Import status: failed when the batch holds no document at all, review
    # required when anything was flagged, completed otherwise.
    if batch_documents:
        batch.import_status = (
            SubmissionBatch.STATUS_REVIEW_REQUIRED
            if warnings
            else SubmissionBatch.STATUS_COMPLETED
        )
    else:
        warnings.append("未发现可导入的支持文件，请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
        batch.import_status = SubmissionBatch.STATUS_FAILED

    refreshed_values = {
        "product_name": ingest_result["product_name"],
        "file_count": len(batch_documents),
        "page_count": ingest_result["page_count"],
        "chapter_summary": ingest_result["chapter_summary"],
        "exception_count": len(warnings),
    }
    for field_name, value in refreshed_values.items():
        setattr(batch, field_name, value)
    batch.save(update_fields=[*refreshed_values, "import_status", "updated_at"])

    # Mirror the product name onto the bound conversation, if there is one.
    bound_conversation = None
    if batch.conversation_id:
        # Imported locally, as in the original code (presumably to avoid a
        # circular import between the documents and chat apps - confirm).
        from apps.chat.models import Conversation

        bound_conversation = Conversation.objects.filter(
            conversation_id=batch.conversation_id
        ).first()
    if bound_conversation:
        bound_conversation.product_name = batch.product_name
        # The title is only overwritten when a product name is available.
        if batch.product_name:
            bound_conversation.title = batch.product_name
        bound_conversation.save(update_fields=["product_name", "title", "updated_at"])

    document_rows = []
    for document in batch_documents:
        document_rows.append(
            {
                "document_id": document.id,
                "original_name": document.original_name,
                "chapter_code": document.chapter_code,
                "page_count": document.page_count,
                "document_role": document.document_role,
            }
        )
    overview_report = {
        "batch_id": batch.batch_id,
        "product_name": batch.product_name,
        "file_count": batch.file_count,
        "total_page_count": batch.page_count,
        "chapter_summary": batch.chapter_summary,
        "documents": document_rows,
        "warnings": warnings,
    }
    return {
        "batch_id": batch.batch_id,
        "conversation_id": batch.conversation_id,
        "product_name": batch.product_name,
        "registration_overview_report": overview_report,
    }
 def extract_text(document: UploadedDocument) -> str:
    """
    根据文档类型选择合适的文本抽取策略。
@@ -248,6 +291,87 @@ def _expand_uploaded_files(uploaded_files: list) -> list[dict]:
    return {"files": expanded_files, "warnings": warnings}
 def _ingest_files_into_batch(
    *,
    batch: SubmissionBatch,
    scenario_id: str,
    uploaded_files: list,
    keep_existing_product_name: bool = False,
 ) -> dict:
    """
    Store the uploaded files under ``batch`` and recompute batch-level stats.

    Every expanded file is saved through ``create_uploaded_document``; its text
    is extracted and the estimated page count, document role and chapter code
    are written back to the document. The page total and chapter summary are
    then computed over all documents currently attached to the batch, not only
    the new ones.

    Product name handling:
    - ``keep_existing_product_name`` is set and the batch already has a name:
      the name is kept, and differing candidate names found in the new files
      add a warning.
    - otherwise the name is chosen from the new files' candidates via
      ``_select_product_name``; with ``keep_existing_product_name`` set, the
      batch's current name is the fallback when nothing was selected.

    Returns a dict with the keys ``documents`` (all batch documents when
    ``keep_existing_product_name`` is set, otherwise only the new ones),
    ``new_documents``, ``warnings``, ``product_name``, ``page_count`` and
    ``chapter_summary`` (one ``{"chapter_code", "document_count"}`` entry per
    chapter, sorted by chapter code).
    """
    expanded_result = _expand_uploaded_files(uploaded_files)
    expanded_files = expanded_result["files"]
    warnings = list(expanded_result["warnings"])
    new_documents = []
    new_candidates = []
    for uploaded_item in expanded_files:
        uploaded_file = uploaded_item["uploaded_file"]
        relative_path = uploaded_item["relative_path"]
        document = create_uploaded_document(
            scenario_id,
            uploaded_file,
            batch=batch,
            relative_path=relative_path,
        )
        text = extract_text(document)
        page_count = _estimate_page_count(text)
        document.page_count = page_count
        document.page_count_confidence = "estimated"
        document.document_role = _detect_document_role(document.relative_path)
        document.chapter_code = _detect_chapter_code(document.relative_path, text)
        # A document without a detected chapter is flagged for manual review.
        document.chapter_match_status = "matched" if document.chapter_code else "unknown"
        document.needs_manual_review = not bool(document.chapter_code)
        document.save(
            update_fields=[
                "page_count",
                "page_count_confidence",
                "document_role",
                "chapter_code",
                "chapter_match_status",
                "needs_manual_review",
                "updated_at",
            ]
        )
        new_documents.append(document)
        new_candidates.extend(_extract_product_candidates(document.relative_path, text))

    # Aggregate over every document of the batch, including earlier uploads.
    all_documents = list(batch.documents.order_by("id"))
    chapter_summary = {}
    total_pages = 0
    for document in all_documents:
        # Documents stored earlier may have no page count yet; count them as 0
        # instead of failing on ``int + None``.
        total_pages += document.page_count or 0
        chapter_key = document.chapter_code or "UNCLASSIFIED"
        chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1

    product_name = batch.product_name
    if keep_existing_product_name and batch.product_name:
        # Keep the current name and only report names that disagree with it.
        conflict_names = {
            item["product_name"]
            for item in new_candidates
            if item["product_name"] != batch.product_name
        }
        if conflict_names:
            warnings.append(
                "新增文件与当前资料包产品名称不一致："
                + " / ".join([batch.product_name, *sorted(conflict_names)])
            )
    else:
        product_name, product_warnings = _select_product_name(new_candidates)
        warnings.extend(product_warnings)
        if keep_existing_product_name and not product_name:
            product_name = batch.product_name

    return {
        "documents": all_documents if keep_existing_product_name else new_documents,
        "new_documents": new_documents,
        "warnings": warnings,
        "product_name": product_name,
        # Always the total over the whole batch; the former conditional had
        # two identical branches.
        "page_count": total_pages,
        "chapter_summary": [
            {"chapter_code": chapter_code, "document_count": count}
            for chapter_code, count in sorted(chapter_summary.items())
        ],
    }
 def _extract_zip_entries(uploaded_file) -> dict:
    archive_bytes = uploaded_file.read()
    uploaded_file.seek(0)
--- a/templates/chat/index.html
+++ b/templates/chat/index.html
@@ -119,10 +119,20 @@
              <div>导入状态：{{ batch.get_import_status_display_text }}</div>
            </li>
          </ul>
-          <div class="button-row" style="margin-top: 16px;">
+          <form method="post" action="{% url 'chat:upload-documents' conversation.conversation_id %}" enctype="multipart/form-data" class="stack" style="margin-top: 16px;">
-            <a class="button button-primary" href="{% url 'documents:upload' %}">继续上传资料</a>
+            {% csrf_token %}
            <div>
              {{ upload_form.files.label_tag }}
              {{ upload_form.files }}
            </div>
            <div class="button-row">
              <button type="submit">继续上传资料</button>
              <a class="button" href="{% url 'documents:list' %}">返回资料包</a>
            </div>
          </form>
          <div class="button-row" style="margin-top: 16px;">
            <a class="button" href="{% url 'documents:upload' %}">导入新资料包</a>
          </div>
        {% else %}
          <div class="notice">暂无绑定资料包。</div>
        {% endif %}
--- a/tests/test_chat.py
+++ b/tests/test_chat.py
@@ -1,4 +1,5 @@
 from django.urls import reverse
 from django.core.files.uploadedfile import SimpleUploadedFile
 from agent_core.results import AgentResult
 from apps.audit.models import AgentAuditLog
@@ -325,3 +326,40 @@ def test_chat_page_shows_upload_entry_and_dynamic_context_cards(client, db):
    assert "是否允许正式导出" in content
    assert "通知状态" in content
    assert "飞书通知 / 待处理" in content
 def test_chat_upload_keeps_existing_conversation_binding_and_adds_documents(client, db):
    """Uploading inside a conversation adds a document without rebinding it."""
    batch, conversation = _create_conversation_with_batch()
    # Seed the batch with one document that exists before the upload.
    UploadedDocument.objects.create(
        batch=batch,
        scenario_id="document_review",
        original_name="原说明书.md",
        file_type="md",
        size=1,
        status=UploadedDocument.STATUS_INDEXED,
    )
    documents_before = UploadedDocument.objects.filter(batch=batch).count()
    extra_file = SimpleUploadedFile(
        "新增补充资料.txt",
        "产品名称：新型冠状病毒 2019-nCoV 核酸检测试剂盒".encode("utf-8"),
        content_type="text/plain",
    )
    upload_url = reverse("chat:upload-documents", args=[conversation.conversation_id])

    response = client.post(upload_url, {"files": [extra_file]}, follow=True)
    page = response.content.decode("utf-8")
    batch.refresh_from_db()
    conversation.refresh_from_db()

    assert response.status_code == 200
    # No new batch or conversation may be created, and the binding must hold.
    assert SubmissionBatch.objects.count() == 1
    assert Conversation.objects.count() == 1
    assert conversation.conversation_id == "conv-001"
    assert batch.conversation_id == conversation.conversation_id
    # Exactly one document was added to the existing batch.
    documents_after = UploadedDocument.objects.filter(batch=batch).count()
    assert documents_after == documents_before + 1
    assert UploadedDocument.objects.filter(batch=batch, original_name="新增补充资料.txt").exists()
    assert "新增补充资料.txt" in page
    assert "已补充到当前资料包" in page