class ConversationUploadForm(forms.Form):
    """
    Upload form shown in the conversation side panel.

    It only collects additional files for the already-bound batch; the form
    itself never touches the conversation/batch binding.
    """

    # Multi-file input (plain files or archives).
    files = MultipleFileField(label="补充文件或资料包", required=False)
    # Kept so that clients posting a single ``file`` field still work.
    file = forms.FileField(label="兼容单文件上传", required=False)

    def clean(self):
        """
        Merge both inputs into ``cleaned_data["uploaded_files"]``.

        Raises:
            forms.ValidationError: when nothing was uploaded, or when any
                uploaded file has a suffix outside SUPPORTED_EXTENSIONS.
        """
        cleaned_data = super().clean()

        collected = [*(cleaned_data.get("files") or [])]
        single_file = cleaned_data.get("file")
        if single_file:
            collected.append(single_file)

        if not collected:
            raise forms.ValidationError("请至少上传一个文件或资料包。")

        # Suffix comparison is case-insensitive.
        has_unsupported = any(
            Path(item.name).suffix.lower() not in SUPPORTED_EXTENSIONS
            for item in collected
        )
        if has_unsupported:
            raise forms.ValidationError("仅支持 .txt、.md、.pdf、.docx、.zip 和 .7z 文件")

        cleaned_data["uploaded_files"] = collected
        return cleaned_data
@require_POST
def upload_documents(request, conversation_id: str):
    """
    Append uploaded files to the batch bound to an existing conversation.

    POST only. Resolves the conversation and its submission batch (404 when
    either lookup fails), validates the upload with ConversationUploadForm
    and passes the files to ``append_documents_to_batch``. The outcome is
    reported through the Django messages framework, after which the user is
    redirected back to the conversation detail page.

    Args:
        request: The incoming HTTP request carrying the uploaded files.
        conversation_id: Identifier of the conversation to upload into.

    Returns:
        A redirect response to ``chat:detail`` for the same conversation.
    """
    conversation = get_object_or_404(Conversation, conversation_id=conversation_id)
    batch = get_object_or_404(SubmissionBatch, batch_id=conversation.batch_id)
    upload_form = ConversationUploadForm(request.POST, request.FILES)
    if upload_form.is_valid():
        # NOTE(review): the scenario id is hard-coded here; confirm whether it
        # should instead be derived from the batch or the conversation.
        result = append_documents_to_batch(
            "document_review",
            batch,
            upload_form.cleaned_data["uploaded_files"],
        )
        warning_count = len(result["registration_overview_report"]["warnings"])
        message = "资料已补充到当前资料包。"
        if warning_count:
            message += f" 当前有 {warning_count} 条待复核提示。"
        messages.success(request, message)
    else:
        # Report field-level errors too, not only the non-field ones raised
        # in ``clean()``; otherwise a rejected file input degrades to the
        # generic message and the user cannot tell what went wrong.
        error_texts = [
            str(error)
            for field_errors in upload_form.errors.values()
            for error in field_errors
        ]
        if error_texts:
            messages.error(request, "补充资料失败:" + " ".join(error_texts))
        else:
            messages.error(request, "补充资料失败。")
    return redirect("chat:detail", conversation_id=conversation.conversation_id)
def append_documents_to_batch(
    scenario_id: str,
    batch: SubmissionBatch,
    uploaded_files: list,
) -> dict:
    """
    Append more files to an existing batch while keeping its conversation binding.

    This service only updates the Documents side:
    - new files are attached to the same batch
    - ``conversation_id`` is left unchanged
    - an empty product name may be filled in from the new files
    - a product name that conflicts with the existing one produces a warning,
      which moves the batch to the review-required status

    Args:
        scenario_id: Scenario identifier forwarded to the ingest helper.
        batch: The batch that receives the new documents.
        uploaded_files: Uploaded file objects (plain files or archives).

    Returns:
        A dict with ``batch_id``, ``conversation_id``, ``product_name`` and a
        ``registration_overview_report`` describing every document now in the
        batch plus the warnings raised by this upload.
    """
    ingest_result = _ingest_files_into_batch(
        batch=batch,
        scenario_id=scenario_id,
        uploaded_files=uploaded_files,
        keep_existing_product_name=True,
    )
    warnings = list(ingest_result["warnings"])
    # With keep_existing_product_name=True the helper already returns every
    # document of the batch ordered by id, so no second query is needed.
    all_documents = list(ingest_result["documents"])

    # NOTE(review): status and exception_count are derived from this upload's
    # warnings only, so a clean append resets an earlier review-required
    # state. Confirm that this is the intended behaviour.
    if not all_documents:
        warnings.append("未发现可导入的支持文件,请检查资料包格式或补充 PDF/DOCX/MD/TXT 文件。")
        batch.import_status = SubmissionBatch.STATUS_FAILED
    elif warnings:
        batch.import_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
    else:
        batch.import_status = SubmissionBatch.STATUS_COMPLETED

    batch.product_name = ingest_result["product_name"]
    batch.file_count = len(all_documents)
    batch.page_count = ingest_result["page_count"]
    batch.chapter_summary = ingest_result["chapter_summary"]
    batch.exception_count = len(warnings)
    batch.save(
        update_fields=[
            "product_name",
            "file_count",
            "page_count",
            "chapter_summary",
            "exception_count",
            "import_status",
            "updated_at",
        ]
    )

    if batch.conversation_id:
        # Imported at call time, as in the original code (presumably to avoid
        # a circular import between the chat and documents apps).
        from apps.chat.models import Conversation

        conversation = Conversation.objects.filter(conversation_id=batch.conversation_id).first()
        if conversation:
            conversation.product_name = batch.product_name
            # Only overwrite the title when there is a product name to show.
            if batch.product_name:
                conversation.title = batch.product_name
            conversation.save(update_fields=["product_name", "title", "updated_at"])

    return {
        "batch_id": batch.batch_id,
        "conversation_id": batch.conversation_id,
        "product_name": batch.product_name,
        "registration_overview_report": {
            "batch_id": batch.batch_id,
            "product_name": batch.product_name,
            "file_count": batch.file_count,
            "total_page_count": batch.page_count,
            "chapter_summary": batch.chapter_summary,
            "documents": [
                {
                    "document_id": document.id,
                    "original_name": document.original_name,
                    "chapter_code": document.chapter_code,
                    "page_count": document.page_count,
                    "document_role": document.document_role,
                }
                for document in all_documents
            ],
            "warnings": warnings,
        },
    }
def _ingest_files_into_batch(
    *,
    batch: SubmissionBatch,
    scenario_id: str,
    uploaded_files: list,
    keep_existing_product_name: bool = False,
) -> dict:
    """
    Store uploaded files under ``batch`` and recompute the batch statistics.

    Each expanded upload becomes an UploadedDocument whose page count,
    document role and chapter code are derived from its relative path and
    extracted text. Page totals and the chapter summary are then recomputed
    over every document of the batch, not only the new ones.

    Args:
        batch: Batch that owns the new documents.
        scenario_id: Scenario identifier stored on each new document.
        uploaded_files: Uploaded file objects (plain files or archives).
        keep_existing_product_name: When true and the batch already has a
            product name, that name is kept and differing candidates from
            the new files only produce a warning.

    Returns:
        A dict with ``documents`` (all batch documents when
        ``keep_existing_product_name`` is true, otherwise only the new ones),
        ``new_documents``, ``warnings``, ``product_name``, ``page_count`` and
        ``chapter_summary``.
    """
    expanded_result = _expand_uploaded_files(uploaded_files)
    warnings = list(expanded_result["warnings"])
    new_documents = []
    new_candidates = []

    for uploaded_item in expanded_result["files"]:
        document = create_uploaded_document(
            scenario_id,
            uploaded_item["uploaded_file"],
            batch=batch,
            relative_path=uploaded_item["relative_path"],
        )
        text = extract_text(document)
        document.page_count = _estimate_page_count(text)
        document.page_count_confidence = "estimated"
        document.document_role = _detect_document_role(document.relative_path)
        document.chapter_code = _detect_chapter_code(document.relative_path, text)
        document.chapter_match_status = "matched" if document.chapter_code else "unknown"
        # Documents without a recognised chapter are flagged for manual review.
        document.needs_manual_review = not bool(document.chapter_code)
        document.save(
            update_fields=[
                "page_count",
                "page_count_confidence",
                "document_role",
                "chapter_code",
                "chapter_match_status",
                "needs_manual_review",
                "updated_at",
            ]
        )
        new_documents.append(document)
        new_candidates.extend(_extract_product_candidates(document.relative_path, text))

    # Re-read the whole batch so the totals also cover earlier uploads.
    all_documents = list(batch.documents.order_by("id"))
    # ``or 0`` guards against rows whose page count was never populated.
    total_pages = sum(document.page_count or 0 for document in all_documents)
    chapter_counts = {}
    for document in all_documents:
        chapter_key = document.chapter_code or "UNCLASSIFIED"
        chapter_counts[chapter_key] = chapter_counts.get(chapter_key, 0) + 1

    product_name = batch.product_name
    if keep_existing_product_name and batch.product_name:
        # Keep the established name; only report candidates that disagree.
        conflict_names = {
            item["product_name"]
            for item in new_candidates
            if item["product_name"] != batch.product_name
        }
        if conflict_names:
            warnings.append(
                "新增文件与当前资料包产品名称不一致:"
                + " / ".join([batch.product_name, *sorted(conflict_names)])
            )
    else:
        product_name, product_warnings = _select_product_name(new_candidates)
        warnings.extend(product_warnings)
        # Fall back to the batch value when the new files yield no name.
        if keep_existing_product_name and not product_name:
            product_name = batch.product_name

    return {
        "documents": all_documents if keep_existing_product_name else new_documents,
        "new_documents": new_documents,
        "warnings": warnings,
        "product_name": product_name,
        "page_count": total_pages,
        "chapter_summary": [
            {"chapter_code": chapter_code, "document_count": count}
            for chapter_code, count in sorted(chapter_counts.items())
        ],
    }
导入状态:{{ batch.get_import_status_display_text }}
+
+ {% csrf_token %} +
+ {{ upload_form.files.label_tag }} + {{ upload_form.files }} +
+
+ + 返回资料包 +
+
{% else %}
暂无绑定资料包。
def test_chat_upload_keeps_existing_conversation_binding_and_adds_documents(client, db):
    """Uploading from the chat page adds a document without rebinding the batch."""
    batch, conversation = _create_conversation_with_batch()
    # Seed one document so the upload has to extend a non-empty batch.
    UploadedDocument.objects.create(
        batch=batch,
        scenario_id="document_review",
        original_name="原说明书.md",
        file_type="md",
        size=1,
        status=UploadedDocument.STATUS_INDEXED,
    )
    existing_count = UploadedDocument.objects.filter(batch=batch).count()
    upload_file = SimpleUploadedFile(
        "新增补充资料.txt",
        "产品名称:新型冠状病毒 2019-nCoV 核酸检测试剂盒".encode("utf-8"),
        content_type="text/plain",
    )

    response = client.post(
        reverse("chat:upload-documents", args=[conversation.conversation_id]),
        {"files": [upload_file]},
        follow=True,
    )

    content = response.content.decode("utf-8")
    batch.refresh_from_db()
    conversation.refresh_from_db()
    assert response.status_code == 200
    # No new batch or conversation may be created by the upload.
    assert SubmissionBatch.objects.count() == 1
    assert Conversation.objects.count() == 1
    assert conversation.conversation_id == "conv-001"
    assert batch.conversation_id == conversation.conversation_id
    assert UploadedDocument.objects.filter(batch=batch).count() == existing_count + 1
    assert UploadedDocument.objects.filter(batch=batch, original_name="新增补充资料.txt").exists()
    # The stored batch statistics must reflect the appended document.
    assert batch.file_count == existing_count + 1
    assert "新增补充资料.txt" in content
    assert "已补充到当前资料包" in content