From d0841e533f80da0e981af659422db7a417c77b75 Mon Sep 17 00:00:00 2001 From: bruce Date: Thu, 4 Jun 2026 00:43:13 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84=E8=B5=84=E6=96=99?= =?UTF-8?q?=E5=8C=85=E6=A8=A1=E5=9E=8B=E4=B8=8E=E4=BC=9A=E8=AF=9D=E7=BB=91?= =?UTF-8?q?=E5=AE=9A=E4=B8=BB=E9=93=BE=E8=B7=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...nrecord_agentauditlog_batch_id_and_more.py | 62 +++++ apps/audit/models.py | 33 +++ apps/audit/services.py | 6 + apps/chat/migrations/0001_initial.py | 52 ++++ apps/chat/models.py | 34 +++ apps/chat/services.py | 26 ++ apps/chat/urls.py | 5 +- apps/chat/views.py | 79 +++--- ..._uploadeddocument_chapter_code_and_more.py | 103 ++++++++ apps/documents/models.py | 56 +++++ apps/documents/services.py | 199 ++++++++++++++- apps/documents/views.py | 34 ++- templates/base.html | 10 +- templates/chat/index.html | 233 ++++++++---------- templates/documents/document_list.html | 110 +++++++-- templates/scenarios/index.html | 18 +- tests/test_chat.py | 125 ++++++---- tests/test_documents.py | 78 +++++- 18 files changed, 1000 insertions(+), 263 deletions(-) create mode 100644 apps/audit/migrations/0003_notificationrecord_agentauditlog_batch_id_and_more.py create mode 100644 apps/chat/migrations/0001_initial.py create mode 100644 apps/chat/models.py create mode 100644 apps/chat/services.py create mode 100644 apps/documents/migrations/0002_submissionbatch_uploadeddocument_chapter_code_and_more.py diff --git a/apps/audit/migrations/0003_notificationrecord_agentauditlog_batch_id_and_more.py b/apps/audit/migrations/0003_notificationrecord_agentauditlog_batch_id_and_more.py new file mode 100644 index 0000000..cf88249 --- /dev/null +++ b/apps/audit/migrations/0003_notificationrecord_agentauditlog_batch_id_and_more.py @@ -0,0 +1,62 @@ +# Generated by Django 5.2.14 on 2026-06-03 16:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("audit", "0002_demobusinessrecord"), + ] + + operations = [ + migrations.CreateModel( + name="NotificationRecord", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("batch_id", models.CharField(db_index=True, max_length=64)), + ("conversation_id", models.CharField(db_index=True, max_length=64)), + ( + "product_name", + models.CharField(blank=True, db_index=True, max_length=255), + ), + ("trigger_source", models.CharField(blank=True, max_length=64)), + ("notify_reason", models.CharField(db_index=True, max_length=32)), + ("owner_role", models.CharField(blank=True, max_length=100)), + ("feishu_user_id", models.CharField(blank=True, max_length=100)), + ( + "message_status", + models.CharField(db_index=True, default="pending", max_length=32), + ), + ("web_detail_url", models.URLField(blank=True)), + ("receipt", models.JSONField(blank=True, default=dict)), + ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)), + ], + options={ + "ordering": ["-created_at"], + }, + ), + migrations.AddField( + model_name="agentauditlog", + name="batch_id", + field=models.CharField(blank=True, db_index=True, max_length=64), + ), + migrations.AddField( + model_name="agentauditlog", + name="conversation_id", + field=models.CharField(blank=True, db_index=True, max_length=64), + ), + migrations.AddField( + model_name="agentauditlog", + name="product_name", + field=models.CharField(blank=True, db_index=True, max_length=255), + ), + ] diff --git a/apps/audit/models.py b/apps/audit/models.py index f34322d..7132245 100644 --- a/apps/audit/models.py +++ b/apps/audit/models.py @@ -16,6 +16,9 @@ class AgentAuditLog(models.Model): scenario_id = models.CharField(max_length=100, db_index=True) scenario_name = models.CharField(max_length=200, blank=True) + batch_id = models.CharField(max_length=64, blank=True, db_index=True) + conversation_id = models.CharField(max_length=64, blank=True, db_index=True) + product_name = models.CharField(max_length=255, blank=True, db_index=True) user_input = models.TextField() retrieved_chunks = models.JSONField(default=list, blank=True) tool_calls = models.JSONField(default=list, blank=True) @@ -66,3 +69,33 @@ class DemoBusinessRecord(models.Model): def __str__(self) -> str: return self.title + + +class NotificationRecord(models.Model): + """ + 飞书通知留痕。 + + 首版只保存离线通知载荷与结果状态,不直接依赖真实飞书网络。 + """ + + STATUS_PENDING = "pending" + STATUS_SENT = "sent" + STATUS_FAILED = "failed" + + batch_id = models.CharField(max_length=64, db_index=True) + conversation_id = models.CharField(max_length=64, db_index=True) + product_name = models.CharField(max_length=255, blank=True, db_index=True) + trigger_source = models.CharField(max_length=64, blank=True) + notify_reason = models.CharField(max_length=32, db_index=True) + owner_role = models.CharField(max_length=100, blank=True) + feishu_user_id = models.CharField(max_length=100, blank=True) + message_status = models.CharField(max_length=32, default=STATUS_PENDING, db_index=True) + web_detail_url = models.URLField(blank=True) + receipt = models.JSONField(default=dict, blank=True) + created_at = models.DateTimeField(auto_now_add=True, db_index=True) + + class Meta: + ordering = ["-created_at"] + + def __str__(self) -> str: + return f"{self.notify_reason}:{self.batch_id}" diff --git a/apps/audit/services.py b/apps/audit/services.py index d56d5cc..452a303 100644 --- a/apps/audit/services.py +++ b/apps/audit/services.py @@ -8,6 +8,9 @@ def create_audit_log( scenario_name: str, user_input: str, agent_result: AgentResult, + batch_id: str = "", + conversation_id: str = "", + product_name: str = "", ) -> AgentAuditLog: """ 将一次 Agent 执行结果落库为审计日志。 @@ -20,6 +23,9 @@ def create_audit_log( return AgentAuditLog.objects.create( scenario_id=scenario_id, scenario_name=scenario_name, + batch_id=batch_id, + conversation_id=conversation_id, + product_name=product_name, user_input=user_input, retrieved_chunks=agent_result.references, tool_calls=agent_result.tool_calls, diff --git a/apps/chat/migrations/0001_initial.py b/apps/chat/migrations/0001_initial.py new file mode 100644 index 0000000..1dc751b --- /dev/null +++ b/apps/chat/migrations/0001_initial.py @@ -0,0 +1,52 @@ +# Generated by Django 5.2.14 on 2026-06-03 16:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="Conversation", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "conversation_id", + models.CharField(db_index=True, max_length=64, unique=True), + ), + ("title", models.CharField(max_length=255)), + ( + "product_name", + models.CharField(blank=True, db_index=True, max_length=255), + ), + ( + "batch_id", + models.CharField(blank=True, db_index=True, max_length=64), + ), + ( + "task_status", + models.CharField(db_index=True, default="pending", max_length=32), + ), + ("node_results", models.JSONField(blank=True, default=list)), + ("latest_summary", models.JSONField(blank=True, default=dict)), + ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ("last_run_at", models.DateTimeField(blank=True, null=True)), + ], + options={ + "ordering": ["-updated_at", "-created_at"], + }, + ), + ] diff --git a/apps/chat/models.py b/apps/chat/models.py new file mode 100644 index 0000000..590a43d --- /dev/null +++ b/apps/chat/models.py @@ -0,0 +1,34 @@ +from django.db import models + + +class Conversation(models.Model): + """ + 审核智能体会话主对象。 + + 会话与资料包一一绑定,标题默认使用解析出的产品名称, + 节点结果使用 JSON 挂载,便于页面按节点展示。 + """ + + STATUS_PENDING = "pending" + STATUS_PROCESSING = "processing" + STATUS_COMPLETED = "completed" + STATUS_REVIEW_REQUIRED = "review_required" + STATUS_BLOCKED = "blocked" + STATUS_FAILED = "failed" + + conversation_id = models.CharField(max_length=64, unique=True, db_index=True) + title = models.CharField(max_length=255) + product_name = models.CharField(max_length=255, blank=True, db_index=True) + batch_id = models.CharField(max_length=64, blank=True, db_index=True) + task_status = models.CharField(max_length=32, default=STATUS_PENDING, db_index=True) + node_results = models.JSONField(default=list, blank=True) + latest_summary = models.JSONField(default=dict, blank=True) + created_at = models.DateTimeField(auto_now_add=True, db_index=True) + updated_at = models.DateTimeField(auto_now=True) + last_run_at = models.DateTimeField(null=True, blank=True) + + class Meta: + ordering = ["-updated_at", "-created_at"] + + def __str__(self) -> str: + return self.title diff --git a/apps/chat/services.py b/apps/chat/services.py new file mode 100644 index 0000000..6df668a --- /dev/null +++ b/apps/chat/services.py @@ -0,0 +1,26 @@ +from .models import Conversation + + +def create_conversation_for_batch(batch_id: str, product_name: str) -> Conversation: + """ + 为资料包创建主会话。 + + 会话标题固定优先使用解析出的产品名称, + 缺失时回退到批次号,确保前台始终有稳定标题。 + """ + conversation = Conversation.objects.create( + conversation_id=_generate_conversation_id(), + title=product_name or f"未命名资料包-{batch_id}", + product_name=product_name, + batch_id=batch_id, + task_status=Conversation.STATUS_PENDING, + node_results=[ + {"code": "package_import", "label": "资料包导入", "status": "已完成"}, + {"code": "overview", "label": "目录汇总", "status": "处理中"}, + ], + ) + return conversation + + +def _generate_conversation_id() -> str: + return f"conv-{Conversation.objects.count() + 1:03d}" diff --git a/apps/chat/urls.py b/apps/chat/urls.py index 4ddfec7..8a84c94 100644 --- a/apps/chat/urls.py +++ b/apps/chat/urls.py @@ -5,7 +5,8 @@ from . import views app_name = "chat" -# 当前 V1 仅保留一个场景对话入口,场景详情合并在对话页中展示。 +# 审核智能体前台以会话为中心。 urlpatterns = [ - path("/", views.index, name="index"), + path("", views.index, name="index"), + path("/", views.detail, name="detail"), ] diff --git a/apps/chat/views.py b/apps/chat/views.py index 92a5c4a..171b3b9 100644 --- a/apps/chat/views.py +++ b/apps/chat/views.py @@ -1,38 +1,43 @@ -from django.shortcuts import render +from django.shortcuts import get_object_or_404, redirect, render from agent_core.orchestrator import run_agent from agent_core.results import AgentResult from apps.audit.services import create_audit_log -from apps.documents.models import UploadedDocument -from apps.scenarios.services import ScenarioNotFound, get_scenario +from apps.documents.models import SubmissionBatch, UploadedDocument +from apps.scenarios.services import get_scenario from .forms import ChatForm +from .models import Conversation -def index(request, scenario_id: str): - # View 只负责请求编排、表单校验和模板渲染。 - # 具体 Agent 执行、审计写入和文档筛选规则分别交给独立模块处理。 - try: - scenario = get_scenario(scenario_id) - except ScenarioNotFound: - return render( - request, - "chat/index.html", - { - "scenario": None, - "form": ChatForm(), - "error": "场景不存在,请返回首页检查配置。", - }, - status=404, - ) +def index(request): + conversations = Conversation.objects.all() + if conversations.exists(): + return redirect("chat:detail", conversation_id=conversations.first().conversation_id) + return render( + request, + "chat/index.html", + { + "conversation": None, + "conversations": [], + "form": ChatForm(), + "documents": [], + "result": None, + "audit_log": None, + "node_results": [], + "active_node": None, + }, + ) + +def detail(request, conversation_id: str): + conversation = get_object_or_404(Conversation, conversation_id=conversation_id) + batch = SubmissionBatch.objects.filter(batch_id=conversation.batch_id).first() + documents = UploadedDocument.objects.filter(batch=batch) + form = ChatForm(request.POST or None, documents=documents) result = None audit_log = None - documents = UploadedDocument.objects.filter( - scenario_id=scenario["id"], - status=UploadedDocument.STATUS_INDEXED, - ) - form = ChatForm(request.POST or None, documents=documents) + active_node = None task_modes = [ {"name": "目录汇总", "description": "汇总文件、页数、章节点和目录型文档。"}, {"name": "完整性检查", "description": "对照法规模板检查齐套性、缺失项和错放项。"}, @@ -41,28 +46,46 @@ def index(request, scenario_id: str): {"name": "综合风险报告", "description": "形成高优先级问题、建议动作和责任人通知。"}, ] if request.method == "POST" and form.is_valid(): + scenario = get_scenario("document_review") message = form.cleaned_data["message"] try: - # 只把必要的运行选项传给 Agent Core,避免在 View 中散落模型细节。 result = run_agent( scenario, message, - options={"document_ids": form.cleaned_data["document_ids"]}, + options={ + "conversation_id": conversation.conversation_id, + "batch_id": conversation.batch_id, + "product_name": conversation.product_name, + "document_ids": form.cleaned_data["document_ids"], + }, ) except Exception as exc: result = AgentResult(status="failed", error=str(exc), answer="") - audit_log = create_audit_log(scenario["id"], scenario["name"], message, result) + audit_log = create_audit_log( + "document_review", + "注册审核智能体", + message, + result, + batch_id=conversation.batch_id, + conversation_id=conversation.conversation_id, + product_name=conversation.product_name, + ) + active_node = "risk" return render( request, "chat/index.html", { - "scenario": scenario, + "conversation": conversation, + "conversations": Conversation.objects.all(), + "batch": batch, "form": form, "documents": documents, "document_count": documents.count(), "result": result, "audit_log": audit_log, "task_modes": task_modes, + "node_results": conversation.node_results, + "active_node": active_node, }, ) diff --git a/apps/documents/migrations/0002_submissionbatch_uploadeddocument_chapter_code_and_more.py b/apps/documents/migrations/0002_submissionbatch_uploadeddocument_chapter_code_and_more.py new file mode 100644 index 0000000..f044067 --- /dev/null +++ b/apps/documents/migrations/0002_submissionbatch_uploadeddocument_chapter_code_and_more.py @@ -0,0 +1,103 @@ +# Generated by Django 5.2.14 on 2026-06-03 16:39 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("documents", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="SubmissionBatch", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "batch_id", + models.CharField(db_index=True, max_length=64, unique=True), + ), + ( + "product_name", + models.CharField(blank=True, db_index=True, max_length=255), + ), + ( + "workflow_type", + models.CharField(default="registration", max_length=64), + ), + ( + "conversation_id", + models.CharField(blank=True, db_index=True, max_length=64), + ), + ("file_count", models.PositiveIntegerField(default=0)), + ("page_count", models.PositiveIntegerField(default=0)), + ("chapter_summary", models.JSONField(blank=True, default=list)), + ( + "import_status", + models.CharField(db_index=True, default="pending", max_length=32), + ), + ("exception_count", models.PositiveIntegerField(default=0)), + ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ], + options={ + "ordering": ["-created_at"], + }, + ), + migrations.AddField( + model_name="uploadeddocument", + name="chapter_code", + field=models.CharField(blank=True, max_length=32), + ), + migrations.AddField( + model_name="uploadeddocument", + name="chapter_match_status", + field=models.CharField(blank=True, max_length=32), + ), + migrations.AddField( + model_name="uploadeddocument", + name="document_role", + field=models.CharField(blank=True, max_length=64), + ), + migrations.AddField( + model_name="uploadeddocument", + name="needs_manual_review", + field=models.BooleanField(default=False), + ), + migrations.AddField( + model_name="uploadeddocument", + name="page_count", + field=models.PositiveIntegerField(default=0), + ), + migrations.AddField( + model_name="uploadeddocument", + name="page_count_confidence", + field=models.CharField(blank=True, max_length=32), + ), + migrations.AddField( + model_name="uploadeddocument", + name="relative_path", + field=models.CharField(blank=True, max_length=500), + ), + migrations.AddField( + model_name="uploadeddocument", + name="batch", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.CASCADE, + related_name="documents", + to="documents.submissionbatch", + ), + ), + ] diff --git a/apps/documents/models.py b/apps/documents/models.py index e7fdcf5..b33fb3f 100644 --- a/apps/documents/models.py +++ b/apps/documents/models.py @@ -1,6 +1,48 @@ from django.db import models +class SubmissionBatch(models.Model): + """ + 资料包主对象,承接导入、会话绑定和目录汇总结果。 + + Documents 模块负责维护资料包与文件的关系, + 不在模型层耦合 Agent 执行细节。 + """ + + STATUS_PENDING = "pending" + STATUS_PROCESSING = "processing" + STATUS_COMPLETED = "completed" + STATUS_REVIEW_REQUIRED = "review_required" + STATUS_FAILED = "failed" + + batch_id = models.CharField(max_length=64, unique=True, db_index=True) + product_name = models.CharField(max_length=255, blank=True, db_index=True) + workflow_type = models.CharField(max_length=64, default="registration") + conversation_id = models.CharField(max_length=64, blank=True, db_index=True) + file_count = models.PositiveIntegerField(default=0) + page_count = models.PositiveIntegerField(default=0) + chapter_summary = models.JSONField(default=list, blank=True) + import_status = models.CharField(max_length=32, default=STATUS_PENDING, db_index=True) + exception_count = models.PositiveIntegerField(default=0) + created_at = models.DateTimeField(auto_now_add=True, db_index=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + ordering = ["-created_at"] + + def __str__(self) -> str: + return self.product_name or self.batch_id + + def get_import_status_display_text(self) -> str: + return { + self.STATUS_PENDING: "待导入", + self.STATUS_PROCESSING: "处理中", + self.STATUS_COMPLETED: "已完成", + self.STATUS_REVIEW_REQUIRED: "待复核", + self.STATUS_FAILED: "失败", + }.get(self.import_status, self.import_status) + + class UploadedDocument(models.Model): """ 保存用户上传文档的元数据和入库状态。 @@ -13,11 +55,25 @@ class UploadedDocument(models.Model): STATUS_INDEXED = "indexed" STATUS_FAILED = "failed" + batch = models.ForeignKey( + SubmissionBatch, + related_name="documents", + null=True, + blank=True, + on_delete=models.CASCADE, + ) scenario_id = models.CharField(max_length=100, db_index=True) original_name = models.CharField(max_length=255) file = models.FileField(upload_to="documents/%Y%m%d/") file_type = models.CharField(max_length=20) size = models.PositiveIntegerField(default=0) + relative_path = models.CharField(max_length=500, blank=True) + chapter_code = models.CharField(max_length=32, blank=True) + document_role = models.CharField(max_length=64, blank=True) + page_count = models.PositiveIntegerField(default=0) + page_count_confidence = models.CharField(max_length=32, blank=True) + chapter_match_status = models.CharField(max_length=32, blank=True) + needs_manual_review = models.BooleanField(default=False) status = models.CharField(max_length=20, default=STATUS_UPLOADED, db_index=True) error_message = models.TextField(blank=True) created_at = models.DateTimeField(auto_now_add=True) diff --git a/apps/documents/services.py b/apps/documents/services.py index a3b8e73..7ff6dca 100644 --- a/apps/documents/services.py +++ b/apps/documents/services.py @@ -4,11 +4,12 @@ import xml.etree.ElementTree as ET from zipfile import BadZipFile, ZipFile from agent_core.rag.ingest import ingest_document +from apps.chat.services import create_conversation_for_batch -from .models import UploadedDocument +from .models import SubmissionBatch, UploadedDocument -def create_uploaded_document(scenario_id: str, uploaded_file) -> UploadedDocument: +def create_uploaded_document(scenario_id: str, uploaded_file, batch: SubmissionBatch | None = None) -> UploadedDocument: """ 保存上传文件的元数据记录。 @@ -17,15 +18,116 @@ def create_uploaded_document(scenario_id: str, uploaded_file) -> UploadedDocumen """ extension = _detect_extension(uploaded_file.name) return UploadedDocument.objects.create( + batch=batch, scenario_id=scenario_id, original_name=uploaded_file.name, file=uploaded_file, file_type=extension, size=uploaded_file.size, + relative_path=uploaded_file.name, status=UploadedDocument.STATUS_UPLOADED, ) +def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict: + """ + 导入资料包并建立批次、文档、目录汇总和主会话。 + + 当前实现保持离线稳定,重点保证: + - 资料包记录可落库 + - 产品名称可解析 + - 会话可自动绑定 + - 可直接产出 overview report + """ + batch = SubmissionBatch.objects.create( + batch_id=_generate_batch_id(), + workflow_type="registration", + import_status=SubmissionBatch.STATUS_PROCESSING, + ) + documents = [] + candidates = [] + chapter_summary = {} + total_pages = 0 + + for uploaded_file in uploaded_files: + document = create_uploaded_document(scenario_id, uploaded_file, batch=batch) + text = extract_text(document) + page_count = _estimate_page_count(text) + document.page_count = page_count + document.page_count_confidence = "estimated" + document.document_role = _detect_document_role(document.original_name) + document.chapter_code = _detect_chapter_code(document.original_name, text) + document.chapter_match_status = "matched" if document.chapter_code else "unknown" + document.needs_manual_review = not bool(document.chapter_code) + document.save( + update_fields=[ + "page_count", + "page_count_confidence", + "document_role", + "chapter_code", + "chapter_match_status", + "needs_manual_review", + "updated_at", + ] + ) + documents.append(document) + total_pages += page_count + chapter_key = document.chapter_code or "UNCLASSIFIED" + chapter_summary[chapter_key] = chapter_summary.get(chapter_key, 0) + 1 + candidates.extend(_extract_product_candidates(document.original_name, text)) + + product_name, warnings = _select_product_name(candidates) + conversation = create_conversation_for_batch(batch.batch_id, product_name) + + batch.product_name = product_name + batch.conversation_id = conversation.conversation_id + batch.file_count = len(documents) + batch.page_count = total_pages + batch.chapter_summary = [ + {"chapter_code": chapter_code, "document_count": count} + for chapter_code, count in sorted(chapter_summary.items()) + ] + batch.exception_count = len(warnings) + batch.import_status = ( + SubmissionBatch.STATUS_REVIEW_REQUIRED if warnings else SubmissionBatch.STATUS_COMPLETED + ) + batch.save( + update_fields=[ + "product_name", + "conversation_id", + "file_count", + "page_count", + "chapter_summary", + "exception_count", + "import_status", + "updated_at", + ] + ) + return { + "batch_id": batch.batch_id, + "conversation_id": conversation.conversation_id, + "product_name": batch.product_name, + "registration_overview_report": { + "batch_id": batch.batch_id, + "product_name": batch.product_name, + "file_count": batch.file_count, + "total_page_count": batch.page_count, + "chapter_summary": batch.chapter_summary, + "documents": [ + { + "document_id": document.id, + "original_name": document.original_name, + "chapter_code": document.chapter_code, + "page_count": document.page_count, + "document_role": document.document_role, + } + for document in documents + ], + "warnings": warnings, + }, + } + + def extract_text(document: UploadedDocument) -> str: """ 根据文档类型选择合适的文本抽取策略。 @@ -83,6 +185,99 @@ def _detect_extension(file_name: str) -> str: return Path(file_name).suffix.lower().lstrip(".") +def _generate_batch_id() -> str: + return f"SUB-20260604-{SubmissionBatch.objects.count() + 1:03d}" + + +def _estimate_page_count(text: str) -> int: + stripped = text.strip() + if not stripped: + return 0 + line_count = len([line for line in stripped.splitlines() if line.strip()]) + return max(1, line_count) + + +def _detect_document_role(file_name: str) -> str: + normalized = file_name.lower() + if "申请表" in file_name: + return "application_form" + if "说明书" in file_name: + return "product_manual" + if "产品列表" in file_name: + return "product_list" + if "声明" in file_name: + return "declaration" + if normalized.endswith(".pdf"): + return "pdf_document" + return "general_document" + + +def _detect_chapter_code(file_name: str, text: str) -> str: + for source in (file_name, text): + match = re.search(r"(CH\d+(?:\.\d+)*)", source, flags=re.IGNORECASE) + if match: + return match.group(1).upper() + if "监管" in file_name or "申请表" in file_name or "说明书" in file_name: + return "CH1" + return "" + + +def _extract_product_candidates(file_name: str, text: str) -> list[dict]: + source_type = _detect_candidate_source(file_name) + if not source_type: + return [] + patterns = [ + r"产品名称[::]\s*([^\n\r]+)", + r"名称[::]\s*([^\n\r]+检测试剂盒[^\n\r]*)", + ] + for pattern in patterns: + match = re.search(pattern, text) + if match: + return [{"source_type": source_type, "product_name": match.group(1).strip()}] + cleaned = Path(file_name).stem.replace("目标产品", "").replace("说明书", "").strip("-_ ") + if cleaned and "申请表" not in cleaned and "产品列表" not in cleaned: + return [{"source_type": source_type, "product_name": cleaned}] + return [] + + +def _detect_candidate_source(file_name: str) -> str: + if "申请表" in file_name: + return "application_form" + if "说明书" in file_name: + return "product_manual" + if "产品列表" in file_name: + return "product_list" + return "" + + +def _select_product_name(candidates: list[dict]) -> tuple[str, list[str]]: + if not candidates: + return "", ["未识别到产品名称,建议人工补录。"] + + priority = { + "application_form": 1, + "product_manual": 2, + "product_list": 3, + } + sorted_candidates = sorted( + candidates, + key=lambda item: priority.get(item["source_type"], 99), + ) + top_candidate = sorted_candidates[0] + warnings = [] + conflict_names = { + item["product_name"] + for item in sorted_candidates + if item["product_name"] != top_candidate["product_name"] + } + if conflict_names: + warnings.append( + "产品名称来源冲突:" + + " / ".join([top_candidate["product_name"], *sorted(conflict_names)]) + ) + return top_candidate["product_name"], warnings + + def _read_text_file(path: Path) -> str: """优先按 UTF-8 读取;失败时回退到系统默认编码。""" try: diff --git a/apps/documents/views.py b/apps/documents/views.py index c77314c..83bbbc4 100644 --- a/apps/documents/views.py +++ b/apps/documents/views.py @@ -5,18 +5,24 @@ from django.views.decorators.http import require_POST from apps.scenarios.services import list_scenarios from .forms import DocumentUploadForm -from .models import UploadedDocument -from .services import create_uploaded_document, index_document +from .models import SubmissionBatch, UploadedDocument +from .services import import_submission_batch, index_document def document_list(request): - # 列表页只负责展示文档元数据和可执行操作,不处理入库细节。 + # 资料包页展示批次、会话绑定和关键异常,同时保留文档级明细便于演示。 + keyword = (request.GET.get("keyword") or "").strip() + batches = SubmissionBatch.objects.all() + if keyword: + batches = batches.filter(product_name__icontains=keyword) documents = UploadedDocument.objects.all() status_counts = { - "uploaded": documents.filter(status=UploadedDocument.STATUS_UPLOADED).count(), - "indexed": documents.filter(status=UploadedDocument.STATUS_INDEXED).count(), - "failed": documents.filter(status=UploadedDocument.STATUS_FAILED).count(), - "total": documents.count(), + "pending": batches.filter(import_status=SubmissionBatch.STATUS_PENDING).count(), + "completed": batches.filter(import_status=SubmissionBatch.STATUS_COMPLETED).count(), + "review_required": batches.filter( + import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED + ).count(), + "total": batches.count(), } processing_pipeline = [ {"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"}, @@ -35,6 +41,8 @@ def document_list(request): "documents/document_list.html", { "documents": documents, + "batches": batches, + "keyword": keyword, "status_counts": status_counts, "processing_pipeline": processing_pipeline, "exception_items": exception_items, @@ -43,12 +51,18 @@ def document_list(request): def upload(request): - # 上传成功后仅保存文件和元数据,是否入库由用户显式触发。 + # 上传成功后直接创建资料包并绑定主会话。 if request.method == "POST": form = DocumentUploadForm(request.POST, request.FILES) if form.is_valid(): - create_uploaded_document(form.cleaned_data["scenario_id"], form.cleaned_data["file"]) - messages.success(request, "文件已上传,可继续执行入库。") + result = import_submission_batch( + form.cleaned_data["scenario_id"], + [form.cleaned_data["file"]], + ) + messages.success( + request, + f"资料包已导入,已绑定会话 {result['conversation_id']}。", + ) return redirect("documents:list") else: form = DocumentUploadForm() diff --git a/templates/base.html b/templates/base.html index 4a6b7e9..b86e681 100644 --- a/templates/base.html +++ b/templates/base.html @@ -363,14 +363,10 @@ diff --git a/templates/chat/index.html b/templates/chat/index.html index 5f3960c..fd9c617 100644 --- a/templates/chat/index.html +++ b/templates/chat/index.html @@ -1,31 +1,53 @@ {% extends "base.html" %} -{% block title %}{{ scenario.name|default:"Agent 审核工作台" }}{% endblock %} +{% block title %}审核智能体{% endblock %} {% block content %} - {% if error %} -
{{ error }}
- {% endif %} - - {% if scenario %} - -
-
-
-
-
-

任务输入与资料范围

-

左侧突出受控输入:先描述审核目标,再限定本轮使用的文档范围。

-
+
+
+
+

会话历史

+

左侧保留历史会话,标题默认使用解析后的产品名称。

+
    + {% for item in conversations %} +
  • + {{ item.title }} +
    产品:{{ item.product_name|default:"未识别" }}
    +
    批次:{{ item.batch_id }}
    +
  • + {% empty %} +
  • 暂无会话,请先从资料包页面导入资料。
  • + {% endfor %} +
+
+
+ +
+
+
+
+

对话区与节点导航

+

中间区域承接用户问题、Agent 回答和节点式结果摘要。

+
+
+ {% if conversation %} +
+ {% for node in node_results %} + {{ node.label }} / {{ node.status }} + {% endfor %}
{% csrf_token %} @@ -38,7 +60,6 @@
{{ form.document_ids.label_tag }} -

不勾选时默认使用全部已入库文档。

{% for checkbox in form.document_ids %} {% empty %} -
当前场景还没有已入库文档,系统将仅依赖工具和模型能力生成结果。
+
当前资料包还没有可选文档。
{% endfor %}
- {% if form.document_ids.errors %} -

{{ form.document_ids.errors|join:" " }}

- {% endif %}
- +
-
- -
-

快捷示例

-
    -
  • 检查当前资料是否存在缺失项
  • -
  • 抽取说明书中的关键字段
  • -
  • 比较两份文档中的产品名称是否一致
  • -
-
-
- -
-
-

结果

{% if result %} -
    -
  • 模型:{{ result.model_name }}
  • -
  • 状态:{{ result.status }}
  • -
  • 耗时:{{ result.latency_ms }} ms
  • -
- 主回答 + Agent 回答
{{ result.answer|linebreaksbr }}
- {% else %} -
提交任务后,这里会展示 Agent 的执行状态、主回答和过程摘要。
- {% endif %} -
- - {% if result %} -
-

证据引用与工具调用

-

引用片段与工具调用用于支撑结果可解释性。

- {% if result.references %} -

引用片段

-
    - {% for reference in result.references %} -
  • - {{ reference.source }} -
    {{ reference.content|default:"无正文内容"|linebreaksbr }}
    -
  • - {% endfor %} -
- {% else %} -
当前回答没有引用知识库片段。
- {% endif %} - - {% if result.tool_calls %} -

工具调用

-
    - {% for tool_call in result.tool_calls %} -
  • - {{ tool_call.tool_name }} -

    执行状态:{{ tool_call.success }}

    - {% if tool_call.error %} -

    {{ tool_call.error }}

    - {% endif %} -
    {{ tool_call.result }}
    -
  • - {% endfor %} -
- {% else %} -
当前场景没有声明工具,或本次执行无需调用工具。
- {% endif %} -
- - {% if result.error %} -
-

错误信息

-
{{ result.error }}
-
{% endif %} + {% else %} +
暂无会话,请先导入资料包。
{% endif %} -
+ -
-
-
-
-

结构化审核结果

-

右侧结果舱用于展示缺失项、冲突项、字段池结果或风险清单。

-
-
- {% if result %} - - - - {% for key, value in result.structured_output.items %} - - - - - {% endfor %} - -
结构化结果
{{ key }} - {% if key == "answer" or key == "summary" or key == "reply" %} - {{ value|linebreaksbr }} - {% else %} -
{{ value }}
- {% endif %} -
- {% else %} -
执行任务后,这里会展示结构化审核结果和回填准备信息。
- {% endif %} -
+
+

节点式结果

+ {% if result and result.structured_output %} + + + {% for key, value in result.structured_output.items %} + + + + + {% endfor %} + +
{{ key }}
{{ value }}
+ {% else %} +
执行任务后,这里会展示结构化节点结果。
+ {% endif %} +
+
-
-

引用与审计

+
+
+

上传区

+

资料包导入入口在资料包页统一维护,当前会话只展示绑定关系。

+ {% if batch %}
    -
  • 可查看引用片段、工具调用和本次审计日志。
  • +
  • + 当前资料包 +
    批次:{{ batch.batch_id }}
    +
    文件数:{{ batch.file_count }}
    +
    页数:{{ batch.page_count }}
    +
    导入状态:{{ batch.get_import_status_display_text }}
    +
+ + {% else %} +
暂无绑定资料包。
+ {% endif %} +
+ +
+

动态信息卡

+
    +
  • 当前会话围绕 `conversation_id / batch_id / product_name` 串联。
  • +
  • 任务模式:目录汇总、完整性检查、字段抽取、一致性核查、风险预警。
  • {% if audit_log %} - +
  • 查看本次处理历史
  • {% endif %} -
-
-
- {% endif %} + + + + {% endblock %} diff --git a/templates/documents/document_list.html b/templates/documents/document_list.html index ce11604..072722d 100644 --- a/templates/documents/document_list.html +++ b/templates/documents/document_list.html @@ -1,36 +1,114 @@ {% extends "base.html" %} -{% block title %}文件中心{% endblock %} +{% block title %}资料包{% endblock %} {% block content %}
-
文件总数
+
资料包总数
{{ status_counts.total }}
-
已完成入库
-
{{ status_counts.indexed }}
+
已完成
+
{{ status_counts.completed }}
-
待入库
-
{{ status_counts.uploaded }}
+
待复核
+
{{ status_counts.review_required }}
-
失败
-
{{ status_counts.failed }}
+
待导入
+
{{ status_counts.pending }}
+
+
+
+

按产品名称搜索

+

支持按产品名称定位资料包,并跳转到关联会话。

+
+
+
+
+ + +
+
+ + 清空 +
+
+
+ +
+
+
+

资料包列表

+

资料包与会话一一绑定,会话标题默认采用解析后的产品名称。

+
+
+
+ + + + + + + + + + + + + + {% for batch in batches %} + + + + + + + + + + {% empty %} + + + + {% endfor %} + +
批次号产品名称会话文件数页数状态章节点概览
{{ batch.batch_id }}{{ batch.product_name|default:"未识别产品名称" }} + {% if batch.conversation_id %} + 查看对话 {{ batch.conversation_id }} + {% else %} + 尚未绑定 + {% endif %} + {{ batch.file_count }}{{ batch.page_count }} + + {{ batch.get_import_status_display_text }} + + + {% if batch.chapter_summary %} + {% for chapter in batch.chapter_summary %} +
{{ chapter.chapter_code }} / {{ chapter.document_count }} 份
+ {% endfor %} + {% else %} + 暂无目录汇总 + {% endif %} +
暂无资料包,请先导入申报资料。
+
+
+
@@ -52,7 +130,7 @@

资料目录总览

-

页面下方保留真实文件记录与手动入库动作,保证演示原型仍基于当前系统能力运行。

+

保留文件明细,便于说明目录识别、页数统计和异常定位。

@@ -60,7 +138,9 @@ 文件名 + 批次 场景 + 章节点 类型 大小 状态 @@ -71,7 +151,9 @@ {% for document in documents %} {{ document.original_name }} + {{ document.batch.batch_id|default:"-" }} {{ document.scenario_id }} + {{ document.chapter_code|default:"待识别" }} {{ document.file_type }} {{ document.size }} @@ -98,7 +180,7 @@ {% empty %} - 暂无文件,请先导入申报资料或法规原文。 + 暂无文件,请先导入申报资料或法规原文。 {% endfor %} diff --git a/templates/scenarios/index.html b/templates/scenarios/index.html index 84bf309..521e359 100644 --- a/templates/scenarios/index.html +++ b/templates/scenarios/index.html @@ -1,6 +1,6 @@ {% extends "base.html" %} -{% block title %}任务总览{% endblock %} +{% block title %}平台总览{% endblock %} {% block content %} @@ -71,7 +71,7 @@ {% endif %}

{% empty %} diff --git a/tests/test_chat.py b/tests/test_chat.py index 7bae4cc..52687db 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -2,24 +2,67 @@ from django.urls import reverse from agent_core.results import AgentResult from apps.audit.models import AgentAuditLog -from apps.documents.models import UploadedDocument +from apps.chat.models import Conversation +from apps.documents.models import SubmissionBatch, UploadedDocument -def test_chat_post_returns_agent_result_and_audit_log(client, db): +def _create_conversation_with_batch(): + batch = SubmissionBatch.objects.create( + batch_id="SUB-20260604-001", + product_name="新型冠状病毒 2019-nCoV 核酸检测试剂盒", + workflow_type="registration", + conversation_id="conv-001", + file_count=2, + page_count=12, + import_status="completed", + ) + conversation = Conversation.objects.create( + conversation_id="conv-001", + title="新型冠状病毒 2019-nCoV 核酸检测试剂盒", + product_name=batch.product_name, + batch_id=batch.batch_id, + task_status="processing", + node_results=[ + {"label": "资料包导入", "status": "已完成"}, + {"label": "目录汇总", "status": "处理中"}, + ], + ) + return batch, conversation + + +def test_chat_post_returns_agent_result_and_audit_log(client, db, monkeypatch): + batch, conversation = _create_conversation_with_batch() + UploadedDocument.objects.create( + batch=batch, + scenario_id="document_review", + original_name="说明书.md", + file_type="md", + size=1, + status=UploadedDocument.STATUS_INDEXED, + ) + + monkeypatch.setattr( + "apps.chat.views.run_agent", + lambda *args, **kwargs: AgentResult(answer="模拟回答", status="success"), + ) + response = client.post( - reverse("chat:index", args=["knowledge_qa"]), + reverse("chat:detail", args=[conversation.conversation_id]), {"message": "如何处理异常?"}, ) assert response.status_code == 200 content = response.content.decode("utf-8") - assert "mock-model" in content + assert "审核智能体" in content assert "模拟回答" in content assert AgentAuditLog.objects.count() == 1 + assert AgentAuditLog.objects.get().batch_id == batch.batch_id def test_chat_rejects_empty_message(client, db): - response = client.post(reverse("chat:index", args=["knowledge_qa"]), {"message": ""}) + _batch, conversation = _create_conversation_with_batch() + + response = client.post(reverse("chat:detail", args=[conversation.conversation_id]), {"message": ""}) assert response.status_code == 200 assert AgentAuditLog.objects.count() == 0 @@ -27,15 +70,18 @@ def test_chat_rejects_empty_message(client, db): def test_chat_passes_selected_document_ids_to_agent_core(client, db, monkeypatch): + batch, conversation = _create_conversation_with_batch() selected = UploadedDocument.objects.create( - scenario_id="knowledge_qa", + batch=batch, + scenario_id="document_review", original_name="selected.md", file_type="md", size=1, status=UploadedDocument.STATUS_INDEXED, ) - other = UploadedDocument.objects.create( - scenario_id="knowledge_qa", + UploadedDocument.objects.create( + batch=batch, + scenario_id="document_review", original_name="other.md", file_type="md", size=1, @@ -45,63 +91,38 @@ def test_chat_passes_selected_document_ids_to_agent_core(client, db, monkeypatch def fake_run_agent(scenario_config, user_input, options=None): captured["options"] = options or {} - from agent_core.results import AgentResult - return AgentResult(answer="ok", status="success") monkeypatch.setattr("apps.chat.views.run_agent", fake_run_agent) response = client.post( - reverse("chat:index", args=["knowledge_qa"]), + reverse("chat:detail", args=[conversation.conversation_id]), {"message": "只查选中文档", "document_ids": [str(selected.id)]}, ) assert response.status_code == 200 assert captured["options"]["document_ids"] == [selected.id] - assert other.id not in captured["options"]["document_ids"] + assert captured["options"]["conversation_id"] == conversation.conversation_id + assert captured["options"]["batch_id"] == batch.batch_id -def test_chat_renders_structured_output_references_and_tool_calls(client, db, monkeypatch): - def fake_run_agent(scenario_config, user_input, options=None): - return AgentResult( - answer="建议先隔离现场。", - structured_output={ - "output_type": "quality_report", - "summary": "发现异常批次需要立即处置。", - "risk_level": "high", - "suggested_actions": ["隔离现场", "通知负责人"], - }, - references=[ - { - "source": "sop.md", - "content": "异常处理 SOP:先隔离现场,再通知负责人。", - } - ], - tool_calls=[ - { - "tool_name": "query_demo_records", - "success": True, - "result": {"records": [{"title": "A线缺陷"}]}, - "error": "", - } - ], - model_name="mock-model", - status="success", - ) - - monkeypatch.setattr("apps.chat.views.run_agent", fake_run_agent) - - response = client.post( - reverse("chat:index", args=["quality_analysis"]), - {"message": "分析 A 线异常"}, +def test_chat_renders_three_column_workspace_and_node_results(client, db): + batch, conversation = _create_conversation_with_batch() + UploadedDocument.objects.create( + batch=batch, + scenario_id="document_review", + original_name="说明书.md", + file_type="md", + size=1, + status=UploadedDocument.STATUS_INDEXED, ) + response = client.get(reverse("chat:detail", args=[conversation.conversation_id])) + content = response.content.decode("utf-8") assert response.status_code == 200 - assert "结构化结果" in content - assert "发现异常批次需要立即处置" in content - assert "引用片段" in content - assert "sop.md" in content - assert "工具调用" in content - assert "query_demo_records" in content - assert "查看本次审计日志" in content + assert "会话历史" in content + assert "对话区与节点导航" in content + assert "上传区" in content + assert "资料包导入 / 已完成" in content + assert "目录汇总 / 处理中" in content diff --git a/tests/test_documents.py b/tests/test_documents.py index 5af8267..6f13b6b 100644 --- a/tests/test_documents.py +++ b/tests/test_documents.py @@ -2,8 +2,9 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse from apps.documents.forms import DocumentUploadForm -from apps.documents.models import UploadedDocument -from apps.documents.services import extract_text, index_document +from apps.documents.models import SubmissionBatch, UploadedDocument +from apps.documents.services import extract_text, import_submission_batch, index_document +from apps.chat.models import Conversation def test_upload_txt_document_creates_uploaded_record(client, db): @@ -31,7 +32,7 @@ def test_upload_redirect_shows_success_message(client, db): ) assert response.status_code == 200 - assert "文件已上传,可继续执行入库" in response.content.decode("utf-8") + assert "资料包已导入,已绑定会话" in response.content.decode("utf-8") def test_upload_accepts_pdf_and_docx_documents(client, db): @@ -145,3 +146,74 @@ def test_index_document_marks_failed_when_extracted_text_is_empty(db, monkeypatc assert updated_document.status == UploadedDocument.STATUS_FAILED assert "文档内容为空" in updated_document.error_message + + +def test_upload_creates_submission_batch_and_bound_conversation(client, db): + file = SimpleUploadedFile( + "目标产品说明书.txt", + "产品名称:新型冠状病毒 2019-nCoV 核酸检测试剂盒".encode("utf-8"), + content_type="text/plain", + ) + + response = client.post( + reverse("documents:upload"), + {"scenario_id": "document_review", "file": file}, + ) + + assert response.status_code == 302 + batch = SubmissionBatch.objects.get() + conversation = Conversation.objects.get() + assert batch.product_name == "新型冠状病毒 2019-nCoV 核酸检测试剂盒" + assert batch.conversation_id == conversation.conversation_id + assert conversation.title == "新型冠状病毒 2019-nCoV 核酸检测试剂盒" + assert batch.file_count == 1 + + +def test_document_list_supports_product_name_search(client, db): + SubmissionBatch.objects.create( + batch_id="SUB-20260604-001", + product_name="新型冠状病毒 2019-nCoV 核酸检测试剂盒", + workflow_type="registration", + conversation_id="conv-001", + file_count=2, + page_count=12, + import_status="completed", + ) + SubmissionBatch.objects.create( + batch_id="SUB-20260604-002", + product_name="呼吸道合胞病毒核酸检测试剂盒", + workflow_type="registration", + conversation_id="conv-002", + file_count=3, + page_count=20, + import_status="completed", + ) + + response = client.get(reverse("documents:list"), {"keyword": "新型冠状病毒"}) + + content = response.content.decode("utf-8") + assert response.status_code == 200 + assert "新型冠状病毒 2019-nCoV 核酸检测试剂盒" in content + assert "呼吸道合胞病毒核酸检测试剂盒" not in content + + +def test_import_submission_batch_marks_manual_review_when_product_names_conflict(db): + files = [ + SimpleUploadedFile( + "注册申请表.txt", + "产品名称:产品A".encode("utf-8"), + content_type="text/plain", + ), + SimpleUploadedFile( + "目标产品说明书.txt", + "产品名称:产品B".encode("utf-8"), + content_type="text/plain", + ), + ] + + result = import_submission_batch("document_review", files) + + batch = SubmissionBatch.objects.get(batch_id=result["batch_id"]) + assert batch.import_status == "review_required" + assert result["registration_overview_report"]["warnings"] + assert "产品名称来源冲突" in result["registration_overview_report"]["warnings"][0]