feat: 重构资料包模型与会话绑定主链路
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
# Generated by Django 5.2.14 on 2026-06-03 16:39
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the NotificationRecord table and add batch/conversation/product columns to AgentAuditLog."""

    # Depends on the audit app's migration 0002_demobusinessrecord.
    dependencies = [
        ("audit", "0002_demobusinessrecord"),
    ]

    operations = [
        # New model: NotificationRecord, ordered newest-first by created_at.
        migrations.CreateModel(
            name="NotificationRecord",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("batch_id", models.CharField(db_index=True, max_length=64)),
                ("conversation_id", models.CharField(db_index=True, max_length=64)),
                (
                    "product_name",
                    models.CharField(blank=True, db_index=True, max_length=255),
                ),
                ("trigger_source", models.CharField(blank=True, max_length=64)),
                ("notify_reason", models.CharField(db_index=True, max_length=32)),
                ("owner_role", models.CharField(blank=True, max_length=100)),
                ("feishu_user_id", models.CharField(blank=True, max_length=100)),
                (
                    "message_status",
                    models.CharField(db_index=True, default="pending", max_length=32),
                ),
                ("web_detail_url", models.URLField(blank=True)),
                ("receipt", models.JSONField(blank=True, default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)),
            ],
            options={
                "ordering": ["-created_at"],
            },
        ),
        # Three new indexed, blank-allowed columns on the existing AgentAuditLog model.
        migrations.AddField(
            model_name="agentauditlog",
            name="batch_id",
            field=models.CharField(blank=True, db_index=True, max_length=64),
        ),
        migrations.AddField(
            model_name="agentauditlog",
            name="conversation_id",
            field=models.CharField(blank=True, db_index=True, max_length=64),
        ),
        migrations.AddField(
            model_name="agentauditlog",
            name="product_name",
            field=models.CharField(blank=True, db_index=True, max_length=255),
        ),
    ]
|
||||
@@ -16,6 +16,9 @@ class AgentAuditLog(models.Model):
|
||||
|
||||
scenario_id = models.CharField(max_length=100, db_index=True)
|
||||
scenario_name = models.CharField(max_length=200, blank=True)
|
||||
batch_id = models.CharField(max_length=64, blank=True, db_index=True)
|
||||
conversation_id = models.CharField(max_length=64, blank=True, db_index=True)
|
||||
product_name = models.CharField(max_length=255, blank=True, db_index=True)
|
||||
user_input = models.TextField()
|
||||
retrieved_chunks = models.JSONField(default=list, blank=True)
|
||||
tool_calls = models.JSONField(default=list, blank=True)
|
||||
@@ -66,3 +69,33 @@ class DemoBusinessRecord(models.Model):
|
||||
|
||||
def __str__(self) -> str:
|
||||
return self.title
|
||||
|
||||
|
||||
class NotificationRecord(models.Model):
    """
    Audit trail for Feishu notifications.

    The first version only stores the offline notification payload and the
    result status; it does not depend on the real Feishu network.
    """

    # Status constants; STATUS_PENDING is the default for message_status below.
    STATUS_PENDING = "pending"
    STATUS_SENT = "sent"
    STATUS_FAILED = "failed"

    # batch_id / conversation_id are plain indexed strings, not foreign keys.
    batch_id = models.CharField(max_length=64, db_index=True)
    conversation_id = models.CharField(max_length=64, db_index=True)
    product_name = models.CharField(max_length=255, blank=True, db_index=True)
    trigger_source = models.CharField(max_length=64, blank=True)
    notify_reason = models.CharField(max_length=32, db_index=True)
    owner_role = models.CharField(max_length=100, blank=True)
    feishu_user_id = models.CharField(max_length=100, blank=True)
    message_status = models.CharField(max_length=32, default=STATUS_PENDING, db_index=True)
    web_detail_url = models.URLField(blank=True)
    # JSON payload; defaults to an empty dict.
    receipt = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True, db_index=True)

    class Meta:
        # Newest records first.
        ordering = ["-created_at"]

    def __str__(self) -> str:
        """Return ``<notify_reason>:<batch_id>``."""
        return f"{self.notify_reason}:{self.batch_id}"
|
||||
|
||||
@@ -8,6 +8,9 @@ def create_audit_log(
|
||||
scenario_name: str,
|
||||
user_input: str,
|
||||
agent_result: AgentResult,
|
||||
batch_id: str = "",
|
||||
conversation_id: str = "",
|
||||
product_name: str = "",
|
||||
) -> AgentAuditLog:
|
||||
"""
|
||||
将一次 Agent 执行结果落库为审计日志。
|
||||
@@ -20,6 +23,9 @@ def create_audit_log(
|
||||
return AgentAuditLog.objects.create(
|
||||
scenario_id=scenario_id,
|
||||
scenario_name=scenario_name,
|
||||
batch_id=batch_id,
|
||||
conversation_id=conversation_id,
|
||||
product_name=product_name,
|
||||
user_input=user_input,
|
||||
retrieved_chunks=agent_result.references,
|
||||
tool_calls=agent_result.tool_calls,
|
||||
|
||||
52
apps/chat/migrations/0001_initial.py
Normal file
52
apps/chat/migrations/0001_initial.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# Generated by Django 5.2.14 on 2026-06-03 16:39
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Initial migration for the chat app: create the Conversation table."""

    initial = True

    # No dependencies on other migrations.
    dependencies = []

    operations = [
        migrations.CreateModel(
            name="Conversation",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    # Unique, indexed business identifier (separate from the numeric primary key).
                    "conversation_id",
                    models.CharField(db_index=True, max_length=64, unique=True),
                ),
                ("title", models.CharField(max_length=255)),
                (
                    "product_name",
                    models.CharField(blank=True, db_index=True, max_length=255),
                ),
                (
                    "batch_id",
                    models.CharField(blank=True, db_index=True, max_length=64),
                ),
                (
                    "task_status",
                    models.CharField(db_index=True, default="pending", max_length=32),
                ),
                ("node_results", models.JSONField(blank=True, default=list)),
                ("latest_summary", models.JSONField(blank=True, default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                ("last_run_at", models.DateTimeField(blank=True, null=True)),
            ],
            options={
                # Most recently updated first, then most recently created.
                "ordering": ["-updated_at", "-created_at"],
            },
        ),
    ]
|
||||
34
apps/chat/models.py
Normal file
34
apps/chat/models.py
Normal file
@@ -0,0 +1,34 @@
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Conversation(models.Model):
    """
    Primary conversation object for the review agent.

    A conversation is bound one-to-one to a submission package. The title
    defaults to the parsed product name, and node results are stored as JSON
    so the page can render them node by node.
    """

    # Status constants; STATUS_PENDING is the default for task_status below.
    STATUS_PENDING = "pending"
    STATUS_PROCESSING = "processing"
    STATUS_COMPLETED = "completed"
    STATUS_REVIEW_REQUIRED = "review_required"
    STATUS_BLOCKED = "blocked"
    STATUS_FAILED = "failed"

    # Unique business identifier, distinct from the numeric primary key.
    conversation_id = models.CharField(max_length=64, unique=True, db_index=True)
    title = models.CharField(max_length=255)
    product_name = models.CharField(max_length=255, blank=True, db_index=True)
    # Stored as a plain indexed string, not a foreign key.
    batch_id = models.CharField(max_length=64, blank=True, db_index=True)
    task_status = models.CharField(max_length=32, default=STATUS_PENDING, db_index=True)
    # JSON list; defaults to an empty list.
    node_results = models.JSONField(default=list, blank=True)
    # JSON object; defaults to an empty dict.
    latest_summary = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True, db_index=True)
    updated_at = models.DateTimeField(auto_now=True)
    last_run_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        # Most recently updated first, then most recently created.
        ordering = ["-updated_at", "-created_at"]

    def __str__(self) -> str:
        """Return the conversation title."""
        return self.title
|
||||
26
apps/chat/services.py
Normal file
26
apps/chat/services.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from .models import Conversation
|
||||
|
||||
|
||||
def create_conversation_for_batch(batch_id: str, product_name: str) -> Conversation:
    """
    Create the primary conversation for a submission package.

    The title is the parsed product name when one is available; otherwise it
    falls back to a placeholder built from the batch id, so the front end
    always has a stable title to show.

    Args:
        batch_id: Identifier of the submission batch the conversation belongs to.
        product_name: Parsed product name; may be an empty string.

    Returns:
        The newly created ``Conversation`` row, in pending status with two
        pre-populated node results.
    """
    if product_name:
        display_title = product_name
    else:
        display_title = f"未命名资料包-{batch_id}"

    # Initial node list stored on the conversation as JSON.
    initial_nodes = [
        dict(code="package_import", label="资料包导入", status="已完成"),
        dict(code="overview", label="目录汇总", status="处理中"),
    ]

    return Conversation.objects.create(
        conversation_id=_generate_conversation_id(),
        title=display_title,
        product_name=product_name,
        batch_id=batch_id,
        task_status=Conversation.STATUS_PENDING,
        node_results=initial_nodes,
    )
|
||||
|
||||
|
||||
def _generate_conversation_id() -> str:
    """
    Return an unused conversation id of the form ``conv-NNN``.

    The sequence starts at ``row count + 1``. A bare count re-issues an id that
    is still in use once any earlier row has been deleted, which would violate
    the unique constraint on ``conversation_id``; so the candidate is checked
    and the sequence advanced until a free id is found.

    This check-then-insert is not atomic: two concurrent callers can still
    pick the same id, and the unique constraint remains the final guard.
    """
    sequence = Conversation.objects.count() + 1
    while True:
        candidate = f"conv-{sequence:03d}"
        if not Conversation.objects.filter(conversation_id=candidate).exists():
            return candidate
        sequence += 1
|
||||
@@ -5,7 +5,8 @@ from . import views
|
||||
|
||||
app_name = "chat"
|
||||
|
||||
# 当前 V1 仅保留一个场景对话入口,场景详情合并在对话页中展示。
|
||||
# 审核智能体前台以会话为中心。
|
||||
urlpatterns = [
|
||||
path("<str:scenario_id>/", views.index, name="index"),
|
||||
path("", views.index, name="index"),
|
||||
path("<str:conversation_id>/", views.detail, name="detail"),
|
||||
]
|
||||
|
||||
@@ -1,38 +1,43 @@
|
||||
from django.shortcuts import render
|
||||
from django.shortcuts import get_object_or_404, redirect, render
|
||||
|
||||
from agent_core.orchestrator import run_agent
|
||||
from agent_core.results import AgentResult
|
||||
from apps.audit.services import create_audit_log
|
||||
from apps.documents.models import UploadedDocument
|
||||
from apps.scenarios.services import ScenarioNotFound, get_scenario
|
||||
from apps.documents.models import SubmissionBatch, UploadedDocument
|
||||
from apps.scenarios.services import get_scenario
|
||||
|
||||
from .forms import ChatForm
|
||||
from .models import Conversation
|
||||
|
||||
|
||||
def index(request, scenario_id: str):
|
||||
# View 只负责请求编排、表单校验和模板渲染。
|
||||
# 具体 Agent 执行、审计写入和文档筛选规则分别交给独立模块处理。
|
||||
try:
|
||||
scenario = get_scenario(scenario_id)
|
||||
except ScenarioNotFound:
|
||||
return render(
|
||||
request,
|
||||
"chat/index.html",
|
||||
{
|
||||
"scenario": None,
|
||||
"form": ChatForm(),
|
||||
"error": "场景不存在,请返回首页检查配置。",
|
||||
},
|
||||
status=404,
|
||||
)
|
||||
def index(request):
|
||||
conversations = Conversation.objects.all()
|
||||
if conversations.exists():
|
||||
return redirect("chat:detail", conversation_id=conversations.first().conversation_id)
|
||||
return render(
|
||||
request,
|
||||
"chat/index.html",
|
||||
{
|
||||
"conversation": None,
|
||||
"conversations": [],
|
||||
"form": ChatForm(),
|
||||
"documents": [],
|
||||
"result": None,
|
||||
"audit_log": None,
|
||||
"node_results": [],
|
||||
"active_node": None,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def detail(request, conversation_id: str):
|
||||
conversation = get_object_or_404(Conversation, conversation_id=conversation_id)
|
||||
batch = SubmissionBatch.objects.filter(batch_id=conversation.batch_id).first()
|
||||
documents = UploadedDocument.objects.filter(batch=batch)
|
||||
form = ChatForm(request.POST or None, documents=documents)
|
||||
result = None
|
||||
audit_log = None
|
||||
documents = UploadedDocument.objects.filter(
|
||||
scenario_id=scenario["id"],
|
||||
status=UploadedDocument.STATUS_INDEXED,
|
||||
)
|
||||
form = ChatForm(request.POST or None, documents=documents)
|
||||
active_node = None
|
||||
task_modes = [
|
||||
{"name": "目录汇总", "description": "汇总文件、页数、章节点和目录型文档。"},
|
||||
{"name": "完整性检查", "description": "对照法规模板检查齐套性、缺失项和错放项。"},
|
||||
@@ -41,28 +46,46 @@ def index(request, scenario_id: str):
|
||||
{"name": "综合风险报告", "description": "形成高优先级问题、建议动作和责任人通知。"},
|
||||
]
|
||||
if request.method == "POST" and form.is_valid():
|
||||
scenario = get_scenario("document_review")
|
||||
message = form.cleaned_data["message"]
|
||||
try:
|
||||
# 只把必要的运行选项传给 Agent Core,避免在 View 中散落模型细节。
|
||||
result = run_agent(
|
||||
scenario,
|
||||
message,
|
||||
options={"document_ids": form.cleaned_data["document_ids"]},
|
||||
options={
|
||||
"conversation_id": conversation.conversation_id,
|
||||
"batch_id": conversation.batch_id,
|
||||
"product_name": conversation.product_name,
|
||||
"document_ids": form.cleaned_data["document_ids"],
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
result = AgentResult(status="failed", error=str(exc), answer="")
|
||||
audit_log = create_audit_log(scenario["id"], scenario["name"], message, result)
|
||||
audit_log = create_audit_log(
|
||||
"document_review",
|
||||
"注册审核智能体",
|
||||
message,
|
||||
result,
|
||||
batch_id=conversation.batch_id,
|
||||
conversation_id=conversation.conversation_id,
|
||||
product_name=conversation.product_name,
|
||||
)
|
||||
active_node = "risk"
|
||||
|
||||
return render(
|
||||
request,
|
||||
"chat/index.html",
|
||||
{
|
||||
"scenario": scenario,
|
||||
"conversation": conversation,
|
||||
"conversations": Conversation.objects.all(),
|
||||
"batch": batch,
|
||||
"form": form,
|
||||
"documents": documents,
|
||||
"document_count": documents.count(),
|
||||
"result": result,
|
||||
"audit_log": audit_log,
|
||||
"task_modes": task_modes,
|
||||
"node_results": conversation.node_results,
|
||||
"active_node": active_node,
|
||||
},
|
||||
)
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
# Generated by Django 5.2.14 on 2026-06-03 16:39
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the SubmissionBatch table and add package-related columns to UploadedDocument."""

    # Depends on the documents app's initial migration.
    dependencies = [
        ("documents", "0001_initial"),
    ]

    operations = [
        # New model: SubmissionBatch, ordered newest-first by created_at.
        migrations.CreateModel(
            name="SubmissionBatch",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    # Unique, indexed business identifier.
                    "batch_id",
                    models.CharField(db_index=True, max_length=64, unique=True),
                ),
                (
                    "product_name",
                    models.CharField(blank=True, db_index=True, max_length=255),
                ),
                (
                    "workflow_type",
                    models.CharField(default="registration", max_length=64),
                ),
                (
                    "conversation_id",
                    models.CharField(blank=True, db_index=True, max_length=64),
                ),
                ("file_count", models.PositiveIntegerField(default=0)),
                ("page_count", models.PositiveIntegerField(default=0)),
                ("chapter_summary", models.JSONField(blank=True, default=list)),
                (
                    "import_status",
                    models.CharField(db_index=True, default="pending", max_length=32),
                ),
                ("exception_count", models.PositiveIntegerField(default=0)),
                ("created_at", models.DateTimeField(auto_now_add=True, db_index=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
            ],
            options={
                "ordering": ["-created_at"],
            },
        ),
        # New columns on the existing UploadedDocument model.
        migrations.AddField(
            model_name="uploadeddocument",
            name="chapter_code",
            field=models.CharField(blank=True, max_length=32),
        ),
        migrations.AddField(
            model_name="uploadeddocument",
            name="chapter_match_status",
            field=models.CharField(blank=True, max_length=32),
        ),
        migrations.AddField(
            model_name="uploadeddocument",
            name="document_role",
            field=models.CharField(blank=True, max_length=64),
        ),
        migrations.AddField(
            model_name="uploadeddocument",
            name="needs_manual_review",
            field=models.BooleanField(default=False),
        ),
        migrations.AddField(
            model_name="uploadeddocument",
            name="page_count",
            field=models.PositiveIntegerField(default=0),
        ),
        migrations.AddField(
            model_name="uploadeddocument",
            name="page_count_confidence",
            field=models.CharField(blank=True, max_length=32),
        ),
        migrations.AddField(
            model_name="uploadeddocument",
            name="relative_path",
            field=models.CharField(blank=True, max_length=500),
        ),
        # Nullable link from a document to its batch; deleting the batch
        # cascades to its documents.
        migrations.AddField(
            model_name="uploadeddocument",
            name="batch",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="documents",
                to="documents.submissionbatch",
            ),
        ),
    ]
|
||||
@@ -1,6 +1,48 @@
|
||||
from django.db import models
|
||||
|
||||
|
||||
class SubmissionBatch(models.Model):
    """
    Primary object for a submission package.

    Holds the results of importing the package, the bound conversation id and
    the chapter summary. The documents module maintains the relation between
    a package and its files; Agent execution details are deliberately not
    coupled into the model layer.
    """

    # Status constants; STATUS_PENDING is the default for import_status below.
    STATUS_PENDING = "pending"
    STATUS_PROCESSING = "processing"
    STATUS_COMPLETED = "completed"
    STATUS_REVIEW_REQUIRED = "review_required"
    STATUS_FAILED = "failed"

    # Unique business identifier, distinct from the numeric primary key.
    batch_id = models.CharField(max_length=64, unique=True, db_index=True)
    product_name = models.CharField(max_length=255, blank=True, db_index=True)
    workflow_type = models.CharField(max_length=64, default="registration")
    # Stored as a plain indexed string, not a foreign key.
    conversation_id = models.CharField(max_length=64, blank=True, db_index=True)
    file_count = models.PositiveIntegerField(default=0)
    page_count = models.PositiveIntegerField(default=0)
    # JSON list; defaults to an empty list.
    chapter_summary = models.JSONField(default=list, blank=True)
    import_status = models.CharField(max_length=32, default=STATUS_PENDING, db_index=True)
    exception_count = models.PositiveIntegerField(default=0)
    created_at = models.DateTimeField(auto_now_add=True, db_index=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Newest batches first.
        ordering = ["-created_at"]

    def __str__(self) -> str:
        """Return the product name, or the batch id when the name is empty."""
        label = self.product_name
        if label:
            return label
        return self.batch_id

    def get_import_status_display_text(self) -> str:
        """Return the display label for ``import_status``; unknown values are returned unchanged."""
        labels = {
            self.STATUS_PENDING: "待导入",
            self.STATUS_PROCESSING: "处理中",
            self.STATUS_COMPLETED: "已完成",
            self.STATUS_REVIEW_REQUIRED: "待复核",
            self.STATUS_FAILED: "失败",
        }
        current = self.import_status
        if current in labels:
            return labels[current]
        return current
|
||||
|
||||
|
||||
class UploadedDocument(models.Model):
|
||||
"""
|
||||
保存用户上传文档的元数据和入库状态。
|
||||
@@ -13,11 +55,25 @@ class UploadedDocument(models.Model):
|
||||
STATUS_INDEXED = "indexed"
|
||||
STATUS_FAILED = "failed"
|
||||
|
||||
batch = models.ForeignKey(
|
||||
SubmissionBatch,
|
||||
related_name="documents",
|
||||
null=True,
|
||||
blank=True,
|
||||
on_delete=models.CASCADE,
|
||||
)
|
||||
scenario_id = models.CharField(max_length=100, db_index=True)
|
||||
original_name = models.CharField(max_length=255)
|
||||
file = models.FileField(upload_to="documents/%Y%m%d/")
|
||||
file_type = models.CharField(max_length=20)
|
||||
size = models.PositiveIntegerField(default=0)
|
||||
relative_path = models.CharField(max_length=500, blank=True)
|
||||
chapter_code = models.CharField(max_length=32, blank=True)
|
||||
document_role = models.CharField(max_length=64, blank=True)
|
||||
page_count = models.PositiveIntegerField(default=0)
|
||||
page_count_confidence = models.CharField(max_length=32, blank=True)
|
||||
chapter_match_status = models.CharField(max_length=32, blank=True)
|
||||
needs_manual_review = models.BooleanField(default=False)
|
||||
status = models.CharField(max_length=20, default=STATUS_UPLOADED, db_index=True)
|
||||
error_message = models.TextField(blank=True)
|
||||
created_at = models.DateTimeField(auto_now_add=True)
|
||||
|
||||
@@ -4,11 +4,12 @@ import xml.etree.ElementTree as ET
|
||||
from zipfile import BadZipFile, ZipFile
|
||||
|
||||
from agent_core.rag.ingest import ingest_document
|
||||
from apps.chat.services import create_conversation_for_batch
|
||||
|
||||
from .models import UploadedDocument
|
||||
from .models import SubmissionBatch, UploadedDocument
|
||||
|
||||
|
||||
def create_uploaded_document(scenario_id: str, uploaded_file) -> UploadedDocument:
|
||||
def create_uploaded_document(scenario_id: str, uploaded_file, batch: SubmissionBatch | None = None) -> UploadedDocument:
|
||||
"""
|
||||
保存上传文件的元数据记录。
|
||||
|
||||
@@ -17,15 +18,116 @@ def create_uploaded_document(scenario_id: str, uploaded_file) -> UploadedDocumen
|
||||
"""
|
||||
extension = _detect_extension(uploaded_file.name)
|
||||
return UploadedDocument.objects.create(
|
||||
batch=batch,
|
||||
scenario_id=scenario_id,
|
||||
original_name=uploaded_file.name,
|
||||
file=uploaded_file,
|
||||
file_type=extension,
|
||||
size=uploaded_file.size,
|
||||
relative_path=uploaded_file.name,
|
||||
status=UploadedDocument.STATUS_UPLOADED,
|
||||
)
|
||||
|
||||
|
||||
def import_submission_batch(scenario_id: str, uploaded_files: list) -> dict:
    """
    Import a submission package and build its batch, documents, chapter summary and conversation.

    The implementation works offline and focuses on guaranteeing that:
    - the package record is persisted,
    - the product name is parsed,
    - a conversation is bound automatically,
    - an overview report can be produced directly.

    Args:
        scenario_id: Scenario id recorded on every created document.
        uploaded_files: Uploaded file objects to import into one batch.

    Returns:
        A dict with ``batch_id``, ``conversation_id``, ``product_name`` and a
        ``registration_overview_report`` dict (counts, chapter summary,
        per-document rows and warnings).
    """
    batch = SubmissionBatch.objects.create(
        batch_id=_generate_batch_id(),
        workflow_type="registration",
        import_status=SubmissionBatch.STATUS_PROCESSING,
    )

    # Fields written back on each document after analysis.
    # NOTE(review): "updated_at" must exist on UploadedDocument for this list to
    # be accepted by save(); the field is not visible from here — confirm.
    document_update_fields = (
        "page_count",
        "page_count_confidence",
        "document_role",
        "chapter_code",
        "chapter_match_status",
        "needs_manual_review",
        "updated_at",
    )

    imported_documents = []
    name_candidates = []
    documents_per_chapter = {}
    page_total = 0

    for upload in uploaded_files:
        doc = create_uploaded_document(scenario_id, upload, batch=batch)
        body = extract_text(doc)
        pages = _estimate_page_count(body)
        role = _detect_document_role(doc.original_name)
        chapter = _detect_chapter_code(doc.original_name, body)

        doc.page_count = pages
        doc.page_count_confidence = "estimated"
        doc.document_role = role
        doc.chapter_code = chapter
        # Documents without a detected chapter are flagged for manual review.
        if chapter:
            doc.chapter_match_status = "matched"
            doc.needs_manual_review = False
        else:
            doc.chapter_match_status = "unknown"
            doc.needs_manual_review = True
        doc.save(update_fields=list(document_update_fields))

        imported_documents.append(doc)
        page_total += pages

        bucket = chapter if chapter else "UNCLASSIFIED"
        documents_per_chapter.setdefault(bucket, 0)
        documents_per_chapter[bucket] += 1

        name_candidates.extend(_extract_product_candidates(doc.original_name, body))

    product_name, warnings = _select_product_name(name_candidates)
    conversation = create_conversation_for_batch(batch.batch_id, product_name)

    summary_rows = []
    for code, doc_count in sorted(documents_per_chapter.items()):
        summary_rows.append({"chapter_code": code, "document_count": doc_count})

    # Any warning moves the batch to "review required" instead of "completed".
    if warnings:
        final_status = SubmissionBatch.STATUS_REVIEW_REQUIRED
    else:
        final_status = SubmissionBatch.STATUS_COMPLETED

    batch.product_name = product_name
    batch.conversation_id = conversation.conversation_id
    batch.file_count = len(imported_documents)
    batch.page_count = page_total
    batch.chapter_summary = summary_rows
    batch.exception_count = len(warnings)
    batch.import_status = final_status
    batch.save(
        update_fields=[
            "product_name",
            "conversation_id",
            "file_count",
            "page_count",
            "chapter_summary",
            "exception_count",
            "import_status",
            "updated_at",
        ]
    )

    document_rows = []
    for doc in imported_documents:
        document_rows.append(
            {
                "document_id": doc.id,
                "original_name": doc.original_name,
                "chapter_code": doc.chapter_code,
                "page_count": doc.page_count,
                "document_role": doc.document_role,
            }
        )

    overview_report = {
        "batch_id": batch.batch_id,
        "product_name": batch.product_name,
        "file_count": batch.file_count,
        "total_page_count": batch.page_count,
        "chapter_summary": batch.chapter_summary,
        "documents": document_rows,
        "warnings": warnings,
    }
    return {
        "batch_id": batch.batch_id,
        "conversation_id": conversation.conversation_id,
        "product_name": batch.product_name,
        "registration_overview_report": overview_report,
    }
|
||||
|
||||
|
||||
def extract_text(document: UploadedDocument) -> str:
|
||||
"""
|
||||
根据文档类型选择合适的文本抽取策略。
|
||||
@@ -83,6 +185,99 @@ def _detect_extension(file_name: str) -> str:
|
||||
return Path(file_name).suffix.lower().lstrip(".")
|
||||
|
||||
|
||||
def _generate_batch_id() -> str:
    """
    Return an unused batch id of the form ``SUB-20260604-NNN``.

    The sequence starts at ``row count + 1``. A bare count re-issues an id that
    is still in use once any earlier batch has been deleted, which would
    violate the unique constraint on ``batch_id``; so the candidate is checked
    and the sequence advanced until a free id is found.

    This check-then-insert is not atomic: two concurrent imports can still
    pick the same id, and the unique constraint remains the final guard.
    """
    # NOTE(review): the date segment is a fixed literal rather than the current
    # date. It is kept as-is because callers or tests may rely on it — confirm
    # whether it should be derived from the import date.
    sequence = SubmissionBatch.objects.count() + 1
    while True:
        candidate = f"SUB-20260604-{sequence:03d}"
        if not SubmissionBatch.objects.filter(batch_id=candidate).exists():
            return candidate
        sequence += 1
|
||||
|
||||
|
||||
def _estimate_page_count(text: str) -> int:
|
||||
stripped = text.strip()
|
||||
if not stripped:
|
||||
return 0
|
||||
line_count = len([line for line in stripped.splitlines() if line.strip()])
|
||||
return max(1, line_count)
|
||||
|
||||
|
||||
def _detect_document_role(file_name: str) -> str:
|
||||
normalized = file_name.lower()
|
||||
if "申请表" in file_name:
|
||||
return "application_form"
|
||||
if "说明书" in file_name:
|
||||
return "product_manual"
|
||||
if "产品列表" in file_name:
|
||||
return "product_list"
|
||||
if "声明" in file_name:
|
||||
return "declaration"
|
||||
if normalized.endswith(".pdf"):
|
||||
return "pdf_document"
|
||||
return "general_document"
|
||||
|
||||
|
||||
def _detect_chapter_code(file_name: str, text: str) -> str:
|
||||
for source in (file_name, text):
|
||||
match = re.search(r"(CH\d+(?:\.\d+)*)", source, flags=re.IGNORECASE)
|
||||
if match:
|
||||
return match.group(1).upper()
|
||||
if "监管" in file_name or "申请表" in file_name or "说明书" in file_name:
|
||||
return "CH1"
|
||||
return ""
|
||||
|
||||
|
||||
def _extract_product_candidates(file_name: str, text: str) -> list[dict]:
    """
    Extract at most one product-name candidate from a document.

    Files whose name does not map to a known candidate source yield nothing.
    Otherwise the text is searched with labelled-field patterns in order; if
    none match, the file stem (with a few boilerplate words removed) is used,
    unless it is empty or still looks like a form/list title.

    Returns:
        An empty list, or a single-item list of
        ``{"source_type": ..., "product_name": ...}``.
    """
    source_type = _detect_candidate_source(file_name)
    if not source_type:
        return []

    # NOTE(review): each character class lists the same colon twice as shown
    # here; confirm whether one of them was meant to be a full-width colon.
    label_patterns = (
        r"产品名称[::]\s*([^\n\r]+)",
        r"名称[::]\s*([^\n\r]+检测试剂盒[^\n\r]*)",
    )
    found = None
    for label_pattern in label_patterns:
        found = re.search(label_pattern, text)
        if found:
            break
    if found:
        return [{"source_type": source_type, "product_name": found.group(1).strip()}]

    # Fall back to the file name with boilerplate words stripped out.
    stem = Path(file_name).stem
    for noise in ("目标产品", "说明书"):
        stem = stem.replace(noise, "")
    stem = stem.strip("-_ ")
    if not stem or "申请表" in stem or "产品列表" in stem:
        return []
    return [{"source_type": source_type, "product_name": stem}]
|
||||
|
||||
|
||||
def _detect_candidate_source(file_name: str) -> str:
|
||||
if "申请表" in file_name:
|
||||
return "application_form"
|
||||
if "说明书" in file_name:
|
||||
return "product_manual"
|
||||
if "产品列表" in file_name:
|
||||
return "product_list"
|
||||
return ""
|
||||
|
||||
|
||||
def _select_product_name(candidates: list[dict]) -> tuple[str, list[str]]:
|
||||
if not candidates:
|
||||
return "", ["未识别到产品名称,建议人工补录。"]
|
||||
|
||||
priority = {
|
||||
"application_form": 1,
|
||||
"product_manual": 2,
|
||||
"product_list": 3,
|
||||
}
|
||||
sorted_candidates = sorted(
|
||||
candidates,
|
||||
key=lambda item: priority.get(item["source_type"], 99),
|
||||
)
|
||||
top_candidate = sorted_candidates[0]
|
||||
warnings = []
|
||||
conflict_names = {
|
||||
item["product_name"]
|
||||
for item in sorted_candidates
|
||||
if item["product_name"] != top_candidate["product_name"]
|
||||
}
|
||||
if conflict_names:
|
||||
warnings.append(
|
||||
"产品名称来源冲突:"
|
||||
+ " / ".join([top_candidate["product_name"], *sorted(conflict_names)])
|
||||
)
|
||||
return top_candidate["product_name"], warnings
|
||||
|
||||
|
||||
def _read_text_file(path: Path) -> str:
|
||||
"""优先按 UTF-8 读取;失败时回退到系统默认编码。"""
|
||||
try:
|
||||
|
||||
@@ -5,18 +5,24 @@ from django.views.decorators.http import require_POST
|
||||
from apps.scenarios.services import list_scenarios
|
||||
|
||||
from .forms import DocumentUploadForm
|
||||
from .models import UploadedDocument
|
||||
from .services import create_uploaded_document, index_document
|
||||
from .models import SubmissionBatch, UploadedDocument
|
||||
from .services import import_submission_batch, index_document
|
||||
|
||||
|
||||
def document_list(request):
|
||||
# 列表页只负责展示文档元数据和可执行操作,不处理入库细节。
|
||||
# 资料包页展示批次、会话绑定和关键异常,同时保留文档级明细便于演示。
|
||||
keyword = (request.GET.get("keyword") or "").strip()
|
||||
batches = SubmissionBatch.objects.all()
|
||||
if keyword:
|
||||
batches = batches.filter(product_name__icontains=keyword)
|
||||
documents = UploadedDocument.objects.all()
|
||||
status_counts = {
|
||||
"uploaded": documents.filter(status=UploadedDocument.STATUS_UPLOADED).count(),
|
||||
"indexed": documents.filter(status=UploadedDocument.STATUS_INDEXED).count(),
|
||||
"failed": documents.filter(status=UploadedDocument.STATUS_FAILED).count(),
|
||||
"total": documents.count(),
|
||||
"pending": batches.filter(import_status=SubmissionBatch.STATUS_PENDING).count(),
|
||||
"completed": batches.filter(import_status=SubmissionBatch.STATUS_COMPLETED).count(),
|
||||
"review_required": batches.filter(
|
||||
import_status=SubmissionBatch.STATUS_REVIEW_REQUIRED
|
||||
).count(),
|
||||
"total": batches.count(),
|
||||
}
|
||||
processing_pipeline = [
|
||||
{"title": "原始文件接收", "detail": "校验格式、大小和场景归属后保存原件。"},
|
||||
@@ -35,6 +41,8 @@ def document_list(request):
|
||||
"documents/document_list.html",
|
||||
{
|
||||
"documents": documents,
|
||||
"batches": batches,
|
||||
"keyword": keyword,
|
||||
"status_counts": status_counts,
|
||||
"processing_pipeline": processing_pipeline,
|
||||
"exception_items": exception_items,
|
||||
@@ -43,12 +51,18 @@ def document_list(request):
|
||||
|
||||
|
||||
def upload(request):
|
||||
# 上传成功后仅保存文件和元数据,是否入库由用户显式触发。
|
||||
# 上传成功后直接创建资料包并绑定主会话。
|
||||
if request.method == "POST":
|
||||
form = DocumentUploadForm(request.POST, request.FILES)
|
||||
if form.is_valid():
|
||||
create_uploaded_document(form.cleaned_data["scenario_id"], form.cleaned_data["file"])
|
||||
messages.success(request, "文件已上传,可继续执行入库。")
|
||||
result = import_submission_batch(
|
||||
form.cleaned_data["scenario_id"],
|
||||
[form.cleaned_data["file"]],
|
||||
)
|
||||
messages.success(
|
||||
request,
|
||||
f"资料包已导入,已绑定会话 {result['conversation_id']}。",
|
||||
)
|
||||
return redirect("documents:list")
|
||||
else:
|
||||
form = DocumentUploadForm()
|
||||
|
||||
Reference in New Issue
Block a user