From 4c28466fe40348e1c12883289ab8b8ccd0a7e634 Mon Sep 17 00:00:00 2001 From: bruce Date: Sun, 7 Jun 2026 00:39:33 +0800 Subject: [PATCH] =?UTF-8?q?feat(regulatory):=20=E5=A2=9E=E5=8A=A0=E9=A3=8E?= =?UTF-8?q?=E9=99=A9=E5=BD=92=E5=B9=B6=E4=B8=8E=E6=A0=B8=E6=9F=A5=E6=8A=A5?= =?UTF-8?q?=E5=91=8A=E5=AF=BC=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../regulatory_review/services/export.py | 167 ++++++++++++++++++ .../regulatory_review/services/risk_assess.py | 50 ++++++ review_agent/regulatory_review/workflow.py | 57 ++++++ tests/test_regulatory_export.py | 49 +++++ tests/test_regulatory_risk_assess.py | 35 ++++ tests/test_regulatory_workflow.py | 43 +++++ 6 files changed, 401 insertions(+) create mode 100644 review_agent/regulatory_review/services/export.py create mode 100644 review_agent/regulatory_review/services/risk_assess.py create mode 100644 tests/test_regulatory_export.py create mode 100644 tests/test_regulatory_risk_assess.py diff --git a/review_agent/regulatory_review/services/export.py b/review_agent/regulatory_review/services/export.py new file mode 100644 index 0000000..b29a591 --- /dev/null +++ b/review_agent/regulatory_review/services/export.py @@ -0,0 +1,167 @@ +from __future__ import annotations + +import json +from pathlib import Path + +from django.conf import settings +from openpyxl import Workbook + +from review_agent.models import ExportedSummaryFile, RegulatoryIssue, RegulatoryReviewBatch + + +SEVERITY_LABELS = { + "blocking": "阻断项", + "high": "高风险", + "medium": "中风险", + "low": "低风险", + "info": "提示", +} + + +def export_review_results(batch: RegulatoryReviewBatch) -> list[ExportedSummaryFile]: + root = Path(batch.work_dir) if batch.work_dir else Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no + export_dir = root / "exports" + export_dir.mkdir(parents=True, exist_ok=True) + + markdown = _create_export( + batch, + export_dir / f"{batch.batch_no}-regulatory-review.md", + ExportedSummaryFile.ExportType.MARKDOWN, + "markdown_report", + build_markdown_report(batch), + ) + excel = _create_excel_export(batch, export_dir / f"{batch.batch_no}-regulatory-issues.xlsx") + result_json = _create_export( + batch, + export_dir / f"{batch.batch_no}-regulatory-result.json", + ExportedSummaryFile.ExportType.JSON, + "result_package", + json.dumps(build_result_payload(batch), ensure_ascii=False, indent=2), + ) + return [markdown, excel, result_json] + + +def build_markdown_report(batch: RegulatoryReviewBatch) -> str: + lines = [ + "# NMPA 注册资料法规核查报告", + "", + f"批次号:{batch.batch_no}", + "", + "## 风险汇总", + "", + "| 风险等级 | 数量 |", + "| --- | --- |", + ] + summary = batch.risk_summary or {} + for severity, label in SEVERITY_LABELS.items(): + lines.append(f"| {label} | {summary.get(severity, 0)} |") + lines.extend(["", "## 问题清单", "", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"]) + for issue in batch.issues.order_by("id"): + lines.append( + f"| {SEVERITY_LABELS.get(issue.severity, issue.severity)} | {issue.title} | {issue.status} | {issue.suggestion or '-'} |" + ) + return "\n".join(lines) + + +def build_result_payload(batch: RegulatoryReviewBatch) -> dict[str, object]: + return { + "batch_no": batch.batch_no, + "source_summary_batch": batch.source_summary_batch.batch_no, + "risk_summary": batch.risk_summary, + "issues": [ + { + "severity": issue.severity, + "category": issue.category, + "rule_code": issue.rule_code, + "title": issue.title, + "detail": issue.detail, + "suggestion": issue.suggestion, + "status": issue.status, + "evidence": issue.evidence, + "citations": issue.citations, + } + for issue in batch.issues.order_by("id") + ], + } + + +def build_assistant_summary(batch: RegulatoryReviewBatch, exports: list[ExportedSummaryFile]) -> str: + export_by_type = {export.export_type: export for export in exports} + lines = [ + "已完成 NMPA 注册资料法规核查。", + "", + "| 风险等级 | 数量 |", + "| --- | --- |", + ] + summary = batch.risk_summary or {} + for severity, label in SEVERITY_LABELS.items(): + if summary.get(severity, 0): + lines.append(f"| {label} | {summary[severity]} |") + lines.extend(["", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"]) + for issue in batch.issues.order_by("id")[:8]: + lines.append( + f"| {SEVERITY_LABELS.get(issue.severity, issue.severity)} | {issue.title} | {issue.status} | {issue.suggestion or '-'} |" + ) + lines.extend( + [ + "", + _download_link("下载 Markdown 核查报告", export_by_type.get(ExportedSummaryFile.ExportType.MARKDOWN)), + _download_link("下载 Excel 缺失清单", export_by_type.get(ExportedSummaryFile.ExportType.EXCEL)), + _download_link("下载 JSON 结果包", export_by_type.get(ExportedSummaryFile.ExportType.JSON)), + ] + ) + return "\n".join(line for line in lines if line is not None) + + +def _download_link(label: str, exported: ExportedSummaryFile | None) -> str | None: + if not exported: + return None + return f"[{label}](/api/review-agent/file-summary/exports/{exported.pk}/download/)" + + +def _create_export( + batch: RegulatoryReviewBatch, + path: Path, + export_type: str, + category: str, + content: str, +) -> ExportedSummaryFile: + path.write_text(content, encoding="utf-8") + return ExportedSummaryFile.objects.create( + batch=batch.source_summary_batch, + workflow_type="regulatory_review", + workflow_batch_id=batch.pk, + export_category=category, + export_type=export_type, + file_name=path.name, + storage_path=str(path), + ) + + +def _create_excel_export(batch: RegulatoryReviewBatch, path: Path) -> ExportedSummaryFile: + workbook = Workbook() + sheet = workbook.active + sheet.title = "法规问题清单" + sheet.append(["等级", "类别", "规则", "问题", "状态", "建议", "法规依据"]) + for issue in batch.issues.order_by("id"): + sheet.append( + [ + SEVERITY_LABELS.get(issue.severity, issue.severity), + issue.category, + issue.rule_code, + issue.title, + issue.status, + issue.suggestion, + "; ".join(str(item.get("source", "")) for item in issue.citations), + ] + ) + workbook.save(path) + return ExportedSummaryFile.objects.create( + batch=batch.source_summary_batch, + workflow_type="regulatory_review", + workflow_batch_id=batch.pk, + export_category="issue_checklist", + export_type=ExportedSummaryFile.ExportType.EXCEL, + file_name=path.name, + storage_path=str(path), + ) diff --git a/review_agent/regulatory_review/services/risk_assess.py b/review_agent/regulatory_review/services/risk_assess.py new file mode 100644 index 0000000..5f342d7 --- /dev/null +++ b/review_agent/regulatory_review/services/risk_assess.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from collections import Counter + +from review_agent.models import RegulatoryIssue, RegulatoryReviewBatch +from review_agent.regulatory_review.schemas import Finding + +from .rag_citation import retrieve_citations + + +SEVERITY_ORDER = ["blocking", "high", "medium", "low", "info"] + + +def persist_findings(batch: RegulatoryReviewBatch, findings: list[Finding]) -> list[RegulatoryIssue]: + RegulatoryIssue.objects.filter(batch=batch).delete() + unique = {} + for finding in findings: + unique.setdefault((finding.rule_code, finding.category, finding.title), finding) + + issues = [] + for finding in unique.values(): + citations = finding.citations or _safe_citations(finding) + issues.append( + RegulatoryIssue.objects.create( + batch=batch, + rule_code=finding.rule_code, + category=finding.category, + severity=finding.severity, + title=finding.title, + detail=finding.detail, + suggestion=finding.suggestion, + evidence=finding.evidence, + citations=citations, + ) + ) + batch.risk_summary = _risk_summary(issues) + batch.save(update_fields=["risk_summary"]) + return issues + + +def _safe_citations(finding: Finding) -> list[dict[str, object]]: + try: + return retrieve_citations(finding.title) + except Exception: + return [{"source": "原文依据待补充", "text": "RAG 索引不可用或无命中", "score": None}] + + +def _risk_summary(issues: list[RegulatoryIssue]) -> dict[str, int]: + counts = Counter(issue.severity for issue in issues) + return {severity: counts.get(severity, 0) for severity in SEVERITY_ORDER} diff --git a/review_agent/regulatory_review/workflow.py b/review_agent/regulatory_review/workflow.py index fc0b2e6..602da66 100644 --- a/review_agent/regulatory_review/workflow.py +++ b/review_agent/regulatory_review/workflow.py @@ -16,6 +16,13 @@ from review_agent.models import ( RegulatoryReviewBatch, WorkflowNodeRun, ) +from review_agent.regulatory_review.services.completeness_check import run_completeness_check +from review_agent.regulatory_review.services.consistency_check import run_consistency_check +from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results +from review_agent.regulatory_review.services.risk_assess import persist_findings +from review_agent.regulatory_review.services.rule_loader import load_rule_file +from review_agent.regulatory_review.services.structure_check import run_structure_check +from review_agent.regulatory_review.services.text_extract import extract_text from .events import record_event @@ -89,6 +96,9 @@ def create_regulatory_review_batch( class RegulatoryWorkflowExecutor: def __init__(self, batch: RegulatoryReviewBatch): self.batch = batch + self.rule_set: dict | None = None + self.findings = [] + self.document_texts: dict[str, str] = {} def run(self) -> None: self.batch.status = RegulatoryReviewBatch.Status.RUNNING @@ -131,6 +141,8 @@ class RegulatoryWorkflowExecutor: {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message}, ) + self._execute_node(node.node_code) + node.status = WorkflowNodeRun.Status.SUCCESS node.progress = 100 node.finished_at = timezone.now() @@ -142,6 +154,51 @@ class RegulatoryWorkflowExecutor: {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message}, ) + def _execute_node(self, node_code: str) -> None: + if node_code == "rule_scope": + self.rule_set = load_rule_file() + return + if node_code == "completeness_check": + self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules())) + return + if node_code == "text_extract": + self.document_texts = self._extract_source_texts() + return + if node_code == "structure_check": + self.findings.extend(run_structure_check(self.document_texts, self._rules())) + return + if node_code == "consistency_check": + self.findings.extend(run_consistency_check(self.document_texts)) + return + if node_code == "risk_assess": + persist_findings(self.batch, self.findings) + return + if node_code == "report_export": + exports = export_review_results(self.batch) + Message.objects.create( + conversation=self.batch.conversation, + role=Message.Role.ASSISTANT, + content=build_assistant_summary(self.batch, exports), + ) + + def _rules(self) -> dict: + if self.rule_set is None: + self.rule_set = load_rule_file() + return self.rule_set + + def _extract_source_texts(self) -> dict[str, str]: + texts = {} + for item in self.batch.source_summary_batch.items.order_by("file_index"): + path = Path(item.storage_path) + if not path.is_absolute(): + path = Path(settings.MEDIA_ROOT) / item.storage_path + if not path.exists(): + continue + result = extract_text(path) + if result.status == "success" and result.text: + texts[item.file_name] = result.text + return texts + def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None: executor = RegulatoryWorkflowExecutor(batch) diff --git a/tests/test_regulatory_export.py b/tests/test_regulatory_export.py new file mode 100644 index 0000000..fbe8870 --- /dev/null +++ b/tests/test_regulatory_export.py @@ -0,0 +1,49 @@ +import json + +import pytest + +from review_agent.models import ( + Conversation, + ExportedSummaryFile, + FileSummaryBatch, + RegulatoryIssue, + RegulatoryReviewBatch, +) +from review_agent.regulatory_review.services.export import export_review_results + + +pytestmark = pytest.mark.django_db + + +def test_export_review_results_creates_markdown_excel_and_json(settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + summary = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-OK") + batch = RegulatoryReviewBatch.objects.create( + conversation=conversation, + user=user, + source_summary_batch=summary, + batch_no="RR-EXPORT", + risk_summary={"blocking": 1}, + ) + RegulatoryIssue.objects.create( + batch=batch, + rule_code="registration_test_report", + category=RegulatoryIssue.Category.COMPLETENESS, + severity=RegulatoryIssue.Severity.BLOCKING, + title="缺少注册检验报告", + suggestion="请补充注册检验报告并复核。", + ) + + exports = export_review_results(batch) + + assert {export.export_type for export in exports} == { + ExportedSummaryFile.ExportType.MARKDOWN, + ExportedSummaryFile.ExportType.EXCEL, + ExportedSummaryFile.ExportType.JSON, + } + json_export = next(export for export in exports if export.export_type == ExportedSummaryFile.ExportType.JSON) + payload = json.loads(open(json_export.storage_path, encoding="utf-8").read()) + assert payload["batch_no"] == "RR-EXPORT" + assert payload["issues"][0]["title"] == "缺少注册检验报告" diff --git a/tests/test_regulatory_risk_assess.py b/tests/test_regulatory_risk_assess.py new file mode 100644 index 0000000..7a5f1e9 --- /dev/null +++ b/tests/test_regulatory_risk_assess.py @@ -0,0 +1,35 @@ +import pytest + +from review_agent.models import Conversation, FileSummaryBatch, RegulatoryIssue, RegulatoryReviewBatch +from review_agent.regulatory_review.schemas import Finding +from review_agent.regulatory_review.services.risk_assess import persist_findings + + +pytestmark = pytest.mark.django_db + + +def test_persist_findings_deduplicates_and_updates_risk_summary(django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + summary = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-OK") + batch = RegulatoryReviewBatch.objects.create( + conversation=conversation, + user=user, + source_summary_batch=summary, + batch_no="RR-RISK", + ) + finding = Finding( + rule_code="registration_test_report", + category="completeness", + severity="blocking", + title="缺少注册检验报告", + suggestion="请补充注册检验报告并复核。", + citations=[{"source": "法规.doc", "text": "注册检验报告"}], + ) + + issues = persist_findings(batch, [finding, finding]) + + batch.refresh_from_db() + assert len(issues) == 1 + assert RegulatoryIssue.objects.count() == 1 + assert batch.risk_summary["blocking"] == 1 diff --git a/tests/test_regulatory_workflow.py b/tests/test_regulatory_workflow.py index 3d1b0ca..71a0114 100644 --- a/tests/test_regulatory_workflow.py +++ b/tests/test_regulatory_workflow.py @@ -2,8 +2,11 @@ import pytest from review_agent.models import ( Conversation, + ExportedSummaryFile, FileSummaryBatch, + FileSummaryItem, Message, + RegulatoryIssue, RegulatoryReviewBatch, WorkflowEvent, WorkflowNodeRun, @@ -155,3 +158,43 @@ def test_stream_message_starts_regulatory_workflow(monkeypatch, settings, django assert "workflow_started" in joined assert "\"workflow_type\": \"regulatory_review\"" in joined assert RegulatoryReviewBatch.objects.filter(conversation=conversation).exists() + + +def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + settings.REGULATORY_REVIEW_ASYNC = False + settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag" + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + summary = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + batch_no="FS-OK", + status=FileSummaryBatch.Status.SUCCESS, + ) + ifu_path = tmp_path / "ifu.txt" + ifu_path.write_text("产品名称:甲胎蛋白检测试剂盒\n样本要求:血清\n有效期:12个月", encoding="utf-8") + FileSummaryItem.objects.create( + batch=summary, + file_index=1, + file_name="说明书.txt", + file_type="txt", + relative_path="说明书.txt", + storage_path=str(ifu_path), + ) + batch = create_regulatory_review_batch( + conversation=conversation, + user=user, + source_summary_batch=summary, + ) + + start_regulatory_review_workflow(batch, async_run=False) + + batch.refresh_from_db() + assert batch.status == RegulatoryReviewBatch.Status.SUCCESS + assert RegulatoryIssue.objects.filter(batch=batch, severity="blocking").exists() + assert ExportedSummaryFile.objects.filter( + workflow_type="regulatory_review", + workflow_batch_id=batch.pk, + ).count() == 3 + assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()