feat(regulatory): 增加风险归并与核查报告导出

2026-06-07 00:39:33 +08:00
parent ec89e62661
commit 4c28466fe4
6 changed files with 401 additions and 0 deletions
--- a/review_agent/regulatory_review/services/export.py
+++ b/review_agent/regulatory_review/services/export.py
@@ -0,0 +1,167 @@
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+from django.conf import settings
+from openpyxl import Workbook
+
+from review_agent.models import ExportedSummaryFile, RegulatoryIssue, RegulatoryReviewBatch
+
+
+# Chinese display label for each issue severity code.  The report builders in
+# this module iterate this dict, so its insertion order is also the row order
+# of the risk tables they emit.
+SEVERITY_LABELS = {
+    "blocking": "阻断项",
+    "high": "高风险",
+    "medium": "中风险",
+    "low": "低风险",
+    "info": "提示",
+}
+
+
+def export_review_results(batch: RegulatoryReviewBatch) -> list[ExportedSummaryFile]:
+    """Write the Markdown, Excel and JSON exports of *batch* and register them.
+
+    Files are written to ``<work root>/exports``, where the work root is
+    ``batch.work_dir`` when set and
+    ``MEDIA_ROOT/regulatory_review/work/<batch_no>`` otherwise.  The directory
+    is created if it does not exist.
+
+    Returns:
+        The created ``ExportedSummaryFile`` rows, in the order
+        Markdown report, Excel checklist, JSON result package.
+    """
+    if batch.work_dir:
+        work_root = Path(batch.work_dir)
+    else:
+        work_root = Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no
+    target_dir = work_root / "exports"
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    report_path = target_dir / f"{batch.batch_no}-regulatory-review.md"
+    report_file = _create_export(
+        batch,
+        report_path,
+        ExportedSummaryFile.ExportType.MARKDOWN,
+        "markdown_report",
+        build_markdown_report(batch),
+    )
+
+    checklist_path = target_dir / f"{batch.batch_no}-regulatory-issues.xlsx"
+    checklist_file = _create_excel_export(batch, checklist_path)
+
+    package_path = target_dir / f"{batch.batch_no}-regulatory-result.json"
+    package_text = json.dumps(build_result_payload(batch), ensure_ascii=False, indent=2)
+    package_file = _create_export(
+        batch,
+        package_path,
+        ExportedSummaryFile.ExportType.JSON,
+        "result_package",
+        package_text,
+    )
+    return [report_file, checklist_file, package_file]
+
+
+def build_markdown_report(batch: RegulatoryReviewBatch) -> str:
+    """Render the regulatory review report of *batch* as Markdown text.
+
+    The report contains a risk-summary table with one row per severity in
+    ``SEVERITY_LABELS`` (missing counts are shown as 0), followed by a table
+    listing every issue of the batch ordered by id.
+
+    Returns:
+        The report as a single newline-joined string.
+    """
+
+    def cell(value: object) -> str:
+        # Issue fields are free text: a literal "|" or a line break would end
+        # the cell/row early and corrupt the table, so pipes are escaped and
+        # line breaks are flattened to spaces.
+        return " ".join(str(value).splitlines()).replace("|", "\\|")
+
+    summary = batch.risk_summary or {}
+    lines = [
+        "# NMPA 注册资料法规核查报告",
+        "",
+        f"批次号：{batch.batch_no}",
+        "",
+        "## 风险汇总",
+        "",
+        "| 风险等级 | 数量 |",
+        "| --- | --- |",
+    ]
+    lines.extend(
+        f"| {label} | {summary.get(severity, 0)} |"
+        for severity, label in SEVERITY_LABELS.items()
+    )
+    lines.extend(["", "## 问题清单", "", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"])
+    for issue in batch.issues.order_by("id"):
+        # Unknown severity codes fall back to the raw code.
+        severity_label = SEVERITY_LABELS.get(issue.severity, issue.severity)
+        lines.append(
+            f"| {cell(severity_label)} | {cell(issue.title)} | {cell(issue.status)} | {cell(issue.suggestion or '-')} |"
+        )
+    return "\n".join(lines)
+
+
+def build_result_payload(batch: RegulatoryReviewBatch) -> dict[str, object]:
+    """Assemble the result package of *batch* as a plain dict.
+
+    The payload carries the batch number, the batch number of the source
+    summary batch, the stored risk summary and one dict per issue (ordered
+    by id) with the issue's exported fields.
+    """
+    exported_fields = (
+        "severity",
+        "category",
+        "rule_code",
+        "title",
+        "detail",
+        "suggestion",
+        "status",
+        "evidence",
+        "citations",
+    )
+    payload: dict[str, object] = {
+        "batch_no": batch.batch_no,
+        "source_summary_batch": batch.source_summary_batch.batch_no,
+        "risk_summary": batch.risk_summary,
+    }
+    issue_rows = []
+    for issue in batch.issues.order_by("id"):
+        issue_rows.append({name: getattr(issue, name) for name in exported_fields})
+    payload["issues"] = issue_rows
+    return payload
+
+
+def build_assistant_summary(batch: RegulatoryReviewBatch, exports: list[ExportedSummaryFile]) -> str:
+    """Build the Markdown chat message announcing a finished review.
+
+    The message contains a risk table (only severities with a non-zero
+    count), a table of the first 8 issues ordered by id, and one download
+    link for each of the Markdown, Excel and JSON export types present in
+    *exports*.
+
+    Args:
+        batch: The reviewed batch whose summary and issues are rendered.
+        exports: Export records to link; looked up by ``export_type``.
+
+    Returns:
+        The message as a single newline-joined string.
+    """
+
+    def cell(value: object) -> str:
+        # Issue fields are free text: a literal "|" or a line break would end
+        # the cell/row early and corrupt the table, so pipes are escaped and
+        # line breaks are flattened to spaces.
+        return " ".join(str(value).splitlines()).replace("|", "\\|")
+
+    export_by_type = {export.export_type: export for export in exports}
+    summary = batch.risk_summary or {}
+    lines = [
+        "已完成 NMPA 注册资料法规核查。",
+        "",
+        "| 风险等级 | 数量 |",
+        "| --- | --- |",
+    ]
+    for severity, label in SEVERITY_LABELS.items():
+        count = summary.get(severity, 0)
+        if count:
+            lines.append(f"| {label} | {count} |")
+    lines.extend(["", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"])
+    for issue in batch.issues.order_by("id")[:8]:
+        # Unknown severity codes fall back to the raw code.
+        severity_label = SEVERITY_LABELS.get(issue.severity, issue.severity)
+        lines.append(
+            f"| {cell(severity_label)} | {cell(issue.title)} | {cell(issue.status)} | {cell(issue.suggestion or '-')} |"
+        )
+    links = [
+        _download_link("下载 Markdown 核查报告", export_by_type.get(ExportedSummaryFile.ExportType.MARKDOWN)),
+        _download_link("下载 Excel 缺失清单", export_by_type.get(ExportedSummaryFile.ExportType.EXCEL)),
+        _download_link("下载 JSON 结果包", export_by_type.get(ExportedSummaryFile.ExportType.JSON)),
+    ]
+    lines.append("")
+    # A missing export yields None from _download_link and is left out.
+    lines.extend(link for link in links if link is not None)
+    return "\n".join(lines)
+
+
+def _download_link(label: str, exported: ExportedSummaryFile | None) -> str | None:
+    """Return a Markdown link to the download endpoint of *exported*.
+
+    Returns ``None`` when *exported* is falsy (no such export exists).
+    """
+    if exported:
+        download_url = f"/api/review-agent/file-summary/exports/{exported.pk}/download/"
+        return f"[{label}]({download_url})"
+    return None
+
+
+def _create_export(
+    batch: RegulatoryReviewBatch,
+    path: Path,
+    export_type: str,
+    category: str,
+    content: str,
+) -> ExportedSummaryFile:
+    """Write *content* to *path* as UTF-8 and record the file as an export.
+
+    The record is attached to the source summary batch of *batch* and tagged
+    with the ``regulatory_review`` workflow type and the primary key of
+    *batch*.
+    """
+    path.write_text(content, encoding="utf-8")
+    record_fields = {
+        "batch": batch.source_summary_batch,
+        "workflow_type": "regulatory_review",
+        "workflow_batch_id": batch.pk,
+        "export_category": category,
+        "export_type": export_type,
+        "file_name": path.name,
+        "storage_path": str(path),
+    }
+    return ExportedSummaryFile.objects.create(**record_fields)
+
+
+def _create_excel_export(batch: RegulatoryReviewBatch, path: Path) -> ExportedSummaryFile:
+    """Write the issue checklist of *batch* to an ``.xlsx`` file and record it.
+
+    The workbook has one sheet with a header row and one row per issue
+    (ordered by id).  The last column joins the ``source`` of every citation
+    with ``"; "``.
+
+    Returns:
+        The ``ExportedSummaryFile`` row created for the saved workbook.
+    """
+    workbook = Workbook()
+    sheet = workbook.active
+    sheet.title = "法规问题清单"
+    sheet.append(["等级", "类别", "规则", "问题", "状态", "建议", "法规依据"])
+    for issue in batch.issues.order_by("id"):
+        # A stored issue may carry no citations; iterating None would raise,
+        # so an absent value is treated as an empty list.
+        # NOTE(review): confirm whether the citations field is nullable.
+        citation_sources = "; ".join(
+            str(item.get("source", "")) for item in issue.citations or []
+        )
+        sheet.append(
+            [
+                SEVERITY_LABELS.get(issue.severity, issue.severity),
+                issue.category,
+                issue.rule_code,
+                issue.title,
+                issue.status,
+                issue.suggestion,
+                citation_sources,
+            ]
+        )
+    workbook.save(path)
+    return ExportedSummaryFile.objects.create(
+        batch=batch.source_summary_batch,
+        workflow_type="regulatory_review",
+        workflow_batch_id=batch.pk,
+        export_category="issue_checklist",
+        export_type=ExportedSummaryFile.ExportType.EXCEL,
+        file_name=path.name,
+        storage_path=str(path),
+    )
--- a/review_agent/regulatory_review/services/risk_assess.py
+++ b/review_agent/regulatory_review/services/risk_assess.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+from collections import Counter
+
+from review_agent.models import RegulatoryIssue, RegulatoryReviewBatch
+from review_agent.regulatory_review.schemas import Finding
+
+from .rag_citation import retrieve_citations
+
+
+# Severity codes reported in the risk summary; this list fixes both which
+# codes appear as keys and the order in which they appear.
+SEVERITY_ORDER = ["blocking", "high", "medium", "low", "info"]
+
+
+def persist_findings(batch: RegulatoryReviewBatch, findings: list[Finding]) -> list[RegulatoryIssue]:
+    """Replace the stored issues of *batch* with the de-duplicated *findings*.
+
+    Existing issues of the batch are deleted first.  Findings sharing the same
+    ``(rule_code, category, title)`` are collapsed to the first one seen.  A
+    finding without citations gets them from ``_safe_citations``.  Finally the
+    per-severity counts are stored in ``batch.risk_summary``.
+
+    Returns:
+        The newly created ``RegulatoryIssue`` rows, in first-seen order.
+    """
+    RegulatoryIssue.objects.filter(batch=batch).delete()
+
+    # Dicts keep insertion order, so the first finding for a key both wins
+    # and fixes that key's position in the output.
+    deduplicated: dict[tuple, Finding] = {}
+    for candidate in findings:
+        key = (candidate.rule_code, candidate.category, candidate.title)
+        if key not in deduplicated:
+            deduplicated[key] = candidate
+
+    created: list[RegulatoryIssue] = []
+    for item in deduplicated.values():
+        resolved_citations = item.citations or _safe_citations(item)
+        record = RegulatoryIssue.objects.create(
+            batch=batch,
+            rule_code=item.rule_code,
+            category=item.category,
+            severity=item.severity,
+            title=item.title,
+            detail=item.detail,
+            suggestion=item.suggestion,
+            evidence=item.evidence,
+            citations=resolved_citations,
+        )
+        created.append(record)
+
+    batch.risk_summary = _risk_summary(created)
+    batch.save(update_fields=["risk_summary"])
+    return created
+
+
+def _safe_citations(finding: Finding) -> list[dict[str, object]]:
+    """Look up citations for *finding* by its title, on a best-effort basis.
+
+    Never raises: if retrieval fails, or returns nothing, a single placeholder
+    citation is returned instead so the issue always carries a citation entry.
+
+    Returns:
+        The retrieved citations, or the one-element placeholder list.
+    """
+    import logging  # function-scope: the module does not import logging
+
+    placeholder: list[dict[str, object]] = [
+        {"source": "原文依据待补充", "text": "RAG 索引不可用或无命中", "score": None}
+    ]
+    try:
+        citations = retrieve_citations(finding.title)
+    except Exception:
+        # Citation lookup must not abort persisting the issue, but the
+        # failure is logged so an unavailable index does not go unnoticed.
+        logging.getLogger(__name__).exception(
+            "Citation retrieval failed for finding %r", finding.title
+        )
+        return placeholder
+    # The placeholder text also covers the "no hit" case, so an empty result
+    # is mapped to it rather than returned as an empty list.
+    return citations or placeholder
+
+
+def _risk_summary(issues: list[RegulatoryIssue]) -> dict[str, int]:
+    """Count *issues* per severity.
+
+    The result has exactly one entry per code in ``SEVERITY_ORDER`` (0 when no
+    issue has that severity); severities outside that list are not counted.
+    """
+    tally = dict.fromkeys(SEVERITY_ORDER, 0)
+    for issue in issues:
+        if issue.severity in tally:
+            tally[issue.severity] += 1
+    return tally
--- a/review_agent/regulatory_review/workflow.py
+++ b/review_agent/regulatory_review/workflow.py
@@ -16,6 +16,13 @@ from review_agent.models import (
    RegulatoryReviewBatch,
    WorkflowNodeRun,
 )
+from review_agent.regulatory_review.services.completeness_check import run_completeness_check
+from review_agent.regulatory_review.services.consistency_check import run_consistency_check
+from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
+from review_agent.regulatory_review.services.risk_assess import persist_findings
+from review_agent.regulatory_review.services.rule_loader import load_rule_file
+from review_agent.regulatory_review.services.structure_check import run_structure_check
+from review_agent.regulatory_review.services.text_extract import extract_text

 from .events import record_event

@@ -89,6 +96,9 @@ def create_regulatory_review_batch(
 class RegulatoryWorkflowExecutor:
     def __init__(self, batch: RegulatoryReviewBatch):
         self.batch = batch
+        # Rule set cached after it is first loaded; None until then.
+        self.rule_set: dict | None = None
+        # Findings accumulated by the check nodes until risk_assess persists them.
+        self.findings = []
+        # Extracted text keyed by source file name, filled by the text_extract node.
+        self.document_texts: dict[str, str] = {}

    def run(self) -> None:
        self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -131,6 +141,8 @@ class RegulatoryWorkflowExecutor:
            {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
        )

+        self._execute_node(node.node_code)
+
        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
@@ -142,6 +154,51 @@ class RegulatoryWorkflowExecutor:
            {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
        )

+    def _execute_node(self, node_code: str) -> None:
+        """Run the work attached to the workflow node *node_code*.
+
+        Check nodes append their findings to ``self.findings``; ``risk_assess``
+        persists them; ``report_export`` writes the export files and posts an
+        assistant message with the summary.  Any other code does nothing.
+        """
+        if node_code == "rule_scope":
+            self.rule_set = load_rule_file()
+        elif node_code == "completeness_check":
+            found = run_completeness_check(self.batch.source_summary_batch, self._rules())
+            self.findings.extend(found)
+        elif node_code == "text_extract":
+            self.document_texts = self._extract_source_texts()
+        elif node_code == "structure_check":
+            found = run_structure_check(self.document_texts, self._rules())
+            self.findings.extend(found)
+        elif node_code == "consistency_check":
+            found = run_consistency_check(self.document_texts)
+            self.findings.extend(found)
+        elif node_code == "risk_assess":
+            persist_findings(self.batch, self.findings)
+        elif node_code == "report_export":
+            exported_files = export_review_results(self.batch)
+            Message.objects.create(
+                conversation=self.batch.conversation,
+                role=Message.Role.ASSISTANT,
+                content=build_assistant_summary(self.batch, exported_files),
+            )
+
+    def _rules(self) -> dict:
+        """Return the rule set, loading and caching it on first use."""
+        cached = self.rule_set
+        if cached is not None:
+            return cached
+        self.rule_set = load_rule_file()
+        return self.rule_set
+
+    def _extract_source_texts(self) -> dict[str, str]:
+        """Extract the text of every source file of the summary batch.
+
+        Items are visited in ``file_index`` order.  A relative ``storage_path``
+        is resolved against ``MEDIA_ROOT``.  Items whose path is not an
+        existing regular file, or whose extraction does not succeed with
+        non-empty text, are skipped.
+
+        Returns:
+            Extracted text keyed by the item's ``file_name``.
+        """
+        texts: dict[str, str] = {}
+        for item in self.batch.source_summary_batch.items.order_by("file_index"):
+            path = Path(item.storage_path)
+            if not path.is_absolute():
+                path = Path(settings.MEDIA_ROOT) / item.storage_path
+            # is_file() rather than exists(): an empty storage_path resolves to
+            # MEDIA_ROOT itself, and a directory must not reach extract_text.
+            if not path.is_file():
+                continue
+            result = extract_text(path)
+            if result.status == "success" and result.text:
+                texts[item.file_name] = result.text
+        return texts
+

 def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
    executor = RegulatoryWorkflowExecutor(batch)