feat(regulatory): 增加风险归并与核查报告导出

This commit is contained in:
2026-06-07 00:39:33 +08:00
parent ec89e62661
commit 4c28466fe4
6 changed files with 401 additions and 0 deletions

View File

@@ -0,0 +1,167 @@
from __future__ import annotations
import json
from pathlib import Path
from django.conf import settings
from openpyxl import Workbook
from review_agent.models import ExportedSummaryFile, RegulatoryIssue, RegulatoryReviewBatch
# Display label for each severity code. The insertion order (blocking first,
# info last) is the row order produced when this mapping is iterated to
# render the risk summary tables.
SEVERITY_LABELS = dict(
    blocking="阻断项",
    high="高风险",
    medium="中风险",
    low="低风险",
    info="提示",
)
def export_review_results(batch: RegulatoryReviewBatch) -> list[ExportedSummaryFile]:
    """Write the Markdown, Excel and JSON exports of ``batch`` to disk.

    The files are placed in an ``exports`` directory below ``batch.work_dir``
    when that is set, otherwise below
    ``MEDIA_ROOT/regulatory_review/work/<batch_no>``. The directory is created
    if it does not exist yet.

    Returns:
        The created export records, in the order Markdown, Excel, JSON.
    """
    if batch.work_dir:
        base_dir = Path(batch.work_dir)
    else:
        base_dir = Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no
    target_dir = base_dir / "exports"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Markdown report.
    report_text = build_markdown_report(batch)
    markdown_record = _create_export(
        batch,
        target_dir / f"{batch.batch_no}-regulatory-review.md",
        ExportedSummaryFile.ExportType.MARKDOWN,
        "markdown_report",
        report_text,
    )

    # Excel issue checklist.
    excel_record = _create_excel_export(batch, target_dir / f"{batch.batch_no}-regulatory-issues.xlsx")

    # Machine-readable JSON result package.
    payload_text = json.dumps(build_result_payload(batch), ensure_ascii=False, indent=2)
    json_record = _create_export(
        batch,
        target_dir / f"{batch.batch_no}-regulatory-result.json",
        ExportedSummaryFile.ExportType.JSON,
        "result_package",
        payload_text,
    )

    return [markdown_record, excel_record, json_record]
def build_markdown_report(batch: RegulatoryReviewBatch) -> str:
    """Render the regulatory review report of ``batch`` as Markdown text.

    The report consists of a heading with the batch number, a risk summary
    table with one row per entry of ``SEVERITY_LABELS`` (severities missing
    from ``batch.risk_summary`` are shown as 0), and an issue table listing
    every issue of the batch ordered by ``id``.

    Issue titles and suggestions are free text, so they are sanitised before
    being placed in a table cell: ``|`` is escaped and line breaks are folded
    into spaces. Without this a single such character corrupts the table.

    Returns:
        The report as one newline-joined string.
    """

    def _cell(value: object) -> str:
        # A Markdown table row must stay on one physical line and must not
        # contain a bare column delimiter.
        return " ".join(str(value).replace("|", "\\|").splitlines())

    lines = [
        "# NMPA 注册资料法规核查报告",
        "",
        f"批次号:{batch.batch_no}",
        "",
        "## 风险汇总",
        "",
        "| 风险等级 | 数量 |",
        "| --- | --- |",
    ]
    summary = batch.risk_summary or {}
    lines.extend(f"| {label} | {summary.get(severity, 0)} |" for severity, label in SEVERITY_LABELS.items())
    lines.extend(["", "## 问题清单", "", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"])
    for issue in batch.issues.order_by("id"):
        # Unknown severity codes fall back to the raw code.
        level = SEVERITY_LABELS.get(issue.severity, issue.severity)
        lines.append(
            f"| {level} | {_cell(issue.title)} | {issue.status} | {_cell(issue.suggestion or '-')} |"
        )
    return "\n".join(lines)
def build_result_payload(batch: RegulatoryReviewBatch) -> dict[str, object]:
    """Build the JSON-serialisable result package for ``batch``.

    The payload carries the batch number, the batch number of the source
    summary batch, the stored risk summary and one dictionary per issue
    (ordered by ``id``).
    """
    # Issue attributes copied verbatim into the payload, in output order.
    issue_fields = (
        "severity",
        "category",
        "rule_code",
        "title",
        "detail",
        "suggestion",
        "status",
        "evidence",
        "citations",
    )
    payload: dict[str, object] = {}
    payload["batch_no"] = batch.batch_no
    payload["source_summary_batch"] = batch.source_summary_batch.batch_no
    payload["risk_summary"] = batch.risk_summary
    payload["issues"] = [
        {name: getattr(record, name) for name in issue_fields}
        for record in batch.issues.order_by("id")
    ]
    return payload
def build_assistant_summary(batch: RegulatoryReviewBatch, exports: list[ExportedSummaryFile]) -> str:
    """Build the Markdown chat message that summarises a finished review.

    The message contains a risk table with only the severities that have a
    non-zero count, a table of the first eight issues (ordered by ``id``) and
    a download link for each export type present in ``exports``.

    Issue titles and suggestions are free text, so they are sanitised before
    being placed in a table cell: ``|`` is escaped and line breaks are folded
    into spaces. Without this a single such character corrupts the table.

    Args:
        batch: The review batch to summarise.
        exports: Export records to link to; if several share an export type
            the last one wins.

    Returns:
        The message as one newline-joined string.
    """

    def _cell(value: object) -> str:
        # A Markdown table row must stay on one physical line and must not
        # contain a bare column delimiter.
        return " ".join(str(value).replace("|", "\\|").splitlines())

    export_by_type = {export.export_type: export for export in exports}
    lines = [
        "已完成 NMPA 注册资料法规核查。",
        "",
        "| 风险等级 | 数量 |",
        "| --- | --- |",
    ]
    summary = batch.risk_summary or {}
    for severity, label in SEVERITY_LABELS.items():
        count = summary.get(severity, 0)
        if count:
            lines.append(f"| {label} | {count} |")
    lines.extend(["", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"])
    # Only a short preview here; the exported files carry the full list.
    for issue in batch.issues.order_by("id")[:8]:
        level = SEVERITY_LABELS.get(issue.severity, issue.severity)
        lines.append(
            f"| {level} | {_cell(issue.title)} | {issue.status} | {_cell(issue.suggestion or '-')} |"
        )
    lines.extend(
        [
            "",
            _download_link("下载 Markdown 核查报告", export_by_type.get(ExportedSummaryFile.ExportType.MARKDOWN)),
            _download_link("下载 Excel 缺失清单", export_by_type.get(ExportedSummaryFile.ExportType.EXCEL)),
            _download_link("下载 JSON 结果包", export_by_type.get(ExportedSummaryFile.ExportType.JSON)),
        ]
    )
    # _download_link yields None for a missing export; drop those entries.
    return "\n".join(line for line in lines if line is not None)
def _download_link(label: str, exported: ExportedSummaryFile | None) -> str | None:
if not exported:
return None
return f"[{label}](/api/review-agent/file-summary/exports/{exported.pk}/download/)"
def _create_export(
    batch: RegulatoryReviewBatch,
    path: Path,
    export_type: str,
    category: str,
    content: str,
) -> ExportedSummaryFile:
    """Write ``content`` to ``path`` as UTF-8 and register it as an export.

    The export record is attached to the source summary batch of ``batch``
    and tagged with the ``regulatory_review`` workflow type and the primary
    key of ``batch``.
    """
    path.write_text(content, encoding="utf-8")
    record_fields = {
        "batch": batch.source_summary_batch,
        "workflow_type": "regulatory_review",
        "workflow_batch_id": batch.pk,
        "export_category": category,
        "export_type": export_type,
        "file_name": path.name,
        "storage_path": str(path),
    }
    return ExportedSummaryFile.objects.create(**record_fields)
def _create_excel_export(batch: RegulatoryReviewBatch, path: Path) -> ExportedSummaryFile:
    """Write the issue checklist of ``batch`` to an Excel workbook at ``path``.

    The workbook has a single sheet with a header row followed by one row per
    issue (ordered by ``id``). The last column joins the ``source`` value of
    every citation with ``"; "``.

    Returns:
        The export record registered for the written workbook.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "法规问题清单"
    sheet.append(["等级", "类别", "规则", "问题", "状态", "建议", "法规依据"])
    for issue in batch.issues.order_by("id"):
        # An issue stored without citations may hold None rather than an
        # empty list; treat both as "no citations" instead of raising.
        citations = issue.citations or []
        sheet.append(
            [
                # Unknown severity codes fall back to the raw code.
                SEVERITY_LABELS.get(issue.severity, issue.severity),
                issue.category,
                issue.rule_code,
                issue.title,
                issue.status,
                issue.suggestion,
                "; ".join(str(item.get("source", "")) for item in citations),
            ]
        )
    workbook.save(path)
    return ExportedSummaryFile.objects.create(
        batch=batch.source_summary_batch,
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        export_category="issue_checklist",
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=path.name,
        storage_path=str(path),
    )

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
from collections import Counter
from review_agent.models import RegulatoryIssue, RegulatoryReviewBatch
from review_agent.regulatory_review.schemas import Finding
from .rag_citation import retrieve_citations
# Severity codes in the order they appear as keys of the persisted risk summary.
SEVERITY_ORDER = [
    "blocking",
    "high",
    "medium",
    "low",
    "info",
]
def persist_findings(batch: RegulatoryReviewBatch, findings: list[Finding]) -> list[RegulatoryIssue]:
    """Replace the stored issues of ``batch`` with the given findings.

    Existing issues of the batch are deleted first. Findings are then
    de-duplicated on ``(rule_code, category, title)``, keeping the first
    occurrence, and one issue is created per remaining finding. A finding
    without citations gets them from ``_safe_citations``. Finally the risk
    summary of the batch is recomputed and saved.

    Returns:
        The newly created issues, in first-occurrence order.
    """
    RegulatoryIssue.objects.filter(batch=batch).delete()

    deduped: dict[tuple, Finding] = {}
    for item in findings:
        key = (item.rule_code, item.category, item.title)
        if key not in deduped:
            deduped[key] = item

    created = [
        RegulatoryIssue.objects.create(
            batch=batch,
            rule_code=item.rule_code,
            category=item.category,
            severity=item.severity,
            title=item.title,
            detail=item.detail,
            suggestion=item.suggestion,
            evidence=item.evidence,
            citations=item.citations or _safe_citations(item),
        )
        for item in deduped.values()
    ]

    batch.risk_summary = _risk_summary(created)
    batch.save(update_fields=["risk_summary"])
    return created
def _safe_citations(finding: Finding) -> list[dict[str, object]]:
try:
return retrieve_citations(finding.title)
except Exception:
return [{"source": "原文依据待补充", "text": "RAG 索引不可用或无命中", "score": None}]
def _risk_summary(issues: list[RegulatoryIssue]) -> dict[str, int]:
    """Count issues per severity, with a key for every ``SEVERITY_ORDER`` entry.

    Severities without any issue are reported as 0; severities not listed in
    ``SEVERITY_ORDER`` are left out of the result.
    """
    tally = Counter()
    for issue in issues:
        tally[issue.severity] += 1
    # Reading a missing key from a Counter yields 0 without inserting it.
    return {level: tally[level] for level in SEVERITY_ORDER}

View File

@@ -16,6 +16,13 @@ from review_agent.models import (
RegulatoryReviewBatch,
WorkflowNodeRun,
)
from review_agent.regulatory_review.services.completeness_check import run_completeness_check
from review_agent.regulatory_review.services.consistency_check import run_consistency_check
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
from review_agent.regulatory_review.services.risk_assess import persist_findings
from review_agent.regulatory_review.services.rule_loader import load_rule_file
from review_agent.regulatory_review.services.structure_check import run_structure_check
from review_agent.regulatory_review.services.text_extract import extract_text
from .events import record_event
@@ -89,6 +96,9 @@ def create_regulatory_review_batch(
class RegulatoryWorkflowExecutor:
def __init__(self, batch: RegulatoryReviewBatch):
self.batch = batch
self.rule_set: dict | None = None
self.findings = []
self.document_texts: dict[str, str] = {}
def run(self) -> None:
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -131,6 +141,8 @@ class RegulatoryWorkflowExecutor:
{"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
)
self._execute_node(node.node_code)
node.status = WorkflowNodeRun.Status.SUCCESS
node.progress = 100
node.finished_at = timezone.now()
@@ -142,6 +154,51 @@ class RegulatoryWorkflowExecutor:
{"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
)
def _execute_node(self, node_code: str) -> None:
if node_code == "rule_scope":
self.rule_set = load_rule_file()
return
if node_code == "completeness_check":
self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules()))
return
if node_code == "text_extract":
self.document_texts = self._extract_source_texts()
return
if node_code == "structure_check":
self.findings.extend(run_structure_check(self.document_texts, self._rules()))
return
if node_code == "consistency_check":
self.findings.extend(run_consistency_check(self.document_texts))
return
if node_code == "risk_assess":
persist_findings(self.batch, self.findings)
return
if node_code == "report_export":
exports = export_review_results(self.batch)
Message.objects.create(
conversation=self.batch.conversation,
role=Message.Role.ASSISTANT,
content=build_assistant_summary(self.batch, exports),
)
def _rules(self) -> dict:
if self.rule_set is None:
self.rule_set = load_rule_file()
return self.rule_set
def _extract_source_texts(self) -> dict[str, str]:
texts = {}
for item in self.batch.source_summary_batch.items.order_by("file_index"):
path = Path(item.storage_path)
if not path.is_absolute():
path = Path(settings.MEDIA_ROOT) / item.storage_path
if not path.exists():
continue
result = extract_text(path)
if result.status == "success" and result.text:
texts[item.file_name] = result.text
return texts
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
executor = RegulatoryWorkflowExecutor(batch)

View File

@@ -0,0 +1,49 @@
import json
import pytest
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileSummaryBatch,
RegulatoryIssue,
RegulatoryReviewBatch,
)
from review_agent.regulatory_review.services.export import export_review_results
pytestmark = pytest.mark.django_db
def test_export_review_results_creates_markdown_excel_and_json(settings, tmp_path, django_user_model):
    """Exporting a batch yields one Markdown, one Excel and one JSON export.

    The JSON export is read back to check that it carries the batch number
    and the issue created for the batch.
    """
    settings.MEDIA_ROOT = tmp_path
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    summary = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-OK")
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=user,
        source_summary_batch=summary,
        batch_no="RR-EXPORT",
        risk_summary={"blocking": 1},
    )
    RegulatoryIssue.objects.create(
        batch=batch,
        rule_code="registration_test_report",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.BLOCKING,
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
    )

    exports = export_review_results(batch)

    assert {export.export_type for export in exports} == {
        ExportedSummaryFile.ExportType.MARKDOWN,
        ExportedSummaryFile.ExportType.EXCEL,
        ExportedSummaryFile.ExportType.JSON,
    }
    json_export = next(export for export in exports if export.export_type == ExportedSummaryFile.ExportType.JSON)
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(json_export.storage_path, encoding="utf-8") as handle:
        payload = json.load(handle)
    assert payload["batch_no"] == "RR-EXPORT"
    assert payload["issues"][0]["title"] == "缺少注册检验报告"

View File

@@ -0,0 +1,35 @@
import pytest
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryIssue, RegulatoryReviewBatch
from review_agent.regulatory_review.schemas import Finding
from review_agent.regulatory_review.services.risk_assess import persist_findings
pytestmark = pytest.mark.django_db
def test_persist_findings_deduplicates_and_updates_risk_summary(django_user_model):
    """Persisting the same finding twice stores one issue and counts it once."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-OK")
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_batch,
        batch_no="RR-RISK",
    )
    duplicate = Finding(
        rule_code="registration_test_report",
        category="completeness",
        severity="blocking",
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
        citations=[{"source": "法规.doc", "text": "注册检验报告"}],
    )

    persisted = persist_findings(review_batch, [duplicate, duplicate])
    review_batch.refresh_from_db()

    assert len(persisted) == 1
    assert RegulatoryIssue.objects.count() == 1
    assert review_batch.risk_summary["blocking"] == 1

View File

@@ -2,8 +2,11 @@ import pytest
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileSummaryBatch,
FileSummaryItem,
Message,
RegulatoryIssue,
RegulatoryReviewBatch,
WorkflowEvent,
WorkflowNodeRun,
@@ -155,3 +158,43 @@ def test_stream_message_starts_regulatory_workflow(monkeypatch, settings, django
assert "workflow_started" in joined
assert "\"workflow_type\": \"regulatory_review\"" in joined
assert RegulatoryReviewBatch.objects.filter(conversation=conversation).exists()
def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_path, django_user_model):
    """A synchronous workflow run ends with issues, three exports and a summary message."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    # Points at a directory that is never created by this test.
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    # A single source document available on disk for the text checks.
    manual_file = tmp_path / "ifu.txt"
    manual_file.write_text("产品名称:甲胎蛋白检测试剂盒\n样本要求:血清\n有效期12个月", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=source_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(manual_file),
    )

    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=source_batch,
    )
    start_regulatory_review_workflow(review_batch, async_run=False)
    review_batch.refresh_from_db()

    assert review_batch.status == RegulatoryReviewBatch.Status.SUCCESS
    assert RegulatoryIssue.objects.filter(batch=review_batch, severity="blocking").exists()
    export_count = ExportedSummaryFile.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
    ).count()
    assert export_count == 3
    assert chat.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()