feat(regulatory): 对齐附件4目录核查规则

This commit is contained in:
2026-06-07 09:27:42 +08:00
parent bbd2d3532a
commit 1bdc7322cf
15 changed files with 753 additions and 43 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import json
import logging
from pathlib import Path
from threading import Thread
@@ -26,6 +27,7 @@ from review_agent.regulatory_review.services.structure_check import run_structur
from review_agent.regulatory_review.services.text_extract import extract_text
from .events import record_event
from .storage import save_artifact
NODE_DEFINITIONS = [
@@ -105,6 +107,7 @@ class RegulatoryWorkflowExecutor:
self.rule_set: dict | None = None
self.findings = []
self.document_texts: dict[str, str] = {}
self.text_extract_status: dict[str, dict[str, object]] = {}
def run(self) -> None:
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -176,6 +179,13 @@ class RegulatoryWorkflowExecutor:
return
if node_code == "text_extract":
self.document_texts = self._extract_source_texts()
save_artifact(
self.batch,
name="text_extract_status.json",
artifact_type="json",
content=json.dumps(self.text_extract_status, ensure_ascii=False, indent=2),
metadata={"artifact": "text_extract_status"},
)
return
if node_code == "structure_check":
self.findings.extend(run_structure_check(self.document_texts, self._rules()))
@@ -184,7 +194,29 @@ class RegulatoryWorkflowExecutor:
self.findings.extend(run_consistency_check(self.document_texts))
return
if node_code == "risk_assess":
persist_findings(self.batch, self.findings)
issues = persist_findings(self.batch, self.findings)
save_artifact(
self.batch,
name="rag_result_json.json",
artifact_type="json",
content=json.dumps(
{
"batch_no": self.batch.batch_no,
"text_extract_status": self.text_extract_status,
"issues": [
{
"rule_code": issue.rule_code,
"title": issue.title,
"citations": issue.citations,
}
for issue in issues
],
},
ensure_ascii=False,
indent=2,
),
metadata={"artifact": "rag_result_json"},
)
return
if node_code == "report_export":
exports = export_review_results(self.batch)
@@ -234,8 +266,25 @@ class RegulatoryWorkflowExecutor:
if not path.is_absolute():
path = Path(settings.MEDIA_ROOT) / item.storage_path
if not path.exists():
self.text_extract_status[item.file_name] = {
"status": "missing",
"path": str(path),
"content_hash": "",
"section_candidates": [],
"field_candidates": {},
"front_text": "",
}
continue
result = extract_text(path)
self.text_extract_status[item.file_name] = {
"status": result.status,
"path": str(path),
"content_hash": result.content_hash,
"section_candidates": result.section_candidates,
"field_candidates": result.field_candidates,
"front_text": result.front_text,
"error_message": result.error_message,
}
if result.status == "success" and result.text:
texts[item.file_name] = result.text
return texts