feat(regulatory): 对齐附件4目录核查规则
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
@@ -26,6 +27,7 @@ from review_agent.regulatory_review.services.structure_check import run_structur
|
||||
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||
|
||||
from .events import record_event
|
||||
from .storage import save_artifact
|
||||
|
||||
|
||||
NODE_DEFINITIONS = [
|
||||
@@ -105,6 +107,7 @@ class RegulatoryWorkflowExecutor:
|
||||
self.rule_set: dict | None = None
|
||||
self.findings = []
|
||||
self.document_texts: dict[str, str] = {}
|
||||
self.text_extract_status: dict[str, dict[str, object]] = {}
|
||||
|
||||
def run(self) -> None:
|
||||
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
|
||||
@@ -176,6 +179,13 @@ class RegulatoryWorkflowExecutor:
|
||||
return
|
||||
if node_code == "text_extract":
|
||||
self.document_texts = self._extract_source_texts()
|
||||
save_artifact(
|
||||
self.batch,
|
||||
name="text_extract_status.json",
|
||||
artifact_type="json",
|
||||
content=json.dumps(self.text_extract_status, ensure_ascii=False, indent=2),
|
||||
metadata={"artifact": "text_extract_status"},
|
||||
)
|
||||
return
|
||||
if node_code == "structure_check":
|
||||
self.findings.extend(run_structure_check(self.document_texts, self._rules()))
|
||||
@@ -184,7 +194,29 @@ class RegulatoryWorkflowExecutor:
|
||||
self.findings.extend(run_consistency_check(self.document_texts))
|
||||
return
|
||||
if node_code == "risk_assess":
|
||||
persist_findings(self.batch, self.findings)
|
||||
issues = persist_findings(self.batch, self.findings)
|
||||
save_artifact(
|
||||
self.batch,
|
||||
name="rag_result_json.json",
|
||||
artifact_type="json",
|
||||
content=json.dumps(
|
||||
{
|
||||
"batch_no": self.batch.batch_no,
|
||||
"text_extract_status": self.text_extract_status,
|
||||
"issues": [
|
||||
{
|
||||
"rule_code": issue.rule_code,
|
||||
"title": issue.title,
|
||||
"citations": issue.citations,
|
||||
}
|
||||
for issue in issues
|
||||
],
|
||||
},
|
||||
ensure_ascii=False,
|
||||
indent=2,
|
||||
),
|
||||
metadata={"artifact": "rag_result_json"},
|
||||
)
|
||||
return
|
||||
if node_code == "report_export":
|
||||
exports = export_review_results(self.batch)
|
||||
@@ -234,8 +266,25 @@ class RegulatoryWorkflowExecutor:
|
||||
if not path.is_absolute():
|
||||
path = Path(settings.MEDIA_ROOT) / item.storage_path
|
||||
if not path.exists():
|
||||
self.text_extract_status[item.file_name] = {
|
||||
"status": "missing",
|
||||
"path": str(path),
|
||||
"content_hash": "",
|
||||
"section_candidates": [],
|
||||
"field_candidates": {},
|
||||
"front_text": "",
|
||||
}
|
||||
continue
|
||||
result = extract_text(path)
|
||||
self.text_extract_status[item.file_name] = {
|
||||
"status": result.status,
|
||||
"path": str(path),
|
||||
"content_hash": result.content_hash,
|
||||
"section_candidates": result.section_candidates,
|
||||
"field_candidates": result.field_candidates,
|
||||
"front_text": result.front_text,
|
||||
"error_message": result.error_message,
|
||||
}
|
||||
if result.status == "success" and result.text:
|
||||
texts[item.file_name] = result.text
|
||||
return texts
|
||||
|
||||
Reference in New Issue
Block a user