feat(regulatory): 为核查流程增加LLM复核记录

This commit is contained in:
2026-06-07 11:52:54 +08:00
parent 945669b9c2
commit 8f16675a92
4 changed files with 115 additions and 4 deletions

View File

@@ -1,10 +1,13 @@
from __future__ import annotations
import json
import os
import re
from collections.abc import Callable
from typing import Any
from django.conf import settings
from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
@@ -22,6 +25,16 @@ def review_condition_fields(
llm_fields: dict[str, str] = {}
status = "skipped"
error_message = ""
if not _should_call_llm(completion_func):
selected_fields, selected_sources = _select_fields(rule_fields, llm_fields)
return {
"status": status,
"error_message": error_message,
"rule_fields": _clean_fields(rule_fields),
"llm_fields": llm_fields,
"selected_fields": selected_fields,
"selected_sources": selected_sources,
}
try:
raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0)
payload = _parse_json_object(raw)
@@ -48,6 +61,13 @@ def review_workflow_payload(
payload: dict[str, Any],
completion_func: Callable[..., str] | None = None,
) -> dict[str, Any]:
if not _should_call_llm(completion_func):
return {
"status": "skipped",
"stage": stage,
"result": {},
"error_message": "",
}
try:
raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0)
parsed = _parse_json_object(raw)
@@ -122,6 +142,14 @@ def _parse_json_object(raw: str) -> dict[str, Any]:
return parsed
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
if completion_func is not None:
return True
if os.environ.get("PYTEST_CURRENT_TEST") and not getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False):
return False
return bool(settings.LLM_API_KEY and settings.LLM_MODEL)
def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
clean = {}
for label in FIELD_LABELS:

View File

@@ -23,6 +23,7 @@ from review_agent.regulatory_review.services.consistency_check import run_consis
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
from review_agent.regulatory_review.services.risk_assess import persist_findings
from review_agent.regulatory_review.services.rule_loader import load_rule_file
from review_agent.regulatory_review.services.structure_check import run_structure_check
@@ -121,6 +122,7 @@ class RegulatoryWorkflowExecutor:
self.findings = []
self.document_texts: dict[str, str] = {}
self.text_extract_status: dict[str, dict[str, object]] = {}
self.llm_reviews: dict[str, dict[str, object]] = {}
def run(self) -> None:
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -188,10 +190,19 @@ class RegulatoryWorkflowExecutor:
self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
return
if node_code == "completeness_check":
self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules()))
findings = run_completeness_check(self.batch.source_summary_batch, self._rules())
self.findings.extend(findings)
self._save_llm_review(
"completeness_check",
{
"findings": [finding.to_dict() for finding in findings],
"rules_count": len(self._rules().get("requirements", [])),
},
)
return
if node_code == "text_extract":
self.document_texts = self._extract_source_texts()
self._save_llm_review("text_extract", {"files": self.text_extract_status})
save_artifact(
self.batch,
name="text_extract_status.json",
@@ -201,12 +212,17 @@ class RegulatoryWorkflowExecutor:
)
return
if node_code == "structure_check":
self.findings.extend(run_structure_check(self.document_texts, self._rules()))
findings = run_structure_check(self.document_texts, self._rules())
self.findings.extend(findings)
self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
return
if node_code == "consistency_check":
self.findings.extend(run_consistency_check(self.document_texts))
findings = run_consistency_check(self.document_texts)
self.findings.extend(findings)
self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
return
if node_code == "risk_assess":
self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
issues = persist_findings(self.batch, self.findings)
create_mock_notifications(self.batch)
save_artifact(
@@ -225,6 +241,7 @@ class RegulatoryWorkflowExecutor:
}
for issue in issues
],
"llm_reviews": self.llm_reviews,
},
ensure_ascii=False,
indent=2,
@@ -290,12 +307,18 @@ class RegulatoryWorkflowExecutor:
}
continue
result = extract_text(path)
field_review = review_condition_fields(
text=result.front_text or result.text,
rule_fields=result.field_candidates or {},
file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
)
self.text_extract_status[item.file_name] = {
"status": result.status,
"path": str(path),
"content_hash": result.content_hash,
"section_candidates": result.section_candidates,
"field_candidates": result.field_candidates,
"field_candidates": field_review.get("selected_fields", result.field_candidates),
"field_review": field_review,
"front_text": result.front_text,
"error_message": result.error_message,
}
@@ -303,6 +326,18 @@ class RegulatoryWorkflowExecutor:
texts[item.file_name] = result.text
return texts
def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
    """Review one workflow stage via the LLM helper, record it, and store it.

    Args:
        stage: Stage identifier; used as the key in ``self.llm_reviews`` and
            embedded in the artifact file name.
        payload: Stage data handed to ``review_workflow_payload``.

    Returns:
        The review dict produced by ``review_workflow_payload``.
    """
    stage_review = review_workflow_payload(stage=stage, payload=payload)
    # Keep the result on the executor, keyed by stage.
    self.llm_reviews[stage] = stage_review

    # Persist the same review as a pretty-printed JSON artifact on the batch.
    serialized_review = json.dumps(stage_review, ensure_ascii=False, indent=2)
    save_artifact(
        self.batch,
        name=f"llm_review_{stage}.json",
        artifact_type="json",
        content=serialized_review,
        metadata={"artifact": "llm_review", "stage": stage},
    )
    return stage_review
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
executor = RegulatoryWorkflowExecutor(batch)