feat(regulatory): 输出法规核查过程日志

This commit is contained in:
2026-06-07 13:23:55 +08:00
parent 0f9fb980f2
commit 32d258bb75
8 changed files with 200 additions and 2 deletions

View File

@@ -57,7 +57,7 @@ def generate_reply(conversation, user_message: str) -> str:
raise LLMRequestError("模型接口返回格式不符合预期。") from exc
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0) -> str:
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
"""Calls the configured chat endpoint with explicit messages and returns assistant text."""
if not settings.LLM_API_KEY:
@@ -84,7 +84,7 @@ def generate_completion(messages: list[dict[str, str]], *, temperature: float =
)
try:
with request.urlopen(http_request, timeout=60) as response:
with request.urlopen(http_request, timeout=timeout) as response:
data = json.loads(response.read().decode("utf-8"))
except error.HTTPError as exc:
details = exc.read().decode("utf-8", errors="ignore")

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import logging
import re
class SuppressWorkflowStatusPollFilter(logging.Filter):
"""Hides noisy workflow status polling access logs from runserver output."""
STATUS_POLL_PATTERN = re.compile(
r'"GET /api/review-agent/(?:file-summary|regulatory-review)/\d+/status/ HTTP/[0-9.]+" 200 '
)
def filter(self, record: logging.LogRecord) -> bool:
return not self.STATUS_POLL_PATTERN.search(record.getMessage())

View File

@@ -4,6 +4,7 @@ import json
import os
import re
import time
import inspect
from collections.abc import Callable
from typing import Any
@@ -152,9 +153,13 @@ def _parse_json_object(raw: str) -> dict[str, Any]:
def _call_completion_with_retries(completion_func: Callable[..., str], messages: list[dict[str, str]]) -> str:
attempts = max(1, int(getattr(settings, "REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", 3) or 3))
delay_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", 0.5) or 0)
timeout_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", 15) or 15)
accepts_timeout = _accepts_timeout(completion_func)
last_error: Exception | None = None
for attempt in range(1, attempts + 1):
try:
if accepts_timeout:
return completion_func(messages, temperature=0.0, timeout=timeout_seconds)
return completion_func(messages, temperature=0.0)
except (LLMRequestError, OSError, TimeoutError) as exc:
last_error = exc
@@ -167,6 +172,14 @@ def _call_completion_with_retries(completion_func: Callable[..., str], messages:
raise LLMRequestError("LLM复核调用失败。")
def _accepts_timeout(completion_func: Callable[..., str]) -> bool:
try:
signature = inspect.signature(completion_func)
except (TypeError, ValueError):
return True
return "timeout" in signature.parameters
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
if completion_func is not None:
return True

View File

@@ -125,6 +125,7 @@ class RegulatoryWorkflowExecutor:
self.llm_reviews: dict[str, dict[str, object]] = {}
def run(self) -> None:
logger.info("法规核查工作流开始 batch_no=%s batch_id=%s", self.batch.batch_no, self.batch.pk)
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
self.batch.started_at = timezone.now()
self.batch.save(update_fields=["status", "started_at"])
@@ -136,6 +137,7 @@ class RegulatoryWorkflowExecutor:
continue
self._run_node(node)
except WorkflowPausedForUser:
logger.info("法规核查工作流等待用户 batch_no=%s node=condition_confirm", self.batch.batch_no)
return
except Exception as exc:
logger.exception("Regulatory workflow failed", extra={"batch_id": self.batch.pk})
@@ -150,6 +152,7 @@ class RegulatoryWorkflowExecutor:
self.batch.finished_at = timezone.now()
self.batch.save(update_fields=["status", "finished_at"])
record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})
logger.info("法规核查工作流完成 batch_no=%s findings=%s", self.batch.batch_no, len(self.findings))
def _nodes(self):
return WorkflowNodeRun.objects.filter(
@@ -158,6 +161,12 @@ class RegulatoryWorkflowExecutor:
).order_by("id")
def _run_node(self, node: WorkflowNodeRun) -> None:
logger.info(
"节点开始 batch_no=%s node=%s name=%s",
self.batch.batch_no,
node.node_code,
node.node_name,
)
node.status = WorkflowNodeRun.Status.RUNNING
node.progress = 10
node.started_at = timezone.now()
@@ -181,6 +190,13 @@ class RegulatoryWorkflowExecutor:
"node_progress",
{"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
)
logger.info(
"节点完成 batch_no=%s node=%s name=%s progress=%s",
self.batch.batch_no,
node.node_code,
node.node_name,
node.progress,
)
def _execute_node(self, node_code: str) -> None:
if node_code == "condition_confirm":
@@ -188,10 +204,22 @@ class RegulatoryWorkflowExecutor:
return
if node_code == "rule_scope":
self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
logger.info(
"方法执行 batch_no=%s method=apply_rule_scope requirements=%s scope=%s",
self.batch.batch_no,
len(self.rule_set.get("requirements", [])),
self.batch.condition_json.get("rule_scope") or {},
)
return
if node_code == "completeness_check":
findings = run_completeness_check(self.batch.source_summary_batch, self._rules())
self.findings.extend(findings)
logger.info(
"方法执行 batch_no=%s method=run_completeness_check findings=%s source_summary=%s",
self.batch.batch_no,
len(findings),
self.batch.source_summary_batch.batch_no,
)
self._save_llm_review(
"completeness_check",
{
@@ -202,6 +230,12 @@ class RegulatoryWorkflowExecutor:
return
if node_code == "text_extract":
self.document_texts = self._extract_source_texts()
logger.info(
"方法执行 batch_no=%s method=_extract_source_texts success_docs=%s total_files=%s",
self.batch.batch_no,
len(self.document_texts),
len(self.text_extract_status),
)
self._save_llm_review("text_extract", {"files": self.text_extract_status})
save_artifact(
self.batch,
@@ -214,17 +248,35 @@ class RegulatoryWorkflowExecutor:
if node_code == "structure_check":
findings = run_structure_check(self.document_texts, self._rules())
self.findings.extend(findings)
logger.info(
"方法执行 batch_no=%s method=run_structure_check findings=%s docs=%s",
self.batch.batch_no,
len(findings),
len(self.document_texts),
)
self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
return
if node_code == "consistency_check":
findings = run_consistency_check(self.document_texts)
self.findings.extend(findings)
logger.info(
"方法执行 batch_no=%s method=run_consistency_check findings=%s docs=%s",
self.batch.batch_no,
len(findings),
len(self.document_texts),
)
self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
return
if node_code == "risk_assess":
self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
issues = persist_findings(self.batch, self.findings)
create_mock_notifications(self.batch)
logger.info(
"方法执行 batch_no=%s method=persist_findings issues=%s findings=%s",
self.batch.batch_no,
len(issues),
len(self.findings),
)
save_artifact(
self.batch,
name="rag_result_json.json",
@@ -251,6 +303,11 @@ class RegulatoryWorkflowExecutor:
return
if node_code == "report_export":
exports = export_review_results(self.batch)
logger.info(
"方法执行 batch_no=%s method=export_review_results exports=%s",
self.batch.batch_no,
len(exports),
)
Message.objects.create(
conversation=self.batch.conversation,
role=Message.Role.ASSISTANT,
@@ -261,6 +318,12 @@ class RegulatoryWorkflowExecutor:
if self.batch.condition_json.get("confirmed"):
return
candidates = detect_regulatory_condition_candidates(self.batch.source_summary_batch)
logger.info(
"方法执行 batch_no=%s method=detect_regulatory_condition_candidates product_category=%s product_name=%s",
self.batch.batch_no,
(candidates.get("product_category") or {}).get("suggested"),
(candidates.get("product_name") or {}).get("suggested"),
)
self.batch.condition_json = {
**(self.batch.condition_json or {}),
"confirmed": False,
@@ -297,6 +360,7 @@ class RegulatoryWorkflowExecutor:
if not path.is_absolute():
path = Path(settings.MEDIA_ROOT) / item.storage_path
if not path.exists():
logger.info("文本抽取跳过 batch_no=%s file=%s reason=missing", self.batch.batch_no, item.file_name)
self.text_extract_status[item.file_name] = {
"status": "missing",
"path": str(path),
@@ -324,11 +388,25 @@ class RegulatoryWorkflowExecutor:
}
if result.status == "success" and result.text:
texts[item.file_name] = result.text
logger.info(
"文本抽取文件 batch_no=%s file=%s status=%s fields=%s chars=%s",
self.batch.batch_no,
item.file_name,
result.status,
len((field_review.get("selected_fields") or {})),
len(result.text or ""),
)
return texts
def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
review = review_workflow_payload(stage=stage, payload=payload)
self.llm_reviews[stage] = review
logger.info(
"方法执行 batch_no=%s method=review_workflow_payload stage=%s status=%s",
self.batch.batch_no,
stage,
review.get("status"),
)
save_artifact(
self.batch,
name=f"llm_review_{stage}.json",