feat(regulatory): 输出法规核查过程日志

This commit is contained in:
2026-06-07 13:23:55 +08:00
parent 0f9fb980f2
commit 32d258bb75
8 changed files with 200 additions and 2 deletions

View File

@@ -115,6 +115,9 @@ REGULATORY_RAG_COLLECTION = os.environ.get(
"nmpa_ivd_registration_v1", "nmpa_ivd_registration_v1",
) )
REGULATORY_REVIEW_ASYNC = os.environ.get("REGULATORY_REVIEW_ASYNC", "true").lower() == "true" REGULATORY_REVIEW_ASYNC = os.environ.get("REGULATORY_REVIEW_ASYNC", "true").lower() == "true"
REGULATORY_LLM_REVIEW_MAX_ATTEMPTS = int(os.environ.get("REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", "3"))
REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", "0.5"))
REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", "15"))
SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1") SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1")
SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", "") SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", "")
SILICONFLOW_EMBEDDING_MODEL = os.environ.get( SILICONFLOW_EMBEDDING_MODEL = os.environ.get(
@@ -126,10 +129,16 @@ SILICONFLOW_EMBEDDING_DIMENSIONS = int(os.environ.get("SILICONFLOW_EMBEDDING_DIM
LOGGING = { LOGGING = {
"version": 1, "version": 1,
"disable_existing_loggers": False, "disable_existing_loggers": False,
"filters": {
"suppress_workflow_status_poll": {
"()": "review_agent.logging_filters.SuppressWorkflowStatusPollFilter",
},
},
"handlers": { "handlers": {
"console": { "console": {
"class": "logging.StreamHandler", "class": "logging.StreamHandler",
"formatter": "verbose", "formatter": "verbose",
"filters": ["suppress_workflow_status_poll"],
}, },
}, },
"formatters": { "formatters": {
@@ -143,5 +152,10 @@ LOGGING = {
"level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"), "level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"),
"propagate": True, "propagate": True,
}, },
"django.server": {
"handlers": ["console"],
"level": "INFO",
"propagate": False,
},
}, },
} }

View File

@@ -57,7 +57,7 @@ def generate_reply(conversation, user_message: str) -> str:
raise LLMRequestError("模型接口返回格式不符合预期。") from exc raise LLMRequestError("模型接口返回格式不符合预期。") from exc
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0) -> str: def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
"""Calls the configured chat endpoint with explicit messages and returns assistant text.""" """Calls the configured chat endpoint with explicit messages and returns assistant text."""
if not settings.LLM_API_KEY: if not settings.LLM_API_KEY:
@@ -84,7 +84,7 @@ def generate_completion(messages: list[dict[str, str]], *, temperature: float =
) )
try: try:
with request.urlopen(http_request, timeout=60) as response: with request.urlopen(http_request, timeout=timeout) as response:
data = json.loads(response.read().decode("utf-8")) data = json.loads(response.read().decode("utf-8"))
except error.HTTPError as exc: except error.HTTPError as exc:
details = exc.read().decode("utf-8", errors="ignore") details = exc.read().decode("utf-8", errors="ignore")

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import logging
import re
class SuppressWorkflowStatusPollFilter(logging.Filter):
"""Hides noisy workflow status polling access logs from runserver output."""
STATUS_POLL_PATTERN = re.compile(
r'"GET /api/review-agent/(?:file-summary|regulatory-review)/\d+/status/ HTTP/[0-9.]+" 200 '
)
def filter(self, record: logging.LogRecord) -> bool:
return not self.STATUS_POLL_PATTERN.search(record.getMessage())

View File

@@ -4,6 +4,7 @@ import json
import os import os
import re import re
import time import time
import inspect
from collections.abc import Callable from collections.abc import Callable
from typing import Any from typing import Any
@@ -152,9 +153,13 @@ def _parse_json_object(raw: str) -> dict[str, Any]:
def _call_completion_with_retries(completion_func: Callable[..., str], messages: list[dict[str, str]]) -> str: def _call_completion_with_retries(completion_func: Callable[..., str], messages: list[dict[str, str]]) -> str:
attempts = max(1, int(getattr(settings, "REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", 3) or 3)) attempts = max(1, int(getattr(settings, "REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", 3) or 3))
delay_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", 0.5) or 0) delay_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", 0.5) or 0)
timeout_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", 15) or 15)
accepts_timeout = _accepts_timeout(completion_func)
last_error: Exception | None = None last_error: Exception | None = None
for attempt in range(1, attempts + 1): for attempt in range(1, attempts + 1):
try: try:
if accepts_timeout:
return completion_func(messages, temperature=0.0, timeout=timeout_seconds)
return completion_func(messages, temperature=0.0) return completion_func(messages, temperature=0.0)
except (LLMRequestError, OSError, TimeoutError) as exc: except (LLMRequestError, OSError, TimeoutError) as exc:
last_error = exc last_error = exc
@@ -167,6 +172,14 @@ def _call_completion_with_retries(completion_func: Callable[..., str], messages:
raise LLMRequestError("LLM复核调用失败。") raise LLMRequestError("LLM复核调用失败。")
def _accepts_timeout(completion_func: Callable[..., str]) -> bool:
try:
signature = inspect.signature(completion_func)
except (TypeError, ValueError):
return True
return "timeout" in signature.parameters
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool: def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
if completion_func is not None: if completion_func is not None:
return True return True

View File

@@ -125,6 +125,7 @@ class RegulatoryWorkflowExecutor:
self.llm_reviews: dict[str, dict[str, object]] = {} self.llm_reviews: dict[str, dict[str, object]] = {}
def run(self) -> None: def run(self) -> None:
logger.info("法规核查工作流开始 batch_no=%s batch_id=%s", self.batch.batch_no, self.batch.pk)
self.batch.status = RegulatoryReviewBatch.Status.RUNNING self.batch.status = RegulatoryReviewBatch.Status.RUNNING
self.batch.started_at = timezone.now() self.batch.started_at = timezone.now()
self.batch.save(update_fields=["status", "started_at"]) self.batch.save(update_fields=["status", "started_at"])
@@ -136,6 +137,7 @@ class RegulatoryWorkflowExecutor:
continue continue
self._run_node(node) self._run_node(node)
except WorkflowPausedForUser: except WorkflowPausedForUser:
logger.info("法规核查工作流等待用户 batch_no=%s node=condition_confirm", self.batch.batch_no)
return return
except Exception as exc: except Exception as exc:
logger.exception("Regulatory workflow failed", extra={"batch_id": self.batch.pk}) logger.exception("Regulatory workflow failed", extra={"batch_id": self.batch.pk})
@@ -150,6 +152,7 @@ class RegulatoryWorkflowExecutor:
self.batch.finished_at = timezone.now() self.batch.finished_at = timezone.now()
self.batch.save(update_fields=["status", "finished_at"]) self.batch.save(update_fields=["status", "finished_at"])
record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk}) record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})
logger.info("法规核查工作流完成 batch_no=%s findings=%s", self.batch.batch_no, len(self.findings))
def _nodes(self): def _nodes(self):
return WorkflowNodeRun.objects.filter( return WorkflowNodeRun.objects.filter(
@@ -158,6 +161,12 @@ class RegulatoryWorkflowExecutor:
).order_by("id") ).order_by("id")
def _run_node(self, node: WorkflowNodeRun) -> None: def _run_node(self, node: WorkflowNodeRun) -> None:
logger.info(
"节点开始 batch_no=%s node=%s name=%s",
self.batch.batch_no,
node.node_code,
node.node_name,
)
node.status = WorkflowNodeRun.Status.RUNNING node.status = WorkflowNodeRun.Status.RUNNING
node.progress = 10 node.progress = 10
node.started_at = timezone.now() node.started_at = timezone.now()
@@ -181,6 +190,13 @@ class RegulatoryWorkflowExecutor:
"node_progress", "node_progress",
{"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message}, {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
) )
logger.info(
"节点完成 batch_no=%s node=%s name=%s progress=%s",
self.batch.batch_no,
node.node_code,
node.node_name,
node.progress,
)
def _execute_node(self, node_code: str) -> None: def _execute_node(self, node_code: str) -> None:
if node_code == "condition_confirm": if node_code == "condition_confirm":
@@ -188,10 +204,22 @@ class RegulatoryWorkflowExecutor:
return return
if node_code == "rule_scope": if node_code == "rule_scope":
self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {}) self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
logger.info(
"方法执行 batch_no=%s method=apply_rule_scope requirements=%s scope=%s",
self.batch.batch_no,
len(self.rule_set.get("requirements", [])),
self.batch.condition_json.get("rule_scope") or {},
)
return return
if node_code == "completeness_check": if node_code == "completeness_check":
findings = run_completeness_check(self.batch.source_summary_batch, self._rules()) findings = run_completeness_check(self.batch.source_summary_batch, self._rules())
self.findings.extend(findings) self.findings.extend(findings)
logger.info(
"方法执行 batch_no=%s method=run_completeness_check findings=%s source_summary=%s",
self.batch.batch_no,
len(findings),
self.batch.source_summary_batch.batch_no,
)
self._save_llm_review( self._save_llm_review(
"completeness_check", "completeness_check",
{ {
@@ -202,6 +230,12 @@ class RegulatoryWorkflowExecutor:
return return
if node_code == "text_extract": if node_code == "text_extract":
self.document_texts = self._extract_source_texts() self.document_texts = self._extract_source_texts()
logger.info(
"方法执行 batch_no=%s method=_extract_source_texts success_docs=%s total_files=%s",
self.batch.batch_no,
len(self.document_texts),
len(self.text_extract_status),
)
self._save_llm_review("text_extract", {"files": self.text_extract_status}) self._save_llm_review("text_extract", {"files": self.text_extract_status})
save_artifact( save_artifact(
self.batch, self.batch,
@@ -214,17 +248,35 @@ class RegulatoryWorkflowExecutor:
if node_code == "structure_check": if node_code == "structure_check":
findings = run_structure_check(self.document_texts, self._rules()) findings = run_structure_check(self.document_texts, self._rules())
self.findings.extend(findings) self.findings.extend(findings)
logger.info(
"方法执行 batch_no=%s method=run_structure_check findings=%s docs=%s",
self.batch.batch_no,
len(findings),
len(self.document_texts),
)
self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]}) self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
return return
if node_code == "consistency_check": if node_code == "consistency_check":
findings = run_consistency_check(self.document_texts) findings = run_consistency_check(self.document_texts)
self.findings.extend(findings) self.findings.extend(findings)
logger.info(
"方法执行 batch_no=%s method=run_consistency_check findings=%s docs=%s",
self.batch.batch_no,
len(findings),
len(self.document_texts),
)
self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]}) self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
return return
if node_code == "risk_assess": if node_code == "risk_assess":
self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]}) self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
issues = persist_findings(self.batch, self.findings) issues = persist_findings(self.batch, self.findings)
create_mock_notifications(self.batch) create_mock_notifications(self.batch)
logger.info(
"方法执行 batch_no=%s method=persist_findings issues=%s findings=%s",
self.batch.batch_no,
len(issues),
len(self.findings),
)
save_artifact( save_artifact(
self.batch, self.batch,
name="rag_result_json.json", name="rag_result_json.json",
@@ -251,6 +303,11 @@ class RegulatoryWorkflowExecutor:
return return
if node_code == "report_export": if node_code == "report_export":
exports = export_review_results(self.batch) exports = export_review_results(self.batch)
logger.info(
"方法执行 batch_no=%s method=export_review_results exports=%s",
self.batch.batch_no,
len(exports),
)
Message.objects.create( Message.objects.create(
conversation=self.batch.conversation, conversation=self.batch.conversation,
role=Message.Role.ASSISTANT, role=Message.Role.ASSISTANT,
@@ -261,6 +318,12 @@ class RegulatoryWorkflowExecutor:
if self.batch.condition_json.get("confirmed"): if self.batch.condition_json.get("confirmed"):
return return
candidates = detect_regulatory_condition_candidates(self.batch.source_summary_batch) candidates = detect_regulatory_condition_candidates(self.batch.source_summary_batch)
logger.info(
"方法执行 batch_no=%s method=detect_regulatory_condition_candidates product_category=%s product_name=%s",
self.batch.batch_no,
(candidates.get("product_category") or {}).get("suggested"),
(candidates.get("product_name") or {}).get("suggested"),
)
self.batch.condition_json = { self.batch.condition_json = {
**(self.batch.condition_json or {}), **(self.batch.condition_json or {}),
"confirmed": False, "confirmed": False,
@@ -297,6 +360,7 @@ class RegulatoryWorkflowExecutor:
if not path.is_absolute(): if not path.is_absolute():
path = Path(settings.MEDIA_ROOT) / item.storage_path path = Path(settings.MEDIA_ROOT) / item.storage_path
if not path.exists(): if not path.exists():
logger.info("文本抽取跳过 batch_no=%s file=%s reason=missing", self.batch.batch_no, item.file_name)
self.text_extract_status[item.file_name] = { self.text_extract_status[item.file_name] = {
"status": "missing", "status": "missing",
"path": str(path), "path": str(path),
@@ -324,11 +388,25 @@ class RegulatoryWorkflowExecutor:
} }
if result.status == "success" and result.text: if result.status == "success" and result.text:
texts[item.file_name] = result.text texts[item.file_name] = result.text
logger.info(
"文本抽取文件 batch_no=%s file=%s status=%s fields=%s chars=%s",
self.batch.batch_no,
item.file_name,
result.status,
len((field_review.get("selected_fields") or {})),
len(result.text or ""),
)
return texts return texts
def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]: def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
review = review_workflow_payload(stage=stage, payload=payload) review = review_workflow_payload(stage=stage, payload=payload)
self.llm_reviews[stage] = review self.llm_reviews[stage] = review
logger.info(
"方法执行 batch_no=%s method=review_workflow_payload stage=%s status=%s",
self.batch.batch_no,
stage,
review.get("status"),
)
save_artifact( save_artifact(
self.batch, self.batch,
name=f"llm_review_{stage}.json", name=f"llm_review_{stage}.json",

View File

@@ -0,0 +1,31 @@
import logging
from review_agent.logging_filters import SuppressWorkflowStatusPollFilter
def test_suppress_workflow_status_poll_filter_hides_status_poll_requests():
record = logging.LogRecord(
name="django.server",
level=logging.INFO,
pathname="",
lineno=1,
msg='"GET /api/review-agent/regulatory-review/7/status/ HTTP/1.1" 200 1660',
args=(),
exc_info=None,
)
assert SuppressWorkflowStatusPollFilter().filter(record) is False
def test_suppress_workflow_status_poll_filter_keeps_other_requests():
record = logging.LogRecord(
name="django.server",
level=logging.INFO,
pathname="",
lineno=1,
msg='"POST /api/review-agent/regulatory-review/7/conditions/ HTTP/1.1" 200 256',
args=(),
exc_info=None,
)
assert SuppressWorkflowStatusPollFilter().filter(record) is True

View File

@@ -91,3 +91,21 @@ def test_review_workflow_payload_retries_timeout_before_success(settings):
assert attempts["count"] == 3 assert attempts["count"] == 3
assert result["status"] == "success" assert result["status"] == "success"
assert result["result"]["reviewed"] is True assert result["result"]["reviewed"] is True
def test_review_workflow_payload_passes_configured_timeout(settings):
settings.REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = 0
settings.REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = 7
observed = {}
def completion(messages, temperature=0.0, timeout=None):
observed["timeout"] = timeout
return json.dumps({"reviewed": True})
review_workflow_payload(
stage="completeness_check",
payload={"findings": []},
completion_func=completion,
)
assert observed["timeout"] == 7

View File

@@ -1,3 +1,5 @@
import logging
import pytest import pytest
from review_agent.models import ( from review_agent.models import (
@@ -147,6 +149,33 @@ def test_workflow_continues_when_llm_review_times_out(monkeypatch, settings, dja
assert batch.error_message == "" assert batch.error_message == ""
def test_regulatory_workflow_logs_node_and_method_details(caplog, django_user_model):
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
summary = FileSummaryBatch.objects.create(
conversation=conversation,
user=user,
batch_no="FS-OK",
status=FileSummaryBatch.Status.SUCCESS,
)
batch = create_regulatory_review_batch(
conversation=conversation,
user=user,
source_summary_batch=summary,
)
batch.condition_json = {"confirmed": True, "confirmed_conditions": {"product_category": "体外诊断试剂"}}
batch.save(update_fields=["condition_json"])
with caplog.at_level(logging.INFO, logger="review_agent.regulatory_review.workflow"):
start_regulatory_review_workflow(batch, async_run=False)
messages = [record.getMessage() for record in caplog.records]
assert any("法规核查工作流开始" in message and batch.batch_no in message for message in messages)
assert any("节点开始" in message and "完整性核查" in message for message in messages)
assert any("方法执行" in message and "run_completeness_check" in message for message in messages)
assert any("节点完成" in message and "完整性核查" in message for message in messages)
def test_stream_message_prompts_for_summary_when_missing(monkeypatch, django_user_model): def test_stream_message_prompts_for_summary_when_missing(monkeypatch, django_user_model):
user = django_user_model.objects.create_user(username="owner", password="pass") user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话") conversation = Conversation.objects.create(user=user, title="会话")