feat(regulatory): 为核查流程增加LLM复核记录

This commit is contained in:
2026-06-07 11:52:54 +08:00
parent 945669b9c2
commit 8f16675a92
4 changed files with 115 additions and 4 deletions

View File

@@ -1,10 +1,13 @@
from __future__ import annotations from __future__ import annotations
import json import json
import os
import re import re
from collections.abc import Callable from collections.abc import Callable
from typing import Any from typing import Any
from django.conf import settings
from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
@@ -22,6 +25,16 @@ def review_condition_fields(
llm_fields: dict[str, str] = {} llm_fields: dict[str, str] = {}
status = "skipped" status = "skipped"
error_message = "" error_message = ""
if not _should_call_llm(completion_func):
selected_fields, selected_sources = _select_fields(rule_fields, llm_fields)
return {
"status": status,
"error_message": error_message,
"rule_fields": _clean_fields(rule_fields),
"llm_fields": llm_fields,
"selected_fields": selected_fields,
"selected_sources": selected_sources,
}
try: try:
raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0) raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0)
payload = _parse_json_object(raw) payload = _parse_json_object(raw)
@@ -48,6 +61,13 @@ def review_workflow_payload(
payload: dict[str, Any], payload: dict[str, Any],
completion_func: Callable[..., str] | None = None, completion_func: Callable[..., str] | None = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
if not _should_call_llm(completion_func):
return {
"status": "skipped",
"stage": stage,
"result": {},
"error_message": "",
}
try: try:
raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0) raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0)
parsed = _parse_json_object(raw) parsed = _parse_json_object(raw)
@@ -122,6 +142,14 @@ def _parse_json_object(raw: str) -> dict[str, Any]:
return parsed return parsed
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
if completion_func is not None:
return True
if os.environ.get("PYTEST_CURRENT_TEST") and not getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False):
return False
return bool(settings.LLM_API_KEY and settings.LLM_MODEL)
def _clean_fields(fields: dict[str, Any]) -> dict[str, str]: def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
clean = {} clean = {}
for label in FIELD_LABELS: for label in FIELD_LABELS:

View File

@@ -23,6 +23,7 @@ from review_agent.regulatory_review.services.consistency_check import run_consis
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
from review_agent.regulatory_review.services.risk_assess import persist_findings from review_agent.regulatory_review.services.risk_assess import persist_findings
from review_agent.regulatory_review.services.rule_loader import load_rule_file from review_agent.regulatory_review.services.rule_loader import load_rule_file
from review_agent.regulatory_review.services.structure_check import run_structure_check from review_agent.regulatory_review.services.structure_check import run_structure_check
@@ -121,6 +122,7 @@ class RegulatoryWorkflowExecutor:
self.findings = [] self.findings = []
self.document_texts: dict[str, str] = {} self.document_texts: dict[str, str] = {}
self.text_extract_status: dict[str, dict[str, object]] = {} self.text_extract_status: dict[str, dict[str, object]] = {}
self.llm_reviews: dict[str, dict[str, object]] = {}
def run(self) -> None: def run(self) -> None:
self.batch.status = RegulatoryReviewBatch.Status.RUNNING self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -188,10 +190,19 @@ class RegulatoryWorkflowExecutor:
self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {}) self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
return return
if node_code == "completeness_check": if node_code == "completeness_check":
self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules())) findings = run_completeness_check(self.batch.source_summary_batch, self._rules())
self.findings.extend(findings)
self._save_llm_review(
"completeness_check",
{
"findings": [finding.to_dict() for finding in findings],
"rules_count": len(self._rules().get("requirements", [])),
},
)
return return
if node_code == "text_extract": if node_code == "text_extract":
self.document_texts = self._extract_source_texts() self.document_texts = self._extract_source_texts()
self._save_llm_review("text_extract", {"files": self.text_extract_status})
save_artifact( save_artifact(
self.batch, self.batch,
name="text_extract_status.json", name="text_extract_status.json",
@@ -201,12 +212,17 @@ class RegulatoryWorkflowExecutor:
) )
return return
if node_code == "structure_check": if node_code == "structure_check":
self.findings.extend(run_structure_check(self.document_texts, self._rules())) findings = run_structure_check(self.document_texts, self._rules())
self.findings.extend(findings)
self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
return return
if node_code == "consistency_check": if node_code == "consistency_check":
self.findings.extend(run_consistency_check(self.document_texts)) findings = run_consistency_check(self.document_texts)
self.findings.extend(findings)
self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
return return
if node_code == "risk_assess": if node_code == "risk_assess":
self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
issues = persist_findings(self.batch, self.findings) issues = persist_findings(self.batch, self.findings)
create_mock_notifications(self.batch) create_mock_notifications(self.batch)
save_artifact( save_artifact(
@@ -225,6 +241,7 @@ class RegulatoryWorkflowExecutor:
} }
for issue in issues for issue in issues
], ],
"llm_reviews": self.llm_reviews,
}, },
ensure_ascii=False, ensure_ascii=False,
indent=2, indent=2,
@@ -290,12 +307,18 @@ class RegulatoryWorkflowExecutor:
} }
continue continue
result = extract_text(path) result = extract_text(path)
field_review = review_condition_fields(
text=result.front_text or result.text,
rule_fields=result.field_candidates or {},
file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
)
self.text_extract_status[item.file_name] = { self.text_extract_status[item.file_name] = {
"status": result.status, "status": result.status,
"path": str(path), "path": str(path),
"content_hash": result.content_hash, "content_hash": result.content_hash,
"section_candidates": result.section_candidates, "section_candidates": result.section_candidates,
"field_candidates": result.field_candidates, "field_candidates": field_review.get("selected_fields", result.field_candidates),
"field_review": field_review,
"front_text": result.front_text, "front_text": result.front_text,
"error_message": result.error_message, "error_message": result.error_message,
} }
@@ -303,6 +326,18 @@ class RegulatoryWorkflowExecutor:
texts[item.file_name] = result.text texts[item.file_name] = result.text
return texts return texts
def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
    """Run the workflow-level LLM review for ``stage`` and record the outcome.

    The dict returned by ``review_workflow_payload`` is stored in
    ``self.llm_reviews`` under ``stage`` and is also saved through
    ``save_artifact`` as a JSON artifact named ``llm_review_<stage>.json``
    attached to ``self.batch``.

    Args:
        stage: Workflow node code the review belongs to.
        payload: Data handed to ``review_workflow_payload`` for that stage.

    Returns:
        The review dict produced by ``review_workflow_payload``.
    """
    stage_review = review_workflow_payload(stage=stage, payload=payload)
    self.llm_reviews[stage] = stage_review

    # ensure_ascii=False keeps non-ASCII text readable in the stored JSON.
    serialized_review = json.dumps(stage_review, ensure_ascii=False, indent=2)
    artifact_metadata = {"artifact": "llm_review", "stage": stage}
    save_artifact(
        self.batch,
        name=f"llm_review_{stage}.json",
        artifact_type="json",
        content=serialized_review,
        metadata=artifact_metadata,
    )
    return stage_review
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None: def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
executor = RegulatoryWorkflowExecutor(batch) executor = RegulatoryWorkflowExecutor(batch)

View File

@@ -121,6 +121,7 @@ def test_detect_regulatory_condition_uses_llm_review_for_better_product_name(
monkeypatch, settings, tmp_path, django_user_model monkeypatch, settings, tmp_path, django_user_model
): ):
settings.MEDIA_ROOT = tmp_path settings.MEDIA_ROOT = tmp_path
settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
user = django_user_model.objects.create_user(username="owner", password="pass") user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话") conversation = Conversation.objects.create(user=user, title="会话")
summary = FileSummaryBatch.objects.create( summary = FileSummaryBatch.objects.create(

View File

@@ -263,3 +263,50 @@ def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_p
assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists() assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists()
assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists() assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists()
assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists() assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()
def test_workflow_records_llm_review_artifacts_for_review_nodes(
    monkeypatch, settings, tmp_path, django_user_model
):
    """Each reviewed workflow node must leave an ``llm_review_<stage>.json`` artifact.

    ``review_workflow_payload`` is replaced inside the workflow module with a
    stub returning a fixed "success" review, so the test only checks that the
    artifacts get recorded for the five reviewed stages.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    # A single source document on disk, registered as the only summary item.
    source_file = tmp_path / "ifu.txt"
    source_file.write_text("产品名称:甲胎蛋白检测试剂盒\n型号规格20人份/盒", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(source_file),
    )

    review_batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review_batch.condition_json = {"confirmed": True, "confirmed_conditions": {"product_category": "体外诊断试剂"}}
    review_batch.save(update_fields=["condition_json"])

    def fake_workflow_review(stage, payload):
        # Stand-in for the real LLM review: always reports success.
        return {"status": "success", "stage": stage, "result": {"reviewed": True}, "error_message": ""}

    monkeypatch.setattr(
        "review_agent.regulatory_review.workflow.review_workflow_payload",
        fake_workflow_review,
    )

    start_regulatory_review_workflow(review_batch, async_run=False)

    stored_artifacts = RegulatoryArtifact.objects.filter(batch=review_batch)
    recorded_names = set(stored_artifacts.values_list("name", flat=True))
    reviewed_stages = (
        "completeness_check",
        "text_extract",
        "structure_check",
        "consistency_check",
        "risk_assess",
    )
    for stage_code in reviewed_stages:
        assert f"llm_review_{stage_code}.json" in recorded_names