feat(regulatory): 为核查流程增加LLM复核记录
This commit is contained in:
@@ -1,10 +1,13 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
|
from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
|
||||||
|
|
||||||
|
|
||||||
@@ -22,6 +25,16 @@ def review_condition_fields(
|
|||||||
llm_fields: dict[str, str] = {}
|
llm_fields: dict[str, str] = {}
|
||||||
status = "skipped"
|
status = "skipped"
|
||||||
error_message = ""
|
error_message = ""
|
||||||
|
if not _should_call_llm(completion_func):
|
||||||
|
selected_fields, selected_sources = _select_fields(rule_fields, llm_fields)
|
||||||
|
return {
|
||||||
|
"status": status,
|
||||||
|
"error_message": error_message,
|
||||||
|
"rule_fields": _clean_fields(rule_fields),
|
||||||
|
"llm_fields": llm_fields,
|
||||||
|
"selected_fields": selected_fields,
|
||||||
|
"selected_sources": selected_sources,
|
||||||
|
}
|
||||||
try:
|
try:
|
||||||
raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0)
|
raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0)
|
||||||
payload = _parse_json_object(raw)
|
payload = _parse_json_object(raw)
|
||||||
@@ -48,6 +61,13 @@ def review_workflow_payload(
|
|||||||
payload: dict[str, Any],
|
payload: dict[str, Any],
|
||||||
completion_func: Callable[..., str] | None = None,
|
completion_func: Callable[..., str] | None = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
if not _should_call_llm(completion_func):
|
||||||
|
return {
|
||||||
|
"status": "skipped",
|
||||||
|
"stage": stage,
|
||||||
|
"result": {},
|
||||||
|
"error_message": "",
|
||||||
|
}
|
||||||
try:
|
try:
|
||||||
raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0)
|
raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0)
|
||||||
parsed = _parse_json_object(raw)
|
parsed = _parse_json_object(raw)
|
||||||
@@ -122,6 +142,14 @@ def _parse_json_object(raw: str) -> dict[str, Any]:
|
|||||||
return parsed
|
return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
|
||||||
|
if completion_func is not None:
|
||||||
|
return True
|
||||||
|
if os.environ.get("PYTEST_CURRENT_TEST") and not getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False):
|
||||||
|
return False
|
||||||
|
return bool(settings.LLM_API_KEY and settings.LLM_MODEL)
|
||||||
|
|
||||||
|
|
||||||
def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
|
def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
|
||||||
clean = {}
|
clean = {}
|
||||||
for label in FIELD_LABELS:
|
for label in FIELD_LABELS:
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from review_agent.regulatory_review.services.consistency_check import run_consis
|
|||||||
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
|
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
|
||||||
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
|
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
|
||||||
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
|
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
|
||||||
|
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
|
||||||
from review_agent.regulatory_review.services.risk_assess import persist_findings
|
from review_agent.regulatory_review.services.risk_assess import persist_findings
|
||||||
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
||||||
from review_agent.regulatory_review.services.structure_check import run_structure_check
|
from review_agent.regulatory_review.services.structure_check import run_structure_check
|
||||||
@@ -121,6 +122,7 @@ class RegulatoryWorkflowExecutor:
|
|||||||
self.findings = []
|
self.findings = []
|
||||||
self.document_texts: dict[str, str] = {}
|
self.document_texts: dict[str, str] = {}
|
||||||
self.text_extract_status: dict[str, dict[str, object]] = {}
|
self.text_extract_status: dict[str, dict[str, object]] = {}
|
||||||
|
self.llm_reviews: dict[str, dict[str, object]] = {}
|
||||||
|
|
||||||
def run(self) -> None:
|
def run(self) -> None:
|
||||||
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
|
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
|
||||||
@@ -188,10 +190,19 @@ class RegulatoryWorkflowExecutor:
|
|||||||
self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
|
self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
|
||||||
return
|
return
|
||||||
if node_code == "completeness_check":
|
if node_code == "completeness_check":
|
||||||
self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules()))
|
findings = run_completeness_check(self.batch.source_summary_batch, self._rules())
|
||||||
|
self.findings.extend(findings)
|
||||||
|
self._save_llm_review(
|
||||||
|
"completeness_check",
|
||||||
|
{
|
||||||
|
"findings": [finding.to_dict() for finding in findings],
|
||||||
|
"rules_count": len(self._rules().get("requirements", [])),
|
||||||
|
},
|
||||||
|
)
|
||||||
return
|
return
|
||||||
if node_code == "text_extract":
|
if node_code == "text_extract":
|
||||||
self.document_texts = self._extract_source_texts()
|
self.document_texts = self._extract_source_texts()
|
||||||
|
self._save_llm_review("text_extract", {"files": self.text_extract_status})
|
||||||
save_artifact(
|
save_artifact(
|
||||||
self.batch,
|
self.batch,
|
||||||
name="text_extract_status.json",
|
name="text_extract_status.json",
|
||||||
@@ -201,12 +212,17 @@ class RegulatoryWorkflowExecutor:
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
if node_code == "structure_check":
|
if node_code == "structure_check":
|
||||||
self.findings.extend(run_structure_check(self.document_texts, self._rules()))
|
findings = run_structure_check(self.document_texts, self._rules())
|
||||||
|
self.findings.extend(findings)
|
||||||
|
self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
|
||||||
return
|
return
|
||||||
if node_code == "consistency_check":
|
if node_code == "consistency_check":
|
||||||
self.findings.extend(run_consistency_check(self.document_texts))
|
findings = run_consistency_check(self.document_texts)
|
||||||
|
self.findings.extend(findings)
|
||||||
|
self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
|
||||||
return
|
return
|
||||||
if node_code == "risk_assess":
|
if node_code == "risk_assess":
|
||||||
|
self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
|
||||||
issues = persist_findings(self.batch, self.findings)
|
issues = persist_findings(self.batch, self.findings)
|
||||||
create_mock_notifications(self.batch)
|
create_mock_notifications(self.batch)
|
||||||
save_artifact(
|
save_artifact(
|
||||||
@@ -225,6 +241,7 @@ class RegulatoryWorkflowExecutor:
|
|||||||
}
|
}
|
||||||
for issue in issues
|
for issue in issues
|
||||||
],
|
],
|
||||||
|
"llm_reviews": self.llm_reviews,
|
||||||
},
|
},
|
||||||
ensure_ascii=False,
|
ensure_ascii=False,
|
||||||
indent=2,
|
indent=2,
|
||||||
@@ -290,12 +307,18 @@ class RegulatoryWorkflowExecutor:
|
|||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
result = extract_text(path)
|
result = extract_text(path)
|
||||||
|
field_review = review_condition_fields(
|
||||||
|
text=result.front_text or result.text,
|
||||||
|
rule_fields=result.field_candidates or {},
|
||||||
|
file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
|
||||||
|
)
|
||||||
self.text_extract_status[item.file_name] = {
|
self.text_extract_status[item.file_name] = {
|
||||||
"status": result.status,
|
"status": result.status,
|
||||||
"path": str(path),
|
"path": str(path),
|
||||||
"content_hash": result.content_hash,
|
"content_hash": result.content_hash,
|
||||||
"section_candidates": result.section_candidates,
|
"section_candidates": result.section_candidates,
|
||||||
"field_candidates": result.field_candidates,
|
"field_candidates": field_review.get("selected_fields", result.field_candidates),
|
||||||
|
"field_review": field_review,
|
||||||
"front_text": result.front_text,
|
"front_text": result.front_text,
|
||||||
"error_message": result.error_message,
|
"error_message": result.error_message,
|
||||||
}
|
}
|
||||||
@@ -303,6 +326,18 @@ class RegulatoryWorkflowExecutor:
|
|||||||
texts[item.file_name] = result.text
|
texts[item.file_name] = result.text
|
||||||
return texts
|
return texts
|
||||||
|
|
||||||
|
def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
    """Review ``payload`` for ``stage`` and record the outcome.

    The review returned by ``review_workflow_payload`` is stored in
    ``self.llm_reviews`` under ``stage`` and saved on ``self.batch`` as a
    JSON artifact named ``llm_review_<stage>.json``.

    Args:
        stage: Workflow node code the review belongs to.
        payload: Data handed to ``review_workflow_payload`` for that stage.

    Returns:
        The review dict produced by ``review_workflow_payload``.
    """
    outcome = review_workflow_payload(stage=stage, payload=payload)
    self.llm_reviews[stage] = outcome

    # Serialize once up front; non-ASCII text is kept readable in the artifact.
    serialized = json.dumps(outcome, ensure_ascii=False, indent=2)
    artifact_metadata = {"artifact": "llm_review", "stage": stage}
    save_artifact(
        self.batch,
        name=f"llm_review_{stage}.json",
        artifact_type="json",
        content=serialized,
        metadata=artifact_metadata,
    )
    return outcome
|
||||||
|
|
||||||
|
|
||||||
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
|
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
|
||||||
executor = RegulatoryWorkflowExecutor(batch)
|
executor = RegulatoryWorkflowExecutor(batch)
|
||||||
|
|||||||
@@ -121,6 +121,7 @@ def test_detect_regulatory_condition_uses_llm_review_for_better_product_name(
|
|||||||
monkeypatch, settings, tmp_path, django_user_model
|
monkeypatch, settings, tmp_path, django_user_model
|
||||||
):
|
):
|
||||||
settings.MEDIA_ROOT = tmp_path
|
settings.MEDIA_ROOT = tmp_path
|
||||||
|
settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
conversation = Conversation.objects.create(user=user, title="会话")
|
conversation = Conversation.objects.create(user=user, title="会话")
|
||||||
summary = FileSummaryBatch.objects.create(
|
summary = FileSummaryBatch.objects.create(
|
||||||
|
|||||||
@@ -263,3 +263,50 @@ def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_p
|
|||||||
assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists()
|
assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists()
|
||||||
assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists()
|
assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists()
|
||||||
assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()
|
assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_records_llm_review_artifacts_for_review_nodes(
    monkeypatch, settings, tmp_path, django_user_model
):
    """Each reviewed workflow node must leave an ``llm_review_*.json`` artifact."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"

    # Minimal owner / conversation / file-summary fixture with a single text file.
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    source_file = tmp_path / "ifu.txt"
    source_file.write_text("产品名称:甲胎蛋白检测试剂盒\n型号规格:20人份/盒", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(source_file),
    )

    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review_batch.condition_json = {"confirmed": True, "confirmed_conditions": {"product_category": "体外诊断试剂"}}
    review_batch.save(update_fields=["condition_json"])

    def fake_review(stage, payload):
        # Stand-in for the workflow-level review: always reports success.
        return {"status": "success", "stage": stage, "result": {"reviewed": True}, "error_message": ""}

    monkeypatch.setattr(
        "review_agent.regulatory_review.workflow.review_workflow_payload",
        fake_review,
    )

    start_regulatory_review_workflow(review_batch, async_run=False)

    saved_names = set(RegulatoryArtifact.objects.filter(batch=review_batch).values_list("name", flat=True))
    expected_names = (
        "llm_review_completeness_check.json",
        "llm_review_text_extract.json",
        "llm_review_structure_check.json",
        "llm_review_consistency_check.json",
        "llm_review_risk_assess.json",
    )
    for expected in expected_names:
        assert expected in saved_names
|
||||||
|
|||||||
Reference in New Issue
Block a user