feat(regulatory): 为核查流程增加LLM复核记录

2026-06-07 11:52:54 +08:00
parent 945669b9c2
commit 8f16675a92
4 changed files with 115 additions and 4 deletions
--- a/review_agent/regulatory_review/services/llm_review.py
+++ b/review_agent/regulatory_review/services/llm_review.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
 import json
 import os
 import re
 from collections.abc import Callable
 from typing import Any
 from django.conf import settings
 from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
@@ -22,6 +25,16 @@ def review_condition_fields(
    llm_fields: dict[str, str] = {}
    status = "skipped"
    error_message = ""
    if not _should_call_llm(completion_func):
        selected_fields, selected_sources = _select_fields(rule_fields, llm_fields)
        return {
            "status": status,
            "error_message": error_message,
            "rule_fields": _clean_fields(rule_fields),
            "llm_fields": llm_fields,
            "selected_fields": selected_fields,
            "selected_sources": selected_sources,
        }
    try:
        raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0)
        payload = _parse_json_object(raw)
@@ -48,6 +61,13 @@ def review_workflow_payload(
    payload: dict[str, Any],
    completion_func: Callable[..., str] | None = None,
 ) -> dict[str, Any]:
    if not _should_call_llm(completion_func):
        return {
            "status": "skipped",
            "stage": stage,
            "result": {},
            "error_message": "",
        }
    try:
        raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0)
        parsed = _parse_json_object(raw)
@@ -122,6 +142,14 @@ def _parse_json_object(raw: str) -> dict[str, Any]:
    return parsed
 def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
    if completion_func is not None:
        return True
    if os.environ.get("PYTEST_CURRENT_TEST") and not getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False):
        return False
    return bool(settings.LLM_API_KEY and settings.LLM_MODEL)
 def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
    clean = {}
    for label in FIELD_LABELS:
--- a/review_agent/regulatory_review/workflow.py
+++ b/review_agent/regulatory_review/workflow.py
@@ -23,6 +23,7 @@ from review_agent.regulatory_review.services.consistency_check import run_consis
 from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
 from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
 from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
 from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
 from review_agent.regulatory_review.services.risk_assess import persist_findings
 from review_agent.regulatory_review.services.rule_loader import load_rule_file
 from review_agent.regulatory_review.services.structure_check import run_structure_check
@@ -121,6 +122,7 @@ class RegulatoryWorkflowExecutor:
        self.findings = []
        self.document_texts: dict[str, str] = {}
        self.text_extract_status: dict[str, dict[str, object]] = {}
        self.llm_reviews: dict[str, dict[str, object]] = {}
    def run(self) -> None:
        self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -188,10 +190,19 @@ class RegulatoryWorkflowExecutor:
            self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
            return
        if node_code == "completeness_check":
-            self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules()))
+            findings = run_completeness_check(self.batch.source_summary_batch, self._rules())
            self.findings.extend(findings)
            self._save_llm_review(
                "completeness_check",
                {
                    "findings": [finding.to_dict() for finding in findings],
                    "rules_count": len(self._rules().get("requirements", [])),
                },
            )
            return
        if node_code == "text_extract":
            self.document_texts = self._extract_source_texts()
            self._save_llm_review("text_extract", {"files": self.text_extract_status})
            save_artifact(
                self.batch,
                name="text_extract_status.json",
@@ -201,12 +212,17 @@ class RegulatoryWorkflowExecutor:
            )
            return
        if node_code == "structure_check":
-            self.findings.extend(run_structure_check(self.document_texts, self._rules()))
+            findings = run_structure_check(self.document_texts, self._rules())
            self.findings.extend(findings)
            self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
            return
        if node_code == "consistency_check":
-            self.findings.extend(run_consistency_check(self.document_texts))
+            findings = run_consistency_check(self.document_texts)
            self.findings.extend(findings)
            self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
            return
        if node_code == "risk_assess":
            self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
            issues = persist_findings(self.batch, self.findings)
            create_mock_notifications(self.batch)
            save_artifact(
@@ -225,6 +241,7 @@ class RegulatoryWorkflowExecutor:
                            }
                            for issue in issues
                        ],
                        "llm_reviews": self.llm_reviews,
                    },
                    ensure_ascii=False,
                    indent=2,
@@ -290,12 +307,18 @@ class RegulatoryWorkflowExecutor:
                }
                continue
            result = extract_text(path)
            field_review = review_condition_fields(
                text=result.front_text or result.text,
                rule_fields=result.field_candidates or {},
                file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
            )
            self.text_extract_status[item.file_name] = {
                "status": result.status,
                "path": str(path),
                "content_hash": result.content_hash,
                "section_candidates": result.section_candidates,
-                "field_candidates": result.field_candidates,
+                "field_candidates": field_review.get("selected_fields", result.field_candidates),
                "field_review": field_review,
                "front_text": result.front_text,
                "error_message": result.error_message,
            }
@@ -303,6 +326,18 @@ class RegulatoryWorkflowExecutor:
                texts[item.file_name] = result.text
        return texts
    def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
        """Run the LLM review for one workflow stage and record its outcome.

        The dict returned by ``review_workflow_payload`` is stored in
        ``self.llm_reviews`` under ``stage`` and is also saved on the batch as
        a JSON artifact named ``llm_review_<stage>.json``.

        Args:
            stage: Node code of the workflow stage being reviewed.
            payload: Stage data handed to ``review_workflow_payload``.

        Returns:
            The review dict produced for this stage.
        """
        stage_review = review_workflow_payload(stage=stage, payload=payload)
        self.llm_reviews[stage] = stage_review

        # ensure_ascii=False keeps non-ASCII text readable in the stored artifact.
        serialized_review = json.dumps(stage_review, ensure_ascii=False, indent=2)
        artifact_metadata = {"artifact": "llm_review", "stage": stage}
        save_artifact(
            self.batch,
            name=f"llm_review_{stage}.json",
            artifact_type="json",
            content=serialized_review,
            metadata=artifact_metadata,
        )
        return stage_review
 def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
    executor = RegulatoryWorkflowExecutor(batch)
--- a/tests/test_regulatory_condition.py
+++ b/tests/test_regulatory_condition.py
@@ -121,6 +121,7 @@ def test_detect_regulatory_condition_uses_llm_review_for_better_product_name(
    monkeypatch, settings, tmp_path, django_user_model
 ):
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    summary = FileSummaryBatch.objects.create(
--- a/tests/test_regulatory_workflow.py
+++ b/tests/test_regulatory_workflow.py
@@ -263,3 +263,50 @@ def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_p
    assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists()
    assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists()
    assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()
 def test_workflow_records_llm_review_artifacts_for_review_nodes(
    monkeypatch, settings, tmp_path, django_user_model
 ):
    """A workflow run stores one ``llm_review_<stage>.json`` artifact per review node."""
    # Media goes to the temp dir, the run is synchronous, and the RAG store
    # points at a path this test never creates.
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    # A single plain-text source file is registered with the summary batch.
    source_file = tmp_path / "ifu.txt"
    source_file.write_text("产品名称：甲胎蛋白检测试剂盒\n型号规格：20人份/盒", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(source_file),
    )

    review_batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review_batch.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
    }
    review_batch.save(update_fields=["condition_json"])

    def fake_review(stage, payload):
        # Stand-in for the real review call: always reports success for the stage.
        return {"status": "success", "stage": stage, "result": {"reviewed": True}, "error_message": ""}

    monkeypatch.setattr(
        "review_agent.regulatory_review.workflow.review_workflow_payload",
        fake_review,
    )

    start_regulatory_review_workflow(review_batch, async_run=False)

    stored_names = set(
        RegulatoryArtifact.objects.filter(batch=review_batch).values_list("name", flat=True)
    )
    expected_names = (
        "llm_review_completeness_check.json",
        "llm_review_text_extract.json",
        "llm_review_structure_check.json",
        "llm_review_consistency_check.json",
        "llm_review_risk_assess.json",
    )
    for expected_name in expected_names:
        assert expected_name in stored_names