diff --git a/review_agent/regulatory_review/services/llm_review.py b/review_agent/regulatory_review/services/llm_review.py index f712ec9..62357b2 100644 --- a/review_agent/regulatory_review/services/llm_review.py +++ b/review_agent/regulatory_review/services/llm_review.py @@ -1,10 +1,13 @@ from __future__ import annotations import json +import os import re from collections.abc import Callable from typing import Any +from django.conf import settings + from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion @@ -22,6 +25,16 @@ def review_condition_fields( llm_fields: dict[str, str] = {} status = "skipped" error_message = "" + if not _should_call_llm(completion_func): + selected_fields, selected_sources = _select_fields(rule_fields, llm_fields) + return { + "status": status, + "error_message": error_message, + "rule_fields": _clean_fields(rule_fields), + "llm_fields": llm_fields, + "selected_fields": selected_fields, + "selected_sources": selected_sources, + } try: raw = (completion_func or generate_completion)(_condition_messages(text, rule_fields, file_context), temperature=0.0) payload = _parse_json_object(raw) @@ -48,6 +61,13 @@ def review_workflow_payload( payload: dict[str, Any], completion_func: Callable[..., str] | None = None, ) -> dict[str, Any]: + if not _should_call_llm(completion_func): + return { + "status": "skipped", + "stage": stage, + "result": {}, + "error_message": "", + } try: raw = (completion_func or generate_completion)(_workflow_messages(stage, payload), temperature=0.0) parsed = _parse_json_object(raw) @@ -122,6 +142,14 @@ def _parse_json_object(raw: str) -> dict[str, Any]: return parsed +def _should_call_llm(completion_func: Callable[..., str] | None) -> bool: + if completion_func is not None: + return True + if os.environ.get("PYTEST_CURRENT_TEST") and not getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False): + return False + return bool(settings.LLM_API_KEY and settings.LLM_MODEL) + + def _clean_fields(fields: dict[str, Any]) -> dict[str, str]: clean = {} for label in FIELD_LABELS: diff --git a/review_agent/regulatory_review/workflow.py b/review_agent/regulatory_review/workflow.py index 4b70bdf..09499d2 100644 --- a/review_agent/regulatory_review/workflow.py +++ b/review_agent/regulatory_review/workflow.py @@ -23,6 +23,7 @@ from review_agent.regulatory_review.services.consistency_check import run_consis from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates +from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload from review_agent.regulatory_review.services.risk_assess import persist_findings from review_agent.regulatory_review.services.rule_loader import load_rule_file from review_agent.regulatory_review.services.structure_check import run_structure_check @@ -121,6 +122,7 @@ class RegulatoryWorkflowExecutor: self.findings = [] self.document_texts: dict[str, str] = {} self.text_extract_status: dict[str, dict[str, object]] = {} + self.llm_reviews: dict[str, dict[str, object]] = {} def run(self) -> None: self.batch.status = RegulatoryReviewBatch.Status.RUNNING @@ -188,10 +190,19 @@ class RegulatoryWorkflowExecutor: self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {}) return if node_code == "completeness_check": - self.findings.extend(run_completeness_check(self.batch.source_summary_batch, self._rules())) + findings = run_completeness_check(self.batch.source_summary_batch, self._rules()) + self.findings.extend(findings) + self._save_llm_review( + "completeness_check", + { + "findings": [finding.to_dict() for finding in findings], + "rules_count": len(self._rules().get("requirements", [])), + }, + ) return if node_code == "text_extract": self.document_texts = self._extract_source_texts() + self._save_llm_review("text_extract", {"files": self.text_extract_status}) save_artifact( self.batch, name="text_extract_status.json", @@ -201,12 +212,17 @@ class RegulatoryWorkflowExecutor: ) return if node_code == "structure_check": - self.findings.extend(run_structure_check(self.document_texts, self._rules())) + findings = run_structure_check(self.document_texts, self._rules()) + self.findings.extend(findings) + self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]}) return if node_code == "consistency_check": - self.findings.extend(run_consistency_check(self.document_texts)) + findings = run_consistency_check(self.document_texts) + self.findings.extend(findings) + self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]}) return if node_code == "risk_assess": + self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]}) issues = persist_findings(self.batch, self.findings) create_mock_notifications(self.batch) save_artifact( @@ -225,6 +241,7 @@ class RegulatoryWorkflowExecutor: } for issue in issues ], + "llm_reviews": self.llm_reviews, }, ensure_ascii=False, indent=2, @@ -290,12 +307,18 @@ class RegulatoryWorkflowExecutor: } continue result = extract_text(path) + field_review = review_condition_fields( + text=result.front_text or result.text, + rule_fields=result.field_candidates or {}, + file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}", + ) self.text_extract_status[item.file_name] = { "status": result.status, "path": str(path), "content_hash": result.content_hash, "section_candidates": result.section_candidates, - "field_candidates": result.field_candidates, + "field_candidates": field_review.get("selected_fields", result.field_candidates), + "field_review": field_review, "front_text": result.front_text, "error_message": result.error_message, } @@ -303,6 +326,18 @@ class RegulatoryWorkflowExecutor: texts[item.file_name] = result.text return texts + def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]: + review = review_workflow_payload(stage=stage, payload=payload) + self.llm_reviews[stage] = review + save_artifact( + self.batch, + name=f"llm_review_{stage}.json", + artifact_type="json", + content=json.dumps(review, ensure_ascii=False, indent=2), + metadata={"artifact": "llm_review", "stage": stage}, + ) + return review + def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None: executor = RegulatoryWorkflowExecutor(batch) diff --git a/tests/test_regulatory_condition.py b/tests/test_regulatory_condition.py index e8bb232..334ba4a 100644 --- a/tests/test_regulatory_condition.py +++ b/tests/test_regulatory_condition.py @@ -121,6 +121,7 @@ def test_detect_regulatory_condition_uses_llm_review_for_better_product_name( monkeypatch, settings, tmp_path, django_user_model ): settings.MEDIA_ROOT = tmp_path + settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True user = django_user_model.objects.create_user(username="owner", password="pass") conversation = Conversation.objects.create(user=user, title="会话") summary = FileSummaryBatch.objects.create( diff --git a/tests/test_regulatory_workflow.py b/tests/test_regulatory_workflow.py index 98dcb2a..893b103 100644 --- a/tests/test_regulatory_workflow.py +++ b/tests/test_regulatory_workflow.py @@ -263,3 +263,50 @@ def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_p assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists() assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists() assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists() + + +def test_workflow_records_llm_review_artifacts_for_review_nodes( + monkeypatch, settings, tmp_path, django_user_model +): + settings.MEDIA_ROOT = tmp_path + settings.REGULATORY_REVIEW_ASYNC = False + settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag" + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + summary = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + batch_no="FS-OK", + status=FileSummaryBatch.Status.SUCCESS, + ) + ifu_path = tmp_path / "ifu.txt" + ifu_path.write_text("产品名称:甲胎蛋白检测试剂盒\n型号规格:20人份/盒", encoding="utf-8") + FileSummaryItem.objects.create( + batch=summary, + file_index=1, + file_name="说明书.txt", + file_type="txt", + relative_path="说明书.txt", + storage_path=str(ifu_path), + ) + batch = create_regulatory_review_batch( + conversation=conversation, + user=user, + source_summary_batch=summary, + ) + batch.condition_json = {"confirmed": True, "confirmed_conditions": {"product_category": "体外诊断试剂"}} + batch.save(update_fields=["condition_json"]) + + monkeypatch.setattr( + "review_agent.regulatory_review.workflow.review_workflow_payload", + lambda stage, payload: {"status": "success", "stage": stage, "result": {"reviewed": True}, "error_message": ""}, + ) + + start_regulatory_review_workflow(batch, async_run=False) + + artifact_names = set(RegulatoryArtifact.objects.filter(batch=batch).values_list("name", flat=True)) + assert "llm_review_completeness_check.json" in artifact_names + assert "llm_review_text_extract.json" in artifact_names + assert "llm_review_structure_check.json" in artifact_names + assert "llm_review_consistency_check.json" in artifact_names + assert "llm_review_risk_assess.json" in artifact_names