feat(regulatory): 对齐附件4目录核查规则

This commit is contained in:
2026-06-07 09:27:42 +08:00
parent bbd2d3532a
commit 1bdc7322cf
15 changed files with 753 additions and 43 deletions

View File

@@ -0,0 +1,8 @@
[
{"code": "1", "title": "监管信息", "children": ["章节目录", "申请表", "术语/缩写词列表", "产品列表", "关联文件", "申报前与监管机构的联系情况和沟通记录", "符合性声明"]},
{"code": "2", "title": "综述资料", "children": ["章节目录", "概述", "产品描述", "预期用途", "申报产品上市历史", "其他需说明的内容"]},
{"code": "3", "title": "非临床资料", "children": ["章节目录", "产品风险管理资料", "体外诊断试剂安全和性能基本原则清单", "产品技术要求及检验报告", "分析性能研究", "稳定性研究", "阳性判断值或参考区间研究", "其他资料"]},
{"code": "4", "title": "临床评价资料", "children": ["章节目录", "临床评价资料"]},
{"code": "5", "title": "产品说明书和标签样稿", "children": ["章节目录", "产品说明书", "标签样稿", "其他资料"]},
{"code": "6", "title": "质量管理体系文件", "children": ["综述", "章节目录", "生产制造信息", "质量管理体系程序", "管理职责程序", "资源管理程序", "产品实现程序", "质量管理体系的测量/分析和改进程序", "其他质量体系程序信息", "质量管理体系核查文件"]}
]

View File

@@ -42,3 +42,30 @@ def test_completeness_check_matches_existing_files_and_reports_missing(django_us
missing = next(finding for finding in findings if finding.rule_code == "registration_test_report")
assert missing.severity == "blocking"
assert missing.category == "completeness"
def test_completeness_check_matches_attachment4_directory_names(django_user_model):
    """Check that a file stored under the "1.2" directory satisfies its completeness rule.

    A successful summary batch is created with a single PDF item whose file
    name, relative path and directory level all point at the application-form
    directory. The completeness check must then:

    * report no finding for ``attachment4_1_2_application_form``;
    * still report ``attachment4_6_quality_system`` with ``high`` severity and
      evidence listing the three fields that were searched.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-A4",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    # The only uploaded file: an application form filed under chapter 1, section 1.2.
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="注册申请表.pdf",
        file_type="pdf",
        relative_path="1.监管信息/1.2申请表/注册申请表.pdf",
        storage_path="x/app.pdf",
    )

    findings = run_completeness_check(summary_batch, load_rule_file())

    # The application-form rule is satisfied, so it must not show up as a finding.
    assert all(
        entry.rule_code != "attachment4_1_2_application_form" for entry in findings
    )

    # Nothing was uploaded for the quality-system rule, so it must be reported.
    missing_qms = next(
        entry for entry in findings if entry.rule_code == "attachment4_6_quality_system"
    )
    expected_fields = ["file_name", "relative_path", "directory_level"]
    assert missing_qms.severity == "high"
    assert missing_qms.evidence["searched_fields"] == expected_fields

View File

@@ -12,3 +12,16 @@ def test_consistency_check_reports_product_name_mismatch():
assert len(findings) == 1
assert findings[0].category == "consistency"
assert "产品名称" in findings[0].title
def test_consistency_check_reports_registration_scope_fields():
    """Check that differing scope fields across two documents are reported.

    The two document texts disagree on the management category and on the
    clinical evaluation path, while the classification code and registration
    type lines are identical. The consistency check must produce a finding
    title for each of the two disagreeing fields.
    """
    application_form_text = (
        "管理类别:第二类\n分类编码6840\n注册类型:首次注册\n临床评价路径:免临床"
    )
    overview_text = (
        "管理类别:第三类\n分类编码6840\n注册类型:首次注册\n临床评价路径:临床试验"
    )

    findings = run_consistency_check(
        {
            "申请表.docx": application_form_text,
            "综述资料.docx": overview_text,
        }
    )

    titles = [item.title for item in findings]
    for expected_title in (
        "管理类别在不同文件中不一致",
        "临床评价路径在不同文件中不一致",
    ):
        assert expected_title in titles

View File

@@ -6,6 +6,7 @@ from review_agent.regulatory_review.services.rag_citation import (
)
from review_agent.regulatory_review.services.rag_embedding import SiliconFlowEmbeddingProvider
from review_agent.regulatory_review.services.rag_index import chunk_text
from review_agent.regulatory_review.services.rag_index import collect_source_chunks
def test_siliconflow_embedding_provider_posts_expected_payload(monkeypatch):
@@ -70,3 +71,18 @@ def test_retrieve_citations_raises_when_index_missing(settings, tmp_path):
with pytest.raises(RagIndexUnavailable):
retrieve_citations("注册检验报告", embedding_provider=lambda texts: [[0.1]])
def test_collect_source_chunks_requires_attachment4_extraction(monkeypatch, tmp_path):
    """Check that a failed extraction of the ``.doc`` source is not swallowed.

    A placeholder ``.doc`` file is written into a temporary source directory
    and the extractor referenced by ``rag_index`` is replaced with a stub that
    always raises. ``collect_source_chunks`` must then raise ``RuntimeError``
    with a message matching ``"附件 4"``. The stub's own message does not
    contain that text, so the matched message presumably comes from
    ``collect_source_chunks`` itself (verify against the implementation).
    """

    def raise_conversion_failure(path):
        # Stand-in extractor: simulates the legacy .doc conversion failing.
        raise RuntimeError("无法通过 LibreOffice 转换法规 .doc 材料")

    source_dir = tmp_path / "sources"
    source_dir.mkdir()
    legacy_doc = source_dir / "附件 4 体外诊断试剂注册申报资料要求及说明.doc"
    legacy_doc.write_bytes(b"legacy-doc")

    monkeypatch.setattr(
        "review_agent.regulatory_review.services.rag_index.extract_text_from_path",
        raise_conversion_failure,
    )

    with pytest.raises(RuntimeError, match="附件 4"):
        collect_source_chunks(source_dir)

View File

@@ -1,4 +1,5 @@
from pathlib import Path
import json
import pytest
from django.core.management import call_command
@@ -27,6 +28,30 @@ def test_load_rule_file_reads_demo_requirements():
assert "essential_principles_checklist" in codes
def test_load_rule_file_covers_attachment4_outline():
    """Check that the rule file has a rule for every entry of the outline fixture.

    For each chapter in ``attachment4_outline.json`` there must be a
    requirement with the same title and ``attachment4_code``, ``high``
    severity and a non-empty ``citation_query``. For each child title there
    must be a requirement with that title whose ``attachment4_code`` starts
    with ``"<chapter code>."`` and which carries a ``rule_id``,
    ``file_keywords``, an allowed severity and a ``citation_query``.

    Lookups pass a ``None`` default to ``next()`` and assert with a message,
    so a missing rule fails with the offending chapter or child named rather
    than with a bare ``StopIteration`` raised inside the loops.
    """
    requirements = load_rule_file()["requirements"]

    # NOTE(review): this path is relative to the current working directory, so
    # the test presumably expects pytest to be started from the repository root.
    outline_path = Path("tests/fixtures/regulatory/attachment4_outline.json")
    outline = json.loads(outline_path.read_text(encoding="utf-8"))

    for chapter in outline:
        code = chapter["code"]
        title = chapter["title"]

        chapter_rule = next(
            (
                item
                for item in requirements
                if item["title"] == title and item.get("attachment4_code") == code
            ),
            None,
        )
        assert chapter_rule is not None, (
            f"no requirement found for outline chapter {code!r} titled {title!r}"
        )
        # Already guaranteed by the lookup filter; kept as an explicit contract check.
        assert chapter_rule["attachment4_code"] == code
        assert chapter_rule["severity"] == "high", f"chapter {code!r}: unexpected severity"
        assert chapter_rule["citation_query"], f"chapter {code!r}: empty citation_query"

        # Child rules are matched by title plus a "<chapter code>." code prefix,
        # because some child titles repeat across chapters in the fixture.
        child_prefix = f"{code}."
        for child in chapter["children"]:
            child_rule = next(
                (
                    item
                    for item in requirements
                    if item["title"] == child
                    and str(item.get("attachment4_code", "")).startswith(child_prefix)
                ),
                None,
            )
            label = f"chapter {code!r} child {child!r}"
            assert child_rule is not None, f"no requirement found for {label}"
            assert child_rule["rule_id"], f"{label}: empty rule_id"
            assert child_rule["file_keywords"], f"{label}: empty file_keywords"
            assert child_rule["severity"] in {"blocking", "high", "medium"}, (
                f"{label}: unexpected severity"
            )
            assert child_rule["citation_query"], f"{label}: empty citation_query"
def test_compute_file_sha256_changes_when_file_changes(tmp_path):
path = tmp_path / "rule.yaml"
path.write_text("code: demo\n", encoding="utf-8")

View File

@@ -11,3 +11,15 @@ def test_structure_check_reports_missing_instruction_sections():
assert any(finding.rule_code == "instructions_for_use:储存条件" for finding in findings)
assert all("样本要求" not in finding.title for finding in findings)
def test_structure_check_reports_missing_attachment4_outline_heading():
    """Check that a heading absent from the directory text is reported.

    The directory text lists headings numbered 1, 1.2, 2 and 3 only. The
    structure check must therefore return a finding with rule code
    ``attachment4_4_clinical_evaluation`` in the ``structure`` category, whose
    evidence records the expected heading title.
    """
    directory_listing = "1. 监管信息\n1.2 申请表\n2. 综述资料\n3. 非临床资料\n"

    findings = run_structure_check(
        {"申报资料目录.txt": directory_listing},
        load_rule_file(),
    )

    clinical_gap = next(
        item for item in findings if item.rule_code == "attachment4_4_clinical_evaluation"
    )
    assert clinical_gap.category == "structure"
    assert clinical_gap.evidence["expected_title"] == "临床评价资料"

View File

@@ -7,6 +7,7 @@ from review_agent.models import (
FileSummaryItem,
Message,
RegulatoryIssue,
RegulatoryArtifact,
RegulatoryReviewBatch,
WorkflowEvent,
WorkflowNodeRun,
@@ -201,4 +202,6 @@ def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_p
workflow_type="regulatory_review",
workflow_batch_id=batch.pk,
).count() == 3
assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists()
assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists()
assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()