feat(regulatory): 对齐附件4目录核查规则

2026-06-07 09:27:42 +08:00
parent bbd2d3532a
commit 1bdc7322cf
15 changed files with 753 additions and 43 deletions
--- a/review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml
+++ b/review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml
@@ -1,58 +1,503 @@
 code: nmpa_ivd_registration_v1
-name: NMPA IVD 注册资料 Demo 规则
+name: NMPA IVD 注册资料附件 4 对齐规则
 rag_collection: nmpa_ivd_registration_v1
-source_material_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
+source_material_dir: docs/0.原始材料
+attachment4_required_codes:
+  - "1"
+  - "1.1"
+  - "1.2"
+  - "1.3"
+  - "1.4"
+  - "1.5"
+  - "1.6"
+  - "1.7"
+  - "2"
+  - "2.1"
+  - "2.2"
+  - "2.3"
+  - "2.4"
+  - "2.5"
+  - "2.6"
+  - "3"
+  - "3.1"
+  - "3.2"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+  - "3.6"
+  - "3.7"
+  - "3.8"
+  - "4"
+  - "4.1"
+  - "4.2"
+  - "5"
+  - "5.1"
+  - "5.2"
+  - "5.3"
+  - "5.4"
+  - "6"
+  - "6.1"
+  - "6.2"
+  - "6.3"
+  - "6.4"
+  - "6.5"
+  - "6.6"
+  - "6.7"
+  - "6.8"
+  - "6.9"
+  - "6.10"
 requirements:
-  - code: product_technical_requirements
-    title: 产品技术要求
+  - code: attachment4_1_regulatory_info
+    rule_id: A4-1
+    attachment4_code: "1"
+    title: 监管信息
+    type: chapter
+    severity: high
+    category: completeness
+    file_keywords: [监管信息]
+    aliases: [监管资料]
+    suggestion: 请补充监管信息章节及其目录项。
+    citation_query: 附件4 监管信息 体外诊断试剂 注册申报资料
+    structure_required: true
+  - code: attachment4_1_1_toc
+    rule_id: A4-1.1
+    attachment4_code: "1.1"
+    title: 章节目录
+    type: directory
+    severity: medium
+    category: completeness
+    file_keywords: [章节目录, 目录]
+    aliases: [监管信息目录]
+    suggestion: 请补充监管信息章节目录。
+    citation_query: 附件4 监管信息 章节目录
+  - code: attachment4_1_2_application_form
+    rule_id: A4-1.2
+    attachment4_code: "1.2"
+    title: 申请表
    type: required
    severity: blocking
    category: completeness
-    file_keywords:
-      - 产品技术要求
-    suggestion: 请补充产品技术要求并确认版本与注册申请资料一致。
-    citation_query: 体外诊断试剂 产品技术要求 注册申报资料
-  - code: instructions_for_use
-    title: 说明书
+    file_keywords: [申请表, 注册申请表]
+    aliases: [医疗器械注册申请表]
+    suggestion: 请补充注册申请表并核对注册类型、管理类别和分类编码。
+    citation_query: 附件4 监管信息 申请表
+  - code: attachment4_1_3_terms
+    rule_id: A4-1.3
+    attachment4_code: "1.3"
+    title: 术语/缩写词列表
+    type: recommended
+    severity: medium
+    category: completeness
+    file_keywords: [术语, 缩写词, 缩略语]
+    suggestion: 请补充术语和缩写词列表。
+    citation_query: 附件4 术语 缩写词列表
+  - code: attachment4_1_4_product_list
+    rule_id: A4-1.4
+    attachment4_code: "1.4"
+    title: 产品列表
    type: required
    severity: high
    category: completeness
-    file_keywords:
-      - 说明书
-      - 使用说明
-    required_sections:
-      - 储存条件
-      - 有效期
-      - 样本要求
-    suggestion: 请补充说明书并核对储存条件、有效期和样本要求章节。
-    citation_query: 体外诊断试剂 说明书 储存条件 有效期 样本要求
+    file_keywords: [产品列表, 产品清单]
+    suggestion: 请补充申报产品列表。
+    citation_query: 附件4 产品列表
+  - code: attachment4_1_5_related_files
+    rule_id: A4-1.5
+    attachment4_code: "1.5"
+    title: 关联文件
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [关联文件, 关联注册, 引用文件]
+    suggestion: 如存在关联注册或引用资料，请补充关联文件说明。
+    citation_query: 附件4 关联文件
+  - code: attachment4_1_6_pre_submission
+    rule_id: A4-1.6
+    attachment4_code: "1.6"
+    title: 申报前与监管机构的联系情况和沟通记录
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [沟通记录, 监管机构, 申报前]
+    suggestion: 如有申报前沟通，请补充沟通记录；如无，请说明不适用。
+    citation_query: 附件4 申报前 监管机构 沟通记录
+  - code: attachment4_1_7_declaration
+    rule_id: A4-1.7
+    attachment4_code: "1.7"
+    title: 符合性声明
+    type: required
+    severity: blocking
+    category: completeness
+    file_keywords: [符合性声明, 声明]
+    suggestion: 请补充符合性声明。
+    citation_query: 附件4 符合性声明
+  - code: attachment4_2_summary
+    rule_id: A4-2
+    attachment4_code: "2"
+    title: 综述资料
+    type: chapter
+    severity: high
+    category: completeness
+    file_keywords: [综述资料]
+    suggestion: 请补充综述资料章节。
+    citation_query: 附件4 综述资料
+    structure_required: true
+  - code: attachment4_2_1_toc
+    rule_id: A4-2.1
+    attachment4_code: "2.1"
+    title: 章节目录
+    type: directory
+    severity: medium
+    category: completeness
+    file_keywords: [章节目录, 综述资料目录]
+    suggestion: 请补充综述资料章节目录。
+    citation_query: 附件4 综述资料 章节目录
+  - code: attachment4_2_2_overview
+    rule_id: A4-2.2
+    attachment4_code: "2.2"
+    title: 概述
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [概述]
+    suggestion: 请补充产品概述。
+    citation_query: 附件4 概述
+  - code: attachment4_2_3_product_description
+    rule_id: A4-2.3
+    attachment4_code: "2.3"
+    title: 产品描述
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [产品描述]
+    suggestion: 请补充产品描述。
+    citation_query: 附件4 产品描述
+  - code: attachment4_2_4_intended_use
+    rule_id: A4-2.4
+    attachment4_code: "2.4"
+    title: 预期用途
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [预期用途]
+    suggestion: 请补充预期用途资料。
+    citation_query: 附件4 预期用途
+  - code: attachment4_2_5_marketing_history
+    rule_id: A4-2.5
+    attachment4_code: "2.5"
+    title: 申报产品上市历史
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [上市历史]
+    suggestion: 如产品已有上市历史，请补充相关说明；如无，请说明不适用。
+    citation_query: 附件4 上市历史
+  - code: attachment4_2_6_other_summary
+    rule_id: A4-2.6
+    attachment4_code: "2.6"
+    title: 其他需说明的内容
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [其他需说明, 其他说明]
+    suggestion: 请补充其他需说明内容或不适用说明。
+    citation_query: 附件4 其他需说明
+  - code: attachment4_3_nonclinical
+    rule_id: A4-3
+    attachment4_code: "3"
+    title: 非临床资料
+    type: chapter
+    severity: high
+    category: completeness
+    file_keywords: [非临床资料]
+    suggestion: 请补充非临床资料章节。
+    citation_query: 附件4 非临床资料
+    structure_required: true
+  - code: attachment4_3_1_toc
+    rule_id: A4-3.1
+    attachment4_code: "3.1"
+    title: 章节目录
+    type: directory
+    severity: medium
+    category: completeness
+    file_keywords: [章节目录, 非临床资料目录]
+    suggestion: 请补充非临床资料章节目录。
+    citation_query: 附件4 非临床资料 章节目录
+  - code: attachment4_3_2_risk_management
+    rule_id: A4-3.2
+    attachment4_code: "3.2"
+    title: 产品风险管理资料
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [产品风险管理, 风险管理资料]
+    suggestion: 请补充产品风险管理资料。
+    citation_query: 附件4 产品风险管理资料
+  - code: essential_principles_checklist
+    rule_id: A4-3.3
+    attachment4_code: "3.3"
+    title: 体外诊断试剂安全和性能基本原则清单
+    type: recommended
+    severity: medium
+    category: completeness
+    file_keywords: [安全和性能基本原则, 基本原则清单]
+    aliases: [安全和性能基本原则清单]
+    suggestion: 建议补充安全和性能基本原则清单，便于审评追溯。
+    citation_query: 附件4 安全和性能基本原则清单
+  - code: product_technical_requirements
+    rule_id: A4-3.4
+    attachment4_code: "3.4"
+    title: 产品技术要求及检验报告
+    type: required
+    severity: blocking
+    category: completeness
+    file_keywords: [产品技术要求, 注册检验报告, 检验报告]
+    aliases: [产品技术要求, 注册检验报告]
+    required_sections: [产品技术要求, 检验报告]
+    suggestion: 请补充产品技术要求及注册检验报告，并确认二者覆盖型号一致。
+    citation_query: 附件4 产品技术要求 检验报告
  - code: registration_test_report
+    rule_id: A4-3.4-R
+    attachment4_code: "3.4"
    title: 注册检验报告
    type: required
    severity: blocking
    category: completeness
-    file_keywords:
-      - 注册检验报告
-      - 检验报告
+    file_keywords: [注册检验报告, 检验报告]
    suggestion: 请补充注册检验报告并复核报告覆盖的产品型号。
-    citation_query: 体外诊断试剂 注册检验报告 注册申报资料
+    citation_query: 附件4 注册检验报告
+  - code: attachment4_3_5_analytical_performance
+    rule_id: A4-3.5
+    attachment4_code: "3.5"
+    title: 分析性能研究
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [分析性能研究, 分析性能]
+    suggestion: 请补充分析性能研究资料。
+    citation_query: 附件4 分析性能研究
+  - code: attachment4_3_6_stability
+    rule_id: A4-3.6
+    attachment4_code: "3.6"
+    title: 稳定性研究
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [稳定性研究, 稳定性]
+    suggestion: 请补充稳定性研究资料。
+    citation_query: 附件4 稳定性研究
+  - code: attachment4_3_7_reference_interval
+    rule_id: A4-3.7
+    attachment4_code: "3.7"
+    title: 阳性判断值或参考区间研究
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [阳性判断值, 参考区间]
+    suggestion: 请补充阳性判断值或参考区间研究资料。
+    citation_query: 附件4 阳性判断值 参考区间
+  - code: attachment4_3_8_other_nonclinical
+    rule_id: A4-3.8
+    attachment4_code: "3.8"
+    title: 其他资料
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [其他资料]
+    suggestion: 请补充非临床其他资料或不适用说明。
+    citation_query: 附件4 非临床 其他资料
+  - code: attachment4_4_clinical_evaluation
+    rule_id: A4-4
+    attachment4_code: "4"
+    title: 临床评价资料
+    type: chapter
+    severity: high
+    category: completeness
+    file_keywords: [临床评价资料, 临床资料]
+    suggestion: 请补充临床评价资料章节。
+    citation_query: 附件4 临床评价资料
+    structure_required: true
+  - code: attachment4_4_1_toc
+    rule_id: A4-4.1
+    attachment4_code: "4.1"
+    title: 章节目录
+    type: directory
+    severity: medium
+    category: completeness
+    file_keywords: [章节目录, 临床评价资料目录]
+    suggestion: 请补充临床评价资料章节目录。
+    citation_query: 附件4 临床评价资料 章节目录
  - code: clinical_evaluation
+    rule_id: A4-4.2
+    attachment4_code: "4.2"
    title: 临床评价资料
    type: conditional
    severity: high
    category: completeness
-    file_keywords:
-      - 临床评价
-      - 临床试验
+    file_keywords: [临床评价, 临床试验, 免临床, 同品种比对]
    suggestion: 请根据适用情形补充临床评价资料或说明豁免依据。
-    citation_query: 体外诊断试剂 临床评价资料 注册申报
-  - code: essential_principles_checklist
-    title: 安全和性能基本原则清单
-    type: recommended
+    citation_query: 附件4 临床评价资料 注册申报
+  - code: attachment4_5_ifu_label
+    rule_id: A4-5
+    attachment4_code: "5"
+    title: 产品说明书和标签样稿
+    type: chapter
+    severity: high
+    category: completeness
+    file_keywords: [产品说明书和标签样稿, 说明书, 标签样稿]
+    suggestion: 请补充产品说明书和标签样稿章节。
+    citation_query: 附件4 产品说明书 标签样稿
+    structure_required: true
+  - code: attachment4_5_1_toc
+    rule_id: A4-5.1
+    attachment4_code: "5.1"
+    title: 章节目录
+    type: directory
    severity: medium
    category: completeness
-    file_keywords:
-      - 安全和性能基本原则
-      - 基本原则清单
-    suggestion: 建议补充安全和性能基本原则清单，便于审评追溯。
-    citation_query: 体外诊断试剂 安全和性能基本原则清单
+    file_keywords: [章节目录, 说明书目录, 标签目录]
+    suggestion: 请补充产品说明书和标签样稿章节目录。
+    citation_query: 附件4 说明书 标签 章节目录
+  - code: instructions_for_use
+    rule_id: A4-5.2
+    attachment4_code: "5.2"
+    title: 产品说明书
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [说明书, 产品说明书, 使用说明]
+    aliases: [说明书]
+    required_sections: [储存条件, 有效期, 样本要求]
+    suggestion: 请补充说明书并核对储存条件、有效期和样本要求章节。
+    citation_query: 附件4 产品说明书 储存条件 有效期 样本要求
+  - code: attachment4_5_3_label
+    rule_id: A4-5.3
+    attachment4_code: "5.3"
+    title: 标签样稿
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [标签样稿, 标签]
+    suggestion: 请补充标签样稿。
+    citation_query: 附件4 标签样稿
+  - code: attachment4_5_4_other_ifu
+    rule_id: A4-5.4
+    attachment4_code: "5.4"
+    title: 其他资料
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [其他资料]
+    suggestion: 请补充说明书和标签相关其他资料或不适用说明。
+    citation_query: 附件4 说明书 标签 其他资料
+  - code: attachment4_6_quality_system
+    rule_id: A4-6
+    attachment4_code: "6"
+    title: 质量管理体系文件
+    type: chapter
+    severity: high
+    category: completeness
+    file_keywords: [质量管理体系文件, 质量体系, 质量管理体系]
+    suggestion: 请补充质量管理体系文件章节。
+    citation_query: 附件4 质量管理体系文件
+    structure_required: true
+  - code: attachment4_6_1_overview
+    rule_id: A4-6.1
+    attachment4_code: "6.1"
+    title: 综述
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [综述]
+    suggestion: 请补充质量管理体系综述。
+    citation_query: 附件4 质量管理体系 综述
+  - code: attachment4_6_2_toc
+    rule_id: A4-6.2
+    attachment4_code: "6.2"
+    title: 章节目录
+    type: directory
+    severity: medium
+    category: completeness
+    file_keywords: [章节目录, 质量管理体系目录]
+    suggestion: 请补充质量管理体系文件章节目录。
+    citation_query: 附件4 质量管理体系 章节目录
+  - code: attachment4_6_3_manufacturing
+    rule_id: A4-6.3
+    attachment4_code: "6.3"
+    title: 生产制造信息
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [生产制造信息, 生产制造]
+    suggestion: 请补充生产制造信息。
+    citation_query: 附件4 生产制造信息
+  - code: attachment4_6_4_qms_procedure
+    rule_id: A4-6.4
+    attachment4_code: "6.4"
+    title: 质量管理体系程序
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [质量管理体系程序, 质量体系程序]
+    suggestion: 请补充质量管理体系程序。
+    citation_query: 附件4 质量管理体系程序
+  - code: attachment4_6_5_management
+    rule_id: A4-6.5
+    attachment4_code: "6.5"
+    title: 管理职责程序
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [管理职责程序, 管理职责]
+    suggestion: 请补充管理职责程序。
+    citation_query: 附件4 管理职责程序
+  - code: attachment4_6_6_resource
+    rule_id: A4-6.6
+    attachment4_code: "6.6"
+    title: 资源管理程序
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [资源管理程序, 资源管理]
+    suggestion: 请补充资源管理程序。
+    citation_query: 附件4 资源管理程序
+  - code: attachment4_6_7_realization
+    rule_id: A4-6.7
+    attachment4_code: "6.7"
+    title: 产品实现程序
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [产品实现程序, 产品实现]
+    suggestion: 请补充产品实现程序。
+    citation_query: 附件4 产品实现程序
+  - code: attachment4_6_8_measurement
+    rule_id: A4-6.8
+    attachment4_code: "6.8"
+    title: 质量管理体系的测量/分析和改进程序
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [测量, 分析和改进, 改进程序]
+    suggestion: 请补充质量管理体系测量、分析和改进程序。
+    citation_query: 附件4 测量 分析 改进程序
+  - code: attachment4_6_9_other_qms
+    rule_id: A4-6.9
+    attachment4_code: "6.9"
+    title: 其他质量体系程序信息
+    type: conditional
+    severity: medium
+    category: completeness
+    file_keywords: [其他质量体系程序, 其他质量体系]
+    suggestion: 请补充其他质量体系程序信息或不适用说明。
+    citation_query: 附件4 其他质量体系程序信息
+  - code: attachment4_6_10_qms_audit
+    rule_id: A4-6.10
+    attachment4_code: "6.10"
+    title: 质量管理体系核查文件
+    type: required
+    severity: high
+    category: completeness
+    file_keywords: [质量管理体系核查文件, 体系核查文件, 核查文件]
+    suggestion: 请补充质量管理体系核查文件。
+    citation_query: 附件4 质量管理体系核查文件
--- a/review_agent/regulatory_review/services/completeness_check.py
+++ b/review_agent/regulatory_review/services/completeness_check.py
@@ -8,12 +8,17 @@ def run_completeness_check(batch: FileSummaryBatch, rule_set: dict) -> list[Find
    items = list(batch.items.order_by("file_index"))
    findings: list[Finding] = []
    for requirement in rule_set.get("requirements", []):
-        if requirement.get("type") not in {"required", "conditional", "recommended"}:
+        if requirement.get("type") not in {"required", "conditional", "recommended", "chapter", "directory"}:
            continue
        matched = [
            item
            for item in items
-            if _matches_item(item.file_name, item.relative_path, requirement.get("file_keywords", []))
+            if _matches_item(
+                item.file_name,
+                item.relative_path,
+                item.directory_level,
+                [*requirement.get("file_keywords", []), *requirement.get("aliases", [])],
+            )
        ]
        if matched:
            continue
@@ -29,12 +34,13 @@ def run_completeness_check(batch: FileSummaryBatch, rule_set: dict) -> list[Find
                    "requirement_type": requirement.get("type"),
                    "matched_files": [],
                    "searched_keywords": requirement.get("file_keywords", []),
+                    "searched_fields": ["file_name", "relative_path", "directory_level"],
                },
            )
        )
    return findings


-def _matches_item(file_name: str, relative_path: str, keywords: list[str]) -> bool:
-    haystack = f"{file_name} {relative_path}".lower()
+def _matches_item(file_name: str, relative_path: str, directory_level: str, keywords: list[str]) -> bool:
+    haystack = f"{file_name} {relative_path} {directory_level}".lower()  # one lowercase search string: name, path, directory level
    return any(str(keyword).lower() in haystack for keyword in keywords)
--- a/review_agent/regulatory_review/services/consistency_check.py
+++ b/review_agent/regulatory_review/services/consistency_check.py
@@ -10,6 +10,10 @@ FIELDS = {
    "产品名称": r"产品名称[:：]\s*([^\n\r]+)",
    "型号规格": r"型号规格[:：]\s*([^\n\r]+)",
    "预期用途": r"预期用途[:：]\s*([^\n\r]+)",
+    "管理类别": r"管理类别[:：]\s*([^\n\r]+)",
+    "分类编码": r"分类编码[:：]\s*([^\n\r]+)",
+    "注册类型": r"注册类型[:：]\s*([^\n\r]+)",
+    "临床评价路径": r"临床评价路径[:：]\s*([^\n\r]+)",
 }


--- a/review_agent/regulatory_review/services/rag_index.py
+++ b/review_agent/regulatory_review/services/rag_index.py
@@ -107,12 +107,19 @@ def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
        try:
            text = extract_text_from_path(path)
        except RuntimeError as exc:
+            if _is_attachment4(path):
+                raise RuntimeError(f"附件 4 核心法规材料抽取失败：{path.name}") from exc
            logger.warning("Regulatory source extraction skipped", extra={"path": str(path), "error": str(exc)})
            continue
        chunks.extend(chunk_text(text, source=str(path.relative_to(source_dir))))
    return chunks


+def _is_attachment4(path: Path) -> bool:
+    normalized = "".join(path.name.split())  # drop every whitespace char (incl. full-width U+3000), not only ASCII " "
+    return "附件4" in normalized and "体外诊断试剂注册申报资料要求及说明" in normalized
+
+
 def build_chroma_index(
    *,
    source_dir: Path,
--- a/review_agent/regulatory_review/services/rule_loader.py
+++ b/review_agent/regulatory_review/services/rule_loader.py
@@ -47,9 +47,30 @@ def load_rule_file(path: str | Path | None = None) -> dict:
        raise ValueError(f"规则 code 必须为 {DEFAULT_RULE_CODE}")
    if not isinstance(payload.get("requirements"), list) or not payload["requirements"]:
        raise ValueError("规则文件必须包含 requirements 列表。")
+    _validate_attachment4_requirements(payload)
    return payload


+def _validate_attachment4_requirements(payload: dict) -> None:  # raises ValueError on incomplete Attachment 4 coverage
+    expected_codes = {str(code) for code in payload.get("attachment4_required_codes") or []}
+    covered_codes: set[str] = set()
+    for rule in payload.get("requirements") or []:
+        outline_code = rule.get("attachment4_code")
+        if not outline_code:
+            continue  # rules without an Attachment 4 code are not field-checked and cover nothing
+        covered_codes.add(str(outline_code))
+        for field in ("code", "rule_id", "title", "severity", "file_keywords", "citation_query"):
+            if not rule.get(field):
+                raise ValueError(f"附件4规则 {outline_code} 缺少 {field}")
+    uncovered = sorted(expected_codes - covered_codes, key=_attachment4_sort_key)
+    if uncovered:
+        raise ValueError(f"附件4目录项缺少规则：{', '.join(uncovered)}")
+
+
+def _attachment4_sort_key(value: str) -> tuple[int, ...]:
+    return tuple(int(part) for part in value.split(".") if part.isdecimal())  # isdecimal(): only parts int() can parse
+
+
 def check_rule_version(
    *,
    path: str | Path | None = None,
--- a/review_agent/regulatory_review/services/structure_check.py
+++ b/review_agent/regulatory_review/services/structure_check.py
@@ -5,7 +5,27 @@ from review_agent.regulatory_review.schemas import Finding

 def run_structure_check(document_texts: dict[str, str], rule_set: dict) -> list[Finding]:
    findings: list[Finding] = []
+    combined_all_text = "\n".join(document_texts.values())
    for requirement in rule_set.get("requirements", []):
+        if requirement.get("structure_required") and not _contains_any(
+            combined_all_text,
+            [requirement.get("title", ""), *requirement.get("aliases", [])],
+        ):
+            findings.append(
+                Finding(
+                    rule_code=requirement["code"],
+                    category="structure",
+                    severity=requirement.get("severity", "medium"),
+                    title=f"申报资料目录缺少{requirement['title']}章节",
+                    detail=f"未在申报资料目录或章节标题候选中发现{requirement['title']}。",
+                    suggestion=requirement.get("suggestion", ""),
+                    evidence={
+                        "attachment4_code": requirement.get("attachment4_code"),
+                        "expected_title": requirement["title"],
+                        "aliases": requirement.get("aliases", []),
+                    },
+                )
+            )
        required_sections = requirement.get("required_sections") or []
        if not required_sections:
            continue
@@ -14,7 +34,7 @@ def run_structure_check(document_texts: dict[str, str], rule_set: dict) -> list[
            continue
        combined_text = "\n".join(matching_docs.values())
        for section in required_sections:
-            if section in combined_text:
+            if _contains_any(combined_text, [section]):
                continue
            findings.append(
                Finding(
@@ -39,3 +59,12 @@ def _matching_documents(document_texts: dict[str, str], keywords: list[str]) ->
        if any(str(keyword).lower() in haystack for keyword in keywords):
            result[name] = text
    return result
+
+
+def _contains_any(text: str, needles: list[str]) -> bool:
+    haystack = _normalize_title(text)  # compare text and needles in the same normalized form
+    return any(key in haystack for key in map(_normalize_title, filter(None, needles)) if key)  # skip needles normalizing to ""
+
+
+def _normalize_title(value: str) -> str:
+    return "".join(str(value).lower().replace("/", "").replace("／", "").split())  # lowercase; drop slashes and all whitespace
--- a/review_agent/regulatory_review/services/text_extract.py
+++ b/review_agent/regulatory_review/services/text_extract.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import hashlib
+import re
 from dataclasses import dataclass
 from pathlib import Path

@@ -14,6 +15,9 @@ class ExtractedText:
    status: str
    content_hash: str = ""
    error_message: str = ""
+    front_text: str = ""
+    section_candidates: list[str] | None = None
+    field_candidates: dict[str, str] | None = None


 SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".pptx", ".xlsx", ".doc"}
@@ -26,6 +30,47 @@ def extract_text(path: str | Path) -> ExtractedText:
    try:
        text = extract_text_from_path(file_path)
    except Exception as exc:
-        return ExtractedText(path=file_path, text="", status="failed", error_message=str(exc))
+        return ExtractedText(
+            path=file_path,
+            text="",
+            status="failed",
+            error_message=str(exc),
+            section_candidates=[],
+            field_candidates={},
+        )
    content_hash = hashlib.sha256(text.encode("utf-8")).hexdigest() if text else ""
-    return ExtractedText(path=file_path, text=text, status="success", content_hash=content_hash)
+    return ExtractedText(
+        path=file_path,
+        text=text,
+        status="success",
+        content_hash=content_hash,
+        front_text=_front_text(text),
+        section_candidates=_section_candidates(text),
+        field_candidates=_field_candidates(text),
+    )
+
+
+def _front_text(text: str, limit: int = 1200) -> str:
+    return text[:limit]  # leading slice of the text, up to `limit` characters
+
+
+def _section_candidates(text: str) -> list[str]:
+    """Collect up to 80 heading-like lines (numbered, or naming an Attachment 4 chapter), each cut to 120 chars."""
+    numbered = re.compile(r"^([一二三四五六七八九十]+[、.．]|[0-9]+(\.[0-9]+)*[、.．\s])")
+    # Unnumbered lines still qualify when they name a chapter; all six Attachment 4 chapters are listed.
+    titles = ("章节目录", "监管信息", "综述资料", "非临床资料", "临床评价资料", "产品说明书和标签样稿", "质量管理体系")
+    candidates = [
+        stripped[:120]
+        for stripped in map(str.strip, text.splitlines())
+        if stripped and (numbered.match(stripped) or any(title in stripped for title in titles))
+    ]
+    return candidates[:80]
+
+
+def _field_candidates(text: str) -> dict[str, str]:
+    """Map each known field label to the value captured at its first "label:" occurrence in text."""
+    labels = ("产品名称", "型号规格", "预期用途", "管理类别", "分类编码", "注册类型", "临床评价路径")
+    # Both the ASCII and the full-width colon are accepted as the label/value separator.
+    hits = ((label, re.search(rf"{label}[:：]\s*([^\n\r]+)", text)) for label in labels)
+    # Runs of whitespace inside each captured value are collapsed to single spaces.
+    return {label: " ".join(hit.group(1).split()) for label, hit in hits if hit}
--- a/review_agent/regulatory_review/workflow.py
+++ b/review_agent/regulatory_review/workflow.py
@@ -1,5 +1,6 @@
 from __future__ import annotations

+import json
 import logging
 from pathlib import Path
 from threading import Thread
@@ -26,6 +27,7 @@ from review_agent.regulatory_review.services.structure_check import run_structur
 from review_agent.regulatory_review.services.text_extract import extract_text

 from .events import record_event
+from .storage import save_artifact


 NODE_DEFINITIONS = [
@@ -105,6 +107,7 @@ class RegulatoryWorkflowExecutor:
        self.rule_set: dict | None = None
        self.findings = []
        self.document_texts: dict[str, str] = {}
+        self.text_extract_status: dict[str, dict[str, object]] = {}

    def run(self) -> None:
        self.batch.status = RegulatoryReviewBatch.Status.RUNNING
@@ -176,6 +179,13 @@ class RegulatoryWorkflowExecutor:
            return
        if node_code == "text_extract":
            self.document_texts = self._extract_source_texts()
+            save_artifact(
+                self.batch,
+                name="text_extract_status.json",
+                artifact_type="json",
+                content=json.dumps(self.text_extract_status, ensure_ascii=False, indent=2),
+                metadata={"artifact": "text_extract_status"},
+            )
            return
        if node_code == "structure_check":
            self.findings.extend(run_structure_check(self.document_texts, self._rules()))
@@ -184,7 +194,29 @@ class RegulatoryWorkflowExecutor:
            self.findings.extend(run_consistency_check(self.document_texts))
            return
        if node_code == "risk_assess":
-            persist_findings(self.batch, self.findings)
+            issues = persist_findings(self.batch, self.findings)
+            save_artifact(
+                self.batch,
+                name="rag_result_json.json",
+                artifact_type="json",
+                content=json.dumps(
+                    {
+                        "batch_no": self.batch.batch_no,
+                        "text_extract_status": self.text_extract_status,
+                        "issues": [
+                            {
+                                "rule_code": issue.rule_code,
+                                "title": issue.title,
+                                "citations": issue.citations,
+                            }
+                            for issue in issues
+                        ],
+                    },
+                    ensure_ascii=False,
+                    indent=2,
+                ),
+                metadata={"artifact": "rag_result_json"},
+            )
            return
        if node_code == "report_export":
            exports = export_review_results(self.batch)
@@ -234,8 +266,25 @@ class RegulatoryWorkflowExecutor:
            if not path.is_absolute():
                path = Path(settings.MEDIA_ROOT) / item.storage_path
            if not path.exists():
+                self.text_extract_status[item.file_name] = {
+                    "status": "missing",
+                    "path": str(path),
+                    "content_hash": "",
+                    "section_candidates": [],
+                    "field_candidates": {},
+                    "front_text": "",
+                }
                continue
            result = extract_text(path)
+            self.text_extract_status[item.file_name] = {
+                "status": result.status,
+                "path": str(path),
+                "content_hash": result.content_hash,
+                "section_candidates": result.section_candidates,
+                "field_candidates": result.field_candidates,
+                "front_text": result.front_text,
+                "error_message": result.error_message,
+            }
            if result.status == "success" and result.text:
                texts[item.file_name] = result.text
        return texts
--- a/tests/fixtures/regulatory/attachment4_outline.json
+++ b/tests/fixtures/regulatory/attachment4_outline.json
@@ -0,0 +1,8 @@
+[
+  {"code": "1", "title": "监管信息", "children": ["章节目录", "申请表", "术语/缩写词列表", "产品列表", "关联文件", "申报前与监管机构的联系情况和沟通记录", "符合性声明"]},
+  {"code": "2", "title": "综述资料", "children": ["章节目录", "概述", "产品描述", "预期用途", "申报产品上市历史", "其他需说明的内容"]},
+  {"code": "3", "title": "非临床资料", "children": ["章节目录", "产品风险管理资料", "体外诊断试剂安全和性能基本原则清单", "产品技术要求及检验报告", "分析性能研究", "稳定性研究", "阳性判断值或参考区间研究", "其他资料"]},
+  {"code": "4", "title": "临床评价资料", "children": ["章节目录", "临床评价资料"]},
+  {"code": "5", "title": "产品说明书和标签样稿", "children": ["章节目录", "产品说明书", "标签样稿", "其他资料"]},
+  {"code": "6", "title": "质量管理体系文件", "children": ["综述", "章节目录", "生产制造信息", "质量管理体系程序", "管理职责程序", "资源管理程序", "产品实现程序", "质量管理体系的测量/分析和改进程序", "其他质量体系程序信息", "质量管理体系核查文件"]}
+]
--- a/tests/test_regulatory_completeness.py
+++ b/tests/test_regulatory_completeness.py
@@ -42,3 +42,30 @@ def test_completeness_check_matches_existing_files_and_reports_missing(django_us
    missing = next(finding for finding in findings if finding.rule_code == "registration_test_report")
    assert missing.severity == "blocking"
    assert missing.category == "completeness"
+
+
+def test_completeness_check_matches_attachment4_directory_names(django_user_model):
+    user = django_user_model.objects.create_user(username="owner", password="pass")
+    conversation = Conversation.objects.create(user=user, title="会话")
+    batch = FileSummaryBatch.objects.create(
+        conversation=conversation,
+        user=user,
+        batch_no="FS-A4",
+        status=FileSummaryBatch.Status.SUCCESS,
+    )
+    FileSummaryItem.objects.create(
+        batch=batch,
+        file_index=1,
+        directory_level="1. 监管信息 / 1.2 申请表",
+        file_name="注册申请表.pdf",
+        file_type="pdf",
+        relative_path="1.监管信息/1.2申请表/注册申请表.pdf",
+        storage_path="x/app.pdf",
+    )
+    # The batch holds a single item: an application form filed under the "1.2 申请表" directory.
+    findings = run_completeness_check(batch, load_rule_file())
+    # The application-form rule must not fire, while the absent QMS chapter is reported with high severity.
+    assert not any(finding.rule_code == "attachment4_1_2_application_form" for finding in findings)
+    missing_qms = next(finding for finding in findings if finding.rule_code == "attachment4_6_quality_system")
+    assert missing_qms.severity == "high"
+    assert missing_qms.evidence["searched_fields"] == ["file_name", "relative_path", "directory_level"]
--- a/tests/test_regulatory_consistency.py
+++ b/tests/test_regulatory_consistency.py
@@ -12,3 +12,16 @@ def test_consistency_check_reports_product_name_mismatch():
    assert len(findings) == 1
    assert findings[0].category == "consistency"
    assert "产品名称" in findings[0].title
+
+
+def test_consistency_check_reports_registration_scope_fields():
+    document_texts = {
+        "申请表.docx": "管理类别：第二类\n分类编码：6840\n注册类型：首次注册\n临床评价路径：免临床",
+        "综述资料.docx": "管理类别：第三类\n分类编码：6840\n注册类型：首次注册\n临床评价路径：临床试验",
+    }
+    # 管理类别 and 临床评价路径 differ between the two documents; 分类编码 and 注册类型 agree.
+    findings = run_consistency_check(document_texts)
+    titles = [finding.title for finding in findings]
+    # Each differing field must be reported under its own finding title.
+    assert "管理类别在不同文件中不一致" in titles
+    assert "临床评价路径在不同文件中不一致" in titles
--- a/tests/test_regulatory_rag.py
+++ b/tests/test_regulatory_rag.py
@@ -6,6 +6,7 @@ from review_agent.regulatory_review.services.rag_citation import (
 )
 from review_agent.regulatory_review.services.rag_embedding import SiliconFlowEmbeddingProvider
 from review_agent.regulatory_review.services.rag_index import chunk_text
+from review_agent.regulatory_review.services.rag_index import collect_source_chunks


 def test_siliconflow_embedding_provider_posts_expected_payload(monkeypatch):
@@ -70,3 +71,18 @@ def test_retrieve_citations_raises_when_index_missing(settings, tmp_path):

    with pytest.raises(RagIndexUnavailable):
        retrieve_citations("注册检验报告", embedding_provider=lambda texts: [[0.1]])
+
+
+def test_collect_source_chunks_requires_attachment4_extraction(monkeypatch, tmp_path):
+    source_dir = tmp_path / "sources"
+    source_dir.mkdir()
+    attachment4 = source_dir / "附件 4 体外诊断试剂注册申报资料要求及说明.doc"
+    attachment4.write_bytes(b"legacy-doc")
+    # Stand-in extractor that always fails with RuntimeError.
+    def fail_extract(path):
+        raise RuntimeError("无法通过 LibreOffice 转换法规 .doc 材料")
+    # Patch the name as referenced inside rag_index so collect_source_chunks calls the stand-in.
+    monkeypatch.setattr("review_agent.regulatory_review.services.rag_index.extract_text_from_path", fail_extract)
+    # A failed extraction of the Attachment 4 file must raise instead of being logged and skipped.
+    with pytest.raises(RuntimeError, match="附件 4"):
+        collect_source_chunks(source_dir)
--- a/tests/test_regulatory_rule_loader.py
+++ b/tests/test_regulatory_rule_loader.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+import json

 import pytest
 from django.core.management import call_command
@@ -27,6 +28,30 @@ def test_load_rule_file_reads_demo_requirements():
    assert "essential_principles_checklist" in codes


+def test_load_rule_file_covers_attachment4_outline():
+    requirements = load_rule_file()["requirements"]
+    # Resolve the fixture relative to this module so the test passes from any working directory.
+    outline_path = Path(__file__).parent / "fixtures" / "regulatory" / "attachment4_outline.json"
+    outline = json.loads(outline_path.read_text(encoding="utf-8"))
+    for chapter in outline:
+        chapter_rule = next(
+            item for item in requirements if item["title"] == chapter["title"] and item.get("attachment4_code") == chapter["code"]
+        )
+        assert chapter_rule["attachment4_code"] == chapter["code"]
+        assert chapter_rule["severity"] == "high"
+        assert chapter_rule["citation_query"]
+        for child in chapter["children"]:
+            child_rule = next(
+                item
+                for item in requirements
+                if item["title"] == child and str(item.get("attachment4_code", "")).startswith(f"{chapter['code']}.")
+            )
+            assert child_rule["rule_id"]
+            assert child_rule["file_keywords"]
+            assert child_rule["severity"] in {"blocking", "high", "medium"}
+            assert child_rule["citation_query"]
+
+
 def test_compute_file_sha256_changes_when_file_changes(tmp_path):
    path = tmp_path / "rule.yaml"
    path.write_text("code: demo\n", encoding="utf-8")
--- a/tests/test_regulatory_structure.py
+++ b/tests/test_regulatory_structure.py
@@ -11,3 +11,15 @@ def test_structure_check_reports_missing_instruction_sections():

    assert any(finding.rule_code == "instructions_for_use:储存条件" for finding in findings)
    assert all("样本要求" not in finding.title for finding in findings)
+
+
+def test_structure_check_reports_missing_attachment4_outline_heading():
+    document_texts = {
+        "申报资料目录.txt": "1. 监管信息\n1.2 申请表\n2. 综述资料\n3. 非临床资料\n"
+    }
+    # The sample outline lists chapters 1-3 only, so the chapter 4 title 临床评价资料 is absent from the text.
+    findings = run_structure_check(document_texts, load_rule_file())
+    # The chapter-level rule must yield a "structure" finding that names the expected title.
+    missing = next(finding for finding in findings if finding.rule_code == "attachment4_4_clinical_evaluation")
+    assert missing.category == "structure"
+    assert missing.evidence["expected_title"] == "临床评价资料"
--- a/tests/test_regulatory_workflow.py
+++ b/tests/test_regulatory_workflow.py
@@ -7,6 +7,7 @@ from review_agent.models import (
    FileSummaryItem,
    Message,
    RegulatoryIssue,
+    RegulatoryArtifact,
    RegulatoryReviewBatch,
    WorkflowEvent,
    WorkflowNodeRun,
@@ -201,4 +202,6 @@ def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_p
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
    ).count() == 3
+    assert RegulatoryArtifact.objects.filter(batch=batch, name="text_extract_status.json").exists()
+    assert RegulatoryArtifact.objects.filter(batch=batch, name="rag_result_json.json").exists()
    assert conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains="已完成 NMPA").exists()