feat(regulatory): 对齐附件4目录核查规则
This commit is contained in:
@@ -1,58 +1,503 @@
|
||||
code: nmpa_ivd_registration_v1
|
||||
name: NMPA IVD 注册资料 Demo 规则
|
||||
name: NMPA IVD 注册资料附件 4 对齐规则
|
||||
rag_collection: nmpa_ivd_registration_v1
|
||||
source_material_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||
source_material_dir: docs/0.原始材料
|
||||
attachment4_required_codes:
|
||||
- "1"
|
||||
- "1.1"
|
||||
- "1.2"
|
||||
- "1.3"
|
||||
- "1.4"
|
||||
- "1.5"
|
||||
- "1.6"
|
||||
- "1.7"
|
||||
- "2"
|
||||
- "2.1"
|
||||
- "2.2"
|
||||
- "2.3"
|
||||
- "2.4"
|
||||
- "2.5"
|
||||
- "2.6"
|
||||
- "3"
|
||||
- "3.1"
|
||||
- "3.2"
|
||||
- "3.3"
|
||||
- "3.4"
|
||||
- "3.5"
|
||||
- "3.6"
|
||||
- "3.7"
|
||||
- "3.8"
|
||||
- "4"
|
||||
- "4.1"
|
||||
- "4.2"
|
||||
- "5"
|
||||
- "5.1"
|
||||
- "5.2"
|
||||
- "5.3"
|
||||
- "5.4"
|
||||
- "6"
|
||||
- "6.1"
|
||||
- "6.2"
|
||||
- "6.3"
|
||||
- "6.4"
|
||||
- "6.5"
|
||||
- "6.6"
|
||||
- "6.7"
|
||||
- "6.8"
|
||||
- "6.9"
|
||||
- "6.10"
|
||||
requirements:
|
||||
- code: product_technical_requirements
|
||||
title: 产品技术要求
|
||||
- code: attachment4_1_regulatory_info
|
||||
rule_id: A4-1
|
||||
attachment4_code: "1"
|
||||
title: 监管信息
|
||||
type: chapter
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [监管信息]
|
||||
aliases: [监管资料]
|
||||
suggestion: 请补充监管信息章节及其目录项。
|
||||
citation_query: 附件4 监管信息 体外诊断试剂 注册申报资料
|
||||
structure_required: true
|
||||
- code: attachment4_1_1_toc
|
||||
rule_id: A4-1.1
|
||||
attachment4_code: "1.1"
|
||||
title: 章节目录
|
||||
type: directory
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [章节目录, 目录]
|
||||
aliases: [监管信息目录]
|
||||
suggestion: 请补充监管信息章节目录。
|
||||
citation_query: 附件4 监管信息 章节目录
|
||||
- code: attachment4_1_2_application_form
|
||||
rule_id: A4-1.2
|
||||
attachment4_code: "1.2"
|
||||
title: 申请表
|
||||
type: required
|
||||
severity: blocking
|
||||
category: completeness
|
||||
file_keywords:
|
||||
- 产品技术要求
|
||||
suggestion: 请补充产品技术要求并确认版本与注册申请资料一致。
|
||||
citation_query: 体外诊断试剂 产品技术要求 注册申报资料
|
||||
- code: instructions_for_use
|
||||
title: 说明书
|
||||
file_keywords: [申请表, 注册申请表]
|
||||
aliases: [医疗器械注册申请表]
|
||||
suggestion: 请补充注册申请表并核对注册类型、管理类别和分类编码。
|
||||
citation_query: 附件4 监管信息 申请表
|
||||
- code: attachment4_1_3_terms
|
||||
rule_id: A4-1.3
|
||||
attachment4_code: "1.3"
|
||||
title: 术语/缩写词列表
|
||||
type: recommended
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [术语, 缩写词, 缩略语]
|
||||
suggestion: 请补充术语和缩写词列表。
|
||||
citation_query: 附件4 术语 缩写词列表
|
||||
- code: attachment4_1_4_product_list
|
||||
rule_id: A4-1.4
|
||||
attachment4_code: "1.4"
|
||||
title: 产品列表
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords:
|
||||
- 说明书
|
||||
- 使用说明
|
||||
required_sections:
|
||||
- 储存条件
|
||||
- 有效期
|
||||
- 样本要求
|
||||
suggestion: 请补充说明书并核对储存条件、有效期和样本要求章节。
|
||||
citation_query: 体外诊断试剂 说明书 储存条件 有效期 样本要求
|
||||
file_keywords: [产品列表, 产品清单]
|
||||
suggestion: 请补充申报产品列表。
|
||||
citation_query: 附件4 产品列表
|
||||
- code: attachment4_1_5_related_files
|
||||
rule_id: A4-1.5
|
||||
attachment4_code: "1.5"
|
||||
title: 关联文件
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [关联文件, 关联注册, 引用文件]
|
||||
suggestion: 如存在关联注册或引用资料,请补充关联文件说明。
|
||||
citation_query: 附件4 关联文件
|
||||
- code: attachment4_1_6_pre_submission
|
||||
rule_id: A4-1.6
|
||||
attachment4_code: "1.6"
|
||||
title: 申报前与监管机构的联系情况和沟通记录
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [沟通记录, 监管机构, 申报前]
|
||||
suggestion: 如有申报前沟通,请补充沟通记录;如无,请说明不适用。
|
||||
citation_query: 附件4 申报前 监管机构 沟通记录
|
||||
- code: attachment4_1_7_declaration
|
||||
rule_id: A4-1.7
|
||||
attachment4_code: "1.7"
|
||||
title: 符合性声明
|
||||
type: required
|
||||
severity: blocking
|
||||
category: completeness
|
||||
file_keywords: [符合性声明, 声明]
|
||||
suggestion: 请补充符合性声明。
|
||||
citation_query: 附件4 符合性声明
|
||||
- code: attachment4_2_summary
|
||||
rule_id: A4-2
|
||||
attachment4_code: "2"
|
||||
title: 综述资料
|
||||
type: chapter
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [综述资料]
|
||||
suggestion: 请补充综述资料章节。
|
||||
citation_query: 附件4 综述资料
|
||||
structure_required: true
|
||||
- code: attachment4_2_1_toc
|
||||
rule_id: A4-2.1
|
||||
attachment4_code: "2.1"
|
||||
title: 章节目录
|
||||
type: directory
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [章节目录, 综述资料目录]
|
||||
suggestion: 请补充综述资料章节目录。
|
||||
citation_query: 附件4 综述资料 章节目录
|
||||
- code: attachment4_2_2_overview
|
||||
rule_id: A4-2.2
|
||||
attachment4_code: "2.2"
|
||||
title: 概述
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [概述]
|
||||
suggestion: 请补充产品概述。
|
||||
citation_query: 附件4 概述
|
||||
- code: attachment4_2_3_product_description
|
||||
rule_id: A4-2.3
|
||||
attachment4_code: "2.3"
|
||||
title: 产品描述
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [产品描述]
|
||||
suggestion: 请补充产品描述。
|
||||
citation_query: 附件4 产品描述
|
||||
- code: attachment4_2_4_intended_use
|
||||
rule_id: A4-2.4
|
||||
attachment4_code: "2.4"
|
||||
title: 预期用途
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [预期用途]
|
||||
suggestion: 请补充预期用途资料。
|
||||
citation_query: 附件4 预期用途
|
||||
- code: attachment4_2_5_marketing_history
|
||||
rule_id: A4-2.5
|
||||
attachment4_code: "2.5"
|
||||
title: 申报产品上市历史
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [上市历史]
|
||||
suggestion: 如产品已有上市历史,请补充相关说明;如无,请说明不适用。
|
||||
citation_query: 附件4 上市历史
|
||||
- code: attachment4_2_6_other_summary
|
||||
rule_id: A4-2.6
|
||||
attachment4_code: "2.6"
|
||||
title: 其他需说明的内容
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [其他需说明, 其他说明]
|
||||
suggestion: 请补充其他需说明内容或不适用说明。
|
||||
citation_query: 附件4 其他需说明
|
||||
- code: attachment4_3_nonclinical
|
||||
rule_id: A4-3
|
||||
attachment4_code: "3"
|
||||
title: 非临床资料
|
||||
type: chapter
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [非临床资料]
|
||||
suggestion: 请补充非临床资料章节。
|
||||
citation_query: 附件4 非临床资料
|
||||
structure_required: true
|
||||
- code: attachment4_3_1_toc
|
||||
rule_id: A4-3.1
|
||||
attachment4_code: "3.1"
|
||||
title: 章节目录
|
||||
type: directory
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [章节目录, 非临床资料目录]
|
||||
suggestion: 请补充非临床资料章节目录。
|
||||
citation_query: 附件4 非临床资料 章节目录
|
||||
- code: attachment4_3_2_risk_management
|
||||
rule_id: A4-3.2
|
||||
attachment4_code: "3.2"
|
||||
title: 产品风险管理资料
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [产品风险管理, 风险管理资料]
|
||||
suggestion: 请补充产品风险管理资料。
|
||||
citation_query: 附件4 产品风险管理资料
|
||||
- code: essential_principles_checklist
|
||||
rule_id: A4-3.3
|
||||
attachment4_code: "3.3"
|
||||
title: 体外诊断试剂安全和性能基本原则清单
|
||||
type: recommended
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [安全和性能基本原则, 基本原则清单]
|
||||
aliases: [安全和性能基本原则清单]
|
||||
suggestion: 建议补充安全和性能基本原则清单,便于审评追溯。
|
||||
citation_query: 附件4 安全和性能基本原则清单
|
||||
- code: product_technical_requirements
|
||||
rule_id: A4-3.4
|
||||
attachment4_code: "3.4"
|
||||
title: 产品技术要求及检验报告
|
||||
type: required
|
||||
severity: blocking
|
||||
category: completeness
|
||||
file_keywords: [产品技术要求, 注册检验报告, 检验报告]
|
||||
aliases: [产品技术要求, 注册检验报告]
|
||||
required_sections: [产品技术要求, 检验报告]
|
||||
suggestion: 请补充产品技术要求及注册检验报告,并确认二者覆盖型号一致。
|
||||
citation_query: 附件4 产品技术要求 检验报告
|
||||
- code: registration_test_report
|
||||
rule_id: A4-3.4-R
|
||||
attachment4_code: "3.4"
|
||||
title: 注册检验报告
|
||||
type: required
|
||||
severity: blocking
|
||||
category: completeness
|
||||
file_keywords:
|
||||
- 注册检验报告
|
||||
- 检验报告
|
||||
file_keywords: [注册检验报告, 检验报告]
|
||||
suggestion: 请补充注册检验报告并复核报告覆盖的产品型号。
|
||||
citation_query: 体外诊断试剂 注册检验报告 注册申报资料
|
||||
citation_query: 附件4 注册检验报告
|
||||
- code: attachment4_3_5_analytical_performance
|
||||
rule_id: A4-3.5
|
||||
attachment4_code: "3.5"
|
||||
title: 分析性能研究
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [分析性能研究, 分析性能]
|
||||
suggestion: 请补充分析性能研究资料。
|
||||
citation_query: 附件4 分析性能研究
|
||||
- code: attachment4_3_6_stability
|
||||
rule_id: A4-3.6
|
||||
attachment4_code: "3.6"
|
||||
title: 稳定性研究
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [稳定性研究, 稳定性]
|
||||
suggestion: 请补充稳定性研究资料。
|
||||
citation_query: 附件4 稳定性研究
|
||||
- code: attachment4_3_7_reference_interval
|
||||
rule_id: A4-3.7
|
||||
attachment4_code: "3.7"
|
||||
title: 阳性判断值或参考区间研究
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [阳性判断值, 参考区间]
|
||||
suggestion: 请补充阳性判断值或参考区间研究资料。
|
||||
citation_query: 附件4 阳性判断值 参考区间
|
||||
- code: attachment4_3_8_other_nonclinical
|
||||
rule_id: A4-3.8
|
||||
attachment4_code: "3.8"
|
||||
title: 其他资料
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [其他资料]
|
||||
suggestion: 请补充非临床其他资料或不适用说明。
|
||||
citation_query: 附件4 非临床 其他资料
|
||||
- code: attachment4_4_clinical_evaluation
|
||||
rule_id: A4-4
|
||||
attachment4_code: "4"
|
||||
title: 临床评价资料
|
||||
type: chapter
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [临床评价资料, 临床资料]
|
||||
suggestion: 请补充临床评价资料章节。
|
||||
citation_query: 附件4 临床评价资料
|
||||
structure_required: true
|
||||
- code: attachment4_4_1_toc
|
||||
rule_id: A4-4.1
|
||||
attachment4_code: "4.1"
|
||||
title: 章节目录
|
||||
type: directory
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [章节目录, 临床评价资料目录]
|
||||
suggestion: 请补充临床评价资料章节目录。
|
||||
citation_query: 附件4 临床评价资料 章节目录
|
||||
- code: clinical_evaluation
|
||||
rule_id: A4-4.2
|
||||
attachment4_code: "4.2"
|
||||
title: 临床评价资料
|
||||
type: conditional
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords:
|
||||
- 临床评价
|
||||
- 临床试验
|
||||
file_keywords: [临床评价, 临床试验, 免临床, 同品种比对]
|
||||
suggestion: 请根据适用情形补充临床评价资料或说明豁免依据。
|
||||
citation_query: 体外诊断试剂 临床评价资料 注册申报
|
||||
- code: essential_principles_checklist
|
||||
title: 安全和性能基本原则清单
|
||||
type: recommended
|
||||
citation_query: 附件4 临床评价资料 注册申报
|
||||
- code: attachment4_5_ifu_label
|
||||
rule_id: A4-5
|
||||
attachment4_code: "5"
|
||||
title: 产品说明书和标签样稿
|
||||
type: chapter
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [产品说明书和标签样稿, 说明书, 标签样稿]
|
||||
suggestion: 请补充产品说明书和标签样稿章节。
|
||||
citation_query: 附件4 产品说明书 标签样稿
|
||||
structure_required: true
|
||||
- code: attachment4_5_1_toc
|
||||
rule_id: A4-5.1
|
||||
attachment4_code: "5.1"
|
||||
title: 章节目录
|
||||
type: directory
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords:
|
||||
- 安全和性能基本原则
|
||||
- 基本原则清单
|
||||
suggestion: 建议补充安全和性能基本原则清单,便于审评追溯。
|
||||
citation_query: 体外诊断试剂 安全和性能基本原则清单
|
||||
file_keywords: [章节目录, 说明书目录, 标签目录]
|
||||
suggestion: 请补充产品说明书和标签样稿章节目录。
|
||||
citation_query: 附件4 说明书 标签 章节目录
|
||||
- code: instructions_for_use
|
||||
rule_id: A4-5.2
|
||||
attachment4_code: "5.2"
|
||||
title: 产品说明书
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [说明书, 产品说明书, 使用说明]
|
||||
aliases: [说明书]
|
||||
required_sections: [储存条件, 有效期, 样本要求]
|
||||
suggestion: 请补充说明书并核对储存条件、有效期和样本要求章节。
|
||||
citation_query: 附件4 产品说明书 储存条件 有效期 样本要求
|
||||
- code: attachment4_5_3_label
|
||||
rule_id: A4-5.3
|
||||
attachment4_code: "5.3"
|
||||
title: 标签样稿
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [标签样稿, 标签]
|
||||
suggestion: 请补充标签样稿。
|
||||
citation_query: 附件4 标签样稿
|
||||
- code: attachment4_5_4_other_ifu
|
||||
rule_id: A4-5.4
|
||||
attachment4_code: "5.4"
|
||||
title: 其他资料
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [其他资料]
|
||||
suggestion: 请补充说明书和标签相关其他资料或不适用说明。
|
||||
citation_query: 附件4 说明书 标签 其他资料
|
||||
- code: attachment4_6_quality_system
|
||||
rule_id: A4-6
|
||||
attachment4_code: "6"
|
||||
title: 质量管理体系文件
|
||||
type: chapter
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [质量管理体系文件, 质量体系, 质量管理体系]
|
||||
suggestion: 请补充质量管理体系文件章节。
|
||||
citation_query: 附件4 质量管理体系文件
|
||||
structure_required: true
|
||||
- code: attachment4_6_1_overview
|
||||
rule_id: A4-6.1
|
||||
attachment4_code: "6.1"
|
||||
title: 综述
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [综述]
|
||||
suggestion: 请补充质量管理体系综述。
|
||||
citation_query: 附件4 质量管理体系 综述
|
||||
- code: attachment4_6_2_toc
|
||||
rule_id: A4-6.2
|
||||
attachment4_code: "6.2"
|
||||
title: 章节目录
|
||||
type: directory
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [章节目录, 质量管理体系目录]
|
||||
suggestion: 请补充质量管理体系文件章节目录。
|
||||
citation_query: 附件4 质量管理体系 章节目录
|
||||
- code: attachment4_6_3_manufacturing
|
||||
rule_id: A4-6.3
|
||||
attachment4_code: "6.3"
|
||||
title: 生产制造信息
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [生产制造信息, 生产制造]
|
||||
suggestion: 请补充生产制造信息。
|
||||
citation_query: 附件4 生产制造信息
|
||||
- code: attachment4_6_4_qms_procedure
|
||||
rule_id: A4-6.4
|
||||
attachment4_code: "6.4"
|
||||
title: 质量管理体系程序
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [质量管理体系程序, 质量体系程序]
|
||||
suggestion: 请补充质量管理体系程序。
|
||||
citation_query: 附件4 质量管理体系程序
|
||||
- code: attachment4_6_5_management
|
||||
rule_id: A4-6.5
|
||||
attachment4_code: "6.5"
|
||||
title: 管理职责程序
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [管理职责程序, 管理职责]
|
||||
suggestion: 请补充管理职责程序。
|
||||
citation_query: 附件4 管理职责程序
|
||||
- code: attachment4_6_6_resource
|
||||
rule_id: A4-6.6
|
||||
attachment4_code: "6.6"
|
||||
title: 资源管理程序
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [资源管理程序, 资源管理]
|
||||
suggestion: 请补充资源管理程序。
|
||||
citation_query: 附件4 资源管理程序
|
||||
- code: attachment4_6_7_realization
|
||||
rule_id: A4-6.7
|
||||
attachment4_code: "6.7"
|
||||
title: 产品实现程序
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [产品实现程序, 产品实现]
|
||||
suggestion: 请补充产品实现程序。
|
||||
citation_query: 附件4 产品实现程序
|
||||
- code: attachment4_6_8_measurement
|
||||
rule_id: A4-6.8
|
||||
attachment4_code: "6.8"
|
||||
title: 质量管理体系的测量/分析和改进程序
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [测量, 分析和改进, 改进程序]
|
||||
suggestion: 请补充质量管理体系测量、分析和改进程序。
|
||||
citation_query: 附件4 测量 分析 改进程序
|
||||
- code: attachment4_6_9_other_qms
|
||||
rule_id: A4-6.9
|
||||
attachment4_code: "6.9"
|
||||
title: 其他质量体系程序信息
|
||||
type: conditional
|
||||
severity: medium
|
||||
category: completeness
|
||||
file_keywords: [其他质量体系程序, 其他质量体系]
|
||||
suggestion: 请补充其他质量体系程序信息或不适用说明。
|
||||
citation_query: 附件4 其他质量体系程序信息
|
||||
- code: attachment4_6_10_qms_audit
|
||||
rule_id: A4-6.10
|
||||
attachment4_code: "6.10"
|
||||
title: 质量管理体系核查文件
|
||||
type: required
|
||||
severity: high
|
||||
category: completeness
|
||||
file_keywords: [质量管理体系核查文件, 体系核查文件, 核查文件]
|
||||
suggestion: 请补充质量管理体系核查文件。
|
||||
citation_query: 附件4 质量管理体系核查文件
|
||||
|
||||
@@ -8,12 +8,17 @@ def run_completeness_check(batch: FileSummaryBatch, rule_set: dict) -> list[Find
|
||||
items = list(batch.items.order_by("file_index"))
|
||||
findings: list[Finding] = []
|
||||
for requirement in rule_set.get("requirements", []):
|
||||
if requirement.get("type") not in {"required", "conditional", "recommended"}:
|
||||
if requirement.get("type") not in {"required", "conditional", "recommended", "chapter", "directory"}:
|
||||
continue
|
||||
matched = [
|
||||
item
|
||||
for item in items
|
||||
if _matches_item(item.file_name, item.relative_path, requirement.get("file_keywords", []))
|
||||
if _matches_item(
|
||||
item.file_name,
|
||||
item.relative_path,
|
||||
item.directory_level,
|
||||
[*requirement.get("file_keywords", []), *requirement.get("aliases", [])],
|
||||
)
|
||||
]
|
||||
if matched:
|
||||
continue
|
||||
@@ -29,12 +34,13 @@ def run_completeness_check(batch: FileSummaryBatch, rule_set: dict) -> list[Find
|
||||
"requirement_type": requirement.get("type"),
|
||||
"matched_files": [],
|
||||
"searched_keywords": requirement.get("file_keywords", []),
|
||||
"searched_fields": ["file_name", "relative_path", "directory_level"],
|
||||
},
|
||||
)
|
||||
)
|
||||
return findings
|
||||
|
||||
|
||||
def _matches_item(file_name: str, relative_path: str, keywords: list[str]) -> bool:
|
||||
haystack = f"{file_name} {relative_path}".lower()
|
||||
def _matches_item(file_name: str, relative_path: str, directory_level: str, keywords: list[str]) -> bool:
|
||||
haystack = f"{file_name} {relative_path} {directory_level}".lower()
|
||||
return any(str(keyword).lower() in haystack for keyword in keywords)
|
||||
|
||||
@@ -10,6 +10,10 @@ FIELDS = {
|
||||
"产品名称": r"产品名称[::]\s*([^\n\r]+)",
|
||||
"型号规格": r"型号规格[::]\s*([^\n\r]+)",
|
||||
"预期用途": r"预期用途[::]\s*([^\n\r]+)",
|
||||
"管理类别": r"管理类别[::]\s*([^\n\r]+)",
|
||||
"分类编码": r"分类编码[::]\s*([^\n\r]+)",
|
||||
"注册类型": r"注册类型[::]\s*([^\n\r]+)",
|
||||
"临床评价路径": r"临床评价路径[::]\s*([^\n\r]+)",
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -107,12 +107,19 @@ def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
|
||||
try:
|
||||
text = extract_text_from_path(path)
|
||||
except RuntimeError as exc:
|
||||
if _is_attachment4(path):
|
||||
raise RuntimeError(f"附件 4 核心法规材料抽取失败:{path.name}") from exc
|
||||
logger.warning("Regulatory source extraction skipped", extra={"path": str(path), "error": str(exc)})
|
||||
continue
|
||||
chunks.extend(chunk_text(text, source=str(path.relative_to(source_dir))))
|
||||
return chunks
|
||||
|
||||
|
||||
def _is_attachment4(path: Path) -> bool:
|
||||
normalized = path.name.replace(" ", "")
|
||||
return "附件4" in normalized and "体外诊断试剂注册申报资料要求及说明" in normalized
|
||||
|
||||
|
||||
def build_chroma_index(
|
||||
*,
|
||||
source_dir: Path,
|
||||
|
||||
@@ -47,9 +47,30 @@ def load_rule_file(path: str | Path | None = None) -> dict:
|
||||
raise ValueError(f"规则 code 必须为 {DEFAULT_RULE_CODE}")
|
||||
if not isinstance(payload.get("requirements"), list) or not payload["requirements"]:
|
||||
raise ValueError("规则文件必须包含 requirements 列表。")
|
||||
_validate_attachment4_requirements(payload)
|
||||
return payload
|
||||
|
||||
|
||||
def _validate_attachment4_requirements(payload: dict) -> None:
    """Validate the Attachment 4 rules contained in a rule payload.

    Every requirement that carries a truthy ``attachment4_code`` must provide
    a truthy value for each mandatory field, and every code listed under
    ``attachment4_required_codes`` must be covered by at least one such
    requirement.

    Args:
        payload: Parsed rule file content.

    Raises:
        ValueError: If an Attachment 4 rule lacks a mandatory field, or if a
            required Attachment 4 code has no rule.
    """
    mandatory_fields = ("code", "rule_id", "title", "severity", "file_keywords", "citation_query")
    rules = payload.get("requirements") or []
    expected_codes = {str(code) for code in payload.get("attachment4_required_codes") or []}

    covered_codes: set[str] = set()
    for rule in rules:
        attachment4_code = rule.get("attachment4_code")
        if not attachment4_code:
            # Rules without an Attachment 4 code are outside this validation.
            continue
        covered_codes.add(str(attachment4_code))
        for field in mandatory_fields:
            if not rule.get(field):
                raise ValueError(f"附件4规则 {attachment4_code} 缺少 {field}")

    # Report uncovered codes in numeric outline order (e.g. 6.2 before 6.10).
    missing = sorted(expected_codes - covered_codes, key=_attachment4_sort_key)
    if missing:
        raise ValueError(f"附件4目录项缺少规则:{', '.join(missing)}")
|
||||
|
||||
|
||||
def _attachment4_sort_key(value: str) -> tuple[int, ...]:
|
||||
return tuple(int(part) for part in value.split(".") if part.isdigit())
|
||||
|
||||
|
||||
def check_rule_version(
|
||||
*,
|
||||
path: str | Path | None = None,
|
||||
|
||||
@@ -5,7 +5,27 @@ from review_agent.regulatory_review.schemas import Finding
|
||||
|
||||
def run_structure_check(document_texts: dict[str, str], rule_set: dict) -> list[Finding]:
|
||||
findings: list[Finding] = []
|
||||
combined_all_text = "\n".join(document_texts.values())
|
||||
for requirement in rule_set.get("requirements", []):
|
||||
if requirement.get("structure_required") and not _contains_any(
|
||||
combined_all_text,
|
||||
[requirement.get("title", ""), *requirement.get("aliases", [])],
|
||||
):
|
||||
findings.append(
|
||||
Finding(
|
||||
rule_code=requirement["code"],
|
||||
category="structure",
|
||||
severity=requirement.get("severity", "medium"),
|
||||
title=f"申报资料目录缺少{requirement['title']}章节",
|
||||
detail=f"未在申报资料目录或章节标题候选中发现{requirement['title']}。",
|
||||
suggestion=requirement.get("suggestion", ""),
|
||||
evidence={
|
||||
"attachment4_code": requirement.get("attachment4_code"),
|
||||
"expected_title": requirement["title"],
|
||||
"aliases": requirement.get("aliases", []),
|
||||
},
|
||||
)
|
||||
)
|
||||
required_sections = requirement.get("required_sections") or []
|
||||
if not required_sections:
|
||||
continue
|
||||
@@ -14,7 +34,7 @@ def run_structure_check(document_texts: dict[str, str], rule_set: dict) -> list[
|
||||
continue
|
||||
combined_text = "\n".join(matching_docs.values())
|
||||
for section in required_sections:
|
||||
if section in combined_text:
|
||||
if _contains_any(combined_text, [section]):
|
||||
continue
|
||||
findings.append(
|
||||
Finding(
|
||||
@@ -39,3 +59,12 @@ def _matching_documents(document_texts: dict[str, str], keywords: list[str]) ->
|
||||
if any(str(keyword).lower() in haystack for keyword in keywords):
|
||||
result[name] = text
|
||||
return result
|
||||
|
||||
|
||||
def _contains_any(text: str, needles: list[str]) -> bool:
|
||||
normalized = _normalize_title(text)
|
||||
return any(_normalize_title(needle) in normalized for needle in needles if needle)
|
||||
|
||||
|
||||
def _normalize_title(value: str) -> str:
|
||||
return "".join(str(value).lower().replace("/", "").replace("/", "").split())
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
@@ -14,6 +15,9 @@ class ExtractedText:
|
||||
status: str
|
||||
content_hash: str = ""
|
||||
error_message: str = ""
|
||||
front_text: str = ""
|
||||
section_candidates: list[str] | None = None
|
||||
field_candidates: dict[str, str] | None = None
|
||||
|
||||
|
||||
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".pptx", ".xlsx", ".doc"}
|
||||
@@ -26,6 +30,47 @@ def extract_text(path: str | Path) -> ExtractedText:
|
||||
try:
|
||||
text = extract_text_from_path(file_path)
|
||||
except Exception as exc:
|
||||
return ExtractedText(path=file_path, text="", status="failed", error_message=str(exc))
|
||||
return ExtractedText(
|
||||
path=file_path,
|
||||
text="",
|
||||
status="failed",
|
||||
error_message=str(exc),
|
||||
section_candidates=[],
|
||||
field_candidates={},
|
||||
)
|
||||
content_hash = hashlib.sha256(text.encode("utf-8")).hexdigest() if text else ""
|
||||
return ExtractedText(path=file_path, text=text, status="success", content_hash=content_hash)
|
||||
return ExtractedText(
|
||||
path=file_path,
|
||||
text=text,
|
||||
status="success",
|
||||
content_hash=content_hash,
|
||||
front_text=_front_text(text),
|
||||
section_candidates=_section_candidates(text),
|
||||
field_candidates=_field_candidates(text),
|
||||
)
|
||||
|
||||
|
||||
def _front_text(text: str, limit: int = 1200) -> str:
|
||||
return text[:limit]
|
||||
|
||||
|
||||
def _section_candidates(text: str) -> list[str]:
|
||||
candidates = []
|
||||
for line in text.splitlines():
|
||||
normalized = line.strip()
|
||||
if not normalized:
|
||||
continue
|
||||
if re.match(r"^([一二三四五六七八九十]+[、..]|[0-9]+(\.[0-9]+)*[、..\s])", normalized):
|
||||
candidates.append(normalized[:120])
|
||||
elif any(keyword in normalized for keyword in ["章节目录", "监管信息", "综述资料", "非临床资料", "临床评价资料", "质量管理体系"]):
|
||||
candidates.append(normalized[:120])
|
||||
return candidates[:80]
|
||||
|
||||
|
||||
def _field_candidates(text: str) -> dict[str, str]:
|
||||
fields = {}
|
||||
for label in ["产品名称", "型号规格", "预期用途", "管理类别", "分类编码", "注册类型", "临床评价路径"]:
|
||||
match = re.search(rf"{label}[::]\s*([^\n\r]+)", text)
|
||||
if match:
|
||||
fields[label] = " ".join(match.group(1).strip().split())
|
||||
return fields
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
@@ -26,6 +27,7 @@ from review_agent.regulatory_review.services.structure_check import run_structur
|
||||
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||
|
||||
from .events import record_event
|
||||
from .storage import save_artifact
|
||||
|
||||
|
||||
NODE_DEFINITIONS = [
|
||||
@@ -105,6 +107,7 @@ class RegulatoryWorkflowExecutor:
|
||||
self.rule_set: dict | None = None
|
||||
self.findings = []
|
||||
self.document_texts: dict[str, str] = {}
|
||||
self.text_extract_status: dict[str, dict[str, object]] = {}
|
||||
|
||||
def run(self) -> None:
|
||||
self.batch.status = RegulatoryReviewBatch.Status.RUNNING
|
||||
@@ -176,6 +179,13 @@ class RegulatoryWorkflowExecutor:
|
||||
return
|
||||
if node_code == "text_extract":
|
||||
self.document_texts = self._extract_source_texts()
|
||||
save_artifact(
|
||||
self.batch,
|
||||
name="text_extract_status.json",
|
||||
artifact_type="json",
|
||||
content=json.dumps(self.text_extract_status, ensure_ascii=False, indent=2),
|
||||
metadata={"artifact": "text_extract_status"},
|
||||
)
|
||||
return
|
||||
if node_code == "structure_check":
|
||||
self.findings.extend(run_structure_check(self.document_texts, self._rules()))
|
||||
@@ -184,7 +194,29 @@ class RegulatoryWorkflowExecutor:
|
||||
self.findings.extend(run_consistency_check(self.document_texts))
|
||||
return
|
||||
if node_code == "risk_assess":
|
||||
persist_findings(self.batch, self.findings)
|
||||
issues = persist_findings(self.batch, self.findings)
|
||||
save_artifact(
|
||||
self.batch,
|
||||
name="rag_result_json.json",
|
||||
artifact_type="json",
|
||||
content=json.dumps(
|
||||
{
|
||||
"batch_no": self.batch.batch_no,
|
||||
"text_extract_status": self.text_extract_status,
|
||||
"issues": [
|
||||
{
|
||||
"rule_code": issue.rule_code,
|
||||
"title": issue.title,
|
||||
"citations": issue.citations,
|
||||
}
|
||||
for issue in issues
|
||||
],
|
||||
},
|
||||
ensure_ascii=False,
|
||||
indent=2,
|
||||
),
|
||||
metadata={"artifact": "rag_result_json"},
|
||||
)
|
||||
return
|
||||
if node_code == "report_export":
|
||||
exports = export_review_results(self.batch)
|
||||
@@ -234,8 +266,25 @@ class RegulatoryWorkflowExecutor:
|
||||
if not path.is_absolute():
|
||||
path = Path(settings.MEDIA_ROOT) / item.storage_path
|
||||
if not path.exists():
|
||||
self.text_extract_status[item.file_name] = {
|
||||
"status": "missing",
|
||||
"path": str(path),
|
||||
"content_hash": "",
|
||||
"section_candidates": [],
|
||||
"field_candidates": {},
|
||||
"front_text": "",
|
||||
}
|
||||
continue
|
||||
result = extract_text(path)
|
||||
self.text_extract_status[item.file_name] = {
|
||||
"status": result.status,
|
||||
"path": str(path),
|
||||
"content_hash": result.content_hash,
|
||||
"section_candidates": result.section_candidates,
|
||||
"field_candidates": result.field_candidates,
|
||||
"front_text": result.front_text,
|
||||
"error_message": result.error_message,
|
||||
}
|
||||
if result.status == "success" and result.text:
|
||||
texts[item.file_name] = result.text
|
||||
return texts
|
||||
|
||||
Reference in New Issue
Block a user