Compare commits

...

29 Commits

Author SHA1 Message Date
56225f40d9 docs(application-form-fill): 完善申报文件自动填表设计文档 2026-06-07 17:05:08 +08:00
3e8720e521 feat(regulatory): 按实际处理数量更新节点进度 2026-06-07 13:32:06 +08:00
32d258bb75 feat(regulatory): 输出法规核查过程日志 2026-06-07 13:23:55 +08:00
0f9fb980f2 fix(regulatory): 为LLM复核超时增加重试 2026-06-07 13:03:24 +08:00
9e27c4c684 fix(regulatory): 修复无标签文档适用条件回显 2026-06-07 12:29:22 +08:00
1b4a10b5ba fix(regulatory): 自动执行法规核查前置汇总 2026-06-07 12:17:20 +08:00
911e5378e8 fix(regulatory): 修复条件确认实时轮询和重复提示 2026-06-07 12:09:02 +08:00
8f16675a92 feat(regulatory): 为核查流程增加LLM复核记录 2026-06-07 11:52:54 +08:00
945669b9c2 feat(regulatory): 增加条件字段LLM复核 2026-06-07 11:46:55 +08:00
a34684e490 fix(regulatory): 修复换行产品名称提取不全 2026-06-07 11:30:48 +08:00
72f18167c5 fix(regulatory): 轮询时加载条件确认卡 2026-06-07 11:27:12 +08:00
b8d711729d feat(regulatory): 支持按附件4章节核查 2026-06-07 11:17:57 +08:00
f46d9c5be6 fix(regulatory): 缺失问题标题显示章节序号 2026-06-07 11:12:19 +08:00
462d3ec5f5 fix(regulatory): 优先从附件字段识别适用条件 2026-06-07 10:40:05 +08:00
12b476a8ef fix(regulatory): 将条件确认移入对话区 2026-06-07 10:37:12 +08:00
48d94884b9 docs(regulatory): 补充第二批附件4开发计划 2026-06-07 10:31:39 +08:00
4e46f27c28 feat(regulatory): 完善复核前端入口 2026-06-07 09:40:18 +08:00
d39e3fe2d5 feat(regulatory): 增加mock通知留痕 2026-06-07 09:35:24 +08:00
d88d642f6a feat(regulatory): 增加整改复核闭环 2026-06-07 09:32:39 +08:00
1bdc7322cf feat(regulatory): 对齐附件4目录核查规则 2026-06-07 09:27:42 +08:00
bbd2d3532a feat(regulatory): 增加适用条件确认暂停恢复 2026-06-07 09:19:31 +08:00
bd805203f1 feat(regulatory): 展示法规核查工作流卡片 2026-06-07 00:43:18 +08:00
4c28466fe4 feat(regulatory): 增加风险归并与核查报告导出 2026-06-07 00:39:33 +08:00
ec89e62661 feat(regulatory): 增加法规核查基础服务 2026-06-07 00:36:18 +08:00
44d31d2a14 feat(regulatory): 接入法规核查触发与工作流骨架 2026-06-07 00:34:12 +08:00
26490f7c46 feat(regulatory): 增加本地法规RAG索引检索 2026-06-07 00:30:53 +08:00
2a4dd6cfab feat(regulatory): 增加法规规则版本检查 2026-06-07 00:26:19 +08:00
f52dcc197d feat(regulatory): 新增法规核查模型与工作流通用字段 2026-06-07 00:23:58 +08:00
665403735a chore: 确认法规核查基线回归通过 2026-06-07 00:20:16 +08:00
73 changed files with 9726 additions and 59 deletions

View File

@@ -105,13 +105,40 @@ LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1") LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1")
LLM_MODEL = os.environ.get("LLM_MODEL", "") LLM_MODEL = os.environ.get("LLM_MODEL", "")
REGULATORY_RAG_PROVIDER = os.environ.get("REGULATORY_RAG_PROVIDER", "siliconflow")
REGULATORY_RAG_CHROMA_PATH = os.environ.get(
"REGULATORY_RAG_CHROMA_PATH",
str(MEDIA_ROOT / "regulatory_review" / "rag" / "chroma"),
)
REGULATORY_RAG_COLLECTION = os.environ.get(
"REGULATORY_RAG_COLLECTION",
"nmpa_ivd_registration_v1",
)
REGULATORY_REVIEW_ASYNC = os.environ.get("REGULATORY_REVIEW_ASYNC", "true").lower() == "true"
REGULATORY_LLM_REVIEW_MAX_ATTEMPTS = int(os.environ.get("REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", "3"))
REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", "0.5"))
REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", "15"))
SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1")
SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", "")
SILICONFLOW_EMBEDDING_MODEL = os.environ.get(
"SILICONFLOW_EMBEDDING_MODEL",
"Qwen/Qwen3-Embedding-4B",
)
SILICONFLOW_EMBEDDING_DIMENSIONS = int(os.environ.get("SILICONFLOW_EMBEDDING_DIMENSIONS", "1024"))
LOGGING = { LOGGING = {
"version": 1, "version": 1,
"disable_existing_loggers": False, "disable_existing_loggers": False,
"filters": {
"suppress_workflow_status_poll": {
"()": "review_agent.logging_filters.SuppressWorkflowStatusPollFilter",
},
},
"handlers": { "handlers": {
"console": { "console": {
"class": "logging.StreamHandler", "class": "logging.StreamHandler",
"formatter": "verbose", "formatter": "verbose",
"filters": ["suppress_workflow_status_poll"],
}, },
}, },
"formatters": { "formatters": {
@@ -125,5 +152,10 @@ LOGGING = {
"level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"), "level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"),
"propagate": True, "propagate": True,
}, },
"django.server": {
"handlers": ["console"],
"level": "INFO",
"propagate": False,
},
}, },
} }

View File

@@ -0,0 +1,394 @@
# 产品关键信息提取与申报文件自动填表需求分析
## 文档信息
| 项目 | 内容 |
| --- | --- |
| 原始材料 | docs/原始材料/【模拟题二】试剂盒临床注册文件准备与审核Agent.docx |
| 法规模板来源 | docs/原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告 |
| 功能主题 | 从产品文件中提取关键信息并自动填写至指定申报模板 |
| 分析日期 | 2026-06-07 |
| 分析版本 | V1.0 |
---
## 一、需求背景
试剂盒及体外诊断试剂注册申报过程中,注册人员需要将同一批产品关键信息重复填写到注册证格式文件、变更注册或备案文件、安全和性能基本原则清单等申报材料中。人工复制粘贴容易出现字段遗漏、表述不一致、来源不可追溯和模板误改等问题。
原始任务中的第 3 条能力要求系统能够“从产品文件中提取关键信息并自动填写至目标文件”。本功能目标是:系统基于用户上传的产品说明书、产品技术要求、检测报告、性能研究资料等文件,自动抽取产品名称、检测靶标、适用范围、储存条件、性能指标等核心信息,复制指定法规模板生成可填写副本,将抽取结果写入模板,并输出 Word 与 PDF 两种下载文件。
本功能是前两批能力的后续增强:依赖第一批文件汇总结果定位产品文件,复用第二批文本抽取、适用条件确认和一致性核查能力,同时新增“模板识别、字段映射、模板填充、冲突高亮、PDF 转换、来源追溯”能力。
---
## 二、需求范围
### 2.1 本期范围
| 序号 | 范围项 | 说明 |
| --- | --- | --- |
| 1 | 目标模板复制 | 从原始法规资料中复制指定模板,不覆盖原始文件 |
| 2 | 注册类型选择 | 首次注册填写注册证格式;变更注册或备案填写变更注册(备案)文件格式 |
| 3 | 安全和性能基本原则清单填写 | 无论首次注册或变更注册,均生成并填写安全和性能基本原则清单 |
| 4 | 产品信息提取 | 从产品说明书、产品技术要求、检测报告、性能研究资料等文件中抽取模板所需字段 |
| 5 | 模板字段识别 | 读取目标模板中的表格、段落、占位栏位和清单条目,建立字段映射 |
| 6 | 自动填入模板 | 将抽取字段写入模板副本,缺失字段保持留空 |
| 7 | 冲突标记 | 同一字段在多个文件中不一致时,按说明书为准填写,并在模板中黄色底色、红色字体标记 |
| 8 | 冲突摘要展示 | AI 对话框展示冲突字段、采用值、冲突来源和待用户下载确认提示 |
| 9 | Word 导出 | 输出填好的 `.docx` 或可编辑 Word 文件 |
| 10 | PDF 导出 | 将填好的 Word 转换为 PDF,尽量保持原 Word 模板版式一致,可用于正式提交前预览 |
| 11 | 来源追溯 | 允许额外输出字段来源追溯清单,记录字段来源文件、文本片段、冲突状态和填入目标 |
### 2.2 非本期范围
| 序号 | 范围项 | 说明 |
| --- | --- | --- |
| 1 | 直接覆盖原始法规模板 | 原始材料只作为模板来源,不允许被改写 |
| 2 | 自动代替人工最终确认 | 系统生成带标记文件,用户自行下载核对确认 |
| 3 | 在线提交 NMPA 系统 | 本期只生成申报文件,不对接外部申报系统 |
| 4 | 全部法规表单覆盖 | 本期仅覆盖用户指定的三个目标模板 |
| 5 | 复杂版式人工校订 | 系统尽量保持模板版式,复杂错位仍需人工最终复核 |
---
## 三、目标模板
本期一共处理三个目标模板。用户此前重复提到“体外诊断试剂安全和性能基本原则清单”,经确认属于误填,实际只有一个该清单模板。
| 序号 | 模板名称 | 原始文件 | 使用条件 | 输出要求 |
| --- | --- | --- | --- | --- |
| 1 | 中华人民共和国医疗器械注册证(体外诊断试剂)(格式) | 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx | 首次注册 | Word + PDF |
| 2 | 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式) | 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式).doc | 变更注册或备案 | Word + PDF |
| 3 | 体外诊断试剂安全和性能基本原则清单 | 体外诊断试剂安全和性能基本原则清单.doc | 首次注册、变更注册、备案均适用 | Word + PDF |
### 3.1 已识别注册证模板字段
`中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx` 中已识别到以下表格栏目:
| 字段 | 填写规则 |
| --- | --- |
| 注册人名称 | 从申请人、注册人、企业信息类文件中抽取 |
| 注册人住所 | 从申请人、注册人、企业信息类文件中抽取 |
| 生产地址 | 从注册资料、说明书、质量体系或生产信息文件中抽取 |
| 代理人名称 | 进口体外诊断试剂适用,境内产品可留空 |
| 代理人住所 | 进口体外诊断试剂适用,境内产品可留空 |
| 产品名称 | 优先取说明书字段 |
| 包装规格 | 对应型号规格、包装规格 |
| 主要组成成分 | 优先取说明书和产品技术要求 |
| 预期用途 | 对应适用范围、预期用途 |
| 产品储存条件及有效期 | 对应储存条件、有效期 |
| 附件 | 默认包含产品技术要求、说明书,可根据实际文件匹配补充 |
| 其他内容 | 未识别或需人工确认时留空 |
| 备注 | 未识别或需人工确认时留空 |
### 3.2 模板解析约束
变更注册(备案)文件格式和安全和性能基本原则清单当前为 `.doc` 格式。系统实施时需要支持以下任一方案:
| 方案 | 说明 |
| --- | --- |
| LibreOffice 转换 | 使用 LibreOffice/soffice 将 `.doc` 转为 `.docx` 后识别和填写 |
| 预转换模板 | 项目内预先保存经人工确认的 `.docx` 模板副本 |
| OOXML/COM 方案 | 在 Windows 环境通过 Office 自动化读取和转换模板 |
无论采用哪种方式,转换后的模板必须保留原文件表格结构、分页、字体和版式;PDF 导出需以填好的 Word 为来源。
---
## 四、用户角色与使用场景
| 角色 | 诉求 | 典型场景 |
| --- | --- | --- |
| 注册人员 | 减少重复填表,提高字段一致性 | 上传注册资料包后生成已填注册证格式和基本原则清单 |
| 变更注册负责人 | 根据变更类型生成变更注册或备案文件 | 上传变更资料后生成已填变更注册(备案)文件 |
| 审核人员 | 快速定位字段来源和冲突 | 下载带冲突高亮的 Word/PDF,并查看 AI 对话框冲突摘要 |
| 系统管理员 | 维护模板版本和转换能力 | 更新法规模板、检查 PDF 转换服务和导出记录 |
---
## 五、业务流程分析
### 5.1 主流程
```text
用户上传产品注册资料
-> 系统执行文件目录与页数汇总
-> 系统执行法规核查前置文本抽取
-> 系统识别注册类型:首次注册、变更注册或备案
-> 系统选择本次适用目标模板
-> 系统复制原始模板到批次工作目录
-> 系统读取目标模板栏目和清单条目
-> 系统从产品文件中抽取模板所需字段
-> 系统按字段优先级合并抽取结果
-> 如字段存在跨文件冲突,系统按说明书为准填入,并做黄色底色、红色字体标记
-> 缺失字段保持留空
-> 系统逐条判断安全和性能基本原则清单的适用性、符合性证据和证明文件位置
-> 系统生成已填 Word 文件
-> 系统将已填 Word 转换为 PDF
-> 系统生成来源追溯清单
-> AI 对话框展示生成结果、冲突字段摘要和下载链接
-> 用户下载 Word/PDF 自行确认
```
### 5.2 注册类型分支
| 注册类型 | 生成文件 |
| --- | --- |
| 首次注册 | 注册证格式 Word/PDF;安全和性能基本原则清单 Word/PDF |
| 变更注册 | 变更注册(备案)文件 Word/PDF;安全和性能基本原则清单 Word/PDF |
| 备案 | 变更注册(备案)文件 Word/PDF;安全和性能基本原则清单 Word/PDF |
| 注册类型无法识别 | AI 对话框提示待确认;默认不生成注册证或变更文件,只可生成带待确认标记的草稿版本 |
### 5.3 异常流程
| 异常场景 | 处理方式 |
| --- | --- |
| 模板文件不存在 | 批次标记失败,对话框提示缺少目标模板 |
| `.doc` 模板无法转换 | 对应模板导出失败,其他模板继续生成 |
| 字段未提取到 | 目标栏位留空,来源追溯清单记录为空 |
| 字段冲突 | 按说明书为准填入,模板内高亮标记,对话框展示冲突摘要 |
| PDF 转换失败 | 保留 Word 下载,提示 PDF 生成失败原因 |
| 模板版式明显错位 | 标记为需人工复核,不阻断 Word 文件下载 |
---
## 六、信息提取与字段规则
### 6.1 字段范围
字段范围不固定写死,应以三个目标模板的实际栏目和清单条目为准动态建立。Demo 阶段优先覆盖以下字段:
| 字段 | 说明 |
| --- | --- |
| 产品名称 | 产品标准名称 |
| 检测靶标 | 被检测物、基因、抗原、抗体、病原体或生物标志物 |
| 适用范围/预期用途 | 适用人群、样本类型、检测目的、临床用途 |
| 储存条件 | 温度、避光、防潮等保存条件 |
| 性能指标 | 分析灵敏度、特异性、重复性、准确度、检出限等 |
| 型号规格/包装规格 | 规格型号、包装规格、人份数或测试数 |
| 样本类型 | 血清、血浆、全血、咽拭子等 |
| 有效期 | 产品有效期或稳定性期限 |
| 主要组成成分 | 试剂、校准品、质控品、耗材等组成 |
| 检验原理 | 反应原理、方法学或检测平台 |
| 注册人/申请人 | 注册申请主体 |
| 生产地址 | 生产场所地址 |
### 6.2 来源文件优先级
| 优先级 | 文件类型 | 说明 |
| --- | --- | --- |
| 1 | 说明书 | 字段冲突时默认以说明书为准 |
| 2 | 产品技术要求 | 用于补充性能指标、检验方法、组成成分等字段 |
| 3 | 注册检验报告/检测报告 | 用于补充性能指标、样本信息、检验依据和结论 |
| 4 | 性能研究资料 | 用于补充安全和性能基本原则清单证据 |
| 5 | 其他注册资料 | 用于补充申请人、生产地址、附件清单等信息 |
### 6.3 冲突处理规则
| 场景 | 处理方式 |
| --- | --- |
| 说明书与其他文件字段不一致 | 按说明书值填入模板 |
| 多个非说明书文件不一致,说明书缺失 | 目标字段留空或取最高优先级来源,具体规则由实现阶段配置 |
| 字段被高亮标记 | 黄色底色、红色字体,提示用户下载后确认 |
| AI 对话框展示 | 展示字段名、采用值、冲突值、来源文件和目标模板 |
---
## 七、安全和性能基本原则清单填写规则
安全和性能基本原则清单不只填写基础产品信息,还需要根据产品文件内容逐条判断清单条目的适用性、符合性证据和证明文件位置。
| 填写项 | 规则 |
| --- | --- |
| 适用/不适用 | 根据产品特性、检测方法、样本类型、是否含仪器/软件/灭菌/生物材料等信息判断 |
| 符合性说明 | 从产品技术要求、说明书、风险管理、性能研究、稳定性研究等文件中提取证据摘要 |
| 证明文件位置 | 填写证据文件名、章节、页码或可定位文本片段 |
| 无法判断 | 留空或标记待人工确认,来源追溯清单记录原因 |
| 冲突证据 | 如不同文件对同一条款适用性或证据描述冲突,保留高亮并在对话框列出 |
逐条判断结果需要可追溯,不能只输出“适用”或“不适用”结论。
---
## 八、输出要求
### 8.1 文件命名
文件命名规则:
```text
批次号-产品名称-注册证格式.docx
批次号-产品名称-注册证格式.pdf
批次号-产品名称-变更注册备案文件.docx
批次号-产品名称-变更注册备案文件.pdf
批次号-产品名称-安全和性能基本原则清单.docx
批次号-产品名称-安全和性能基本原则清单.pdf
批次号-产品名称-字段来源追溯清单.xlsx
```
产品名称为空时,可使用 `未识别产品名称` 作为文件名占位。
### 8.2 AI 对话框摘要
AI 对话框应展示生成结果、下载链接和冲突字段摘要。
```markdown
已生成申报模板自动填表文件。
| 文件 | Word | PDF |
| --- | --- | --- |
| 注册证格式 | 下载 | 下载 |
| 安全和性能基本原则清单 | 下载 | 下载 |
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
| --- | --- | --- | --- |
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
```
### 8.3 Word 输出
| 要求 | 说明 |
| --- | --- |
| 模板副本 | 从原始法规模板复制生成,不覆盖原始文件 |
| 版式保持 | 保留原模板表格、段落、分页、字体和标题结构 |
| 冲突高亮 | 黄色底色、红色字体 |
| 缺失字段 | 留空,不填“待补充” |
| 可编辑 | 用户可下载后继续人工修改 |
### 8.4 PDF 输出
| 要求 | 说明 |
| --- | --- |
| 来源 | 由填好的 Word 转换生成 |
| 版式 | 尽量与原 Word 模板一致 |
| 用途 | 可作为正式提交前预览 |
| 失败处理 | PDF 失败不影响 Word 下载 |
### 8.5 来源追溯清单
来源追溯清单允许额外生成,建议至少包含:
| 字段 | 说明 |
| --- | --- |
| 目标模板 | 字段填入哪个模板 |
| 目标栏位/条目 | 字段对应的表格栏位或清单条目 |
| 填入值 | 实际写入模板的值 |
| 来源文件 | 取值来源文件 |
| 来源片段 | 支撑取值的文本片段 |
| 是否冲突 | 是/否 |
| 冲突值 | 其他文件中的不同值 |
| 处理方式 | 采用说明书、留空、高亮、待人工确认等 |
---
## 九、功能模块梳理
| 序号 | 功能名称 | 功能描述 | 优先级 |
| --- | --- | --- | --- |
| 1 | 模板管理 | 维护三个目标模板路径、版本和适用注册类型 | P0 |
| 2 | 模板副本生成 | 将原始模板复制到批次工作目录 | P0 |
| 3 | 模板结构识别 | 识别模板中的表格字段、段落占位、清单条目 | P0 |
| 4 | 产品字段抽取 | 从上传文件中抽取模板所需产品字段 | P0 |
| 5 | 字段合并与冲突检测 | 按说明书优先级合并字段,并识别跨文件冲突 | P0 |
| 6 | Word 模板填充 | 将字段写入 Word 模板副本 | P0 |
| 7 | 冲突高亮 | 对冲突字段应用黄色底色和红色字体 | P0 |
| 8 | 基本原则逐条判断 | 判断安全和性能条目的适用性、符合性证据和证明文件位置 | P0 |
| 9 | PDF 转换 | 将填好的 Word 转为 PDF | P0 |
| 10 | 下载链接生成 | 在 AI 对话框提供 Word/PDF 下载链接 | P0 |
| 11 | 来源追溯清单导出 | 输出字段来源、冲突和填入目标 | P1 |
| 12 | 版式 QA | 对 Word/PDF 版式进行自动或人工可见检查 | P1 |
---
## 十、数据实体分析
| 实体名称 | 字段说明 | 关联实体 |
| --- | --- | --- |
| 自动填表批次 | 批次编号、用户、会话、注册类型、产品名称、状态、错误信息、创建时间、完成时间 | 文件汇总批次、法规核查批次 |
| 模板副本 | 模板名称、模板类型、原始模板路径、副本路径、模板版本、适用条件 | 自动填表批次 |
| 提取字段 | 字段名、填入值、来源文件、来源片段、来源优先级、是否冲突、冲突详情 | 自动填表批次 |
| 填表结果文件 | 文件类型、文件名、Word 路径、PDF 路径、下载状态 | 自动填表批次 |
| 清单条目判断 | 条目编号、条目内容、适用性、符合性证据、证明文件位置、判断来源 | 自动填表批次 |
---
## 十一、非功能性需求
### 11.1 可追溯性
| 要求 | 说明 |
| --- | --- |
| 字段来源可追溯 | 每个填入字段应能追溯到来源文件和文本片段 |
| 模板版本可追溯 | 每次生成记录原始模板文件名、版本和路径 |
| 冲突处理可追溯 | 冲突字段记录采用值、冲突值和处理规则 |
| 输出文件可追溯 | Word/PDF 文件关联批次、用户和会话 |
### 11.2 安全要求
| 要求 | 说明 |
| --- | --- |
| 原始模板保护 | 不允许覆盖或修改原始法规资料目录中的模板 |
| 下载权限 | Word/PDF/追溯清单仅允许当前会话授权用户下载 |
| 敏感信息保护 | 对话框只展示必要冲突摘要,不展示大段敏感原文 |
| 文件隔离 | 不同用户、不同批次的模板副本和导出文件隔离存储 |
### 11.3 版式要求
| 要求 | 说明 |
| --- | --- |
| Word 版式 | 尽量保持原模板表格、字体、分页和段落结构 |
| PDF 版式 | 与填好后的 Word 一致,可用于正式提交前预览 |
| 高亮可见 | 冲突字段在 Word 和 PDF 中均应能被用户识别 |
| 缺失字段不污染模板 | 未提取字段留空,不填入系统提示语 |
### 11.4 性能要求
| 场景 | 要求 |
| --- | --- |
| 小批次资料 | 50 个文件以内,应在 1 分钟内完成字段抽取和模板生成 |
| 中等批次资料 | 200 个文件以内,支持后台异步处理和进度提示 |
| 单个模板失败 | 不影响其他适用模板生成 |
| 单个字段失败 | 不影响整份模板生成,字段留空并记录原因 |
---
## 十二、待后续确认事项
| 序号 | 待确认项 | 当前建议 |
| --- | --- | --- |
| 1 | `.doc` 模板转换方案 | 优先使用 LibreOffice/soffice 转 docx;无法部署时预置人工确认版 docx 模板 |
| 2 | 变更注册(备案)文件字段清单 | 需在模板可解析后补充字段映射 |
| 3 | 安全和性能基本原则清单条目结构 | 需在模板可解析后拆解条目编号、要求、适用性和证据栏 |
| 4 | 说明书识别规则 | 需明确如何从上传资料中判定哪份文件是说明书 |
| 5 | PDF 转换质量标准 | 需明确是否要求逐页渲染检查、页数一致和关键表格不跨页错位 |
| 6 | 注册类型无法识别时是否允许生成草稿 | 建议允许生成安全和性能基本原则清单,注册证或变更文件等待确认 |
---
## 十三、验收标准
| 序号 | 验收项 | 验收标准 |
| --- | --- | --- |
| 1 | 模板复制 | 系统生成模板副本,不修改原始法规模板 |
| 2 | 首次注册文件选择 | 首次注册场景生成注册证格式和安全和性能基本原则清单 |
| 3 | 变更注册/备案文件选择 | 变更注册或备案场景生成变更注册(备案)文件和安全和性能基本原则清单 |
| 4 | 字段自动填写 | 产品名称、预期用途、储存条件、包装规格等字段能自动写入对应栏目 |
| 5 | 缺失字段留空 | 未提取到的字段保持空白 |
| 6 | 冲突字段高亮 | 字段冲突时按说明书值填入,并在 Word/PDF 中黄色底色、红色字体标记 |
| 7 | 冲突摘要展示 | AI 对话框展示冲突字段、采用值、冲突来源和处理方式 |
| 8 | 基本原则清单判断 | 系统能逐条输出适用/不适用、符合性证据和证明文件位置 |
| 9 | Word 下载 | 对话框提供填好后的 Word 下载链接 |
| 10 | PDF 下载 | 对话框提供由 Word 转换生成的 PDF 下载链接 |
| 11 | 来源追溯 | 可导出字段来源追溯清单,记录字段来源和冲突情况 |
| 12 | 异常不中断 | 单个字段、单个模板或 PDF 转换失败时,其他结果仍可正常输出 |
---
## 十四、下一步建议
1. 将两个 `.doc` 原始模板转换为可解析的 `.docx` 工作模板,并人工确认版式无明显变化。
2. 拆解三个模板的字段、表格和清单条目,形成模板字段映射配置。
3. 扩展产品信息抽取字段,优先覆盖注册证模板已识别字段和安全和性能基本原则清单证据字段。
4. 设计冲突高亮写入规则,确保 Word 与 PDF 中均可见。
5. 接入 Word 到 PDF 转换能力,并建立页数、版式和关键表格的转换质量检查。

View File

@@ -0,0 +1,816 @@
# 产品关键信息提取与申报文件自动填表功能设计
## 文档信息
| 项目 | 内容 |
| --- | --- |
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
| 依赖功能设计 | docs/2.功能设计/1.自动汇总.md、docs/2.功能设计/2.NMPA注册资料法规核查与整改闭环.md |
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
| 所属模块 | 审核智能体 review_agent |
| 设计日期 | 2026-06-07 |
| 设计版本 | V1.0 |
---
## 一、设计目标
本功能作为独立工作流 `application_form_fill` 建设,由用户在 AI 对话中触发,例如“帮我填注册证”“给我这个内容对应的表格”“为我该方案生成申报模板”“生成安全和性能基本原则清单”“把产品信息填到申报模板里”等。用户可以明确指定目标模板;未指定时,系统根据识别出的注册类型生成当前注册类型适用的全部模板。
本功能复用第一批文件汇总结果作为文件来源,复用第二批法规核查中的文本抽取、适用条件识别、LLM 调用、飞书通知和导出下载能力,但拥有独立批次、独立工作流卡片和独立过程产物。系统复制原始法规模板到批次工作目录,不覆盖原始文件;随后按模板配置识别应填字段,使用规则/正则抽取与 LLM 结构化抽取并行处理,合并字段、识别冲突、写入 Word 模板,并在 AI 对话框和飞书通知中提示生成结果与冲突摘要。
Demo 阶段优先保证 Word 模板自动填写和下载。PDF 转换作为待办增强项:功能设计保留 PDF 导出节点和数据结构,实施时可先返回 Word 与追溯清单,并在待办清单记录 PDF 转换能力。
---
## 二、与既有功能的关系
### 2.1 复用边界
| 能力 | 处理方式 | 现有代码/模型 |
| --- | --- | --- |
| 对话与用户权限 | 复用 | `Conversation`、`Message` |
| 附件上传与文件绑定 | 复用 | `FileAttachment`、`FileSummaryBatchAttachment` |
| 文件汇总与页数统计 | 复用 | `FileSummaryBatch`、`FileSummaryItem`、`file_summary.workflow` |
| 文本抽取 | 复用并扩展 | `regulatory_review/services/text_extract.py`、`rag_index.py` |
| 适用条件候选 | 复用并扩展 | `regulatory_review/services/info_extract.py` |
| LLM 调用 | 复用 | `review_agent/llm.py`、`regulatory_review/services/llm_review.py` |
| 导出记录与下载 | 扩展复用 | `ExportedSummaryFile` |
| 过程产物 | 复用 | `RegulatoryArtifact` 或新增填表过程产物 |
| 飞书通知 | 复用并扩展 | `regulatory_review/services/feishu_notifier.py` |
| SSE 工作流事件 | 复用 | `WorkflowNodeRun`、`WorkflowEvent` |
### 2.2 新增边界
| 能力 | 说明 |
| --- | --- |
| 独立填表批次 | 新增 `ApplicationFormFillBatch`,不强绑法规核查批次 |
| 模板配置 | 新增 YAML 配置,维护模板路径、适用条件、字段映射和输出规则 |
| 模板选择 | 根据用户指定模板和注册类型选择生成范围 |
| 规则/正则与 LLM 并行抽取 | 两路抽取并行执行,最后统一合并 |
| 字段冲突归并 | 按来源文件优先级处理,说明书优先;冲突字段高亮 |
| Word 模板填充 | 使用 `python-docx` 向 `.docx` 表格、段落和占位字段写入 |
| `.doc` 模板转换 | 使用 LibreOffice/soffice 或预转换 `.docx` 模板 |
| 字段来源追溯 | 输出 Excel/JSON 追溯清单,记录抽取、合并和冲突证据 |
---
## 三、总体架构
### 3.1 架构原则
| 原则 | 说明 |
| --- | --- |
| 独立工作流 | 填表流程拥有独立批次、节点和卡片,workflow_type 为 `application_form_fill` |
| 复用文件汇总 | 填表不重新实现上传扫描,默认使用当前对话最近成功的 `FileSummaryBatch` |
| 用户指令优先 | 用户明确指定模板或注册类型时,优先使用用户指令 |
| 配置驱动 | 模板路径、字段映射、适用条件和输出规则写入 YAML 配置 |
| Word 优先 | Demo 阶段优先生成可编辑 Word,PDF 作为增强项进入待办 |
| 可追溯 | 规则抽取、LLM 抽取、合并结果、冲突列表和来源证据均留底 |
| 失败隔离 | 单字段、单模板或 PDF 转换失败不影响其他模板输出 |
| 通知可控 | 填表完成后可通过飞书通知上传人,通知内容只包含摘要和下载提示 |
### 3.2 逻辑架构
```mermaid
flowchart TD
A["AI 对话页"] --> B["意图识别 application_form_fill"]
B --> C{"本次消息是否带附件"}
C -->|"是"| D["先执行文件汇总工作流"]
C -->|"否"| E["查找最近成功 FileSummaryBatch"]
D --> E
E --> F["ApplicationFormFillBatch"]
F --> G["FormFillWorkflowExecutor"]
G --> H["模板配置 YAML"]
G --> I["模板选择服务"]
G --> J["文本抽取服务"]
J --> K1["规则/正则抽取"]
J --> K2["LLM 结构化抽取"]
K1 --> L["字段合并与冲突归并"]
K2 --> L
L --> M["Word 模板填充服务"]
M --> N["追溯清单导出"]
M --> O["PDF 转换服务 P1"]
N --> P["ExportedSummaryFile"]
O --> P
G --> Q["WorkflowEvent/SSE"]
Q --> R["自动填表工作流卡片"]
G --> S["FeishuNotifier"]
S --> T["上传人通知"]
```
### 3.3 技术选型
| 设计项 | Demo 方案 | 后续演进 |
| --- | --- | --- |
| Web 框架 | Django,沿用当前 `review_agent` 应用 | 保持 Django,必要时拆分独立 app |
| 工作流编排 | 新增轻量 `FormFillWorkflowExecutor` | 接入 LangGraph 子图 |
| 后台执行 | Django 后台线程,沿用现有工作流方式 | Celery/RQ + Redis |
| 工作流状态 | `WorkflowNodeRun` + `WorkflowEvent`,新增 workflow_type | 独立工作流事件中心 |
| 模板配置 | YAML,建议路径 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` | 数据库模板管理后台 |
| Word 处理 | `python-docx` 写入 `.docx` 表格和段落,高亮冲突字段 | OOXML 精细 patch、内容控件 SDT |
| `.doc` 转换 | LibreOffice/soffice headless 转 `.docx`;无法部署时预置 `.docx` 工作模板 | 模板入库前统一转换和人工校验 |
| PDF 导出 | P1 待办,LibreOffice/soffice headless 转 PDF | 逐页渲染 QA、版式差异检测 |
| Excel 追溯清单 | `openpyxl` | 增加多 Sheet 审核视图 |
| 文本抽取 | 复用 `text_extract.py`、`rag_index.py` | OCR、文档文本缓存 |
| 字段抽取 | 规则/正则与 LLM 结构化抽取并行,合并后输出 | 可配置抽取器和置信度模型 |
| 飞书通知 | 复用 `FeishuNotifier`,Demo 可 mock 或 CLI | 飞书 Webhook/API |
---
## 四、触发与模板选择设计
### 4.1 意图识别
填表工作流通过用户对话触发。意图识别可先采用关键词规则,必要时调用现有 LLM 路由能力。
| 触发表达 | 触发结果 |
| --- | --- |
| 帮我填注册证 | 触发填表,指定注册证格式 |
| 给我这个内容对应的表格 | 触发填表,未指定模板 |
| 为我该方案生成申报模板 | 触发填表,未指定模板 |
| 生成安全和性能基本原则清单 | 触发填表,指定安全和性能基本原则清单 |
| 把产品信息填到申报模板里 | 触发填表,未指定模板 |
| 只生成变更注册备案文件 | 触发填表,指定变更注册(备案)文件 |
### 4.2 文件来源选择
| 场景 | 处理方式 |
| --- | --- |
| 本次消息带新附件 | 先自动执行文件汇总,汇总成功后启动填表 |
| 本次消息无附件 | 默认使用当前对话最近一次成功 `FileSummaryBatch` |
| 无成功汇总批次 | 对话框提示用户先上传资料或补充附件 |
| 用户明确指定历史批次 | 校验批次属于当前对话和当前用户后使用 |
### 4.3 注册类型识别优先级
注册类型用于决定默认生成哪些模板。优先级如下:
```text
用户话语明确指定
-> 当前对话已确认的法规核查条件
-> 上传文件内容抽取结果
-> 无法识别
```
### 4.4 模板选择规则
| 场景 | 生成模板 |
| --- | --- |
| 用户未指定模板,注册类型为首次注册 | 注册证格式;安全和性能基本原则清单 |
| 用户未指定模板,注册类型为变更注册或备案 | 变更注册(备案)文件;安全和性能基本原则清单 |
| 用户未指定模板,注册类型无法识别 | 安全和性能基本原则清单;注册证/变更文件进入待确认提示 |
| 用户明确指定模板且与注册类型一致 | 只生成用户指定模板 |
| 用户明确指定模板但与注册类型不一致 | 允许生成,并在摘要和追溯清单提示“与识别注册类型不一致,需人工确认” |
| 用户指定“全部模板” | 生成三个目标模板,并提示用户核对注册类型适用性 |
---
## 五、工作流设计
### 5.1 节点图
```mermaid
flowchart LR
N1["准备资料"] --> N2["选择模板"]
N2 --> N3["复制模板"]
N3 --> N4["抽取字段"]
N4 --> N5["冲突归并"]
N5 --> N6["填写 Word"]
N6 --> N7["转换 PDF P1"]
N6 --> N8["追溯清单"]
N7 --> N9["输出下载"]
N8 --> N9
N9 --> N10["飞书通知"]
N10 --> N11["完成"]
```
### 5.2 节点定义
| 节点编码 | 节点名称 | 触发服务 | 成功条件 | 失败处理 |
| --- | --- | --- | --- | --- |
| prepare | 准备资料 | `FormFillWorkflowExecutor` | 找到或生成成功的 `FileSummaryBatch` | 无文件汇总则暂停提示上传 |
| template_select | 选择模板 | `TemplateSelectionService` | 输出本次目标模板列表 | 无适用模板则失败 |
| template_copy | 复制模板 | `TemplateRepository` | 模板副本进入批次工作目录 | 单模板失败不影响其他模板 |
| field_extract | 抽取字段 | `FieldExtractionService` | 规则/正则与 LLM 结果留底 | 单文件失败记录并继续 |
| conflict_merge | 冲突归并 | `FieldMergeService` | 输出最终字段和冲突列表 | 无字段时仍生成空模板 |
| word_fill | 填写 Word | `WordTemplateFillService` | 生成填好后的 Word 文件 | 单模板失败记录失败 |
| pdf_convert | 转换 PDF | `PdfConversionService` | P1生成 PDF 文件 | PDF 失败标记 partial_success |
| trace_export | 追溯清单 | `TraceabilityExportService` | 生成 Excel/JSON 追溯清单 | 失败不影响 Word |
| output_export | 输出下载 | `FormFillExportService` | 写入 `ExportedSummaryFile` 并生成下载链接 | 关键 Word 失败则批次失败 |
| notify | 飞书通知 | `FeishuNotifier` | 通知上传人生成完成 | 通知失败不影响下载 |
| completed | 完成 | 工作流执行器 | 更新批次状态和对话消息 | - |
### 5.3 状态设计
| 状态 | 含义 |
| --- | --- |
| pending | 已创建,等待执行 |
| running | 执行中 |
| waiting_user | 缺少文件或关键条件,等待用户补充 |
| success | Word 和必要追溯产物生成成功 |
| partial_success | Word 已生成但部分模板、PDF、追溯清单或通知失败 |
| failed | 所有目标 Word 模板均生成失败 |
| skipped | 当前节点不适用,例如 Demo 阶段跳过 PDF |
---
## 六、模板配置设计
### 6.1 配置文件路径
建议新增:
```text
review_agent/application_form_fill/templates/application_form_templates_v1.yaml
```
### 6.2 配置结构
```yaml
version: application_form_templates_v1
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
templates:
- code: registration_certificate
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
output_label: 注册证格式
applies_when:
registration_type: ["首次注册"]
file_format: docx
fields:
- key: product_name
label: 产品名称
target:
type: table_row
row_label: 产品名称
sources: ["说明书", "产品技术要求", "注册检验报告"]
- key: package_specification
label: 包装规格
target:
type: table_row
row_label: 包装规格
sources: ["说明书", "产品技术要求"]
```
### 6.3 模板配置项
| 配置项 | 说明 |
| --- | --- |
| code | 模板编码,用于用户指定和导出分类 |
| name | 模板中文名称 |
| source_file | 原始模板文件名 |
| working_template | 可选,预转换 `.docx` 工作模板 |
| output_label | 文件命名中的模板标签 |
| applies_when | 默认适用注册类型 |
| fields | 字段映射列表 |
| checklist_items | 安全和性能基本原则清单条目映射 |
| conversion | `.doc` 转 `.docx` 和 PDF 的转换策略 |
### 6.4 已知模板字段
注册证格式当前已从 `.docx` 表格识别到以下字段:注册人名称、注册人住所、生产地址、代理人名称、代理人住所、产品名称、包装规格、主要组成成分、预期用途、产品储存条件及有效期、附件、其他内容、备注。
变更注册(备案)文件和安全和性能基本原则清单当前为 `.doc`,实施前需通过 LibreOffice/soffice 转换或预置人工确认版 `.docx` 工作模板,再补齐字段映射。
---
## 七、字段抽取与合并设计
### 7.1 三层提取链路
```text
模板字段配置
-> 文档字段候选提取
-> 规则/正则抽取与 LLM 结构化抽取并行
-> 字段归一化
-> 来源优先级合并
-> 冲突识别
-> 最终字段包
```
### 7.2 规则/正则抽取
| 能力 | 说明 |
| --- | --- |
| 标签字段识别 | 识别 `产品名称:`、`预期用途:`、`储存条件:` 等标签行 |
| 表格字段识别 | 从 Word/Excel 表格中识别左侧字段名、右侧字段值 |
| 章节范围识别 | 从说明书、产品技术要求中按章节提取连续文本 |
| 文件类型识别 | 根据文件名、目录名和首页标题判断说明书、产品技术要求、检验报告 |
| 证据片段截取 | 保存字段前后上下文,用于追溯清单 |
### 7.3 LLM 结构化抽取
LLM 输入为模板字段清单、文件上下文和候选文本片段,输出严格 JSON:
```json
{
"fields": [
{
"key": "storage_condition",
"label": "产品储存条件及有效期",
"value": "2-8℃保存,有效期12个月",
"source_file": "说明书.docx",
"evidence": "产品储存条件:2-8℃保存...",
"confidence": 0.86
}
],
"checklist_items": [
{
"item_code": "A1",
"applicability": "适用",
"compliance_evidence": "产品技术要求中规定了性能指标和检验方法",
"proof_location": "产品技术要求.docx 第2章"
}
]
}
```
### 7.4 并行合并规则
| 场景 | 处理规则 |
| --- | --- |
| 规则和 LLM 值一致 | 合并为同一字段,提高置信度 |
| 规则和 LLM 值不一致,但来源文件不同 | 按来源文件优先级处理,说明书优先 |
| 规则和 LLM 值不一致,来源文件相同 | 标记冲突,模板中高亮 |
| 说明书与其他文件冲突 | 采用说明书值,黄色底色、红色字体标记 |
| 说明书缺失,多个来源冲突 | 取最高优先级文件值并标记冲突;无法判断则留空 |
| 字段缺失 | 模板留空,追溯清单记录未提取 |
### 7.5 过程产物留底
字段抽取结果保存为 `field_extract_result.json`,至少包含:
| 内容 | 说明 |
| --- | --- |
| regex_results | 规则/正则抽取结果 |
| llm_results | LLM 结构化抽取结果 |
| merged_fields | 合并后的最终字段 |
| conflicts | 冲突字段列表 |
| source_evidence | 来源文件和文本片段 |
| selected_templates | 本次选择的模板 |
---
## 八、安全和性能基本原则清单设计
### 8.1 判断策略
安全和性能基本原则清单采用“候选判断 + 高置信度写入”策略。
| 步骤 | 说明 |
| --- | --- |
| 条目拆解 | 从模板配置中读取条目编号、原则内容、适用性栏、证据栏、证明文件位置栏 |
| 候选判断 | 规则和 LLM 均可给出适用/不适用候选 |
| 证据匹配 | 从产品技术要求、说明书、性能研究、稳定性研究、风险管理资料中匹配证明文件 |
| 高置信度写入 | 仅将高置信度判断写入 Word |
| 低置信度留空 | 证据不足或判断不一致时 Word 留空,追溯清单记录候选判断 |
| 冲突提示 | 冲突条目在对话框和追溯清单中提示,不强行填入 |
### 8.2 输出字段
| 字段 | 说明 |
| --- | --- |
| 条目编号 | 基本原则清单中的条目编码 |
| 条目内容 | 原始原则或要求 |
| 适用性 | 适用/不适用,低置信度留空 |
| 符合性证据 | 高置信度证据摘要 |
| 证明文件位置 | 文件名、章节、页码或文本定位 |
| 置信度 | 用于判断是否写入 Word |
| 候选来源 | 规则、LLM 或两者一致 |
---
## 九、Word 与 PDF 生成设计
### 9.1 Word 模板填充
| 能力 | 说明 |
| --- | --- |
| 模板副本 | 原始模板复制到批次工作目录后再写入 |
| 表格行填充 | 根据行首字段名定位目标单元格 |
| 段落占位填充 | 支持 `{{field_key}}` 等占位符 |
| 清单条目填充 | 按条目编号和配置列写入适用性、证据和证明位置 |
| 冲突高亮 | 冲突字段使用黄色底色和红色字体 |
| 缺失字段 | 保持空白,不写“待补充” |
| 版式保持 | 尽量不改变表格结构、分页和字体 |
### 9.2 PDF 转换
PDF 转换作为 P1 待办增强项设计:
| 阶段 | 处理 |
| --- | --- |
| Demo 主链路 | 优先生成 Word不因 PDF 能力缺失阻断工作流 |
| P1 增强 | 使用 LibreOffice/soffice headless 将 Word 转为 PDF |
| 失败处理 | Word 已生成但 PDF 失败时,批次状态为 `partial_success` |
| QA 增强 | 后续增加 PDF 页数非 0、逐页截图或版式差异检查 |
---
## 十、输出与下载设计
### 10.1 输出文件
| 文件 | Demo 阶段 | P1/P2 |
| --- | --- | --- |
| 填好后的 Word | 必须生成 | 持续支持 |
| PDF 预览 | 待办增强 | LibreOffice 转换生成 |
| 字段来源追溯清单 Excel | 允许生成,建议实现 | 增加多 Sheet |
| 字段抽取 JSON | 过程产物留底 | 支持下载或调试查看 |
### 10.2 文件命名
```text
批次号-产品名称-注册证格式.docx
批次号-产品名称-注册证格式.pdf
批次号-产品名称-变更注册备案文件.docx
批次号-产品名称-变更注册备案文件.pdf
批次号-产品名称-安全和性能基本原则清单.docx
批次号-产品名称-安全和性能基本原则清单.pdf
批次号-产品名称-字段来源追溯清单.xlsx
```
### 10.3 ExportedSummaryFile 扩展
继续复用 `ExportedSummaryFile`,但需要扩展 `ExportType`:
| export_type | 说明 |
| --- | --- |
| markdown | 既有 Markdown 报告 |
| excel | Excel 追溯清单 |
| json | 字段抽取 JSON 或结果包 |
| word | 填好的 Word 文件,新增 |
| pdf | Word 转换后的 PDF,新增 |
填表工作流导出记录建议:
| 字段 | 值 |
| --- | --- |
| workflow_type | `application_form_fill` |
| workflow_batch_id | `ApplicationFormFillBatch.id` |
| export_category | `filled_template`、`traceability`、`extract_result` |
| export_type | `word`、`pdf`、`excel`、`json` |
导出服务入参应包含目标输出类型列表,例如:
```json
{
"output_types": ["word", "pdf", "excel"],
"template_codes": ["registration_certificate", "essential_principles"]
}
```
系统根据入参决定生成哪些类型的内容。
---
## 十一、数据模型设计
### 11.1 ApplicationFormFillBatch
新增自动填表批次表。
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| id | BigAutoField | 主键 |
| conversation | ForeignKey(Conversation) | 绑定对话 |
| user | ForeignKey(User) | 发起用户 |
| source_summary_batch | ForeignKey(FileSummaryBatch) | 文件来源批次 |
| source_regulatory_batch | ForeignKey(RegulatoryReviewBatch, null=True) | 可选,复用已确认法规条件 |
| batch_no | CharField | 填表批次号,如 AFF-YYYYMMDDHHMMSS |
| status | CharField | pending、running、waiting_user、success、partial_success、failed |
| trigger_message | ForeignKey(Message, null=True) | 触发消息 |
| requested_templates | JSONField | 用户指定模板 |
| selected_templates | JSONField | 实际生成模板 |
| output_types | JSONField | 请求输出类型,如 word、pdf、excel |
| registration_type | CharField | 注册类型 |
| product_name | CharField | 产品名称 |
| conflict_summary | JSONField | 冲突摘要 |
| risk_notes | JSONField | 不适用模板、低置信度等提示 |
| work_dir | CharField | 批次工作目录 |
| error_message | TextField | 异常说明 |
| created_at | DateTimeField | 创建时间 |
| started_at | DateTimeField | 开始时间 |
| finished_at | DateTimeField | 完成时间 |
### 11.2 ApplicationFormFillArtifact
可新增独立过程产物表,也可复用 `RegulatoryArtifact`。考虑到这是独立工作流,建议新增轻量产物表,结构与 `RegulatoryArtifact` 保持一致。
| 字段 | 类型 | 说明 |
| --- | --- | --- |
| id | BigAutoField | 主键 |
| batch | ForeignKey(ApplicationFormFillBatch) | 所属填表批次 |
| artifact_type | CharField | template_copy、field_extract_result、merged_fields、traceability、notification_record |
| file_format | CharField | json、excel、docx、pdf |
| name | CharField | 产物名称 |
| storage_path | CharField | 存储路径 |
| metadata | JSONField | 模板编码、输出类型、生成状态等 |
| content_hash | CharField | 文件 hash |
| created_at | DateTimeField | 创建时间 |
### 11.3 与既有模型关系
```text
Conversation 1:N ApplicationFormFillBatch
FileSummaryBatch 1:N ApplicationFormFillBatch
RegulatoryReviewBatch 0:N ApplicationFormFillBatch
ApplicationFormFillBatch 1:N ApplicationFormFillArtifact
ApplicationFormFillBatch 1:N WorkflowNodeRun
ApplicationFormFillBatch 1:N ExportedSummaryFile
```
---
## 十二、后端服务设计
### 12.1 FormFillWorkflowExecutor
| 方法 | 说明 |
| --- | --- |
| run(batch) | 串行执行自动填表节点 |
| run_node(node) | 执行单节点并记录进度 |
| resolve_source_summary_batch() | 根据本次附件或最近成功批次确定来源 |
| emit_event() | 写入 `WorkflowEvent` |
| complete_or_partial() | 根据 Word/PDF/通知结果更新批次状态 |
### 12.2 TemplateSelectionService
| 方法 | 说明 |
| --- | --- |
| parse_requested_templates(message) | 从用户话语中识别指定模板 |
| detect_registration_type() | 按用户话语、法规确认条件、文件抽取识别注册类型 |
| select_templates() | 根据注册类型和用户指令输出模板列表 |
### 12.3 TemplateRepository
| 方法 | 说明 |
| --- | --- |
| load_config() | 读取 YAML 模板配置 |
| resolve_source_template(code) | 找到原始模板或预转换模板 |
| copy_to_work_dir(code, batch) | 复制模板到批次目录 |
| convert_doc_to_docx(path) | `.doc` 转 `.docx` |
### 12.4 FieldExtractionService
| 方法 | 说明 |
| --- | --- |
| extract_by_rules(texts, template_fields) | 规则/正则抽取 |
| extract_by_llm(texts, template_fields) | LLM 结构化抽取 |
| run_parallel() | 并行执行两路抽取 |
| save_extract_artifact() | 保存 `field_extract_result.json` |
### 12.5 FieldMergeService
| 方法 | 说明 |
| --- | --- |
| normalize_fields() | 字段名、单位、空白和同义词归一 |
| rank_sources() | 按说明书、产品技术要求、检验报告等来源排序 |
| merge() | 输出最终字段 |
| detect_conflicts() | 输出冲突列表和高亮标记 |
### 12.6 WordTemplateFillService
| 方法 | 说明 |
| --- | --- |
| fill_table_rows() | 根据行名定位表格单元格并写入 |
| fill_placeholders() | 替换段落占位符 |
| fill_checklist_items() | 写入安全和性能基本原则清单 |
| apply_conflict_highlight() | 黄底红字标记冲突字段 |
| save_docx() | 保存填好后的 Word |
### 12.7 TraceabilityExportService
| 方法 | 说明 |
| --- | --- |
| build_excel() | 生成字段来源追溯清单 |
| build_json() | 生成结构化追溯 JSON |
| create_export_records() | 写入 `ExportedSummaryFile` |
### 12.8 FormFillNotifier
复用或包装 `FeishuNotifier`。
| 通知场景 | 说明 |
| --- | --- |
| 填表成功 | 通知上传人文件已生成 |
| 部分成功 | 通知 Word 已生成,但 PDF/部分模板失败 |
| 冲突字段存在 | 通知中提示存在冲突字段,需下载核对 |
| 失败 | 可选通知失败原因;Demo 可只在对话框展示 |
---
## 十三、接口设计
### 13.1 发起自动填表
| 项目 | 内容 |
| --- | --- |
| URL | POST /api/review-agent/application-form-fill/start/ |
| 认证 | 登录用户 |
| 请求 | conversation_id、message_id、file_summary_batch_id 可选、template_codes 可选、output_types 可选 |
| 响应 | batch_id、workflow_type、status、selected_templates |
处理规则:
```text
校验 conversation 属于当前用户
-> 如本次消息带附件,先执行文件汇总
-> 否则查找当前对话最近成功 FileSummaryBatch
-> 创建 ApplicationFormFillBatch
-> 初始化 WorkflowNodeRun
-> 启动 FormFillWorkflowExecutor
-> 返回工作流卡片初始状态
```
### 13.2 查询自动填表状态
| 项目 | 内容 |
| --- | --- |
| URL | GET /api/review-agent/application-form-fill/{batch_id}/ |
| 认证 | 登录用户 |
| 响应 | 批次状态、节点状态、选择模板、冲突摘要、导出文件 |
### 13.3 下载导出文件
继续复用:
| 项目 | 内容 |
| --- | --- |
| URL | GET /api/review-agent/file-summary/exports/{export_id}/download/ |
| 认证 | 登录用户 |
| 响应 | 文件流 |
权限规则:
```text
export_id -> workflow_type/workflow_batch_id -> ApplicationFormFillBatch -> conversation -> user
必须等于当前登录用户,才允许下载。
```
---
## 十四、前端设计
### 14.1 自动填表工作流卡片
前端新增独立卡片类型 `application_form_fill`,展示节点:
| 节点 | 展示文案 |
| --- | --- |
| prepare | 准备资料 |
| template_select | 选择模板 |
| template_copy | 复制模板 |
| field_extract | 抽取字段 |
| conflict_merge | 冲突归并 |
| word_fill | 填写 Word |
| pdf_convert | 转换 PDF |
| output_export | 输出下载 |
| notify | 飞书通知 |
| completed | 已完成 |
### 14.2 对话框结果展示
工作流完成后,AI 对话框展示 Markdown 摘要:
```markdown
已生成申报模板自动填表文件。
| 文件 | Word | PDF |
| --- | --- | --- |
| 注册证格式 | 下载 | 待生成 |
| 安全和性能基本原则清单 | 下载 | 待生成 |
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
| --- | --- | --- | --- |
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
[下载字段来源追溯清单](download-url)
```
### 14.3 指定模板交互
用户可以通过自然语言指定模板。前端无需额外表单,后端意图识别后在卡片中展示本次选择模板。
---
## 十五、事件设计
### 15.1 SSE 事件结构
```json
{
"event": "workflow",
"workflow_type": "application_form_fill",
"batch_id": 3001,
"conversation_id": 1001,
"node_code": "field_extract",
"node_group": "form_fill",
"status": "running",
"progress": 55,
"message": "正在并行抽取模板字段",
"payload": {
"selected_templates": ["registration_certificate", "essential_principles"],
"processed_files": 8,
"total_files": 20
}
}
```
### 15.2 节点进度
| 节点 | 进度口径 |
| --- | --- |
| 准备资料 | 是否找到来源批次 |
| 选择模板 | 模板数量 |
| 复制模板 | 已复制模板数/总模板数 |
| 抽取字段 | 已处理文件数/总文件数 |
| 冲突归并 | 字段数量和冲突数量 |
| 填写 Word | 已生成 Word 数/目标 Word 数 |
| 转换 PDF | 已生成 PDF 数/目标 PDF 数 |
| 输出下载 | 已创建下载记录数 |
| 飞书通知 | 通知状态 |
---
## 十六、异常与降级设计
| 场景 | 处理 |
| --- | --- |
| 无成功文件汇总批次 | 进入 waiting_user,提示上传资料 |
| 新附件汇总失败 | 填表工作流不启动或标记失败 |
| 用户指定不适用模板 | 允许生成,摘要提示需人工确认 |
| `.doc` 转换失败 | 该模板失败,其他模板继续 |
| 单字段缺失 | Word 留空,追溯清单记录未提取 |
| 规则和 LLM 冲突 | 按来源优先级合并,冲突高亮 |
| 所有 Word 生成失败 | 批次 failed |
| 部分 Word 生成失败 | 批次 partial_success |
| PDF 转换失败 | 批次 partial_success,保留 Word 下载 |
| 飞书通知失败 | 不影响文件下载,记录通知失败 |
---
## 十七、安全设计
| 设计点 | 说明 |
| --- | --- |
| 原始模板保护 | 只读原始模板,所有写入发生在批次工作目录副本 |
| 对话隔离 | 填表批次必须绑定当前 Conversation |
| 文件读取权限 | 只能读取关联 `FileSummaryBatch` 下的文件 |
| 下载权限 | 根据 workflow_type 和 workflow_batch_id 校验当前用户 |
| LLM 输入控制 | 只传必要文本片段和字段上下文,避免发送整包敏感资料 |
| 飞书脱敏 | 通知仅包含生成状态、模板名称、冲突数量和系统内下载提示 |
| 命令调用安全 | LibreOffice/飞书 CLI 使用结构化参数,不拼接用户输入 |
---
## 十八、验收设计
| 序号 | 验收项 | 验收标准 |
| --- | --- | --- |
| 1 | 意图触发 | 用户说“帮我填注册证”等语句可触发 `application_form_fill` |
| 2 | 指定模板 | 用户指定模板时只生成指定模板 |
| 3 | 默认模板 | 未指定模板时按注册类型生成适用的全部模板 |
| 4 | 新附件串联 | 本次消息带附件时先自动汇总,再执行填表 |
| 5 | 最近批次复用 | 无附件时复用当前对话最近成功文件汇总批次 |
| 6 | 工作流卡片 | 前端展示准备资料、选择模板、复制模板、抽取字段、填写 Word 等节点 |
| 7 | 字段并行抽取 | 规则/正则和 LLM 抽取结果均进入过程产物 |
| 8 | 冲突归并 | 说明书优先,冲突字段在 Word 中黄底红字 |
| 9 | 缺失字段 | 未提取字段在 Word 中留空 |
| 10 | 基本原则清单 | 高置信度条目写入,低置信度候选留在追溯清单 |
| 11 | Word 下载 | 对话框提供填好后的 Word 下载链接 |
| 12 | PDF 待办 | Demo 阶段 PDF 可展示为待生成,不阻断 Word |
| 13 | 追溯清单 | 生成字段来源追溯清单,包含规则、LLM、合并和冲突信息 |
| 14 | 飞书通知 | 填表完成后可通知上传人,失败不影响下载 |
| 15 | 权限隔离 | A 对话生成的 Word/追溯清单不能被 B 对话访问 |
---
## 十九、实施建议
1. 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact` 数据模型,扩展 `ExportedSummaryFile.ExportType` 支持 `word`、`pdf`。
2. 新增模板配置 `application_form_templates_v1.yaml`,先录入注册证格式 `.docx` 的已识别字段。
3. 将两个 `.doc` 模板转换为 `.docx` 工作模板,或在配置中标记为待转换模板。
4. 实现 `TemplateSelectionService`,支持用户指定模板、注册类型识别和默认模板选择。
5. 实现规则/正则与 LLM 并行字段抽取,并保存 `field_extract_result.json`。
6. 实现 `FieldMergeService`,按说明书优先规则处理冲突。
7. 实现 `WordTemplateFillService`,优先支持表格行填充和冲突高亮。
8. 实现追溯清单 Excel 导出和 Word 下载记录。
9. 改造前端工作流卡片,新增 `application_form_fill` 类型。
10. 接入飞书通知摘要。
11. 将 PDF 转换、逐页版式 QA 和更完整的 `.doc` 模板转换纳入后续待办。
---
## 二十、待办与待确认事项
| 序号 | 项目 | 当前建议 |
| --- | --- | --- |
| 1 | PDF 转换 | 放入待办,Demo 优先 Word 下载 |
| 2 | `.doc` 模板转换 | 优先 LibreOffice/soffice,不可用时预置 `.docx` 工作模板 |
| 3 | 安全和性能基本原则清单条目拆解 | 需转换模板后补齐 YAML 条目配置 |
| 4 | LLM 结构化抽取提示词 | 需约束输出 JSON schema 和置信度 |
| 5 | 飞书通知渠道 | Demo 可 mock 或 CLI,正式版接 Webhook/API |
| 6 | 低置信度阈值 | 建议功能实现阶段先配置为 0.75 |
| 7 | 版式验证 | P1 增加 PDF 页数检查和逐页截图 QA |

View File

@@ -0,0 +1,790 @@
# 产品关键信息提取与申报文件自动填表详细设计
## 文档信息
| 项目 | 内容 |
| --- | --- |
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
| 数据库设计文档 | docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md |
| 依赖详细设计 | docs/3.详细设计/1.自动汇总.md、docs/3.详细设计/2.NMPA注册资料法规核查与整改闭环.md |
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
| 所属模块 | 审核智能体 review_agent |
| 设计日期 | 2026-06-07 |
| 设计版本 | V1.0 |
---
## 一、详细设计目标
本详细设计用于指导“产品关键信息提取与申报文件自动填表”功能开发落地,覆盖代码结构、数据库模型、模板配置、独立工作流、字段抽取、字段合并、Word 模板填充、追溯清单导出、飞书通知、接口契约、前端卡片、异常降级和测试建议。
核心约束:
| 约束 | 说明 |
| --- | --- |
| 独立工作流 | 使用 `workflow_type=application_form_fill`,拥有独立批次和卡片 |
| 对话触发 | 由用户自然语言触发,可指定模板;未指定时按注册类型选择适用模板 |
| 文件来源复用 | 默认使用当前对话最近成功的 `FileSummaryBatch`;本次带附件时先执行自动汇总 |
| 模板配置驱动 | 模板路径、字段映射、适用条件写入 `application_form_fill/templates/application_form_templates_v1.yaml` |
| Word 优先 | Demo 阶段主链路只要求生成 Word 和追溯清单 |
| PDF 待办 | PDF 转换节点保留,但本期可标记 skipped 并写入待办计划 |
| 抽取并行 | 规则/正则抽取与 LLM 结构化抽取并行执行,再统一合并 |
| 冲突可见 | 说明书优先;冲突字段写入 Word 时黄底红字,并在对话框展示摘要 |
| 过程留底 | 规则抽取、LLM 抽取、合并结果、冲突和追溯清单均保存产物 |
| 飞书通知 | 填表完成后通知上传人,通知失败不阻断下载 |
---
## 二、代码结构设计
### 2.1 目录结构
第三批独立为 `review_agent/application_form_fill/` 模块。Django 模型仍集中在 `review_agent/models.py`,业务服务放入独立模块。
```text
review_agent/
models.py
services.py
skill_router.py
application_form_fill/
__init__.py
constants.py
schemas.py
storage.py
workflow.py
views.py
services/
__init__.py
template_config.py
template_select.py
template_repository.py
field_extract.py
field_merge.py
word_fill.py
traceability_export.py
notifier.py
templates/
application_form_templates_v1.yaml
prompts/
field_extract.md
checklist_extract.md
```
### 2.2 文件职责
| 文件 | 职责 |
| --- | --- |
| application_form_fill/constants.py | 工作流节点、模板编码、状态、输出类型常量 |
| application_form_fill/schemas.py | FormFillContext、TemplateSpec、ExtractedField、MergedField 等 dataclass |
| application_form_fill/storage.py | 批次工作目录、模板副本、产物保存、hash 计算 |
| application_form_fill/workflow.py | FormFillWorkflowExecutor,串行执行独立填表工作流 |
| application_form_fill/views.py | 启动、状态查询、后续可选下载或重试接口 |
| services/template_config.py | 读取和校验 YAML 模板配置 |
| services/template_select.py | 解析用户指定模板、识别注册类型、选择模板 |
| services/template_repository.py | 定位原始模板、复制模板、`.doc` 转 `.docx` 预留 |
| services/field_extract.py | 规则/正则与 LLM 并行字段抽取 |
| services/field_merge.py | 字段归一化、来源排序、冲突识别、最终字段输出 |
| services/word_fill.py | 使用 `python-docx` 写入 Word 表格、段落和高亮 |
| services/traceability_export.py | 生成 Excel/JSON 追溯清单,创建导出记录 |
| services/notifier.py | 包装飞书通知,生成通知记录 |
| prompts/field_extract.md | LLM 字段抽取提示词 |
| prompts/checklist_extract.md | 安全和性能基本原则清单条目判断提示词 |
---
## 三、依赖设计
### 3.1 Python 依赖
| 依赖 | 用途 | 当前项目情况 |
| --- | --- | --- |
| Django | Web、ORM、权限 | 已使用 |
| python-docx | Word 模板读取、表格填充、字体和底色设置 | 已在项目依赖链中使用 |
| openpyxl | 字段来源追溯清单 Excel 导出 | 已使用 |
| PyYAML | YAML 模板配置读取 | 已用于法规规则 |
| pypdf / python-pptx | 文本抽取链路复用 | 已使用 |
| LibreOffice/soffice | `.doc` 转 `.docx`、PDF 转换 | 本期非强依赖,后续待办 |
### 3.2 技术边界
| 能力 | 本期实现 | 后续增强 |
| --- | --- | --- |
| `.docx` 模板填充 | 必须支持 | 支持内容控件、复杂 OOXML patch |
| `.doc` 模板处理 | 可通过预转换模板或标记失败 | 自动 LibreOffice 转换 |
| PDF 转换 | 可跳过并提示待生成 | LibreOffice 转 PDF + 视觉 QA |
| 字段级入库 | 不做 | 新增字段明细表和在线编辑 |
| LLM 抽取 | 输出 JSON 并留底 | 增加置信度校准和人工确认 |
---
## 四、数据模型详细设计
模型放在 `review_agent/models.py`
### 4.1 ApplicationFormFillBatch
```python
class ApplicationFormFillBatch(models.Model):
class Status(models.TextChoices):
PENDING = "pending", "待执行"
RUNNING = "running", "执行中"
WAITING_USER = "waiting_user", "等待用户"
SUCCESS = "success", "成功"
PARTIAL_SUCCESS = "partial_success", "部分成功"
FAILED = "failed", "失败"
CANCELLED = "cancelled", "已取消"
```
关键字段:
| 字段 | 说明 |
| --- | --- |
| conversation | 绑定对话 |
| user | 发起用户 |
| trigger_message | 触发消息 |
| source_summary_batch | 文件来源批次 |
| source_regulatory_batch | 可选法规核查批次 |
| batch_no | `AFF-YYYYMMDDHHMMSS-abcdef` |
| requested_templates | 用户指定模板 |
| selected_templates | 实际生成模板 |
| output_types | 本次请求输出类型,Demo 默认 `["word", "excel", "json"]` |
| registration_type | 识别出的注册类型 |
| registration_type_source | 注册类型来源 |
| product_name | 产品名称 |
| conflict_summary | 冲突摘要 |
| risk_notes | 不适用模板、PDF 待生成等提示 |
| template_config_version | 模板配置版本 |
| template_config_hash | 模板配置 hash |
| work_dir | 批次工作目录 |
### 4.2 ApplicationFormFillArtifact
用于保存过程产物和模板副本元数据。
```python
class ApplicationFormFillArtifact(models.Model):
class ArtifactType(models.TextChoices):
TEMPLATE_COPY = "template_copy", "模板副本"
FIELD_EXTRACT_RESULT = "field_extract_result", "字段抽取结果"
MERGED_FIELDS = "merged_fields", "字段合并结果"
TRACEABILITY = "traceability", "追溯清单"
FILLED_TEMPLATE = "filled_template", "已填模板"
NOTIFICATION_RECORD = "notification_record", "通知记录"
```
### 4.3 ApplicationFormFillNotificationRecord
通知记录字段与第二批法规通知风格一致,支持重试:
| 字段 | 说明 |
| --- | --- |
| batch | 自动填表批次 |
| recipient | 通知对象 |
| channel | feishu_cli、feishu_api、mock |
| template_codes | 涉及模板 |
| export_ids | 关联下载文件 |
| message_summary | 通知摘要 |
| send_status | pending、success、failed |
| retry_count | 重试次数 |
| external_message_id | 飞书外部消息 ID |
| error_message | 失败原因 |
| sent_at | 发送成功时间 |
### 4.4 ExportedSummaryFile 扩展
`ExportedSummaryFile.ExportType` 增加:
```python
WORD = "word", "Word"
PDF = "pdf", "PDF"
```
填表导出记录使用:
| 字段 | 值 |
| --- | --- |
| workflow_type | application_form_fill |
| workflow_batch_id | ApplicationFormFillBatch.id |
| export_category | filled_template、traceability、extract_result |
| export_type | word、excel、json、pdf |
---
## 五、常量设计
### 5.1 工作流节点
```python
FORM_FILL_NODE_DEFINITIONS = [
("prepare", "准备资料", "form_fill"),
("template_select", "选择模板", "form_fill"),
("template_copy", "复制模板", "form_fill"),
("field_extract", "抽取字段", "form_fill"),
("conflict_merge", "冲突归并", "form_fill"),
("word_fill", "填写 Word", "form_fill"),
("pdf_convert", "转换 PDF", "form_fill"),
("trace_export", "追溯清单", "form_fill"),
("output_export", "输出下载", "form_fill"),
("notify", "飞书通知", "form_fill"),
("completed", "完成", "completed"),
]
```
### 5.2 模板编码
```python
TEMPLATE_REGISTRATION_CERTIFICATE = "registration_certificate"
TEMPLATE_CHANGE_REGISTRATION = "change_registration"
TEMPLATE_ESSENTIAL_PRINCIPLES = "essential_principles"
```
### 5.3 触发关键词
```python
FORM_FILL_TRIGGER_KEYWORDS = [
"填注册证",
"对应的表格",
"生成申报模板",
"安全和性能基本原则清单",
"填到申报模板",
"自动填表",
"生成表格",
]
```
---
## 六、核心数据结构
### 6.1 FormFillContext
```python
@dataclass
class FormFillContext:
batch: ApplicationFormFillBatch
source_summary_batch: FileSummaryBatch
source_regulatory_batch: RegulatoryReviewBatch | None
template_config: dict[str, Any]
selected_templates: list["TemplateSpec"]
document_texts: dict[str, str]
regex_results: dict[str, Any]
llm_results: dict[str, Any]
merged_fields: dict[str, "MergedField"]
checklist_items: dict[str, Any]
conflicts: list[dict[str, Any]]
exports: list[ExportedSummaryFile]
```
### 6.2 TemplateSpec
```python
@dataclass(frozen=True)
class TemplateSpec:
code: str
name: str
source_file: str
output_label: str
applies_when: dict[str, Any]
file_format: str
fields: list[dict[str, Any]]
checklist_items: list[dict[str, Any]]
```
### 6.3 ExtractedField
```python
@dataclass(frozen=True)
class ExtractedField:
key: str
label: str
value: str
source_file: str
source_role: str
evidence: str
extractor: str
confidence: float
```
### 6.4 MergedField
```python
@dataclass(frozen=True)
class MergedField:
key: str
label: str
value: str
source_file: str
evidence: str
confidence: float
has_conflict: bool = False
conflict_values: list[dict[str, Any]] = field(default_factory=list)
```
---
## 七、模板配置详细设计
### 7.1 配置路径
```text
review_agent/application_form_fill/templates/application_form_templates_v1.yaml
```
### 7.2 初始配置示例
```yaml
version: application_form_templates_v1
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
templates:
- code: registration_certificate
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
output_label: 注册证格式
applies_when:
registration_type: ["首次注册"]
file_format: docx
fields:
- key: applicant_name
label: 注册人名称
target:
type: table_row
row_label: 注册人名称
source_roles: ["申请表", "说明书", "企业信息"]
- key: product_name
label: 产品名称
target:
type: table_row
row_label: 产品名称
source_roles: ["说明书", "产品技术要求", "注册检验报告"]
- key: intended_use
label: 预期用途
target:
type: table_row
row_label: 预期用途
source_roles: ["说明书", "临床评价资料", "产品技术要求"]
```
### 7.3 配置校验
`TemplateConfigService` 启动时校验:
| 校验项 | 失败处理 |
| --- | --- |
| version 存在 | 批次 failed |
| source_dir 存在 | 批次 failed |
| templates 非空 | 批次 failed |
| code 唯一 | 批次 failed |
| source_file 存在 | 对应模板不可用 |
| target.type 支持 | 对应字段跳过并记录 |
---
## 八、服务详细设计
### 8.1 TemplateConfigService
```python
def load_template_config() -> dict:
"""读取 YAML 模板配置。"""
def validate_template_config(config: dict) -> list[str]:
"""返回配置错误列表。"""
def compute_config_hash(path: Path) -> str:
"""计算模板配置 SHA-256。"""
```
### 8.2 TemplateSelectionService
```python
def parse_requested_templates(message: str) -> list[str]:
"""从用户话语中识别指定模板。"""
def detect_registration_type(batch: ApplicationFormFillBatch, message: str) -> tuple[str, str]:
"""按用户话语、法规核查批次、文件抽取结果识别注册类型及来源。"""
def select_templates(
config: dict,
requested_templates: list[str],
registration_type: str,
) -> tuple[list[TemplateSpec], list[dict]]:
"""输出模板列表和风险提示。"""
```
注册类型优先级:
```text
用户话语明确指定
-> source_regulatory_batch.condition_json / confirmed_conditions
-> source_summary_batch 文件内容抽取候选
-> unknown
```
### 8.3 TemplateRepository
```python
def resolve_source_template(spec: TemplateSpec) -> Path:
"""返回原始模板路径或预转换工作模板路径。"""
def copy_template_to_batch(spec: TemplateSpec, batch: ApplicationFormFillBatch) -> Path:
"""复制模板到批次 work_dir/templates。"""
def convert_doc_to_docx(source: Path, target_dir: Path) -> Path:
"""P1 能力:使用 soffice 转 docx。"""
```
`.doc` 模板本期处理:
| 场景 | 处理 |
| --- | --- |
| 存在 working_template docx | 使用工作模板 |
| 仅有 `.doc` 且无 soffice | 对应模板失败,其他模板继续 |
| 具备 soffice | 转换为 `.docx` 后继续 |
### 8.4 FieldExtractionService
```python
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
"""复用 text_extract 读取文件文本。"""
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict:
"""规则/正则抽取字段。"""
def extract_by_llm(texts: dict[str, str], specs: list[TemplateSpec]) -> dict:
"""LLM 结构化抽取字段。"""
def run_parallel_extract(texts: dict[str, str], specs: list[TemplateSpec]) -> tuple[dict, dict]:
"""并行执行规则/正则与 LLM 抽取。"""
```
并行实现可使用 `ThreadPoolExecutor(max_workers=2)`。LLM 超时或失败时,保留规则/正则结果继续。
### 8.5 FieldMergeService
```python
def normalize_field_value(value: str) -> str:
"""字段值归一化。"""
def rank_source(source_role: str, source_file: str) -> int:
"""说明书优先,其次产品技术要求、检测报告、性能研究等。"""
def merge_fields(regex_results: dict, llm_results: dict) -> tuple[dict[str, MergedField], list[dict]]:
"""合并字段并输出冲突。"""
```
来源优先级:
| 排名 | 来源 |
| --- | --- |
| 1 | 说明书 |
| 2 | 产品技术要求 |
| 3 | 注册检验报告/检测报告 |
| 4 | 性能研究资料 |
| 5 | 其他注册资料 |
### 8.6 WordTemplateFillService
```python
def fill_template(
template_path: Path,
output_path: Path,
spec: TemplateSpec,
fields: dict[str, MergedField],
checklist_items: dict[str, Any],
) -> Path:
"""填充 Word 模板并保存。"""
def fill_table_row(document: Document, row_label: str, value: str, conflict: bool) -> bool:
"""根据表格行首字段名定位并填入第二列。"""
def replace_placeholders(document: Document, fields: dict[str, MergedField]) -> None:
"""替换段落中的 {{field_key}}。"""
def apply_conflict_style(cell_or_run) -> None:
"""应用黄色底色和红色字体。"""
```
冲突样式:
| 样式 | 说明 |
| --- | --- |
| 字体颜色 | 红色 `FF0000` |
| 底色 | 黄色 `FFFF00` |
| 适用范围 | 单元格或字段值 run |
### 8.7 TraceabilityExportService
```python
def build_traceability_workbook(batch, merged_fields, conflicts, specs) -> Workbook:
"""生成追溯清单 Excel。"""
def save_traceability_excel(batch, workbook) -> ExportedSummaryFile:
"""保存 Excel 并写导出记录。"""
def save_extract_json(batch, payload: dict) -> ApplicationFormFillArtifact:
"""保存字段抽取 JSON 过程产物。"""
```
Excel Sheet
| Sheet | 内容 |
| --- | --- |
| 字段追溯 | 模板、字段、填入值、来源文件、证据、冲突状态 |
| 冲突字段 | 字段、采用值、冲突值、处理方式 |
| 低置信度条目 | 安全和性能基本原则清单候选判断 |
| 生成结果 | 模板文件、Word 状态、PDF 状态、错误说明 |
### 8.8 FormFillNotifier
```python
def notify_completion(batch: ApplicationFormFillBatch, exports: list[ExportedSummaryFile]) -> ApplicationFormFillNotificationRecord:
"""发送填表完成通知。"""
```
通知摘要包含:
| 内容 | 说明 |
| --- | --- |
| 批次号 | 填表批次 |
| 产品名称 | 如已识别 |
| 生成模板 | 模板名称列表 |
| 冲突数量 | 提示需下载核对 |
| 下载提示 | 提示回到系统对话下载,不直接暴露敏感全文 |
---
## 九、工作流执行器详细设计
### 9.1 启动入口
```python
def start_application_form_fill_workflow(batch: ApplicationFormFillBatch, *, async_run: bool = True) -> None:
executor = FormFillWorkflowExecutor(batch)
if async_run:
Thread(target=executor.run, daemon=True).start()
else:
executor.run()
```
### 9.2 执行伪代码
```python
class FormFillWorkflowExecutor:
def run(self) -> None:
self.mark_batch_running()
try:
for node in self.nodes():
if node.status == "success":
continue
self.run_node(node)
self.complete_or_partial()
except WorkflowPausedForUser:
self.mark_waiting_user()
except Exception as exc:
self.mark_failed(exc)
```
### 9.3 节点处理要点
| 节点 | 处理 |
| --- | --- |
| prepare | 校验 `source_summary_batch` 成功且属于当前对话 |
| template_select | 读取 YAML、识别注册类型、选择模板 |
| template_copy | 复制模板到 `work_dir/templates` |
| field_extract | 抽取文本,规则/正则与 LLM 并行,保存 JSON |
| conflict_merge | 合并字段,写 `conflict_summary` |
| word_fill | 逐模板生成 Word,写入 `ExportedSummaryFile(word)` |
| pdf_convert | 本期 skipped,写入 `risk_notes` |
| trace_export | 生成追溯 Excel 和 JSON |
| output_export | 生成 AI 对话 Markdown 摘要 |
| notify | 写飞书通知记录,失败不阻断 |
| completed | 标记 success 或 partial_success |
### 9.4 批次状态决策
| 条件 | 状态 |
| --- | --- |
| 所有目标 Word 均成功,追溯清单成功,通知成功或跳过 | success |
| 至少一个 Word 成功但部分模板、追溯清单、PDF 或通知失败 | partial_success |
| 所有目标 Word 均失败 | failed |
| 无来源文件汇总批次 | waiting_user |
---
## 十、接口详细设计
### 10.1 发起自动填表
```text
POST /api/review-agent/application-form-fill/start/
```
请求:
| 参数 | 类型 | 必填 | 说明 |
| --- | --- | --- | --- |
| conversation_id | integer | 是 | 当前对话 |
| message_id | integer | 否 | 触发消息 |
| file_summary_batch_id | integer | 否 | 指定文件来源批次 |
| template_codes | array | 否 | 指定模板 |
| output_types | array | 否 | 输出类型,默认 word、excel、json |
响应:
```json
{
"batch_id": 3001,
"workflow_type": "application_form_fill",
"status": "pending",
"selected_templates": ["registration_certificate", "essential_principles"]
}
```
### 10.2 查询状态
```text
GET /api/review-agent/application-form-fill/{batch_id}/
```
响应:
```json
{
"batch": {
"id": 3001,
"batch_no": "AFF-20260607153000-a1b2c3",
"status": "success",
"product_name": "甲胎蛋白检测试剂盒",
"selected_templates": ["registration_certificate"]
},
"nodes": [],
"conflicts": [],
"exports": []
}
```
### 10.3 下载文件
继续复用既有导出下载接口:
```text
GET /api/review-agent/file-summary/exports/{export_id}/download/
```
下载权限通过 `workflow_type=application_form_fill` 和 `workflow_batch_id` 反查填表批次。
---
## 十一、前端详细设计
### 11.1 工作流卡片
新增卡片类型 `application_form_fill`
| 节点 | 展示 |
| --- | --- |
| prepare | 准备资料 |
| template_select | 选择模板 |
| template_copy | 复制模板 |
| field_extract | 抽取字段 |
| conflict_merge | 冲突归并 |
| word_fill | 填写 Word |
| pdf_convert | 转换 PDF |
| trace_export | 追溯清单 |
| output_export | 输出下载 |
| notify | 飞书通知 |
| completed | 已完成 |
PDF 本期显示为“已跳过/待增强”,不显示为失败。
### 11.2 AI 回复摘要
```markdown
已生成申报模板自动填表文件。
| 文件 | Word | PDF |
| --- | --- | --- |
| 注册证格式 | 下载 | 待增强 |
| 安全和性能基本原则清单 | 下载 | 待增强 |
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
| --- | --- | --- | --- |
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
[下载字段来源追溯清单](download-url)
```
---
## 十二、异常与降级
| 场景 | 处理 |
| --- | --- |
| 无成功汇总批次 | 批次 waiting_user,对话提示上传资料 |
| 模板配置不存在 | 批次 failed |
| 指定模板不存在 | 忽略无效模板并提示;若无有效模板则 failed |
| `.doc` 模板无可用工作模板 | 该模板失败,其他模板继续 |
| 文本抽取失败 | 对应文件跳过,记录在追溯清单 |
| LLM 抽取失败 | 使用规则/正则结果继续 |
| 字段缺失 | Word 留空 |
| 字段冲突 | 说明书优先并高亮 |
| 追溯清单失败 | Word 成功时批次 partial_success |
| 飞书通知失败 | 批次 partial_success 或 success,取决于核心产物是否成功 |
| PDF 未实现 | 节点 skipped,写入待增强提示 |
---
## 十三、测试设计
### 13.1 单元测试
| 用例 | 目标 |
| --- | --- |
| test_form_fill_trigger_keywords | 触发语句识别为自动填表 |
| test_template_config_loads | YAML 配置可加载并校验 |
| test_select_default_templates_initial_registration | 首次注册默认选择注册证和基本原则清单 |
| test_select_user_requested_mismatch | 用户指定不适用模板仍允许生成并提示 |
| test_field_merge_prefers_instructions | 说明书字段优先 |
| test_field_merge_marks_conflict | 冲突字段进入 conflict_summary |
| test_word_fill_table_row | 能按表格行名写入 Word |
| test_word_fill_conflict_highlight | 冲突字段黄底红字 |
| test_traceability_excel | 追溯清单包含字段、来源和冲突 |
| test_notify_records_failure | 飞书失败写通知记录但不阻断 |
### 13.2 集成测试
| 场景 | 验证 |
| --- | --- |
| 最近汇总批次触发填表 | 无附件时复用最近 success `FileSummaryBatch` |
| 新附件触发填表 | 先自动汇总再启动填表 |
| 注册证模板填充 | 生成 Word 导出文件 |
| LLM 失败降级 | LLM 超时后规则抽取仍可生成 Word |
| 部分模板失败 | 至少一个 Word 成功时批次 partial_success |
| 权限隔离 | 不能查询或下载他人填表批次产物 |
### 13.3 前端验证
| 场景 | 验证 |
| --- | --- |
| 自动填表卡片 | 节点状态随 SSE 更新 |
| 指定模板展示 | 卡片展示本次选择模板 |
| PDF 跳过显示 | PDF 节点显示待增强而非失败 |
| 下载链接 | Word 和追溯清单链接可点击下载 |
| 冲突摘要 | 冲突字段表格正常渲染 |
---
## 十四、实施顺序建议
1. 修改功能设计中的模板配置路径为 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml`
2. 新增数据库模型和 `ExportedSummaryFile.ExportType` 扩展。
3. 新增 `application_form_fill` 模块目录和常量、schemas、storage。
4. 新增模板配置 YAML,先录入注册证 `.docx` 的已识别字段。
5. 实现模板选择、模板复制和 Word 表格行填充。
6. 实现规则/正则字段抽取和 LLM 抽取降级。
7. 实现字段合并、冲突高亮和追溯清单。
8. 实现工作流执行器、节点事件和状态接口。
9. 改造路由和前端工作流卡片。
10. 接入飞书通知记录。
11. 将字段级数据库表和 PDF 转换写入待办计划。

View File

@@ -0,0 +1,433 @@
# 产品关键信息提取与申报文件自动填表数据库设计
## 文档信息
| 项目 | 内容 |
| --- | --- |
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
| 数据库类型 | SQLite / Django ORM |
| 表名前缀 | ra_ |
| 设计日期 | 2026-06-07 |
| 设计版本 | V1.0 |
---
## 一、设计原则
| 原则 | 说明 |
| --- | --- |
| 独立填表批次 | 自动填表作为独立工作流,使用独立批次表,不强绑法规核查批次 |
| 复用文件来源 | 填表批次必须关联一个成功的 `FileSummaryBatch`,不重复保存文件清单 |
| 可选复用法规条件 | 如当前对话已有已确认法规核查批次,可通过可空外键复用注册类型等条件 |
| 导出记录复用 | Word、Excel、JSON、PDF 等下载文件继续进入 `ExportedSummaryFile` |
| 过程产物独立 | 自动填表过程产物单独建表,避免和法规核查 `RegulatoryArtifact` 混用 |
| 通知记录独立 | 自动填表飞书通知单独建表,字段风格与法规通知记录保持一致 |
| 大文本不入库 | 字段抽取 JSON、追溯清单和模板副本保存为文件,数据库仅保存路径、hash 和摘要 |
| 字段明细暂不入库 | 本期不新增字段级明细表;字段结果保存在 JSON/Excel 产物与批次摘要中 |
| SQLite 兼容 | 字段类型、索引和约束优先保证当前 SQLite + Django ORM 可运行 |
---
## 二、ER 图
```mermaid
erDiagram
AUTH_USER ||--o{ CONVERSATION : owns
CONVERSATION ||--o{ RA_FILE_SUMMARY_BATCH : has
RA_FILE_SUMMARY_BATCH ||--o{ RA_FILE_SUMMARY_ITEM : produces
RA_FILE_SUMMARY_BATCH ||--o{ RA_APPLICATION_FORM_FILL_BATCH : feeds
RA_REGULATORY_REVIEW_BATCH ||--o{ RA_APPLICATION_FORM_FILL_BATCH : optionally_confirms
AUTH_USER ||--o{ RA_APPLICATION_FORM_FILL_BATCH : runs
CONVERSATION ||--o{ RA_APPLICATION_FORM_FILL_BATCH : has
MESSAGE ||--o{ RA_APPLICATION_FORM_FILL_BATCH : triggers
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_APPLICATION_FORM_FILL_ARTIFACT : keeps
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_APPLICATION_FORM_FILL_NOTIFICATION_RECORD : sends
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_EXPORTED_SUMMARY_FILE : exports
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_WORKFLOW_NODE_RUN : tracks
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_WORKFLOW_EVENT : emits
```
说明:`ra_workflow_node_run`、`ra_workflow_event`、`ra_exported_summary_file` 已在第二批中被通用化,通过 `workflow_type` 和 `workflow_batch_id` 支持多工作流。本功能使用 `workflow_type=application_form_fill`。
---
## 三、表结构设计
### 3.1 ra_application_form_fill_batch
一次自动填表工作流批次。该表记录本次触发来源、选择模板、输出类型、注册类型、产品名称、冲突摘要、工作目录和状态。
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
| --- | --- | --- | --- | --- |
| id | BigAutoField | integer | 是 | 主键 |
| conversation_id | ForeignKey | bigint | 是 | 绑定对话 |
| user_id | ForeignKey | bigint | 是 | 发起用户 |
| trigger_message_id | ForeignKey | bigint | 否 | 触发填表工作流的用户消息 |
| source_summary_batch_id | ForeignKey | bigint | 是 | 文件来源汇总批次 |
| source_regulatory_batch_id | ForeignKey | bigint | 否 | 可选,复用已确认法规核查批次条件 |
| batch_no | CharField(64) | varchar(64) | 是 | 填表批次编号,唯一 |
| status | CharField(30) | varchar(30) | 是 | pending、running、waiting_user、success、partial_success、failed、cancelled |
| requested_templates | JSONField | text/json | 是 | 用户指定模板编码列表;未指定为空数组 |
| selected_templates | JSONField | text/json | 是 | 系统实际选择模板编码列表 |
| output_types | JSONField | text/json | 是 | 请求输出类型,如 word、excel、json、pdf |
| registration_type | CharField(80) | varchar(80) | 否 | 识别出的注册类型 |
| registration_type_source | CharField(40) | varchar(40) | 否 | user_message、regulatory_batch、file_extract、unknown |
| product_name | CharField(200) | varchar(200) | 否 | 产品名称 |
| conflict_summary | JSONField | text/json | 是 | 冲突字段摘要 |
| risk_notes | JSONField | text/json | 是 | 不适用模板、低置信度、PDF 待生成等提示 |
| template_config_version | CharField(80) | varchar(80) | 否 | 模板配置版本 |
| template_config_hash | CharField(128) | varchar(128) | 否 | 模板配置文件 hash |
| work_dir | CharField(500) | varchar(500) | 否 | 批次工作目录 |
| error_message | TextField | text | 否 | 批次异常说明 |
| created_at | DateTimeField | datetime | 是 | 创建时间 |
| started_at | DateTimeField | datetime | 否 | 开始时间 |
| finished_at | DateTimeField | datetime | 否 | 完成时间 |
| archived_at | DateTimeField | datetime | 否 | 归档时间 |
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
唯一约束:
| 约束名 | 字段 |
| --- | --- |
| uq_ra_aff_batch_no | batch_no |
索引:
| 索引名 | 字段 | 说明 |
| --- | --- | --- |
| idx_ra_aff_batch_conv_status | conversation_id, status | 查询对话下填表批次状态 |
| idx_ra_aff_batch_summary | source_summary_batch_id | 根据文件汇总批次查询填表历史 |
| idx_ra_aff_batch_regulatory | source_regulatory_batch_id | 根据法规核查批次查询关联填表历史 |
| idx_ra_aff_batch_user_created | user_id, created_at | 查询用户发起记录 |
| idx_ra_aff_batch_created | created_at | 按创建时间查询 |
---
### 3.2 ra_application_form_fill_artifact
自动填表过程产物表。仅保存文件元数据,不保存字段抽取大 JSON 的全文。
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
| --- | --- | --- | --- | --- |
| id | BigAutoField | integer | 是 | 主键 |
| batch_id | ForeignKey | bigint | 是 | 所属自动填表批次 |
| artifact_type | CharField(60) | varchar(60) | 是 | template_copy、field_extract_result、merged_fields、traceability、filled_template、notification_record |
| file_format | CharField(20) | varchar(20) | 是 | json、excel、docx、pdf、markdown |
| name | CharField(160) | varchar(160) | 是 | 产物名称 |
| file_name | CharField(255) | varchar(255) | 是 | 文件名 |
| storage_path | CharField(500) | varchar(500) | 是 | 存储路径 |
| file_size | BigIntegerField | bigint | 是 | 文件大小 |
| content_hash | CharField(128) | varchar(128) | 是 | 文件 SHA-256 hash |
| metadata | JSONField | text/json | 是 | 模板编码、输出类型、生成状态、错误摘要等 |
| created_by_node | CharField(60) | varchar(60) | 否 | 产生该产物的节点 |
| created_at | DateTimeField | datetime | 是 | 创建时间 |
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
索引:
| 索引名 | 字段 | 说明 |
| --- | --- | --- |
| idx_ra_aff_artifact_batch_type | batch_id, artifact_type | 查询批次过程产物 |
| idx_ra_aff_artifact_format | file_format | 按文件格式查询 |
| idx_ra_aff_artifact_created | created_at | 按时间追溯 |
---
### 3.3 ra_application_form_fill_notification_record
自动填表飞书通知记录表。通知失败不阻断文件下载,但需要留痕和支持后续重试。
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
| --- | --- | --- | --- | --- |
| id | BigAutoField | integer | 是 | 主键 |
| batch_id | ForeignKey | bigint | 是 | 所属自动填表批次 |
| recipient_id | ForeignKey(User) | bigint | 是 | 通知对象,默认上传人/发起人 |
| channel | CharField(30) | varchar(30) | 是 | feishu_cli、feishu_api、mock |
| template_codes | JSONField | text/json | 是 | 本次通知涉及模板 |
| export_ids | JSONField | text/json | 是 | 本次通知关联导出文件 ID |
| message_summary | TextField | text | 是 | 通知摘要 |
| send_status | CharField(20) | varchar(20) | 是 | pending、success、failed |
| retry_count | PositiveIntegerField | integer | 是 | 已重试次数 |
| external_message_id | CharField(120) | varchar(120) | 否 | 飞书外部消息 ID |
| error_message | TextField | text | 否 | 失败原因 |
| sent_at | DateTimeField | datetime | 否 | 发送成功时间 |
| created_at | DateTimeField | datetime | 是 | 创建时间 |
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
索引:
| 索引名 | 字段 | 说明 |
| --- | --- | --- |
| idx_ra_aff_notify_batch | batch_id, created_at | 查询批次通知记录 |
| idx_ra_aff_notify_recipient | recipient_id, send_status | 查询用户通知状态 |
| idx_ra_aff_notify_status | send_status, retry_count | 查询待重试通知 |
---
## 四、既有表扩展
### 4.1 ra_exported_summary_file
继续复用导出文件表,需扩展导出类型。
| 字段/枚举 | 处理 |
| --- | --- |
| export_type | 增加 `word`、`pdf` |
| workflow_type | 使用 `application_form_fill` |
| workflow_batch_id | 记录 `ApplicationFormFillBatch.id` |
| export_category | 使用 `filled_template`、`traceability`、`extract_result` |
导出类型枚举:
| value | 中文展示 | 说明 |
| --- | --- | --- |
| markdown | Markdown | 既有报告 |
| excel | Excel | 追溯清单 |
| json | JSON | 字段抽取结果包 |
| word | Word | 填好的 Word 模板 |
| pdf | PDF | Word 转换后的 PDF,P1 预留 |
### 4.2 ra_workflow_node_run
本功能使用通用工作流字段:
| 字段 | 值 |
| --- | --- |
| workflow_type | application_form_fill |
| workflow_batch_id | ApplicationFormFillBatch.id |
| node_group | form_fill |
| batch_id | 可为空或兼容性填充 source_summary_batch_id |
### 4.3 ra_workflow_event
本功能事件写入:
| 字段 | 值 |
| --- | --- |
| workflow_type | application_form_fill |
| workflow_batch_id | ApplicationFormFillBatch.id |
| conversation_id | 当前对话 ID |
| payload | 节点状态、模板列表、冲突数量、导出文件等 |
---
## 五、枚举设计
### 5.1 ApplicationFormFillBatch.status
| value | 中文展示 | 说明 |
| --- | --- | --- |
| pending | 待执行 | 批次已创建,等待执行 |
| running | 执行中 | 工作流正在执行 |
| waiting_user | 等待用户 | 缺少文件汇总批次或关键条件 |
| success | 成功 | Word 和必要追溯产物生成成功 |
| partial_success | 部分成功 | 部分模板、PDF、追溯清单或通知失败 |
| failed | 失败 | 所有目标 Word 模板均生成失败 |
| cancelled | 已取消 | 用户或系统取消执行 |
### 5.2 artifact_type
| value | 说明 |
| --- | --- |
| template_copy | 模板副本 |
| field_extract_result | 规则/正则与 LLM 抽取原始结果 |
| merged_fields | 合并后的最终字段和冲突 |
| traceability | 字段来源追溯清单 |
| filled_template | 已填写模板 |
| notification_record | 通知记录产物 |
### 5.3 registration_type_source
| value | 说明 |
| --- | --- |
| user_message | 用户话语明确指定 |
| regulatory_batch | 复用已确认法规核查条件 |
| file_extract | 从文件内容抽取 |
| unknown | 未识别 |
### 5.4 通知枚举
| 字段 | value |
| --- | --- |
| channel | feishu_cli、feishu_api、mock |
| send_status | pending、success、failed |
---
## 六、JSON 字段结构建议
### 6.1 requested_templates / selected_templates
```json
["registration_certificate", "essential_principles"]
```
### 6.2 output_types
```json
["word", "excel", "json"]
```
PDF 作为 P1 预留,可在后续加入:
```json
["word", "pdf", "excel", "json"]
```
### 6.3 conflict_summary
```json
[
{
"field_key": "storage_condition",
"field_label": "产品储存条件及有效期",
"selected_value": "2-8℃保存有效期12个月",
"selected_source": "说明书.docx",
"conflict_values": [
{
"value": "-20℃保存",
"source_file": "产品技术要求.docx",
"evidence": "储存条件:-20℃保存"
}
],
"handling": "说明书优先,模板内黄底红字高亮"
}
]
```
### 6.4 risk_notes
```json
[
{
"type": "template_registration_mismatch",
"message": "用户指定变更注册(备案)文件,但系统识别注册类型为首次注册,需人工确认。"
},
{
"type": "pdf_pending",
"message": "PDF 转换为后续增强项,本次优先生成 Word。"
}
]
```
### 6.5 artifact.metadata
```json
{
"template_code": "registration_certificate",
"output_type": "word",
"node_code": "word_fill",
"status": "success",
"conflict_count": 2
}
```
---
## 七、存储路径设计
自动填表工作目录按用户、对话和批次隔离:
```text
media/application_form_fill/{user_id}/{conversation_id}/{batch_no}/
```
目录结构:
```text
media/application_form_fill/12/1001/AFF-20260607153000-a1b2c3/
templates/
registration_certificate.source.docx
essential_principles.source.docx
filled/
AFF-20260607153000-a1b2c3-甲胎蛋白检测试剂盒-注册证格式.docx
exports/
AFF-20260607153000-a1b2c3-甲胎蛋白检测试剂盒-字段来源追溯清单.xlsx
field_extract_result.json
merged_fields.json
notifications/
notification_record.json
```
所有产物写入 `ApplicationFormFillArtifact` 时必须记录 SHA-256 hash。
---
## 八、权限与查询规则
### 8.1 批次访问权限
```text
ApplicationFormFillBatch -> conversation -> user
必须等于当前 request.user
```
### 8.2 导出下载权限
```text
ExportedSummaryFile.workflow_type == application_form_fill
-> workflow_batch_id
-> ApplicationFormFillBatch.conversation.user
```
`workflow_type=file_summary` 或 `regulatory_review` 时,仍按既有逻辑校验。
### 8.3 文件读取权限
自动填表只能读取 `source_summary_batch.items` 对应的文件,不允许从其他对话或其他批次随意读取文件。
---
## 九、字段级数据库表暂缓说明
本期不新增 `ApplicationFormFillField` 字段级明细表。原因:
| 原因 | 说明 |
| --- | --- |
| Demo 主链路更轻 | 字段结果以 JSON 和 Excel 追溯清单即可满足下载复核 |
| 避免过早建模 | 字段结构依赖模板配置和后续人工修改交互,暂不固化表结构 |
| 查询需求有限 | 本期主要按批次下载文件,不做字段级统计和在线编辑 |
后续如需要在线确认、人工修改、字段级审计或批量统计,再新增字段级表。该事项写入 `docs/6.待办计划/第二阶段暂缓事项.md`
---
## 十、Django Model 命名建议
| 表名 | Model 名称 |
| --- | --- |
| ra_application_form_fill_batch | ApplicationFormFillBatch |
| ra_application_form_fill_artifact | ApplicationFormFillArtifact |
| ra_application_form_fill_notification_record | ApplicationFormFillNotificationRecord |
建议模型仍集中放在 `review_agent/models.py`,与前两批现有模型保持一致;业务逻辑放在 `review_agent/application_form_fill/`
---
## 十一、验收检查点
| 序号 | 检查项 | 验收标准 |
| --- | --- | --- |
| 1 | 独立批次 | 触发填表后生成 `ApplicationFormFillBatch` |
| 2 | 文件来源 | 每个填表批次都关联一个成功的 `FileSummaryBatch` |
| 3 | 可选法规条件 | 如有关联法规核查批次,可记录 `source_regulatory_batch` |
| 4 | 过程产物 | 字段抽取 JSON、合并结果、追溯清单、模板副本均可留底 |
| 5 | 导出复用 | 填好的 Word 和追溯清单进入 `ExportedSummaryFile` |
| 6 | 导出类型 | `ExportedSummaryFile.ExportType` 支持 `word`、`pdf` |
| 7 | 通知记录 | 飞书通知记录能保存状态、重试次数、失败原因 |
| 8 | 权限隔离 | A 对话的填表批次和导出文件不能被 B 对话访问 |
| 9 | 字段表暂缓 | 字段级结果不入库,但能从 JSON/Excel 追溯产物复核 |
---
## 十二、开发顺序建议
1. 扩展 `ExportedSummaryFile.ExportType`,增加 `word`、`pdf`。
2. 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord`。
3. 为新增状态字段定义 Django `TextChoices`
4. 配置表名、索引和唯一约束。
5. 执行 `python manage.py makemigrations review_agent` 和 `python manage.py migrate`。
6. 编写模型测试,覆盖批次创建、产物 hash、通知重试字段、导出权限查询。
7. 将字段级数据库表和 PDF 转换能力写入待办计划。

View File

@@ -7,6 +7,7 @@
```text ```text
适用条件对话选择框 适用条件对话选择框
-> waiting_user 暂停恢复 -> waiting_user 暂停恢复
-> 附件 4 申报资料目录规则对齐
-> 整包复核 -> 整包复核
-> 缺失项复核 -> 缺失项复核
-> mock 通知留痕 -> mock 通知留痕
@@ -23,7 +24,7 @@
| 阶段 | 名称 | 目标 | 验收 | | 阶段 | 名称 | 目标 | 验收 |
| --- | --- | --- | --- | | --- | --- | --- | --- |
| RR2-1 | 适用条件确认 | 对话选择框确认产品类别、注册类型、临床评价路径等 | waiting_user 可暂停恢复 | | RR2-1 | 适用条件确认 | 对话选择框确认产品类别、注册类型、临床评价路径等 | waiting_user 可暂停恢复 |
| RR2-2 | 核查能力增强 | 扩展章节、一致性、RAG 引用和文本抽取范围 | 复杂样例可识别更多问题 | | RR2-2 | 附件 4 规则对齐与核查能力增强 | 按《体外诊断试剂注册申报资料要求及说明》扩展完整目录规则、章节、一致性、RAG 引用和文本抽取范围 | 能识别附件 4 一级/二级目录缺失和关键字段问题 |
| RR2-3 | 整包复核 | 基于新的汇总批次创建新的法规核查批次 | 可追溯来源批次 | | RR2-3 | 整包复核 | 基于新的汇总批次创建新的法规核查批次 | 可追溯来源批次 |
| RR2-4 | 缺失项复核 | 针对原 Issue 执行复核并更新状态 | 生成 review_record | | RR2-4 | 缺失项复核 | 针对原 Issue 执行复核并更新状态 | 生成 review_record |
| RR2-5 | mock 通知留痕 | 对 blocking/high/medium 写 mock 通知记录 | 报告展示通知记录 | | RR2-5 | mock 通知留痕 | 对 blocking/high/medium 写 mock 通知记录 | 报告展示通知记录 |
@@ -71,29 +72,89 @@ pytest tests/test_regulatory_condition.py tests/test_regulatory_frontend.py test
--- ---
## 四、RR2-2 核查能力增强 ## 四、RR2-2 附件 4 规则对齐与核查能力增强
### 新增口径:附件 4 必须结构化入规则库
第一批主链路已经可以演示,但现有 Demo YAML 只覆盖 5 类规则:产品技术要求、说明书、注册检验报告、临床评价资料、安全和性能基本原则清单。经人工确认,第一批链路可通过;但与附件《体外诊断试剂注册申报资料要求及说明》相比,规则覆盖仍不完整。第二批 RR2-2 必须将附件 4 的申报资料目录结构补入规则库,并作为完整性和章节核查的主要依据。
附件来源:
```text
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
```
如附件仍为旧版 `.doc`,允许在开发阶段通过 Pandoc、LibreOffice headless、Word COM 或受控脚本转换为 `.docx`/`.txt` 中间产物;中间产物只用于规则抽取和测试夹具,不改变第一阶段文件页数统计口径。
### 附件 4 目录覆盖范围
第二批 Demo 规则至少覆盖以下一级和二级标题。规则应支持“章节目录”类目录项、资料文件项、条件适用项和推荐项的区分。
| 一级目录 | 二级目录/资料项 |
| --- | --- |
| 1. 监管信息 | 1.1 章节目录、1.2 申请表、1.3 术语/缩写词列表、1.4 产品列表、1.5 关联文件、1.6 申报前与监管机构的联系情况和沟通记录、1.7 符合性声明 |
| 2. 综述资料 | 2.1 章节目录、2.2 概述、2.3 产品描述、2.4 预期用途、2.5 申报产品上市历史、2.6 其他需说明的内容 |
| 3. 非临床资料 | 3.1 章节目录、3.2 产品风险管理资料、3.3 体外诊断试剂安全和性能基本原则清单、3.4 产品技术要求及检验报告、3.5 分析性能研究、3.6 稳定性研究、3.7 阳性判断值或参考区间研究、3.8 其他资料 |
| 4. 临床评价资料 | 4.1 章节目录、4.2 临床评价资料 |
| 5. 产品说明书和标签样稿 | 5.1 章节目录、5.2 产品说明书、5.3 标签样稿、5.4 其他资料 |
| 6. 质量管理体系文件 | 6.1 综述、6.2 章节目录、6.3 生产制造信息、6.4 质量管理体系程序、6.5 管理职责程序、6.6 资源管理程序、6.7 产品实现程序、6.8 质量管理体系的测量/分析和改进程序、6.9 其他质量体系程序信息、6.10 质量管理体系核查文件 |
### 规则分级默认值
| 规则类型 | 默认风险 | 说明 |
| --- | --- | --- |
| 一级目录整体缺失 | high | 如缺少“监管信息”“综述资料”“非临床资料”等完整章节 |
| 关键法定资料缺失 | blocking | 申请表、符合性声明、产品技术要求及检验报告等 |
| 关键技术/评价资料缺失 | high | 产品风险管理资料、分析性能研究、稳定性研究、临床评价资料、产品说明书、标签样稿等 |
| 条件适用资料缺失 | medium/high | 如上市历史、申报前沟通记录、其他资料;需结合 RR2-1 适用条件判断 |
| 章节目录缺失 | medium | 各一级目录下的章节目录缺失,影响资料可追溯性 |
### 与现有第一批链路的差异修正
| 当前能力 | 第二批修正 |
| --- | --- |
| 完整性核查只按文件名和相对路径匹配 | 增加目录名、首页文本/前若干页文本、章节标题候选匹配 |
| YAML 只覆盖 5 个 Demo 条目 | 扩展为附件 4 一级/二级目录规则,保留第一批 5 条并映射到附件 4 对应章节 |
| 章节核查只检查说明书储存条件/有效期/样本要求 | 改为同时检查申报资料目录结构和说明书内部关键章节 |
| RAG 可能跳过 `.doc` 材料 | 附件 4 必须可被转换或抽取,构建 RAG 前输出可读文本抽取状态 |
| 一致性只检查产品名称、型号规格、预期用途 | 保留这三项,并增加管理类别、分类编码、注册类型、临床评价路径等候选字段 |
### 任务 ### 任务
| 编号 | 内容 | 文件 | | 编号 | 内容 | 文件 |
| --- | --- | --- | | --- | --- | --- |
| RR2-2-001 | 扩展 YAML 规则中的必需章节和一致性字段 | `rules/nmpa_ivd_registration_v1.yaml` | | RR2-2-001 | 将附件 4 `.doc` 抽取为可测试的结构化目录夹具 | `tests/fixtures/regulatory/attachment4_outline.json` 或同等 fixture |
| RR2-2-002 | 增强文本抽取,缓存章节候选和字段候选 | `services/text_extract.py` | | RR2-2-002 | 扩展 YAML 规则,覆盖附件 4 一级/二级目录、别名、适用条件、风险等级和整改建议 | `rules/nmpa_ivd_registration_v1.yaml` |
| RR2-2-003 | 增强章节核查,支持别名、近似标题和证据片段 | `services/structure_check.py` | | RR2-2-003 | 增强规则加载校验,确保附件 4 必填目录项都有规则 ID、关键词、风险等级和 citation_query | `services/rule_loader.py` |
| RR2-2-004 | 增强一致性核查,支持多个来源值和低置信度提示项 | `services/consistency_check.py` | | RR2-2-004 | 增强完整性核查,支持文件名、目录名、首页文本/前若干页文本、章节标题候选匹配 | `services/completeness_check.py``services/text_extract.py` |
| RR2-2-005 | RAG 引用写入 `rag_result_json` 过程产物 | `services/rag_citation.py``storage.py` | | RR2-2-005 | 增强文本抽取,缓存章节候选、字段候选、首页文本和抽取状态 | `services/text_extract.py``storage.py` |
| RR2-2-006 | 增加测试 | `tests/test_regulatory_structure.py``tests/test_regulatory_consistency.py``tests/test_regulatory_rag.py` | | RR2-2-006 | 增强章节核查,支持附件 4 目录层级、别名、近似标题和证据片段 | `services/structure_check.py` |
| RR2-2-007 | 增强一致性核查,支持产品名称、型号规格、预期用途、管理类别、分类编码、注册类型、临床评价路径等来源值 | `services/consistency_check.py` |
| RR2-2-008 | RAG 引用写入 `rag_result_json` 过程产物,并记录附件 4 文本抽取/索引状态 | `services/rag_citation.py``storage.py` |
| RR2-2-009 | 增加附件 4 对齐测试 | `tests/test_regulatory_rule_loader.py``tests/test_regulatory_completeness.py``tests/test_regulatory_structure.py``tests/test_regulatory_consistency.py``tests/test_regulatory_rag.py` |
### 验收样例
| 样例条件 | 预期 |
| --- | --- |
| 文件包缺少“监管信息/申请表” | 生成 blocking 或 high 问题,并引用附件 4 监管信息要求 |
| 文件包缺少“产品风险管理资料” | 生成 high 问题,category 为 completeness |
| 文件包缺少“分析性能研究”或“稳定性研究” | 生成 high 问题,给出补充研究资料建议 |
| 文件包有产品技术要求但无检验报告 | 生成 blocking 问题,规则映射到 3.4 |
| 文件包有产品说明书但无标签样稿 | 生成 high 问题,规则映射到 5.3 |
| 文件包缺少质量管理体系文件 | 生成 high 问题,规则映射到第 6 章 |
| 附件 4 `.doc` 未能抽取 | RAG 构建命令失败或明确报告附件 4 抽取失败,不允许静默跳过该核心材料 |
### 验证命令 ### 验证命令
```bash ```bash
pytest tests/test_regulatory_structure.py tests/test_regulatory_consistency.py tests/test_regulatory_rag.py pytest tests/test_regulatory_rule_loader.py tests/test_regulatory_completeness.py tests/test_regulatory_structure.py tests/test_regulatory_consistency.py tests/test_regulatory_rag.py
``` ```
### Codex 执行提示 ### Codex 执行提示
```text ```text
增强章节核查、一致性核查和 RAG 过程产物。证据必须包含文件路径、命中片段、字段名或规则 ID便于人工复核。 先将附件 4《体外诊断试剂注册申报资料要求及说明》结构化为规则覆盖清单再增强 YAML、完整性核查、章节核查、一致性核查和 RAG 过程产物。第二批必须覆盖附件 4 的 1-6 章一级目录和主要二级目录;证据必须包含文件路径、命中片段、字段名或规则 ID便于人工复核。附件 4 作为核心法规材料,不允许在 RAG 构建中静默跳过。
``` ```
--- ---
@@ -229,14 +290,15 @@ pytest
第一批主链路已经完成并通过全量测试。 第一批主链路已经完成并通过全量测试。
目标: 目标:
补齐法规核查完整整改闭环包括适用条件对话选择框、waiting_user 暂停恢复、整包复核、缺失项复核、mock 通知留痕、增强章节/一致性核查和前端交互。 补齐法规核查完整整改闭环包括适用条件对话选择框、waiting_user 暂停恢复、附件 4 申报资料目录规则对齐、整包复核、缺失项复核、mock 通知留痕、增强章节/一致性核查和前端交互。
执行规则: 执行规则:
1. 从第一批完成后的稳定分支创建 codex/YYYYMMDD-NMPA法规核查完整闭环 分支。 1. 从第一批完成后的稳定分支创建 codex/YYYYMMDD-NMPA法规核查完整闭环 分支。
2. 按 RR2-1 到 RR2-6 顺序执行。 2. 按 RR2-1 到 RR2-6 顺序执行。
3. 每阶段完成后运行对应验证命令。 3. 每阶段完成后运行对应验证命令。
4. 不接真实飞书 CLI/API 4. RR2-2 必须覆盖附件 4 的 1-6 章一级目录和主要二级目录,不能只保留第一批 5 条 Demo 规则
5. 不做规则管理前端 5. 不接真实飞书 CLI/API
6. 不做自动填写目标文件 6. 不做规则管理前端
7. 最后运行 python manage.py check 和 pytest 全量验收 7. 不做自动填写目标文件
8. 最后运行 python manage.py check 和 pytest 全量验收。
``` ```

View File

@@ -0,0 +1,632 @@
# 产品关键信息提取与申报文件自动填表开发计划
## 文档信息
| 项目 | 内容 |
| --- | --- |
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
| 详细设计文档 | docs/3.详细设计/3.产品关键信息提取与申报文件自动填表.md |
| 数据库设计文档 | docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md |
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
| 所属模块 | 审核智能体 review_agent |
| 执行方式 | 单人开发 + Codex 目标模式自动化执行 |
| 计划日期 | 2026-06-07 |
| 计划版本 | V1.0 |
---
## 一、开发计划目标
本开发计划用于指导 Codex 目标模式按阶段完成“产品关键信息提取与申报文件自动填表”功能开发。该功能作为独立工作流 `application_form_fill` 实现,由用户对话触发,默认复用当前对话最近成功的文件汇总批次;如本次消息带新附件,则先串联文件汇总,再执行自动填表。
本期必须完成:独立填表批次、过程产物、飞书通知记录、模板配置、注册证 `.docx` 模板填充、字段抽取与合并、冲突高亮、追溯清单、Word 下载、自动填表工作流卡片和权限校验。
本期明确不强制完成:PDF 转换、字段级数据库表、`.doc` 模板自动转换、完整安全和性能基本原则清单条目拆解。这些事项已进入 `docs/6.待办计划/第二阶段暂缓事项.md`。
---
## 二、已确认开发规则
| 规则项 | 内容 |
| --- | --- |
| 工作流类型 | 新增独立 `application_form_fill`,不塞入 `regulatory_review` 工作流 |
| 触发方式 | 用户对话触发,如“帮我填注册证”“给我这个内容对应的表格”“为我该方案生成申报模板” |
| 模板指定 | 用户可指定模板;未指定时按注册类型生成适用模板 |
| 文件来源 | 无新附件时复用当前对话最近成功 `FileSummaryBatch`;有新附件时先自动汇总 |
| 模板配置 | 放在 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` |
| 字段抽取 | 规则/正则与 LLM 结构化抽取并行,合并处理 |
| 冲突处理 | 说明书优先;冲突字段在 Word 中黄色底色、红色字体 |
| 输出范围 | Demo 主链路优先 Word + Excel/JSON 追溯清单 |
| PDF | 数据结构预留,工作流节点可 skipped,不作为本期强验收 |
| 飞书 | 新增自动填表通知记录表,通知失败不阻断下载 |
| 数据库 | 新增三张表;字段级明细表暂缓 |
| Git 提交 | 每个阶段完成并验证通过后提交一次 |
| 测试要求 | 每阶段至少运行对应 pytest;前端阶段补卡片和渲染测试 |
---
## 三、总体验收标准
| 类别 | 完成标准 |
| --- | --- |
| 数据库 | `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord` 可通过 migration 落库 |
| 导出类型 | `ExportedSummaryFile.ExportType` 支持 `word`、`pdf`,并兼容既有 markdown/excel/json |
| 模块结构 | 新增 `review_agent/application_form_fill/` 独立模块 |
| 触发 | 用户说“帮我填注册证”等语句可触发 `application_form_fill` |
| 文件来源 | 无新附件时复用最近成功汇总批次;无汇总批次时提示上传资料 |
| 模板配置 | YAML 可加载、校验,并至少配置注册证格式 `.docx` 已识别字段 |
| 字段抽取 | 规则/正则与 LLM 抽取结果均可留底;LLM 失败时规则结果可继续 |
| 字段合并 | 说明书优先,冲突字段进入 `conflict_summary` 和追溯清单 |
| Word 填充 | 能按表格行名填入注册证模板字段,缺失字段留空 |
| 冲突高亮 | 冲突字段在 Word 内黄底红字 |
| 追溯清单 | 生成 Excel/JSON,记录规则结果、LLM 结果、合并字段、冲突和来源证据 |
| 下载 | 对话框提供填好 Word 和追溯清单下载链接 |
| 工作流卡片 | 前端支持 `application_form_fill` 卡片,展示准备资料、选择模板、复制模板、抽取字段、填写 Word 等节点 |
| 飞书通知 | 填表完成后写通知记录,可 mock;失败不阻断文件下载 |
| 权限 | A 对话不能查询或下载 B 对话的填表批次和导出文件 |
| 回归 | 第一批文件汇总、第二批法规核查既有测试不回归 |
---
## 四、阶段总览
| 阶段 | 名称 | 目标 | 阶段验收 |
| --- | --- | --- | --- |
| AFF-0 | 准备与回归 | 创建开发分支,确认现有测试基线 | `python manage.py check` 和关键回归测试通过 |
| AFF-1 | 数据模型与通用导出扩展 | 新增三张表,扩展 word/pdf 导出类型 | migration、模型测试通过 |
| AFF-2 | 模块骨架与模板配置 | 新建独立模块、YAML 配置和配置校验 | 模板配置测试通过 |
| AFF-3 | 触发与工作流骨架 | 对话触发、批次创建、节点事件和状态查询 | 可创建并运行空工作流 |
| AFF-4 | 模板选择与文件来源 | 复用最近汇总批次,支持指定/默认模板选择 | 模板选择和来源批次测试通过 |
| AFF-5 | 字段抽取与合并 | 规则/正则 + LLM 并行抽取、冲突归并和产物留底 | 字段抽取、冲突测试通过 |
| AFF-6 | Word 填充与追溯导出 | 注册证 Word 填充、冲突高亮、Excel/JSON 追溯 | 可下载 Word 和追溯清单 |
| AFF-7 | 飞书通知与对话摘要 | 生成助手摘要、下载链接和通知记录 | 通知、摘要、下载权限测试通过 |
| AFF-8 | 前端卡片与总体验收 | 自动填表工作流卡片、状态恢复、全量回归 | 全量测试通过 |
---
## 五、AFF-0 准备与回归
### AFF-0-001 创建开发分支并确认现状
| 项目 | 内容 |
| --- | --- |
| 任务类型 | Git / 准备 |
| 前置任务 | 无 |
| 涉及文件 | 无固定文件 |
| 目标 | 从当前稳定分支创建 `codex/YYYYMMDD-申报文件自动填表` 开发分支,并确认工作区状态 |
| 开发步骤 | 1. 检查当前分支和 `git status`;2. 确认第三批设计文档存在;3. 创建开发分支;4. 记录已有未提交变更,不得回滚用户变更 |
| 验收标准 | 分支创建成功,工作区变更来源清楚 |
| 验证命令 | `git branch --show-current`; `git status --short` |
| Codex 执行提示 | 请创建第三批自动填表开发分支,检查当前工作区状态和设计文档,不要回滚用户已有变更。 |
### AFF-0-002 运行基线回归
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 测试 / 回归 |
| 前置任务 | AFF-0-001 |
| 涉及文件 | 无固定文件 |
| 目标 | 确认现有文件汇总和法规核查主流程在开发前可用 |
| 开发步骤 | 1. 运行 Django check;2. 运行文件汇总测试;3. 运行法规核查测试;4. 记录失败项并先判断是否为既有问题 |
| 验收标准 | 关键回归测试通过,或记录清楚既有失败和本阶段处理策略 |
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_*.py tests/test_regulatory_*.py` |
| Codex 执行提示 | 请在开发前运行 Django check 和文件汇总/法规核查关键测试,确认基线稳定。若存在既有失败,请记录,不要直接改无关代码。 |
---
## 六、AFF-1 数据模型与通用导出扩展
### AFF-1-001 新增自动填表 ORM 模型
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 数据库 / 后端 |
| 前置任务 | AFF-0 |
| 涉及文件 | `review_agent/models.py` |
| 目标 | 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord` |
| 开发步骤 | 1. 定义批次状态枚举2. 定义产物类型枚举3. 定义通知状态和渠道枚举4. 添加外键到 Conversation、User、Message、FileSummaryBatch、RegulatoryReviewBatch5. 添加 JSONField、hash、路径、时间字段6. 添加 `db_table`、索引和唯一约束 |
| 验收标准 | 模型字段、表名、索引与数据库设计一致 |
| 验证命令 | `python manage.py check` |
| Codex 执行提示 | 请按 `docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md` 新增自动填表三张表模型,模型集中放在 `review_agent/models.py`。 |
### AFF-1-002 扩展导出类型和权限查询能力
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 数据库 / 下载 |
| 前置任务 | AFF-1-001 |
| 涉及文件 | `review_agent/models.py`、导出下载权限相关视图 |
| 目标 | 为 `ExportedSummaryFile.ExportType` 增加 `word`、`pdf`,并确保下载权限支持 `application_form_fill` |
| 开发步骤 | 1. 扩展 `ExportType.WORD`2. 扩展 `ExportType.PDF`3. 检查下载接口按 workflow_type 分派权限4. 增加 application_form_fill 反查批次的权限路径 |
| 验收标准 | Word/ PDF 导出记录可创建;填表导出下载权限可追溯到当前用户 |
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_views.py -k download` |
| Codex 执行提示 | 请扩展 ExportedSummaryFile 支持 word/pdf,并让现有下载接口能通过 workflow_type=application_form_fill 校验填表批次权限。 |
### AFF-1-003 生成迁移并补模型测试
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 数据库 / 测试 |
| 前置任务 | AFF-1-002 |
| 涉及文件 | `review_agent/migrations/``tests/test_application_form_fill_models.py` |
| 目标 | 生成迁移并覆盖新增表的基础约束和权限关系 |
| 开发步骤 | 1. 运行 makemigrations2. 检查 migration 只包含第三批相关变更3. 运行 migrate4. 测试批次创建5. 测试产物 hash 字段6. 测试通知重试字段7. 测试 ExportedSummaryFile word 类型 |
| 验收标准 | migration 可执行,模型测试通过 |
| 验证命令 | `python manage.py makemigrations review_agent`; `python manage.py migrate`; `pytest tests/test_application_form_fill_models.py` |
| Codex 执行提示 | 请为第三批模型生成迁移并新增模型测试,覆盖批次、产物、通知记录和 word/pdf 导出类型。 |
### AFF-1 阶段验证
```bash
python manage.py check
pytest tests/test_application_form_fill_models.py tests/test_file_summary_views.py -k download
```
---
## 七、AFF-2 模块骨架与模板配置
### AFF-2-001 创建 application_form_fill 模块骨架
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 模块 |
| 前置任务 | AFF-1 |
| 涉及文件 | `review_agent/application_form_fill/` |
| 目标 | 建立独立模块目录、常量、schemas、storage、workflow、views 和 services 包 |
| 开发步骤 | 1. 创建模块目录2. 创建 `constants.py`3. 创建 `schemas.py`4. 创建 `storage.py`5. 创建 `workflow.py`6. 创建 `views.py`7. 创建 services 子模块8. 创建 templates 和 prompts 目录 |
| 验收标准 | 模块可 import不影响既有应用启动 |
| 验证命令 | `python manage.py check` |
| Codex 执行提示 | 请新增 `review_agent/application_form_fill/` 独立模块骨架,先只放常量、schema、空服务和基础 import,不要改动法规核查模块。 |
### AFF-2-002 编写模板配置 YAML
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 配置 / 模板 |
| 前置任务 | AFF-2-001 |
| 涉及文件 | `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` |
| 目标 | 建立模板配置,至少覆盖注册证 `.docx` 已识别字段 |
| 开发步骤 | 1. 定义 version2. 定义 source_dir3. 配置 `registration_certificate`4. 配置 `change_registration``.doc` 待转换模板5. 配置 `essential_principles``.doc` 待转换模板6. 为注册证配置注册人名称、注册人住所、生产地址、产品名称、包装规格、主要组成成分、预期用途、储存条件及有效期、附件等字段 |
| 验收标准 | YAML 可解析,注册证字段映射到 table_row |
| 验证命令 | `pytest tests/test_application_form_fill_template_config.py` |
| Codex 执行提示 | 请新增自动填表模板配置 YAML配置路径必须是 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml`,先完整录入注册证表格字段。 |
### AFF-2-003 实现模板配置加载与校验
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 配置 |
| 前置任务 | AFF-2-002 |
| 涉及文件 | `review_agent/application_form_fill/services/template_config.py``tests/test_application_form_fill_template_config.py` |
| 目标 | 读取、校验模板配置并计算 hash |
| 开发步骤 | 1. 实现 `load_template_config()`2. 实现 `validate_template_config()`3. 实现 `compute_config_hash()`4. 校验 version、source_dir、templates、code 唯一、source_file 存在、target.type 支持5. 对 `.doc` 待转换模板允许配置存在但标记运行时处理 |
| 验收标准 | 有效配置通过,缺失 source_dir 或重复 code 能被测试捕获 |
| 验证命令 | `pytest tests/test_application_form_fill_template_config.py` |
| Codex 执行提示 | 请实现模板配置加载和校验服务,配置错误必须返回清晰错误列表,不要在 import 时直接崩溃。 |
### AFF-2 阶段验证
```bash
python manage.py check
pytest tests/test_application_form_fill_template_config.py
```
---
## 八、AFF-3 触发与工作流骨架
### AFF-3-001 扩展意图路由
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 意图识别 |
| 前置任务 | AFF-2 |
| 涉及文件 | `review_agent/skill_router.py``review_agent/application_form_fill/constants.py``tests/test_application_form_fill_trigger.py` |
| 目标 | 用户话语命中自动填表意图时返回 `application_form_fill` |
| 开发步骤 | 1. 增加触发关键词2. 支持“帮我填注册证”“对应的表格”“生成申报模板”等3. 支持指定模板识别入口4. 保持文件汇总和法规核查路由不回归 |
| 验收标准 | 自动填表语句触发正确,普通对话不误触发 |
| 验证命令 | `pytest tests/test_application_form_fill_trigger.py tests/test_regulatory_workflow.py -k router` |
| Codex 执行提示 | 请扩展现有意图路由,新增 application_form_fill 动作。不要破坏 file_summary 和 regulatory_review 的现有触发。 |
### AFF-3-002 实现批次创建和节点初始化
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 工作流 |
| 前置任务 | AFF-3-001 |
| 涉及文件 | `review_agent/application_form_fill/workflow.py``review_agent/application_form_fill/storage.py``tests/test_application_form_fill_workflow.py` |
| 目标 | 创建填表批次、生成工作目录、初始化节点 |
| 开发步骤 | 1. 实现 `build_batch_no()`2. 实现 `build_batch_work_dir()`3. 实现 `create_application_form_fill_batch()`4. 绑定 conversation、user、trigger_message、source_summary_batch5. 初始化 `FORM_FILL_NODE_DEFINITIONS` 节点6. 写 workflow_created 事件 |
| 验收标准 | 批次编号唯一,节点数量正确,工作目录在受控路径 |
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k create` |
| Codex 执行提示 | 请实现自动填表批次创建和节点初始化,workflow_type 必须写 application_form_fill。 |
### AFF-3-003 实现工作流执行器骨架
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 工作流 |
| 前置任务 | AFF-3-002 |
| 涉及文件 | `review_agent/application_form_fill/workflow.py``tests/test_application_form_fill_workflow.py` |
| 目标 | 实现节点串行执行、状态更新、事件推送和 skipped PDF 节点 |
| 开发步骤 | 1. 实现 `FormFillWorkflowExecutor.run()`2. 实现 `_nodes()`3. 实现 `_run_node()`4. 每个节点写 running/success/skipped5. `pdf_convert` 本期标记 skipped6. 失败时写 batch.failed |
| 验收标准 | 空实现节点可完整跑到 successPDF 节点 skipped |
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k executor` |
| Codex 执行提示 | 请实现自动填表工作流执行器骨架先让节点状态可完整流转PDF 转换节点本期标记 skipped。 |
### AFF-3-004 接入流式对话启动逻辑
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 对话 |
| 前置任务 | AFF-3-003 |
| 涉及文件 | `review_agent/services.py``review_agent/application_form_fill/views.py` |
| 目标 | 用户触发自动填表时启动工作流;有附件时先自动汇总,无附件时使用最近成功汇总批次 |
| 开发步骤 | 1. 在 stream_message 中处理 application_form_fill 路由2. 如本次存在新附件复用文件汇总启动逻辑3. 无新附件时查找最近成功 `FileSummaryBatch`4. 无来源批次时回复请上传资料5. 返回 workflow meta |
| 验收标准 | 对话触发能创建填表批次;无汇总批次时不崩溃 |
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k stream` |
| Codex 执行提示 | 请把 application_form_fill 接入现有 stream_message。无附件时复用最近成功汇总批次有新附件时先自动汇总。 |
### AFF-3 阶段验证
```bash
python manage.py check
pytest tests/test_application_form_fill_trigger.py tests/test_application_form_fill_workflow.py
```
---
## 九、AFF-4 模板选择与文件来源
### AFF-4-001 实现模板指定解析
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 模板选择 |
| 前置任务 | AFF-3 |
| 涉及文件 | `review_agent/application_form_fill/services/template_select.py``tests/test_application_form_fill_template_select.py` |
| 目标 | 从用户话语中识别指定模板 |
| 开发步骤 | 1. 识别注册证2. 识别变更注册备案文件3. 识别安全和性能基本原则清单4. 识别全部模板5. 未指定返回空数组 |
| 验收标准 | 指定模板语句可返回正确 template_codes |
| 验证命令 | `pytest tests/test_application_form_fill_template_select.py -k requested` |
| Codex 执行提示 | 请实现用户指定模板解析,支持注册证、变更注册备案文件、安全和性能基本原则清单、全部模板。 |
### AFF-4-002 实现注册类型识别和模板选择
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 模板选择 |
| 前置任务 | AFF-4-001 |
| 涉及文件 | `review_agent/application_form_fill/services/template_select.py``tests/test_application_form_fill_template_select.py` |
| 目标 | 按用户话语、法规确认条件、文件抽取识别注册类型,并选择模板 |
| 开发步骤 | 1. 用户话语识别首次注册、变更注册、备案2. 从 `source_regulatory_batch.condition_json` 读取 confirmed_conditions3. 从文件抽取候选读取 registration_type4. 未指定模板时首次注册生成注册证 + 基本原则清单5. 变更/备案生成变更文件 + 基本原则清单6. 指定不适用模板允许生成但写 risk_notes |
| 验收标准 | 模板选择规则与功能设计一致 |
| 验证命令 | `pytest tests/test_application_form_fill_template_select.py` |
| Codex 执行提示 | 请实现注册类型识别和默认模板选择优先级是用户话语、已确认法规核查条件、文件抽取、unknown。 |
### AFF-4-003 实现模板复制服务
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 模板 |
| 前置任务 | AFF-4-002 |
| 涉及文件 | `review_agent/application_form_fill/services/template_repository.py``review_agent/application_form_fill/storage.py``tests/test_application_form_fill_template_repository.py` |
| 目标 | 将原始模板复制到批次目录,原始模板只读 |
| 开发步骤 | 1. 根据 TemplateSpec 定位 source_file2. 复制到 `work_dir/templates`3. 记录 ApplicationFormFillArtifact(template_copy)4. `.doc` 且无工作模板时返回模板失败不影响其他模板5. 路径必须在受控工作目录内 |
| 验收标准 | 注册证 `.docx` 可复制;原始文件不被修改;产物 hash 写入 |
| 验证命令 | `pytest tests/test_application_form_fill_template_repository.py` |
| Codex 执行提示 | 请实现模板复制服务,只允许复制到批次工作目录,不能直接写原始法规材料目录。 |
### AFF-4 阶段验证
```bash
pytest tests/test_application_form_fill_template_select.py tests/test_application_form_fill_template_repository.py
```
---
## 十、AFF-5 字段抽取与合并
### AFF-5-001 实现规则/正则字段抽取
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 字段抽取 |
| 前置任务 | AFF-4 |
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py``tests/test_application_form_fill_field_extract.py` |
| 目标 | 从说明书、产品技术要求等文本中按标签和章节抽取字段 |
| 开发步骤 | 1. 复用 `regulatory_review.services.text_extract.extract_text`2. 识别文件角色3. 匹配 `字段名:值` 标签行4. 支持多行值拼接5. 保存 source_file、source_role、evidence、confidence、extractor=rule |
| 验收标准 | 能从测试说明书文本抽取产品名称、预期用途、储存条件、有效期、包装规格 |
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k rules` |
| Codex 执行提示 | 请实现自动填表规则/正则字段抽取,优先覆盖注册证模板字段,抽取结果必须包含来源文件、来源角色和证据片段。 |
### AFF-5-002 实现 LLM 结构化抽取封装
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / LLM |
| 前置任务 | AFF-5-001 |
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py``review_agent/application_form_fill/prompts/field_extract.md``tests/test_application_form_fill_field_extract.py` |
| 目标 | 调用现有 LLM 能力输出字段 JSON失败时降级 |
| 开发步骤 | 1. 编写字段抽取 prompt2. 输入模板字段、文件上下文和候选文本3. 要求输出 JSON fields/checklist_items4. 解析 JSON5. 捕获超时和解析失败6. 失败返回空 LLM 结果,不阻断规则抽取 |
| 验收标准 | monkeypatch LLM 后可解析结构化字段LLM 异常时工作流继续 |
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k llm` |
| Codex 执行提示 | 请实现 LLM 结构化抽取封装必须可测试、可降级。LLM 输出解析失败不能导致整个填表批次失败。 |
### AFF-5-003 实现并行抽取和产物留底
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 字段抽取 |
| 前置任务 | AFF-5-002 |
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py``review_agent/application_form_fill/storage.py` |
| 目标 | 并行执行规则/正则和 LLM 抽取,并保存 `field_extract_result.json` |
| 开发步骤 | 1. 使用 ThreadPoolExecutor2. 规则和 LLM 两路并行3. 组装 regex_results、llm_results、selected_templates、source_evidence4. 保存 JSON5. 写 ApplicationFormFillArtifact(field_extract_result) |
| 验收标准 | JSON 产物包含两路结果和模板列表 |
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k parallel` |
| Codex 执行提示 | 请实现字段并行抽取和 field_extract_result.json 产物留底LLM 失败时也必须保存规则结果。 |
### AFF-5-004 实现字段合并与冲突检测
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 字段合并 |
| 前置任务 | AFF-5-003 |
| 涉及文件 | `review_agent/application_form_fill/services/field_merge.py``tests/test_application_form_fill_field_merge.py` |
| 目标 | 合并规则和 LLM 字段,说明书优先,并生成冲突摘要 |
| 开发步骤 | 1. 实现字段值归一化2. 实现来源优先级排序3. 同字段多值一致时合并4. 不一致时选择最高优先级来源5. 说明书与其他文件冲突时标记 conflict6. 输出 merged_fields 和 conflicts |
| 验收标准 | 说明书优先;冲突字段包含 selected_value、selected_source、conflict_values、handling |
| 验证命令 | `pytest tests/test_application_form_fill_field_merge.py` |
| Codex 执行提示 | 请实现字段合并服务,严格按说明书优先处理冲突,并把冲突列表写成可用于对话摘要和追溯清单的结构。 |
### AFF-5 阶段验证
```bash
pytest tests/test_application_form_fill_field_extract.py tests/test_application_form_fill_field_merge.py
```
---
## 十一、AFF-6 Word 填充与追溯导出
### AFF-6-001 实现 Word 表格行填充
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / Word |
| 前置任务 | AFF-5 |
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py``tests/test_application_form_fill_word_fill.py` |
| 目标 | 使用 `python-docx` 按表格行名写入注册证模板 |
| 开发步骤 | 1. 打开 docx 模板副本2. 遍历 tables/rows/cells3. 匹配第一列 row_label4. 写入第二列5. 缺失字段保持空白6. 保存 output_path |
| 验收标准 | 产品名称、包装规格、预期用途等能写入注册证模板对应行 |
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k table` |
| Codex 执行提示 | 请实现 Word 表格行填充服务,先支持注册证模板的两列表格行名匹配。 |
### AFF-6-002 实现冲突高亮
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / Word |
| 前置任务 | AFF-6-001 |
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py``tests/test_application_form_fill_word_fill.py` |
| 目标 | 冲突字段在 Word 中黄底红字 |
| 开发步骤 | 1. 对冲突字段写入 run2. 设置字体颜色 `FF0000`3. 设置单元格 shading `FFFF00`4. 非冲突字段保持原样式5. 测试读取 docx XML 验证颜色和底色 |
| 验收标准 | 冲突字段样式可在 docx XML 中验证 |
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k highlight` |
| Codex 执行提示 | 请实现 Word 冲突高亮,冲突字段必须红色字体和黄色底色,测试需检查 docx XML。 |
### AFF-6-003 创建 Word 导出记录
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 导出 |
| 前置任务 | AFF-6-002 |
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py``review_agent/application_form_fill/workflow.py` |
| 目标 | Word 生成后写入 `ExportedSummaryFile(export_type=word)` 和产物记录 |
| 开发步骤 | 1. 按批次号、产品名、模板标签生成文件名2. 保存到 `work_dir/filled`3. 创建 `ApplicationFormFillArtifact(filled_template)`4. 创建 `ExportedSummaryFile`5. 记录模板失败时错误 |
| 验收标准 | 可查询到 word 导出记录和 filled_template 产物 |
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k export` |
| Codex 执行提示 | 请把 Word 填充结果保存为导出文件,export_type 使用 word,workflow_type 使用 application_form_fill。 |
### AFF-6-004 实现追溯清单 Excel/JSON
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 导出 |
| 前置任务 | AFF-6-003 |
| 涉及文件 | `review_agent/application_form_fill/services/traceability_export.py``tests/test_application_form_fill_traceability.py` |
| 目标 | 输出字段来源追溯清单和合并结果 JSON |
| 开发步骤 | 1. 生成“字段追溯”Sheet2. 生成“冲突字段”Sheet3. 生成“低置信度条目”Sheet4. 生成“生成结果”Sheet5. 保存 Excel6. 保存 merged_fields.json7. 创建导出和产物记录 |
| 验收标准 | Excel 可打开,包含字段、来源、证据、冲突、处理方式 |
| 验证命令 | `pytest tests/test_application_form_fill_traceability.py` |
| Codex 执行提示 | 请实现字段来源追溯清单导出,必须包含规则/LLM 合并结果、冲突字段和生成结果。 |
### AFF-6 阶段验证
```bash
pytest tests/test_application_form_fill_word_fill.py tests/test_application_form_fill_traceability.py
```
---
## 十二、AFF-7 飞书通知与对话摘要
### AFF-7-001 生成助手 Markdown 摘要
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 对话 |
| 前置任务 | AFF-6 |
| 涉及文件 | `review_agent/application_form_fill/services/traceability_export.py``review_agent/application_form_fill/workflow.py` |
| 目标 | 工作流完成后向当前对话写入下载链接和冲突摘要 |
| 开发步骤 | 1. 汇总 Word 导出2. 汇总 PDF 状态为待增强3. 汇总冲突字段4. 添加追溯清单下载链接5. 创建 assistant Message |
| 验收标准 | 对话中出现 Markdown 表格、Word 下载、追溯清单下载和冲突摘要 |
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k summary` |
| Codex 执行提示 | 请实现自动填表完成后的助手 Markdown 摘要PDF 本期显示为待增强,不作为失败。 |
### AFF-7-002 实现飞书通知记录和 mock 通知
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 通知 |
| 前置任务 | AFF-7-001 |
| 涉及文件 | `review_agent/application_form_fill/services/notifier.py``tests/test_application_form_fill_notification.py` |
| 目标 | 填表完成后记录通知,可 mock 发送,失败不阻断下载 |
| 开发步骤 | 1. 实现 `notify_completion()`2. 默认 channel=mock3. 写 template_codes、export_ids、message_summary4. 支持 send_status success/failed5. 失败时记录 error_message 和 retry_count |
| 验收标准 | 通知记录可查;通知失败不影响批次核心产物 |
| 验证命令 | `pytest tests/test_application_form_fill_notification.py` |
| Codex 执行提示 | 请实现自动填表通知服务,先用 mock 通知记录即可。通知失败不得阻断 Word 下载。 |
### AFF-7-003 完成工作流状态归并
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 工作流 |
| 前置任务 | AFF-7-002 |
| 涉及文件 | `review_agent/application_form_fill/workflow.py``tests/test_application_form_fill_workflow.py` |
| 目标 | 根据 Word、追溯清单、通知结果标记 success/partial_success/failed |
| 开发步骤 | 1. 所有目标 Word 成功时 success2. 至少一个 Word 成功但非关键产物失败时 partial_success3. 所有 Word 失败时 failed4. PDF skipped 不导致失败5. 发送 workflow_completed 事件 |
| 验收标准 | 批次状态符合详细设计 |
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k status` |
| Codex 执行提示 | 请完成自动填表工作流状态归并,PDF skipped 不影响 success,通知失败最多导致 partial_success。 |
### AFF-7 阶段验证
```bash
pytest tests/test_application_form_fill_workflow.py tests/test_application_form_fill_notification.py
```
---
## 十三、AFF-8 前端卡片与总体验收
### AFF-8-001 后端状态接口
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 后端 / 接口 |
| 前置任务 | AFF-7 |
| 涉及文件 | `review_agent/application_form_fill/views.py``review_agent/urls.py` 或相关 URL 文件 |
| 目标 | 提供自动填表启动和状态查询接口 |
| 开发步骤 | 1. 新增 start 接口2. 新增 detail/status 接口3. 返回 batch、nodes、conflicts、exports4. 校验 conversation/user 权限5. 接入 URL |
| 验收标准 | 当前用户可查自己的填表批次,不能查他人批次 |
| 验证命令 | `pytest tests/test_application_form_fill_views.py` |
| Codex 执行提示 | 请实现自动填表启动和状态查询接口,所有查询必须校验当前用户权限。 |
### AFF-8-002 前端支持 application_form_fill 卡片
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 前端 / 工作流卡片 |
| 前置任务 | AFF-8-001 |
| 涉及文件 | `static/js/app.js``templates/home.html`、静态 CSS 文件 |
| 目标 | 前端展示自动填表工作流卡片,并根据 SSE 更新节点 |
| 开发步骤 | 1. 解析 workflow_type=application_form_fill2. 定义节点文案3. 创建卡片4. 更新节点状态5. PDF 节点显示待增强/跳过6. 页面刷新后恢复 |
| 验收标准 | 自动填表卡片可显示准备资料、选择模板、复制模板、抽取字段、填写 Word、追溯清单、飞书通知 |
| 验证命令 | `pytest tests/test_application_form_fill_frontend.py` 或现有前端测试命令 |
| Codex 执行提示 | 请在现有工作流卡片逻辑中新增 application_form_fill 类型,展示自动填表节点并支持状态恢复。 |
### AFF-8-003 前端展示结果和下载链接
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 前端 / Markdown |
| 前置任务 | AFF-8-002 |
| 涉及文件 | `static/js/app.js`、模板和 CSS |
| 目标 | 对话框正常展示 Word 下载、追溯清单、冲突摘要 |
| 开发步骤 | 1. 确认助手 Markdown 渲染支持表格2. 验证 Word 下载链接点击3. 验证冲突摘要表格4. PDF 列显示待增强 |
| 验收标准 | 对话结果可读、链接可用、PDF 待增强不被误判为失败 |
| 验证命令 | 前端/Playwright 对应测试 |
| Codex 执行提示 | 请验证并完善自动填表结果展示,确保 Markdown 表格、Word 下载链接、追溯清单链接和冲突摘要正常显示。 |
### AFF-8-004 总体验收与回归
| 项目 | 内容 |
| --- | --- |
| 任务类型 | 验收 / 回归 |
| 前置任务 | AFF-8-003 |
| 涉及文件 | 全项目 |
| 目标 | 运行全量测试,确认前三批能力均不回归 |
| 开发步骤 | 1. 运行 Django check2. 运行自动填表测试3. 运行文件汇总测试4. 运行法规核查测试5. 如可用,运行前端/Playwright 测试6. 检查 git status |
| 验收标准 | 全量测试通过;失败项均有解释;无意外文件变更 |
| 验证命令 | `python manage.py check`; `pytest` |
| Codex 执行提示 | 请执行第三批自动填表总体验收,运行 Django check 和 pytest 全量回归,确认文件汇总与法规核查不回归。 |
### AFF-8 阶段验证
```bash
python manage.py check
pytest
```
---
## 十四、测试分层要求
| 层级 | 验证内容 | 建议文件 |
| --- | --- | --- |
| 模型测试 | 三张新表、word/pdf 导出类型、权限关系 | `tests/test_application_form_fill_models.py` |
| 配置测试 | YAML 加载、模板配置校验、hash | `tests/test_application_form_fill_template_config.py` |
| 选择测试 | 触发语句、指定模板、注册类型优先级、默认模板 | `tests/test_application_form_fill_template_select.py` |
| 抽取测试 | 规则/正则、LLM 降级、并行抽取、字段合并 | `tests/test_application_form_fill_field_extract.py``tests/test_application_form_fill_field_merge.py` |
| Word 测试 | 表格行填充、冲突高亮、导出记录 | `tests/test_application_form_fill_word_fill.py` |
| 导出测试 | 追溯清单 Excel、JSON 产物、下载权限 | `tests/test_application_form_fill_traceability.py``tests/test_application_form_fill_views.py` |
| 工作流测试 | 批次创建、节点流转、状态归并、助手摘要 | `tests/test_application_form_fill_workflow.py` |
| 通知测试 | mock 通知、失败记录、重试字段 | `tests/test_application_form_fill_notification.py` |
| 前端测试 | 卡片节点、PDF 待增强、下载链接、冲突摘要 | `tests/test_application_form_fill_frontend.py` |
---
## 十五、Codex 自动化执行规则
| 规则 | 内容 |
| --- | --- |
| 顺序执行 | 必须从 AFF-0 到 AFF-8 顺序执行,不得跳阶段 |
| TDD | 新行为先写失败测试,再实现 |
| 当前阶段优先 | 某阶段失败时先修复当前阶段,不继续后续阶段 |
| 回归保护 | 文件汇总和法规核查已有测试不得回归 |
| PDF 边界 | PDF 节点本期可 skipped不为 PDF 引入强依赖 |
| 字段表边界 | 不新增字段级数据库表,后续增强已在待办计划 |
| 每阶段验证 | 每阶段完成后运行对应验证命令 |
| 每阶段提交 | 每阶段验证通过后生成提交摘要并本地提交 |
| 不覆盖变更 | 不得回滚或覆盖用户已有未提交变更 |
---
## 十六、推荐目标模式提示词
后续可直接对 Codex 输入:
```text
请按 docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md 执行第三批开发。
目标:
完成独立 application_form_fill 工作流,通过用户对话触发自动填表,复用当前对话最近成功 FileSummaryBatch支持模板配置、注册证 Word 自动填写、规则/正则与 LLM 并行字段抽取、说明书优先冲突归并、冲突高亮、字段来源追溯清单、Word 下载、自动填表工作流卡片和飞书 mock 通知记录。
执行规则:
1. 创建 codex/YYYYMMDD-申报文件自动填表 分支。
2. 按 AFF-0 到 AFF-8 顺序执行,不跳阶段。
3. 每阶段先写测试,再实现,完成后运行对应验证命令。
4. 不实现字段级数据库表。
5. PDF 转换本期作为 skipped/待增强,不引入强制 LibreOffice 依赖。
6. 模板配置路径必须为 review_agent/application_form_fill/templates/application_form_templates_v1.yaml。
7. Word 模板优先支持注册证格式 docx两个 doc 模板可标记待转换或部分成功。
8. 每阶段验证通过后调用 git-commit-summary 生成提交摘要并本地提交。
9. 最后运行 python manage.py check 和 pytest 全量验收。
```
---
## 十七、待执行前检查清单
| 检查项 | 状态 |
| --- | --- |
| 第三批需求分析、功能设计、详细设计、数据库设计均已存在 | 待执行时确认 |
| 当前分支是否适合创建开发分支 | 待执行时确认 |
| 是否存在用户未提交变更 | 待执行时确认 |
| `python-docx``openpyxl``PyYAML` 是否可用 | 待执行时确认 |
| 现有文件汇总和法规核查测试是否通过 | 待执行时确认 |
| 执行机器是否提供 `git-commit-summary` skill | 待执行时确认 |
| `.doc` 模板和 PDF 转换是否保持在待办边界内 | 待执行时确认 |

View File

@@ -33,10 +33,12 @@
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 | | 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
| --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- |
| TODO-FILL-001 | 产品关键信息抽取结果确认 | 原始需求 3 | P1 | 将第二阶段抽取字段转成可人工确认的信息表 | | TODO-FILL-001 | 字段级数据库表 | 第三批自动填表数据库设计 | P1 | 后续新增 `ApplicationFormFillField`,支持字段级查询、人工修改、审计和统计 |
| TODO-FILL-002 | 自动填写目标文件 | 原始需求 3 | P1 | 将确认后的字段写入注册申报表格或对照清单 | | TODO-FILL-002 | PDF 转换与版式 QA | 第三批自动填表详细设计 | P1 | 使用 LibreOffice/soffice 将填好的 Word 转 PDF并增加页数非 0、逐页截图或版式差异检查 |
| TODO-FILL-003 | 填写前后差异报告 | 自动填写风控 | P1 | 输出写入前后 diff供人工复核 | | TODO-FILL-003 | `.doc` 模板预转换管理 | 第三批自动填表模板处理 | P1 | 将变更注册(备案)文件和安全和性能基本原则清单预转换为 `.docx` 工作模板,并人工确认版式 |
| TODO-FILL-004 | 自动填写审批确认 | 自动填写风控 | P1 | 文件写操作前必须人工确认 | | TODO-FILL-004 | 安全和性能基本原则清单完整条目拆解 | 第三批自动填表模板配置 | P1 | 拆解清单条目编号、原则内容、适用性栏、证据栏和证明文件位置栏,写入 YAML 配置 |
| TODO-FILL-005 | 填写前后差异报告 | 自动填写风控 | P1 | 输出写入前后 diff供人工复核 |
| TODO-FILL-006 | 自动填写审批确认 | 自动填写风控 | P1 | 文件写操作前支持人工确认或二次审批 |
--- ---

View File

@@ -7,3 +7,6 @@ xlrd>=2.0
olefile>=0.47 olefile>=0.47
py7zr>=0.21 py7zr>=0.21
playwright>=1.60 playwright>=1.60
PyYAML>=6.0
chromadb>=0.5
httpx>=0.27

View File

@@ -4,7 +4,14 @@ from review_agent.models import FileSummaryBatch, WorkflowEvent
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent: def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
return WorkflowEvent.objects.create(batch=batch, event_type=event_type, payload=payload or {}) return WorkflowEvent.objects.create(
batch=batch,
workflow_type="file_summary",
workflow_batch_id=batch.pk,
conversation=batch.conversation,
event_type=event_type,
payload=payload or {},
)
def serialize_event(event: WorkflowEvent) -> dict[str, object]: def serialize_event(event: WorkflowEvent) -> dict[str, object]:

View File

@@ -54,6 +54,9 @@ def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile:
workbook.save(path) workbook.save(path)
exported = ExportedSummaryFile.objects.create( exported = ExportedSummaryFile.objects.create(
batch=batch, batch=batch,
workflow_type="file_summary",
workflow_batch_id=batch.pk,
export_category="summary",
export_type=ExportedSummaryFile.ExportType.EXCEL, export_type=ExportedSummaryFile.ExportType.EXCEL,
file_name=path.name, file_name=path.name,
storage_path=str(path), storage_path=str(path),

View File

@@ -65,6 +65,9 @@ def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFi
path.write_text(content, encoding="utf-8") path.write_text(content, encoding="utf-8")
exported = ExportedSummaryFile.objects.create( exported = ExportedSummaryFile.objects.create(
batch=batch, batch=batch,
workflow_type="file_summary",
workflow_batch_id=batch.pk,
export_category="summary",
export_type=ExportedSummaryFile.ExportType.MARKDOWN, export_type=ExportedSummaryFile.ExportType.MARKDOWN,
file_name=path.name, file_name=path.name,
storage_path=str(path), storage_path=str(path),

View File

@@ -229,6 +229,7 @@ def batch_status(request, batch_id: int):
{ {
"batch": { "batch": {
"id": batch.pk, "id": batch.pk,
"workflow_type": "file_summary",
"batch_no": batch.batch_no, "batch_no": batch.batch_no,
"status": batch.status, "status": batch.status,
"product_name": batch.product_name, "product_name": batch.product_name,
@@ -283,11 +284,12 @@ def export_download(request, export_id: int):
extra={"export_id": exported.pk, "storage_path": exported.storage_path}, extra={"export_id": exported.pk, "storage_path": exported.storage_path},
) )
return JsonResponse({"error": "文件不存在。"}, status=404) return JsonResponse({"error": "文件不存在。"}, status=404)
content_type = ( content_types = {
"text/markdown; charset=utf-8" ExportedSummaryFile.ExportType.MARKDOWN: "text/markdown; charset=utf-8",
if exported.export_type == ExportedSummaryFile.ExportType.MARKDOWN ExportedSummaryFile.ExportType.EXCEL: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
else "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ExportedSummaryFile.ExportType.JSON: "application/json; charset=utf-8",
) }
content_type = content_types.get(exported.export_type, "application/octet-stream")
logger.info( logger.info(
"Export download started", "Export download started",
extra={ extra={

View File

@@ -112,7 +112,14 @@ def create_file_summary_batch(
attachment.save(update_fields=["upload_status"]) attachment.save(update_fields=["upload_status"])
for code, name, _skill_name in NODE_DEFINITIONS: for code, name, _skill_name in NODE_DEFINITIONS:
WorkflowNodeRun.objects.create(batch=batch, node_code=code, node_name=name) WorkflowNodeRun.objects.create(
batch=batch,
workflow_type="file_summary",
workflow_batch_id=batch.pk,
node_group="file_summary",
node_code=code,
node_name=name,
)
record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no}) record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no})
logger.info( logger.info(

View File

@@ -57,7 +57,7 @@ def generate_reply(conversation, user_message: str) -> str:
raise LLMRequestError("模型接口返回格式不符合预期。") from exc raise LLMRequestError("模型接口返回格式不符合预期。") from exc
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0) -> str: def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
"""Calls the configured chat endpoint with explicit messages and returns assistant text.""" """Calls the configured chat endpoint with explicit messages and returns assistant text."""
if not settings.LLM_API_KEY: if not settings.LLM_API_KEY:
@@ -84,7 +84,7 @@ def generate_completion(messages: list[dict[str, str]], *, temperature: float =
) )
try: try:
with request.urlopen(http_request, timeout=60) as response: with request.urlopen(http_request, timeout=timeout) as response:
data = json.loads(response.read().decode("utf-8")) data = json.loads(response.read().decode("utf-8"))
except error.HTTPError as exc: except error.HTTPError as exc:
details = exc.read().decode("utf-8", errors="ignore") details = exc.read().decode("utf-8", errors="ignore")

View File

@@ -0,0 +1,15 @@
from __future__ import annotations
import logging
import re
class SuppressWorkflowStatusPollFilter(logging.Filter):
    """Drop access-log records produced by successful workflow status polls.

    A record is suppressed when its rendered message contains a ``GET``
    request line for a file-summary or regulatory-review status URL followed
    by a 200 status code. Every other record passes through unchanged.
    """

    STATUS_POLL_PATTERN = re.compile(
        r'"GET /api/review-agent/(?:file-summary|regulatory-review)/\d+/status/ HTTP/[0-9.]+" 200 '
    )

    def filter(self, record: logging.LogRecord) -> bool:
        """Return ``False`` for status-poll records and ``True`` otherwise."""
        rendered = record.getMessage()
        is_status_poll = self.STATUS_POLL_PATTERN.search(rendered) is not None
        return not is_status_poll

View File

@@ -0,0 +1 @@
"""Management command package for review_agent."""

View File

@@ -0,0 +1 @@
"""Management commands for review_agent."""

View File

@@ -0,0 +1,33 @@
from __future__ import annotations
from pathlib import Path
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from review_agent.regulatory_review.services.rag_embedding import get_embedding_provider
from review_agent.regulatory_review.services.rag_index import build_chroma_index
from review_agent.regulatory_review.services.rule_loader import load_rule_file
class Command(BaseCommand):
    """Management command that builds the local ChromaDB RAG index."""

    help = "构建 NMPA 法规材料本地 ChromaDB RAG 索引。"

    def add_arguments(self, parser):
        """Register ``--provider``; its value is passed to ``get_embedding_provider``."""
        parser.add_argument("--provider", default=None, help="覆盖 REGULATORY_RAG_PROVIDER。")

    def handle(self, *args, **options):
        """Build the index from the rule set's source material directory.

        The directory is resolved as ``settings.BASE_DIR`` joined with the
        ``source_material_dir`` entry of the loaded rule file.

        Raises:
            CommandError: if that directory is missing, or if resolving the
                embedding provider or building the index raises.
        """
        rule_set = load_rule_file()
        source_dir = Path(settings.BASE_DIR) / rule_set["source_material_dir"]
        # is_dir() rather than exists(): a regular file at this path is not a
        # usable material directory and should be reported up front.
        if not source_dir.is_dir():
            raise CommandError(f"法规材料目录不存在:{source_dir}")
        try:
            provider = get_embedding_provider(options["provider"])
            count = build_chroma_index(source_dir=source_dir, embedding_provider=provider)
        except Exception as exc:
            # Command boundary: surface any failure as a CommandError so the
            # CLI prints the message instead of a raw traceback.
            raise CommandError(str(exc)) from exc
        self.stdout.write(
            self.style.SUCCESS(
                f"已构建法规 RAG 索引:collection={settings.REGULATORY_RAG_COLLECTION}, chunks={count}"
            )
        )

View File

@@ -0,0 +1,27 @@
from __future__ import annotations
from django.core.management.base import BaseCommand
from review_agent.regulatory_review.services.rule_loader import check_rule_version
class Command(BaseCommand):
    """Management command that compares the rule YAML with its database record."""

    help = "检查 NMPA 法规核查 YAML 规则与数据库版本记录。"

    def add_arguments(self, parser):
        """Register the ``--no-create`` flag."""
        parser.add_argument(
            "--no-create",
            help="缺少数据库记录时只报告 missing不创建记录。",
            action="store_true",
        )

    def handle(self, *args, **options):
        """Run the version check and print a one-line report.

        ``update_missing`` is passed as the inverse of ``--no-create``.
        A warning line is printed in addition when the reported status is
        ``"mismatch"``.
        """
        create_missing = not options["no_create"]
        result = check_rule_version(update_missing=create_missing)

        report = (
            f"{result.code}: {result.status}; yaml_hash={result.current_hash}; "
            f"db_hash={result.database_hash or '-'}; path={result.path}"
        )
        self.stdout.write(report)

        if result.status != "mismatch":
            return
        warning = self.style.WARNING("YAML 与数据库记录不一致,请人工确认后更新规则版本记录。")
        self.stdout.write(warning)

View File

@@ -0,0 +1,479 @@
# Generated by Django 5.2.14 on 2026-06-06 16:22
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
# Schema migration for the regulatory review feature. It creates five tables
# (artifact, issue, notification record, review batch, rule version), adds
# workflow_type / workflow_batch_id style fields to the existing export,
# event and node-run models, and declares the related indexes.
class Migration(migrations.Migration):
# Depends on this app's 0002 migration and on the swappable user model.
dependencies = [
(
"review_agent",
"0002_fileattachment_filesummarybatch_exportedsummaryfile_and_more",
),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# --- New table: ra_regulatory_artifact ---
migrations.CreateModel(
name="RegulatoryArtifact",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"artifact_type",
models.CharField(
choices=[
("markdown", "Markdown"),
("excel", "Excel"),
("json", "JSON"),
("text", "文本"),
],
max_length=20,
),
),
("name", models.CharField(max_length=160)),
("storage_path", models.CharField(max_length=500)),
(
"content_hash",
models.CharField(blank=True, default="", max_length=128),
),
("metadata", models.JSONField(blank=True, default=dict)),
("created_at", models.DateTimeField(auto_now_add=True)),
],
options={
"db_table": "ra_regulatory_artifact",
"ordering": ["-created_at", "-id"],
},
),
# --- New table: ra_regulatory_issue ---
migrations.CreateModel(
name="RegulatoryIssue",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("rule_code", models.CharField(blank=True, default="", max_length=120)),
(
"category",
models.CharField(
choices=[
("completeness", "完整性"),
("structure", "章节"),
("consistency", "一致性"),
("rag", "法规依据"),
],
max_length=40,
),
),
(
"severity",
models.CharField(
choices=[
("blocking", "阻断项"),
("high", "高风险"),
("medium", "中风险"),
("low", "低风险"),
("info", "提示"),
],
max_length=20,
),
),
("title", models.CharField(max_length=255)),
("detail", models.TextField(blank=True, default="")),
("suggestion", models.TextField(blank=True, default="")),
(
"status",
models.CharField(
choices=[
("open", "待处理"),
("resolved", "已整改"),
("accepted", "已接受"),
],
default="open",
max_length=20,
),
),
("evidence", models.JSONField(blank=True, default=dict)),
("citations", models.JSONField(blank=True, default=list)),
("created_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
],
options={
"db_table": "ra_regulatory_issue",
"ordering": ["severity", "id"],
},
),
# --- New table: ra_regulatory_notification_record ---
migrations.CreateModel(
name="RegulatoryNotificationRecord",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"channel",
models.CharField(
choices=[("mock", "模拟"), ("feishu", "飞书")],
default="mock",
max_length=20,
),
),
("target", models.CharField(blank=True, default="", max_length=160)),
("payload", models.JSONField(blank=True, default=dict)),
(
"status",
models.CharField(
choices=[
("pending", "待发送"),
("sent", "已发送"),
("failed", "失败"),
],
default="pending",
max_length=20,
),
),
("error_message", models.TextField(blank=True, default="")),
("created_at", models.DateTimeField(auto_now_add=True)),
("sent_at", models.DateTimeField(blank=True, null=True)),
],
options={
"db_table": "ra_regulatory_notification_record",
"ordering": ["-created_at", "-id"],
},
),
# --- New table: ra_regulatory_review_batch ---
# Its foreign keys are added by separate AddField operations further down.
migrations.CreateModel(
name="RegulatoryReviewBatch",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("batch_no", models.CharField(max_length=64, unique=True)),
(
"status",
models.CharField(
choices=[
("pending", "待执行"),
("running", "执行中"),
("success", "成功"),
("failed", "失败"),
],
default="pending",
max_length=20,
),
),
("risk_summary", models.JSONField(blank=True, default=dict)),
("work_dir", models.CharField(blank=True, default="", max_length=500)),
("error_message", models.TextField(blank=True, default="")),
("created_at", models.DateTimeField(auto_now_add=True)),
("started_at", models.DateTimeField(blank=True, null=True)),
("finished_at", models.DateTimeField(blank=True, null=True)),
],
options={
"db_table": "ra_regulatory_review_batch",
"ordering": ["-created_at", "-id"],
},
),
# --- New table: ra_regulatory_rule_version ---
migrations.CreateModel(
name="RegulatoryRuleVersion",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("code", models.CharField(max_length=80, unique=True)),
("name", models.CharField(max_length=160)),
("yaml_path", models.CharField(max_length=500)),
("yaml_hash", models.CharField(max_length=128)),
(
"rag_collection",
models.CharField(blank=True, default="", max_length=120),
),
(
"rag_index_version",
models.CharField(blank=True, default="", max_length=80),
),
(
"rag_index_hash",
models.CharField(blank=True, default="", max_length=128),
),
(
"status",
models.CharField(
choices=[
("active", "启用"),
("outdated", "待更新"),
("disabled", "停用"),
],
default="active",
max_length=20,
),
),
("created_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
],
options={
"db_table": "ra_regulatory_rule_version",
"ordering": ["-updated_at", "-id"],
},
),
# --- New columns on existing models ---
# workflow_type defaults to "file_summary" and workflow_batch_id is nullable
# on each of exportedsummaryfile / workflowevent / workflownoderun.
migrations.AddField(
model_name="exportedsummaryfile",
name="export_category",
field=models.CharField(blank=True, default="summary", max_length=40),
),
migrations.AddField(
model_name="exportedsummaryfile",
name="workflow_batch_id",
field=models.PositiveBigIntegerField(blank=True, null=True),
),
migrations.AddField(
model_name="exportedsummaryfile",
name="workflow_type",
field=models.CharField(blank=True, default="file_summary", max_length=40),
),
migrations.AddField(
model_name="workflowevent",
name="conversation",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="workflow_events",
to="review_agent.conversation",
),
),
migrations.AddField(
model_name="workflowevent",
name="workflow_batch_id",
field=models.PositiveBigIntegerField(blank=True, null=True),
),
migrations.AddField(
model_name="workflowevent",
name="workflow_type",
field=models.CharField(blank=True, default="file_summary", max_length=40),
),
migrations.AddField(
model_name="workflownoderun",
name="node_group",
field=models.CharField(blank=True, default="file_summary", max_length=40),
),
migrations.AddField(
model_name="workflownoderun",
name="workflow_batch_id",
field=models.PositiveBigIntegerField(blank=True, null=True),
),
migrations.AddField(
model_name="workflownoderun",
name="workflow_type",
field=models.CharField(blank=True, default="file_summary", max_length=40),
),
# --- Altered columns on existing models ---
# export_type is redeclared with the markdown / excel / json choices.
migrations.AlterField(
model_name="exportedsummaryfile",
name="export_type",
field=models.CharField(
choices=[
("markdown", "Markdown"),
("excel", "Excel"),
("json", "JSON"),
],
max_length=20,
),
),
# The FK to filesummarybatch is declared nullable on workflowevent and
# workflownoderun.
migrations.AlterField(
model_name="workflowevent",
name="batch",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="events",
to="review_agent.filesummarybatch",
),
),
migrations.AlterField(
model_name="workflownoderun",
name="batch",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="node_runs",
to="review_agent.filesummarybatch",
),
),
# --- Indexes on (workflow_type, workflow_batch_id[, id]) for the existing models ---
migrations.AddIndex(
model_name="exportedsummaryfile",
index=models.Index(
fields=["workflow_type", "workflow_batch_id"],
name="idx_ra_export_workflow",
),
),
migrations.AddIndex(
model_name="workflowevent",
index=models.Index(
fields=["workflow_type", "workflow_batch_id", "id"],
name="idx_ra_event_workflow_id",
),
),
migrations.AddIndex(
model_name="workflownoderun",
index=models.Index(
fields=["workflow_type", "workflow_batch_id"],
name="idx_ra_node_workflow",
),
),
# --- Foreign keys of the new regulatory models ---
migrations.AddField(
model_name="regulatoryreviewbatch",
name="conversation",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="regulatory_review_batches",
to="review_agent.conversation",
),
),
# PROTECT: the referenced file-summary batch cannot be deleted while a
# review batch points at it.
migrations.AddField(
model_name="regulatoryreviewbatch",
name="source_summary_batch",
field=models.ForeignKey(
on_delete=django.db.models.deletion.PROTECT,
related_name="regulatory_review_batches",
to="review_agent.filesummarybatch",
),
),
migrations.AddField(
model_name="regulatoryreviewbatch",
name="trigger_message",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="triggered_regulatory_batches",
to="review_agent.message",
),
),
migrations.AddField(
model_name="regulatoryreviewbatch",
name="user",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="review_regulatory_batches",
to=settings.AUTH_USER_MODEL,
),
),
migrations.AddField(
model_name="regulatorynotificationrecord",
name="batch",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="notifications",
to="review_agent.regulatoryreviewbatch",
),
),
migrations.AddField(
model_name="regulatoryissue",
name="batch",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="issues",
to="review_agent.regulatoryreviewbatch",
),
),
migrations.AddField(
model_name="regulatoryartifact",
name="batch",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="artifacts",
to="review_agent.regulatoryreviewbatch",
),
),
migrations.AddIndex(
model_name="regulatoryruleversion",
index=models.Index(
fields=["code", "status"], name="idx_ra_rule_code_status"
),
),
migrations.AddField(
model_name="regulatoryreviewbatch",
name="rule_version",
field=models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="review_batches",
to="review_agent.regulatoryruleversion",
),
),
# --- Indexes of the new regulatory models ---
migrations.AddIndex(
model_name="regulatorynotificationrecord",
index=models.Index(
fields=["batch", "status"], name="idx_ra_rr_notify_status"
),
),
migrations.AddIndex(
model_name="regulatoryissue",
index=models.Index(
fields=["batch", "severity"], name="idx_ra_rr_issue_severity"
),
),
migrations.AddIndex(
model_name="regulatoryissue",
index=models.Index(
fields=["batch", "category"], name="idx_ra_rr_issue_category"
),
),
migrations.AddIndex(
model_name="regulatoryartifact",
index=models.Index(
fields=["batch", "artifact_type"], name="idx_ra_rr_artifact_type"
),
),
migrations.AddIndex(
model_name="regulatoryreviewbatch",
index=models.Index(
fields=["conversation", "created_at"], name="idx_ra_rr_batch_conv"
),
),
migrations.AddIndex(
model_name="regulatoryreviewbatch",
index=models.Index(
fields=["user", "created_at"], name="idx_ra_rr_batch_user"
),
),
migrations.AddIndex(
model_name="regulatoryreviewbatch",
index=models.Index(
fields=["status", "created_at"], name="idx_ra_rr_batch_status"
),
),
]

View File

@@ -0,0 +1,50 @@
# Generated by Django 5.2.14 on 2026-06-07 01:15
from django.db import migrations, models
# Schema migration: adds the condition_json column to regulatoryreviewbatch and
# redeclares the status choices of regulatoryreviewbatch and workflownoderun,
# both of which list a "waiting_user" value.
class Migration(migrations.Migration):
# Must run after the migration that created the regulatory review tables.
dependencies = [
("review_agent", "0003_regulatoryartifact_regulatoryissue_and_more"),
]
operations = [
# JSON column, defaulting to an empty dict.
migrations.AddField(
model_name="regulatoryreviewbatch",
name="condition_json",
field=models.JSONField(blank=True, default=dict),
),
# Review-batch status choices, including "waiting_user".
migrations.AlterField(
model_name="regulatoryreviewbatch",
name="status",
field=models.CharField(
choices=[
("pending", "待执行"),
("running", "执行中"),
("waiting_user", "等待用户确认"),
("success", "成功"),
("failed", "失败"),
],
default="pending",
max_length=20,
),
),
# Node-run status choices, including "waiting_user".
migrations.AlterField(
model_name="workflownoderun",
name="status",
field=models.CharField(
choices=[
("pending", "等待中"),
("running", "执行中"),
("waiting_user", "等待用户确认"),
("retrying", "重试中"),
("success", "成功"),
("failed", "失败"),
("skipped", "跳过"),
],
default="pending",
max_length=20,
),
),
]

View File

@@ -0,0 +1,28 @@
# Generated by Django 5.2.14 on 2026-06-07 01:29
from django.db import migrations, models
# Schema migration: redeclares the status choices of regulatoryissue; the list
# includes "review_passed" and "review_failed" alongside open / resolved / accepted.
class Migration(migrations.Migration):
# Must run after migration 0004 of this app.
dependencies = [
("review_agent", "0004_regulatoryreviewbatch_condition_json_and_more"),
]
operations = [
# Only the choices list is restated; default and max_length are
# "open" and 20.
migrations.AlterField(
model_name="regulatoryissue",
name="status",
field=models.CharField(
choices=[
("open", "待处理"),
("resolved", "已整改"),
("accepted", "已接受"),
("review_passed", "复核通过"),
("review_failed", "复核未通过"),
],
default="open",
max_length=20,
),
),
]

View File

@@ -253,6 +253,7 @@ class WorkflowNodeRun(models.Model):
class Status(models.TextChoices): class Status(models.TextChoices):
PENDING = "pending", "等待中" PENDING = "pending", "等待中"
RUNNING = "running", "执行中" RUNNING = "running", "执行中"
WAITING_USER = "waiting_user", "等待用户确认"
RETRYING = "retrying", "重试中" RETRYING = "retrying", "重试中"
SUCCESS = "success", "成功" SUCCESS = "success", "成功"
FAILED = "failed", "失败" FAILED = "failed", "失败"
@@ -261,8 +262,13 @@ class WorkflowNodeRun(models.Model):
batch = models.ForeignKey( batch = models.ForeignKey(
FileSummaryBatch, FileSummaryBatch,
on_delete=models.CASCADE, on_delete=models.CASCADE,
null=True,
blank=True,
related_name="node_runs", related_name="node_runs",
) )
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
node_group = models.CharField(max_length=40, blank=True, default="file_summary")
node_code = models.CharField(max_length=40) node_code = models.CharField(max_length=40)
node_name = models.CharField(max_length=80) node_name = models.CharField(max_length=80)
status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING) status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
@@ -278,6 +284,10 @@ class WorkflowNodeRun(models.Model):
] ]
indexes = [ indexes = [
models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"), models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
models.Index(
fields=["workflow_type", "workflow_batch_id"],
name="idx_ra_node_workflow",
),
] ]
@@ -287,8 +297,19 @@ class WorkflowEvent(models.Model):
batch = models.ForeignKey( batch = models.ForeignKey(
FileSummaryBatch, FileSummaryBatch,
on_delete=models.CASCADE, on_delete=models.CASCADE,
null=True,
blank=True,
related_name="events", related_name="events",
) )
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
conversation = models.ForeignKey(
Conversation,
on_delete=models.CASCADE,
null=True,
blank=True,
related_name="workflow_events",
)
event_type = models.CharField(max_length=40) event_type = models.CharField(max_length=40)
payload = models.JSONField(default=dict) payload = models.JSONField(default=dict)
created_at = models.DateTimeField(auto_now_add=True) created_at = models.DateTimeField(auto_now_add=True)
@@ -299,6 +320,10 @@ class WorkflowEvent(models.Model):
indexes = [ indexes = [
models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"), models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"), models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"),
models.Index(
fields=["workflow_type", "workflow_batch_id", "id"],
name="idx_ra_event_workflow_id",
),
] ]
@@ -308,6 +333,7 @@ class ExportedSummaryFile(models.Model):
class ExportType(models.TextChoices): class ExportType(models.TextChoices):
MARKDOWN = "markdown", "Markdown" MARKDOWN = "markdown", "Markdown"
EXCEL = "excel", "Excel" EXCEL = "excel", "Excel"
JSON = "json", "JSON"
class Status(models.TextChoices): class Status(models.TextChoices):
SUCCESS = "success", "成功" SUCCESS = "success", "成功"
@@ -318,6 +344,9 @@ class ExportedSummaryFile(models.Model):
on_delete=models.CASCADE, on_delete=models.CASCADE,
related_name="exports", related_name="exports",
) )
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
export_category = models.CharField(max_length=40, blank=True, default="summary")
export_type = models.CharField(max_length=20, choices=ExportType.choices) export_type = models.CharField(max_length=20, choices=ExportType.choices)
file_name = models.CharField(max_length=255) file_name = models.CharField(max_length=255)
storage_path = models.CharField(max_length=500) storage_path = models.CharField(max_length=500)
@@ -331,4 +360,214 @@ class ExportedSummaryFile(models.Model):
indexes = [ indexes = [
models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"), models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"),
models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"), models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"),
models.Index(
fields=["workflow_type", "workflow_batch_id"],
name="idx_ra_export_workflow",
),
]
class RegulatoryRuleVersion(models.Model):
    """Version record pairing a regulatory rule YAML file with its RAG index."""

    class Status(models.TextChoices):
        ACTIVE = "active", "启用"
        OUTDATED = "outdated", "待更新"
        DISABLED = "disabled", "停用"

    # Identity of the rule set; `code` is unique across records.
    code = models.CharField(unique=True, max_length=80)
    name = models.CharField(max_length=160)

    # Location and hash of the YAML rule file.
    yaml_path = models.CharField(max_length=500)
    yaml_hash = models.CharField(max_length=128)

    # RAG index bookkeeping; all three default to an empty string.
    rag_collection = models.CharField(default="", blank=True, max_length=120)
    rag_index_version = models.CharField(default="", blank=True, max_length=80)
    rag_index_hash = models.CharField(default="", blank=True, max_length=128)

    status = models.CharField(
        choices=Status.choices,
        default=Status.ACTIVE,
        max_length=20,
    )
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_regulatory_rule_version"
        ordering = ["-updated_at", "-id"]
        indexes = [
            models.Index(
                name="idx_ra_rule_code_status",
                fields=["code", "status"],
            ),
        ]

    def __str__(self) -> str:
        """Return the rule set code."""
        return self.code
class RegulatoryReviewBatch(models.Model):
    """One run of the NMPA regulatory review workflow."""

    class Status(models.TextChoices):
        PENDING = "pending", "待执行"
        RUNNING = "running", "执行中"
        WAITING_USER = "waiting_user", "等待用户确认"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    # --- Relations ---
    conversation = models.ForeignKey(
        Conversation,
        related_name="regulatory_review_batches",
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name="review_regulatory_batches",
        on_delete=models.CASCADE,
    )
    # Optional; set to NULL when the referenced message is deleted.
    trigger_message = models.ForeignKey(
        Message,
        related_name="triggered_regulatory_batches",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
    )
    # PROTECT: the file-summary batch cannot be deleted while referenced here.
    source_summary_batch = models.ForeignKey(
        FileSummaryBatch,
        related_name="regulatory_review_batches",
        on_delete=models.PROTECT,
    )
    # Optional; set to NULL when the referenced rule version is deleted.
    rule_version = models.ForeignKey(
        RegulatoryRuleVersion,
        related_name="review_batches",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
    )

    # --- Run state ---
    batch_no = models.CharField(unique=True, max_length=64)
    status = models.CharField(
        choices=Status.choices,
        default=Status.PENDING,
        max_length=20,
    )
    condition_json = models.JSONField(blank=True, default=dict)
    risk_summary = models.JSONField(blank=True, default=dict)
    work_dir = models.CharField(default="", blank=True, max_length=500)
    error_message = models.TextField(default="", blank=True)

    # --- Timestamps ---
    created_at = models.DateTimeField(auto_now_add=True)
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)

    class Meta:
        db_table = "ra_regulatory_review_batch"
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(
                name="idx_ra_rr_batch_conv",
                fields=["conversation", "created_at"],
            ),
            models.Index(
                name="idx_ra_rr_batch_user",
                fields=["user", "created_at"],
            ),
            models.Index(
                name="idx_ra_rr_batch_status",
                fields=["status", "created_at"],
            ),
        ]

    def __str__(self) -> str:
        """Return the batch number."""
        return self.batch_no
class RegulatoryIssue(models.Model):
    """A single regulatory finding attached to a review batch."""

    class Severity(models.TextChoices):
        BLOCKING = "blocking", "阻断项"
        HIGH = "high", "高风险"
        MEDIUM = "medium", "中风险"
        LOW = "low", "低风险"
        INFO = "info", "提示"

    class Category(models.TextChoices):
        COMPLETENESS = "completeness", "完整性"
        STRUCTURE = "structure", "章节"
        CONSISTENCY = "consistency", "一致性"
        RAG = "rag", "法规依据"

    class Status(models.TextChoices):
        OPEN = "open", "待处理"
        RESOLVED = "resolved", "已整改"
        ACCEPTED = "accepted", "已接受"
        REVIEW_PASSED = "review_passed", "复核通过"
        REVIEW_FAILED = "review_failed", "复核未通过"

    # Issues are deleted together with their batch.
    batch = models.ForeignKey(
        RegulatoryReviewBatch,
        related_name="issues",
        on_delete=models.CASCADE,
    )

    # --- Classification ---
    rule_code = models.CharField(default="", blank=True, max_length=120)
    category = models.CharField(choices=Category.choices, max_length=40)
    severity = models.CharField(choices=Severity.choices, max_length=20)

    # --- Description ---
    title = models.CharField(max_length=255)
    detail = models.TextField(default="", blank=True)
    suggestion = models.TextField(default="", blank=True)

    status = models.CharField(
        choices=Status.choices,
        default=Status.OPEN,
        max_length=20,
    )

    # --- Supporting data (JSON) ---
    evidence = models.JSONField(blank=True, default=dict)
    citations = models.JSONField(blank=True, default=list)

    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_regulatory_issue"
        # NOTE(review): this orders by the stored severity string, i.e.
        # alphabetically (blocking, high, info, low, medium), not by risk
        # level — confirm that is the intended default ordering.
        ordering = ["severity", "id"]
        indexes = [
            models.Index(
                name="idx_ra_rr_issue_severity",
                fields=["batch", "severity"],
            ),
            models.Index(
                name="idx_ra_rr_issue_category",
                fields=["batch", "category"],
            ),
        ]

    def __str__(self) -> str:
        """Return the issue title."""
        return self.title
class RegulatoryArtifact(models.Model):
    """An intermediate or exported file produced by a regulatory review batch."""

    class ArtifactType(models.TextChoices):
        MARKDOWN = "markdown", "Markdown"
        EXCEL = "excel", "Excel"
        JSON = "json", "JSON"
        TEXT = "text", "文本"

    # Artifacts are deleted together with their batch.
    batch = models.ForeignKey(
        RegulatoryReviewBatch,
        related_name="artifacts",
        on_delete=models.CASCADE,
    )
    artifact_type = models.CharField(choices=ArtifactType.choices, max_length=20)
    name = models.CharField(max_length=160)
    storage_path = models.CharField(max_length=500)
    content_hash = models.CharField(default="", blank=True, max_length=128)
    metadata = models.JSONField(blank=True, default=dict)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_regulatory_artifact"
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(
                name="idx_ra_rr_artifact_type",
                fields=["batch", "artifact_type"],
            ),
        ]
class RegulatoryNotificationRecord(models.Model):
    """A notification record for a review batch (mock or Feishu channel)."""

    class Channel(models.TextChoices):
        MOCK = "mock", "模拟"
        FEISHU = "feishu", "飞书"

    class Status(models.TextChoices):
        PENDING = "pending", "待发送"
        SENT = "sent", "已发送"
        FAILED = "failed", "失败"

    # Records are deleted together with their batch.
    batch = models.ForeignKey(
        RegulatoryReviewBatch,
        related_name="notifications",
        on_delete=models.CASCADE,
    )
    channel = models.CharField(
        choices=Channel.choices,
        default=Channel.MOCK,
        max_length=20,
    )
    target = models.CharField(default="", blank=True, max_length=160)
    payload = models.JSONField(blank=True, default=dict)
    status = models.CharField(
        choices=Status.choices,
        default=Status.PENDING,
        max_length=20,
    )
    error_message = models.TextField(default="", blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    # Nullable; no default is set.
    sent_at = models.DateTimeField(blank=True, null=True)

    class Meta:
        db_table = "ra_regulatory_notification_record"
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(
                name="idx_ra_rr_notify_status",
                fields=["batch", "status"],
            ),
        ]

View File

@@ -0,0 +1 @@
"""NMPA regulatory review workflow package."""

View File

@@ -0,0 +1,26 @@
from __future__ import annotations
from review_agent.models import RegulatoryReviewBatch, WorkflowEvent
def record_event(
    batch: RegulatoryReviewBatch,
    event_type: str,
    payload: dict | None = None,
) -> WorkflowEvent:
    """Create and return a ``WorkflowEvent`` row for a regulatory review batch.

    The event is tagged with ``workflow_type="regulatory_review"``, the
    batch's primary key and the batch's conversation. A missing or empty
    ``payload`` is stored as an empty dict.
    """
    event_payload = payload if payload else {}
    event_fields = {
        "workflow_type": "regulatory_review",
        "workflow_batch_id": batch.pk,
        "conversation": batch.conversation,
        "event_type": event_type,
        "payload": event_payload,
    }
    return WorkflowEvent.objects.create(**event_fields)
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert an event into a plain dict.

    Returns a mapping with the event's primary key (``id``), ``event_type``,
    ``payload`` (passed through as-is) and ``created_at`` rendered with
    ``isoformat()``.
    """
    return dict(
        id=event.pk,
        event_type=event.event_type,
        payload=event.payload,
        created_at=event.created_at.isoformat(),
    )

View File

@@ -0,0 +1,503 @@
code: nmpa_ivd_registration_v1
name: NMPA IVD 注册资料附件 4 对齐规则
rag_collection: nmpa_ivd_registration_v1
source_material_dir: docs/0.原始材料
attachment4_required_codes:
- "1"
- "1.1"
- "1.2"
- "1.3"
- "1.4"
- "1.5"
- "1.6"
- "1.7"
- "2"
- "2.1"
- "2.2"
- "2.3"
- "2.4"
- "2.5"
- "2.6"
- "3"
- "3.1"
- "3.2"
- "3.3"
- "3.4"
- "3.5"
- "3.6"
- "3.7"
- "3.8"
- "4"
- "4.1"
- "4.2"
- "5"
- "5.1"
- "5.2"
- "5.3"
- "5.4"
- "6"
- "6.1"
- "6.2"
- "6.3"
- "6.4"
- "6.5"
- "6.6"
- "6.7"
- "6.8"
- "6.9"
- "6.10"
requirements:
- code: attachment4_1_regulatory_info
rule_id: A4-1
attachment4_code: "1"
title: 监管信息
type: chapter
severity: high
category: completeness
file_keywords: [监管信息]
aliases: [监管资料]
suggestion: 请补充监管信息章节及其目录项。
citation_query: 附件4 监管信息 体外诊断试剂 注册申报资料
structure_required: true
- code: attachment4_1_1_toc
rule_id: A4-1.1
attachment4_code: "1.1"
title: 章节目录
type: directory
severity: medium
category: completeness
file_keywords: [章节目录, 目录]
aliases: [监管信息目录]
suggestion: 请补充监管信息章节目录。
citation_query: 附件4 监管信息 章节目录
- code: attachment4_1_2_application_form
rule_id: A4-1.2
attachment4_code: "1.2"
title: 申请表
type: required
severity: blocking
category: completeness
file_keywords: [申请表, 注册申请表]
aliases: [医疗器械注册申请表]
suggestion: 请补充注册申请表并核对注册类型、管理类别和分类编码。
citation_query: 附件4 监管信息 申请表
- code: attachment4_1_3_terms
rule_id: A4-1.3
attachment4_code: "1.3"
title: 术语/缩写词列表
type: recommended
severity: medium
category: completeness
file_keywords: [术语, 缩写词, 缩略语]
suggestion: 请补充术语和缩写词列表。
citation_query: 附件4 术语 缩写词列表
- code: attachment4_1_4_product_list
rule_id: A4-1.4
attachment4_code: "1.4"
title: 产品列表
type: required
severity: high
category: completeness
file_keywords: [产品列表, 产品清单]
suggestion: 请补充申报产品列表。
citation_query: 附件4 产品列表
- code: attachment4_1_5_related_files
rule_id: A4-1.5
attachment4_code: "1.5"
title: 关联文件
type: conditional
severity: medium
category: completeness
file_keywords: [关联文件, 关联注册, 引用文件]
suggestion: 如存在关联注册或引用资料,请补充关联文件说明。
citation_query: 附件4 关联文件
- code: attachment4_1_6_pre_submission
rule_id: A4-1.6
attachment4_code: "1.6"
title: 申报前与监管机构的联系情况和沟通记录
type: conditional
severity: medium
category: completeness
file_keywords: [沟通记录, 监管机构, 申报前]
suggestion: 如有申报前沟通,请补充沟通记录;如无,请说明不适用。
citation_query: 附件4 申报前 监管机构 沟通记录
- code: attachment4_1_7_declaration
rule_id: A4-1.7
attachment4_code: "1.7"
title: 符合性声明
type: required
severity: blocking
category: completeness
file_keywords: [符合性声明, 声明]
suggestion: 请补充符合性声明。
citation_query: 附件4 符合性声明
- code: attachment4_2_summary
rule_id: A4-2
attachment4_code: "2"
title: 综述资料
type: chapter
severity: high
category: completeness
file_keywords: [综述资料]
suggestion: 请补充综述资料章节。
citation_query: 附件4 综述资料
structure_required: true
- code: attachment4_2_1_toc
rule_id: A4-2.1
attachment4_code: "2.1"
title: 章节目录
type: directory
severity: medium
category: completeness
file_keywords: [章节目录, 综述资料目录]
suggestion: 请补充综述资料章节目录。
citation_query: 附件4 综述资料 章节目录
- code: attachment4_2_2_overview
rule_id: A4-2.2
attachment4_code: "2.2"
title: 概述
type: required
severity: high
category: completeness
file_keywords: [概述]
suggestion: 请补充产品概述。
citation_query: 附件4 概述
- code: attachment4_2_3_product_description
rule_id: A4-2.3
attachment4_code: "2.3"
title: 产品描述
type: required
severity: high
category: completeness
file_keywords: [产品描述]
suggestion: 请补充产品描述。
citation_query: 附件4 产品描述
- code: attachment4_2_4_intended_use
rule_id: A4-2.4
attachment4_code: "2.4"
title: 预期用途
type: required
severity: high
category: completeness
file_keywords: [预期用途]
suggestion: 请补充预期用途资料。
citation_query: 附件4 预期用途
- code: attachment4_2_5_marketing_history
rule_id: A4-2.5
attachment4_code: "2.5"
title: 申报产品上市历史
type: conditional
severity: medium
category: completeness
file_keywords: [上市历史]
suggestion: 如产品已有上市历史,请补充相关说明;如无,请说明不适用。
citation_query: 附件4 上市历史
- code: attachment4_2_6_other_summary
rule_id: A4-2.6
attachment4_code: "2.6"
title: 其他需说明的内容
type: conditional
severity: medium
category: completeness
file_keywords: [其他需说明, 其他说明]
suggestion: 请补充其他需说明内容或不适用说明。
citation_query: 附件4 其他需说明
- code: attachment4_3_nonclinical
rule_id: A4-3
attachment4_code: "3"
title: 非临床资料
type: chapter
severity: high
category: completeness
file_keywords: [非临床资料]
suggestion: 请补充非临床资料章节。
citation_query: 附件4 非临床资料
structure_required: true
- code: attachment4_3_1_toc
rule_id: A4-3.1
attachment4_code: "3.1"
title: 章节目录
type: directory
severity: medium
category: completeness
file_keywords: [章节目录, 非临床资料目录]
suggestion: 请补充非临床资料章节目录。
citation_query: 附件4 非临床资料 章节目录
- code: attachment4_3_2_risk_management
rule_id: A4-3.2
attachment4_code: "3.2"
title: 产品风险管理资料
type: required
severity: high
category: completeness
file_keywords: [产品风险管理, 风险管理资料]
suggestion: 请补充产品风险管理资料。
citation_query: 附件4 产品风险管理资料
- code: essential_principles_checklist
rule_id: A4-3.3
attachment4_code: "3.3"
title: 体外诊断试剂安全和性能基本原则清单
type: recommended
severity: medium
category: completeness
file_keywords: [安全和性能基本原则, 基本原则清单]
aliases: [安全和性能基本原则清单]
suggestion: 建议补充安全和性能基本原则清单,便于审评追溯。
citation_query: 附件4 安全和性能基本原则清单
- code: product_technical_requirements
rule_id: A4-3.4
attachment4_code: "3.4"
title: 产品技术要求及检验报告
type: required
severity: blocking
category: completeness
file_keywords: [产品技术要求, 注册检验报告, 检验报告]
aliases: [产品技术要求, 注册检验报告]
required_sections: [产品技术要求, 检验报告]
suggestion: 请补充产品技术要求及注册检验报告,并确认二者覆盖型号一致。
citation_query: 附件4 产品技术要求 检验报告
- code: registration_test_report
rule_id: A4-3.4-R
attachment4_code: "3.4"
title: 注册检验报告
type: required
severity: blocking
category: completeness
file_keywords: [注册检验报告, 检验报告]
suggestion: 请补充注册检验报告并复核报告覆盖的产品型号。
citation_query: 附件4 注册检验报告
- code: attachment4_3_5_analytical_performance
rule_id: A4-3.5
attachment4_code: "3.5"
title: 分析性能研究
type: required
severity: high
category: completeness
file_keywords: [分析性能研究, 分析性能]
suggestion: 请补充分析性能研究资料。
citation_query: 附件4 分析性能研究
- code: attachment4_3_6_stability
rule_id: A4-3.6
attachment4_code: "3.6"
title: 稳定性研究
type: required
severity: high
category: completeness
file_keywords: [稳定性研究, 稳定性]
suggestion: 请补充稳定性研究资料。
citation_query: 附件4 稳定性研究
- code: attachment4_3_7_reference_interval
rule_id: A4-3.7
attachment4_code: "3.7"
title: 阳性判断值或参考区间研究
type: required
severity: high
category: completeness
file_keywords: [阳性判断值, 参考区间]
suggestion: 请补充阳性判断值或参考区间研究资料。
citation_query: 附件4 阳性判断值 参考区间
- code: attachment4_3_8_other_nonclinical
rule_id: A4-3.8
attachment4_code: "3.8"
title: 其他资料
type: conditional
severity: medium
category: completeness
file_keywords: [其他资料]
suggestion: 请补充非临床其他资料或不适用说明。
citation_query: 附件4 非临床 其他资料
- code: attachment4_4_clinical_evaluation
rule_id: A4-4
attachment4_code: "4"
title: 临床评价资料
type: chapter
severity: high
category: completeness
file_keywords: [临床评价资料, 临床资料]
suggestion: 请补充临床评价资料章节。
citation_query: 附件4 临床评价资料
structure_required: true
- code: attachment4_4_1_toc
rule_id: A4-4.1
attachment4_code: "4.1"
title: 章节目录
type: directory
severity: medium
category: completeness
file_keywords: [章节目录, 临床评价资料目录]
suggestion: 请补充临床评价资料章节目录。
citation_query: 附件4 临床评价资料 章节目录
- code: clinical_evaluation
rule_id: A4-4.2
attachment4_code: "4.2"
title: 临床评价资料
type: conditional
severity: high
category: completeness
file_keywords: [临床评价, 临床试验, 免临床, 同品种比对]
suggestion: 请根据适用情形补充临床评价资料或说明豁免依据。
citation_query: 附件4 临床评价资料 注册申报
- code: attachment4_5_ifu_label
rule_id: A4-5
attachment4_code: "5"
title: 产品说明书和标签样稿
type: chapter
severity: high
category: completeness
file_keywords: [产品说明书和标签样稿, 说明书, 标签样稿]
suggestion: 请补充产品说明书和标签样稿章节。
citation_query: 附件4 产品说明书 标签样稿
structure_required: true
- code: attachment4_5_1_toc
rule_id: A4-5.1
attachment4_code: "5.1"
title: 章节目录
type: directory
severity: medium
category: completeness
file_keywords: [章节目录, 说明书目录, 标签目录]
suggestion: 请补充产品说明书和标签样稿章节目录。
citation_query: 附件4 说明书 标签 章节目录
- code: instructions_for_use
rule_id: A4-5.2
attachment4_code: "5.2"
title: 产品说明书
type: required
severity: high
category: completeness
file_keywords: [说明书, 产品说明书, 使用说明]
aliases: [说明书]
required_sections: [储存条件, 有效期, 样本要求]
suggestion: 请补充说明书并核对储存条件、有效期和样本要求章节。
citation_query: 附件4 产品说明书 储存条件 有效期 样本要求
- code: attachment4_5_3_label
rule_id: A4-5.3
attachment4_code: "5.3"
title: 标签样稿
type: required
severity: high
category: completeness
file_keywords: [标签样稿, 标签]
suggestion: 请补充标签样稿。
citation_query: 附件4 标签样稿
- code: attachment4_5_4_other_ifu
rule_id: A4-5.4
attachment4_code: "5.4"
title: 其他资料
type: conditional
severity: medium
category: completeness
file_keywords: [其他资料]
suggestion: 请补充说明书和标签相关其他资料或不适用说明。
citation_query: 附件4 说明书 标签 其他资料
- code: attachment4_6_quality_system
rule_id: A4-6
attachment4_code: "6"
title: 质量管理体系文件
type: chapter
severity: high
category: completeness
file_keywords: [质量管理体系文件, 质量体系, 质量管理体系]
suggestion: 请补充质量管理体系文件章节。
citation_query: 附件4 质量管理体系文件
structure_required: true
- code: attachment4_6_1_overview
rule_id: A4-6.1
attachment4_code: "6.1"
title: 综述
type: required
severity: high
category: completeness
file_keywords: [综述]
suggestion: 请补充质量管理体系综述。
citation_query: 附件4 质量管理体系 综述
- code: attachment4_6_2_toc
rule_id: A4-6.2
attachment4_code: "6.2"
title: 章节目录
type: directory
severity: medium
category: completeness
file_keywords: [章节目录, 质量管理体系目录]
suggestion: 请补充质量管理体系文件章节目录。
citation_query: 附件4 质量管理体系 章节目录
- code: attachment4_6_3_manufacturing
rule_id: A4-6.3
attachment4_code: "6.3"
title: 生产制造信息
type: required
severity: high
category: completeness
file_keywords: [生产制造信息, 生产制造]
suggestion: 请补充生产制造信息。
citation_query: 附件4 生产制造信息
- code: attachment4_6_4_qms_procedure
rule_id: A4-6.4
attachment4_code: "6.4"
title: 质量管理体系程序
type: required
severity: high
category: completeness
file_keywords: [质量管理体系程序, 质量体系程序]
suggestion: 请补充质量管理体系程序。
citation_query: 附件4 质量管理体系程序
- code: attachment4_6_5_management
rule_id: A4-6.5
attachment4_code: "6.5"
title: 管理职责程序
type: required
severity: high
category: completeness
file_keywords: [管理职责程序, 管理职责]
suggestion: 请补充管理职责程序。
citation_query: 附件4 管理职责程序
- code: attachment4_6_6_resource
rule_id: A4-6.6
attachment4_code: "6.6"
title: 资源管理程序
type: required
severity: high
category: completeness
file_keywords: [资源管理程序, 资源管理]
suggestion: 请补充资源管理程序。
citation_query: 附件4 资源管理程序
- code: attachment4_6_7_realization
rule_id: A4-6.7
attachment4_code: "6.7"
title: 产品实现程序
type: required
severity: high
category: completeness
file_keywords: [产品实现程序, 产品实现]
suggestion: 请补充产品实现程序。
citation_query: 附件4 产品实现程序
- code: attachment4_6_8_measurement
rule_id: A4-6.8
attachment4_code: "6.8"
title: 质量管理体系的测量/分析和改进程序
type: required
severity: high
category: completeness
file_keywords: [测量, 分析和改进, 改进程序]
suggestion: 请补充质量管理体系测量、分析和改进程序。
citation_query: 附件4 测量 分析 改进程序
- code: attachment4_6_9_other_qms
rule_id: A4-6.9
attachment4_code: "6.9"
title: 其他质量体系程序信息
type: conditional
severity: medium
category: completeness
file_keywords: [其他质量体系程序, 其他质量体系]
suggestion: 请补充其他质量体系程序信息或不适用说明。
citation_query: 附件4 其他质量体系程序信息
- code: attachment4_6_10_qms_audit
rule_id: A4-6.10
attachment4_code: "6.10"
title: 质量管理体系核查文件
type: required
severity: high
category: completeness
file_keywords: [质量管理体系核查文件, 体系核查文件, 核查文件]
suggestion: 请补充质量管理体系核查文件。
citation_query: 附件4 质量管理体系核查文件

View File

@@ -0,0 +1,18 @@
from __future__ import annotations
from dataclasses import asdict, dataclass, field
@dataclass(frozen=True)
class Finding:
    """Immutable record describing one issue raised by a regulatory check.

    Only the rule code, category, severity and title are mandatory; every
    other field falls back to an empty value.
    """

    rule_code: str
    category: str
    severity: str
    title: str
    detail: str = ""
    suggestion: str = ""
    # Mutable defaults go through ``default_factory`` so that instances never
    # share the same dict/list object.
    evidence: dict[str, object] = field(default_factory=dict)
    citations: list[dict[str, object]] = field(default_factory=list)

    def to_dict(self) -> dict[str, object]:
        """Return the finding as a plain dictionary produced by ``asdict``."""
        as_mapping = asdict(self)
        return as_mapping

View File

@@ -0,0 +1 @@
"""Services for NMPA regulatory review."""

View File

@@ -0,0 +1,73 @@
from __future__ import annotations
from collections.abc import Callable
from review_agent.models import FileSummaryBatch
from review_agent.regulatory_review.schemas import Finding
def run_completeness_check(
    batch: FileSummaryBatch,
    rule_set: dict,
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Report every checkable requirement that no file in the batch satisfies.

    Args:
        batch: File summary batch whose items (ordered by ``file_index``) are
            searched by file name, relative path and directory level.
        rule_set: Rule set mapping; its ``requirements`` list is filtered to
            the types required/conditional/recommended/chapter/directory.
        progress_callback: Optional callable invoked once per evaluated
            requirement with ``processed``, ``total``, ``label`` and
            ``finding_count`` keys.

    Returns:
        One ``Finding`` per requirement for which no item matched any of its
        ``file_keywords`` or ``aliases``.
    """
    checkable_types = {"required", "conditional", "recommended", "chapter", "directory"}
    items = list(batch.items.order_by("file_index"))
    # Filter once up front so ``total`` and the progress index only count the
    # requirements that are really evaluated; no second type check is needed
    # inside the loop.
    requirements = [
        requirement
        for requirement in rule_set.get("requirements") or []
        if requirement.get("type") in checkable_types
    ]
    total = len(requirements)
    findings: list[Finding] = []
    for index, requirement in enumerate(requirements, start=1):
        # ``or []`` tolerates keys that are present in the rule set but null.
        file_keywords = requirement.get("file_keywords") or []
        keywords = [*file_keywords, *(requirement.get("aliases") or [])]
        matched = any(
            _matches_item(item.file_name, item.relative_path, item.directory_level, keywords)
            for item in items
        )
        if not matched:
            label = _numbered_title(requirement)
            findings.append(
                Finding(
                    rule_code=requirement["code"],
                    category=requirement.get("category", "completeness"),
                    severity=requirement.get("severity", "medium"),
                    title=f"缺少{label}",
                    detail=f"当前文件汇总批次未发现{label}",
                    suggestion=requirement.get("suggestion", ""),
                    evidence={
                        "requirement_type": requirement.get("type"),
                        "matched_files": [],
                        "searched_keywords": file_keywords,
                        "searched_fields": ["file_name", "relative_path", "directory_level"],
                    },
                )
            )
        if progress_callback:
            progress_callback(
                {
                    "processed": index,
                    "total": total,
                    "label": _numbered_title(requirement),
                    "finding_count": len(findings),
                }
            )
    return findings
def _matches_item(file_name: str, relative_path: str, directory_level: str, keywords: list[str]) -> bool:
haystack = f"{file_name} {relative_path} {directory_level}".lower()
return any(str(keyword).lower() in haystack for keyword in keywords)
def _numbered_title(requirement: dict) -> str:
attachment4_code = requirement.get("attachment4_code")
if not attachment4_code:
return requirement["title"]
return f"{attachment4_code}{requirement['title']}"

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
import re
from collections import defaultdict
from collections.abc import Callable
from review_agent.regulatory_review.schemas import Finding
# Field label -> regular expression capturing the value that follows the label
# on the same line.  The separator class accepts both the ASCII colon and the
# full-width colon ":".
FIELDS = {
    "产品名称": r"产品名称[::]\s*([^\n\r]+)",
    "型号规格": r"型号规格[::]\s*([^\n\r]+)",
    "预期用途": r"预期用途[::]\s*([^\n\r]+)",
    "管理类别": r"管理类别[::]\s*([^\n\r]+)",
    "分类编码": r"分类编码[::]\s*([^\n\r]+)",
    "注册类型": r"注册类型[::]\s*([^\n\r]+)",
    "临床评价路径": r"临床评价路径[::]\s*([^\n\r]+)",
}
def run_consistency_check(
    document_texts: dict[str, str],
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Flag fields whose extracted value differs between documents.

    For every pattern in ``FIELDS`` the first match in each document is
    normalised and grouped by value.  A finding is emitted when more than one
    distinct value was seen.  ``progress_callback`` (optional) is called once
    per field with ``processed``, ``total``, ``label`` and ``finding_count``.
    """
    issues: list[Finding] = []
    field_specs = list(FIELDS.items())
    field_count = len(field_specs)
    for position, (label, pattern) in enumerate(field_specs, start=1):
        files_by_value: dict[str, list[str]] = defaultdict(list)
        for file_name, text in document_texts.items():
            found = re.search(pattern, text)
            if not found:
                continue
            files_by_value[_normalize(found.group(1))].append(file_name)
        distinct = len(files_by_value)
        if distinct > 1:
            issues.append(
                Finding(
                    rule_code=f"consistency:{label}",
                    category="consistency",
                    severity="high",
                    title=f"{label}在不同文件中不一致",
                    detail=f"发现 {distinct} 个不同的{label}取值。",
                    suggestion=f"请统一各注册资料中的{label}",
                    evidence={"field": label, "values": dict(files_by_value)},
                )
            )
        if progress_callback:
            progress_callback(
                {
                    "processed": position,
                    "total": field_count,
                    "label": label,
                    "finding_count": len(issues),
                }
            )
    return issues
def _normalize(value: str) -> str:
return " ".join(value.strip().split())

View File

@@ -0,0 +1,225 @@
from __future__ import annotations
import json
from pathlib import Path
from django.conf import settings
from openpyxl import Workbook
from review_agent.models import ExportedSummaryFile, RegulatoryIssue, RegulatoryReviewBatch
# Display label for each severity key.  The report builders in this module
# iterate this mapping, so its insertion order is the row order of the
# risk-summary tables.
SEVERITY_LABELS = {
"blocking": "阻断项",
"high": "高风险",
"medium": "中风险",
"low": "低风险",
"info": "提示",
}
def export_review_results(batch: RegulatoryReviewBatch) -> list[ExportedSummaryFile]:
    """Write the Markdown, Excel and JSON exports for a review batch.

    Files are written to ``<root>/exports`` where ``<root>`` is the batch's
    ``work_dir`` or, when that is empty, a per-batch folder under
    ``MEDIA_ROOT``.  Returns the three export records in the order
    Markdown, Excel, JSON.
    """
    if batch.work_dir:
        root = Path(batch.work_dir)
    else:
        root = Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no
    export_dir = root / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)

    markdown_path = export_dir / f"{batch.batch_no}-regulatory-review.md"
    markdown = _create_export(
        batch,
        markdown_path,
        ExportedSummaryFile.ExportType.MARKDOWN,
        "markdown_report",
        build_markdown_report(batch),
    )

    excel_path = export_dir / f"{batch.batch_no}-regulatory-issues.xlsx"
    excel = _create_excel_export(batch, excel_path)

    json_path = export_dir / f"{batch.batch_no}-regulatory-result.json"
    json_text = json.dumps(build_result_payload(batch), ensure_ascii=False, indent=2)
    result_json = _create_export(
        batch,
        json_path,
        ExportedSummaryFile.ExportType.JSON,
        "result_package",
        json_text,
    )
    return [markdown, excel, result_json]
def build_markdown_report(batch: RegulatoryReviewBatch) -> str:
    """Render the regulatory review of ``batch`` as a Markdown document.

    Sections, in order: title and batch number, optional "regenerated from"
    source, risk summary table, issue table, optional review-record table
    and optional notification table.

    Returns:
        The report as a single newline-joined string.
    """
    lines = [
        "# NMPA 注册资料法规核查报告",
        "",
        f"批次号:{batch.batch_no}",
    ]
    regenerated_from = (batch.condition_json or {}).get("regenerated_from")
    if regenerated_from:
        lines.extend(
            [
                "",
                "## 复核来源",
                "",
                f"- 来源法规核查批次:{regenerated_from.get('batch_no')}",
                f"- 来源文件汇总批次:{regenerated_from.get('file_summary_batch_no')}",
            ]
        )
    lines.extend(["", "## 风险汇总", "", "| 风险等级 | 数量 |", "| --- | --- |"])
    summary = batch.risk_summary or {}
    for severity, label in SEVERITY_LABELS.items():
        lines.append(f"| {label} | {summary.get(severity, 0)} |")
    lines.extend(["", "## 问题清单", "", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"])
    for issue in batch.issues.order_by("id"):
        lines.append(
            f"| {SEVERITY_LABELS.get(issue.severity, issue.severity)} | {issue.title} | {issue.status} | {issue.suggestion or '-'} |"
        )
    review_records = _review_records(batch)
    if review_records:
        lines.extend(["", "## 复核记录", "", "| 补充批次 | 问题数 | 通过数 | 未通过数 |", "| --- | --- | --- | --- |"])
        for record in review_records:
            items = record.get("items", [])
            passed = sum(1 for item in items if item.get("status") == RegulatoryIssue.Status.REVIEW_PASSED)
            failed = sum(1 for item in items if item.get("status") == RegulatoryIssue.Status.REVIEW_FAILED)
            lines.append(f"| {record.get('file_summary_batch_no')} | {len(items)} | {passed} | {failed} |")
    notifications = _notification_records(batch)
    if notifications:
        lines.extend(["", "## 通知记录", "", "| 渠道 | 对象 | 状态 | 问题 |", "| --- | --- | --- | --- |"])
        for record in notifications:
            # Other code in this module treats the payload as possibly
            # non-dict; fall back to an empty mapping so one malformed
            # record cannot abort the whole report.
            payload = record["payload"] if isinstance(record["payload"], dict) else {}
            lines.append(
                f"| {record['channel']} | {record['target'] or '-'} | {record['status']} | {payload.get('title', '-')} |"
            )
    return "\n".join(lines)
def build_result_payload(batch: RegulatoryReviewBatch) -> dict[str, object]:
    """Assemble the JSON-serialisable result package for a review batch.

    The package holds the batch identifiers, the risk summary, every issue
    (ordered by id), the stored review records and the notification records.
    """
    issue_rows = []
    for issue in batch.issues.order_by("id"):
        issue_rows.append(
            {
                "severity": issue.severity,
                "category": issue.category,
                "rule_code": issue.rule_code,
                "title": issue.title,
                "detail": issue.detail,
                "suggestion": issue.suggestion,
                "status": issue.status,
                "evidence": issue.evidence,
                "citations": issue.citations,
            }
        )
    conditions = batch.condition_json or {}
    return {
        "batch_no": batch.batch_no,
        "source_summary_batch": batch.source_summary_batch.batch_no,
        "regenerated_from": conditions.get("regenerated_from"),
        "risk_summary": batch.risk_summary,
        "issues": issue_rows,
        "review_records": _review_records(batch),
        "notifications": _notification_records(batch),
    }
def build_assistant_summary(batch: RegulatoryReviewBatch, exports: list[ExportedSummaryFile]) -> str:
    """Build the short Markdown summary shown in the assistant conversation.

    It contains a risk table (non-zero severities only), the first eight
    issues ordered by id, and one download link per available export type.
    """
    export_by_type = {export.export_type: export for export in exports}
    summary = batch.risk_summary or {}

    lines = [
        "已完成 NMPA 注册资料法规核查。",
        "",
        "| 风险等级 | 数量 |",
        "| --- | --- |",
    ]
    for severity, label in SEVERITY_LABELS.items():
        count = summary.get(severity, 0)
        if count:
            lines.append(f"| {label} | {count} |")

    lines += ["", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"]
    for issue in batch.issues.order_by("id")[:8]:
        severity_label = SEVERITY_LABELS.get(issue.severity, issue.severity)
        lines.append(f"| {severity_label} | {issue.title} | {issue.status} | {issue.suggestion or '-'} |")

    links = [
        _download_link("下载 Markdown 核查报告", export_by_type.get(ExportedSummaryFile.ExportType.MARKDOWN)),
        _download_link("下载 Excel 缺失清单", export_by_type.get(ExportedSummaryFile.ExportType.EXCEL)),
        _download_link("下载 JSON 结果包", export_by_type.get(ExportedSummaryFile.ExportType.JSON)),
    ]
    lines.append("")
    # A missing export yields ``None``; such links are simply left out.
    lines.extend(link for link in links if link is not None)
    return "\n".join(lines)
def _download_link(label: str, exported: ExportedSummaryFile | None) -> str | None:
if not exported:
return None
return f"[{label}](/api/review-agent/file-summary/exports/{exported.pk}/download/)"
def _create_export(
    batch: RegulatoryReviewBatch,
    path: Path,
    export_type: str,
    category: str,
    content: str,
) -> ExportedSummaryFile:
    """Write ``content`` to ``path`` as UTF-8 and record the export.

    The record is attached to the batch's source summary batch and tagged
    with the ``regulatory_review`` workflow type and this batch's primary key.
    """
    path.write_text(content, encoding="utf-8")
    record_fields = {
        "batch": batch.source_summary_batch,
        "workflow_type": "regulatory_review",
        "workflow_batch_id": batch.pk,
        "export_category": category,
        "export_type": export_type,
        "file_name": path.name,
        "storage_path": str(path),
    }
    return ExportedSummaryFile.objects.create(**record_fields)
def _create_excel_export(batch: RegulatoryReviewBatch, path: Path) -> ExportedSummaryFile:
    """Write the issue checklist workbook to ``path`` and record the export.

    One row is written per issue (ordered by id) with its severity label,
    category, rule code, title, status, suggestion, the ``source`` of each
    citation and a ``channel:status`` summary of the notifications whose
    payload references the issue.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "法规问题清单"
    sheet.append(["等级", "类别", "规则", "问题", "状态", "建议", "法规依据", "通知记录"])
    # Load the notifications once instead of issuing one query per issue row.
    notifications = [
        record for record in batch.notifications.all() if isinstance(record.payload, dict)
    ]
    for issue in batch.issues.order_by("id"):
        notification_summary = "; ".join(
            f"{record.channel}:{record.status}"
            for record in notifications
            if record.payload.get("issue_id") == issue.pk
        )
        sheet.append(
            [
                SEVERITY_LABELS.get(issue.severity, issue.severity),
                issue.category,
                issue.rule_code,
                issue.title,
                issue.status,
                issue.suggestion,
                # ``or []`` keeps an issue without citations from raising.
                "; ".join(str(item.get("source", "")) for item in issue.citations or []),
                notification_summary,
            ]
        )
    workbook.save(path)
    return ExportedSummaryFile.objects.create(
        batch=batch.source_summary_batch,
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        export_category="issue_checklist",
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=path.name,
        storage_path=str(path),
    )
def _review_records(batch: RegulatoryReviewBatch) -> list[dict[str, object]]:
    """Load the JSON review-record artifacts of ``batch`` in creation order.

    Artifacts whose file cannot be read or parsed are skipped.
    """
    loaded = []
    artifacts = batch.artifacts.filter(metadata__artifact="review_record").order_by("created_at", "id")
    for artifact in artifacts:
        try:
            raw = Path(artifact.storage_path).read_text(encoding="utf-8")
            loaded.append(json.loads(raw))
        except (OSError, json.JSONDecodeError):
            # Best effort: a missing or corrupt record must not block export.
            continue
    return loaded
def _notification_records(batch: RegulatoryReviewBatch) -> list[dict[str, object]]:
return [
{
"channel": record.channel,
"target": record.target,
"status": record.status,
"payload": record.payload,
"sent_at": record.sent_at.isoformat() if record.sent_at else "",
}
for record in batch.notifications.order_by("created_at", "id")
]
def _notification_summary_for_issue(batch: RegulatoryReviewBatch, issue_id: int) -> str:
records = [
record
for record in batch.notifications.all()
if isinstance(record.payload, dict) and record.payload.get("issue_id") == issue_id
]
return "; ".join(f"{record.channel}:{record.status}" for record in records)

View File

@@ -0,0 +1,39 @@
from __future__ import annotations
from django.utils import timezone
from review_agent.models import RegulatoryNotificationRecord, RegulatoryReviewBatch
# Issue severities for which create_mock_notifications records a notification.
NOTIFIABLE_SEVERITIES = {"blocking", "high", "medium"}
def create_mock_notifications(batch: RegulatoryReviewBatch) -> list[RegulatoryNotificationRecord]:
    """Create one mock notification per notifiable issue not yet notified.

    An issue is skipped when its severity is outside
    ``NOTIFIABLE_SEVERITIES`` or when a mock notification of this batch
    already carries its id in the payload.  Returns the newly created records.
    """
    mock_channel = RegulatoryNotificationRecord.Channel.MOCK
    stored_payloads = RegulatoryNotificationRecord.objects.filter(batch=batch, channel=mock_channel).values_list(
        "payload", flat=True
    )
    already_notified = set()
    for payload in stored_payloads:
        if isinstance(payload, dict):
            already_notified.add(payload.get("issue_id"))

    created = []
    for issue in batch.issues.order_by("id"):
        if issue.severity not in NOTIFIABLE_SEVERITIES:
            continue
        if issue.pk in already_notified:
            continue
        notification = RegulatoryNotificationRecord.objects.create(
            batch=batch,
            channel=RegulatoryNotificationRecord.Channel.MOCK,
            target="法规整改负责人",
            status=RegulatoryNotificationRecord.Status.SENT,
            sent_at=timezone.now(),
            payload={
                "issue_id": issue.pk,
                "rule_code": issue.rule_code,
                "severity": issue.severity,
                "title": issue.title,
                "suggestion": issue.suggestion,
            },
        )
        created.append(notification)
    return created

View File

@@ -0,0 +1,241 @@
from __future__ import annotations
import re
from pathlib import Path
from django.conf import settings
from review_agent.models import FileSummaryBatch, RegulatoryReviewBatch
from review_agent.regulatory_review.services.llm_review import review_condition_fields
from review_agent.regulatory_review.services.text_extract import extract_text
# Choices offered for each select-type condition field; the detector below
# exposes them as the "options" of the matching candidate.
OPTION_FIELDS = {
"product_category": ["体外诊断试剂", "医疗器械", "其他"],
"registration_type": ["首次注册", "变更注册", "延续注册"],
"clinical_evaluation_path": ["临床试验", "免临床", "同品种比对", "待确认"],
}
def ensure_regulatory_condition_candidates(batch: RegulatoryReviewBatch) -> dict[str, dict[str, object]]:
    """Return the batch's condition candidates, refreshing them when needed.

    Detection is re-run only while the batch is waiting for the user and the
    stored candidates are incomplete.  In that case the refreshed values are
    merged into the stored ones and saved to ``condition_json``.
    """
    condition_json = batch.condition_json or {}
    candidates = condition_json.get("candidates") or {}
    waiting_for_user = batch.status == RegulatoryReviewBatch.Status.WAITING_USER
    if not (waiting_for_user and _condition_candidates_incomplete(candidates)):
        return candidates

    detected = detect_regulatory_condition_candidates(batch.source_summary_batch)
    merged = _merge_condition_candidates(candidates, detected)
    batch.condition_json = {**condition_json, "candidates": merged}
    batch.save(update_fields=["condition_json"])
    return merged
def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> dict[str, dict[str, object]]:
    """Infer review-scope conditions from the summary batch and its files.

    For every item (ordered by ``file_index``) the directory level, file
    name, relative path, extracted field values and front text are collected
    into one corpus used for keyword detection.  The first non-empty value
    seen for a field wins.  Returns one candidate config per condition field.
    """
    corpus_parts = [summary_batch.product_name or ""]
    field_candidates: dict[str, str] = {}
    field_sources: dict[str, str] = {}
    for item in summary_batch.items.order_by("file_index"):
        corpus_parts += [item.directory_level, item.file_name, item.relative_path]
        review = _extract_item_fields(item)
        extracted = review.get("selected_fields", {})
        sources = review.get("selected_sources", {})
        # ``setdefault`` keeps the value from the earliest file that had one.
        for key, value in extracted.items():
            if value:
                field_candidates.setdefault(key, value)
        for key, value in sources.items():
            if value:
                field_sources.setdefault(key, value)
        corpus_parts.extend(extracted.values())
        front_text = review.get("front_text")
        if front_text:
            corpus_parts.append(str(front_text))
    corpus = "\n".join(part for part in corpus_parts if part)
    product_name = field_candidates.get("产品名称") or _safe_summary_product_name(summary_batch.product_name)

    def select_candidate(key: str, label: str, suggested: str) -> dict[str, object]:
        return {
            "label": label,
            "input_type": "select",
            "options": OPTION_FIELDS[key],
            "suggested": suggested,
        }

    def text_candidate(label: str, suggested: str, source: str) -> dict[str, object]:
        return {
            "label": label,
            "input_type": "text",
            "suggested": suggested,
            "source": source,
        }

    return {
        "product_category": select_candidate("product_category", "产品类别", _detect_product_category(corpus)),
        "registration_type": select_candidate("registration_type", "注册类型", _detect_registration_type(corpus)),
        "clinical_evaluation_path": select_candidate(
            "clinical_evaluation_path", "临床评价路径", _detect_clinical_path(corpus)
        ),
        "product_name": text_candidate(
            "产品名称",
            product_name,
            field_sources.get("产品名称", "summary" if product_name else ""),
        ),
        "model_spec": text_candidate(
            "型号规格",
            field_candidates.get("型号规格", ""),
            field_sources.get("型号规格", ""),
        ),
        "intended_use": text_candidate(
            "预期用途",
            field_candidates.get("预期用途", ""),
            field_sources.get("预期用途", ""),
        ),
    }
def _extract_item_fields(item) -> dict[str, object]:
    """Extract and review condition fields from one summary item's file.

    Returns an empty dict when the file is missing or text extraction did
    not succeed.  Otherwise returns the ``review_condition_fields`` result
    with ``front_text`` added and with sources of purely inferred fields
    relabelled from ``"rule"`` to ``"inferred"``.
    """
    path = Path(item.storage_path)
    if not path.is_absolute():
        # Relative storage paths are resolved against MEDIA_ROOT.
        path = Path(settings.MEDIA_ROOT) / item.storage_path
    if not path.exists():
        return {}

    result = extract_text(path)
    if result.status != "success" or not result.text:
        return {}

    leading_text = result.front_text or result.text
    extractor_fields = result.field_candidates or {}
    inferred_fields = _infer_fields_from_text(leading_text)
    review = review_condition_fields(
        text=leading_text,
        # Fields reported by the extractor take precedence over inferred ones.
        rule_fields={**inferred_fields, **extractor_fields},
        file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
    )

    selected_sources = dict(review.get("selected_sources") or {})
    for key in inferred_fields:
        if key not in extractor_fields and selected_sources.get(key) == "rule":
            selected_sources[key] = "inferred"
    review["selected_sources"] = selected_sources
    review["front_text"] = result.front_text or result.text[:1200]
    return review
def _safe_summary_product_name(product_name: str) -> str:
value = (product_name or "").strip()
if not value:
return ""
if any(keyword in value for keyword in ["第1章", "第2章", "监管信息", "综述资料", "非临床资料", "章节目录"]):
return ""
return value
def _infer_fields_from_text(text: str) -> dict[str, str]:
    """Infer the product name and model spec from free text.

    Only fields for which a non-empty value was inferred appear in the result.
    """
    normalized = _normalize_text_for_inference(text)
    inferred = (
        ("产品名称", _infer_product_name(normalized)),
        ("型号规格", _infer_model_spec(normalized)),
    )
    return {label: value for label, value in inferred if value}
def _normalize_text_for_inference(text: str) -> str:
value = re.sub(r"\s+", "", text or "")
value = value.replace("", "(").replace("", ")")
return value
def _infer_product_name(text: str) -> str:
    """Return the first kit-style product name found in ``text``, else ``""``.

    The stricter pattern (name followed by "产品注册") is tried before the
    generic one.  The match has known prefixes trimmed and its parentheses
    post-processed by the module helpers.
    """
    patterns = (
        r"体外诊断试剂(?P<name>[^。;;,]{4,120}?试剂盒\([^()]{2,30}\))产品注册",
        r"(?P<name>[^。;;,]{4,120}?试剂盒\([^()]{2,30}\))",
    )
    hits = (re.search(pattern, text) for pattern in patterns)
    first_hit = next((hit for hit in hits if hit), None)
    if first_hit is None:
        return ""
    return _restore_chinese_parentheses(_trim_product_name(first_hit.group("name")))
def _trim_product_name(value: str) -> str:
prefixes = ["申请境内第三类体外诊断试剂", "申请境内第二类体外诊断试剂", "境内第三类体外诊断试剂", "境内第二类体外诊断试剂"]
result = value
for prefix in prefixes:
if prefix in result:
result = result.split(prefix, 1)[-1]
return result
def _infer_model_spec(text: str) -> str:
    """Infer the model/packaging specification from normalised text.

    Distinct "规格X" tokens are preferred (sorted and concatenated).
    Otherwise the packaging-specification phrase is used, or ``""`` when
    neither is present.
    """
    spec_tokens = sorted(set(re.findall(r"规格[A-Z-]", text)))
    if spec_tokens:
        return "".join(spec_tokens)
    packaging = re.search(
        r"产品的包装规格(?P<spec>.{1,80}?(?:人份/盒|测试/盒|反应/盒)(?:[、,].{1,30}?(?:人份/盒|测试/盒|反应/盒))*)",
        text,
    )
    if packaging is None:
        return ""
    spec = packaging.group("spec").strip(":,。;;")
    return _restore_chinese_parentheses(spec)
def _restore_chinese_parentheses(value: str) -> str:
return value.replace("(", "").replace(")", "")
def _condition_candidates_incomplete(candidates: dict[str, dict[str, object]]) -> bool:
if not candidates:
return True
product_name = str((candidates.get("product_name") or {}).get("suggested") or "").strip()
product_category = str((candidates.get("product_category") or {}).get("suggested") or "").strip()
return not product_name or "<EFBFBD>" in product_name or product_category == "其他"
def _merge_condition_candidates(
    current: dict[str, dict[str, object]],
    refreshed: dict[str, dict[str, object]],
) -> dict[str, dict[str, object]]:
    """Merge refreshed candidates into the current ones.

    A refreshed config replaces the current one when its suggested value is
    judged better, and is added when the field is not present yet.  Neither
    input mapping is modified.
    """
    merged = dict(current or {})
    for field, config in (refreshed or {}).items():
        existing = merged.get(field) or {}
        existing_value = str(existing.get("suggested") or "").strip()
        incoming_value = str((config or {}).get("suggested") or "").strip()
        is_better = _is_better_condition_value(existing_value, incoming_value)
        if is_better or field not in merged:
            merged[field] = config
    return merged
def _is_better_condition_value(current_value: str, refreshed_value: str) -> bool:
if not refreshed_value:
return False
if "<EFBFBD>" in refreshed_value:
return False
if "<EFBFBD>" in current_value:
return True
if not current_value:
return True
if current_value == "其他" and refreshed_value != "其他":
return True
if current_value == "待确认" and refreshed_value != "待确认":
return True
return len(refreshed_value) > len(current_value) and current_value in refreshed_value
def _detect_product_category(corpus: str) -> str:
if any(keyword in corpus for keyword in ["体外诊断", "检测试剂", "试剂盒", "IVD"]):
return "体外诊断试剂"
if "医疗器械" in corpus:
return "医疗器械"
return "其他"
def _detect_registration_type(corpus: str) -> str:
if "延续" in corpus:
return "延续注册"
if "变更" in corpus:
return "变更注册"
return "首次注册"
def _detect_clinical_path(corpus: str) -> str:
if "免临床" in corpus or "免于临床" in corpus:
return "免临床"
if "同品种" in corpus or "同类" in corpus:
return "同品种比对"
if "临床试验" in corpus:
return "临床试验"
return "待确认"

View File

@@ -0,0 +1,243 @@
from __future__ import annotations
import json
import os
import re
import time
import inspect
from collections.abc import Callable
from typing import Any
from django.conf import settings
from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
# Field labels this module recognises; values under any other key are dropped
# when fields are cleaned, and the list is sent to the LLM as "allowed_fields".
FIELD_LABELS = ["产品名称", "型号规格", "预期用途", "管理类别", "分类编码", "注册类型", "临床评价路径"]
# Type of a callable that takes the chat message list and returns the raw
# completion text.
CompletionFunc = Callable[[list[dict[str, str]]], str]
def review_condition_fields(
    *,
    text: str,
    rule_fields: dict[str, str],
    file_context: str = "",
    completion_func: Callable[..., str] | None = None,
) -> dict[str, Any]:
    """Cross-check rule-extracted fields with an optional LLM pass.

    Args:
        text: Document text handed to the LLM.
        rule_fields: Field values produced by the rule-based extraction.
        file_context: Extra context (directory, file name, path) for the LLM.
        completion_func: Completion callable; defaults to
            ``generate_completion`` when the LLM call is enabled.

    Returns:
        A dict with ``status`` ("skipped", "success" or "failed"),
        ``error_message``, the cleaned ``rule_fields``, the ``llm_fields``,
        and the chosen ``selected_fields`` with their ``selected_sources``.
    """
    llm_fields: dict[str, str] = {}
    status = "skipped"
    error_message = ""
    if _should_call_llm(completion_func):
        try:
            raw = _call_completion_with_retries(
                completion_func or generate_completion,
                _condition_messages(text, rule_fields, file_context),
            )
            payload = _parse_json_object(raw)
            llm_fields = _clean_fields(payload.get("fields") or payload)
            status = "success"
        except (
            LLMConfigurationError,
            LLMRequestError,
            json.JSONDecodeError,
            TypeError,
            ValueError,
            OSError,
            TimeoutError,
        ) as exc:
            # A failed LLM pass degrades to the rule-based values only.
            status = "failed"
            error_message = str(exc)
    selected_fields, selected_sources = _select_fields(rule_fields, llm_fields)
    return {
        "status": status,
        "error_message": error_message,
        "rule_fields": _clean_fields(rule_fields),
        "llm_fields": llm_fields,
        "selected_fields": selected_fields,
        "selected_sources": selected_sources,
    }
def review_workflow_payload(
    *,
    stage: str,
    payload: dict[str, Any],
    completion_func: Callable[..., str] | None = None,
) -> dict[str, Any]:
    """Ask the LLM to review the rule results of one workflow stage.

    Returns a dict with ``status`` ("skipped", "success" or "failed"), the
    ``stage``, the parsed ``result`` (empty unless successful) and an
    ``error_message`` (empty unless failed).
    """
    status = "skipped"
    result: dict[str, Any] = {}
    error_message = ""
    if _should_call_llm(completion_func):
        try:
            raw = _call_completion_with_retries(
                completion_func or generate_completion,
                _workflow_messages(stage, payload),
            )
            result = _parse_json_object(raw)
            status = "success"
        except (
            LLMConfigurationError,
            LLMRequestError,
            json.JSONDecodeError,
            TypeError,
            ValueError,
            OSError,
            TimeoutError,
        ) as exc:
            status = "failed"
            result = {}
            error_message = str(exc)
    return {
        "status": status,
        "stage": stage,
        "result": result,
        "error_message": error_message,
    }
def _condition_messages(text: str, rule_fields: dict[str, str], file_context: str) -> list[dict[str, str]]:
    """Build the system/user chat messages for the field-review prompt.

    The user message is a JSON document with the file context, the rule
    fields, the first 4000 characters of ``text`` and the allowed labels.
    """
    system_prompt = (
        "你是NMPA注册资料字段复核助手。请从附件文本中提取最合理的字段值"
        "只返回JSON格式为 {\"fields\": {\"产品名称\": \"...\"}}。"
        "产品名称应包含完整名称、检测对象和方法学括号;不要把章节标题当产品名称。"
    )
    request_body = {
        "file_context": file_context,
        "rule_fields": rule_fields,
        "text": text[:4000],
        "allowed_fields": FIELD_LABELS,
    }
    user_prompt = json.dumps(request_body, ensure_ascii=False)
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
def _workflow_messages(stage: str, payload: dict[str, Any]) -> list[dict[str, str]]:
return [
{
"role": "system",
"content": (
"你是NMPA法规核查复核助手。请复核当前流程节点的规则结果"
"指出可能误判、漏判和更合理的建议。只返回JSON。"
),
},
{
"role": "user",
"content": json.dumps({"stage": stage, "payload": payload}, ensure_ascii=False)[:6000],
},
]
def _parse_json_object(raw: str) -> dict[str, Any]:
value = (raw or "").strip()
if value.startswith("```"):
value = re.sub(r"^```(?:json)?\s*", "", value)
value = re.sub(r"\s*```$", "", value)
start = value.find("{")
end = value.rfind("}")
if start >= 0 and end >= start:
value = value[start : end + 1]
parsed = json.loads(value)
if not isinstance(parsed, dict):
raise ValueError("LLM复核结果不是JSON对象。")
return parsed
def _call_completion_with_retries(completion_func: Callable[..., str], messages: list[dict[str, str]]) -> str:
    """Call ``completion_func`` and retry on request, OS and timeout errors.

    Attempt count, delay between attempts and timeout are read from the
    ``REGULATORY_LLM_REVIEW_*`` settings (defaults: 3 attempts, 0.5 s delay,
    15 s timeout).  ``timeout`` is passed only when the callable accepts it.

    Raises:
        The last caught error once every attempt has failed.
    """
    attempts = max(1, int(getattr(settings, "REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", 3) or 3))
    delay_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", 0.5) or 0)
    timeout_seconds = float(getattr(settings, "REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", 15) or 15)

    call_kwargs: dict[str, float] = {"temperature": 0.0}
    if _accepts_timeout(completion_func):
        call_kwargs["timeout"] = timeout_seconds

    last_error: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            return completion_func(messages, **call_kwargs)
        except (LLMRequestError, OSError, TimeoutError) as exc:
            last_error = exc
            # Wait only when another attempt follows.
            if attempt < attempts and delay_seconds > 0:
                time.sleep(delay_seconds)
    if last_error:
        raise last_error
    raise LLMRequestError("LLM复核调用失败。")
def _accepts_timeout(completion_func: Callable[..., str]) -> bool:
try:
signature = inspect.signature(completion_func)
except (TypeError, ValueError):
return True
return "timeout" in signature.parameters
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
    """Decide whether the LLM review should run.

    An explicitly supplied completion callable always enables it.  Otherwise
    it is disabled under pytest unless
    ``REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS`` is set, and it needs both the
    ``LLM_API_KEY`` and ``LLM_MODEL`` settings.
    """
    if completion_func is not None:
        return True
    running_under_pytest = os.environ.get("PYTEST_CURRENT_TEST")
    test_calls_allowed = getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False)
    if running_under_pytest and not test_calls_allowed:
        return False
    return bool(settings.LLM_API_KEY and settings.LLM_MODEL)
def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
    """Keep the recognised string fields, with whitespace and brackets normalised.

    Only labels listed in ``FIELD_LABELS`` with a string value are kept.
    Whitespace runs collapse to one space and ASCII parentheses become the
    full-width "(" / ")" so that values from different sources compare
    equal.  A ``fields`` argument without a ``get`` method (for example a
    list returned by the LLM) yields an empty dict instead of raising.
    """
    getter = getattr(fields, "get", None)
    if not callable(getter):
        return {}
    clean: dict[str, str] = {}
    for label in FIELD_LABELS:
        value = getter(label)
        if not isinstance(value, str):
            continue
        # Parentheses are converted, never removed: they hold the
        # methodology part of a product name.
        normalized = " ".join(value.split()).replace("(", "(").replace(")", ")")
        if normalized:
            clean[label] = normalized
    return clean
def _select_fields(rule_fields: dict[str, str], llm_fields: dict[str, str]) -> tuple[dict[str, str], dict[str, str]]:
    """Choose a value and its source for every recognised field label.

    Returns ``(selected, sources)``; labels for which no value could be
    chosen are absent from both mappings.
    """
    cleaned_rules = _clean_fields(rule_fields)
    chosen_values: dict[str, str] = {}
    chosen_sources: dict[str, str] = {}
    for label in FIELD_LABELS:
        value, source = _select_field(
            label,
            cleaned_rules.get(label, ""),
            llm_fields.get(label, ""),
        )
        if not value:
            continue
        chosen_values[label] = value
        chosen_sources[label] = source
    return chosen_values, chosen_sources
def _select_field(label: str, rule_value: str, llm_value: str) -> tuple[str, str]:
    """Pick between the rule-based and the LLM value of one field.

    The rule value is the default.  The LLM value is used when the rule
    value is missing, when it is judged a better product name, or when it
    is a clearly longer string (more than 1.35x) containing the rule value.
    Returns ``(value, source)`` with source "rule", "llm" or "".
    """
    if _invalid_field_value(llm_value):
        rule_source = "rule" if rule_value else ""
        return rule_value, rule_source
    if not rule_value:
        llm_source = "llm" if llm_value else ""
        return llm_value, llm_source
    if not llm_value:
        return rule_value, "rule"
    prefer_llm = (label == "产品名称" and _better_product_name(llm_value, rule_value)) or (
        len(llm_value) > len(rule_value) * 1.35 and rule_value in llm_value
    )
    if prefer_llm:
        return llm_value, "llm"
    return rule_value, "rule"
def _better_product_name(candidate: str, current: str) -> bool:
if current and current in candidate and len(candidate) > len(current):
return True
product_keywords = ["试剂盒", "检测试剂", "荧光PCR法", "PCR法", "核酸检测"]
return len(candidate) > len(current) and any(keyword in candidate for keyword in product_keywords)
def _invalid_field_value(value: str) -> bool:
if not value:
return True
if "<EFBFBD>" in value:
return True
return any(keyword in value for keyword in ["第1章", "第2章", "第3章", "监管信息", "综述资料", "章节目录"])

View File

@@ -0,0 +1,57 @@
from __future__ import annotations
from pathlib import Path
from django.conf import settings
from .rag_embedding import EmbeddingFunction, get_embedding_provider
class RagIndexUnavailable(RuntimeError):
    """Raised when the regulatory RAG index or its collection cannot be loaded."""
def retrieve_citations(
    query: str,
    *,
    embedding_provider: EmbeddingFunction | None = None,
    collection=None,
    n_results: int = 3,
) -> list[dict[str, object]]:
    """Retrieve the regulation passages closest to ``query``.

    Args:
        query: Free-text query to embed.
        embedding_provider: Embedding callable; the configured provider is
            used when omitted.
        collection: Vector collection exposing ``query``; the persisted
            collection is loaded when omitted.
        n_results: Maximum number of passages requested.

    Returns:
        One dict per hit with ``source``, ``text`` and ``score`` (the
        distance reported by the collection, or ``None``).  When nothing is
        found, a single placeholder citation is returned instead.
    """
    provider = embedding_provider or get_embedding_provider()
    if collection is None:
        collection = _load_collection()
    embeddings = provider([query])
    result = collection.query(query_embeddings=embeddings, n_results=n_results)
    documents = (result.get("documents") or [[]])[0]
    metadatas = (result.get("metadatas") or [[]])[0]
    distances = (result.get("distances") or [[]])[0]
    if not documents:
        return [{"source": "原文依据待补充", "text": "RAG 无命中", "score": None}]
    citations = []
    for index, document in enumerate(documents):
        # A metadata entry may be missing or ``None``; treat both as empty
        # so the default source label is used instead of raising.
        metadata = (metadatas[index] if index < len(metadatas) else None) or {}
        distance = distances[index] if index < len(distances) else None
        citations.append(
            {
                "source": metadata.get("source", "法规材料"),
                "text": document,
                "score": distance,
            }
        )
    return citations
def _load_collection():
    """Open the persisted Chroma collection configured in the settings.

    Raises:
        RagIndexUnavailable: If the index directory does not exist,
            ``chromadb`` is not installed, or the collection cannot be
            fetched.
    """
    index_dir = Path(settings.REGULATORY_RAG_CHROMA_PATH)
    if not index_dir.exists():
        raise RagIndexUnavailable("法规 RAG 索引不存在,请先运行 regulatory_rag_build。")
    try:
        import chromadb
    except ImportError as exc:
        raise RagIndexUnavailable("chromadb 未安装,请先安装 requirements.txt。") from exc
    client = chromadb.PersistentClient(path=str(index_dir))
    collection_name = settings.REGULATORY_RAG_COLLECTION
    try:
        collection = client.get_collection(collection_name)
    except Exception as exc:
        # Any failure to fetch the collection is reported as "missing index".
        raise RagIndexUnavailable("法规 RAG collection 不存在,请先运行 regulatory_rag_build。") from exc
    return collection

View File

@@ -0,0 +1,82 @@
from __future__ import annotations
import hashlib
import random
from typing import Callable, Iterable
import httpx
from django.conf import settings
# Type of an embedding callable: takes a list of texts and returns a list of
# float vectors.
EmbeddingFunction = Callable[[list[str]], list[list[float]]]
class EmbeddingConfigurationError(RuntimeError):
    """Raised when an embedding provider is missing or misconfigured."""
class SiliconFlowEmbeddingProvider:
    """Embedding provider that posts texts to a SiliconFlow ``/embeddings`` endpoint."""

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str,
        model: str,
        dimensions: int,
        timeout: float = 60.0,
    ):
        """Store the connection settings.

        Raises:
            EmbeddingConfigurationError: If ``api_key`` is empty.
        """
        if not api_key:
            raise EmbeddingConfigurationError("SILICONFLOW_API_KEY 未配置。")
        self.api_key = api_key
        # Drop trailing slashes so the endpoint path can be appended safely.
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.dimensions = dimensions
        self.timeout = timeout

    def embed(self, texts: Iterable[str]) -> list[list[float]]:
        """Return one embedding vector per input text.

        No request is sent for an empty input.  HTTP error statuses are
        raised by ``raise_for_status``.
        """
        inputs = list(texts)
        if not inputs:
            return []
        response = httpx.post(
            f"{self.base_url}/embeddings",
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={
                "model": self.model,
                "input": inputs,
                "dimensions": self.dimensions,
            },
            timeout=self.timeout,
        )
        response.raise_for_status()
        data = response.json().get("data", [])
        # NOTE(review): presumably the response follows the OpenAI-style
        # schema where each entry carries its input "index" - confirm.  The
        # sort is stable, so entries without an index keep response order.
        ordered = sorted(data, key=lambda entry: entry.get("index") or 0)
        return [entry["embedding"] for entry in ordered]

    def __call__(self, texts: list[str]) -> list[list[float]]:
        """Make the provider usable wherever an embedding callable is expected."""
        return self.embed(texts)
class DeterministicEmbeddingProvider:
    """Small local embedding substitute for tests and explicit dry runs."""

    def __init__(self, dimensions: int = 16):
        self.dimensions = dimensions

    def _vector_for(self, text: str) -> list[float]:
        """Derive a reproducible pseudo-random vector from the text's SHA-256."""
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        # The first 16 hex digits of the digest seed the generator.
        generator = random.Random(int(digest[:16], 16))
        return [generator.uniform(-1, 1) for _ in range(self.dimensions)]

    def __call__(self, texts: list[str]) -> list[list[float]]:
        """Return one vector per text; equal texts always get equal vectors."""
        return [self._vector_for(text) for text in texts]
def get_embedding_provider(provider_name: str | None = None) -> EmbeddingFunction:
    """Build the embedding provider named by the argument or the settings.

    Raises:
        EmbeddingConfigurationError: If the provider name is not supported.
    """
    provider = provider_name or settings.REGULATORY_RAG_PROVIDER
    if provider == "siliconflow":
        siliconflow_options = {
            "api_key": settings.SILICONFLOW_API_KEY,
            "base_url": settings.SILICONFLOW_BASE_URL,
            "model": settings.SILICONFLOW_EMBEDDING_MODEL,
            "dimensions": settings.SILICONFLOW_EMBEDDING_DIMENSIONS,
        }
        return SiliconFlowEmbeddingProvider(**siliconflow_options)
    if provider not in {"deterministic", "local"}:
        raise EmbeddingConfigurationError(f"不支持的 embedding provider{provider}")
    return DeterministicEmbeddingProvider()

View File

@@ -0,0 +1,155 @@
from __future__ import annotations
import hashlib
import logging
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from django.conf import settings
from docx import Document
from openpyxl import load_workbook
from pypdf import PdfReader
from pptx import Presentation
from .rag_embedding import EmbeddingFunction
# Module logger, registered under an explicit dotted name.
logger = logging.getLogger("review_agent.regulatory_review.rag_index")
@dataclass(frozen=True)
class TextChunk:
    """Immutable piece of source text together with its metadata."""

    text: str
    # Metadata stored with the chunk, e.g. its source and chunk index.
    metadata: dict[str, object]
def chunk_text(text: str, *, source: str, chunk_size: int = 900, overlap: int = 120) -> list[TextChunk]:
    """Split text into overlapping chunks tagged with source and index.

    Lines are stripped and blank lines dropped before chunking.  Windows of
    ``chunk_size`` characters advance by ``chunk_size - overlap`` (at least
    one) characters, and windows that are empty after stripping are skipped.
    """
    stripped_lines = (line.strip() for line in text.splitlines())
    normalized = "\n".join(line for line in stripped_lines if line)
    if not normalized:
        return []
    stride = max(1, chunk_size - overlap)
    pieces: list[TextChunk] = []
    for offset in range(0, len(normalized), stride):
        window = normalized[offset : offset + chunk_size].strip()
        if not window:
            continue
        # The chunk index counts emitted chunks only, so it equals the
        # number of pieces collected so far.
        pieces.append(TextChunk(text=window, metadata={"source": source, "chunk_index": len(pieces)}))
    return pieces
def extract_text_from_path(path: Path) -> str:
    """Extract plain text from a regulation source file, chosen by suffix.

    Supported suffixes are .txt, .md, .pdf, .docx, .pptx, .xlsx and .doc
    (the latter converted through LibreOffice).  Any other suffix yields an
    empty string.
    """
    suffix = path.suffix.lower()
    if suffix in {".txt", ".md"}:
        return path.read_text(encoding="utf-8", errors="ignore")
    if suffix == ".pdf":
        return "\n".join(page.extract_text() or "" for page in PdfReader(str(path)).pages)
    if suffix == ".docx":
        return "\n".join(paragraph.text for paragraph in Document(str(path)).paragraphs)
    if suffix == ".pptx":
        presentation = Presentation(str(path))
        lines = []
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    lines.append(shape.text)
        return "\n".join(lines)
    if suffix == ".xlsx":
        workbook = load_workbook(path, data_only=True, read_only=True)
        try:
            lines = []
            for sheet in workbook.worksheets:
                for row in sheet.iter_rows(values_only=True):
                    values = [str(cell) for cell in row if cell not in {None, ""}]
                    if values:
                        lines.append("\t".join(values))
        finally:
            # A read-only workbook keeps the file open until it is closed
            # explicitly.
            workbook.close()
        return "\n".join(lines)
    if suffix == ".doc":
        return _extract_legacy_doc_with_libreoffice(path)
    return ""
def _extract_legacy_doc_with_libreoffice(path: Path) -> str:
    """Convert a legacy ``.doc`` file to ``.docx`` with ``soffice`` and extract its text.

    The conversion writes into a temporary directory that is removed when the
    function returns. Raises ``RuntimeError`` when ``soffice`` is missing,
    exits non-zero, exceeds the 60 second timeout, or produces no output file.
    """
    with tempfile.TemporaryDirectory() as scratch:
        out_dir = Path(scratch)
        command = [
            "soffice",
            "--headless",
            "--convert-to",
            "docx",
            "--outdir",
            str(out_dir),
            str(path),
        ]
        try:
            subprocess.run(command, check=True, capture_output=True, text=True, timeout=60)
        except (FileNotFoundError, subprocess.CalledProcessError, subprocess.TimeoutExpired) as exc:
            raise RuntimeError(f"无法通过 LibreOffice 转换法规 .doc 材料:{path.name}") from exc

        docx_path = out_dir / f"{path.stem}.docx"
        if docx_path.exists():
            # Read the converted file before the temporary directory disappears.
            return extract_text_from_path(docx_path)
        raise RuntimeError(f"LibreOffice 未生成 docx{path.name}")
def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
    """Extract and chunk every file found recursively under *source_dir*.

    Files are visited in sorted path order. A ``RuntimeError`` during
    extraction is fatal for the attachment 4 document (it is re-raised with
    a dedicated message); for any other file it is logged and the file is
    skipped. Each chunk's source is the file path relative to *source_dir*.
    """
    collected: list[TextChunk] = []
    files = (entry for entry in sorted(source_dir.rglob("*")) if entry.is_file())
    for file_path in files:
        try:
            text = extract_text_from_path(file_path)
        except RuntimeError as exc:
            if _is_attachment4(file_path):
                raise RuntimeError(f"附件 4 核心法规材料抽取失败:{file_path.name}") from exc
            logger.warning(
                "Regulatory source extraction skipped",
                extra={"path": str(file_path), "error": str(exc)},
            )
            continue
        relative_name = str(file_path.relative_to(source_dir))
        collected.extend(chunk_text(text, source=relative_name))
    return collected
def _is_attachment4(path: Path) -> bool:
normalized = path.name.replace(" ", "")
return "附件4" in normalized and "体外诊断试剂注册申报资料要求及说明" in normalized
def build_chroma_index(
    *,
    source_dir: Path,
    embedding_provider: EmbeddingFunction,
    persist_path: Path | None = None,
    collection_name: str | None = None,
) -> int:
    """Chunk the files under *source_dir* and upsert them into a Chroma collection.

    *persist_path* and *collection_name* fall back to the
    ``REGULATORY_RAG_CHROMA_PATH`` and ``REGULATORY_RAG_COLLECTION`` settings.
    The collection is created even when no chunks are found. Chunk ids are
    the SHA-256 of ``"<source>:<chunk_index>"``. Returns the number of chunks
    upserted. Raises ``RuntimeError`` when ``chromadb`` cannot be imported.
    """
    try:
        import chromadb
    except ImportError as exc:
        raise RuntimeError("chromadb 未安装,请先安装 requirements.txt。") from exc

    persist_path = persist_path or Path(settings.REGULATORY_RAG_CHROMA_PATH)
    collection_name = collection_name or settings.REGULATORY_RAG_COLLECTION
    persist_path.mkdir(parents=True, exist_ok=True)

    chunks = collect_source_chunks(source_dir)
    collection = chromadb.PersistentClient(path=str(persist_path)).get_or_create_collection(collection_name)
    if not chunks:
        return 0

    documents = [chunk.text for chunk in chunks]
    vectors = embedding_provider(documents)
    metadatas = []
    chunk_ids = []
    for chunk in chunks:
        key = f"{chunk.metadata['source']}:{chunk.metadata['chunk_index']}"
        chunk_ids.append(hashlib.sha256(key.encode("utf-8")).hexdigest())
        metadatas.append(chunk.metadata)
    collection.upsert(ids=chunk_ids, documents=documents, metadatas=metadatas, embeddings=vectors)
    return len(chunks)

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import json
from django.utils import timezone
from review_agent.models import FileSummaryBatch, RegulatoryIssue, RegulatoryReviewBatch
from review_agent.regulatory_review.services.rule_loader import load_rule_file
from review_agent.regulatory_review.storage import save_artifact
def review_missing_issues(
    *,
    batch: RegulatoryReviewBatch,
    issue_ids: list[int],
    file_summary_batch: FileSummaryBatch,
) -> dict[str, object]:
    """Re-check the selected issues of *batch* against a file summary batch.

    An issue passes when at least one summary item matches the rule's
    ``file_keywords`` or the issue title (see ``_match_items``). Each issue's
    status and ``evidence["latest_review"]`` are saved, and the whole review
    record is stored as a JSON artifact. Returns the record, including the
    id of that artifact.
    """
    rule_set = load_rule_file()
    rules_by_code = {}
    for rule in rule_set.get("requirements", []):
        rules_by_code[rule["code"]] = rule
    summary_items = list(file_summary_batch.items.order_by("file_index"))

    reviewed_items: list[dict[str, object]] = []
    record = {
        "type": "review_record",
        "reviewed_at": timezone.localtime().isoformat(),
        "source_review_batch_id": batch.pk,
        "source_review_batch_no": batch.batch_no,
        "file_summary_batch_id": file_summary_batch.pk,
        "file_summary_batch_no": file_summary_batch.batch_no,
        "items": reviewed_items,
    }

    selected = RegulatoryIssue.objects.filter(batch=batch, pk__in=issue_ids).order_by("id")
    for issue in selected:
        rule = rules_by_code.get(issue.rule_code, {})
        keywords = [*rule.get("file_keywords", []), issue.title]
        matched_files = _match_items(summary_items, keywords)
        if matched_files:
            issue.status = RegulatoryIssue.Status.REVIEW_PASSED
        else:
            issue.status = RegulatoryIssue.Status.REVIEW_FAILED

        # Keep earlier evidence and replace only the latest review entry.
        evidence = dict(issue.evidence or {})
        evidence["latest_review"] = {
            "file_summary_batch_id": file_summary_batch.pk,
            "file_summary_batch_no": file_summary_batch.batch_no,
            "matched_files": matched_files,
        }
        issue.evidence = evidence
        issue.save(update_fields=["status", "evidence", "updated_at"])

        reviewed_items.append(
            {
                "issue_id": issue.pk,
                "rule_code": issue.rule_code,
                "title": issue.title,
                "status": issue.status,
                "matched_files": matched_files,
            }
        )

    artifact_name = f"review_record_{timezone.now().strftime('%Y%m%d%H%M%S')}.json"
    artifact = save_artifact(
        batch,
        name=artifact_name,
        artifact_type="json",
        content=json.dumps(record, ensure_ascii=False, indent=2),
        metadata={"artifact": "review_record", "file_summary_batch_id": file_summary_batch.pk},
    )
    record["artifact_id"] = artifact.pk
    return record
def _match_items(items, keywords: list[str]) -> list[dict[str, str]]:
normalized_keywords = [str(keyword).lower() for keyword in keywords if keyword]
matched = []
for item in items:
haystack = f"{item.file_name} {item.relative_path} {item.directory_level}".lower()
if any(keyword in haystack for keyword in normalized_keywords):
matched.append(
{
"file_name": item.file_name,
"relative_path": item.relative_path,
"directory_level": item.directory_level,
}
)
return matched

View File

@@ -0,0 +1,50 @@
from __future__ import annotations
from collections import Counter
from review_agent.models import RegulatoryIssue, RegulatoryReviewBatch
from review_agent.regulatory_review.schemas import Finding
from .rag_citation import retrieve_citations
# Severity keys reported in a batch risk summary, in the order they are emitted
# by _risk_summary().
SEVERITY_ORDER = ["blocking", "high", "medium", "low", "info"]
def persist_findings(batch: RegulatoryReviewBatch, findings: list[Finding]) -> list[RegulatoryIssue]:
    """Replace the issues of *batch* with one issue per distinct finding.

    Existing issues of the batch are deleted first. Findings are
    de-duplicated on ``(rule_code, category, title)``, keeping the first
    occurrence. A finding without citations gets them from
    ``_safe_citations``. The batch ``risk_summary`` is recomputed and saved,
    and the created issues are returned.
    """
    RegulatoryIssue.objects.filter(batch=batch).delete()

    first_by_key = {}
    for finding in findings:
        key = (finding.rule_code, finding.category, finding.title)
        if key not in first_by_key:
            first_by_key[key] = finding

    created: list[RegulatoryIssue] = []
    for finding in first_by_key.values():
        citations = finding.citations or _safe_citations(finding)
        issue = RegulatoryIssue.objects.create(
            batch=batch,
            rule_code=finding.rule_code,
            category=finding.category,
            severity=finding.severity,
            title=finding.title,
            detail=finding.detail,
            suggestion=finding.suggestion,
            evidence=finding.evidence,
            citations=citations,
        )
        created.append(issue)

    batch.risk_summary = _risk_summary(created)
    batch.save(update_fields=["risk_summary"])
    return created
def _safe_citations(finding: Finding) -> list[dict[str, object]]:
try:
return retrieve_citations(finding.title)
except Exception:
return [{"source": "原文依据待补充", "text": "RAG 索引不可用或无命中", "score": None}]
def _risk_summary(issues: list[RegulatoryIssue]) -> dict[str, int]:
    """Count issues per severity, reporting every key of ``SEVERITY_ORDER`` (zero when absent)."""
    tally: Counter = Counter()
    for issue in issues:
        tally[issue.severity] += 1
    # Reading a missing key from a Counter yields 0 without inserting it.
    return {level: tally[level] for level in SEVERITY_ORDER}

View File

@@ -0,0 +1,127 @@
from __future__ import annotations
import hashlib
from dataclasses import dataclass
from pathlib import Path
import yaml
from django.conf import settings
from review_agent.models import RegulatoryRuleVersion
# Rule-set code that load_rule_file() requires in the "code" field of the YAML.
DEFAULT_RULE_CODE = "nmpa_ivd_registration_v1"
# Rule file used when no explicit path is given, located under the Django BASE_DIR.
DEFAULT_RULE_PATH = (
    Path(settings.BASE_DIR)
    / "review_agent"
    / "regulatory_review"
    / "rules"
    / "nmpa_ivd_registration_v1.yaml"
)
@dataclass(frozen=True)
class RuleVersionCheck:
    """Immutable result of comparing the rule file hash with its database record."""

    # Outcome set by check_rule_version(): "missing", "created", "mismatch" or "ok".
    status: str
    # Rule-set code read from the rule file or the database record.
    code: str
    # Path of the rule file that was checked.
    path: Path
    # SHA-256 hex digest of the rule file on disk.
    current_hash: str
    # Hash stored on the database record; empty when no record was available.
    database_hash: str = ""
    # The matching database record, when one exists or was created.
    record: RegulatoryRuleVersion | None = None
def compute_file_sha256(path: str | Path) -> str:
    """Return the SHA-256 hex digest of the file at *path*, read in 1 MiB blocks."""
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with Path(path).open("rb") as stream:
        while block := stream.read(block_size):
            hasher.update(block)
    return hasher.hexdigest()
def load_rule_file(path: str | Path | None = None) -> dict:
    """Load and validate the regulatory rule YAML file.

    Args:
        path: Rule file to read; defaults to ``DEFAULT_RULE_PATH``.

    Returns:
        The parsed rule set.

    Raises:
        ValueError: When the top level of the file is not a mapping, its
            ``code`` is not ``DEFAULT_RULE_CODE``, ``requirements`` is not a
            non-empty list, or the attachment 4 validation fails.
    """
    rule_path = Path(path) if path else DEFAULT_RULE_PATH
    with rule_path.open("r", encoding="utf-8") as handle:
        payload = yaml.safe_load(handle) or {}
    # A YAML document may be a list or a scalar; report that as a validation
    # error instead of failing later with AttributeError on ``.get``.
    if not isinstance(payload, dict):
        raise ValueError("规则文件顶层必须是映射。")
    if payload.get("code") != DEFAULT_RULE_CODE:
        raise ValueError(f"规则 code 必须为 {DEFAULT_RULE_CODE}")
    if not isinstance(payload.get("requirements"), list) or not payload["requirements"]:
        raise ValueError("规则文件必须包含 requirements 列表。")
    _validate_attachment4_requirements(payload)
    return payload
def _validate_attachment4_requirements(payload: dict) -> None:
requirements = payload.get("requirements") or []
required_codes = {str(code) for code in payload.get("attachment4_required_codes") or []}
by_attachment4_code: dict[str, list[dict]] = {}
for requirement in requirements:
attachment4_code = requirement.get("attachment4_code")
if attachment4_code:
by_attachment4_code.setdefault(str(attachment4_code), []).append(requirement)
for field in ["code", "rule_id", "title", "severity", "file_keywords", "citation_query"]:
if attachment4_code and not requirement.get(field):
raise ValueError(f"附件4规则 {attachment4_code} 缺少 {field}")
missing = sorted(required_codes - set(by_attachment4_code), key=_attachment4_sort_key)
if missing:
raise ValueError(f"附件4目录项缺少规则{', '.join(missing)}")
def _attachment4_sort_key(value: str) -> tuple[int, ...]:
return tuple(int(part) for part in value.split(".") if part.isdigit())
def check_rule_version(
    *,
    path: str | Path | None = None,
    update_missing: bool = True,
) -> RuleVersionCheck:
    """Compare the rule file on disk with its ``RegulatoryRuleVersion`` record.

    Args:
        path: Rule file to check; defaults to ``DEFAULT_RULE_PATH``.
        update_missing: When no record exists, create one (status
            ``"created"``) instead of only reporting ``"missing"``.

    Returns:
        A ``RuleVersionCheck`` whose status is ``"missing"``, ``"created"``,
        ``"mismatch"`` (stored hash differs from the file) or ``"ok"``.

    Raises:
        ValueError: Propagated from ``load_rule_file`` for an invalid file.
    """
    rule_path = Path(path) if path else DEFAULT_RULE_PATH
    rule_set = load_rule_file(rule_path)
    current_hash = compute_file_sha256(rule_path)
    record = RegulatoryRuleVersion.objects.filter(code=rule_set["code"]).first()
    try:
        yaml_path = str(rule_path.relative_to(settings.BASE_DIR))
    except ValueError:
        # A caller-supplied path outside BASE_DIR cannot be made relative;
        # store it as given instead of failing the whole check.
        yaml_path = str(rule_path)

    if record is None:
        if not update_missing:
            return RuleVersionCheck(
                status="missing",
                code=rule_set["code"],
                path=rule_path,
                current_hash=current_hash,
            )
        record = RegulatoryRuleVersion.objects.create(
            code=rule_set["code"],
            name=rule_set.get("name") or rule_set["code"],
            yaml_path=yaml_path,
            yaml_hash=current_hash,
            rag_collection=rule_set.get("rag_collection", ""),
            status=RegulatoryRuleVersion.Status.ACTIVE,
        )
        status = "created"
    elif record.yaml_hash != current_hash:
        status = "mismatch"
    else:
        status = "ok"

    return RuleVersionCheck(
        status=status,
        code=record.code,
        path=rule_path,
        current_hash=current_hash,
        database_hash=record.yaml_hash,
        record=record,
    )

View File

@@ -0,0 +1,92 @@
from __future__ import annotations
from collections.abc import Callable
from review_agent.regulatory_review.schemas import Finding
def run_structure_check(
    document_texts: dict[str, str],
    rule_set: dict,
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Check document texts against the structural requirements of *rule_set*.

    For each requirement, two checks may produce findings:

    * when ``structure_required`` is set, the title or one of its aliases
      must appear somewhere in the combined text of all documents;
    * each entry of ``required_sections`` must appear in the documents
      selected by ``file_keywords`` (skipped when no document is selected).

    *progress_callback*, when given, is called once per requirement with the
    processed count, total, label and running number of findings.
    """
    requirements = list(rule_set.get("requirements", []))
    requirement_count = len(requirements)
    whole_corpus = "\n".join(document_texts.values())
    results: list[Finding] = []

    for position, requirement in enumerate(requirements, start=1):
        if requirement.get("structure_required"):
            expected_names = [requirement.get("title", ""), *requirement.get("aliases", [])]
            if not _contains_any(whole_corpus, expected_names):
                results.append(
                    Finding(
                        rule_code=requirement["code"],
                        category="structure",
                        severity=requirement.get("severity", "medium"),
                        title=f"申报资料目录缺少{_numbered_title(requirement)}章节",
                        detail=f"未在申报资料目录或章节标题候选中发现{_numbered_title(requirement)}",
                        suggestion=requirement.get("suggestion", ""),
                        evidence={
                            "attachment4_code": requirement.get("attachment4_code"),
                            "expected_title": requirement["title"],
                            "aliases": requirement.get("aliases", []),
                        },
                    )
                )

        sections = requirement.get("required_sections") or []
        scoped_docs = (
            _matching_documents(document_texts, requirement.get("file_keywords", []))
            if sections
            else {}
        )
        if scoped_docs:
            scoped_text = "\n".join(scoped_docs.values())
            absent_sections = [section for section in sections if not _contains_any(scoped_text, [section])]
            for section in absent_sections:
                results.append(
                    Finding(
                        rule_code=f"{requirement['code']}:{section}",
                        category="structure",
                        severity=requirement.get("severity", "medium"),
                        title=f"{requirement['title']}缺少{section}章节",
                        detail=f"已匹配{requirement['title']}文件,但未发现{section}相关内容。",
                        suggestion=requirement.get("suggestion", ""),
                        evidence={"section": section, "files": list(scoped_docs)},
                    )
                )

        if progress_callback:
            progress_callback(
                {
                    "processed": position,
                    "total": requirement_count,
                    "label": _numbered_title(requirement),
                    "finding_count": len(results),
                }
            )
    return results
def _matching_documents(document_texts: dict[str, str], keywords: list[str]) -> dict[str, str]:
if not keywords:
return document_texts
result = {}
for name, text in document_texts.items():
haystack = f"{name}\n{text}".lower()
if any(str(keyword).lower() in haystack for keyword in keywords):
result[name] = text
return result
def _contains_any(text: str, needles: list[str]) -> bool:
    """Report whether any non-empty needle occurs in *text* after title normalization."""
    haystack = _normalize_title(text)
    for needle in needles:
        if needle and _normalize_title(needle) in haystack:
            return True
    return False
def _normalize_title(value: str) -> str:
    """Lowercase *value*, drop "/" characters and remove all whitespace.

    NOTE(review): the second ``replace("", "")`` swaps the empty string for
    itself, which leaves the text unchanged. Its first argument was
    presumably a separator character lost along the way -- confirm against
    the repository before relying on it.
    """
    return "".join(str(value).lower().replace("/", "").replace("", "").split())
def _numbered_title(requirement: dict) -> str:
attachment4_code = requirement.get("attachment4_code")
if not attachment4_code:
return requirement["title"]
return f"{attachment4_code}{requirement['title']}"

View File

@@ -0,0 +1,101 @@
from __future__ import annotations
import hashlib
import re
from dataclasses import dataclass
from pathlib import Path
from .rag_index import extract_text_from_path
@dataclass(frozen=True)
class ExtractedText:
    """Immutable outcome of extracting text from a single file."""

    # File the extraction was attempted on.
    path: Path
    # Extracted text; empty when extraction was unsupported or failed.
    text: str
    # extract_text() sets this to "unsupported", "failed" or "success".
    status: str
    # SHA-256 hex digest of the text; empty when there is no text.
    content_hash: str = ""
    # Message of the exception that made extraction fail.
    error_message: str = ""
    # Leading part of the text (see _front_text()).
    front_text: str = ""
    # Lines that look like section headings (see _section_candidates()).
    section_candidates: list[str] | None = None
    # Field label -> value pairs found in the text (see _field_candidates()).
    field_candidates: dict[str, str] | None = None
# Lower-case file suffixes that extract_text() attempts to read; any other
# suffix is reported as "unsupported".
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".pptx", ".xlsx", ".doc"}
# Field labels that _field_candidates() looks for at the start of a line.
FIELD_LABELS = ["产品名称", "型号规格", "预期用途", "管理类别", "分类编码", "注册类型", "临床评价路径"]
def extract_text(path: str | Path) -> ExtractedText:
    """Extract text from *path* and report the outcome without raising.

    An unsupported suffix yields status ``"unsupported"``. Any exception
    raised during extraction yields status ``"failed"`` with the exception
    message. Otherwise the status is ``"success"`` and the result carries the
    text, its SHA-256 hash, the front text and the section and field
    candidates.
    """
    file_path = Path(path)
    if file_path.suffix.lower() not in SUPPORTED_EXTENSIONS:
        return ExtractedText(path=file_path, text="", status="unsupported")

    try:
        text = extract_text_from_path(file_path)
    except Exception as exc:
        # Extraction problems are reported through the result, not raised.
        return ExtractedText(
            path=file_path,
            text="",
            status="failed",
            error_message=str(exc),
            section_candidates=[],
            field_candidates={},
        )

    if text:
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    else:
        digest = ""
    return ExtractedText(
        path=file_path,
        text=text,
        status="success",
        content_hash=digest,
        front_text=_front_text(text),
        section_candidates=_section_candidates(text),
        field_candidates=_field_candidates(text),
    )
def _front_text(text: str, limit: int = 1200) -> str:
return text[:limit]
def _section_candidates(text: str) -> list[str]:
candidates = []
for line in text.splitlines():
normalized = line.strip()
if not normalized:
continue
if re.match(r"^([一二三四五六七八九十]+[、.]|[0-9]+(\.[0-9]+)*[、.\s])", normalized):
candidates.append(normalized[:120])
elif any(keyword in normalized for keyword in ["章节目录", "监管信息", "综述资料", "非临床资料", "临床评价资料", "质量管理体系"]):
candidates.append(normalized[:120])
return candidates[:80]
def _field_candidates(text: str) -> dict[str, str]:
    """Extract ``label: value`` fields for the labels in ``FIELD_LABELS``.

    A value starts after the label's colon and continues over the following
    lines until a blank line, another field line or a section heading. Only
    the first non-empty value of each label is kept, with runs of whitespace
    collapsed to single spaces.
    """
    lines = text.splitlines()
    found: dict[str, str] = {}
    for position, raw_line in enumerate(lines):
        current = raw_line.strip()
        if not current:
            continue
        for label in FIELD_LABELS:
            if label in found:
                continue
            hit = re.match(rf"^{re.escape(label)}[:]\s*(.*)$", current)
            if hit is None:
                continue

            pieces = [hit.group(1).strip()]
            cursor = position + 1
            while cursor < len(lines):
                follow = lines[cursor].strip()
                if not follow or _starts_field_line(follow) or _looks_like_section_heading(follow):
                    break
                pieces.append(follow)
                cursor += 1

            joined = " ".join(piece for piece in pieces if piece)
            if joined:
                found[label] = " ".join(joined.split())
    return found
def _starts_field_line(line: str) -> bool:
    """Tell whether *line* begins a field: a known label, or 2-24 non-space characters, then a colon."""
    for label in FIELD_LABELS:
        if re.match(rf"^{re.escape(label)}[:]", line):
            return True
    return re.match(r"^[^\s:]{2,24}[:]", line) is not None
def _looks_like_section_heading(line: str) -> bool:
return bool(re.match(r"^([一二三四五六七八九十]+[、.]|[0-9]+(\.[0-9]+)*[、.\s])", line))

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
import hashlib
from pathlib import Path
from django.conf import settings
from review_agent.models import RegulatoryArtifact, RegulatoryReviewBatch
def save_artifact(
    batch: RegulatoryReviewBatch,
    *,
    name: str,
    content: str | bytes,
    artifact_type: str,
    metadata: dict | None = None,
) -> RegulatoryArtifact:
    """Write *content* into the batch work directory and record it as an artifact.

    The file goes under ``batch.work_dir`` or, when that is empty, under
    ``MEDIA_ROOT/regulatory_review/work/<batch_no>``. Only the final
    component of *name* is used as the file name. Text is written as UTF-8,
    and the stored hash is the SHA-256 of the bytes (UTF-8 encoded for text).
    """
    if batch.work_dir:
        root = Path(batch.work_dir)
    else:
        root = Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no
    root.mkdir(parents=True, exist_ok=True)

    # Strip any directory part from the requested name.
    target = root / Path(name).name
    if isinstance(content, bytes):
        target.write_bytes(content)
        hashed_bytes = content
    else:
        target.write_text(content, encoding="utf-8")
        hashed_bytes = content.encode("utf-8")
    digest = hashlib.sha256(hashed_bytes).hexdigest()

    return RegulatoryArtifact.objects.create(
        batch=batch,
        artifact_type=artifact_type,
        name=target.name,
        storage_path=str(target),
        content_hash=digest,
        metadata=metadata or {},
    )

View File

@@ -0,0 +1,225 @@
from __future__ import annotations
import json
from django.conf import settings
from django.http import Http404, JsonResponse
from django.views.decorators.http import require_http_methods
from django.contrib.auth.decorators import login_required
from review_agent.models import FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
from review_agent.regulatory_review.events import record_event
from review_agent.regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
from review_agent.regulatory_review.services.rectification_review import review_missing_issues
from review_agent.regulatory_review.workflow import create_regulatory_review_batch, start_regulatory_review_workflow
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return the status of one of the user's regulatory review batches as JSON.

    The response holds the batch summary and its workflow nodes. While the
    batch waits for the user and condition candidates exist, it also carries
    a ``condition_confirmation`` section. Raises ``Http404`` when the batch
    does not belong to the requesting user.
    """
    batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")

    condition_candidates = ensure_regulatory_condition_candidates(batch)
    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
    ).order_by("id")

    batch_info = {
        "id": batch.pk,
        "workflow_type": "regulatory_review",
        "batch_no": batch.batch_no,
        "status": batch.status,
        "source_summary_batch_id": batch.source_summary_batch_id,
        "risk_summary": batch.risk_summary,
        "risk_summary_text": _format_risk_summary(batch.risk_summary or {}),
        "error_message": batch.error_message,
    }
    node_info = []
    for node in node_runs:
        node_info.append(
            {
                "node_code": node.node_code,
                "node_name": node.node_name,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            }
        )
    payload = {"batch": batch_info, "nodes": node_info}

    awaiting_user = batch.status == RegulatoryReviewBatch.Status.WAITING_USER
    if awaiting_user and condition_candidates:
        payload["condition_confirmation"] = {
            "batch_id": batch.pk,
            "batch_no": batch.batch_no,
            "confirm_url": f"/api/review-agent/regulatory-review/{batch.pk}/conditions/",
            "candidates": condition_candidates,
        }
    return JsonResponse(payload)
@require_http_methods(["POST"])
@login_required
def confirm_conditions(request, batch_id: int):
    """Store the user's confirmed conditions and resume the review workflow.

    Expects a JSON object body with a ``conditions`` object. Responds with
    400 when the body is not valid UTF-8 JSON or ``conditions`` is not an
    object, and raises ``Http404`` when the batch does not belong to the
    requesting user. On success the batch is set to RUNNING, the
    ``condition_confirm`` node is marked successful, an event is recorded
    and the workflow is started again.
    """
    batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")
    try:
        payload = json.loads(request.body.decode("utf-8") or "{}")
    except (UnicodeDecodeError, json.JSONDecodeError):
        # A body that is not UTF-8 is as unusable as malformed JSON.
        return JsonResponse({"error": "请求体不是有效 JSON。"}, status=400)
    # Valid JSON may still be a list or scalar, which has no ``conditions`` key.
    conditions = payload.get("conditions") if isinstance(payload, dict) else None
    if not isinstance(conditions, dict):
        return JsonResponse({"error": "conditions 必须是对象。"}, status=400)
    batch.condition_json = {
        **(batch.condition_json or {}),
        "confirmed": True,
        "confirmed_conditions": _normalize_conditions(conditions),
    }
    batch.status = RegulatoryReviewBatch.Status.RUNNING
    batch.save(update_fields=["condition_json", "status"])
    WorkflowNodeRun.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        node_code="condition_confirm",
    ).update(
        status=WorkflowNodeRun.Status.SUCCESS,
        progress=100,
        message="适用条件已确认",
    )
    record_event(
        batch,
        "condition_confirmed",
        {"conditions": batch.condition_json["confirmed_conditions"], "resume_from": "rule_scope"},
    )
    start_regulatory_review_workflow(
        batch,
        async_run=getattr(settings, "REGULATORY_REVIEW_ASYNC", True),
    )
    # Reload so the response reflects changes made while starting the workflow.
    batch.refresh_from_db()
    return JsonResponse(
        {
            "batch": {
                "id": batch.pk,
                "workflow_type": "regulatory_review",
                "batch_no": batch.batch_no,
                "status": batch.status,
                "condition_json": batch.condition_json,
            }
        }
    )
@require_http_methods(["POST"])
@login_required
def start_full_review(request, batch_id: int):
    """Start a new full review based on an existing batch and a new file summary.

    Expects a JSON object body with ``file_summary_batch_id``, which must
    name a successful summary batch of the same conversation and user. The
    new batch copies the confirmed conditions of the source batch and is
    started immediately. Responds with 400 for an invalid body or summary
    batch id, and raises ``Http404`` when the source batch does not belong
    to the requesting user.
    """
    source_batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not source_batch:
        raise Http404("批次不存在。")
    payload, error_response = _json_payload(request)
    if error_response:
        return error_response
    # Reject a missing, non-object or non-numeric id with a 400 instead of
    # letting the ORM lookup raise.
    raw_summary_id = payload.get("file_summary_batch_id") if isinstance(payload, dict) else None
    try:
        summary_batch_id = int(raw_summary_id)
    except (TypeError, ValueError):
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)
    summary_batch = FileSummaryBatch.objects.filter(
        pk=summary_batch_id,
        conversation=source_batch.conversation,
        user=request.user,
        status=FileSummaryBatch.Status.SUCCESS,
    ).first()
    if not summary_batch:
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)
    new_batch = create_regulatory_review_batch(
        conversation=source_batch.conversation,
        user=request.user,
        source_summary_batch=summary_batch,
    )
    # condition_json is treated as possibly empty elsewhere in this module.
    source_conditions = source_batch.condition_json or {}
    new_batch.condition_json = {
        "source_review_batch_id": source_batch.pk,
        "regenerated_from": {
            "batch_id": source_batch.pk,
            "batch_no": source_batch.batch_no,
            "file_summary_batch_id": source_batch.source_summary_batch_id,
            "file_summary_batch_no": source_batch.source_summary_batch.batch_no,
        },
        "confirmed": True,
        "confirmed_conditions": source_conditions.get("confirmed_conditions", {}),
    }
    new_batch.save(update_fields=["condition_json"])
    record_event(
        new_batch,
        "full_package_review_started",
        {"source_review_batch_id": source_batch.pk, "source_review_batch_no": source_batch.batch_no},
    )
    start_regulatory_review_workflow(
        new_batch,
        async_run=getattr(settings, "REGULATORY_REVIEW_ASYNC", True),
    )
    new_batch.refresh_from_db()
    return JsonResponse(
        {
            "batch": {
                "id": new_batch.pk,
                "workflow_type": "regulatory_review",
                "batch_no": new_batch.batch_no,
                "status": new_batch.status,
                "source_review_batch_id": source_batch.pk,
            }
        }
    )
@require_http_methods(["POST"])
@login_required
def review_issues(request, batch_id: int):
    """Re-check selected issues of a batch against a new file summary batch.

    Expects a JSON object body with ``issue_ids`` (a list of integer ids)
    and ``file_summary_batch_id``. Responds with 400 for an invalid body,
    non-integer issue ids or an unusable summary batch id, and raises
    ``Http404`` when the batch does not belong to the requesting user.
    """
    batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")
    payload, error_response = _json_payload(request)
    if error_response:
        return error_response
    if not isinstance(payload, dict):
        # Valid JSON that is not an object carries no usable fields.
        payload = {}
    issue_ids = payload.get("issue_ids")
    if not isinstance(issue_ids, list):
        return JsonResponse({"error": "issue_ids 必须是列表。"}, status=400)
    try:
        normalized_issue_ids = [int(item) for item in issue_ids]
    except (TypeError, ValueError):
        # Client-supplied ids that are not integers are a request error.
        return JsonResponse({"error": "issue_ids 必须是整数列表。"}, status=400)
    try:
        summary_batch_id = int(payload.get("file_summary_batch_id"))
    except (TypeError, ValueError):
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)
    summary_batch = FileSummaryBatch.objects.filter(
        pk=summary_batch_id,
        conversation=batch.conversation,
        user=request.user,
        status=FileSummaryBatch.Status.SUCCESS,
    ).first()
    if not summary_batch:
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)
    record = review_missing_issues(
        batch=batch,
        issue_ids=normalized_issue_ids,
        file_summary_batch=summary_batch,
    )
    return JsonResponse({"review_record": record})
def _format_risk_summary(risk_summary: dict) -> str:
labels = [
("blocking", "阻断项"),
("high", "高风险"),
("medium", "中风险"),
("low", "低风险"),
("info", "提示"),
]
return " · ".join(
f"{label} {int(risk_summary.get(key) or 0)}"
for key, label in labels
if int(risk_summary.get(key) or 0)
)
def _normalize_conditions(conditions: dict) -> dict[str, str]:
allowed = [
"product_category",
"registration_type",
"clinical_evaluation_path",
"product_name",
"model_spec",
"intended_use",
]
return {key: str(conditions.get(key) or "").strip() for key in allowed}
def _json_payload(request):
try:
return json.loads(request.body.decode("utf-8") or "{}"), None
except json.JSONDecodeError:
return {}, JsonResponse({"error": "请求体不是有效 JSON。"}, status=400)

View File

@@ -0,0 +1,562 @@
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from threading import Thread
from uuid import uuid4
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from review_agent.models import (
Conversation,
FileSummaryBatch,
Message,
RegulatoryReviewBatch,
WorkflowNodeRun,
)
from review_agent.regulatory_review.services.completeness_check import run_completeness_check
from review_agent.regulatory_review.services.consistency_check import run_consistency_check
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
from review_agent.regulatory_review.services.risk_assess import persist_findings
from review_agent.regulatory_review.services.rule_loader import load_rule_file
from review_agent.regulatory_review.services.structure_check import run_structure_check
from review_agent.regulatory_review.services.text_extract import extract_text
from .events import record_event
from .storage import save_artifact
# Workflow nodes as (node_code, node_name, node_group) tuples.
# create_regulatory_review_batch() creates one WorkflowNodeRun per entry, in
# this order.
NODE_DEFINITIONS = [
    ("prepare", "准备", "prepare"),
    ("condition_confirm", "适用条件确认", "condition_confirm"),
    ("rule_scope", "规则范围", "rule_scope"),
    ("completeness_check", "完整性核查", "completeness_check"),
    ("text_extract", "文本抽取", "text_extract"),
    ("structure_check", "章节核查", "structure_check"),
    ("consistency_check", "一致性核查", "consistency_check"),
    ("risk_assess", "风险评估", "risk_assess"),
    ("report_export", "报告输出", "report_export"),
    ("completed", "完成", "completed"),
]
logger = logging.getLogger("review_agent.regulatory_review.workflow")
# Display labels for attachment 4 chapters, keyed by chapter number as a string.
# NOTE(review): the code that reads this mapping is outside this excerpt.
ATTACHMENT4_CHAPTER_LABELS = {
    "1": "第1章 监管信息",
    "2": "第2章 综述资料",
    "3": "第3章 非临床资料",
    "4": "第4章 临床评价资料",
    "5": "第5章 产品说明书和标签样稿",
    "6": "第6章 质量管理体系文件",
}
class WorkflowPausedForUser(Exception):
    """Signal that the workflow stopped to wait for user input.

    ``RegulatoryWorkflowExecutor.run`` catches this and returns without
    marking the batch as failed or finished.
    """
def build_batch_no() -> str:
    """Build a batch number of the form ``RR-<local timestamp>-<6 hex characters>``."""
    stamp = timezone.localtime().strftime('%Y%m%d%H%M%S')
    random_suffix = uuid4().hex[:6]
    return f"RR-{stamp}-{random_suffix}"
def build_batch_work_dir(batch_no: str) -> Path:
    """Return the work directory of a batch: ``MEDIA_ROOT/regulatory_review/work/<batch_no>``."""
    work_root = Path(settings.MEDIA_ROOT) / "regulatory_review" / "work"
    return work_root / batch_no
def find_latest_successful_summary_batch(conversation: Conversation) -> FileSummaryBatch | None:
    """Return the conversation's most recent successful file summary batch, or ``None``.

    Ordering is by ``finished_at``, then ``created_at``, then ``id``, all
    descending.
    """
    successful = FileSummaryBatch.objects.filter(
        conversation=conversation,
        status=FileSummaryBatch.Status.SUCCESS,
    )
    newest_first = successful.order_by("-finished_at", "-created_at", "-id")
    return newest_first.first()
@transaction.atomic
def create_regulatory_review_batch(
    *,
    conversation: Conversation,
    user,
    source_summary_batch: FileSummaryBatch,
    trigger_message: Message | None = None,
) -> RegulatoryReviewBatch:
    """Create a review batch, its work directory and one node run per workflow node.

    The database writes happen inside a single transaction. A
    ``workflow_created`` event is recorded before the batch is returned.
    """
    batch_no = build_batch_no()
    work_dir = build_batch_work_dir(batch_no)
    work_dir.mkdir(parents=True, exist_ok=True)

    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        source_summary_batch=source_summary_batch,
        batch_no=batch_no,
        work_dir=str(work_dir),
        condition_json=_initial_condition_json(trigger_message),
    )

    # One node run per definition, created in definition order.
    for definition in NODE_DEFINITIONS:
        node_code, node_name, node_group = definition
        WorkflowNodeRun.objects.create(
            workflow_type="regulatory_review",
            workflow_batch_id=batch.pk,
            node_group=node_group,
            node_code=node_code,
            node_name=node_name,
        )

    event_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", event_payload)
    return batch
class RegulatoryWorkflowExecutor:
def __init__(self, batch: RegulatoryReviewBatch):
self.batch = batch
self.rule_set: dict | None = None
self.findings = []
self.document_texts: dict[str, str] = {}
self.text_extract_status: dict[str, dict[str, object]] = {}
self.llm_reviews: dict[str, dict[str, object]] = {}
def run(self) -> None:
    """Run every node of the batch that has not succeeded yet, in id order.

    The batch is first marked RUNNING. Three outcomes are possible:

    * ``WorkflowPausedForUser`` is raised: the method returns and leaves the
      batch status to whoever raised the exception;
    * any other exception: the batch is marked FAILED with the exception
      message and a ``workflow_failed`` event is recorded;
    * all nodes finish: the batch is marked SUCCESS and a
      ``workflow_completed`` event is recorded.
    """
    logger.info("法规核查工作流开始 batch_no=%s batch_id=%s", self.batch.batch_no, self.batch.pk)
    self.batch.status = RegulatoryReviewBatch.Status.RUNNING
    self.batch.started_at = timezone.now()
    self.batch.save(update_fields=["status", "started_at"])
    record_event(self.batch, "workflow_started", {"batch_id": self.batch.pk})
    try:
        for node in self._nodes():
            # Nodes that already succeeded are skipped, so a resumed run
            # continues where the previous one stopped.
            if node.status == WorkflowNodeRun.Status.SUCCESS:
                continue
            self._run_node(node)
    except WorkflowPausedForUser:
        logger.info("法规核查工作流等待用户 batch_no=%s node=condition_confirm", self.batch.batch_no)
        return
    except Exception as exc:
        # Top-level boundary: record the failure on the batch instead of
        # letting the exception escape.
        logger.exception("Regulatory workflow failed", extra={"batch_id": self.batch.pk})
        self.batch.status = RegulatoryReviewBatch.Status.FAILED
        self.batch.error_message = str(exc)
        self.batch.finished_at = timezone.now()
        self.batch.save(update_fields=["status", "error_message", "finished_at"])
        record_event(self.batch, "workflow_failed", {"message": str(exc)})
        return
    self.batch.status = RegulatoryReviewBatch.Status.SUCCESS
    self.batch.finished_at = timezone.now()
    self.batch.save(update_fields=["status", "finished_at"])
    record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})
    logger.info("法规核查工作流完成 batch_no=%s findings=%s", self.batch.batch_no, len(self.findings))
def _nodes(self):
    """Return this batch's regulatory review node runs ordered by id."""
    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=self.batch.pk,
    )
    return node_runs.order_by("id")
def _run_node(self, node: WorkflowNodeRun) -> None:
    """Execute one node and record its RUNNING and SUCCESS transitions.

    The node is saved as RUNNING at 10% progress before ``_execute_node`` is
    called and as SUCCESS at 100% afterwards, with a ``node_progress`` event
    for each transition. If ``_execute_node`` raises, the success
    bookkeeping is skipped and the exception propagates to the caller.
    """
    logger.info(
        "节点开始 batch_no=%s node=%s name=%s",
        self.batch.batch_no,
        node.node_code,
        node.node_name,
    )
    node.status = WorkflowNodeRun.Status.RUNNING
    node.progress = 10
    node.started_at = timezone.now()
    node.message = f"{node.node_name}处理中"
    node.save(update_fields=["status", "progress", "started_at", "message"])
    record_event(
        self.batch,
        "node_progress",
        {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
    )
    # The node-specific work; it may report intermediate progress on its own.
    self._execute_node(node)
    node.status = WorkflowNodeRun.Status.SUCCESS
    node.progress = 100
    node.finished_at = timezone.now()
    node.message = f"{node.node_name}完成"
    node.save(update_fields=["status", "progress", "finished_at", "message"])
    record_event(
        self.batch,
        "node_progress",
        {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
    )
    logger.info(
        "节点完成 batch_no=%s node=%s name=%s progress=%s",
        self.batch.batch_no,
        node.node_code,
        node.node_name,
        node.progress,
    )
def _update_node_progress(
self,
node: WorkflowNodeRun,
*,
processed: int,
total: int,
message: str,
) -> None:
if total <= 0:
return
progress = min(95, 10 + int((max(processed, 0) / total) * 85))
node.progress = progress
node.message = message
node.save(update_fields=["progress", "message"])
record_event(
self.batch,
"node_progress",
{
"node_code": node.node_code,
"status": node.status,
"progress": node.progress,
"message": node.message,
"processed": processed,
"total": total,
},
)
logger.info(
"节点进度 batch_no=%s node=%s progress=%s processed=%s total=%s message=%s",
self.batch.batch_no,
node.node_code,
progress,
processed,
total,
message,
)
def _execute_node(self, node: WorkflowNodeRun) -> None:
    """Run the work that belongs to ``node.node_code``.

    Handled codes: ``condition_confirm``, ``rule_scope``,
    ``completeness_check``, ``text_extract``, ``structure_check``,
    ``consistency_check``, ``risk_assess`` and ``report_export``. Any other
    code is a no-op. The ``condition_confirm`` step may raise through
    ``_pause_for_condition_confirmation``.
    """
    stage = node.node_code

    def progress_reporter(title: str):
        # Build the callback handed to the check runners. Each update dict
        # they emit becomes a node-progress update titled with `title`.
        def report(update):
            return self._update_node_progress(
                node,
                processed=int(update.get("processed") or 0),
                total=int(update.get("total") or 0),
                message=(
                    f"{title} {update.get('processed')}/{update.get('total')}"
                    f"{update.get('label') or ''},发现{update.get('finding_count') or 0}项问题"
                ),
            )

        return report

    if stage == "condition_confirm":
        self._pause_for_condition_confirmation()

    elif stage == "rule_scope":
        base_rules = load_rule_file()
        self.rule_set = apply_rule_scope(base_rules, self.batch.condition_json.get("rule_scope") or {})
        logger.info(
            "方法执行 batch_no=%s method=apply_rule_scope requirements=%s scope=%s",
            self.batch.batch_no,
            len(self.rule_set.get("requirements", [])),
            self.batch.condition_json.get("rule_scope") or {},
        )

    elif stage == "completeness_check":
        new_findings = run_completeness_check(
            self.batch.source_summary_batch,
            self._rules(),
            progress_callback=progress_reporter("完整性核查"),
        )
        self.findings.extend(new_findings)
        logger.info(
            "方法执行 batch_no=%s method=run_completeness_check findings=%s source_summary=%s",
            self.batch.batch_no,
            len(new_findings),
            self.batch.source_summary_batch.batch_no,
        )
        review_payload = {
            "findings": [item.to_dict() for item in new_findings],
            "rules_count": len(self._rules().get("requirements", [])),
        }
        self._save_llm_review("completeness_check", review_payload)

    elif stage == "text_extract":
        self.document_texts = self._extract_source_texts(node)
        logger.info(
            "方法执行 batch_no=%s method=_extract_source_texts success_docs=%s total_files=%s",
            self.batch.batch_no,
            len(self.document_texts),
            len(self.text_extract_status),
        )
        self._save_llm_review("text_extract", {"files": self.text_extract_status})
        status_json = json.dumps(self.text_extract_status, ensure_ascii=False, indent=2)
        save_artifact(
            self.batch,
            name="text_extract_status.json",
            artifact_type="json",
            content=status_json,
            metadata={"artifact": "text_extract_status"},
        )

    elif stage == "structure_check":
        new_findings = run_structure_check(
            self.document_texts,
            self._rules(),
            progress_callback=progress_reporter("章节核查"),
        )
        self.findings.extend(new_findings)
        logger.info(
            "方法执行 batch_no=%s method=run_structure_check findings=%s docs=%s",
            self.batch.batch_no,
            len(new_findings),
            len(self.document_texts),
        )
        self._save_llm_review("structure_check", {"findings": [item.to_dict() for item in new_findings]})

    elif stage == "consistency_check":
        new_findings = run_consistency_check(
            self.document_texts,
            progress_callback=progress_reporter("一致性核查"),
        )
        self.findings.extend(new_findings)
        logger.info(
            "方法执行 batch_no=%s method=run_consistency_check findings=%s docs=%s",
            self.batch.batch_no,
            len(new_findings),
            len(self.document_texts),
        )
        self._save_llm_review("consistency_check", {"findings": [item.to_dict() for item in new_findings]})

    elif stage == "risk_assess":
        # The LLM review of the accumulated findings runs before they are
        # persisted, so its result is part of `self.llm_reviews` below.
        self._save_llm_review("risk_assess", {"findings": [item.to_dict() for item in self.findings]})
        issues = persist_findings(self.batch, self.findings)
        create_mock_notifications(self.batch)
        logger.info(
            "方法执行 batch_no=%s method=persist_findings issues=%s findings=%s",
            self.batch.batch_no,
            len(issues),
            len(self.findings),
        )
        issue_rows = [
            {
                "rule_code": issue.rule_code,
                "title": issue.title,
                "citations": issue.citations,
            }
            for issue in issues
        ]
        result_document = {
            "batch_no": self.batch.batch_no,
            "text_extract_status": self.text_extract_status,
            "issues": issue_rows,
            "llm_reviews": self.llm_reviews,
        }
        save_artifact(
            self.batch,
            name="rag_result_json.json",
            artifact_type="json",
            content=json.dumps(result_document, ensure_ascii=False, indent=2),
            metadata={"artifact": "rag_result_json"},
        )

    elif stage == "report_export":
        exports = export_review_results(self.batch)
        logger.info(
            "方法执行 batch_no=%s method=export_review_results exports=%s",
            self.batch.batch_no,
            len(exports),
        )
        # Post the summary into the conversation as an assistant message.
        Message.objects.create(
            conversation=self.batch.conversation,
            role=Message.Role.ASSISTANT,
            content=build_assistant_summary(self.batch, exports),
        )
def _pause_for_condition_confirmation(self) -> None:
    """Suspend the workflow until the user confirms the applicable conditions.

    Returns immediately when ``condition_json`` already carries a truthy
    ``confirmed`` flag. Otherwise condition candidates are detected from the
    source summary batch, stored on the batch together with
    ``resume_from="rule_scope"``, and both the batch and its
    ``condition_confirm`` node are switched to WAITING_USER. A
    ``waiting_user`` event is then recorded.

    Raises:
        WorkflowPausedForUser: always, once the waiting state is persisted.
    """
    # Treat a null/empty condition_json as "nothing confirmed yet". The merge
    # below already tolerated None, but this check used to call `.get` on it.
    if (self.batch.condition_json or {}).get("confirmed"):
        return

    candidates = detect_regulatory_condition_candidates(self.batch.source_summary_batch)
    logger.info(
        "方法执行 batch_no=%s method=detect_regulatory_condition_candidates product_category=%s product_name=%s",
        self.batch.batch_no,
        (candidates.get("product_category") or {}).get("suggested"),
        (candidates.get("product_name") or {}).get("suggested"),
    )

    # Keep existing keys (e.g. rule_scope) and add the pause bookkeeping.
    self.batch.condition_json = {
        **(self.batch.condition_json or {}),
        "confirmed": False,
        "resume_from": "rule_scope",
        "candidates": candidates,
    }
    self.batch.status = RegulatoryReviewBatch.Status.WAITING_USER
    self.batch.save(update_fields=["status", "condition_json"])

    node = WorkflowNodeRun.objects.get(
        workflow_type="regulatory_review",
        workflow_batch_id=self.batch.pk,
        node_code="condition_confirm",
    )
    node.status = WorkflowNodeRun.Status.WAITING_USER
    node.progress = 50
    node.message = "请确认产品类别、注册类型、临床评价路径等适用条件"
    node.save(update_fields=["status", "progress", "message"])

    record_event(
        self.batch,
        "waiting_user",
        {"node_code": "condition_confirm", "candidates": candidates, "resume_from": "rule_scope"},
    )
    # Control-flow signal: unwinds the executor so the run stops here.
    raise WorkflowPausedForUser()
def _rules(self) -> dict:
    """Return the scoped rule set, loading and caching it on first use."""
    if self.rule_set is not None:
        return self.rule_set
    scope = self.batch.condition_json.get("rule_scope") or {}
    self.rule_set = apply_rule_scope(load_rule_file(), scope)
    return self.rule_set
def _extract_source_texts(self, node: WorkflowNodeRun | None = None) -> dict[str, str]:
    """Extract text from every file of the source summary batch.

    Items are visited in ``file_index`` order. For each file an entry keyed
    by file name is written to ``self.text_extract_status``; files that do
    not exist on disk are recorded with status ``"missing"`` and skipped.
    When ``node`` is given, node progress is updated after every file.

    Returns:
        Mapping of file name to extracted text, containing only files whose
        extraction status is ``"success"`` and whose text is non-empty.
    """
    summary_items = list(self.batch.source_summary_batch.items.order_by("file_index"))
    total = len(summary_items)
    extracted: dict[str, str] = {}

    for position, item in enumerate(summary_items, start=1):
        # Relative storage paths are resolved against MEDIA_ROOT.
        candidate = Path(item.storage_path)
        path = candidate if candidate.is_absolute() else Path(settings.MEDIA_ROOT) / item.storage_path

        if not path.exists():
            logger.info("文本抽取跳过 batch_no=%s file=%s reason=missing", self.batch.batch_no, item.file_name)
            self.text_extract_status[item.file_name] = {
                "status": "missing",
                "path": str(path),
                "content_hash": "",
                "section_candidates": [],
                "field_candidates": {},
                "front_text": "",
            }
            if node:
                self._update_node_progress(
                    node,
                    processed=position,
                    total=total,
                    message=f"文本抽取 {position}/{total}{item.file_name}(文件不存在)",
                )
            continue

        result = extract_text(path)
        # Review the rule-extracted condition fields; the front text is
        # preferred as context, falling back to the whole document text.
        field_review = review_condition_fields(
            text=result.front_text or result.text,
            rule_fields=result.field_candidates or {},
            file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
        )
        status_entry = {
            "status": result.status,
            "path": str(path),
            "content_hash": result.content_hash,
            "section_candidates": result.section_candidates,
            "field_candidates": field_review.get("selected_fields", result.field_candidates),
            "field_review": field_review,
            "front_text": result.front_text,
            "error_message": result.error_message,
        }
        self.text_extract_status[item.file_name] = status_entry

        if result.status == "success" and result.text:
            extracted[item.file_name] = result.text

        logger.info(
            "文本抽取文件 batch_no=%s file=%s status=%s fields=%s chars=%s",
            self.batch.batch_no,
            item.file_name,
            result.status,
            len((field_review.get("selected_fields") or {})),
            len(result.text or ""),
        )
        if node:
            self._update_node_progress(
                node,
                processed=position,
                total=total,
                message=f"文本抽取 {position}/{total}{item.file_name}{result.status}",
            )

    return extracted
def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
    """Run the LLM review for one stage, remember it and store it as an artifact.

    The review is kept in ``self.llm_reviews[stage]`` and saved as
    ``llm_review_<stage>.json``.

    Returns:
        The review dict produced by ``review_workflow_payload``.
    """
    review = review_workflow_payload(stage=stage, payload=payload)
    self.llm_reviews[stage] = review

    logger.info(
        "方法执行 batch_no=%s method=review_workflow_payload stage=%s status=%s",
        self.batch.batch_no,
        stage,
        review.get("status"),
    )

    artifact_name = f"llm_review_{stage}.json"
    serialized = json.dumps(review, ensure_ascii=False, indent=2)
    save_artifact(
        self.batch,
        name=artifact_name,
        artifact_type="json",
        content=serialized,
        metadata={"artifact": "llm_review", "stage": stage},
    )
    return review
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
    """Start the regulatory review workflow for ``batch``.

    With ``async_run`` true (the default) the executor runs on a daemon
    thread and this function returns immediately; otherwise it runs inline
    and returns once the executor's ``run`` has finished.
    """
    executor = RegulatoryWorkflowExecutor(batch)
    if async_run:
        worker = Thread(target=executor.run, daemon=True)
        worker.start()
    else:
        executor.run()
def _initial_condition_json(trigger_message: Message | None) -> dict:
    """Derive the initial ``condition_json`` from the triggering message.

    Returns ``{"rule_scope": scope}`` when the message text names a chapter
    scope, otherwise an empty dict.
    """
    text = trigger_message.content if trigger_message else ""
    scope = detect_attachment4_chapter_scope(text)
    if not scope:
        return {}
    return {"rule_scope": scope}
def detect_attachment4_chapter_scope(content: str) -> dict[str, str] | None:
    """Detect a chapter restriction in free text.

    Returns a dict with ``attachment4_chapter`` and its ``label`` when the
    text mentions a chapter listed in ``ATTACHMENT4_CHAPTER_LABELS``;
    returns ``None`` for empty text or when no known chapter is found.
    """
    text = (content or "").strip()
    if text:
        chapter = _extract_chapter_number(text)
        if chapter in ATTACHMENT4_CHAPTER_LABELS:
            return {
                "attachment4_chapter": chapter,
                "label": ATTACHMENT4_CHAPTER_LABELS[chapter],
            }
    return None
def apply_rule_scope(rule_set: dict, rule_scope: dict) -> dict:
    """Restrict a rule set to the chapter named in ``rule_scope``.

    When ``rule_scope["attachment4_chapter"]`` is not a known chapter the
    original ``rule_set`` object is returned untouched. Otherwise a shallow
    copy is returned whose ``requirements`` keep only the entries belonging
    to that chapter and whose ``active_rule_scope`` records the scope.
    """
    chapter = str(rule_scope.get("attachment4_chapter") or "")
    if chapter in ATTACHMENT4_CHAPTER_LABELS:
        in_scope = [
            entry
            for entry in rule_set.get("requirements", [])
            if _requirement_in_chapter(entry, chapter)
        ]
        return {**rule_set, "requirements": in_scope, "active_rule_scope": rule_scope}
    return rule_set
def _requirement_in_chapter(requirement: dict, chapter: str) -> bool:
    """Tell whether a requirement's ``attachment4_code`` falls under ``chapter``.

    A code belongs to the chapter when it equals the chapter or starts with
    ``"<chapter>."`` (so ``"3"`` and ``"3.1"`` match chapter ``"3"``, but
    ``"31"`` does not).
    """
    code = str(requirement.get("attachment4_code") or "")
    # Appending a dot to both sides folds "equal" and "dotted child" into a
    # single prefix comparison.
    return f"{code}.".startswith(f"{chapter}.")
def _extract_chapter_number(content: str) -> str:
    """Return the chapter number (``"1"``..``"6"``) mentioned in ``content``.

    Recognises an Arabic digit 1-6 or a Chinese numeral (normalised through
    ``_normalize_chapter_number``) followed, after optional whitespace, by
    one of ``章``/``节``/``张``. The first such mention in the text wins.

    A numeral that is merely the tail of a longer number is ignored, e.g.
    the ``6`` in ``16章`` or the ``一`` in ``十一章``. Previously an
    unguarded first pattern matched those and made the guarded second
    pattern unreachable.

    Returns:
        The chapter number as a string, or ``""`` when none is found.
    """
    match = re.search(
        r"(?<![\d一二三四五六七八九十])([一二三四五六1-6])\s*[章节张]",
        content,
    )
    if not match:
        return ""
    token = match.group(1)
    if token in "123456":
        # Arabic digits are already in canonical form.
        return token
    return _normalize_chapter_number(token)
def _normalize_chapter_number(value: str) -> str:
    """Convert a Chinese chapter numeral (一..六) to its Arabic digit string.

    Values that are not one of the six supported numerals, including Arabic
    digits, are returned unchanged.
    """
    # The keys must be the numerals captured by the chapter regex. With empty
    # keys the dict collapsed to a single entry and nothing was normalised.
    chinese = {"一": "1", "二": "2", "三": "3", "四": "4", "五": "5", "六": "6"}
    return chinese.get(value, value)

View File

@@ -10,7 +10,12 @@ from django.utils import timezone
from .file_summary.skills.attachment_reader import AttachmentReaderSkill from .file_summary.skills.attachment_reader import AttachmentReaderSkill
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
from .models import Conversation, FileAttachment, Message from .models import Conversation, FileAttachment, FileSummaryBatch, Message
from .regulatory_review.workflow import (
create_regulatory_review_batch,
find_latest_successful_summary_batch,
start_regulatory_review_workflow,
)
from .skill_router import route_message_intent from .skill_router import route_message_intent
@@ -219,6 +224,85 @@ def stream_message(conversation: Conversation, content: str):
) )
return return
if route.starts_regulatory_review:
source_summary_batch = find_latest_successful_summary_batch(conversation)
if not source_summary_batch:
if not _has_active_attachments(conversation):
reply_content = "请先在当前对话右侧上传需要核查的文件或压缩包,我会先自动汇总再继续法规核查。"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
summary_batch = create_file_summary_batch(
conversation=conversation,
user=conversation.user,
trigger_message=user_message,
)
yield sse_event(
"workflow_started",
{
"workflow_type": "file_summary",
"batch_id": summary_batch.pk,
"batch_no": summary_batch.batch_no,
},
)
start_file_summary_workflow(summary_batch, async_run=False)
summary_batch.refresh_from_db()
if summary_batch.status != FileSummaryBatch.Status.SUCCESS:
reply_content = f"已先启动文件目录与页数自动汇总工作流,批次号:{summary_batch.batch_no},但汇总未成功:{summary_batch.error_message or '原因待查看'}。请处理后再启动法规核查。"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
source_summary_batch = summary_batch
reply_prefix = f"已先启动文件目录与页数自动汇总工作流,批次号:{summary_batch.batch_no},汇总完成后继续法规核查。\n"
else:
reply_prefix = ""
batch = create_regulatory_review_batch(
conversation=conversation,
user=conversation.user,
trigger_message=user_message,
source_summary_batch=source_summary_batch,
)
start_regulatory_review_workflow(
batch,
async_run=getattr(settings, "REGULATORY_REVIEW_ASYNC", True),
)
reply_content = f"{reply_prefix}已启动 NMPA 注册资料法规核查工作流,批次号:{batch.batch_no}"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event(
"workflow_started",
{
"workflow_type": "regulatory_review",
"batch_id": batch.pk,
"batch_no": batch.batch_no,
},
)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
stream_failed = False stream_failed = False
stream_error = "" stream_error = ""
try: try:

View File

@@ -15,6 +15,7 @@ from .models import Conversation, FileAttachment
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"} ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"}
ROUTE_ACTIONS.add("regulatory_review")
@dataclass(frozen=True) @dataclass(frozen=True)
@@ -34,6 +35,10 @@ class SkillRoute:
def starts_file_summary(self) -> bool: def starts_file_summary(self) -> bool:
return self.action == "file_summary" return self.action == "file_summary"
# Read-only flag: True when this route's action is "regulatory_review",
# i.e. the message should start the regulatory review workflow.
@property
def starts_regulatory_review(self) -> bool:
return self.action == "regulatory_review"
@property @property
def is_normal_chat(self) -> bool: def is_normal_chat(self) -> bool:
return self.action == "normal_chat" return self.action == "normal_chat"
@@ -100,7 +105,7 @@ def _route_with_llm(
return SkillRoute( return SkillRoute(
action=action, action=action,
skill_name="attachment_reader" if action == "attachment_reader" else "", skill_name="attachment_reader" if action == "attachment_reader" else "",
workflow_type="file_summary" if action == "file_summary" else "", workflow_type=action if action in {"file_summary", "regulatory_review"} else "",
confidence=_float_or_zero(payload.get("confidence")), confidence=_float_or_zero(payload.get("confidence")),
reason=str(payload.get("reason") or ""), reason=str(payload.get("reason") or ""),
source="llm", source="llm",
@@ -108,6 +113,15 @@ def _route_with_llm(
def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute: def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute:
if _matches_regulatory_review(content):
return SkillRoute(
action="regulatory_review",
workflow_type="regulatory_review",
confidence=0.7,
reason="命中法规核查关键词。",
source="rule_fallback",
)
file_summary = evaluate_file_summary_trigger(conversation, content) file_summary = evaluate_file_summary_trigger(conversation, content)
if file_summary.should_start or file_summary.reason == "missing_attachment": if file_summary.should_start or file_summary.reason == "missing_attachment":
return SkillRoute( return SkillRoute(
@@ -148,9 +162,10 @@ def _router_system_prompt() -> str:
return ( return (
"你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。" "你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。"
"你必须只输出 JSON 对象,不要输出 Markdown。" "你必须只输出 JSON 对象,不要输出 Markdown。"
"可选 actionnormal_chat、attachment_reader、file_summary。" "可选 actionnormal_chat、attachment_reader、file_summary、regulatory_review"
"attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。" "attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。"
"file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。" "file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。"
"regulatory_review 用于用户要求法规核查、NMPA核查、完整性核查、章节一致性核查、风险预警或整改建议。"
"normal_chat 用于不需要读取附件或执行工作流的一般问答。" "normal_chat 用于不需要读取附件或执行工作流的一般问答。"
"输出字段action、confidence、reason。" "输出字段action、confidence、reason。"
) )
@@ -187,3 +202,18 @@ def _float_or_zero(value) -> float:
return float(value) return float(value)
except (TypeError, ValueError): except (TypeError, ValueError):
return 0.0 return 0.0
def _matches_regulatory_review(content: str) -> bool:
    """Return True when ``content`` contains a regulatory-review keyword.

    Matching is a case-insensitive substring test against a fixed keyword
    list.
    """
    triggers = (
        "法规核查",
        "nmpa核查",
        "nmpa 核查",
        "完整性核查",
        "风险预警",
        "整改建议",
        "章节核查",
        "一致性核查",
    )
    haystack = content.lower()
    for trigger in triggers:
        if trigger in haystack:
            return True
    return False

View File

@@ -10,6 +10,12 @@ from .file_summary.views import (
conversation_messages, conversation_messages,
export_download, export_download,
) )
from .regulatory_review.views import (
batch_status as regulatory_review_batch_status,
confirm_conditions as regulatory_review_confirm_conditions,
review_issues as regulatory_review_review_issues,
start_full_review as regulatory_review_start_full_review,
)
urlpatterns = [ urlpatterns = [
@@ -58,4 +64,24 @@ urlpatterns = [
export_download, export_download,
name="file_summary_export_download", name="file_summary_export_download",
), ),
path(
"api/review-agent/regulatory-review/<int:batch_id>/status/",
regulatory_review_batch_status,
name="regulatory_review_batch_status",
),
path(
"api/review-agent/regulatory-review/<int:batch_id>/conditions/",
regulatory_review_confirm_conditions,
name="regulatory_review_confirm_conditions",
),
path(
"api/review-agent/regulatory-review/<int:batch_id>/full-review/",
regulatory_review_start_full_review,
name="regulatory_review_start_full_review",
),
path(
"api/review-agent/regulatory-review/<int:batch_id>/issue-review/",
regulatory_review_review_issues,
name="regulatory_review_review_issues",
),
] ]

View File

@@ -11,7 +11,8 @@ from .services import (
send_message, send_message,
stream_message, stream_message,
) )
from .models import Conversation, FileAttachment, FileSummaryBatch from .models import Conversation, FileAttachment, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
from .regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
@login_required @login_required
@@ -42,6 +43,9 @@ def workspace(request: HttpRequest) -> HttpResponse:
if current is None and conversations.exists(): if current is None and conversations.exists():
current = conversations.first() current = conversations.first()
workflow_cards = build_workflow_cards(current) if current else []
condition_confirmation = build_condition_confirmation(workflow_cards)
return render( return render(
request, request,
"home.html", "home.html",
@@ -52,7 +56,8 @@ def workspace(request: HttpRequest) -> HttpResponse:
"current_conversation": current, "current_conversation": current,
"messages": current.messages.all() if current else [], "messages": current.messages.all() if current else [],
"attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [], "attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [],
"summary_batches": FileSummaryBatch.objects.filter(conversation=current).prefetch_related("node_runs").order_by("-created_at")[:5] if current else [], "workflow_cards": workflow_cards,
"condition_confirmation": condition_confirmation,
}, },
) )
@@ -109,3 +114,76 @@ def stream_chat(request: HttpRequest) -> HttpResponse:
response["Cache-Control"] = "no-cache" response["Cache-Control"] = "no-cache"
response["X-Accel-Buffering"] = "no" response["X-Accel-Buffering"] = "no"
return response return response
def build_workflow_cards(conversation: Conversation) -> list[dict[str, object]]:
    """Build the most recent workflow cards (at most five) for a conversation.

    Cards are produced for file-summary and regulatory-review batches and
    returned newest first by ``created_at``.

    Only the five newest batches of each workflow type are loaded: the final
    result never contains more than five cards, so older batches cannot
    appear in it. This keeps the per-batch work (condition candidate lookup,
    notification and artifact counts, node queries) bounded instead of
    growing with the conversation's history.
    """
    max_cards = 5
    cards: list[dict[str, object]] = []

    summary_batches = (
        FileSummaryBatch.objects.filter(conversation=conversation)
        .prefetch_related("node_runs")
        .order_by("-created_at")[:max_cards]
    )
    for batch in summary_batches:
        cards.append(
            {
                "id": batch.pk,
                "workflow_type": "file_summary",
                "batch_no": batch.batch_no,
                "status": batch.status,
                "error_message": batch.error_message,
                "risk_label": "",
                "created_at": batch.created_at,
                # Sort in Python so the prefetched rows are reused; calling
                # order_by() on the related manager would issue a new query
                # per batch and ignore the prefetch cache.
                "nodes": sorted(batch.node_runs.all(), key=lambda run: run.id),
            }
        )

    regulatory_batches = (
        RegulatoryReviewBatch.objects.filter(conversation=conversation)
        .order_by("-created_at")[:max_cards]
    )
    for batch in regulatory_batches:
        condition_candidates = ensure_regulatory_condition_candidates(batch)
        cards.append(
            {
                "id": batch.pk,
                "workflow_type": "regulatory_review",
                "batch_no": batch.batch_no,
                "status": batch.status,
                "error_message": batch.error_message,
                "risk_label": _format_risk_label(batch.risk_summary or {}),
                "condition_json": batch.condition_json or {},
                "condition_candidates": condition_candidates,
                "notification_count": batch.notifications.count(),
                "review_record_count": batch.artifacts.filter(metadata__artifact="review_record").count(),
                "created_at": batch.created_at,
                # Node runs reference the batch by (workflow_type, batch id)
                # rather than through a relation on the batch.
                "nodes": list(
                    WorkflowNodeRun.objects.filter(
                        workflow_type="regulatory_review",
                        workflow_batch_id=batch.pk,
                    ).order_by("id")
                ),
            }
        )

    return sorted(cards, key=lambda item: item["created_at"], reverse=True)[:max_cards]
def build_condition_confirmation(workflow_cards: list[dict[str, object]]) -> dict[str, object] | None:
for card in workflow_cards:
if (
card.get("workflow_type") == "regulatory_review"
and card.get("status") == RegulatoryReviewBatch.Status.WAITING_USER
and card.get("condition_candidates")
):
return {
"id": card["id"],
"batch_no": card["batch_no"],
"candidates": card["condition_candidates"],
}
return None
def _format_risk_label(risk_summary: dict) -> str:
    """Render non-zero risk counts as a single display label.

    Levels appear in fixed severity order (blocking, high, medium, low,
    info); levels whose count is missing or zero are omitted. Returns an
    empty string when nothing is left.
    """
    level_names = (
        ("blocking", "阻断项"),
        ("high", "高风险"),
        ("medium", "中风险"),
        ("low", "低风险"),
        ("info", "提示"),
    )
    counted = ((name, int(risk_summary.get(level) or 0)) for level, name in level_names)
    return " · ".join(f"{name} {count}" for name, count in counted if count)

View File

@@ -310,7 +310,7 @@
function appendConversationMessage(message) { function appendConversationMessage(message) {
if (!message || document.querySelector('.message[data-message-id="' + message.id + '"]')) { if (!message || document.querySelector('.message[data-message-id="' + message.id + '"]')) {
return; return false;
} }
var label = message.role === "assistant" ? "AI " : "用户 "; var label = message.role === "assistant" ? "AI " : "用户 ";
label += document.querySelectorAll(".message").length + 1; label += document.querySelectorAll(".message").length + 1;
@@ -320,6 +320,7 @@
if (message.role === "user") { if (message.role === "user") {
appendNode(created.article.id, label, true); appendNode(created.article.id, label, true);
} }
return true;
} }
async function refreshConversationMessages() { async function refreshConversationMessages() {
@@ -337,14 +338,21 @@
return; return;
} }
var payload = await response.json(); var payload = await response.json();
(payload.messages || []).forEach(appendConversationMessage); var appendedCount = 0;
(payload.messages || []).forEach(function (message) {
if (appendConversationMessage(message)) {
appendedCount += 1;
}
});
if (payload.latest_message_id) { if (payload.latest_message_id) {
latestMessageId = Math.max(latestMessageId, payload.latest_message_id); latestMessageId = Math.max(latestMessageId, payload.latest_message_id);
} }
syncNodeRailVisibility(); syncNodeRailVisibility();
bindNodeAnchorClicks(); bindNodeAnchorClicks();
setActiveNode(); setActiveNode();
if (appendedCount > 0) {
scrollChatToBottom(); scrollChatToBottom();
}
} catch (error) { } catch (error) {
console.error("Conversation message refresh failed", error); console.error("Conversation message refresh failed", error);
} }
@@ -455,6 +463,12 @@
return summaryPanel.getAttribute(attributeName).replace(token, value); return summaryPanel.getAttribute(attributeName).replace(token, value);
} }
/**
 * Build the status-polling URL for a workflow batch.
 * Regulatory-review batches use their own URL template attribute; every
 * other workflow type uses the default status template.
 */
function statusUrlForWorkflow(workflow_type, batchId) {
    if (workflow_type === "regulatory_review") {
        return templateUrl("data-regulatory-status-url-template", "__batch_id__", batchId);
    }
    return templateUrl("data-status-url-template", "__batch_id__", batchId);
}
function renderAttachments(attachments) { function renderAttachments(attachments) {
if (!attachmentList) { if (!attachmentList) {
return; return;
@@ -542,13 +556,17 @@
if (empty) { if (empty) {
empty.remove(); empty.remove();
} }
var card = workflowCardList.querySelector('[data-batch-id="' + batch.batch_id + '"]'); var workflow_type = batch.workflow_type || "file_summary";
var card = workflowCardList.querySelector(
'[data-batch-id="' + batch.batch_id + '"][data-workflow-type="' + workflow_type + '"]'
);
if (card) { if (card) {
return card; return card;
} }
card = document.createElement("article"); card = document.createElement("article");
card.className = "workflow-card"; card.className = "workflow-card";
card.setAttribute("data-batch-id", batch.batch_id); card.setAttribute("data-batch-id", batch.batch_id);
card.setAttribute("data-workflow-type", workflow_type);
card.innerHTML = card.innerHTML =
"<header><strong>" + "<header><strong>" +
escapeHtml(batch.batch_no || "文件汇总") + escapeHtml(batch.batch_no || "文件汇总") +
@@ -634,13 +652,91 @@
selectWorkflowBatchIndex(activeIndex); selectWorkflowBatchIndex(activeIndex);
} }
async function refreshWorkflowCard(batchId) { function ensureConditionConfirmationCard(confirmation) {
if (!chatScroll || !confirmation || !confirmation.candidates) {
return;
}
var cardId = "condition-confirmation-" + confirmation.batch_id;
removeStaleConditionConfirmationCards(cardId);
if (document.getElementById(cardId)) {
return;
}
var article = document.createElement("article");
article.className = "message assistant";
article.id = cardId;
article.setAttribute("data-condition-confirmation-card", "");
article.setAttribute("data-node-label", "AI 适用条件确认");
var avatar = document.createElement("div");
avatar.className = "message-avatar";
avatar.textContent = "AI";
var bubble = document.createElement("div");
bubble.className = "message-bubble";
var form = document.createElement("form");
form.className = "condition-confirm-form";
form.setAttribute("data-condition-confirm-form", "");
form.setAttribute("data-batch-id", confirmation.batch_id);
form.setAttribute("data-confirm-url", confirmation.confirm_url);
form.innerHTML =
'<input type="hidden" name="csrfmiddlewaretoken" value="' +
escapeHtml(new FormData(composer).get("csrfmiddlewaretoken") || "") +
'">' +
"<strong>适用条件确认</strong>" +
"<p>请确认 " +
escapeHtml(confirmation.batch_no || "") +
" 的产品类别、注册类型和临床评价路径,确认后我会继续法规核查。</p>" +
renderConditionFields(confirmation.candidates) +
'<button type="submit">确认并继续</button>' +
'<p class="condition-confirm-status" data-condition-confirm-status></p>';
bubble.appendChild(form);
article.appendChild(avatar);
article.appendChild(bubble);
chatScroll.appendChild(article);
bindConditionConfirmForms();
scrollChatToBottom();
}
/**
 * Remove every condition-confirmation card from the document except the
 * one whose element id equals activeCardId.
 */
function removeStaleConditionConfirmationCards(activeCardId) {
    var cards = document.querySelectorAll("[data-condition-confirmation-card]");
    for (var index = 0; index < cards.length; index += 1) {
        var card = cards[index];
        if (card.id === activeCardId) {
            continue;
        }
        card.remove();
    }
}
/**
 * Render the form controls for the condition candidates as an HTML string.
 * Each candidate becomes a <label> holding either a <select> (when its
 * input_type is "select") or a text <input>, pre-filled with the suggested
 * value. All dynamic text is passed through escapeHtml.
 */
function renderConditionFields(candidates) {
    var source = candidates || {};
    return Object.keys(source)
        .map(function (field) {
            var config = source[field] || {};
            var heading = "<label><span>" + escapeHtml(config.label || field) + "</span>";
            var control;
            if (config.input_type === "select") {
                var opening = '<select name="' + escapeHtml(field) + '">';
                var optionMarkup = (config.options || [])
                    .map(function (option) {
                        // Pre-select the suggested option.
                        var selected = option === config.suggested ? " selected" : "";
                        return (
                            '<option value="' + escapeHtml(option) + '"' + selected + ">" + escapeHtml(option) + "</option>"
                        );
                    })
                    .join("");
                control = opening + optionMarkup + "</select>";
            } else {
                control =
                    '<input type="text" name="' +
                    escapeHtml(field) +
                    '" value="' +
                    escapeHtml(config.suggested || "") +
                    '">';
            }
            return heading + control + "</label>";
        })
        .join("");
}
async function refreshWorkflowCard(batchId, workflow_type) {
if (!summaryPanel || !batchId) { if (!summaryPanel || !batchId) {
return ""; return "";
} }
var response; var response;
try { try {
response = await fetch(templateUrl("data-status-url-template", "__batch_id__", batchId), { response = await fetch(statusUrlForWorkflow(workflow_type || "file_summary", batchId), {
cache: "no-store", cache: "no-store",
}); });
} catch (error) { } catch (error) {
@@ -652,9 +748,13 @@
return ""; return "";
} }
var payload = await response.json(); var payload = await response.json();
if (payload.condition_confirmation) {
ensureConditionConfirmationCard(payload.condition_confirmation);
}
var card = ensureWorkflowCard({ var card = ensureWorkflowCard({
batch_id: payload.batch.id, batch_id: payload.batch.id,
batch_no: payload.batch.batch_no, batch_no: payload.batch.batch_no,
workflow_type: payload.batch.workflow_type || workflow_type || "file_summary",
}); });
if (!card) { if (!card) {
return payload.batch.status || ""; return payload.batch.status || "";
@@ -673,6 +773,17 @@
} else if (batchError) { } else if (batchError) {
batchError.remove(); batchError.remove();
} }
var riskSummary = card.querySelector(".workflow-risk-summary");
if (payload.batch.risk_summary_text) {
if (!riskSummary) {
riskSummary = document.createElement("p");
riskSummary.className = "workflow-risk-summary";
card.insertBefore(riskSummary, card.querySelector("ol"));
}
riskSummary.textContent = payload.batch.risk_summary_text;
} else if (riskSummary) {
riskSummary.remove();
}
var list = card.querySelector("ol"); var list = card.querySelector("ol");
list.innerHTML = ""; list.innerHTML = "";
(payload.nodes || []).forEach(function (node) { (payload.nodes || []).forEach(function (node) {
@@ -724,29 +835,37 @@
return status === "success" || status === "failed"; return status === "success" || status === "failed";
} }
function stopWorkflowPolling(batchId) { function workflowTimerKey(batchId, workflow_type) {
if (!workflowPollingTimers[batchId]) { return (workflow_type || "file_summary") + ":" + batchId;
return;
}
window.clearInterval(workflowPollingTimers[batchId]);
delete workflowPollingTimers[batchId];
} }
function startWorkflowPolling(batchId) { function stopWorkflowPolling(batchId, workflow_type) {
if (!batchId || workflowPollingTimers[batchId]) { var key = workflowTimerKey(batchId, workflow_type);
if (!workflowPollingTimers[key]) {
return; return;
} }
workflowPollingTimers[batchId] = window.setInterval(async function () { window.clearInterval(workflowPollingTimers[key]);
var status = await refreshWorkflowCard(batchId); delete workflowPollingTimers[key];
}
function startWorkflowPolling(batchId, workflow_type) {
var card = workflowCardList ? workflowCardList.querySelector('[data-batch-id="' + batchId + '"]') : null;
workflow_type = workflow_type || (card ? card.getAttribute("data-workflow-type") || "file_summary" : "file_summary");
var key = workflowTimerKey(batchId, workflow_type);
if (!batchId || workflowPollingTimers[key]) {
return;
}
workflowPollingTimers[key] = window.setInterval(async function () {
var status = await refreshWorkflowCard(batchId, workflow_type);
if (isWorkflowTerminalStatus(status)) { if (isWorkflowTerminalStatus(status)) {
refreshConversationMessages(); refreshConversationMessages();
stopWorkflowPolling(batchId); stopWorkflowPolling(batchId, workflow_type);
} }
}, WORKFLOW_POLL_INTERVAL_MS); }, WORKFLOW_POLL_INTERVAL_MS);
refreshWorkflowCard(batchId).then(function (status) { refreshWorkflowCard(batchId, workflow_type).then(function (status) {
if (isWorkflowTerminalStatus(status)) { if (isWorkflowTerminalStatus(status)) {
refreshConversationMessages(); refreshConversationMessages();
stopWorkflowPolling(batchId); stopWorkflowPolling(batchId, workflow_type);
} }
}); });
} }
@@ -757,14 +876,91 @@
} }
workflowCardList.querySelectorAll(".workflow-card").forEach(function (card) { workflowCardList.querySelectorAll(".workflow-card").forEach(function (card) {
var batchId = card.getAttribute("data-batch-id"); var batchId = card.getAttribute("data-batch-id");
var workflow_type = card.getAttribute("data-workflow-type") || "file_summary";
var status = card.querySelector(".workflow-status"); var status = card.querySelector(".workflow-status");
var statusText = status ? status.textContent.trim() : ""; var statusText = status ? status.textContent.trim() : "";
if (!isWorkflowTerminalStatus(statusText)) { if (!isWorkflowTerminalStatus(statusText)) {
startWorkflowPolling(batchId); startWorkflowPolling(batchId, workflow_type);
} }
}); });
} }
/**
 * Wire a submit handler onto every `[data-condition-confirm-form]` element
 * that has not been bound yet.
 *
 * On submit the form fields (minus the CSRF token) are POSTed as JSON to the
 * URL in `data-confirm-url`. When the server answers with a non-OK status, or
 * the request itself fails, the status line shows a failure message and the
 * submit button is re-enabled. When the confirmation succeeds the form is
 * marked `confirmed`, polling for the regulatory-review batch is started and
 * the workflow card is refreshed once.
 */
function bindConditionConfirmForms() {
  document.querySelectorAll("[data-condition-confirm-form]").forEach(function (form) {
    // dataset.bound marks forms that already have a listener, so calling
    // this function repeatedly never stacks duplicate submit handlers.
    if (form.dataset.bound === "true") {
      return;
    }
    form.dataset.bound = "true";
    form.addEventListener("submit", async function (event) {
      event.preventDefault();
      var batchId = form.getAttribute("data-batch-id");
      var status = form.querySelector("[data-condition-confirm-status]");
      var submitButton = form.querySelector('button[type="submit"]');
      var formData = new FormData(form);
      var conditions = {};
      formData.forEach(function (value, key) {
        // The CSRF token travels in the X-CSRFToken header, not in the payload.
        if (key !== "csrfmiddlewaretoken") {
          conditions[key] = value;
        }
      });
      if (submitButton) {
        submitButton.disabled = true;
      }
      if (status) {
        status.textContent = "正在恢复法规核查...";
      }
      // Only the confirmation request itself decides between success and
      // failure. Follow-up UI refresh errors are handled separately below so
      // that a confirmed batch is never reported as "确认失败".
      try {
        var response = await fetch(form.getAttribute("data-confirm-url"), {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "X-CSRFToken": formData.get("csrfmiddlewaretoken"),
          },
          body: JSON.stringify({ conditions: conditions }),
        });
        if (!response.ok) {
          throw new Error("确认失败。");
        }
      } catch (error) {
        if (status) {
          status.textContent = "确认失败,请稍后重试。";
        }
        if (submitButton) {
          submitButton.disabled = false;
        }
        return;
      }
      if (status) {
        status.textContent = "已确认,工作流继续执行。";
      }
      form.classList.add("confirmed");
      try {
        startWorkflowPolling(batchId, "regulatory_review");
        await refreshWorkflowCard(batchId, "regulatory_review");
      } catch (refreshError) {
        // The confirmation already went through, so keep the success message
        // and leave the submit button disabled to avoid a duplicate POST.
        console.warn("Workflow card refresh failed after condition confirmation", refreshError);
      }
    });
  });
}
/**
 * Attach a click handler to every `[data-rectification-action]` button that
 * has not been bound yet. A click pre-fills the chat prompt input with a
 * re-review request for the button's batch number and focuses the input;
 * nothing is submitted here.
 */
function bindRectificationActionButtons() {
  var actionButtons = document.querySelectorAll("[data-rectification-action]");
  actionButtons.forEach(function (button) {
    var alreadyBound = button.dataset.bound === "true";
    if (alreadyBound) {
      return;
    }
    button.dataset.bound = "true";
    button.addEventListener("click", function () {
      // Without a prompt input there is nowhere to place the suggested text.
      if (!promptInput) {
        return;
      }
      var batchNo = button.getAttribute("data-batch-no") || "";
      var wantsFullReview = button.getAttribute("data-rectification-action") === "full-review";
      promptInput.value = wantsFullReview
        ? "请基于新的文件汇总批次,对法规核查批次 " + batchNo + " 发起整包复核,并先确认使用哪个补充批次。"
        : "请对法规核查批次 " + batchNo + " 的缺失项发起复核,并先确认 issue_ids 和补充文件汇总批次。";
      promptInput.focus();
    });
  });
}
async function streamChat(event) { async function streamChat(event) {
event.preventDefault(); event.preventDefault();
if (!composer || !promptInput || !sendButton || !chatStage) { if (!composer || !promptInput || !sendButton || !chatStage) {
@@ -872,7 +1068,7 @@
assistantMessage.text.innerHTML = renderAssistantContent(assistantText); assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
} else if (eventName === "workflow_started") { } else if (eventName === "workflow_started") {
ensureWorkflowCard(payload); ensureWorkflowCard(payload);
startWorkflowPolling(payload.batch_id); startWorkflowPolling(payload.batch_id, payload.workflow_type);
} else if (eventName === "done") { } else if (eventName === "done") {
if (payload.assistant_message_id) { if (payload.assistant_message_id) {
assistantMessage.article.id = "message-" + payload.assistant_message_id; assistantMessage.article.id = "message-" + payload.assistant_message_id;
@@ -924,6 +1120,8 @@
renderExistingAssistantMessages(); renderExistingAssistantMessages();
refreshWorkflowBatchCarousel(0); refreshWorkflowBatchCarousel(0);
bindWorkflowBatchCarouselControls(); bindWorkflowBatchCarouselControls();
bindConditionConfirmForms();
bindRectificationActionButtons();
refreshRunningWorkflowCards(); refreshRunningWorkflowCards();
if (chatScroll) { if (chatScroll) {

View File

@@ -124,6 +124,44 @@
</div> </div>
</article> </article>
{% endfor %} {% endfor %}
{% if condition_confirmation %}
<article
class="message assistant"
id="condition-confirmation-{{ condition_confirmation.id }}"
data-condition-confirmation-card
data-node-label="AI 适用条件确认"
>
<div class="message-avatar">AI</div>
<div class="message-bubble">
<form
class="condition-confirm-form"
data-condition-confirm-form
data-batch-id="{{ condition_confirmation.id }}"
data-confirm-url="/api/review-agent/regulatory-review/{{ condition_confirmation.id }}/conditions/"
>
{% csrf_token %}
<strong>适用条件确认</strong>
<p>请确认 {{ condition_confirmation.batch_no }} 的产品类别、注册类型和临床评价路径,确认后我会继续法规核查。</p>
{% for field, config in condition_confirmation.candidates.items %}
<label>
<span>{{ config.label }}</span>
{% if config.input_type == "select" %}
<select name="{{ field }}">
{% for option in config.options %}
<option value="{{ option }}"{% if option == config.suggested %} selected{% endif %}>{{ option }}</option>
{% endfor %}
</select>
{% else %}
<input type="text" name="{{ field }}" value="{{ config.suggested|default:'' }}">
{% endif %}
</label>
{% endfor %}
<button type="submit">确认并继续</button>
<p class="condition-confirm-status" data-condition-confirm-status></p>
</form>
</div>
</article>
{% endif %}
{% else %} {% else %}
<div class="empty-state"> <div class="empty-state">
<p class="eyebrow">审核智能体</p> <p class="eyebrow">审核智能体</p>
@@ -177,6 +215,7 @@
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/" data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/" data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/"
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/" data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
data-regulatory-status-url-template="/api/review-agent/regulatory-review/__batch_id__/status/"
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/" data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
> >
<section class="summary-section upload-section"> <section class="summary-section upload-section">
@@ -221,10 +260,11 @@
<h3>工作流</h3> <h3>工作流</h3>
</div> </div>
<div class="workflow-card-list workflow-batch-carousel" id="workflowCardList" data-active-index="0"> <div class="workflow-card-list workflow-batch-carousel" id="workflowCardList" data-active-index="0">
{% for batch in summary_batches %} {% for batch in workflow_cards %}
<article <article
class="workflow-card{% if forloop.first %} active{% endif %}" class="workflow-card{% if forloop.first %} active{% endif %}"
data-batch-id="{{ batch.pk }}" data-batch-id="{{ batch.id }}"
data-workflow-type="{{ batch.workflow_type }}"
data-workflow-index="{{ forloop.counter0 }}" data-workflow-index="{{ forloop.counter0 }}"
aria-hidden="{% if forloop.first %}false{% else %}true{% endif %}" aria-hidden="{% if forloop.first %}false{% else %}true{% endif %}"
> >
@@ -232,11 +272,31 @@
<strong>{{ batch.batch_no }}</strong> <strong>{{ batch.batch_no }}</strong>
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span> <span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
</header> </header>
{% if batch.risk_label %}
<p class="workflow-risk-summary">{{ batch.risk_label }}</p>
{% endif %}
{% if batch.workflow_type == "regulatory_review" %}
<div class="workflow-card-actions">
<button
type="button"
data-rectification-action="full-review"
data-batch-no="{{ batch.batch_no }}"
>整包复核</button>
<button
type="button"
data-rectification-action="issue-review"
data-batch-no="{{ batch.batch_no }}"
>缺失项复核</button>
</div>
<p class="workflow-record-summary">
通知 {{ batch.notification_count|default:0 }} · 复核记录 {{ batch.review_record_count|default:0 }}
</p>
{% endif %}
{% if batch.error_message %} {% if batch.error_message %}
<p class="workflow-error">{{ batch.error_message }}</p> <p class="workflow-error">{{ batch.error_message }}</p>
{% endif %} {% endif %}
<ol> <ol>
{% for node in batch.node_runs.all %} {% for node in batch.nodes %}
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}"> <li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
<div> <div>
<span>{{ node.node_name }}</span> <span>{{ node.node_name }}</span>
@@ -250,11 +310,11 @@
{% empty %} {% empty %}
<div class="panel-empty">暂无工作流</div> <div class="panel-empty">暂无工作流</div>
{% endfor %} {% endfor %}
{% if summary_batches %} {% if workflow_cards %}
<div class="workflow-batch-controls"> <div class="workflow-batch-controls">
<button type="button" class="workflow-batch-btn" data-workflow-action="prev" aria-label="上一个工作流">&lsaquo;</button> <button type="button" class="workflow-batch-btn" data-workflow-action="prev" aria-label="上一个工作流">&lsaquo;</button>
<div class="workflow-batch-dots" aria-label="工作流批次"> <div class="workflow-batch-dots" aria-label="工作流批次">
{% for batch in summary_batches %} {% for batch in workflow_cards %}
<button <button
type="button" type="button"
class="workflow-batch-dot{% if forloop.first %} active{% endif %}" class="workflow-batch-dot{% if forloop.first %} active{% endif %}"

View File

@@ -0,0 +1,8 @@
[
{"code": "1", "title": "监管信息", "children": ["章节目录", "申请表", "术语/缩写词列表", "产品列表", "关联文件", "申报前与监管机构的联系情况和沟通记录", "符合性声明"]},
{"code": "2", "title": "综述资料", "children": ["章节目录", "概述", "产品描述", "预期用途", "申报产品上市历史", "其他需说明的内容"]},
{"code": "3", "title": "非临床资料", "children": ["章节目录", "产品风险管理资料", "体外诊断试剂安全和性能基本原则清单", "产品技术要求及检验报告", "分析性能研究", "稳定性研究", "阳性判断值或参考区间研究", "其他资料"]},
{"code": "4", "title": "临床评价资料", "children": ["章节目录", "临床评价资料"]},
{"code": "5", "title": "产品说明书和标签样稿", "children": ["章节目录", "产品说明书", "标签样稿", "其他资料"]},
{"code": "6", "title": "质量管理体系文件", "children": ["综述", "章节目录", "生产制造信息", "质量管理体系程序", "管理职责程序", "资源管理程序", "产品实现程序", "质量管理体系的测量/分析和改进程序", "其他质量体系程序信息", "质量管理体系核查文件"]}
]

View File

@@ -187,6 +187,16 @@ def test_frontend_refreshes_generated_workflow_messages():
assert "data-message-url-template" in script assert "data-message-url-template" in script
def test_frontend_only_scrolls_after_appending_new_messages():
    """Check that app.js contains the markers of append-aware scrolling.

    This is a substring check on the script source; the JavaScript itself is
    not executed.
    """
    # The context manager closes the handle deterministically; the bare
    # open(...).read() form leaves it open until garbage collection.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert "return false;" in script
    assert "return true;" in script
    assert "var appendedCount = 0;" in script
    assert "if (appendConversationMessage(message))" in script
    assert "if (appendedCount > 0)" in script
def test_frontend_can_replace_partial_stream_content(): def test_frontend_can_replace_partial_stream_content():
script = open("static/js/app.js", encoding="utf-8").read() script = open("static/js/app.js", encoding="utf-8").read()

View File

@@ -0,0 +1,31 @@
import logging
from review_agent.logging_filters import SuppressWorkflowStatusPollFilter
def test_suppress_workflow_status_poll_filter_hides_status_poll_requests():
    """A GET request line for the regulatory status endpoint is rejected by the filter."""
    request_line = '"GET /api/review-agent/regulatory-review/7/status/ HTTP/1.1" 200 1660'
    poll_record = logging.LogRecord(
        "django.server",
        logging.INFO,
        "",
        1,
        request_line,
        (),
        None,
    )
    poll_filter = SuppressWorkflowStatusPollFilter()

    assert poll_filter.filter(poll_record) is False
def test_suppress_workflow_status_poll_filter_keeps_other_requests():
    """A POST request line for the conditions endpoint passes through the filter."""
    request_line = '"POST /api/review-agent/regulatory-review/7/conditions/ HTTP/1.1" 200 256'
    confirm_record = logging.LogRecord(
        "django.server",
        logging.INFO,
        "",
        1,
        request_line,
        (),
        None,
    )
    poll_filter = SuppressWorkflowStatusPollFilter()

    assert poll_filter.filter(confirm_record) is True

View File

@@ -0,0 +1,72 @@
import pytest
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
from review_agent.regulatory_review.services.completeness_check import run_completeness_check
from review_agent.regulatory_review.services.rule_loader import load_rule_file
pytestmark = pytest.mark.django_db
def test_completeness_check_matches_existing_files_and_reports_missing(django_user_model):
    """Files present in the batch are not reported; the missing test report is.

    The batch holds two .docx items. The check must not report
    "缺少产品技术要求", and must report the registration test report as a
    blocking completeness finding.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-CHECK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    # (file name, storage path); the relative path equals the file name.
    uploaded = [
        ("产品技术要求.docx", "x/product.docx"),
        ("说明书.docx", "x/ifu.docx"),
    ]
    for position, (name, stored_at) in enumerate(uploaded, start=1):
        FileSummaryItem.objects.create(
            batch=summary_batch,
            file_index=position,
            file_name=name,
            file_type="docx",
            relative_path=name,
            storage_path=stored_at,
        )

    results = run_completeness_check(summary_batch, load_rule_file())

    reported_titles = [result.title for result in results]
    assert "缺少3.4注册检验报告" in reported_titles
    assert "缺少产品技术要求" not in reported_titles
    report_gap = next(result for result in results if result.rule_code == "registration_test_report")
    assert report_gap.severity == "blocking"
    assert report_gap.category == "completeness"
def test_completeness_check_matches_attachment4_directory_names(django_user_model):
    """An application form filed under "1.2 申请表" satisfies its rule.

    Nothing is uploaded for chapter 6, so that rule must still be reported
    with high severity and with the searched fields recorded as evidence.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-A4",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    application_form = {
        "file_index": 1,
        "directory_level": "1. 监管信息 / 1.2 申请表",
        "file_name": "注册申请表.pdf",
        "file_type": "pdf",
        "relative_path": "1.监管信息/1.2申请表/注册申请表.pdf",
        "storage_path": "x/app.pdf",
    }
    FileSummaryItem.objects.create(batch=summary_batch, **application_form)

    results = run_completeness_check(summary_batch, load_rule_file())

    assert all(result.rule_code != "attachment4_1_2_application_form" for result in results)
    qms_gap = next(result for result in results if result.rule_code == "attachment4_6_quality_system")
    assert qms_gap.title == "缺少6质量管理体系文件"
    assert qms_gap.severity == "high"
    assert qms_gap.evidence["searched_fields"] == ["file_name", "relative_path", "directory_level"]

View File

@@ -0,0 +1,306 @@
import json
import pytest
from django.urls import reverse
from review_agent.models import (
Conversation,
FileSummaryBatch,
FileSummaryItem,
RegulatoryReviewBatch,
WorkflowEvent,
WorkflowNodeRun,
)
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
from review_agent.regulatory_review.workflow import (
create_regulatory_review_batch,
start_regulatory_review_workflow,
)
pytestmark = pytest.mark.django_db
def test_detect_regulatory_condition_candidates_from_summary_items(django_user_model):
    """Suggested conditions are produced from a summary batch and one item.

    The batch carries a product name, and the single item is named and filed
    as a clinical-evaluation-exemption document.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="甲胎蛋白检测试剂盒",
    )
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="临床评价资料",
        file_name="免临床评价资料.docx",
        file_type="docx",
        relative_path="4.临床评价资料/免临床评价资料.docx",
        storage_path="missing.docx",
    )

    detected = detect_regulatory_condition_candidates(summary_batch)

    expected_suggestions = {
        "product_category": "体外诊断试剂",
        "registration_type": "首次注册",
        "clinical_evaluation_path": "免临床",
        "product_name": "甲胎蛋白检测试剂盒",
    }
    for field, suggestion in expected_suggestions.items():
        assert detected[field]["suggested"] == suggestion
def test_detect_regulatory_condition_prefers_attachment_fields_over_chapter_title(settings, tmp_path, django_user_model):
    """Fields read from the application form win over the batch's product name.

    The batch's product_name is a chapter heading ("第1章 监管信息"), while
    the attached application form text carries the real values.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    form_path = tmp_path / "application.txt"
    form_text = (
        "产品名称:甲胎蛋白检测试剂盒\n"
        "型号规格20人份/盒\n"
        "预期用途:用于人血清中甲胎蛋白检测\n"
        "注册类型:首次注册\n"
    )
    form_path.write_text(form_text, encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(form_path),
    )

    detected = detect_regulatory_condition_candidates(summary_batch)

    assert detected["product_name"]["suggested"] == "甲胎蛋白检测试剂盒"
    assert detected["model_spec"]["suggested"] == "20人份/盒"
    assert detected["intended_use"]["suggested"] == "用于人血清中甲胎蛋白检测"
def test_detect_regulatory_condition_keeps_wrapped_product_name(settings, tmp_path, django_user_model):
    """A product name that wraps onto a second line is returned in full.

    The continuation line ("荧光PCR法") is expected to be joined to the first
    line with a single space.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    form_lines = [
        "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒",
        "荧光PCR法",
        "型号规格24人份/盒",
        "预期用途:用于呼吸道合胞病毒、肺炎支原体核酸检测",
    ]
    form_path = tmp_path / "application.txt"
    # Every line, including the last, ends with a newline.
    form_path.write_text("".join(line + "\n" for line in form_lines), encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(form_path),
    )

    detected = detect_regulatory_condition_candidates(summary_batch)

    assert detected["product_name"]["suggested"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 荧光PCR法"
    assert detected["model_spec"]["suggested"] == "24人份/盒"
def test_detect_regulatory_condition_uses_llm_review_for_better_product_name(
    monkeypatch, settings, tmp_path, django_user_model
):
    """The LLM-reviewed product name is selected and its source is "llm".

    `generate_completion` in the llm_review module is replaced with a stub
    that returns a fixed JSON reply, so no real model call is made.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    form_path = tmp_path / "application.txt"
    form_path.write_text(
        "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n型号规格24人份/盒\n",
        encoding="utf-8",
    )
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(form_path),
    )
    llm_name = "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 荧光PCR法"
    canned_reply = json.dumps({"fields": {"产品名称": llm_name}}, ensure_ascii=False)

    def fake_generate_completion(messages, temperature=0.0):
        return canned_reply

    monkeypatch.setattr(
        "review_agent.regulatory_review.services.llm_review.generate_completion",
        fake_generate_completion,
    )

    detected = detect_regulatory_condition_candidates(summary_batch)

    name_candidate = detected["product_name"]
    assert name_candidate["suggested"] == llm_name
    assert name_candidate["source"] == "llm"
def test_detect_regulatory_condition_infers_fields_from_unlabeled_attachment_text(
    settings, tmp_path, django_user_model
):
    """Conditions are inferred from attachments that carry no field labels.

    Neither attachment uses a "产品名称" style label. The expected product name
    is reported with source "inferred".
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    # (path on disk, display name, text content), in file_index order.
    attachments = [
        (
            tmp_path / "standard_list.txt",
            "符合标准的清单.txt",
            "国家药品监督管理局:\n"
            "卡尤迪生物科技宜兴有限公司申请境内第三类体外诊断试剂"
            "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法产品注册。\n",
        ),
        (
            tmp_path / "product_list.txt",
            "产品列表.txt",
            "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n"
            "荧光PCR法\n"
            "产品的包装规格\n"
            "24人份/盒、48人份/盒\n",
        ),
    ]
    # Both files are written before any item row is created.
    for disk_path, _, body in attachments:
        disk_path.write_text(body, encoding="utf-8")
    for position, (disk_path, display_name, _) in enumerate(attachments, start=1):
        FileSummaryItem.objects.create(
            batch=summary_batch,
            file_index=position,
            directory_level="第1章 监管信息",
            file_name=display_name,
            file_type="txt",
            relative_path="第1章 监管信息/" + display_name,
            storage_path=str(disk_path),
        )

    detected = detect_regulatory_condition_candidates(summary_batch)

    assert detected["product_category"]["suggested"] == "体外诊断试剂"
    name_candidate = detected["product_name"]
    assert name_candidate["suggested"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法"
    assert name_candidate["source"] == "inferred"
    assert detected["model_spec"]["suggested"] == "24人份/盒、48人份/盒"
def test_workflow_pauses_before_rule_scope_until_conditions_confirmed(settings, tmp_path, django_user_model):
    """A synchronously started workflow stops at condition confirmation.

    Afterwards the batch and the condition_confirm node are WAITING_USER, the
    rule_scope node is still PENDING, candidates are stored on the batch, and
    a "waiting_user" event has been recorded.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="甲胎蛋白检测试剂盒",
    )
    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )

    start_regulatory_review_workflow(review_batch, async_run=False)
    review_batch.refresh_from_db()

    def node_run(code):
        # Look up one node run of this batch by its node code.
        return WorkflowNodeRun.objects.get(
            workflow_type="regulatory_review",
            workflow_batch_id=review_batch.pk,
            node_code=code,
        )

    confirm_node = node_run("condition_confirm")
    scope_node = node_run("rule_scope")
    assert review_batch.status == RegulatoryReviewBatch.Status.WAITING_USER
    assert confirm_node.status == WorkflowNodeRun.Status.WAITING_USER
    assert scope_node.status == WorkflowNodeRun.Status.PENDING
    assert review_batch.condition_json["candidates"]["product_category"]["suggested"] == "体外诊断试剂"
    waiting_events = WorkflowEvent.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        event_type="waiting_user",
    )
    assert waiting_events.exists()
def test_confirm_conditions_endpoint_resumes_workflow(client, settings, tmp_path, django_user_model):
    """POSTing confirmed conditions resumes the paused workflow to SUCCESS.

    REGULATORY_REVIEW_ASYNC is disabled for this test. The response is
    expected to already report the batch as SUCCESS.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="甲胎蛋白检测试剂盒",
    )
    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    start_regulatory_review_workflow(review_batch, async_run=False)
    client.force_login(owner)
    confirmed = {
        "product_category": "体外诊断试剂",
        "registration_type": "首次注册",
        "clinical_evaluation_path": "免临床",
        "product_name": "甲胎蛋白检测试剂盒",
        "model_spec": "卡型",
        "intended_use": "用于甲胎蛋白检测",
    }
    confirm_url = reverse("regulatory_review_confirm_conditions", args=[review_batch.pk])

    response = client.post(
        confirm_url,
        data=json.dumps({"conditions": confirmed}),
        content_type="application/json",
    )
    review_batch.refresh_from_db()

    assert response.status_code == 200
    assert response.json()["batch"]["status"] == RegulatoryReviewBatch.Status.SUCCESS
    stored_conditions = review_batch.condition_json
    assert stored_conditions["confirmed"] is True
    assert stored_conditions["confirmed_conditions"]["model_spec"] == "卡型"
    confirm_node = WorkflowNodeRun.objects.get(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        node_code="condition_confirm",
    )
    assert confirm_node.status == WorkflowNodeRun.Status.SUCCESS

View File

@@ -0,0 +1,27 @@
from review_agent.regulatory_review.services.consistency_check import run_consistency_check
def test_consistency_check_reports_product_name_mismatch():
    """Two documents that differ only in product name yield exactly one finding."""
    # Model/spec and intended-use lines are identical in both documents.
    shared_tail = "\n型号规格20人份/盒\n预期用途定量检测AFP"
    texts_by_file = {
        "说明书.docx": "产品名称:甲胎蛋白检测试剂盒" + shared_tail,
        "技术要求.docx": "产品名称:乙肝表面抗原检测试剂盒" + shared_tail,
    }

    results = run_consistency_check(texts_by_file)

    assert len(results) == 1
    mismatch = results[0]
    assert mismatch.category == "consistency"
    assert "产品名称" in mismatch.title
def test_consistency_check_reports_registration_scope_fields():
    """Differing management class and clinical path are each reported by title."""
    application_form = "\n".join(
        ["管理类别:第二类", "分类编码6840", "注册类型:首次注册", "临床评价路径:免临床"]
    )
    overview = "\n".join(
        ["管理类别:第三类", "分类编码6840", "注册类型:首次注册", "临床评价路径:临床试验"]
    )
    texts_by_file = {"申请表.docx": application_form, "综述资料.docx": overview}

    results = run_consistency_check(texts_by_file)

    reported_titles = [result.title for result in results]
    assert "管理类别在不同文件中不一致" in reported_titles
    assert "临床评价路径在不同文件中不一致" in reported_titles

View File

@@ -0,0 +1,49 @@
import json
import pytest
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileSummaryBatch,
RegulatoryIssue,
RegulatoryReviewBatch,
)
from review_agent.regulatory_review.services.export import export_review_results
pytestmark = pytest.mark.django_db
def test_export_review_results_creates_markdown_excel_and_json(settings, tmp_path, django_user_model):
    """Exporting a review batch yields Markdown, Excel and JSON exports.

    The JSON export is read back from its storage_path and checked for the
    batch number and the title of the single issue created here.
    """
    settings.MEDIA_ROOT = tmp_path
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    summary = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-OK")
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=user,
        source_summary_batch=summary,
        batch_no="RR-EXPORT",
        risk_summary={"blocking": 1},
    )
    RegulatoryIssue.objects.create(
        batch=batch,
        rule_code="registration_test_report",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.BLOCKING,
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
    )

    exports = export_review_results(batch)

    assert {export.export_type for export in exports} == {
        ExportedSummaryFile.ExportType.MARKDOWN,
        ExportedSummaryFile.ExportType.EXCEL,
        ExportedSummaryFile.ExportType.JSON,
    }
    json_export = next(export for export in exports if export.export_type == ExportedSummaryFile.ExportType.JSON)
    # Close the export file deterministically instead of leaking the handle
    # from a bare open(...).read().
    with open(json_export.storage_path, encoding="utf-8") as exported_file:
        payload = json.load(exported_file)
    assert payload["batch_no"] == "RR-EXPORT"
    assert payload["issues"][0]["title"] == "缺少注册检验报告"

View File

@@ -0,0 +1,232 @@
import pytest
from django.urls import reverse
from review_agent.models import (
Conversation,
FileSummaryBatch,
FileSummaryItem,
RegulatoryArtifact,
RegulatoryNotificationRecord,
RegulatoryReviewBatch,
WorkflowNodeRun,
)
pytestmark = pytest.mark.django_db
def test_workspace_renders_regulatory_workflow_card(client, django_user_model):
    """The home page shows a regulatory-review card for the conversation.

    The rendered HTML must contain the batch number, the workflow-type data
    attribute, the "阻断项 1" label, the node name and the regulatory
    status-URL template attribute.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-CARD",
        status=RegulatoryReviewBatch.Status.SUCCESS,
        risk_summary={"blocking": 1, "high": 1},
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        node_group="regulatory_review",
        node_code="risk_assess",
        node_name="风险评估",
        status=WorkflowNodeRun.Status.SUCCESS,
        progress=100,
    )
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")
    page = response.content.decode("utf-8")

    expected_fragments = (
        "RR-CARD",
        'data-workflow-type="regulatory_review"',
        "阻断项 1",
        "风险评估",
        "data-regulatory-status-url-template",
    )
    for fragment in expected_fragments:
        assert fragment in page
def test_workspace_renders_condition_confirmation_form(client, django_user_model):
    """A WAITING_USER batch renders its confirmation form in the chat area.

    The form marker must appear before the element with id="summaryPanel" and
    must not appear again from that element onwards.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    pending_conditions = {
        "confirmed": False,
        "candidates": {
            "product_category": {
                "label": "产品类别",
                "input_type": "select",
                "options": ["体外诊断试剂", "医疗器械", "其他"],
                "suggested": "体外诊断试剂",
            },
            "product_name": {
                "label": "产品名称",
                "input_type": "text",
                "suggested": "甲胎蛋白检测试剂盒",
            },
        },
    }
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-WAIT",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json=pending_conditions,
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        node_group="condition_confirm",
        node_code="condition_confirm",
        node_name="适用条件确认",
        status=WorkflowNodeRun.Status.WAITING_USER,
        progress=50,
    )
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")
    page = response.content.decode("utf-8")

    form_marker = "data-condition-confirm-form"
    assert "适用条件确认" in page
    assert form_marker in page
    assert "体外诊断试剂" in page
    assert "甲胎蛋白检测试剂盒" in page
    form_at = page.index(form_marker)
    panel_at = page.index('id="summaryPanel"')
    assert form_at < panel_at
    assert form_marker not in page[panel_at:]
def test_workspace_refreshes_incomplete_condition_confirmation_candidates(client, settings, tmp_path, django_user_model):
    """Stored candidates that are incomplete are replaced when the page renders.

    The batch is saved with category "其他" and an empty product name. The
    rendered page must show values that appear only in the attachment text.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    attachment_path = tmp_path / "application.txt"
    attachment_text = (
        "卡尤迪生物科技宜兴有限公司申请境内第三类体外诊断试剂"
        "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法产品注册。"
    )
    attachment_path.write_text(attachment_text, encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="第1章 监管信息",
        file_name="符合标准的清单.txt",
        file_type="txt",
        relative_path="第1章 监管信息/符合标准的清单.txt",
        storage_path=str(attachment_path),
    )
    stale_candidates = {
        "product_category": {"label": "产品类别", "input_type": "select", "options": ["其他"], "suggested": "其他"},
        "product_name": {"label": "产品名称", "input_type": "text", "suggested": ""},
    }
    RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-WAIT-EMPTY",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={"confirmed": False, "candidates": stale_candidates},
    )
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")
    page = response.content.decode("utf-8")

    assert "体外诊断试剂" in page
    assert "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法" in page
def test_workspace_renders_rectification_actions_and_summaries(client, tmp_path, django_user_model):
    """A regulatory card shows both re-review buttons and the record counts.

    One review-record artifact and one mock notification are attached to the
    batch. The page must show "通知 1" and "复核记录 1".
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-RECTIFY",
        status=RegulatoryReviewBatch.Status.SUCCESS,
    )
    review_record = tmp_path / "review_record.json"
    review_record.write_text('{"items":[{"status":"review_passed"}]}', encoding="utf-8")
    RegulatoryArtifact.objects.create(
        batch=review_batch,
        artifact_type=RegulatoryArtifact.ArtifactType.JSON,
        name="review_record.json",
        storage_path=str(review_record),
        metadata={"artifact": "review_record"},
    )
    RegulatoryNotificationRecord.objects.create(
        batch=review_batch,
        channel=RegulatoryNotificationRecord.Channel.MOCK,
        target="法规整改负责人",
        status=RegulatoryNotificationRecord.Status.SENT,
        payload={"title": "缺少申请表"},
    )
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")
    page = response.content.decode("utf-8")

    assert 'data-rectification-action="full-review"' in page
    assert 'data-rectification-action="issue-review"' in page
    assert "通知 1" in page
    assert "复核记录 1" in page
def test_frontend_selects_status_url_by_workflow_type():
    """Check that app.js mentions the workflow-type-aware hooks by name.

    This is a substring check on the script source; the JavaScript itself is
    not executed.
    """
    # The context manager closes the handle deterministically; the bare
    # open(...).read() form leaves it open until garbage collection.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert "workflow_type" in script
    assert "data-regulatory-status-url-template" in script
    assert "statusUrlForWorkflow" in script
    assert "bindConditionConfirmForms" in script
    assert "data-condition-confirm-form" in script
    assert "ensureConditionConfirmationCard" in script
    assert "condition_confirmation" in script
    assert "bindRectificationActionButtons" in script
    assert "data-rectification-action" in script
def test_frontend_polls_regulatory_workflow_with_explicit_workflow_type():
    """Check that app.js passes workflow_type through its polling calls.

    This is a substring check on the script source, so the asserted snippets
    must appear in app.js character for character.
    """
    # The context manager closes the handle deterministically; the bare
    # open(...).read() form leaves it open until garbage collection.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert "function startWorkflowPolling(batchId, workflow_type)" in script
    assert "startWorkflowPolling(payload.batch_id, payload.workflow_type)" in script
    assert 'startWorkflowPolling(batchId, "regulatory_review")' in script
    assert 'workflow_type || (card ? card.getAttribute("data-workflow-type") || "file_summary" : "file_summary")' in script
def test_frontend_keeps_single_condition_confirmation_prompt():
    """Check that ``static/js/app.js`` references the stale-card cleanup hooks.

    The script is treated as plain text; each marker below must occur
    somewhere in it.
    """
    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("static/js/app.js", encoding="utf-8") as handle:
        script = handle.read()

    expected_markers = (
        "data-condition-confirmation-card",
        "removeStaleConditionConfirmationCards",
        '[data-condition-confirmation-card]',
    )
    for marker in expected_markers:
        assert marker in script, f"app.js is missing {marker!r}"

View File

@@ -0,0 +1,111 @@
import json
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
def test_review_condition_fields_selects_more_complete_llm_product_name():
    """The LLM value is selected when it carries the wrapped part of the product name."""
    llm_reply = {
        "fields": {
            "产品名称": "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 荧光PCR法",
            "型号规格": "24人份/盒",
        }
    }

    def completion(messages, temperature=0.0):
        # Stub completion: always answers with the canned JSON reply.
        return json.dumps(llm_reply, ensure_ascii=False)

    result = review_condition_fields(
        text="产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n荧光PCR法\n型号规格24人份/盒",
        rule_fields={"产品名称": "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒", "型号规格": "24人份/盒"},
        file_context="申请表.txt",
        completion_func=completion,
    )

    chosen_values = result["selected_fields"]
    chosen_sources = result["selected_sources"]
    assert chosen_values["产品名称"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 荧光PCR法"
    assert chosen_sources["产品名称"] == "llm"
    assert chosen_sources["型号规格"] == "rule"
def test_review_condition_fields_falls_back_when_llm_returns_chapter_title():
    """The rule-extracted name is kept when the LLM answers with a chapter heading."""
    rule_name = "甲胎蛋白检测试剂盒"

    def completion(messages, temperature=0.0):
        # Stub completion: returns a chapter title in place of a product name.
        reply = {"fields": {"产品名称": "第1章 监管信息"}}
        return json.dumps(reply, ensure_ascii=False)

    result = review_condition_fields(
        text="产品名称:甲胎蛋白检测试剂盒",
        rule_fields={"产品名称": rule_name},
        file_context="申请表.txt",
        completion_func=completion,
    )

    assert result["selected_fields"]["产品名称"] == rule_name
    assert result["selected_sources"]["产品名称"] == "rule"
def test_review_condition_fields_rejects_garbled_llm_product_name():
    """The rule-extracted name is kept when the LLM value contains garbled text."""
    rule_name = "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法"
    # NOTE(review): the "<20>" marker presumably stands in for a garbled
    # character in the original fixture — confirm against the real file.
    garbled_name = "呼吸道合胞病毒、 <20>肺炎支原体核酸检测试剂盒 (荧光PCR法)"

    def completion(messages, temperature=0.0):
        # Stub completion: always answers with the garbled product name.
        return json.dumps({"fields": {"产品名称": garbled_name}}, ensure_ascii=False)

    result = review_condition_fields(
        text="呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法",
        rule_fields={"产品名称": rule_name},
        file_context="产品列表.txt",
        completion_func=completion,
    )

    assert result["selected_fields"]["产品名称"] == rule_name
    assert result["selected_sources"]["产品名称"] == "rule"
def test_review_workflow_payload_handles_timeout_without_raising():
    """A completion that always times out produces a ``failed`` result, not an exception."""
    stage_name = "completeness_check"

    def completion(messages, temperature=0.0):
        # Stub completion: every call times out.
        raise TimeoutError("The read operation timed out")

    outcome = review_workflow_payload(
        stage=stage_name,
        payload={"findings": []},
        completion_func=completion,
    )

    assert outcome["status"] == "failed"
    assert outcome["stage"] == stage_name
    assert "timed out" in outcome["error_message"]
def test_review_workflow_payload_retries_timeout_before_success(settings):
    """Two timeouts followed by a valid reply end in a ``success`` result after three calls."""
    settings.REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = 0
    call_count = 0

    def completion(messages, temperature=0.0):
        # Stub completion: times out on the first two calls, then succeeds.
        nonlocal call_count
        call_count += 1
        if call_count >= 3:
            return json.dumps({"reviewed": True})
        raise TimeoutError("The read operation timed out")

    outcome = review_workflow_payload(
        stage="completeness_check",
        payload={"findings": []},
        completion_func=completion,
    )

    assert call_count == 3
    assert outcome["status"] == "success"
    assert outcome["result"]["reviewed"] is True
def test_review_workflow_payload_passes_configured_timeout(settings):
    """The configured timeout setting is forwarded to the completion callable."""
    settings.REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = 0
    settings.REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = 7
    seen_timeouts = []

    def completion(messages, temperature=0.0, timeout=None):
        # Stub completion: remembers the timeout it was called with.
        seen_timeouts.append(timeout)
        return json.dumps({"reviewed": True})

    review_workflow_payload(
        stage="completeness_check",
        payload={"findings": []},
        completion_func=completion,
    )

    # Index -1 mirrors "last observed value" semantics of the original dict.
    assert seen_timeouts[-1] == 7

View File

@@ -0,0 +1,137 @@
import pytest
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileSummaryBatch,
Message,
RegulatoryArtifact,
RegulatoryIssue,
RegulatoryNotificationRecord,
RegulatoryReviewBatch,
RegulatoryRuleVersion,
WorkflowEvent,
WorkflowNodeRun,
)
pytestmark = pytest.mark.django_db
def test_regulatory_models_store_batch_issue_artifact_and_notification(django_user_model):
    """Create one of each regulatory model and check links and default statuses."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="法规核查")
    trigger_message = Message.objects.create(
        conversation=chat,
        role=Message.Role.USER,
        content="请做NMPA法规核查",
    )
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-READY",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    active_rules = RegulatoryRuleVersion.objects.create(
        code="nmpa_ivd_registration_v1",
        name="NMPA IVD 注册资料 Demo 规则",
        yaml_path="review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml",
        yaml_hash="abc123",
        rag_collection="nmpa_ivd_registration_v1",
        rag_index_version="idx-1",
        rag_index_hash="hash-1",
        status=RegulatoryRuleVersion.Status.ACTIVE,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        trigger_message=trigger_message,
        source_summary_batch=source_summary,
        rule_version=active_rules,
        batch_no="RR-202606070001-abcdef",
    )
    missing_report = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="registration_test_report",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.BLOCKING,
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
        evidence={"matched_files": []},
        citations=[{"source": "法规.doc", "text": "注册检验报告"}],
    )
    result_artifact = RegulatoryArtifact.objects.create(
        batch=review_batch,
        artifact_type=RegulatoryArtifact.ArtifactType.JSON,
        name="结果包",
        storage_path="media/regulatory_review/result.json",
        content_hash="hash",
    )
    todo_notice = RegulatoryNotificationRecord.objects.create(
        batch=review_batch,
        channel=RegulatoryNotificationRecord.Channel.MOCK,
        target="todo-plan",
        payload={"issue_id": missing_report.pk},
    )

    # No status was passed for batch, issue or notification, so these
    # assertions pin the model defaults.
    assert review_batch.status == RegulatoryReviewBatch.Status.PENDING
    assert review_batch.source_summary_batch == source_summary
    assert missing_report.status == RegulatoryIssue.Status.OPEN
    assert result_artifact.artifact_type == RegulatoryArtifact.ArtifactType.JSON
    assert todo_notice.status == RegulatoryNotificationRecord.Status.PENDING
def test_generic_workflow_fields_support_file_summary_and_regulatory_batches(django_user_model):
    """Workflow rows can be tagged with a workflow type and batch id for either batch kind."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-GENERIC",
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-GENERIC",
    )

    summary_node = WorkflowNodeRun.objects.create(
        batch=file_batch,
        workflow_type="file_summary",
        workflow_batch_id=file_batch.pk,
        node_group="file_summary",
        node_code="inventory",
        node_name="文件扫描",
    )
    # Created without ``batch``: the regulatory node is identified only by
    # workflow_type + workflow_batch_id.
    review_node = WorkflowNodeRun.objects.create(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        node_group="regulatory_review",
        node_code="prepare",
        node_name="准备",
    )
    created_event = WorkflowEvent.objects.create(
        batch=file_batch,
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        conversation=chat,
        event_type="workflow_created",
        payload={"batch_no": review_batch.batch_no},
    )
    export_row = ExportedSummaryFile.objects.create(
        batch=file_batch,
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        export_category="result_package",
        export_type=ExportedSummaryFile.ExportType.JSON,
        file_name="result.json",
        storage_path="media/regulatory_review/result.json",
    )

    assert summary_node.batch == file_batch
    assert review_node.batch is None
    assert review_node.workflow_batch_id == review_batch.pk
    assert created_event.conversation == chat
    assert export_row.export_type == ExportedSummaryFile.ExportType.JSON

View File

@@ -0,0 +1,79 @@
import pytest
from review_agent.models import (
Conversation,
FileSummaryBatch,
RegulatoryIssue,
RegulatoryNotificationRecord,
RegulatoryReviewBatch,
)
from review_agent.regulatory_review.services.export import build_markdown_report, build_result_payload
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
pytestmark = pytest.mark.django_db
def test_create_mock_notifications_for_medium_and_above(django_user_model):
    """Only the HIGH issue yields a mock notification; the INFO issue yields none."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-NOTIFY",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
        batch_no="RR-NOTIFY",
    )
    high_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="attachment4_1_2_application_form",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.HIGH,
        title="缺少申请表",
    )
    # Informational issue that must not produce a notification.
    RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="info",
        category=RegulatoryIssue.Category.RAG,
        severity=RegulatoryIssue.Severity.INFO,
        title="提示项",
    )

    sent = create_mock_notifications(review_batch)

    assert len(sent) == 1
    only_record = sent[0]
    assert only_record.channel == RegulatoryNotificationRecord.Channel.MOCK
    assert only_record.status == RegulatoryNotificationRecord.Status.SENT
    assert only_record.payload["issue_id"] == high_issue.pk
def test_notification_records_enter_reports(django_user_model):
    """A stored notification shows up in both the Markdown report and the result payload."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-NOTIFY",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
        batch_no="RR-NOTIFY",
    )
    RegulatoryNotificationRecord.objects.create(
        batch=review_batch,
        channel=RegulatoryNotificationRecord.Channel.MOCK,
        target="法规整改负责人",
        status=RegulatoryNotificationRecord.Status.SENT,
        payload={"title": "缺少申请表", "severity": "high"},
    )

    markdown_report = build_markdown_report(review_batch)
    assert "通知记录" in markdown_report

    first_notification = build_result_payload(review_batch)["notifications"][0]
    assert first_notification["channel"] == "mock"

View File

@@ -0,0 +1,88 @@
import pytest
from review_agent.regulatory_review.services.rag_citation import (
RagIndexUnavailable,
retrieve_citations,
)
from review_agent.regulatory_review.services.rag_embedding import SiliconFlowEmbeddingProvider
from review_agent.regulatory_review.services.rag_index import chunk_text
from review_agent.regulatory_review.services.rag_index import collect_source_chunks
def test_siliconflow_embedding_provider_posts_expected_payload(monkeypatch):
    """The provider posts to ``/embeddings`` with auth, model and dimensions, and returns the vectors."""
    recorded = []

    class FakeResponse:
        """Canned response object returned by the patched ``httpx.post``."""

        def raise_for_status(self):
            return None

        def json(self):
            first = {"embedding": [0.1, 0.2]}
            second = {"embedding": [0.3, 0.4]}
            return {"data": [first, second]}

    def fake_post(url, headers, json, timeout):
        # Capture the request arguments for the assertions below.
        recorded.append(dict(url=url, headers=headers, json=json, timeout=timeout))
        return FakeResponse()

    monkeypatch.setattr("review_agent.regulatory_review.services.rag_embedding.httpx.post", fake_post)
    provider = SiliconFlowEmbeddingProvider(
        api_key="secret",
        base_url="https://api.siliconflow.cn/v1",
        model="Qwen/Qwen3-Embedding-4B",
        dimensions=1024,
    )

    vectors = provider.embed(["法规依据", "注册检验报告"])
    assert vectors == [[0.1, 0.2], [0.3, 0.4]]

    request = recorded[0]
    assert request["url"] == "https://api.siliconflow.cn/v1/embeddings"
    assert request["headers"]["Authorization"] == "Bearer secret"
    assert request["json"]["model"] == "Qwen/Qwen3-Embedding-4B"
    assert request["json"]["dimensions"] == 1024
def test_chunk_text_preserves_source_metadata():
    """Chunking a repeated paragraph yields several chunks that carry the source name."""
    long_text = "第一段法规内容。\n" * 20
    pieces = chunk_text(long_text, source="法规.doc", chunk_size=30, overlap=5)

    assert len(pieces) > 1
    head = pieces[0]
    assert head.metadata["source"] == "法规.doc"
    assert head.text
def test_retrieve_citations_returns_placeholder_when_no_hits():
    """An empty query result produces a placeholder citation as the first entry."""

    class EmptyCollection:
        """Collection stub whose query never returns any hit."""

        def query(self, query_embeddings, n_results):
            return {"documents": [[]], "metadatas": [[]], "distances": [[]]}

    def fixed_embedding(texts):
        # Embedding stub: one constant vector regardless of input.
        return [[0.1, 0.2]]

    citations = retrieve_citations(
        "注册检验报告",
        embedding_provider=fixed_embedding,
        collection=EmptyCollection(),
    )

    assert citations[0]["source"] == "原文依据待补充"
def test_retrieve_citations_raises_when_index_missing(settings, tmp_path):
    """Pointing the Chroma path at a non-existent directory raises ``RagIndexUnavailable``."""
    missing_index_dir = tmp_path / "missing"
    settings.REGULATORY_RAG_CHROMA_PATH = missing_index_dir

    def fixed_embedding(texts):
        # Embedding stub: one constant vector regardless of input.
        return [[0.1]]

    with pytest.raises(RagIndexUnavailable):
        retrieve_citations("注册检验报告", embedding_provider=fixed_embedding)
def test_collect_source_chunks_requires_attachment4_extraction(monkeypatch, tmp_path):
    """A failed text extraction of the attachment 4 file surfaces as a ``RuntimeError``."""
    sources_root = tmp_path / "sources"
    sources_root.mkdir()
    legacy_doc = sources_root / "附件 4 体外诊断试剂注册申报资料要求及说明.doc"
    legacy_doc.write_bytes(b"legacy-doc")

    def fail_extract(path):
        # Extraction stub: always fails.
        raise RuntimeError("无法通过 LibreOffice 转换法规 .doc 材料")

    monkeypatch.setattr(
        "review_agent.regulatory_review.services.rag_index.extract_text_from_path",
        fail_extract,
    )

    with pytest.raises(RuntimeError, match="附件 4"):
        collect_source_chunks(sources_root)

View File

@@ -0,0 +1,133 @@
import json
import pytest
from django.urls import reverse
from review_agent.models import (
Conversation,
FileSummaryBatch,
FileSummaryItem,
RegulatoryArtifact,
RegulatoryIssue,
RegulatoryReviewBatch,
)
from review_agent.regulatory_review.services.export import build_markdown_report, build_result_payload
from review_agent.regulatory_review.services.rectification_review import review_missing_issues
pytestmark = pytest.mark.django_db
def _make_review_batch(user):
    """Create a conversation, a successful summary batch and a successful review batch.

    Returns:
        A ``(conversation, original_summary, batch)`` tuple owned by ``user``.
    """
    conversation = Conversation.objects.create(user=user, title="会话")
    shared = {"conversation": conversation, "user": user}
    original_summary = FileSummaryBatch.objects.create(
        batch_no="FS-ORIGINAL",
        status=FileSummaryBatch.Status.SUCCESS,
        **shared,
    )
    batch = RegulatoryReviewBatch.objects.create(
        source_summary_batch=original_summary,
        batch_no="RR-ORIGINAL",
        status=RegulatoryReviewBatch.Status.SUCCESS,
        **shared,
    )
    return conversation, original_summary, batch
def test_start_full_package_review_creates_new_traceable_batch(client, settings, tmp_path, django_user_model):
    """Starting a full review creates a new batch that points back at the original one."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat, _original_summary, first_batch = _make_review_batch(owner)
    resubmitted = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-NEW",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    client.force_login(owner)

    endpoint = reverse("regulatory_review_start_full_review", args=[first_batch.pk])
    body = json.dumps({"file_summary_batch_id": resubmitted.pk})
    response = client.post(endpoint, data=body, content_type="application/json")

    assert response.status_code == 200
    # ``get()`` also checks that exactly one additional batch exists.
    follow_up = RegulatoryReviewBatch.objects.exclude(pk=first_batch.pk).get()
    assert follow_up.source_summary_batch == resubmitted
    trace = follow_up.condition_json
    assert trace["source_review_batch_id"] == first_batch.pk
    assert trace["regenerated_from"]["batch_no"] == "RR-ORIGINAL"
def test_review_missing_issues_updates_status_and_writes_record(settings, tmp_path, django_user_model):
    """A supplement containing the label file marks the issue as passed and stores a record artifact."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat, _original_summary, review_batch = _make_review_batch(owner)
    label_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="attachment4_5_3_label",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.HIGH,
        title="缺少标签样稿",
        suggestion="请补充标签样稿。",
    )
    supplement_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-SUPPLEMENT",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    # The supplemented file that is expected to resolve the missing-label issue.
    FileSummaryItem.objects.create(
        batch=supplement_batch,
        file_index=1,
        directory_level="5. 产品说明书和标签样稿",
        file_name="标签样稿.pdf",
        file_type="pdf",
        relative_path="5.3 标签样稿/标签样稿.pdf",
        storage_path="x/label.pdf",
    )

    review_record = review_missing_issues(
        batch=review_batch,
        issue_ids=[label_issue.pk],
        file_summary_batch=supplement_batch,
    )
    label_issue.refresh_from_db()

    assert label_issue.status == RegulatoryIssue.Status.REVIEW_PASSED
    assert review_record["items"][0]["status"] == "review_passed"
    stored_records = RegulatoryArtifact.objects.filter(batch=review_batch, name__startswith="review_record")
    assert stored_records.exists()
def test_missing_issue_review_endpoint_and_report_output(client, settings, tmp_path, django_user_model):
    """Reviewing against an empty supplement fails the issue and records the review in the reports."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat, _original_summary, review_batch = _make_review_batch(owner)
    quality_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="attachment4_6_quality_system",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.HIGH,
        title="缺少质量管理体系文件",
        suggestion="请补充质量管理体系文件。",
    )
    # No FileSummaryItem is attached to this supplement batch.
    supplement_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-SUPPLEMENT",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    client.force_login(owner)

    endpoint = reverse("regulatory_review_review_issues", args=[review_batch.pk])
    body = json.dumps({"issue_ids": [quality_issue.pk], "file_summary_batch_id": supplement_batch.pk})
    response = client.post(endpoint, data=body, content_type="application/json")

    quality_issue.refresh_from_db()
    result_payload = build_result_payload(review_batch)
    markdown_report = build_markdown_report(review_batch)

    assert response.status_code == 200
    assert quality_issue.status == RegulatoryIssue.Status.REVIEW_FAILED
    assert result_payload["review_records"][0]["file_summary_batch_no"] == "FS-SUPPLEMENT"
    assert "复核记录" in markdown_report

View File

@@ -0,0 +1,35 @@
import pytest
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryIssue, RegulatoryReviewBatch
from review_agent.regulatory_review.schemas import Finding
from review_agent.regulatory_review.services.risk_assess import persist_findings
pytestmark = pytest.mark.django_db
def test_persist_findings_deduplicates_and_updates_risk_summary(django_user_model):
    """Persisting the same finding twice stores one issue and counts one blocking risk."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-OK")
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
        batch_no="RR-RISK",
    )
    blocking_finding = Finding(
        rule_code="registration_test_report",
        category="completeness",
        severity="blocking",
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
        citations=[{"source": "法规.doc", "text": "注册检验报告"}],
    )

    # The very same finding object is submitted twice on purpose.
    stored = persist_findings(review_batch, [blocking_finding] * 2)
    review_batch.refresh_from_db()

    assert len(stored) == 1
    assert RegulatoryIssue.objects.count() == 1
    assert review_batch.risk_summary["blocking"] == 1

View File

@@ -0,0 +1,93 @@
from pathlib import Path
import json
import pytest
from django.core.management import call_command
from review_agent.models import RegulatoryRuleVersion
from review_agent.regulatory_review.services.rule_loader import (
DEFAULT_RULE_CODE,
check_rule_version,
compute_file_sha256,
load_rule_file,
)
pytestmark = pytest.mark.django_db
def test_load_rule_file_reads_demo_requirements():
    """The default rule file carries the default code and the five demo requirement codes."""
    rule_set = load_rule_file()
    loaded_codes = set()
    for requirement in rule_set["requirements"]:
        loaded_codes.add(requirement["code"])

    assert rule_set["code"] == DEFAULT_RULE_CODE
    for expected_code in (
        "product_technical_requirements",
        "instructions_for_use",
        "registration_test_report",
        "clinical_evaluation",
        "essential_principles_checklist",
    ):
        assert expected_code in loaded_codes
def test_load_rule_file_covers_attachment4_outline():
    """Every chapter and child heading of the attachment 4 outline fixture has a rule.

    Chapter rules are matched by title and exact ``attachment4_code``; child
    rules by title and an ``attachment4_code`` starting with ``"<chapter code>."``.
    """
    rule_set = load_rule_file()
    requirements = rule_set["requirements"]
    outline_path = Path("tests/fixtures/regulatory/attachment4_outline.json")
    outline = json.loads(outline_path.read_text(encoding="utf-8"))

    for chapter in outline:
        # Pass a default to next() so a missing rule fails with a message that
        # names the chapter instead of surfacing as a bare StopIteration.
        chapter_rule = next(
            (
                item
                for item in requirements
                if item["title"] == chapter["title"] and item.get("attachment4_code") == chapter["code"]
            ),
            None,
        )
        assert chapter_rule is not None, (
            f"no rule found for chapter {chapter['code']} {chapter['title']}"
        )
        assert chapter_rule["attachment4_code"] == chapter["code"]
        assert chapter_rule["severity"] == "high"
        assert chapter_rule["citation_query"]

        child_prefix = f"{chapter['code']}."
        for child in chapter["children"]:
            child_rule = next(
                (
                    item
                    for item in requirements
                    if item["title"] == child
                    and str(item.get("attachment4_code", "")).startswith(child_prefix)
                ),
                None,
            )
            assert child_rule is not None, (
                f"no rule found for child {child!r} under chapter {chapter['code']}"
            )
            assert child_rule["rule_id"]
            assert child_rule["file_keywords"]
            assert child_rule["severity"] in {"blocking", "high", "medium"}
            assert child_rule["citation_query"]
def test_compute_file_sha256_changes_when_file_changes(tmp_path):
    """Rewriting the file with different content changes its computed hash."""
    rule_path = tmp_path / "rule.yaml"

    rule_path.write_text("code: demo\n", encoding="utf-8")
    hash_before = compute_file_sha256(rule_path)

    rule_path.write_text("code: demo2\n", encoding="utf-8")
    hash_after = compute_file_sha256(rule_path)

    assert hash_after != hash_before
def test_check_rule_version_creates_missing_db_record():
    """With ``update_missing=True`` a missing rule version row is created with the current hash."""
    outcome = check_rule_version(update_missing=True)
    stored = RegulatoryRuleVersion.objects.get(code=DEFAULT_RULE_CODE)

    assert outcome.status == "created"
    assert outcome.current_hash == stored.yaml_hash
    assert stored.rag_collection == "nmpa_ivd_registration_v1"
def test_check_rule_version_reports_hash_mismatch_without_overwriting():
    """A stale stored hash is reported as ``mismatch`` and left untouched in the database."""
    initial = check_rule_version(update_missing=True)

    # Corrupt the stored hash so it no longer matches the rule file.
    stored = RegulatoryRuleVersion.objects.get(code=DEFAULT_RULE_CODE)
    stored.yaml_hash = "stale"
    stored.save(update_fields=["yaml_hash"])

    recheck = check_rule_version(update_missing=False)
    stored.refresh_from_db()

    assert recheck.status == "mismatch"
    assert recheck.database_hash == "stale"
    assert recheck.current_hash == initial.current_hash
    assert stored.yaml_hash == "stale"
def test_regulatory_rules_check_command_reports_status(capsys):
    """The management command prints the default rule code and a ``created``/``ok`` status."""
    call_command("regulatory_rules_check")
    stdout = capsys.readouterr().out

    assert DEFAULT_RULE_CODE in stdout
    assert any(status in stdout for status in ("created", "ok"))

View File

@@ -0,0 +1,26 @@
import pytest
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryReviewBatch
from review_agent.regulatory_review.storage import save_artifact
pytestmark = pytest.mark.django_db
def test_save_artifact_writes_file_and_records_hash(settings, tmp_path, django_user_model):
    """Saving an artifact records a hash and writes the file under the batch work directory."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-OK")
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
        batch_no="RR-ART",
    )

    saved = save_artifact(review_batch, name="raw.json", content='{"ok": true}', artifact_type="json")

    expected_file = tmp_path / "regulatory_review" / "work" / "RR-ART" / "raw.json"
    assert saved.content_hash
    assert saved.storage_path.endswith("raw.json")
    assert expected_file.exists()

View File

@@ -0,0 +1,26 @@
from review_agent.regulatory_review.services.rule_loader import load_rule_file
from review_agent.regulatory_review.services.structure_check import run_structure_check
def test_structure_check_reports_missing_instruction_sections():
    """A missing storage-condition section is reported; the present sample section is not."""
    instructions_text = "产品名称:甲胎蛋白检测试剂盒\n样本要求:血清样本\n有效期12个月"
    findings = run_structure_check({"说明书.docx": instructions_text}, load_rule_file())

    reported_codes = [finding.rule_code for finding in findings]
    assert "instructions_for_use:储存条件" in reported_codes
    assert not any("样本要求" in finding.title for finding in findings)
def test_structure_check_reports_missing_attachment4_outline_heading():
    """A directory listing without chapter 4 yields a structure finding for clinical evaluation."""
    document_texts = {
        "申报资料目录.txt": "1. 监管信息\n1.2 申请表\n2. 综述资料\n3. 非临床资料\n"
    }
    findings = run_structure_check(document_texts, load_rule_file())

    # Pass a default to next() so a missing finding fails with a descriptive
    # assertion instead of surfacing as a bare StopIteration.
    missing = next(
        (finding for finding in findings if finding.rule_code == "attachment4_4_clinical_evaluation"),
        None,
    )
    assert missing is not None, (
        "no finding reported for rule attachment4_4_clinical_evaluation; got "
        f"{[finding.rule_code for finding in findings]}"
    )
    assert missing.category == "structure"
    assert missing.title == "申报资料目录缺少4临床评价资料章节"
    assert missing.evidence["expected_title"] == "临床评价资料"

View File

@@ -0,0 +1,39 @@
from pathlib import Path
from review_agent.regulatory_review.services.text_extract import extract_text
def test_extract_text_reads_plain_text(tmp_path):
    """A UTF-8 ``.txt`` file is extracted successfully with its text and a content hash."""
    manual = tmp_path / "说明书.txt"
    manual.write_text("产品名称:甲胎蛋白检测试剂盒\n储存条件2-8℃", encoding="utf-8")

    extracted = extract_text(manual)

    assert "甲胎蛋白" in extracted.text
    assert extracted.status == "success"
    assert extracted.content_hash
def test_extract_text_keeps_wrapped_product_name(tmp_path):
    """A product name wrapped onto a second line is joined into one field candidate."""
    form_text = (
        "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n"
        "荧光PCR法\n"
        "型号规格24人份/盒\n"
    )
    form_path = tmp_path / "申请表.txt"
    form_path.write_text(form_text, encoding="utf-8")

    candidates = extract_text(form_path).field_candidates

    assert candidates["产品名称"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 荧光PCR法"
    assert candidates["型号规格"] == "24人份/盒"
def test_extract_text_reports_unsupported_file(tmp_path):
    """A ``.png`` file is reported as unsupported with empty text."""
    image_path = tmp_path / "image.png"
    image_path.write_bytes(b"png")

    extracted = extract_text(image_path)

    assert extracted.status == "unsupported"
    assert extracted.text == ""

View File

@@ -0,0 +1,136 @@
import pytest
from django.urls import reverse
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
pytestmark = pytest.mark.django_db
def test_regulatory_batch_status_requires_owner(client, django_user_model):
    """The status endpoint answers 404 for another user and 200 with batch data for the owner."""
    batch_owner = django_user_model.objects.create_user(username="owner", password="pass")
    stranger = django_user_model.objects.create_user(username="other", password="pass")
    chat = Conversation.objects.create(user=batch_owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=batch_owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=batch_owner,
        source_summary_batch=source_summary,
        batch_no="RR-STATUS",
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        node_group="regulatory_review",
        node_code="prepare",
        node_name="准备",
        progress=50,
    )
    status_url = reverse("regulatory_review_batch_status", args=[review_batch.pk])

    # A different logged-in user must not see the batch.
    client.force_login(stranger)
    denied = client.get(status_url)
    assert denied.status_code == 404

    client.force_login(batch_owner)
    allowed = client.get(status_url)
    assert allowed.status_code == 200

    body = allowed.json()
    batch_info = body["batch"]
    assert batch_info["workflow_type"] == "regulatory_review"
    assert batch_info["batch_no"] == "RR-STATUS"
    assert body["nodes"][0]["node_code"] == "prepare"
def test_regulatory_batch_status_exposes_condition_confirmation(client, django_user_model):
    """A batch waiting on the user exposes its condition candidates in the status payload."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    category_candidate = {
        "label": "产品类别",
        "input_type": "select",
        "options": ["体外诊断试剂", "医疗器械", "其他"],
        "suggested": "体外诊断试剂",
    }
    waiting_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
        batch_no="RR-WAIT",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={
            "confirmed": False,
            "candidates": {"product_category": category_candidate},
        },
    )
    client.force_login(owner)

    status_url = reverse("regulatory_review_batch_status", args=[waiting_batch.pk])
    body = client.get(status_url).json()

    assert body["batch"]["status"] == RegulatoryReviewBatch.Status.WAITING_USER
    confirmation = body["condition_confirmation"]
    assert confirmation["batch_id"] == waiting_batch.pk
    assert confirmation["candidates"]["product_category"]["suggested"] == "体外诊断试剂"
def test_regulatory_batch_status_refreshes_incomplete_condition_candidates(
    client, settings, tmp_path, django_user_model
):
    """Empty or placeholder suggestions are replaced with values found in the stored file."""
    # Imported locally, as in the original test.
    from review_agent.models import FileSummaryItem

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    # The summary's product name is a chapter heading, not a real product name.
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )

    application_file = tmp_path / "application.txt"
    application_text = (
        "卡尤迪生物科技宜兴有限公司申请境内第三类体外诊断试剂"
        "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法产品注册。"
    )
    application_file.write_text(application_text, encoding="utf-8")

    FileSummaryItem.objects.create(
        batch=source_summary,
        file_index=1,
        directory_level="第1章 监管信息",
        file_name="符合标准的清单.txt",
        file_type="txt",
        relative_path="第1章 监管信息/符合标准的清单.txt",
        storage_path=str(application_file),
    )
    stale_candidates = {
        "product_category": {"suggested": "其他"},
        "product_name": {"suggested": ""},
    }
    waiting_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
        batch_no="RR-WAIT-EMPTY",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={"confirmed": False, "candidates": stale_candidates},
    )
    client.force_login(owner)

    status_url = reverse("regulatory_review_batch_status", args=[waiting_batch.pk])
    body = client.get(status_url).json()

    refreshed = body["condition_confirmation"]["candidates"]
    assert refreshed["product_category"]["suggested"] == "体外诊断试剂"
    assert refreshed["product_name"]["suggested"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒荧光PCR法"

View File

@@ -0,0 +1,502 @@
import logging
import pytest
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileAttachment,
FileSummaryBatch,
FileSummaryItem,
Message,
RegulatoryIssue,
RegulatoryArtifact,
RegulatoryReviewBatch,
WorkflowEvent,
WorkflowNodeRun,
)
from review_agent.regulatory_review.workflow import (
NODE_DEFINITIONS,
RegulatoryWorkflowExecutor,
create_regulatory_review_batch,
find_latest_successful_summary_batch,
start_regulatory_review_workflow,
)
from review_agent.services import stream_message
from review_agent.skill_router import SkillRoute, route_message_intent
pytestmark = pytest.mark.django_db
def test_rule_router_starts_regulatory_review_for_nmpa_keywords(monkeypatch, django_user_model):
    """With LLM routing raising ``ValueError``, an NMPA message is routed to regulatory review."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    def failing_llm_route(conversation, content, attachments):
        # Replacement for the LLM router: always raises.
        raise ValueError("fallback")

    monkeypatch.setattr("review_agent.skill_router._route_with_llm", failing_llm_route)

    decision = route_message_intent(chat, "请做NMPA核查和风险预警")

    assert decision.action == "regulatory_review"
    assert decision.workflow_type == "regulatory_review"
    assert decision.starts_regulatory_review
def test_find_latest_successful_summary_batch_ignores_failed_batches(django_user_model):
    """A later failed batch does not displace the earlier successful one."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    common = {"conversation": chat, "user": owner}

    good_batch = FileSummaryBatch.objects.create(
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
        **common,
    )
    # Created after the successful batch and must be ignored.
    FileSummaryBatch.objects.create(
        batch_no="FS-FAILED",
        status=FileSummaryBatch.Status.FAILED,
        **common,
    )

    latest = find_latest_successful_summary_batch(chat)
    assert latest == good_batch
def test_create_regulatory_review_batch_initializes_nodes(django_user_model):
    """Creating a review batch adds one node run per definition and a ``workflow_created`` event."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    trigger = Message.objects.create(conversation=chat, role=Message.Role.USER, content="法规核查")
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        trigger_message=trigger,
        source_summary_batch=source_summary,
    )

    assert review_batch.status == RegulatoryReviewBatch.Status.PENDING

    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
    )
    assert node_runs.count() == len(NODE_DEFINITIONS)

    created_events = WorkflowEvent.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        event_type="workflow_created",
    )
    assert created_events.exists()
def test_start_regulatory_review_workflow_runs_synchronously(django_user_model):
    """With ``async_run=False`` a confirmed batch ends in SUCCESS with a completion event."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=source_summary,
    )
    # Mark the applicability conditions as already confirmed before starting.
    confirmed_conditions = {"product_category": "体外诊断试剂"}
    review_batch.condition_json = {"confirmed": True, "confirmed_conditions": confirmed_conditions}
    review_batch.save(update_fields=["condition_json"])

    start_regulatory_review_workflow(review_batch, async_run=False)
    review_batch.refresh_from_db()

    assert review_batch.status == RegulatoryReviewBatch.Status.SUCCESS
    completion_events = WorkflowEvent.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        event_type="workflow_completed",
    )
    assert completion_events.exists()
def test_workflow_continues_when_llm_review_times_out(monkeypatch, settings, django_user_model):
    """A timeout in the LLM review call must not fail the workflow.

    ``generate_completion`` in the LLM review service is replaced with a stub
    that always raises ``TimeoutError``; the batch is still expected to end as
    SUCCESS with an empty ``error_message``.
    """
    settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
    }
    review.save(update_fields=["condition_json"])

    def raise_read_timeout(messages, temperature=0.0):
        # Stand-in for the completion call: every invocation times out.
        raise TimeoutError("The read operation timed out")

    monkeypatch.setattr(
        "review_agent.regulatory_review.services.llm_review.generate_completion",
        raise_read_timeout,
    )

    start_regulatory_review_workflow(review, async_run=False)
    review.refresh_from_db()

    assert review.status == RegulatoryReviewBatch.Status.SUCCESS
    assert review.error_message == ""
def test_regulatory_workflow_logs_node_and_method_details(caplog, django_user_model):
    """The workflow logger reports workflow start, node start/finish and method calls.

    Log records are captured at INFO level from the
    ``review_agent.regulatory_review.workflow`` logger while the workflow runs
    synchronously, then searched for the expected message fragments.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
    }
    review.save(update_fields=["condition_json"])

    with caplog.at_level(logging.INFO, logger="review_agent.regulatory_review.workflow"):
        start_regulatory_review_workflow(review, async_run=False)

    log_lines = [entry.getMessage() for entry in caplog.records]

    def logged(first, second):
        # True when a single captured message contains both fragments.
        return any(first in line and second in line for line in log_lines)

    assert logged("法规核查工作流开始", review.batch_no)
    assert logged("节点开始", "完整性核查")
    assert logged("方法执行", "run_completeness_check")
    assert logged("节点完成", "完整性核查")
def test_stream_message_prompts_for_summary_when_missing(monkeypatch, django_user_model):
    """Without any file summary, the stream asks for an upload and creates no batch.

    Intent routing is stubbed to always select the regulatory review workflow.
    The streamed output must contain the upload prompt and the auto-summary
    notice, and no ``RegulatoryReviewBatch`` may exist afterwards.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    def route_to_regulatory_review(conversation, content):
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)

    streamed = "".join(stream_message(chat, "请做法规核查"))

    assert "请先在当前对话右侧上传需要核查的文件或压缩包" in streamed
    assert "我会先自动汇总再继续法规核查" in streamed
    assert not RegulatoryReviewBatch.objects.exists()
def test_stream_message_auto_runs_summary_before_regulatory_review(
    monkeypatch, settings, tmp_path, django_user_model
):
    """An active attachment with no summary triggers a summary run before the review.

    The file summary workflow is stubbed to mark its batch SUCCESS immediately.
    The stream must mention both workflow types and the auto-summary notice,
    and the created regulatory batch must carry the chapter-1 scope.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    upload = tmp_path / "application.txt"
    upload.write_text("产品名称:甲胎蛋白检测试剂盒", encoding="utf-8")
    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="application.txt",
        storage_path=str(upload),
        file_size=upload.stat().st_size,
        is_active=True,
    )

    def route_to_regulatory_review(conversation, content):
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    def mark_summary_successful(batch, async_run=True):
        # Replaces the real summary workflow: just flag the batch as done.
        batch.status = FileSummaryBatch.Status.SUCCESS
        batch.save(update_fields=["status"])

    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)
    monkeypatch.setattr("review_agent.services.start_file_summary_workflow", mark_summary_successful)

    streamed = "".join(stream_message(chat, "进行第一章NMPA 法规核查"))

    assert "\"workflow_type\": \"file_summary\"" in streamed
    assert "\"workflow_type\": \"regulatory_review\"" in streamed
    assert "已先启动文件目录与页数自动汇总工作流" in streamed

    successful_summaries = FileSummaryBatch.objects.filter(
        conversation=chat,
        status=FileSummaryBatch.Status.SUCCESS,
    )
    assert successful_summaries.exists()

    review = RegulatoryReviewBatch.objects.get(conversation=chat)
    assert review.condition_json["rule_scope"]["attachment4_chapter"] == "1"
def test_stream_message_starts_regulatory_workflow(monkeypatch, settings, django_user_model):
    """With a successful summary present, the stream starts a regulatory review.

    The streamed output must contain ``workflow_started`` and the regulatory
    workflow type, and a ``RegulatoryReviewBatch`` must exist for the
    conversation.
    """
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    def route_to_regulatory_review(conversation, content):
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)

    streamed = "".join(stream_message(chat, "请做法规核查"))

    assert "workflow_started" in streamed
    assert "\"workflow_type\": \"regulatory_review\"" in streamed
    assert RegulatoryReviewBatch.objects.filter(conversation=chat).exists()
def test_stream_message_records_attachment4_chapter_scope(monkeypatch, settings, django_user_model):
    """A chapter mentioned in the user message is stored as the batch's rule scope.

    The message asks for a chapter-one review; the resulting batch must record
    ``attachment4_chapter`` "1" and the matching label in ``condition_json``.
    """
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    def route_to_regulatory_review(conversation, content):
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)

    # Drain the generator so the whole message is processed; frames are unused.
    list(stream_message(chat, "请做第一章 NMPA 法规核查"))

    review = RegulatoryReviewBatch.objects.get(conversation=chat)
    scope = review.condition_json["rule_scope"]
    assert scope["attachment4_chapter"] == "1"
    assert scope["label"] == "第1章 监管信息"
def test_workflow_chapter_scope_only_checks_selected_attachment4_chapter(settings, tmp_path, django_user_model):
    """A chapter-1 rule scope yields issues only for ``attachment4_1`` rule codes.

    The batch is given a ``rule_scope`` for chapter 1 before the synchronous
    run. At least one issue must be produced, every issue code must start with
    ``attachment4_1`` and none with ``attachment4_2``.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
        "rule_scope": {"attachment4_chapter": "1", "label": "第1章 监管信息"},
    }
    review.save(update_fields=["condition_json"])

    start_regulatory_review_workflow(review, async_run=False)

    reported = RegulatoryIssue.objects.filter(batch=review)
    rule_codes = list(reported.values_list("rule_code", flat=True))

    assert rule_codes
    assert all(rule_code.startswith("attachment4_1") for rule_code in rule_codes)
    assert all(not rule_code.startswith("attachment4_2") for rule_code in rule_codes)
def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_path, django_user_model):
    """A full synchronous run produces issues, exports, artifacts and a chat summary.

    With one instructions-for-use text file in the summary, the run must end
    as SUCCESS, record a blocking issue, create exactly three exported files,
    store the text-extract and RAG result artifacts, and post an assistant
    message containing "已完成 NMPA".
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    manual_file = tmp_path / "ifu.txt"
    manual_file.write_text("产品名称:甲胎蛋白检测试剂盒\n样本要求:血清\n有效期12个月", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(manual_file),
    )

    review = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
    }
    review.save(update_fields=["condition_json"])

    start_regulatory_review_workflow(review, async_run=False)
    review.refresh_from_db()

    assert review.status == RegulatoryReviewBatch.Status.SUCCESS
    assert RegulatoryIssue.objects.filter(batch=review, severity="blocking").exists()

    exported = ExportedSummaryFile.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review.pk,
    )
    assert exported.count() == 3

    artifacts = RegulatoryArtifact.objects.filter(batch=review)
    assert artifacts.filter(name="text_extract_status.json").exists()
    assert artifacts.filter(name="rag_result_json.json").exists()

    assistant_replies = chat.messages.filter(
        role=Message.Role.ASSISTANT,
        content__contains="已完成 NMPA",
    )
    assert assistant_replies.exists()
def test_workflow_records_llm_review_artifacts_for_review_nodes(
    monkeypatch, settings, tmp_path, django_user_model
):
    """Each reviewed node leaves an ``llm_review_<node>.json`` artifact.

    ``review_workflow_payload`` in the workflow module is stubbed to return a
    successful review result; after the run an artifact must exist for the
    completeness, text-extract, structure, consistency and risk nodes.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    manual_file = tmp_path / "ifu.txt"
    manual_file.write_text("产品名称:甲胎蛋白检测试剂盒\n型号规格20人份/盒", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(manual_file),
    )

    review = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
    }
    review.save(update_fields=["condition_json"])

    def fake_review(stage, payload):
        # Canned successful review; echoes the stage it was asked about.
        return {
            "status": "success",
            "stage": stage,
            "result": {"reviewed": True},
            "error_message": "",
        }

    monkeypatch.setattr(
        "review_agent.regulatory_review.workflow.review_workflow_payload",
        fake_review,
    )

    start_regulatory_review_workflow(review, async_run=False)

    stored_names = set(
        RegulatoryArtifact.objects.filter(batch=review).values_list("name", flat=True)
    )
    expected_names = (
        "llm_review_completeness_check.json",
        "llm_review_text_extract.json",
        "llm_review_structure_check.json",
        "llm_review_consistency_check.json",
        "llm_review_risk_assess.json",
    )
    for artifact_name in expected_names:
        assert artifact_name in stored_names
def test_workflow_progress_uses_processed_file_counts(settings, tmp_path, django_user_model):
    """Text extraction reports progress from the number of files actually processed.

    Three text files are registered on the summary batch. After calling
    ``_extract_source_texts`` on the ``text_extract`` node, three texts are
    returned, the node shows progress 95 with a "3/3" message naming the last
    file, and a ``node_progress`` event with processed=3 / total=3 exists.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    file_names = ["注册信息.txt", "说明书.txt", "综述.txt"]
    for position, name in enumerate(file_names, start=1):
        source_file = tmp_path / name
        source_file.write_text(f"产品名称:甲胎蛋白检测试剂盒\n文件:{name}", encoding="utf-8")
        FileSummaryItem.objects.create(
            batch=summary_batch,
            file_index=position,
            file_name=name,
            file_type="txt",
            relative_path=name,
            storage_path=str(source_file),
        )

    review = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    extract_node = WorkflowNodeRun.objects.get(
        workflow_type="regulatory_review",
        workflow_batch_id=review.pk,
        node_code="text_extract",
    )

    extracted = RegulatoryWorkflowExecutor(review)._extract_source_texts(extract_node)
    extract_node.refresh_from_db()

    assert len(extracted) == 3
    assert extract_node.progress == 95
    assert "文本抽取 3/3" in extract_node.message
    assert "综述.txt" in extract_node.message

    progress_events = WorkflowEvent.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review.pk,
        event_type="node_progress",
        payload__node_code="text_extract",
        payload__processed=3,
        payload__total=3,
    )
    assert progress_events.exists()
def test_review_services_emit_actual_workload_progress_callbacks(django_user_model):
    """The check services report progress in units of real work items.

    Each service is called with a list's ``append`` as ``progress_callback``.
    The final update of the completeness and structure checks must show 2 of 2
    (the rule set has two requirements), and the consistency check must show
    ``len(FIELDS)`` of ``len(FIELDS)``.
    """
    from review_agent.regulatory_review.services.completeness_check import run_completeness_check
    from review_agent.regulatory_review.services.consistency_check import FIELDS, run_consistency_check
    from review_agent.regulatory_review.services.structure_check import run_structure_check

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    rules = {
        "requirements": [
            {"code": "r1", "title": "注册信息", "type": "required", "file_keywords": ["注册信息"]},
            {"code": "r2", "title": "说明书", "type": "required", "file_keywords": ["说明书"]},
        ]
    }

    # One collector per service; each callback invocation appends its update.
    completeness_log = []
    structure_log = []
    consistency_log = []

    run_completeness_check(summary_batch, rules, progress_callback=completeness_log.append)
    run_structure_check({"注册信息.txt": "注册信息"}, rules, progress_callback=structure_log.append)
    run_consistency_check({"注册信息.txt": "产品名称A"}, progress_callback=consistency_log.append)

    final_completeness = completeness_log[-1]
    assert final_completeness["processed"] == 2
    assert final_completeness["total"] == 2
    assert final_completeness["label"] == "说明书"

    final_structure = structure_log[-1]
    assert final_structure["processed"] == 2
    assert final_structure["total"] == 2

    final_consistency = consistency_log[-1]
    assert final_consistency["processed"] == len(FIELDS)
    assert final_consistency["total"] == len(FIELDS)