Compare commits
29 Commits
f179749cfb
...
56225f40d9
| Author | SHA1 | Date | |
|---|---|---|---|
| 56225f40d9 | |||
| 3e8720e521 | |||
| 32d258bb75 | |||
| 0f9fb980f2 | |||
| 9e27c4c684 | |||
| 1b4a10b5ba | |||
| 911e5378e8 | |||
| 8f16675a92 | |||
| 945669b9c2 | |||
| a34684e490 | |||
| 72f18167c5 | |||
| b8d711729d | |||
| f46d9c5be6 | |||
| 462d3ec5f5 | |||
| 12b476a8ef | |||
| 48d94884b9 | |||
| 4e46f27c28 | |||
| d39e3fe2d5 | |||
| d88d642f6a | |||
| 1bdc7322cf | |||
| bbd2d3532a | |||
| bd805203f1 | |||
| 4c28466fe4 | |||
| ec89e62661 | |||
| 44d31d2a14 | |||
| 26490f7c46 | |||
| 2a4dd6cfab | |||
| f52dcc197d | |||
| 665403735a |
@@ -105,13 +105,40 @@ LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
|
|||||||
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1")
|
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1")
|
||||||
LLM_MODEL = os.environ.get("LLM_MODEL", "")
|
LLM_MODEL = os.environ.get("LLM_MODEL", "")
|
||||||
|
|
||||||
|
REGULATORY_RAG_PROVIDER = os.environ.get("REGULATORY_RAG_PROVIDER", "siliconflow")
|
||||||
|
REGULATORY_RAG_CHROMA_PATH = os.environ.get(
|
||||||
|
"REGULATORY_RAG_CHROMA_PATH",
|
||||||
|
str(MEDIA_ROOT / "regulatory_review" / "rag" / "chroma"),
|
||||||
|
)
|
||||||
|
REGULATORY_RAG_COLLECTION = os.environ.get(
|
||||||
|
"REGULATORY_RAG_COLLECTION",
|
||||||
|
"nmpa_ivd_registration_v1",
|
||||||
|
)
|
||||||
|
REGULATORY_REVIEW_ASYNC = os.environ.get("REGULATORY_REVIEW_ASYNC", "true").lower() == "true"
|
||||||
|
REGULATORY_LLM_REVIEW_MAX_ATTEMPTS = int(os.environ.get("REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", "3"))
|
||||||
|
REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", "0.5"))
|
||||||
|
REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", "15"))
|
||||||
|
SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1")
|
||||||
|
SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", "")
|
||||||
|
SILICONFLOW_EMBEDDING_MODEL = os.environ.get(
|
||||||
|
"SILICONFLOW_EMBEDDING_MODEL",
|
||||||
|
"Qwen/Qwen3-Embedding-4B",
|
||||||
|
)
|
||||||
|
SILICONFLOW_EMBEDDING_DIMENSIONS = int(os.environ.get("SILICONFLOW_EMBEDDING_DIMENSIONS", "1024"))
|
||||||
|
|
||||||
LOGGING = {
|
LOGGING = {
|
||||||
"version": 1,
|
"version": 1,
|
||||||
"disable_existing_loggers": False,
|
"disable_existing_loggers": False,
|
||||||
|
"filters": {
|
||||||
|
"suppress_workflow_status_poll": {
|
||||||
|
"()": "review_agent.logging_filters.SuppressWorkflowStatusPollFilter",
|
||||||
|
},
|
||||||
|
},
|
||||||
"handlers": {
|
"handlers": {
|
||||||
"console": {
|
"console": {
|
||||||
"class": "logging.StreamHandler",
|
"class": "logging.StreamHandler",
|
||||||
"formatter": "verbose",
|
"formatter": "verbose",
|
||||||
|
"filters": ["suppress_workflow_status_poll"],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"formatters": {
|
"formatters": {
|
||||||
@@ -125,5 +152,10 @@ LOGGING = {
|
|||||||
"level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"),
|
"level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"),
|
||||||
"propagate": True,
|
"propagate": True,
|
||||||
},
|
},
|
||||||
|
"django.server": {
|
||||||
|
"handlers": ["console"],
|
||||||
|
"level": "INFO",
|
||||||
|
"propagate": False,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
BIN
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
Normal file
BIN
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
Normal file
Binary file not shown.
394
docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md
Normal file
394
docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,394 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表需求分析
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始材料 | docs/0.原始材料/【模拟题二】试剂盒临床注册文件准备与审核Agent.docx |
|
||||||
|
| 法规模板来源 | docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告 |
|
||||||
|
| 功能主题 | 从产品文件中提取关键信息并自动填写至指定申报模板 |
|
||||||
|
| 分析日期 | 2026-06-07 |
|
||||||
|
| 分析版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、需求背景
|
||||||
|
|
||||||
|
试剂盒及体外诊断试剂注册申报过程中,注册人员需要将同一批产品关键信息重复填写到注册证格式文件、变更注册或备案文件、安全和性能基本原则清单等申报材料中。人工复制粘贴容易出现字段遗漏、表述不一致、来源不可追溯和模板误改等问题。
|
||||||
|
|
||||||
|
原始任务中的第 3 条能力要求系统能够“从产品文件中提取关键信息并自动填写至目标文件”。本功能目标是:系统基于用户上传的产品说明书、产品技术要求、检测报告、性能研究资料等文件,自动抽取产品名称、检测靶标、适用范围、储存条件、性能指标等核心信息,复制指定法规模板生成可填写副本,将抽取结果写入模板,并输出 Word 与 PDF 两种下载文件。
|
||||||
|
|
||||||
|
本功能是前两批能力的后续增强:依赖第一批文件汇总结果定位产品文件,复用第二批文本抽取、适用条件确认和一致性核查能力,同时新增“模板识别、字段映射、模板填充、冲突高亮、PDF 转换、来源追溯”能力。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、需求范围
|
||||||
|
|
||||||
|
### 2.1 本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 目标模板复制 | 从原始法规资料中复制指定模板,不覆盖原始文件 |
|
||||||
|
| 2 | 注册类型选择 | 首次注册填写注册证格式;变更注册或备案填写变更注册(备案)文件格式 |
|
||||||
|
| 3 | 安全和性能基本原则清单填写 | 无论首次注册或变更注册,均生成并填写安全和性能基本原则清单 |
|
||||||
|
| 4 | 产品信息提取 | 从产品说明书、产品技术要求、检测报告、性能研究资料等文件中抽取模板所需字段 |
|
||||||
|
| 5 | 模板字段识别 | 读取目标模板中的表格、段落、占位栏位和清单条目,建立字段映射 |
|
||||||
|
| 6 | 自动填入模板 | 将抽取字段写入模板副本,缺失字段保持留空 |
|
||||||
|
| 7 | 冲突标记 | 同一字段在多个文件中不一致时,按说明书为准填写,并在模板中黄色底色、红色字体标记 |
|
||||||
|
| 8 | 冲突摘要展示 | AI 对话框展示冲突字段、采用值、冲突来源和待用户下载确认提示 |
|
||||||
|
| 9 | Word 导出 | 输出填好的 `.docx` 或可编辑 Word 文件 |
|
||||||
|
| 10 | PDF 导出 | 将填好的 Word 转换为 PDF,尽量保持原 Word 模板版式一致,可用于正式提交前预览 |
|
||||||
|
| 11 | 来源追溯 | 允许额外输出字段来源追溯清单,记录字段来源文件、文本片段、冲突状态和填入目标 |
|
||||||
|
|
||||||
|
### 2.2 非本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 直接覆盖原始法规模板 | 原始材料只作为模板来源,不允许被改写 |
|
||||||
|
| 2 | 自动代替人工最终确认 | 系统生成带标记文件,用户自行下载核对确认 |
|
||||||
|
| 3 | 在线提交 NMPA 系统 | 本期只生成申报文件,不对接外部申报系统 |
|
||||||
|
| 4 | 全部法规表单覆盖 | 本期仅覆盖用户指定的三个目标模板 |
|
||||||
|
| 5 | 复杂版式人工校订 | 系统尽量保持模板版式,复杂错位仍需人工最终复核 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、目标模板
|
||||||
|
|
||||||
|
本期一共处理三个目标模板。用户此前重复提到“体外诊断试剂安全和性能基本原则清单”,经确认属于误填,实际只有一个该清单模板。
|
||||||
|
|
||||||
|
| 序号 | 模板名称 | 原始文件 | 使用条件 | 输出要求 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| 1 | 中华人民共和国医疗器械注册证(体外诊断试剂)(格式) | 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx | 首次注册 | Word + PDF |
|
||||||
|
| 2 | 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式) | 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式).doc | 变更注册或备案 | Word + PDF |
|
||||||
|
| 3 | 体外诊断试剂安全和性能基本原则清单 | 体外诊断试剂安全和性能基本原则清单.doc | 首次注册、变更注册、备案均适用 | Word + PDF |
|
||||||
|
|
||||||
|
### 3.1 已识别注册证模板字段
|
||||||
|
|
||||||
|
从 `中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx` 中已识别到以下表格栏目:
|
||||||
|
|
||||||
|
| 字段 | 填写规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 注册人名称 | 从申请人、注册人、企业信息类文件中抽取 |
|
||||||
|
| 注册人住所 | 从申请人、注册人、企业信息类文件中抽取 |
|
||||||
|
| 生产地址 | 从注册资料、说明书、质量体系或生产信息文件中抽取 |
|
||||||
|
| 代理人名称 | 进口体外诊断试剂适用,境内产品可留空 |
|
||||||
|
| 代理人住所 | 进口体外诊断试剂适用,境内产品可留空 |
|
||||||
|
| 产品名称 | 优先取说明书字段 |
|
||||||
|
| 包装规格 | 对应型号规格、包装规格 |
|
||||||
|
| 主要组成成分 | 优先取说明书和产品技术要求 |
|
||||||
|
| 预期用途 | 对应适用范围、预期用途 |
|
||||||
|
| 产品储存条件及有效期 | 对应储存条件、有效期 |
|
||||||
|
| 附件 | 默认包含产品技术要求、说明书,可根据实际文件匹配补充 |
|
||||||
|
| 其他内容 | 未识别或需人工确认时留空 |
|
||||||
|
| 备注 | 未识别或需人工确认时留空 |
|
||||||
|
|
||||||
|
### 3.2 模板解析约束
|
||||||
|
|
||||||
|
变更注册(备案)文件格式和安全和性能基本原则清单当前为 `.doc` 格式。系统实施时需要支持以下任一方案:
|
||||||
|
|
||||||
|
| 方案 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| LibreOffice 转换 | 使用 LibreOffice/soffice 将 `.doc` 转为 `.docx` 后识别和填写 |
|
||||||
|
| 预转换模板 | 项目内预先保存经人工确认的 `.docx` 模板副本 |
|
||||||
|
| Office COM 自动化方案 | 在 Windows 环境通过 Office COM 自动化读取和转换 `.doc` 模板 |
|
||||||
|
|
||||||
|
无论采用哪种方式,转换后的模板必须保留原文件表格结构、分页、字体和版式,PDF 导出需以填好的 Word 为来源。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、用户角色与使用场景
|
||||||
|
|
||||||
|
| 角色 | 诉求 | 典型场景 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册人员 | 减少重复填表,提高字段一致性 | 上传注册资料包后生成已填注册证格式和基本原则清单 |
|
||||||
|
| 变更注册负责人 | 根据变更类型生成变更注册或备案文件 | 上传变更资料后生成已填变更注册(备案)文件 |
|
||||||
|
| 审核人员 | 快速定位字段来源和冲突 | 下载带冲突高亮的 Word/PDF,并查看 AI 对话框冲突摘要 |
|
||||||
|
| 系统管理员 | 维护模板版本和转换能力 | 更新法规模板、检查 PDF 转换服务和导出记录 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、业务流程分析
|
||||||
|
|
||||||
|
### 5.1 主流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户上传产品注册资料
|
||||||
|
-> 系统执行文件目录与页数汇总
|
||||||
|
-> 系统执行法规核查前置文本抽取
|
||||||
|
-> 系统识别注册类型:首次注册、变更注册或备案
|
||||||
|
-> 系统选择本次适用目标模板
|
||||||
|
-> 系统复制原始模板到批次工作目录
|
||||||
|
-> 系统读取目标模板栏目和清单条目
|
||||||
|
-> 系统从产品文件中抽取模板所需字段
|
||||||
|
-> 系统按字段优先级合并抽取结果
|
||||||
|
-> 如字段存在跨文件冲突,系统按说明书为准填入,并做黄色底色、红色字体标记
|
||||||
|
-> 缺失字段保持留空
|
||||||
|
-> 系统逐条判断安全和性能基本原则清单的适用性、符合性证据和证明文件位置
|
||||||
|
-> 系统生成已填 Word 文件
|
||||||
|
-> 系统将已填 Word 转换为 PDF
|
||||||
|
-> 系统生成来源追溯清单
|
||||||
|
-> AI 对话框展示生成结果、冲突字段摘要和下载链接
|
||||||
|
-> 用户下载 Word/PDF 自行确认
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 注册类型分支
|
||||||
|
|
||||||
|
| 注册类型 | 生成文件 |
|
||||||
|
| --- | --- |
|
||||||
|
| 首次注册 | 注册证格式 Word/PDF;安全和性能基本原则清单 Word/PDF |
|
||||||
|
| 变更注册 | 变更注册(备案)文件 Word/PDF;安全和性能基本原则清单 Word/PDF |
|
||||||
|
| 备案 | 变更注册(备案)文件 Word/PDF;安全和性能基本原则清单 Word/PDF |
|
||||||
|
| 注册类型无法识别 | AI 对话框提示待确认;默认不生成正式的注册证或变更文件,如需输出仅可生成带待确认标记的草稿版本(是否允许生成草稿见“待后续确认事项”第 6 项) |
|
||||||
|
|
||||||
|
### 5.3 异常流程
|
||||||
|
|
||||||
|
| 异常场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板文件不存在 | 批次标记失败,对话框提示缺少目标模板 |
|
||||||
|
| `.doc` 模板无法转换 | 对应模板导出失败,其他模板继续生成 |
|
||||||
|
| 字段未提取到 | 目标栏位留空,来源追溯清单记录为空 |
|
||||||
|
| 字段冲突 | 按说明书为准填入,模板内高亮标记,对话框展示冲突摘要 |
|
||||||
|
| PDF 转换失败 | 保留 Word 下载,提示 PDF 生成失败原因 |
|
||||||
|
| 模板版式明显错位 | 标记为需人工复核,不阻断 Word 文件下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、信息提取与字段规则
|
||||||
|
|
||||||
|
### 6.1 字段范围
|
||||||
|
|
||||||
|
字段范围不固定写死,应以三个目标模板的实际栏目和清单条目为准动态建立。Demo 阶段优先覆盖以下字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | 产品标准名称 |
|
||||||
|
| 检测靶标 | 被检测物、基因、抗原、抗体、病原体或生物标志物 |
|
||||||
|
| 适用范围/预期用途 | 适用人群、样本类型、检测目的、临床用途 |
|
||||||
|
| 储存条件 | 温度、避光、防潮等保存条件 |
|
||||||
|
| 性能指标 | 分析灵敏度、特异性、重复性、准确度、检出限等 |
|
||||||
|
| 型号规格/包装规格 | 规格型号、包装规格、人份数或测试数 |
|
||||||
|
| 样本类型 | 血清、血浆、全血、咽拭子等 |
|
||||||
|
| 有效期 | 产品有效期或稳定性期限 |
|
||||||
|
| 主要组成成分 | 试剂、校准品、质控品、耗材等组成 |
|
||||||
|
| 检验原理 | 反应原理、方法学或检测平台 |
|
||||||
|
| 注册人/申请人 | 注册申请主体 |
|
||||||
|
| 生产地址 | 生产场所地址 |
|
||||||
|
|
||||||
|
### 6.2 来源文件优先级
|
||||||
|
|
||||||
|
| 优先级 | 文件类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 说明书 | 字段冲突时默认以说明书为准 |
|
||||||
|
| 2 | 产品技术要求 | 用于补充性能指标、检验方法、组成成分等字段 |
|
||||||
|
| 3 | 注册检验报告/检测报告 | 用于补充性能指标、样本信息、检验依据和结论 |
|
||||||
|
| 4 | 性能研究资料 | 用于补充安全和性能基本原则清单证据 |
|
||||||
|
| 5 | 其他注册资料 | 用于补充申请人、生产地址、附件清单等信息 |
|
||||||
|
|
||||||
|
### 6.3 冲突处理规则
|
||||||
|
|
||||||
|
| 场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 说明书与其他文件字段不一致 | 按说明书值填入模板 |
|
||||||
|
| 多个非说明书文件不一致,说明书缺失 | 目标字段留空或取最高优先级来源,具体规则由实现阶段配置 |
|
||||||
|
| 字段被高亮标记 | 黄色底色、红色字体,提示用户下载后确认 |
|
||||||
|
| AI 对话框展示 | 展示字段名、采用值、冲突值、来源文件和目标模板 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、安全和性能基本原则清单填写规则
|
||||||
|
|
||||||
|
安全和性能基本原则清单不只填写基础产品信息,还需要根据产品文件内容逐条判断清单条目的适用性、符合性证据和证明文件位置。
|
||||||
|
|
||||||
|
| 填写项 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 适用/不适用 | 根据产品特性、检测方法、样本类型、是否含仪器/软件/灭菌/生物材料等信息判断 |
|
||||||
|
| 符合性说明 | 从产品技术要求、说明书、风险管理、性能研究、稳定性研究等文件中提取证据摘要 |
|
||||||
|
| 证明文件位置 | 填写证据文件名、章节、页码或可定位文本片段 |
|
||||||
|
| 无法判断 | 留空或标记待人工确认,来源追溯清单记录原因 |
|
||||||
|
| 冲突证据 | 如不同文件对同一条款适用性或证据描述冲突,保留高亮并在对话框列出 |
|
||||||
|
|
||||||
|
逐条判断结果需要可追溯,不能只输出“适用”或“不适用”结论。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、输出要求
|
||||||
|
|
||||||
|
### 8.1 文件命名
|
||||||
|
|
||||||
|
文件命名规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
批次号-产品名称-注册证格式.docx
|
||||||
|
批次号-产品名称-注册证格式.pdf
|
||||||
|
批次号-产品名称-变更注册备案文件.docx
|
||||||
|
批次号-产品名称-变更注册备案文件.pdf
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.docx
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.pdf
|
||||||
|
批次号-产品名称-字段来源追溯清单.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
产品名称为空时,可使用 `未识别产品名称` 作为文件名占位。
|
||||||
|
|
||||||
|
### 8.2 AI 对话框摘要
|
||||||
|
|
||||||
|
AI 对话框应展示生成结果、下载链接和冲突字段摘要。
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成申报模板自动填表文件。
|
||||||
|
|
||||||
|
| 文件 | Word | PDF |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册证格式 | 下载 | 下载 |
|
||||||
|
| 安全和性能基本原则清单 | 下载 | 下载 |
|
||||||
|
|
||||||
|
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.3 Word 输出
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板副本 | 从原始法规模板复制生成,不覆盖原始文件 |
|
||||||
|
| 版式保持 | 保留原模板表格、段落、分页、字体和标题结构 |
|
||||||
|
| 冲突高亮 | 黄色底色、红色字体 |
|
||||||
|
| 缺失字段 | 留空,不填“待补充” |
|
||||||
|
| 可编辑 | 用户可下载后继续人工修改 |
|
||||||
|
|
||||||
|
### 8.4 PDF 输出
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 来源 | 由填好的 Word 转换生成 |
|
||||||
|
| 版式 | 尽量与原 Word 模板一致 |
|
||||||
|
| 用途 | 可作为正式提交前预览 |
|
||||||
|
| 失败处理 | PDF 失败不影响 Word 下载 |
|
||||||
|
|
||||||
|
### 8.5 来源追溯清单
|
||||||
|
|
||||||
|
来源追溯清单允许额外生成,建议至少包含:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标模板 | 字段填入哪个模板 |
|
||||||
|
| 目标栏位/条目 | 字段对应的表格栏位或清单条目 |
|
||||||
|
| 填入值 | 实际写入模板的值 |
|
||||||
|
| 来源文件 | 取值来源文件 |
|
||||||
|
| 来源片段 | 支撑取值的文本片段 |
|
||||||
|
| 是否冲突 | 是/否 |
|
||||||
|
| 冲突值 | 其他文件中的不同值 |
|
||||||
|
| 处理方式 | 采用说明书、留空、高亮、待人工确认等 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、功能模块梳理
|
||||||
|
|
||||||
|
| 序号 | 功能名称 | 功能描述 | 优先级 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 1 | 模板管理 | 维护三个目标模板路径、版本和适用注册类型 | P0 |
|
||||||
|
| 2 | 模板副本生成 | 将原始模板复制到批次工作目录 | P0 |
|
||||||
|
| 3 | 模板结构识别 | 识别模板中的表格字段、段落占位、清单条目 | P0 |
|
||||||
|
| 4 | 产品字段抽取 | 从上传文件中抽取模板所需产品字段 | P0 |
|
||||||
|
| 5 | 字段合并与冲突检测 | 按说明书优先级合并字段,并识别跨文件冲突 | P0 |
|
||||||
|
| 6 | Word 模板填充 | 将字段写入 Word 模板副本 | P0 |
|
||||||
|
| 7 | 冲突高亮 | 对冲突字段应用黄色底色和红色字体 | P0 |
|
||||||
|
| 8 | 基本原则逐条判断 | 判断安全和性能条目的适用性、符合性证据和证明文件位置 | P0 |
|
||||||
|
| 9 | PDF 转换 | 将填好的 Word 转为 PDF | P0 |
|
||||||
|
| 10 | 下载链接生成 | 在 AI 对话框提供 Word/PDF 下载链接 | P0 |
|
||||||
|
| 11 | 来源追溯清单导出 | 输出字段来源、冲突和填入目标 | P1 |
|
||||||
|
| 12 | 版式 QA | 对 Word/PDF 版式进行自动或人工可见检查 | P1 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、数据实体分析
|
||||||
|
|
||||||
|
| 实体名称 | 字段说明 | 关联实体 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 自动填表批次 | 批次编号、用户、会话、注册类型、产品名称、状态、错误信息、创建时间、完成时间 | 文件汇总批次、法规核查批次 |
|
||||||
|
| 模板副本 | 模板名称、模板类型、原始模板路径、副本路径、模板版本、适用条件 | 自动填表批次 |
|
||||||
|
| 提取字段 | 字段名、填入值、来源文件、来源片段、来源优先级、是否冲突、冲突详情 | 自动填表批次 |
|
||||||
|
| 填表结果文件 | 文件类型、文件名、Word 路径、PDF 路径、下载状态 | 自动填表批次 |
|
||||||
|
| 清单条目判断 | 条目编号、条目内容、适用性、符合性证据、证明文件位置、判断来源 | 自动填表批次 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、非功能性需求
|
||||||
|
|
||||||
|
### 11.1 可追溯性
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 字段来源可追溯 | 每个填入字段应能追溯到来源文件和文本片段 |
|
||||||
|
| 模板版本可追溯 | 每次生成记录原始模板文件名、版本和路径 |
|
||||||
|
| 冲突处理可追溯 | 冲突字段记录采用值、冲突值和处理规则 |
|
||||||
|
| 输出文件可追溯 | Word/PDF 文件关联批次、用户和会话 |
|
||||||
|
|
||||||
|
### 11.2 安全要求
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始模板保护 | 不允许覆盖或修改原始法规资料目录中的模板 |
|
||||||
|
| 下载权限 | Word/PDF/追溯清单仅允许当前会话授权用户下载 |
|
||||||
|
| 敏感信息保护 | 对话框只展示必要冲突摘要,不展示大段敏感原文 |
|
||||||
|
| 文件隔离 | 不同用户、不同批次的模板副本和导出文件隔离存储 |
|
||||||
|
|
||||||
|
### 11.3 版式要求
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| Word 版式 | 尽量保持原模板表格、字体、分页和段落结构 |
|
||||||
|
| PDF 版式 | 与填好后的 Word 一致,可用于正式提交前预览 |
|
||||||
|
| 高亮可见 | 冲突字段在 Word 和 PDF 中均应能被用户识别 |
|
||||||
|
| 缺失字段不污染模板 | 未提取字段留空,不填入系统提示语 |
|
||||||
|
|
||||||
|
### 11.4 性能要求
|
||||||
|
|
||||||
|
| 场景 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 小批次资料 | 50 个文件以内,应在 1 分钟内完成字段抽取和模板生成 |
|
||||||
|
| 中等批次资料 | 200 个文件以内支持后台异步处理和进度提示 |
|
||||||
|
| 单个模板失败 | 不影响其他适用模板生成 |
|
||||||
|
| 单个字段失败 | 不影响整份模板生成,字段留空并记录原因 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、待后续确认事项
|
||||||
|
|
||||||
|
| 序号 | 待确认项 | 当前建议 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | `.doc` 模板转换方案 | 优先使用 LibreOffice/soffice 转 docx;无法部署时预置人工确认版 docx 模板 |
|
||||||
|
| 2 | 变更注册(备案)文件字段清单 | 需在模板可解析后补充字段映射 |
|
||||||
|
| 3 | 安全和性能基本原则清单条目结构 | 需在模板可解析后拆解条目编号、要求、适用性和证据栏 |
|
||||||
|
| 4 | 说明书识别规则 | 需明确如何从上传资料中判定哪份文件是说明书 |
|
||||||
|
| 5 | PDF 转换质量标准 | 需明确是否要求逐页渲染检查、页数一致和关键表格不跨页错位 |
|
||||||
|
| 6 | 注册类型无法识别时是否允许生成草稿 | 建议允许生成安全和性能基本原则清单,注册证或变更文件等待确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 验收标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 模板复制 | 系统生成模板副本,不修改原始法规模板 |
|
||||||
|
| 2 | 首次注册文件选择 | 首次注册场景生成注册证格式和安全和性能基本原则清单 |
|
||||||
|
| 3 | 变更注册/备案文件选择 | 变更注册或备案场景生成变更注册(备案)文件和安全和性能基本原则清单 |
|
||||||
|
| 4 | 字段自动填写 | 产品名称、预期用途、储存条件、包装规格等字段能自动写入对应栏目 |
|
||||||
|
| 5 | 缺失字段留空 | 未提取到的字段保持空白 |
|
||||||
|
| 6 | 冲突字段高亮 | 字段冲突时按说明书值填入,并在 Word/PDF 中黄色底色、红色字体标记 |
|
||||||
|
| 7 | 冲突摘要展示 | AI 对话框展示冲突字段、采用值、冲突来源和处理方式 |
|
||||||
|
| 8 | 基本原则清单判断 | 系统能逐条输出适用/不适用、符合性证据和证明文件位置 |
|
||||||
|
| 9 | Word 下载 | 对话框提供填好后的 Word 下载链接 |
|
||||||
|
| 10 | PDF 下载 | 对话框提供由 Word 转换生成的 PDF 下载链接 |
|
||||||
|
| 11 | 来源追溯 | 可导出字段来源追溯清单,记录字段来源和冲突情况 |
|
||||||
|
| 12 | 异常不中断 | 单个字段、单个模板或 PDF 转换失败时,其他结果仍可正常输出 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、下一步建议
|
||||||
|
|
||||||
|
1. 将两个 `.doc` 原始模板转换为可解析的 `.docx` 工作模板,并人工确认版式无明显变化。
|
||||||
|
2. 拆解三个模板的字段、表格和清单条目,形成模板字段映射配置。
|
||||||
|
3. 扩展产品信息抽取字段,优先覆盖注册证模板已识别字段和安全和性能基本原则清单证据字段。
|
||||||
|
4. 设计冲突高亮写入规则,确保 Word 与 PDF 中均可见。
|
||||||
|
5. 接入 Word 到 PDF 转换能力,并建立页数、版式和关键表格的转换质量检查。
|
||||||
816
docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
816
docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,816 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表功能设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 依赖功能设计 | docs/2.功能设计/1.自动汇总.md;docs/2.功能设计/2.NMPA注册资料法规核查与整改闭环.md |
|
||||||
|
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计目标
|
||||||
|
|
||||||
|
本功能作为独立工作流 `application_form_fill` 建设,由用户在 AI 对话中触发,例如“帮我填注册证”“给我这个内容对应的表格”“为我该方案生成申报模板”“生成安全和性能基本原则清单”“把产品信息填到申报模板里”等。用户可以明确指定目标模板;未指定时,系统根据识别出的注册类型生成当前注册类型适用的全部模板。
|
||||||
|
|
||||||
|
本功能复用第一批文件汇总结果作为文件来源,复用第二批法规核查中的文本抽取、适用条件识别、LLM 调用、飞书通知和导出下载能力,但拥有独立批次、独立工作流卡片和独立过程产物。系统复制原始法规模板到批次工作目录,不覆盖原始文件;随后按模板配置识别应填字段,使用规则/正则抽取与 LLM 结构化抽取并行处理,合并字段、识别冲突、写入 Word 模板,并在 AI 对话框和飞书通知中提示生成结果与冲突摘要。
|
||||||
|
|
||||||
|
Demo 阶段优先保证 Word 模板自动填写和下载。PDF 转换作为待办增强项:功能设计保留 PDF 导出节点和数据结构,实施时可先返回 Word 与追溯清单,并在待办清单记录 PDF 转换能力。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、与既有功能的关系
|
||||||
|
|
||||||
|
### 2.1 复用边界
|
||||||
|
|
||||||
|
| 能力 | 处理方式 | 现有代码/模型 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 对话与用户权限 | 复用 | `Conversation`、`Message` |
|
||||||
|
| 附件上传与文件绑定 | 复用 | `FileAttachment`、`FileSummaryBatchAttachment` |
|
||||||
|
| 文件汇总与页数统计 | 复用 | `FileSummaryBatch`、`FileSummaryItem`、`file_summary.workflow` |
|
||||||
|
| 文本抽取 | 复用并扩展 | `regulatory_review/services/text_extract.py`、`rag_index.py` |
|
||||||
|
| 适用条件候选 | 复用并扩展 | `regulatory_review/services/info_extract.py` |
|
||||||
|
| LLM 调用 | 复用 | `review_agent/llm.py`、`regulatory_review/services/llm_review.py` |
|
||||||
|
| 导出记录与下载 | 扩展复用 | `ExportedSummaryFile` |
|
||||||
|
| 过程产物 | 复用 | `RegulatoryArtifact` 或新增填表过程产物 |
|
||||||
|
| 飞书通知 | 复用并扩展 | `regulatory_review/services/feishu_notifier.py` |
|
||||||
|
| SSE 工作流事件 | 复用 | `WorkflowNodeRun`、`WorkflowEvent` |
|
||||||
|
|
||||||
|
### 2.2 新增边界
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立填表批次 | 新增 `ApplicationFormFillBatch`,不强绑法规核查批次 |
|
||||||
|
| 模板配置 | 新增 YAML 配置,维护模板路径、适用条件、字段映射和输出规则 |
|
||||||
|
| 模板选择 | 根据用户指定模板和注册类型选择生成范围 |
|
||||||
|
| 规则/正则与 LLM 并行抽取 | 两路抽取并行执行,最后统一合并 |
|
||||||
|
| 字段冲突归并 | 按来源文件优先级处理,说明书优先;冲突字段高亮 |
|
||||||
|
| Word 模板填充 | 使用 `python-docx` 对 `.docx` 表格、段落和占位字段写入 |
|
||||||
|
| `.doc` 模板转换 | 使用 LibreOffice/soffice 或预转换 `.docx` 模板 |
|
||||||
|
| 字段来源追溯 | 输出 Excel/JSON 追溯清单,记录抽取、合并和冲突证据 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、总体架构
|
||||||
|
|
||||||
|
### 3.1 架构原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 填表流程拥有独立批次、节点和卡片,workflow_type 为 `application_form_fill` |
|
||||||
|
| 复用文件汇总 | 填表不重新实现上传扫描,默认使用当前对话最近成功的 `FileSummaryBatch` |
|
||||||
|
| 用户指令优先 | 用户明确指定模板或注册类型时,优先使用用户指令 |
|
||||||
|
| 配置驱动 | 模板路径、字段映射、适用条件和输出规则写入 YAML 配置 |
|
||||||
|
| Word 优先 | Demo 阶段优先生成可编辑 Word,PDF 作为增强项进入待办 |
|
||||||
|
| 可追溯 | 规则抽取、LLM 抽取、合并结果、冲突列表和来源证据均留底 |
|
||||||
|
| 失败隔离 | 单字段、单模板或 PDF 转换失败不影响其他模板输出 |
|
||||||
|
| 通知可控 | 填表完成后可通过飞书通知上传人,通知内容只包含摘要和下载提示 |
|
||||||
|
|
||||||
|
### 3.2 逻辑架构
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["AI 对话页"] --> B["意图识别 application_form_fill"]
|
||||||
|
B --> C{"本次消息是否带附件"}
|
||||||
|
C -->|"是"| D["先执行文件汇总工作流"]
|
||||||
|
C -->|"否"| E["查找最近成功 FileSummaryBatch"]
|
||||||
|
D --> E
|
||||||
|
E --> F["ApplicationFormFillBatch"]
|
||||||
|
F --> G["FormFillWorkflowExecutor"]
|
||||||
|
G --> H["模板配置 YAML"]
|
||||||
|
G --> I["模板选择服务"]
|
||||||
|
G --> J["文本抽取服务"]
|
||||||
|
J --> K1["规则/正则抽取"]
|
||||||
|
J --> K2["LLM 结构化抽取"]
|
||||||
|
K1 --> L["字段合并与冲突归并"]
|
||||||
|
K2 --> L
|
||||||
|
L --> M["Word 模板填充服务"]
|
||||||
|
M --> N["追溯清单导出"]
|
||||||
|
M --> O["PDF 转换服务 P1"]
|
||||||
|
N --> P["ExportedSummaryFile"]
|
||||||
|
O --> P
|
||||||
|
G --> Q["WorkflowEvent/SSE"]
|
||||||
|
Q --> R["自动填表工作流卡片"]
|
||||||
|
G --> S["FeishuNotifier"]
|
||||||
|
S --> T["上传人通知"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 技术选型
|
||||||
|
|
||||||
|
| 设计项 | Demo 方案 | 后续演进 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Web 框架 | Django,沿用当前 `review_agent` 应用 | 保持 Django,必要时拆分独立 app |
|
||||||
|
| 工作流编排 | 新增轻量 `FormFillWorkflowExecutor` | 接入 LangGraph 子图 |
|
||||||
|
| 后台执行 | Django 后台线程,沿用现有工作流方式 | Celery/RQ + Redis |
|
||||||
|
| 工作流状态 | `WorkflowNodeRun` + `WorkflowEvent`,新增 workflow_type | 独立工作流事件中心 |
|
||||||
|
| 模板配置 | YAML,建议路径 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` | 数据库模板管理后台 |
|
||||||
|
| Word 处理 | `python-docx` 写入 `.docx` 表格和段落,高亮冲突字段 | OOXML 精细 patch、内容控件 SDT |
|
||||||
|
| `.doc` 转换 | LibreOffice/soffice headless 转 `.docx`;无法部署时预置 `.docx` 工作模板 | 模板入库前统一转换和人工校验 |
|
||||||
|
| PDF 导出 | P1 待办:LibreOffice/soffice headless 转 PDF | 逐页渲染 QA、版式差异检测 |
|
||||||
|
| Excel 追溯清单 | `openpyxl` | 增加多 Sheet 审核视图 |
|
||||||
|
| 文本抽取 | 复用 `text_extract.py`、`rag_index.py` | OCR、文档文本缓存 |
|
||||||
|
| 字段抽取 | 规则/正则与 LLM 结构化抽取并行,合并后输出 | 可配置抽取器和置信度模型 |
|
||||||
|
| 飞书通知 | 复用 `FeishuNotifier`,Demo 可 mock 或 CLI | 飞书 Webhook/API |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、触发与模板选择设计
|
||||||
|
|
||||||
|
### 4.1 意图识别
|
||||||
|
|
||||||
|
填表工作流通过用户对话触发。意图识别可先采用关键词规则,必要时调用现有 LLM 路由能力。
|
||||||
|
|
||||||
|
| 触发表达 | 触发结果 |
|
||||||
|
| --- | --- |
|
||||||
|
| 帮我填注册证 | 触发填表,指定注册证格式 |
|
||||||
|
| 给我这个内容对应的表格 | 触发填表,未指定模板 |
|
||||||
|
| 为我该方案生成申报模板 | 触发填表,未指定模板 |
|
||||||
|
| 生成安全和性能基本原则清单 | 触发填表,指定安全和性能基本原则清单 |
|
||||||
|
| 把产品信息填到申报模板里 | 触发填表,未指定模板 |
|
||||||
|
| 只生成变更注册备案文件 | 触发填表,指定变更注册(备案)文件 |
|
||||||
|
|
||||||
|
### 4.2 文件来源选择
|
||||||
|
|
||||||
|
| 场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 本次消息带新附件 | 先自动执行文件汇总,汇总成功后启动填表 |
|
||||||
|
| 本次消息无附件 | 默认使用当前对话最近一次成功 `FileSummaryBatch` |
|
||||||
|
| 无成功汇总批次 | 对话框提示用户先上传资料或补充附件 |
|
||||||
|
| 用户明确指定历史批次 | 校验批次属于当前对话和当前用户后使用 |
|
||||||
|
|
||||||
|
### 4.3 注册类型识别优先级
|
||||||
|
|
||||||
|
注册类型用于决定默认生成哪些模板。优先级如下:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户话语明确指定
|
||||||
|
-> 当前对话已确认的法规核查条件
|
||||||
|
-> 上传文件内容抽取结果
|
||||||
|
-> 无法识别
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.4 模板选择规则
|
||||||
|
|
||||||
|
| 场景 | 生成模板 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户未指定模板,注册类型为首次注册 | 注册证格式;安全和性能基本原则清单 |
|
||||||
|
| 用户未指定模板,注册类型为变更注册或备案 | 变更注册(备案)文件;安全和性能基本原则清单 |
|
||||||
|
| 用户未指定模板,注册类型无法识别 | 安全和性能基本原则清单;注册证/变更文件进入待确认提示 |
|
||||||
|
| 用户明确指定模板且与注册类型一致 | 只生成用户指定模板 |
|
||||||
|
| 用户明确指定模板但与注册类型不一致 | 允许生成,并在摘要和追溯清单提示“与识别注册类型不一致,需人工确认” |
|
||||||
|
| 用户指定“全部模板” | 生成三个目标模板,并提示用户核对注册类型适用性 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、工作流设计
|
||||||
|
|
||||||
|
### 5.1 节点图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
N1["准备资料"] --> N2["选择模板"]
|
||||||
|
N2 --> N3["复制模板"]
|
||||||
|
N3 --> N4["抽取字段"]
|
||||||
|
N4 --> N5["冲突归并"]
|
||||||
|
N5 --> N6["填写 Word"]
|
||||||
|
N6 --> N7["转换 PDF P1"]
|
||||||
|
N6 --> N8["追溯清单"]
|
||||||
|
N7 --> N9["输出下载"]
|
||||||
|
N8 --> N9
|
||||||
|
N9 --> N10["飞书通知"]
|
||||||
|
N10 --> N11["完成"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 节点定义
|
||||||
|
|
||||||
|
| 节点编码 | 节点名称 | 触发服务 | 成功条件 | 失败处理 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| prepare | 准备资料 | `FormFillWorkflowExecutor` | 找到或生成成功的 `FileSummaryBatch` | 无文件汇总则暂停提示上传 |
|
||||||
|
| template_select | 选择模板 | `TemplateSelectionService` | 输出本次目标模板列表 | 无适用模板则失败 |
|
||||||
|
| template_copy | 复制模板 | `TemplateRepository` | 模板副本进入批次工作目录 | 单模板失败不影响其他模板 |
|
||||||
|
| field_extract | 抽取字段 | `FieldExtractionService` | 规则/正则与 LLM 结果留底 | 单文件失败记录并继续 |
|
||||||
|
| conflict_merge | 冲突归并 | `FieldMergeService` | 输出最终字段和冲突列表 | 无字段时仍生成空模板 |
|
||||||
|
| word_fill | 填写 Word | `WordTemplateFillService` | 生成填好后的 Word 文件 | 单模板失败记录失败 |
|
||||||
|
| pdf_convert | 转换 PDF | `PdfConversionService` | P1:生成 PDF 文件 | PDF 失败标记 partial_success |
|
||||||
|
| trace_export | 追溯清单 | `TraceabilityExportService` | 生成 Excel/JSON 追溯清单 | 失败不影响 Word |
|
||||||
|
| output_export | 输出下载 | `FormFillExportService` | 写入 `ExportedSummaryFile` 并生成下载链接 | 关键 Word 失败则批次失败 |
|
||||||
|
| notify | 飞书通知 | `FeishuNotifier` | 通知上传人生成完成 | 通知失败不影响下载 |
|
||||||
|
| completed | 完成 | 工作流执行器 | 更新批次状态和对话消息 | - |
|
||||||
|
|
||||||
|
### 5.3 状态设计
|
||||||
|
|
||||||
|
| 状态 | 含义 |
|
||||||
|
| --- | --- |
|
||||||
|
| pending | 已创建,等待执行 |
|
||||||
|
| running | 执行中 |
|
||||||
|
| waiting_user | 缺少文件或关键条件,等待用户补充 |
|
||||||
|
| success | Word 和必要追溯产物生成成功 |
|
||||||
|
| partial_success | Word 已生成,但部分模板、PDF、追溯清单或通知失败 |
|
||||||
|
| failed | 所有目标 Word 模板均生成失败 |
|
||||||
|
| skipped | 当前节点不适用,例如 Demo 阶段跳过 PDF |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、模板配置设计
|
||||||
|
|
||||||
|
### 6.1 配置文件路径
|
||||||
|
|
||||||
|
建议新增:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/application_form_fill/templates/application_form_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 配置结构
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: application_form_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||||
|
templates:
|
||||||
|
- code: registration_certificate
|
||||||
|
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
|
||||||
|
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
|
||||||
|
output_label: 注册证格式
|
||||||
|
applies_when:
|
||||||
|
registration_type: ["首次注册"]
|
||||||
|
file_format: docx
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 产品名称
|
||||||
|
sources: ["说明书", "产品技术要求", "注册检验报告"]
|
||||||
|
- key: package_specification
|
||||||
|
label: 包装规格
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 包装规格
|
||||||
|
sources: ["说明书", "产品技术要求"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 模板配置项
|
||||||
|
|
||||||
|
| 配置项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| code | 模板编码,用于用户指定和导出分类 |
|
||||||
|
| name | 模板中文名称 |
|
||||||
|
| source_file | 原始模板文件名 |
|
||||||
|
| working_template | 可选,预转换 `.docx` 工作模板 |
|
||||||
|
| output_label | 文件命名中的模板标签 |
|
||||||
|
| applies_when | 默认适用注册类型 |
|
||||||
|
| fields | 字段映射列表 |
|
||||||
|
| checklist_items | 安全和性能基本原则清单条目映射 |
|
||||||
|
| conversion | `.doc` 转 `.docx` 和 PDF 的转换策略 |
|
||||||
|
|
||||||
|
### 6.4 已知模板字段
|
||||||
|
|
||||||
|
注册证格式当前已从 `.docx` 表格识别到以下字段:注册人名称、注册人住所、生产地址、代理人名称、代理人住所、产品名称、包装规格、主要组成成分、预期用途、产品储存条件及有效期、附件、其他内容、备注。
|
||||||
|
|
||||||
|
变更注册(备案)文件和安全和性能基本原则清单当前为 `.doc`,实施前需通过 LibreOffice/soffice 转换或预置人工确认版 `.docx` 工作模板,再补齐字段映射。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、字段抽取与合并设计
|
||||||
|
|
||||||
|
### 7.1 三层提取链路
|
||||||
|
|
||||||
|
```text
|
||||||
|
模板字段配置
|
||||||
|
-> 文档字段候选提取
|
||||||
|
-> 规则/正则抽取与 LLM 结构化抽取并行
|
||||||
|
-> 字段归一化
|
||||||
|
-> 来源优先级合并
|
||||||
|
-> 冲突识别
|
||||||
|
-> 最终字段包
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 规则/正则抽取
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 标签字段识别 | 识别 `产品名称:`、`预期用途:`、`储存条件:` 等标签行 |
|
||||||
|
| 表格字段识别 | 从 Word/Excel 表格中识别左侧字段名、右侧字段值 |
|
||||||
|
| 章节范围识别 | 从说明书、产品技术要求中按章节提取连续文本 |
|
||||||
|
| 文件类型识别 | 根据文件名、目录名和首页标题判断说明书、产品技术要求、检验报告 |
|
||||||
|
| 证据片段截取 | 保存字段前后上下文,用于追溯清单 |
|
||||||
|
|
||||||
|
### 7.3 LLM 结构化抽取
|
||||||
|
|
||||||
|
LLM 输入为模板字段清单、文件上下文和候选文本片段,输出严格 JSON:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"key": "storage_condition",
|
||||||
|
"label": "产品储存条件及有效期",
|
||||||
|
"value": "2-8℃保存,有效期12个月",
|
||||||
|
"source_file": "说明书.docx",
|
||||||
|
"evidence": "产品储存条件:2-8℃保存...",
|
||||||
|
"confidence": 0.86
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"checklist_items": [
|
||||||
|
{
|
||||||
|
"item_code": "A1",
|
||||||
|
"applicability": "适用",
|
||||||
|
"compliance_evidence": "产品技术要求中规定了性能指标和检验方法",
|
||||||
|
"proof_location": "产品技术要求.docx 第2章"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.4 并行合并规则
|
||||||
|
|
||||||
|
| 场景 | 处理规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 规则和 LLM 值一致 | 合并为同一字段,提高置信度 |
|
||||||
|
| 规则和 LLM 值不一致,但来源文件不同 | 按来源文件优先级处理,说明书优先 |
|
||||||
|
| 规则和 LLM 值不一致,来源文件相同 | 标记冲突,模板中高亮 |
|
||||||
|
| 说明书与其他文件冲突 | 采用说明书值,黄色底色、红色字体标记 |
|
||||||
|
| 说明书缺失,多个来源冲突 | 取最高优先级文件值并标记冲突;无法判断则留空 |
|
||||||
|
| 字段缺失 | 模板留空,追溯清单记录未提取 |
|
||||||
|
|
||||||
|
### 7.5 过程产物留底
|
||||||
|
|
||||||
|
字段抽取结果保存为 `field_extract_result.json`,至少包含:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| regex_results | 规则/正则抽取结果 |
|
||||||
|
| llm_results | LLM 结构化抽取结果 |
|
||||||
|
| merged_fields | 合并后的最终字段 |
|
||||||
|
| conflicts | 冲突字段列表 |
|
||||||
|
| source_evidence | 来源文件和文本片段 |
|
||||||
|
| selected_templates | 本次选择的模板 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、安全和性能基本原则清单设计
|
||||||
|
|
||||||
|
### 8.1 判断策略
|
||||||
|
|
||||||
|
安全和性能基本原则清单采用“候选判断 + 高置信度写入”策略。
|
||||||
|
|
||||||
|
| 步骤 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 条目拆解 | 从模板配置中读取条目编号、原则内容、适用性栏、证据栏、证明文件位置栏 |
|
||||||
|
| 候选判断 | 规则和 LLM 均可给出适用/不适用候选 |
|
||||||
|
| 证据匹配 | 从产品技术要求、说明书、性能研究、稳定性研究、风险管理资料中匹配证明文件 |
|
||||||
|
| 高置信度写入 | 仅将高置信度判断写入 Word |
|
||||||
|
| 低置信度留空 | 证据不足或判断不一致时 Word 留空,追溯清单记录候选判断 |
|
||||||
|
| 冲突提示 | 冲突条目在对话框和追溯清单中提示,不强行填入 |
|
||||||
|
|
||||||
|
### 8.2 输出字段
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 条目编号 | 基本原则清单中的条目编码 |
|
||||||
|
| 条目内容 | 原始原则或要求 |
|
||||||
|
| 适用性 | 适用/不适用,低置信度留空 |
|
||||||
|
| 符合性证据 | 高置信度证据摘要 |
|
||||||
|
| 证明文件位置 | 文件名、章节、页码或文本定位 |
|
||||||
|
| 置信度 | 用于判断是否写入 Word |
|
||||||
|
| 候选来源 | 规则、LLM 或两者一致 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、Word 与 PDF 生成设计
|
||||||
|
|
||||||
|
### 9.1 Word 模板填充
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板副本 | 原始模板复制到批次工作目录后再写入 |
|
||||||
|
| 表格行填充 | 根据行首字段名定位目标单元格 |
|
||||||
|
| 段落占位填充 | 支持 `{{field_key}}` 等占位符 |
|
||||||
|
| 清单条目填充 | 按条目编号和配置列写入适用性、证据和证明位置 |
|
||||||
|
| 冲突高亮 | 冲突字段使用黄色底色和红色字体 |
|
||||||
|
| 缺失字段 | 保持空白,不写“待补充” |
|
||||||
|
| 版式保持 | 尽量不改变表格结构、分页和字体 |
|
||||||
|
|
||||||
|
### 9.2 PDF 转换
|
||||||
|
|
||||||
|
PDF 转换作为 P1 待办增强项设计:
|
||||||
|
|
||||||
|
| 阶段 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| Demo 主链路 | 优先生成 Word,不因 PDF 能力缺失阻断工作流 |
|
||||||
|
| P1 增强 | 使用 LibreOffice/soffice headless 将 Word 转为 PDF |
|
||||||
|
| 失败处理 | Word 已生成但 PDF 失败时,批次状态为 `partial_success` |
|
||||||
|
| QA 增强 | 后续增加 PDF 页数非 0、逐页截图或版式差异检查 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、输出与下载设计
|
||||||
|
|
||||||
|
### 10.1 输出文件
|
||||||
|
|
||||||
|
| 文件 | Demo 阶段 | P1/P2 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 填好后的 Word | 必须生成 | 持续支持 |
|
||||||
|
| PDF 预览 | 待办增强 | LibreOffice 转换生成 |
|
||||||
|
| 字段来源追溯清单 Excel | 允许生成,建议实现 | 增加多 Sheet |
|
||||||
|
| 字段抽取 JSON | 过程产物留底 | 支持下载或调试查看 |
|
||||||
|
|
||||||
|
### 10.2 文件命名
|
||||||
|
|
||||||
|
```text
|
||||||
|
批次号-产品名称-注册证格式.docx
|
||||||
|
批次号-产品名称-注册证格式.pdf
|
||||||
|
批次号-产品名称-变更注册备案文件.docx
|
||||||
|
批次号-产品名称-变更注册备案文件.pdf
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.docx
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.pdf
|
||||||
|
批次号-产品名称-字段来源追溯清单.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.3 ExportedSummaryFile 扩展
|
||||||
|
|
||||||
|
继续复用 `ExportedSummaryFile`,但需要扩展 `ExportType`:
|
||||||
|
|
||||||
|
| export_type | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| markdown | 既有 Markdown 报告 |
|
||||||
|
| excel | Excel 追溯清单 |
|
||||||
|
| json | 字段抽取 JSON 或结果包 |
|
||||||
|
| word | 填好的 Word 文件,新增 |
|
||||||
|
| pdf | Word 转换后的 PDF,新增 |
|
||||||
|
|
||||||
|
填表工作流导出记录建议:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | `application_form_fill` |
|
||||||
|
| workflow_batch_id | `ApplicationFormFillBatch.id` |
|
||||||
|
| export_category | `filled_template`、`traceability`、`extract_result` |
|
||||||
|
| export_type | `word`、`pdf`、`excel`、`json` |
|
||||||
|
|
||||||
|
导出服务入参应包含目标输出类型列表,例如:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"output_types": ["word", "pdf", "excel"],
|
||||||
|
"template_codes": ["registration_certificate", "essential_principles"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
系统根据入参决定生成哪些类型的内容。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、数据模型设计
|
||||||
|
|
||||||
|
### 11.1 ApplicationFormFillBatch
|
||||||
|
|
||||||
|
新增自动填表批次表。
|
||||||
|
|
||||||
|
| 字段 | 类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| id | BigAutoField | 主键 |
|
||||||
|
| conversation | ForeignKey(Conversation) | 绑定对话 |
|
||||||
|
| user | ForeignKey(User) | 发起用户 |
|
||||||
|
| source_summary_batch | ForeignKey(FileSummaryBatch) | 文件来源批次 |
|
||||||
|
| source_regulatory_batch | ForeignKey(RegulatoryReviewBatch, null=True) | 可选,复用已确认法规条件 |
|
||||||
|
| batch_no | CharField | 填表批次号,如 AFF-YYYYMMDDHHMMSS |
|
||||||
|
| status | CharField | pending、running、waiting_user、success、partial_success、failed |
|
||||||
|
| trigger_message | ForeignKey(Message, null=True) | 触发消息 |
|
||||||
|
| requested_templates | JSONField | 用户指定模板 |
|
||||||
|
| selected_templates | JSONField | 实际生成模板 |
|
||||||
|
| output_types | JSONField | 请求输出类型,如 word、pdf、excel |
|
||||||
|
| registration_type | CharField | 注册类型 |
|
||||||
|
| product_name | CharField | 产品名称 |
|
||||||
|
| conflict_summary | JSONField | 冲突摘要 |
|
||||||
|
| risk_notes | JSONField | 不适用模板、低置信度等提示 |
|
||||||
|
| work_dir | CharField | 批次工作目录 |
|
||||||
|
| error_message | TextField | 异常说明 |
|
||||||
|
| created_at | DateTimeField | 创建时间 |
|
||||||
|
| started_at | DateTimeField | 开始时间 |
|
||||||
|
| finished_at | DateTimeField | 完成时间 |
|
||||||
|
|
||||||
|
### 11.2 ApplicationFormFillArtifact
|
||||||
|
|
||||||
|
可新增独立过程产物表,也可复用 `RegulatoryArtifact`。考虑到这是独立工作流,建议新增轻量产物表,结构与 `RegulatoryArtifact` 保持一致。
|
||||||
|
|
||||||
|
| 字段 | 类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| id | BigAutoField | 主键 |
|
||||||
|
| batch | ForeignKey(ApplicationFormFillBatch) | 所属填表批次 |
|
||||||
|
| artifact_type | CharField | template_copy、field_extract_result、merged_fields、traceability、notification_record |
|
||||||
|
| file_format | CharField | json、excel、docx、pdf |
|
||||||
|
| name | CharField | 产物名称 |
|
||||||
|
| storage_path | CharField | 存储路径 |
|
||||||
|
| metadata | JSONField | 模板编码、输出类型、生成状态等 |
|
||||||
|
| content_hash | CharField | 文件 hash |
|
||||||
|
| created_at | DateTimeField | 创建时间 |
|
||||||
|
|
||||||
|
### 11.3 与既有模型关系
|
||||||
|
|
||||||
|
```text
|
||||||
|
Conversation 1:N ApplicationFormFillBatch
|
||||||
|
FileSummaryBatch 1:N ApplicationFormFillBatch
|
||||||
|
RegulatoryReviewBatch 0..1:N ApplicationFormFillBatch(可选关联,source_regulatory_batch 可为空)
|
||||||
|
ApplicationFormFillBatch 1:N ApplicationFormFillArtifact
|
||||||
|
ApplicationFormFillBatch 1:N WorkflowNodeRun
|
||||||
|
ApplicationFormFillBatch 1:N ExportedSummaryFile
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、后端服务设计
|
||||||
|
|
||||||
|
### 12.1 FormFillWorkflowExecutor
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| run(batch) | 串行执行自动填表节点 |
|
||||||
|
| run_node(node) | 执行单节点并记录进度 |
|
||||||
|
| resolve_source_summary_batch() | 根据本次附件或最近成功批次确定来源 |
|
||||||
|
| emit_event() | 写入 `WorkflowEvent` |
|
||||||
|
| complete_or_partial() | 根据 Word/PDF/通知结果更新批次状态 |
|
||||||
|
|
||||||
|
### 12.2 TemplateSelectionService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| parse_requested_templates(message) | 从用户话语中识别指定模板 |
|
||||||
|
| detect_registration_type() | 按用户话语、法规确认条件、文件抽取识别注册类型 |
|
||||||
|
| select_templates() | 根据注册类型和用户指令输出模板列表 |
|
||||||
|
|
||||||
|
### 12.3 TemplateRepository
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| load_config() | 读取 YAML 模板配置 |
|
||||||
|
| resolve_source_template(code) | 找到原始模板或预转换模板 |
|
||||||
|
| copy_to_work_dir(code, batch) | 复制模板到批次目录 |
|
||||||
|
| convert_doc_to_docx(path) | `.doc` 转 `.docx` |
|
||||||
|
|
||||||
|
### 12.4 FieldExtractionService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| extract_by_rules(texts, template_fields) | 规则/正则抽取 |
|
||||||
|
| extract_by_llm(texts, template_fields) | LLM 结构化抽取 |
|
||||||
|
| run_parallel() | 并行执行两路抽取 |
|
||||||
|
| save_extract_artifact() | 保存 `field_extract_result.json` |
|
||||||
|
|
||||||
|
### 12.5 FieldMergeService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| normalize_fields() | 字段名、单位、空白和同义词归一 |
|
||||||
|
| rank_sources() | 按说明书、产品技术要求、检验报告等来源排序 |
|
||||||
|
| merge() | 输出最终字段 |
|
||||||
|
| detect_conflicts() | 输出冲突列表和高亮标记 |
|
||||||
|
|
||||||
|
### 12.6 WordTemplateFillService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| fill_table_rows() | 根据行名定位表格单元格并写入 |
|
||||||
|
| fill_placeholders() | 替换段落占位符 |
|
||||||
|
| fill_checklist_items() | 写入安全和性能基本原则清单 |
|
||||||
|
| apply_conflict_highlight() | 黄底红字标记冲突字段 |
|
||||||
|
| save_docx() | 保存填好后的 Word |
|
||||||
|
|
||||||
|
### 12.7 TraceabilityExportService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| build_excel() | 生成字段来源追溯清单 |
|
||||||
|
| build_json() | 生成结构化追溯 JSON |
|
||||||
|
| create_export_records() | 写入 `ExportedSummaryFile` |
|
||||||
|
|
||||||
|
### 12.8 FormFillNotifier
|
||||||
|
|
||||||
|
复用或包装 `FeishuNotifier`。
|
||||||
|
|
||||||
|
| 通知场景 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 填表成功 | 通知上传人文件已生成 |
|
||||||
|
| 部分成功 | 通知 Word 已生成,但 PDF/部分模板失败 |
|
||||||
|
| 冲突字段存在 | 通知中提示存在冲突字段,需下载核对 |
|
||||||
|
| 失败 | 可选通知失败原因,Demo 可只在对话框展示 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、接口设计
|
||||||
|
|
||||||
|
### 13.1 发起自动填表
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| URL | POST /api/review-agent/application-form-fill/start/ |
|
||||||
|
| 认证 | 登录用户 |
|
||||||
|
| 请求 | conversation_id、message_id、file_summary_batch_id 可选、template_codes 可选、output_types 可选 |
|
||||||
|
| 响应 | batch_id、workflow_type、status、selected_templates |
|
||||||
|
|
||||||
|
处理规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
校验 conversation 属于当前用户
|
||||||
|
-> 如本次消息带附件,先执行文件汇总
|
||||||
|
-> 否则查找当前对话最近成功 FileSummaryBatch
|
||||||
|
-> 创建 ApplicationFormFillBatch
|
||||||
|
-> 初始化 WorkflowNodeRun
|
||||||
|
-> 启动 FormFillWorkflowExecutor
|
||||||
|
-> 返回工作流卡片初始状态
|
||||||
|
```
|
||||||
|
|
||||||
|
### 13.2 查询自动填表状态
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| URL | GET /api/review-agent/application-form-fill/{batch_id}/ |
|
||||||
|
| 认证 | 登录用户 |
|
||||||
|
| 响应 | 批次状态、节点状态、选择模板、冲突摘要、导出文件 |
|
||||||
|
|
||||||
|
### 13.3 下载导出文件
|
||||||
|
|
||||||
|
继续复用:
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| URL | GET /api/review-agent/file-summary/exports/{export_id}/download/ |
|
||||||
|
| 认证 | 登录用户 |
|
||||||
|
| 响应 | 文件流 |
|
||||||
|
|
||||||
|
权限规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
export_id -> workflow_type/workflow_batch_id -> ApplicationFormFillBatch -> conversation -> user
|
||||||
|
必须等于当前登录用户,才允许下载。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、前端设计
|
||||||
|
|
||||||
|
### 14.1 自动填表工作流卡片
|
||||||
|
|
||||||
|
前端新增独立卡片类型 `application_form_fill`,展示节点:
|
||||||
|
|
||||||
|
| 节点 | 展示文案 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 准备资料 |
|
||||||
|
| template_select | 选择模板 |
|
||||||
|
| template_copy | 复制模板 |
|
||||||
|
| field_extract | 抽取字段 |
|
||||||
|
| conflict_merge | 冲突归并 |
|
||||||
|
| word_fill | 填写 Word |
|
||||||
|
| pdf_convert | 转换 PDF |
|
||||||
|
| output_export | 输出下载 |
|
||||||
|
| notify | 飞书通知 |
|
||||||
|
| completed | 已完成 |
|
||||||
|
|
||||||
|
### 14.2 对话框结果展示
|
||||||
|
|
||||||
|
工作流完成后,AI 对话框展示 Markdown 摘要:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成申报模板自动填表文件。
|
||||||
|
|
||||||
|
| 文件 | Word | PDF |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册证格式 | 下载 | 待生成 |
|
||||||
|
| 安全和性能基本原则清单 | 下载 | 待生成 |
|
||||||
|
|
||||||
|
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
|
||||||
|
|
||||||
|
[下载字段来源追溯清单](download-url)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14.3 指定模板交互
|
||||||
|
|
||||||
|
用户可以通过自然语言指定模板。前端无需额外表单,后端意图识别后在卡片中展示本次选择模板。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、事件设计
|
||||||
|
|
||||||
|
### 15.1 SSE 事件结构
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"event": "workflow",
|
||||||
|
"workflow_type": "application_form_fill",
|
||||||
|
"batch_id": 3001,
|
||||||
|
"conversation_id": 1001,
|
||||||
|
"node_code": "field_extract",
|
||||||
|
"node_group": "form_fill",
|
||||||
|
"status": "running",
|
||||||
|
"progress": 55,
|
||||||
|
"message": "正在并行抽取模板字段",
|
||||||
|
"payload": {
|
||||||
|
"selected_templates": ["registration_certificate", "essential_principles"],
|
||||||
|
"processed_files": 8,
|
||||||
|
"total_files": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 15.2 节点进度
|
||||||
|
|
||||||
|
| 节点 | 进度口径 |
|
||||||
|
| --- | --- |
|
||||||
|
| 准备资料 | 是否找到来源批次 |
|
||||||
|
| 选择模板 | 模板数量 |
|
||||||
|
| 复制模板 | 已复制模板数/总模板数 |
|
||||||
|
| 抽取字段 | 已处理文件数/总文件数 |
|
||||||
|
| 冲突归并 | 字段数量和冲突数量 |
|
||||||
|
| 填写 Word | 已生成 Word 数/目标 Word 数 |
|
||||||
|
| 转换 PDF | 已生成 PDF 数/目标 PDF 数 |
|
||||||
|
| 输出下载 | 已创建下载记录数 |
|
||||||
|
| 飞书通知 | 通知状态 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、异常与降级设计
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 无成功文件汇总批次 | 进入 waiting_user,提示上传资料 |
|
||||||
|
| 新附件汇总失败 | 填表工作流不启动或标记失败 |
|
||||||
|
| 用户指定不适用模板 | 允许生成,摘要提示需人工确认 |
|
||||||
|
| `.doc` 转换失败 | 该模板失败,其他模板继续 |
|
||||||
|
| 单字段缺失 | Word 留空,追溯清单记录未提取 |
|
||||||
|
| 规则和 LLM 冲突 | 按来源优先级合并,冲突高亮 |
|
||||||
|
| 所有 Word 生成失败 | 批次 failed |
|
||||||
|
| 部分 Word 生成失败 | 批次 partial_success |
|
||||||
|
| PDF 转换失败 | 批次 partial_success,保留 Word 下载 |
|
||||||
|
| 飞书通知失败 | 不影响文件下载,记录通知失败 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、安全设计
|
||||||
|
|
||||||
|
| 设计点 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始模板保护 | 只读原始模板,所有写入发生在批次工作目录副本 |
|
||||||
|
| 对话隔离 | 填表批次必须绑定当前 Conversation |
|
||||||
|
| 文件读取权限 | 只能读取关联 `FileSummaryBatch` 下的文件 |
|
||||||
|
| 下载权限 | 根据 workflow_type 和 workflow_batch_id 校验当前用户 |
|
||||||
|
| LLM 输入控制 | 只传必要文本片段和字段上下文,避免发送整包敏感资料 |
|
||||||
|
| 飞书脱敏 | 通知仅包含生成状态、模板名称、冲突数量和系统内下载提示 |
|
||||||
|
| 命令调用安全 | LibreOffice/飞书 CLI 使用结构化参数,不拼接用户输入 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、验收设计
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 验收标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 意图触发 | 用户说“帮我填注册证”等语句可触发 `application_form_fill` |
|
||||||
|
| 2 | 指定模板 | 用户指定模板时只生成指定模板 |
|
||||||
|
| 3 | 默认模板 | 未指定模板时按注册类型生成适用的全部模板 |
|
||||||
|
| 4 | 新附件串联 | 本次消息带附件时先自动汇总,再执行填表 |
|
||||||
|
| 5 | 最近批次复用 | 无附件时复用当前对话最近成功文件汇总批次 |
|
||||||
|
| 6 | 工作流卡片 | 前端展示准备资料、选择模板、复制模板、抽取字段、填写 Word 等节点 |
|
||||||
|
| 7 | 字段并行抽取 | 规则/正则和 LLM 抽取结果均进入过程产物 |
|
||||||
|
| 8 | 冲突归并 | 说明书优先,冲突字段在 Word 中黄底红字 |
|
||||||
|
| 9 | 缺失字段 | 未提取字段在 Word 中留空 |
|
||||||
|
| 10 | 基本原则清单 | 高置信度条目写入,低置信度候选留在追溯清单 |
|
||||||
|
| 11 | Word 下载 | 对话框提供填好后的 Word 下载链接 |
|
||||||
|
| 12 | PDF 待办 | Demo 阶段 PDF 可展示为待生成,不阻断 Word |
|
||||||
|
| 13 | 追溯清单 | 生成字段来源追溯清单,包含规则、LLM、合并和冲突信息 |
|
||||||
|
| 14 | 飞书通知 | 填表完成后可通知上传人,失败不影响下载 |
|
||||||
|
| 15 | 权限隔离 | A 对话生成的 Word/追溯清单不能被 B 对话访问 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、实施建议
|
||||||
|
|
||||||
|
1. 新增 `ApplicationFormFillBatch` 和 `ApplicationFormFillArtifact` 数据模型,扩展 `ExportedSummaryFile.ExportType` 支持 `word`、`pdf`。
|
||||||
|
2. 新增模板配置 `application_form_templates_v1.yaml`,先录入注册证格式 `.docx` 的已识别字段。
|
||||||
|
3. 将两个 `.doc` 模板转换为 `.docx` 工作模板,或在配置中标记为待转换模板。
|
||||||
|
4. 实现 `TemplateSelectionService`,支持用户指定模板、注册类型识别和默认模板选择。
|
||||||
|
5. 实现规则/正则与 LLM 并行字段抽取,并保存 `field_extract_result.json`。
|
||||||
|
6. 实现 `FieldMergeService`,按说明书优先规则处理冲突。
|
||||||
|
7. 实现 `WordTemplateFillService`,优先支持表格行填充和冲突高亮。
|
||||||
|
8. 实现追溯清单 Excel 导出和 Word 下载记录。
|
||||||
|
9. 改造前端工作流卡片,新增 `application_form_fill` 类型。
|
||||||
|
10. 接入飞书通知摘要。
|
||||||
|
11. 将 PDF 转换、逐页版式 QA 和更完整的 `.doc` 模板转换纳入后续待办。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、待办与待确认事项
|
||||||
|
|
||||||
|
| 序号 | 项目 | 当前建议 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | PDF 转换 | 放入待办,Demo 优先 Word 下载 |
|
||||||
|
| 2 | `.doc` 模板转换 | 优先 LibreOffice/soffice;不可用时预置 `.docx` 工作模板 |
|
||||||
|
| 3 | 安全和性能基本原则清单条目拆解 | 需转换模板后补齐 YAML 条目配置 |
|
||||||
|
| 4 | LLM 结构化抽取提示词 | 需约束输出 JSON schema 和置信度 |
|
||||||
|
| 5 | 飞书通知渠道 | Demo 可 mock 或 CLI,正式版接 Webhook/API |
|
||||||
|
| 6 | 低置信度阈值 | 建议功能实现阶段先配置为 0.75 |
|
||||||
|
| 7 | 版式验证 | P1 增加 PDF 页数检查和逐页截图 QA |
|
||||||
790
docs/3.详细设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
790
docs/3.详细设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,790 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表详细设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 数据库设计文档 | docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 依赖详细设计 | docs/3.详细设计/1.自动汇总.md;docs/3.详细设计/2.NMPA注册资料法规核查与整改闭环.md |
|
||||||
|
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、详细设计目标
|
||||||
|
|
||||||
|
本详细设计用于指导“产品关键信息提取与申报文件自动填表”功能开发落地,覆盖代码结构、数据库模型、模板配置、独立工作流、字段抽取、字段合并、Word 模板填充、追溯清单导出、飞书通知、接口契约、前端卡片、异常降级和测试建议。
|
||||||
|
|
||||||
|
核心约束:
|
||||||
|
|
||||||
|
| 约束 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 使用 `workflow_type=application_form_fill`,拥有独立批次和卡片 |
|
||||||
|
| 对话触发 | 由用户自然语言触发,可指定模板;未指定时按注册类型选择适用模板 |
|
||||||
|
| 文件来源复用 | 默认使用当前对话最近成功的 `FileSummaryBatch`;本次带附件时先执行自动汇总 |
|
||||||
|
| 模板配置驱动 | 模板路径、字段映射、适用条件写入 `application_form_fill/templates/application_form_templates_v1.yaml` |
|
||||||
|
| Word 优先 | Demo 阶段主链路只要求生成 Word 和追溯清单 |
|
||||||
|
| PDF 待办 | PDF 转换节点保留,但本期可标记 skipped 并写入待办计划 |
|
||||||
|
| 抽取并行 | 规则/正则抽取与 LLM 结构化抽取并行执行,再统一合并 |
|
||||||
|
| 冲突可见 | 说明书优先;冲突字段写入 Word 时黄底红字,并在对话框展示摘要 |
|
||||||
|
| 过程留底 | 规则抽取、LLM 抽取、合并结果、冲突和追溯清单均保存产物 |
|
||||||
|
| 飞书通知 | 填表完成后通知上传人,通知失败不阻断下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、代码结构设计
|
||||||
|
|
||||||
|
### 2.1 目录结构
|
||||||
|
|
||||||
|
第三批独立为 `review_agent/application_form_fill/` 模块。Django 模型仍集中在 `review_agent/models.py`,业务服务放入独立模块。
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/
|
||||||
|
models.py
|
||||||
|
services.py
|
||||||
|
skill_router.py
|
||||||
|
application_form_fill/
|
||||||
|
__init__.py
|
||||||
|
constants.py
|
||||||
|
schemas.py
|
||||||
|
storage.py
|
||||||
|
workflow.py
|
||||||
|
views.py
|
||||||
|
services/
|
||||||
|
__init__.py
|
||||||
|
template_config.py
|
||||||
|
template_select.py
|
||||||
|
template_repository.py
|
||||||
|
field_extract.py
|
||||||
|
field_merge.py
|
||||||
|
word_fill.py
|
||||||
|
traceability_export.py
|
||||||
|
notifier.py
|
||||||
|
templates/
|
||||||
|
application_form_templates_v1.yaml
|
||||||
|
prompts/
|
||||||
|
field_extract.md
|
||||||
|
checklist_extract.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 文件职责
|
||||||
|
|
||||||
|
| 文件 | 职责 |
|
||||||
|
| --- | --- |
|
||||||
|
| application_form_fill/constants.py | 工作流节点、模板编码、状态、输出类型常量 |
|
||||||
|
| application_form_fill/schemas.py | FormFillContext、TemplateSpec、ExtractedField、MergedField 等 dataclass |
|
||||||
|
| application_form_fill/storage.py | 批次工作目录、模板副本、产物保存、hash 计算 |
|
||||||
|
| application_form_fill/workflow.py | FormFillWorkflowExecutor,串行执行独立填表工作流 |
|
||||||
|
| application_form_fill/views.py | 启动、状态查询、后续可选下载或重试接口 |
|
||||||
|
| services/template_config.py | 读取和校验 YAML 模板配置 |
|
||||||
|
| services/template_select.py | 解析用户指定模板、识别注册类型、选择模板 |
|
||||||
|
| services/template_repository.py | 定位原始模板、复制模板、`.doc` 转 `.docx` 预留 |
|
||||||
|
| services/field_extract.py | 规则/正则与 LLM 并行字段抽取 |
|
||||||
|
| services/field_merge.py | 字段归一化、来源排序、冲突识别、最终字段输出 |
|
||||||
|
| services/word_fill.py | 使用 `python-docx` 写入 Word 表格、段落和高亮 |
|
||||||
|
| services/traceability_export.py | 生成 Excel/JSON 追溯清单,创建导出记录 |
|
||||||
|
| services/notifier.py | 包装飞书通知,生成通知记录 |
|
||||||
|
| prompts/field_extract.md | LLM 字段抽取提示词 |
|
||||||
|
| prompts/checklist_extract.md | 安全和性能基本原则清单条目判断提示词 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、依赖设计
|
||||||
|
|
||||||
|
### 3.1 Python 依赖
|
||||||
|
|
||||||
|
| 依赖 | 用途 | 当前项目情况 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Django | Web、ORM、权限 | 已使用 |
|
||||||
|
| python-docx | Word 模板读取、表格填充、字体和底色设置 | 已在项目依赖链中使用 |
|
||||||
|
| openpyxl | 字段来源追溯清单 Excel 导出 | 已使用 |
|
||||||
|
| PyYAML | YAML 模板配置读取 | 已用于法规规则 |
|
||||||
|
| pypdf / python-pptx | 文本抽取链路复用 | 已使用 |
|
||||||
|
| LibreOffice/soffice | `.doc` 转 `.docx`、PDF 转换 | 本期非强依赖,后续待办 |
|
||||||
|
|
||||||
|
### 3.2 技术边界
|
||||||
|
|
||||||
|
| 能力 | 本期实现 | 后续增强 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `.docx` 模板填充 | 必须支持 | 支持内容控件、复杂 OOXML patch |
|
||||||
|
| `.doc` 模板处理 | 可通过预转换模板或标记失败 | 自动 LibreOffice 转换 |
|
||||||
|
| PDF 转换 | 可跳过并提示待生成 | LibreOffice 转 PDF + 视觉 QA |
|
||||||
|
| 字段级入库 | 不做 | 新增字段明细表和在线编辑 |
|
||||||
|
| LLM 抽取 | 输出 JSON 并留底 | 增加置信度校准和人工确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、数据模型详细设计
|
||||||
|
|
||||||
|
模型放在 `review_agent/models.py`。
|
||||||
|
|
||||||
|
### 4.1 ApplicationFormFillBatch
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ApplicationFormFillBatch(models.Model):
|
||||||
|
class Status(models.TextChoices):
|
||||||
|
PENDING = "pending", "待执行"
|
||||||
|
RUNNING = "running", "执行中"
|
||||||
|
WAITING_USER = "waiting_user", "等待用户"
|
||||||
|
SUCCESS = "success", "成功"
|
||||||
|
PARTIAL_SUCCESS = "partial_success", "部分成功"
|
||||||
|
FAILED = "failed", "失败"
|
||||||
|
CANCELLED = "cancelled", "已取消"
|
||||||
|
```
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| conversation | 绑定对话 |
|
||||||
|
| user | 发起用户 |
|
||||||
|
| trigger_message | 触发消息 |
|
||||||
|
| source_summary_batch | 文件来源批次 |
|
||||||
|
| source_regulatory_batch | 可选法规核查批次 |
|
||||||
|
| batch_no | `AFF-YYYYMMDDHHMMSS-abcdef` |
|
||||||
|
| requested_templates | 用户指定模板 |
|
||||||
|
| selected_templates | 实际生成模板 |
|
||||||
|
| output_types | 本次请求输出类型,Demo 默认 `["word", "excel", "json"]` |
|
||||||
|
| registration_type | 识别出的注册类型 |
|
||||||
|
| registration_type_source | 注册类型来源 |
|
||||||
|
| product_name | 产品名称 |
|
||||||
|
| conflict_summary | 冲突摘要 |
|
||||||
|
| risk_notes | 不适用模板、PDF 待生成等提示 |
|
||||||
|
| template_config_version | 模板配置版本 |
|
||||||
|
| template_config_hash | 模板配置 hash |
|
||||||
|
| work_dir | 批次工作目录 |
|
||||||
|
|
||||||
|
### 4.2 ApplicationFormFillArtifact
|
||||||
|
|
||||||
|
用于保存过程产物和模板副本元数据。
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ApplicationFormFillArtifact(models.Model):
|
||||||
|
class ArtifactType(models.TextChoices):
|
||||||
|
TEMPLATE_COPY = "template_copy", "模板副本"
|
||||||
|
FIELD_EXTRACT_RESULT = "field_extract_result", "字段抽取结果"
|
||||||
|
MERGED_FIELDS = "merged_fields", "字段合并结果"
|
||||||
|
TRACEABILITY = "traceability", "追溯清单"
|
||||||
|
FILLED_TEMPLATE = "filled_template", "已填模板"
|
||||||
|
NOTIFICATION_RECORD = "notification_record", "通知记录"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 ApplicationFormFillNotificationRecord
|
||||||
|
|
||||||
|
通知记录字段与第二批法规通知风格一致,支持重试:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 自动填表批次 |
|
||||||
|
| recipient | 通知对象 |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| template_codes | 涉及模板 |
|
||||||
|
| export_ids | 关联下载文件 |
|
||||||
|
| message_summary | 通知摘要 |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
| retry_count | 重试次数 |
|
||||||
|
| external_message_id | 飞书外部消息 ID |
|
||||||
|
| error_message | 失败原因 |
|
||||||
|
| sent_at | 发送成功时间 |
|
||||||
|
|
||||||
|
### 4.4 ExportedSummaryFile 扩展
|
||||||
|
|
||||||
|
`ExportedSummaryFile.ExportType` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
WORD = "word", "Word"
|
||||||
|
PDF = "pdf", "PDF"
|
||||||
|
```
|
||||||
|
|
||||||
|
填表导出记录使用:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | application_form_fill |
|
||||||
|
| workflow_batch_id | ApplicationFormFillBatch.id |
|
||||||
|
| export_category | filled_template、traceability、extract_result |
|
||||||
|
| export_type | word、excel、json、pdf |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、常量设计
|
||||||
|
|
||||||
|
### 5.1 工作流节点
|
||||||
|
|
||||||
|
```python
|
||||||
|
FORM_FILL_NODE_DEFINITIONS = [
|
||||||
|
("prepare", "准备资料", "form_fill"),
|
||||||
|
("template_select", "选择模板", "form_fill"),
|
||||||
|
("template_copy", "复制模板", "form_fill"),
|
||||||
|
("field_extract", "抽取字段", "form_fill"),
|
||||||
|
("conflict_merge", "冲突归并", "form_fill"),
|
||||||
|
("word_fill", "填写 Word", "form_fill"),
|
||||||
|
("pdf_convert", "转换 PDF", "form_fill"),
|
||||||
|
("trace_export", "追溯清单", "form_fill"),
|
||||||
|
("output_export", "输出下载", "form_fill"),
|
||||||
|
("notify", "飞书通知", "form_fill"),
|
||||||
|
("completed", "完成", "completed"),
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 模板编码
|
||||||
|
|
||||||
|
```python
|
||||||
|
TEMPLATE_REGISTRATION_CERTIFICATE = "registration_certificate"
|
||||||
|
TEMPLATE_CHANGE_REGISTRATION = "change_registration"
|
||||||
|
TEMPLATE_ESSENTIAL_PRINCIPLES = "essential_principles"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 触发关键词
|
||||||
|
|
||||||
|
```python
|
||||||
|
FORM_FILL_TRIGGER_KEYWORDS = [
|
||||||
|
"填注册证",
|
||||||
|
"对应的表格",
|
||||||
|
"生成申报模板",
|
||||||
|
"安全和性能基本原则清单",
|
||||||
|
"填到申报模板",
|
||||||
|
"自动填表",
|
||||||
|
"生成表格",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、核心数据结构
|
||||||
|
|
||||||
|
### 6.1 FormFillContext
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class FormFillContext:
|
||||||
|
batch: ApplicationFormFillBatch
|
||||||
|
source_summary_batch: FileSummaryBatch
|
||||||
|
source_regulatory_batch: RegulatoryReviewBatch | None
|
||||||
|
template_config: dict[str, Any]
|
||||||
|
selected_templates: list["TemplateSpec"]
|
||||||
|
document_texts: dict[str, str]
|
||||||
|
regex_results: dict[str, Any]
|
||||||
|
llm_results: dict[str, Any]
|
||||||
|
merged_fields: dict[str, "MergedField"]
|
||||||
|
checklist_items: dict[str, Any]
|
||||||
|
conflicts: list[dict[str, Any]]
|
||||||
|
exports: list[ExportedSummaryFile]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 TemplateSpec
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class TemplateSpec:
|
||||||
|
code: str
|
||||||
|
name: str
|
||||||
|
source_file: str
|
||||||
|
output_label: str
|
||||||
|
applies_when: dict[str, Any]
|
||||||
|
file_format: str
|
||||||
|
fields: list[dict[str, Any]]
|
||||||
|
checklist_items: list[dict[str, Any]]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 ExtractedField
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class ExtractedField:
|
||||||
|
key: str
|
||||||
|
label: str
|
||||||
|
value: str
|
||||||
|
source_file: str
|
||||||
|
source_role: str
|
||||||
|
evidence: str
|
||||||
|
extractor: str
|
||||||
|
confidence: float
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 MergedField
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class MergedField:
|
||||||
|
key: str
|
||||||
|
label: str
|
||||||
|
value: str
|
||||||
|
source_file: str
|
||||||
|
evidence: str
|
||||||
|
confidence: float
|
||||||
|
has_conflict: bool = False
|
||||||
|
conflict_values: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、模板配置详细设计
|
||||||
|
|
||||||
|
### 7.1 配置路径
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/application_form_fill/templates/application_form_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 初始配置示例
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: application_form_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||||
|
templates:
|
||||||
|
- code: registration_certificate
|
||||||
|
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
|
||||||
|
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
|
||||||
|
output_label: 注册证格式
|
||||||
|
applies_when:
|
||||||
|
registration_type: ["首次注册"]
|
||||||
|
file_format: docx
|
||||||
|
fields:
|
||||||
|
- key: applicant_name
|
||||||
|
label: 注册人名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 注册人名称
|
||||||
|
source_roles: ["申请表", "说明书", "企业信息"]
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 产品名称
|
||||||
|
source_roles: ["说明书", "产品技术要求", "注册检验报告"]
|
||||||
|
- key: intended_use
|
||||||
|
label: 预期用途
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 预期用途
|
||||||
|
source_roles: ["说明书", "临床评价资料", "产品技术要求"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.3 配置校验
|
||||||
|
|
||||||
|
`TemplateConfigService` 在批次加载模板配置时(`template_select` 节点读取 YAML 时)校验,失败处理以批次或模板为单位:
|
||||||
|
|
||||||
|
| 校验项 | 失败处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| version 存在 | 批次 failed |
|
||||||
|
| source_dir 存在 | 批次 failed |
|
||||||
|
| templates 非空 | 批次 failed |
|
||||||
|
| code 唯一 | 批次 failed |
|
||||||
|
| source_file 存在 | 对应模板不可用 |
|
||||||
|
| target.type 支持 | 对应字段跳过并记录 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、服务详细设计
|
||||||
|
|
||||||
|
### 8.1 TemplateConfigService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def load_template_config() -> dict:
|
||||||
|
"""读取 YAML 模板配置。"""
|
||||||
|
|
||||||
|
def validate_template_config(config: dict) -> list[str]:
|
||||||
|
"""返回配置错误列表。"""
|
||||||
|
|
||||||
|
def compute_config_hash(path: Path) -> str:
|
||||||
|
"""计算模板配置 SHA-256。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 TemplateSelectionService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def parse_requested_templates(message: str) -> list[str]:
|
||||||
|
"""从用户话语中识别指定模板。"""
|
||||||
|
|
||||||
|
def detect_registration_type(batch: ApplicationFormFillBatch, message: str) -> tuple[str, str]:
|
||||||
|
"""按用户话语、法规核查批次、文件抽取结果识别注册类型及来源。"""
|
||||||
|
|
||||||
|
def select_templates(
|
||||||
|
config: dict,
|
||||||
|
requested_templates: list[str],
|
||||||
|
registration_type: str,
|
||||||
|
) -> tuple[list[TemplateSpec], list[dict]]:
|
||||||
|
"""输出模板列表和风险提示。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
注册类型优先级:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户话语明确指定
|
||||||
|
-> source_regulatory_batch.condition_json / confirmed_conditions
|
||||||
|
-> source_summary_batch 文件内容抽取候选
|
||||||
|
-> unknown
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.3 TemplateRepository
|
||||||
|
|
||||||
|
```python
|
||||||
|
def resolve_source_template(spec: TemplateSpec) -> Path:
|
||||||
|
"""返回原始模板路径或预转换工作模板路径。"""
|
||||||
|
|
||||||
|
def copy_template_to_batch(spec: TemplateSpec, batch: ApplicationFormFillBatch) -> Path:
|
||||||
|
"""复制模板到批次 work_dir/templates。"""
|
||||||
|
|
||||||
|
def convert_doc_to_docx(source: Path, target_dir: Path) -> Path:
|
||||||
|
"""P1 能力:使用 soffice 转 docx。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
`.doc` 模板本期处理:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 存在 working_template docx | 使用工作模板 |
|
||||||
|
| 仅有 `.doc` 且无 soffice | 对应模板失败,其他模板继续 |
|
||||||
|
| 具备 soffice | 转换为 `.docx` 后继续 |
|
||||||
|
|
||||||
|
### 8.4 FieldExtractionService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
||||||
|
"""复用 text_extract 读取文件文本。"""
|
||||||
|
|
||||||
|
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict:
|
||||||
|
"""规则/正则抽取字段。"""
|
||||||
|
|
||||||
|
def extract_by_llm(texts: dict[str, str], specs: list[TemplateSpec]) -> dict:
|
||||||
|
"""LLM 结构化抽取字段。"""
|
||||||
|
|
||||||
|
def run_parallel_extract(texts: dict[str, str], specs: list[TemplateSpec]) -> tuple[dict, dict]:
|
||||||
|
"""并行执行规则/正则与 LLM 抽取。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
并行实现可使用 `ThreadPoolExecutor(max_workers=2)`。LLM 超时或失败时,保留规则/正则结果继续。
|
||||||
|
|
||||||
|
### 8.5 FieldMergeService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def normalize_field_value(value: str) -> str:
|
||||||
|
"""字段值归一化。"""
|
||||||
|
|
||||||
|
def rank_source(source_role: str, source_file: str) -> int:
|
||||||
|
"""说明书优先,其次产品技术要求、检测报告、性能研究等。"""
|
||||||
|
|
||||||
|
def merge_fields(regex_results: dict, llm_results: dict) -> tuple[dict[str, MergedField], list[dict]]:
|
||||||
|
"""合并字段并输出冲突。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
来源优先级:
|
||||||
|
|
||||||
|
| 排名 | 来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| 1 | 说明书 |
|
||||||
|
| 2 | 产品技术要求 |
|
||||||
|
| 3 | 注册检验报告/检测报告 |
|
||||||
|
| 4 | 性能研究资料 |
|
||||||
|
| 5 | 其他注册资料 |
|
||||||
|
|
||||||
|
### 8.6 WordTemplateFillService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def fill_template(
|
||||||
|
template_path: Path,
|
||||||
|
output_path: Path,
|
||||||
|
spec: TemplateSpec,
|
||||||
|
fields: dict[str, MergedField],
|
||||||
|
checklist_items: dict[str, Any],
|
||||||
|
) -> Path:
|
||||||
|
"""填充 Word 模板并保存。"""
|
||||||
|
|
||||||
|
def fill_table_row(document: Document, row_label: str, value: str, conflict: bool) -> bool:
|
||||||
|
"""根据表格行首字段名定位并填入第二列。"""
|
||||||
|
|
||||||
|
def replace_placeholders(document: Document, fields: dict[str, MergedField]) -> None:
|
||||||
|
"""替换段落中的 {{field_key}}。"""
|
||||||
|
|
||||||
|
def apply_conflict_style(cell_or_run) -> None:
|
||||||
|
"""应用黄色底色和红色字体。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
冲突样式:
|
||||||
|
|
||||||
|
| 样式 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 字体颜色 | 红色 `FF0000` |
|
||||||
|
| 底色 | 黄色 `FFFF00` |
|
||||||
|
| 适用范围 | 单元格或字段值 run |
|
||||||
|
|
||||||
|
### 8.7 TraceabilityExportService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def build_traceability_workbook(batch, merged_fields, conflicts, specs) -> Workbook:
|
||||||
|
"""生成追溯清单 Excel。"""
|
||||||
|
|
||||||
|
def save_traceability_excel(batch, workbook) -> ExportedSummaryFile:
|
||||||
|
"""保存 Excel 并写导出记录。"""
|
||||||
|
|
||||||
|
def save_extract_json(batch, payload: dict) -> ApplicationFormFillArtifact:
|
||||||
|
"""保存字段抽取 JSON 过程产物。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
Excel Sheet:
|
||||||
|
|
||||||
|
| Sheet | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 字段追溯 | 模板、字段、填入值、来源文件、证据、冲突状态 |
|
||||||
|
| 冲突字段 | 字段、采用值、冲突值、处理方式 |
|
||||||
|
| 低置信度条目 | 安全和性能基本原则清单候选判断 |
|
||||||
|
| 生成结果 | 模板文件、Word 状态、PDF 状态、错误说明 |
|
||||||
|
|
||||||
|
### 8.8 FormFillNotifier
|
||||||
|
|
||||||
|
```python
|
||||||
|
def notify_completion(batch: ApplicationFormFillBatch, exports: list[ExportedSummaryFile]) -> ApplicationFormFillNotificationRecord:
|
||||||
|
"""发送填表完成通知。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
通知摘要包含:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次号 | 填表批次 |
|
||||||
|
| 产品名称 | 如已识别 |
|
||||||
|
| 生成模板 | 模板名称列表 |
|
||||||
|
| 冲突数量 | 提示需下载核对 |
|
||||||
|
| 下载提示 | 提示回到系统对话下载,不直接暴露敏感全文 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、工作流执行器详细设计
|
||||||
|
|
||||||
|
### 9.1 启动入口
|
||||||
|
|
||||||
|
```python
|
||||||
|
def start_application_form_fill_workflow(batch: ApplicationFormFillBatch, *, async_run: bool = True) -> None:
|
||||||
|
executor = FormFillWorkflowExecutor(batch)
|
||||||
|
if async_run:
|
||||||
|
Thread(target=executor.run, daemon=True).start()
|
||||||
|
else:
|
||||||
|
executor.run()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9.2 执行伪代码
|
||||||
|
|
||||||
|
```python
|
||||||
|
class FormFillWorkflowExecutor:
|
||||||
|
def run(self) -> None:
|
||||||
|
self.mark_batch_running()
|
||||||
|
try:
|
||||||
|
for node in self.nodes():
|
||||||
|
if node.status == "success":
|
||||||
|
continue
|
||||||
|
self.run_node(node)
|
||||||
|
self.complete_or_partial()
|
||||||
|
except WorkflowPausedForUser:
|
||||||
|
self.mark_waiting_user()
|
||||||
|
except Exception as exc:
|
||||||
|
self.mark_failed(exc)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9.3 节点处理要点
|
||||||
|
|
||||||
|
| 节点 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 校验 `source_summary_batch` 成功且属于当前对话 |
|
||||||
|
| template_select | 读取 YAML、识别注册类型、选择模板 |
|
||||||
|
| template_copy | 复制模板到 `work_dir/templates` |
|
||||||
|
| field_extract | 抽取文本,规则/正则与 LLM 并行,保存 JSON |
|
||||||
|
| conflict_merge | 合并字段,写 `conflict_summary` |
|
||||||
|
| word_fill | 逐模板生成 Word,写 `ExportedSummaryFile(word)` |
|
||||||
|
| pdf_convert | 本期 skipped,写 `risk_notes` |
|
||||||
|
| trace_export | 生成追溯 Excel 和 JSON |
|
||||||
|
| output_export | 生成 AI 对话 Markdown 摘要 |
|
||||||
|
| notify | 写飞书通知记录,失败不阻断 |
|
||||||
|
| completed | 标记 success 或 partial_success |
|
||||||
|
|
||||||
|
### 9.4 批次状态决策
|
||||||
|
|
||||||
|
| 条件 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 所有目标 Word 均成功,追溯清单成功,通知成功或跳过 | success |
|
||||||
|
| 至少一个 Word 成功,但部分模板、追溯清单、PDF 或通知失败 | partial_success |
|
||||||
|
| 所有目标 Word 均失败 | failed |
|
||||||
|
| 无来源文件汇总批次 | waiting_user |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、接口详细设计
|
||||||
|
|
||||||
|
### 10.1 发起自动填表
|
||||||
|
|
||||||
|
```text
|
||||||
|
POST /api/review-agent/application-form-fill/start/
|
||||||
|
```
|
||||||
|
|
||||||
|
请求:
|
||||||
|
|
||||||
|
| 参数 | 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| conversation_id | integer | 是 | 当前对话 |
|
||||||
|
| message_id | integer | 否 | 触发消息 |
|
||||||
|
| file_summary_batch_id | integer | 否 | 指定文件来源批次 |
|
||||||
|
| template_codes | array | 否 | 指定模板 |
|
||||||
|
| output_types | array | 否 | 输出类型,默认 word、excel、json |
|
||||||
|
|
||||||
|
响应:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"batch_id": 3001,
|
||||||
|
"workflow_type": "application_form_fill",
|
||||||
|
"status": "pending",
|
||||||
|
"selected_templates": ["registration_certificate", "essential_principles"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.2 查询状态
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/application-form-fill/{batch_id}/
|
||||||
|
```
|
||||||
|
|
||||||
|
响应:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"batch": {
|
||||||
|
"id": 3001,
|
||||||
|
"batch_no": "AFF-20260607153000-a1b2c3",
|
||||||
|
"status": "success",
|
||||||
|
"product_name": "甲胎蛋白检测试剂盒",
|
||||||
|
"selected_templates": ["registration_certificate"]
|
||||||
|
},
|
||||||
|
"nodes": [],
|
||||||
|
"conflicts": [],
|
||||||
|
"exports": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.3 下载文件
|
||||||
|
|
||||||
|
继续复用既有导出下载接口:
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/file-summary/exports/{export_id}/download/
|
||||||
|
```
|
||||||
|
|
||||||
|
下载权限通过 `workflow_type=application_form_fill` 和 `workflow_batch_id` 反查填表批次,并校验该批次所属对话的用户等于当前 `request.user`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、前端详细设计
|
||||||
|
|
||||||
|
### 11.1 工作流卡片
|
||||||
|
|
||||||
|
新增卡片类型 `application_form_fill`。
|
||||||
|
|
||||||
|
| 节点 | 展示 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 准备资料 |
|
||||||
|
| template_select | 选择模板 |
|
||||||
|
| template_copy | 复制模板 |
|
||||||
|
| field_extract | 抽取字段 |
|
||||||
|
| conflict_merge | 冲突归并 |
|
||||||
|
| word_fill | 填写 Word |
|
||||||
|
| pdf_convert | 转换 PDF |
|
||||||
|
| trace_export | 追溯清单 |
|
||||||
|
| output_export | 输出下载 |
|
||||||
|
| notify | 飞书通知 |
|
||||||
|
| completed | 已完成 |
|
||||||
|
|
||||||
|
PDF 本期显示为“已跳过/待增强”,不显示为失败。
|
||||||
|
|
||||||
|
### 11.2 AI 回复摘要
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成申报模板自动填表文件。
|
||||||
|
|
||||||
|
| 文件 | Word | PDF |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册证格式 | 下载 | 待增强 |
|
||||||
|
| 安全和性能基本原则清单 | 下载 | 待增强 |
|
||||||
|
|
||||||
|
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
|
||||||
|
|
||||||
|
[下载字段来源追溯清单](download-url)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、异常与降级
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 无成功汇总批次 | 批次 waiting_user,对话提示上传资料 |
|
||||||
|
| 模板配置不存在 | 批次 failed |
|
||||||
|
| 指定模板不存在 | 忽略无效模板并提示;若无有效模板则 failed |
|
||||||
|
| `.doc` 模板无可用工作模板 | 该模板失败,其他模板继续 |
|
||||||
|
| 文本抽取失败 | 对应文件跳过,记录在追溯清单 |
|
||||||
|
| LLM 抽取失败 | 使用规则/正则结果继续 |
|
||||||
|
| 字段缺失 | Word 留空 |
|
||||||
|
| 字段冲突 | 说明书优先并高亮 |
|
||||||
|
| 追溯清单失败 | Word 成功时批次 partial_success |
|
||||||
|
| 飞书通知失败 | 写通知记录且不阻断下载;核心产物成功时批次标记为 partial_success(与 9.4 批次状态决策一致) |
|
||||||
|
| PDF 未实现 | 节点 skipped,写入待增强提示 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、测试设计
|
||||||
|
|
||||||
|
### 13.1 单元测试
|
||||||
|
|
||||||
|
| 用例 | 目标 |
|
||||||
|
| --- | --- |
|
||||||
|
| test_form_fill_trigger_keywords | 触发语句识别为自动填表 |
|
||||||
|
| test_template_config_loads | YAML 配置可加载并校验 |
|
||||||
|
| test_select_default_templates_initial_registration | 首次注册默认选择注册证和基本原则清单 |
|
||||||
|
| test_select_user_requested_mismatch | 用户指定不适用模板仍允许生成并提示 |
|
||||||
|
| test_field_merge_prefers_instructions | 说明书字段优先 |
|
||||||
|
| test_field_merge_marks_conflict | 冲突字段进入 conflict_summary |
|
||||||
|
| test_word_fill_table_row | 能按表格行名写入 Word |
|
||||||
|
| test_word_fill_conflict_highlight | 冲突字段黄底红字 |
|
||||||
|
| test_traceability_excel | 追溯清单包含字段、来源和冲突 |
|
||||||
|
| test_notify_records_failure | 飞书失败写通知记录但不阻断 |
|
||||||
|
|
||||||
|
### 13.2 集成测试
|
||||||
|
|
||||||
|
| 场景 | 验证 |
|
||||||
|
| --- | --- |
|
||||||
|
| 最近汇总批次触发填表 | 无附件时复用最近 success `FileSummaryBatch` |
|
||||||
|
| 新附件触发填表 | 先自动汇总再启动填表 |
|
||||||
|
| 注册证模板填充 | 生成 Word 导出文件 |
|
||||||
|
| LLM 失败降级 | LLM 超时后规则抽取仍可生成 Word |
|
||||||
|
| 部分模板失败 | 至少一个 Word 成功时批次 partial_success |
|
||||||
|
| 权限隔离 | 不能查询或下载他人填表批次产物 |
|
||||||
|
|
||||||
|
### 13.3 前端验证
|
||||||
|
|
||||||
|
| 场景 | 验证 |
|
||||||
|
| --- | --- |
|
||||||
|
| 自动填表卡片 | 节点状态随 SSE 更新 |
|
||||||
|
| 指定模板展示 | 卡片展示本次选择模板 |
|
||||||
|
| PDF 跳过显示 | PDF 节点显示待增强而非失败 |
|
||||||
|
| 下载链接 | Word 和追溯清单链接可点击下载 |
|
||||||
|
| 冲突摘要 | 冲突字段表格正常渲染 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、实施顺序建议
|
||||||
|
|
||||||
|
1. 修改功能设计中的模板配置路径为 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml`。
|
||||||
|
2. 新增数据库模型和 `ExportedSummaryFile.ExportType` 扩展。
|
||||||
|
3. 新增 `application_form_fill` 模块目录和常量、schemas、storage。
|
||||||
|
4. 新增模板配置 YAML,先录入注册证 `.docx` 的已识别字段。
|
||||||
|
5. 实现模板选择、模板复制和 Word 表格行填充。
|
||||||
|
6. 实现规则/正则字段抽取和 LLM 抽取降级。
|
||||||
|
7. 实现字段合并、冲突高亮和追溯清单。
|
||||||
|
8. 实现工作流执行器、节点事件和状态接口。
|
||||||
|
9. 改造路由和前端工作流卡片。
|
||||||
|
10. 接入飞书通知记录。
|
||||||
|
11. 将字段级数据库表和 PDF 转换写入待办计划。
|
||||||
433
docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
433
docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,433 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表数据库设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 数据库类型 | SQLite / Django ORM |
|
||||||
|
| 表名前缀 | ra_ |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立填表批次 | 自动填表作为独立工作流,使用独立批次表,不强绑法规核查批次 |
|
||||||
|
| 复用文件来源 | 填表批次必须关联一个成功的 `FileSummaryBatch`,不重复保存文件清单 |
|
||||||
|
| 可选复用法规条件 | 如当前对话已有已确认法规核查批次,可通过可空外键复用注册类型等条件 |
|
||||||
|
| 导出记录复用 | Word、Excel、JSON、PDF 等下载文件继续进入 `ExportedSummaryFile` |
|
||||||
|
| 过程产物独立 | 自动填表过程产物单独建表,避免和法规核查 `RegulatoryArtifact` 混用 |
|
||||||
|
| 通知记录独立 | 自动填表飞书通知单独建表,字段风格与法规通知记录保持一致 |
|
||||||
|
| 大文本不入库 | 字段抽取 JSON、追溯清单和模板副本保存为文件,数据库仅保存路径、hash 和摘要 |
|
||||||
|
| 字段明细暂不入库 | 本期不新增字段级明细表;字段结果保存在 JSON/Excel 产物与批次摘要中 |
|
||||||
|
| SQLite 兼容 | 字段类型、索引和约束优先保证当前 SQLite + Django ORM 可运行 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、ER 图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
AUTH_USER ||--o{ CONVERSATION : owns
|
||||||
|
CONVERSATION ||--o{ RA_FILE_SUMMARY_BATCH : has
|
||||||
|
RA_FILE_SUMMARY_BATCH ||--o{ RA_FILE_SUMMARY_ITEM : produces
|
||||||
|
RA_FILE_SUMMARY_BATCH ||--o{ RA_APPLICATION_FORM_FILL_BATCH : feeds
|
||||||
|
RA_REGULATORY_REVIEW_BATCH ||--o{ RA_APPLICATION_FORM_FILL_BATCH : optionally_confirms
|
||||||
|
AUTH_USER ||--o{ RA_APPLICATION_FORM_FILL_BATCH : runs
|
||||||
|
CONVERSATION ||--o{ RA_APPLICATION_FORM_FILL_BATCH : has
|
||||||
|
MESSAGE ||--o{ RA_APPLICATION_FORM_FILL_BATCH : triggers
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_APPLICATION_FORM_FILL_ARTIFACT : keeps
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_APPLICATION_FORM_FILL_NOTIFICATION_RECORD : sends
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_EXPORTED_SUMMARY_FILE : exports
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_WORKFLOW_NODE_RUN : tracks
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_WORKFLOW_EVENT : emits
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:`ra_workflow_node_run`、`ra_workflow_event`、`ra_exported_summary_file` 已在第二批中被通用化,通过 `workflow_type` 与 `workflow_batch_id` 支持多工作流。本功能使用 `workflow_type=application_form_fill`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、表结构设计
|
||||||
|
|
||||||
|
### 3.1 ra_application_form_fill_batch
|
||||||
|
|
||||||
|
一次自动填表工作流批次。该表记录本次触发来源、选择模板、输出类型、注册类型、产品名称、冲突摘要、工作目录和状态。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| conversation_id | ForeignKey | bigint | 是 | 绑定对话 |
|
||||||
|
| user_id | ForeignKey | bigint | 是 | 发起用户 |
|
||||||
|
| trigger_message_id | ForeignKey | bigint | 否 | 触发填表工作流的用户消息 |
|
||||||
|
| source_summary_batch_id | ForeignKey | bigint | 是 | 文件来源汇总批次 |
|
||||||
|
| source_regulatory_batch_id | ForeignKey | bigint | 否 | 可选,复用已确认法规核查批次条件 |
|
||||||
|
| batch_no | CharField(64) | varchar(64) | 是 | 填表批次编号,唯一 |
|
||||||
|
| status | CharField(30) | varchar(30) | 是 | pending、running、waiting_user、success、partial_success、failed、cancelled |
|
||||||
|
| requested_templates | JSONField | text/json | 是 | 用户指定模板编码列表;未指定为空数组 |
|
||||||
|
| selected_templates | JSONField | text/json | 是 | 系统实际选择模板编码列表 |
|
||||||
|
| output_types | JSONField | text/json | 是 | 请求输出类型,如 word、excel、json、pdf |
|
||||||
|
| registration_type | CharField(80) | varchar(80) | 否 | 识别出的注册类型 |
|
||||||
|
| registration_type_source | CharField(40) | varchar(40) | 否 | user_message、regulatory_batch、file_extract、unknown |
|
||||||
|
| product_name | CharField(200) | varchar(200) | 否 | 产品名称 |
|
||||||
|
| conflict_summary | JSONField | text/json | 是 | 冲突字段摘要 |
|
||||||
|
| risk_notes | JSONField | text/json | 是 | 不适用模板、低置信度、PDF 待生成等提示 |
|
||||||
|
| template_config_version | CharField(80) | varchar(80) | 否 | 模板配置版本 |
|
||||||
|
| template_config_hash | CharField(128) | varchar(128) | 否 | 模板配置文件 hash |
|
||||||
|
| work_dir | CharField(500) | varchar(500) | 否 | 批次工作目录 |
|
||||||
|
| error_message | TextField | text | 否 | 批次异常说明 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| started_at | DateTimeField | datetime | 否 | 开始时间 |
|
||||||
|
| finished_at | DateTimeField | datetime | 否 | 完成时间 |
|
||||||
|
| archived_at | DateTimeField | datetime | 否 | 归档时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
唯一约束:
|
||||||
|
|
||||||
|
| 约束名 | 字段 |
|
||||||
|
| --- | --- |
|
||||||
|
| uq_ra_aff_batch_no | batch_no |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_aff_batch_conv_status | conversation_id, status | 查询对话下填表批次状态 |
|
||||||
|
| idx_ra_aff_batch_summary | source_summary_batch_id | 根据文件汇总批次查询填表历史 |
|
||||||
|
| idx_ra_aff_batch_regulatory | source_regulatory_batch_id | 根据法规核查批次查询关联填表历史 |
|
||||||
|
| idx_ra_aff_batch_user_created | user_id, created_at | 查询用户发起记录 |
|
||||||
|
| idx_ra_aff_batch_created | created_at | 按创建时间查询 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.2 ra_application_form_fill_artifact
|
||||||
|
|
||||||
|
自动填表过程产物表。仅保存文件元数据,不保存字段抽取大 JSON 的全文。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属自动填表批次 |
|
||||||
|
| artifact_type | CharField(60) | varchar(60) | 是 | template_copy、field_extract_result、merged_fields、traceability、filled_template、notification_record |
|
||||||
|
| file_format | CharField(20) | varchar(20) | 是 | json、excel、docx、pdf、markdown |
|
||||||
|
| name | CharField(160) | varchar(160) | 是 | 产物名称 |
|
||||||
|
| file_name | CharField(255) | varchar(255) | 是 | 文件名 |
|
||||||
|
| storage_path | CharField(500) | varchar(500) | 是 | 存储路径 |
|
||||||
|
| file_size | BigIntegerField | bigint | 是 | 文件大小 |
|
||||||
|
| content_hash | CharField(128) | varchar(128) | 是 | 文件 SHA-256 hash |
|
||||||
|
| metadata | JSONField | text/json | 是 | 模板编码、输出类型、生成状态、错误摘要等 |
|
||||||
|
| created_by_node | CharField(60) | varchar(60) | 否 | 产生该产物的节点 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_aff_artifact_batch_type | batch_id, artifact_type | 查询批次过程产物 |
|
||||||
|
| idx_ra_aff_artifact_format | file_format | 按文件格式查询 |
|
||||||
|
| idx_ra_aff_artifact_created | created_at | 按时间追溯 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.3 ra_application_form_fill_notification_record
|
||||||
|
|
||||||
|
自动填表飞书通知记录表。通知失败不阻断文件下载,但需要留痕和支持后续重试。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属自动填表批次 |
|
||||||
|
| recipient_id | ForeignKey(User) | bigint | 是 | 通知对象,默认上传人/发起人 |
|
||||||
|
| channel | CharField(30) | varchar(30) | 是 | feishu_cli、feishu_api、mock |
|
||||||
|
| template_codes | JSONField | text/json | 是 | 本次通知涉及模板 |
|
||||||
|
| export_ids | JSONField | text/json | 是 | 本次通知关联导出文件 ID |
|
||||||
|
| message_summary | TextField | text | 是 | 通知摘要 |
|
||||||
|
| send_status | CharField(20) | varchar(20) | 是 | pending、success、failed |
|
||||||
|
| retry_count | PositiveIntegerField | integer | 是 | 已重试次数 |
|
||||||
|
| external_message_id | CharField(120) | varchar(120) | 否 | 飞书外部消息 ID |
|
||||||
|
| error_message | TextField | text | 否 | 失败原因 |
|
||||||
|
| sent_at | DateTimeField | datetime | 否 | 发送成功时间 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_aff_notify_batch | batch_id, created_at | 查询批次通知记录 |
|
||||||
|
| idx_ra_aff_notify_recipient | recipient_id, send_status | 查询用户通知状态 |
|
||||||
|
| idx_ra_aff_notify_status | send_status, retry_count | 查询待重试通知 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、既有表扩展
|
||||||
|
|
||||||
|
### 4.1 ra_exported_summary_file
|
||||||
|
|
||||||
|
继续复用导出文件表,需扩展导出类型。
|
||||||
|
|
||||||
|
| 字段/枚举 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| export_type | 增加 `word`、`pdf` |
|
||||||
|
| workflow_type | 使用 `application_form_fill` |
|
||||||
|
| workflow_batch_id | 记录 `ApplicationFormFillBatch.id` |
|
||||||
|
| export_category | 使用 `filled_template`、`traceability`、`extract_result` |
|
||||||
|
|
||||||
|
导出类型枚举:
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| markdown | Markdown | 既有报告 |
|
||||||
|
| excel | Excel | 追溯清单 |
|
||||||
|
| json | JSON | 字段抽取结果包 |
|
||||||
|
| word | Word | 填好的 Word 模板 |
|
||||||
|
| pdf | PDF | Word 转换后的 PDF,P1 预留 |
|
||||||
|
|
||||||
|
### 4.2 ra_workflow_node_run
|
||||||
|
|
||||||
|
本功能使用通用工作流字段:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | application_form_fill |
|
||||||
|
| workflow_batch_id | ApplicationFormFillBatch.id |
|
||||||
|
| node_group | form_fill |
|
||||||
|
| batch_id | 可为空或兼容性填充 source_summary_batch_id |
|
||||||
|
|
||||||
|
### 4.3 ra_workflow_event
|
||||||
|
|
||||||
|
本功能事件写入:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | application_form_fill |
|
||||||
|
| workflow_batch_id | ApplicationFormFillBatch.id |
|
||||||
|
| conversation_id | 当前对话 ID |
|
||||||
|
| payload | 节点状态、模板列表、冲突数量、导出文件等 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、枚举设计
|
||||||
|
|
||||||
|
### 5.1 ApplicationFormFillBatch.status
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| pending | 待执行 | 批次已创建,等待执行 |
|
||||||
|
| running | 执行中 | 工作流正在执行 |
|
||||||
|
| waiting_user | 等待用户 | 缺少文件汇总批次或关键条件 |
|
||||||
|
| success | 成功 | Word 和必要追溯产物生成成功 |
|
||||||
|
| partial_success | 部分成功 | 部分模板、PDF、追溯清单或通知失败 |
|
||||||
|
| failed | 失败 | 所有目标 Word 模板均生成失败 |
|
||||||
|
| cancelled | 已取消 | 用户或系统取消执行 |
|
||||||
|
|
||||||
|
### 5.2 artifact_type
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| template_copy | 模板副本 |
|
||||||
|
| field_extract_result | 规则/正则与 LLM 抽取原始结果 |
|
||||||
|
| merged_fields | 合并后的最终字段和冲突 |
|
||||||
|
| traceability | 字段来源追溯清单 |
|
||||||
|
| filled_template | 已填写模板 |
|
||||||
|
| notification_record | 通知记录产物 |
|
||||||
|
|
||||||
|
### 5.3 registration_type_source
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| user_message | 用户话语明确指定 |
|
||||||
|
| regulatory_batch | 复用已确认法规核查条件 |
|
||||||
|
| file_extract | 从文件内容抽取 |
|
||||||
|
| unknown | 未识别 |
|
||||||
|
|
||||||
|
### 5.4 通知枚举
|
||||||
|
|
||||||
|
| 字段 | value |
|
||||||
|
| --- | --- |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、JSON 字段结构建议
|
||||||
|
|
||||||
|
### 6.1 requested_templates / selected_templates
|
||||||
|
|
||||||
|
```json
|
||||||
|
["registration_certificate", "essential_principles"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 output_types
|
||||||
|
|
||||||
|
```json
|
||||||
|
["word", "excel", "json"]
|
||||||
|
```
|
||||||
|
|
||||||
|
PDF 作为 P1 预留,可在后续加入:
|
||||||
|
|
||||||
|
```json
|
||||||
|
["word", "pdf", "excel", "json"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 conflict_summary
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"field_key": "storage_condition",
|
||||||
|
"field_label": "产品储存条件及有效期",
|
||||||
|
"selected_value": "2-8℃保存,有效期12个月",
|
||||||
|
"selected_source": "说明书.docx",
|
||||||
|
"conflict_values": [
|
||||||
|
{
|
||||||
|
"value": "-20℃保存",
|
||||||
|
"source_file": "产品技术要求.docx",
|
||||||
|
"evidence": "储存条件:-20℃保存"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"handling": "说明书优先,模板内黄底红字高亮"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 risk_notes
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "template_registration_mismatch",
|
||||||
|
"message": "用户指定变更注册(备案)文件,但系统识别注册类型为首次注册,需人工确认。"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "pdf_pending",
|
||||||
|
"message": "PDF 转换为后续增强项,本次优先生成 Word。"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.5 artifact.metadata
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_code": "registration_certificate",
|
||||||
|
"output_type": "word",
|
||||||
|
"node_code": "word_fill",
|
||||||
|
"status": "success",
|
||||||
|
"conflict_count": 2
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、存储路径设计
|
||||||
|
|
||||||
|
自动填表工作目录按用户、对话和批次隔离:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/application_form_fill/{user_id}/{conversation_id}/{batch_no}/
|
||||||
|
```
|
||||||
|
|
||||||
|
目录结构:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/application_form_fill/12/1001/AFF-20260607153000-a1b2c3/
|
||||||
|
templates/
|
||||||
|
registration_certificate.source.docx
|
||||||
|
essential_principles.source.docx
|
||||||
|
filled/
|
||||||
|
AFF-20260607153000-a1b2c3-甲胎蛋白检测试剂盒-注册证格式.docx
|
||||||
|
exports/
|
||||||
|
AFF-20260607153000-a1b2c3-甲胎蛋白检测试剂盒-字段来源追溯清单.xlsx
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
notifications/
|
||||||
|
notification_record.json
|
||||||
|
```
|
||||||
|
|
||||||
|
所有产物写入 `ApplicationFormFillArtifact` 时必须记录 SHA-256 hash。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、权限与查询规则
|
||||||
|
|
||||||
|
### 8.1 批次访问权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
ApplicationFormFillBatch -> conversation -> user
|
||||||
|
必须等于当前 request.user
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 导出下载权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
ExportedSummaryFile.workflow_type == application_form_fill
|
||||||
|
-> workflow_batch_id
|
||||||
|
-> ApplicationFormFillBatch.conversation.user
|
||||||
|
```
|
||||||
|
|
||||||
|
若 `workflow_type=file_summary` 或 `regulatory_review`,仍按既有逻辑校验。
|
||||||
|
|
||||||
|
### 8.3 文件读取权限
|
||||||
|
|
||||||
|
自动填表只能读取 `source_summary_batch.items` 对应的文件,不允许从其他对话或其他批次随意读取文件。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、字段级数据库表暂缓说明
|
||||||
|
|
||||||
|
本期不新增 `ApplicationFormFillField` 字段级明细表。原因:
|
||||||
|
|
||||||
|
| 原因 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| Demo 主链路更轻 | 字段结果以 JSON 和 Excel 追溯清单即可满足下载复核 |
|
||||||
|
| 避免过早建模 | 字段结构依赖模板配置和后续人工修改交互,暂不固化表结构 |
|
||||||
|
| 查询需求有限 | 本期主要按批次下载文件,不做字段级统计和在线编辑 |
|
||||||
|
|
||||||
|
后续如需要在线确认、人工修改、字段级审计或批量统计,再新增字段级表。该事项写入 `docs/6.待办计划/第二阶段暂缓事项.md`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、Django Model 命名建议
|
||||||
|
|
||||||
|
| 表名 | Model 名称 |
|
||||||
|
| --- | --- |
|
||||||
|
| ra_application_form_fill_batch | ApplicationFormFillBatch |
|
||||||
|
| ra_application_form_fill_artifact | ApplicationFormFillArtifact |
|
||||||
|
| ra_application_form_fill_notification_record | ApplicationFormFillNotificationRecord |
|
||||||
|
|
||||||
|
建议模型仍集中放在 `review_agent/models.py`,与前两批现有模型保持一致;业务逻辑放在 `review_agent/application_form_fill/`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、验收检查点
|
||||||
|
|
||||||
|
| 序号 | 检查项 | 验收标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 独立批次 | 触发填表后生成 `ApplicationFormFillBatch` |
|
||||||
|
| 2 | 文件来源 | 每个填表批次都关联一个成功的 `FileSummaryBatch` |
|
||||||
|
| 3 | 可选法规条件 | 如有关联法规核查批次,可记录 `source_regulatory_batch` |
|
||||||
|
| 4 | 过程产物 | 字段抽取 JSON、合并结果、追溯清单、模板副本均可留底 |
|
||||||
|
| 5 | 导出复用 | 填好的 Word 和追溯清单进入 `ExportedSummaryFile` |
|
||||||
|
| 6 | 导出类型 | `ExportedSummaryFile.ExportType` 支持 `word`、`pdf` |
|
||||||
|
| 7 | 通知记录 | 飞书通知记录能保存状态、重试次数、失败原因 |
|
||||||
|
| 8 | 权限隔离 | 用户 A 的填表批次和导出文件不能被用户 B 查询或下载(按批次所属对话的用户校验) |
|
||||||
|
| 9 | 字段表暂缓 | 字段级结果不入库,但能从 JSON/Excel 追溯产物复核 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、开发顺序建议
|
||||||
|
|
||||||
|
1. 扩展 `ExportedSummaryFile.ExportType`,增加 `word`、`pdf`。
|
||||||
|
2. 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord`。
|
||||||
|
3. 为新增状态字段定义 Django `TextChoices`。
|
||||||
|
4. 配置表名、索引和唯一约束。
|
||||||
|
5. 执行 `python manage.py makemigrations review_agent` 和 `python manage.py migrate`。
|
||||||
|
6. 编写模型测试,覆盖批次创建、产物 hash、通知重试字段、导出权限查询。
|
||||||
|
7. 将字段级数据库表和 PDF 转换能力写入待办计划。
|
||||||
@@ -7,6 +7,7 @@
|
|||||||
```text
|
```text
|
||||||
适用条件对话选择框
|
适用条件对话选择框
|
||||||
-> waiting_user 暂停恢复
|
-> waiting_user 暂停恢复
|
||||||
|
-> 附件 4 申报资料目录规则对齐
|
||||||
-> 整包复核
|
-> 整包复核
|
||||||
-> 缺失项复核
|
-> 缺失项复核
|
||||||
-> mock 通知留痕
|
-> mock 通知留痕
|
||||||
@@ -23,7 +24,7 @@
|
|||||||
| 阶段 | 名称 | 目标 | 验收 |
|
| 阶段 | 名称 | 目标 | 验收 |
|
||||||
| --- | --- | --- | --- |
|
| --- | --- | --- | --- |
|
||||||
| RR2-1 | 适用条件确认 | 对话选择框确认产品类别、注册类型、临床评价路径等 | waiting_user 可暂停恢复 |
|
| RR2-1 | 适用条件确认 | 对话选择框确认产品类别、注册类型、临床评价路径等 | waiting_user 可暂停恢复 |
|
||||||
| RR2-2 | 核查能力增强 | 扩展章节、一致性、RAG 引用和文本抽取范围 | 复杂样例可识别更多问题 |
|
| RR2-2 | 附件 4 规则对齐与核查能力增强 | 按《体外诊断试剂注册申报资料要求及说明》扩展完整目录规则、章节、一致性、RAG 引用和文本抽取范围 | 能识别附件 4 一级/二级目录缺失和关键字段问题 |
|
||||||
| RR2-3 | 整包复核 | 基于新的汇总批次创建新的法规核查批次 | 可追溯来源批次 |
|
| RR2-3 | 整包复核 | 基于新的汇总批次创建新的法规核查批次 | 可追溯来源批次 |
|
||||||
| RR2-4 | 缺失项复核 | 针对原 Issue 执行复核并更新状态 | 生成 review_record |
|
| RR2-4 | 缺失项复核 | 针对原 Issue 执行复核并更新状态 | 生成 review_record |
|
||||||
| RR2-5 | mock 通知留痕 | 对 blocking/high/medium 写 mock 通知记录 | 报告展示通知记录 |
|
| RR2-5 | mock 通知留痕 | 对 blocking/high/medium 写 mock 通知记录 | 报告展示通知记录 |
|
||||||
@@ -71,29 +72,89 @@ pytest tests/test_regulatory_condition.py tests/test_regulatory_frontend.py test
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 四、RR2-2 核查能力增强
|
## 四、RR2-2 附件 4 规则对齐与核查能力增强
|
||||||
|
|
||||||
|
### 新增口径:附件 4 必须结构化入规则库
|
||||||
|
|
||||||
|
第一批主链路已经可以演示,但现有 Demo YAML 只覆盖 5 类规则:产品技术要求、说明书、注册检验报告、临床评价资料、安全和性能基本原则清单。经人工确认,第一批链路可通过;但与附件 4《体外诊断试剂注册申报资料要求及说明》相比,规则覆盖仍不完整。第二批 RR2-2 必须将附件 4 的申报资料目录结构补入规则库,并作为完整性和章节核查的主要依据。
|
||||||
|
|
||||||
|
附件来源:
|
||||||
|
|
||||||
|
```text
|
||||||
|
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
|
||||||
|
```
|
||||||
|
|
||||||
|
如附件仍为旧版 `.doc`,允许在开发阶段通过 LibreOffice headless、Word COM 或受控脚本转换为 `.docx`/`.txt` 中间产物(Pandoc 不能直接读取旧版 `.doc`,只能在转换为 `.docx` 之后用于进一步抽取文本);中间产物只用于规则抽取和测试夹具,不改变第一阶段文件页数统计口径。
|
||||||
|
|
||||||
|
### 附件 4 目录覆盖范围
|
||||||
|
|
||||||
|
第二批 Demo 规则至少覆盖以下一级和二级标题。规则应支持“章节目录”类目录项、资料文件项、条件适用项和推荐项的区分。
|
||||||
|
|
||||||
|
| 一级目录 | 二级目录/资料项 |
|
||||||
|
| --- | --- |
|
||||||
|
| 1. 监管信息 | 1.1 章节目录、1.2 申请表、1.3 术语/缩写词列表、1.4 产品列表、1.5 关联文件、1.6 申报前与监管机构的联系情况和沟通记录、1.7 符合性声明 |
|
||||||
|
| 2. 综述资料 | 2.1 章节目录、2.2 概述、2.3 产品描述、2.4 预期用途、2.5 申报产品上市历史、2.6 其他需说明的内容 |
|
||||||
|
| 3. 非临床资料 | 3.1 章节目录、3.2 产品风险管理资料、3.3 体外诊断试剂安全和性能基本原则清单、3.4 产品技术要求及检验报告、3.5 分析性能研究、3.6 稳定性研究、3.7 阳性判断值或参考区间研究、3.8 其他资料 |
|
||||||
|
| 4. 临床评价资料 | 4.1 章节目录、4.2 临床评价资料 |
|
||||||
|
| 5. 产品说明书和标签样稿 | 5.1 章节目录、5.2 产品说明书、5.3 标签样稿、5.4 其他资料 |
|
||||||
|
| 6. 质量管理体系文件 | 6.1 综述、6.2 章节目录、6.3 生产制造信息、6.4 质量管理体系程序、6.5 管理职责程序、6.6 资源管理程序、6.7 产品实现程序、6.8 质量管理体系的测量/分析和改进程序、6.9 其他质量体系程序信息、6.10 质量管理体系核查文件 |
|
||||||
|
|
||||||
|
### 规则分级默认值
|
||||||
|
|
||||||
|
| 规则类型 | 默认风险 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 一级目录整体缺失 | high | 如缺少“监管信息”“综述资料”“非临床资料”等完整章节 |
|
||||||
|
| 关键法定资料缺失 | blocking | 申请表、符合性声明、产品技术要求及检验报告等 |
|
||||||
|
| 关键技术/评价资料缺失 | high | 产品风险管理资料、分析性能研究、稳定性研究、临床评价资料、产品说明书、标签样稿等 |
|
||||||
|
| 条件适用资料缺失 | medium/high | 如上市历史、申报前沟通记录、其他资料;需结合 RR2-1 适用条件判断 |
|
||||||
|
| 章节目录缺失 | medium | 各一级目录下的章节目录缺失,影响资料可追溯性 |
|
||||||
|
|
||||||
|
### 与现有第一批链路的差异修正
|
||||||
|
|
||||||
|
| 当前能力 | 第二批修正 |
|
||||||
|
| --- | --- |
|
||||||
|
| 完整性核查只按文件名和相对路径匹配 | 增加目录名、首页文本/前若干页文本、章节标题候选匹配 |
|
||||||
|
| YAML 只覆盖 5 个 Demo 条目 | 扩展为附件 4 一级/二级目录规则,保留第一批 5 条并映射到附件 4 对应章节 |
|
||||||
|
| 章节核查只检查说明书储存条件/有效期/样本要求 | 改为同时检查申报资料目录结构和说明书内部关键章节 |
|
||||||
|
| RAG 可能跳过 `.doc` 材料 | 附件 4 必须可被转换或抽取,构建 RAG 前输出可读文本抽取状态 |
|
||||||
|
| 一致性只检查产品名称、型号规格、预期用途 | 保留这三项,并增加管理类别、分类编码、注册类型、临床评价路径等候选字段 |
|
||||||
|
|
||||||
### 任务
|
### 任务
|
||||||
|
|
||||||
| 编号 | 内容 | 文件 |
|
| 编号 | 内容 | 文件 |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| RR2-2-001 | 扩展 YAML 规则中的必需章节和一致性字段 | `rules/nmpa_ivd_registration_v1.yaml` |
|
| RR2-2-001 | 将附件 4 `.doc` 抽取为可测试的结构化目录夹具 | `tests/fixtures/regulatory/attachment4_outline.json` 或同等 fixture |
|
||||||
| RR2-2-002 | 增强文本抽取,缓存章节候选和字段候选 | `services/text_extract.py` |
|
| RR2-2-002 | 扩展 YAML 规则,覆盖附件 4 一级/二级目录、别名、适用条件、风险等级和整改建议 | `rules/nmpa_ivd_registration_v1.yaml` |
|
||||||
| RR2-2-003 | 增强章节核查,支持别名、近似标题和证据片段 | `services/structure_check.py` |
|
| RR2-2-003 | 增强规则加载校验,确保附件 4 必填目录项都有规则 ID、关键词、风险等级和 citation_query | `services/rule_loader.py` |
|
||||||
| RR2-2-004 | 增强一致性核查,支持多个来源值和低置信度提示项 | `services/consistency_check.py` |
|
| RR2-2-004 | 增强完整性核查,支持文件名、目录名、首页文本/前若干页文本、章节标题候选匹配 | `services/completeness_check.py`、`services/text_extract.py` |
|
||||||
| RR2-2-005 | RAG 引用写入 `rag_result_json` 过程产物 | `services/rag_citation.py`、`storage.py` |
|
| RR2-2-005 | 增强文本抽取,缓存章节候选、字段候选、首页文本和抽取状态 | `services/text_extract.py`、`storage.py` |
|
||||||
| RR2-2-006 | 增加测试 | `tests/test_regulatory_structure.py`、`tests/test_regulatory_consistency.py`、`tests/test_regulatory_rag.py` |
|
| RR2-2-006 | 增强章节核查,支持附件 4 目录层级、别名、近似标题和证据片段 | `services/structure_check.py` |
|
||||||
|
| RR2-2-007 | 增强一致性核查,支持产品名称、型号规格、预期用途、管理类别、分类编码、注册类型、临床评价路径等来源值 | `services/consistency_check.py` |
|
||||||
|
| RR2-2-008 | RAG 引用写入 `rag_result_json` 过程产物,并记录附件 4 文本抽取/索引状态 | `services/rag_citation.py`、`storage.py` |
|
||||||
|
| RR2-2-009 | 增加附件 4 对齐测试 | `tests/test_regulatory_rule_loader.py`、`tests/test_regulatory_completeness.py`、`tests/test_regulatory_structure.py`、`tests/test_regulatory_consistency.py`、`tests/test_regulatory_rag.py` |
|
||||||
|
|
||||||
|
### 验收样例
|
||||||
|
|
||||||
|
| 样例条件 | 预期 |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件包缺少“监管信息/申请表” | 生成 blocking 问题(申请表属于关键法定资料),并引用附件 4 监管信息要求 |
|
||||||
|
| 文件包缺少“产品风险管理资料” | 生成 high 问题,category 为 completeness |
|
||||||
|
| 文件包缺少“分析性能研究”或“稳定性研究” | 生成 high 问题,给出补充研究资料建议 |
|
||||||
|
| 文件包有产品技术要求但无检验报告 | 生成 blocking 问题,规则映射到 3.4 |
|
||||||
|
| 文件包有产品说明书但无标签样稿 | 生成 high 问题,规则映射到 5.3 |
|
||||||
|
| 文件包缺少质量管理体系文件 | 生成 high 问题,规则映射到第 6 章 |
|
||||||
|
| 附件 4 `.doc` 未能抽取 | RAG 构建命令失败或明确报告附件 4 抽取失败,不允许静默跳过该核心材料 |
|
||||||
|
|
||||||
### 验证命令
|
### 验证命令
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pytest tests/test_regulatory_structure.py tests/test_regulatory_consistency.py tests/test_regulatory_rag.py
|
pytest tests/test_regulatory_rule_loader.py tests/test_regulatory_completeness.py tests/test_regulatory_structure.py tests/test_regulatory_consistency.py tests/test_regulatory_rag.py
|
||||||
```
|
```
|
||||||
|
|
||||||
### Codex 执行提示
|
### Codex 执行提示
|
||||||
|
|
||||||
```text
|
```text
|
||||||
请增强章节核查、一致性核查和 RAG 过程产物。证据必须包含文件路径、命中片段、字段名或规则 ID,便于人工复核。
|
请先将附件 4《体外诊断试剂注册申报资料要求及说明》结构化为规则覆盖清单,再增强 YAML、完整性核查、章节核查、一致性核查和 RAG 过程产物。第二批必须覆盖附件 4 的 1-6 章一级目录和主要二级目录;证据必须包含文件路径、命中片段、字段名或规则 ID,便于人工复核。附件 4 作为核心法规材料,不允许在 RAG 构建中静默跳过。
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -229,14 +290,15 @@ pytest
|
|||||||
第一批主链路已经完成并通过全量测试。
|
第一批主链路已经完成并通过全量测试。
|
||||||
|
|
||||||
目标:
|
目标:
|
||||||
补齐法规核查完整整改闭环,包括适用条件对话选择框、waiting_user 暂停恢复、整包复核、缺失项复核、mock 通知留痕、增强章节/一致性核查和前端交互。
|
补齐法规核查完整整改闭环,包括适用条件对话选择框、waiting_user 暂停恢复、附件 4 申报资料目录规则对齐、整包复核、缺失项复核、mock 通知留痕、增强章节/一致性核查和前端交互。
|
||||||
|
|
||||||
执行规则:
|
执行规则:
|
||||||
1. 从第一批完成后的稳定分支创建 codex/YYYYMMDD-NMPA法规核查完整闭环 分支。
|
1. 从第一批完成后的稳定分支创建 codex/YYYYMMDD-NMPA法规核查完整闭环 分支。
|
||||||
2. 按 RR2-1 到 RR2-6 顺序执行。
|
2. 按 RR2-1 到 RR2-6 顺序执行。
|
||||||
3. 每阶段完成后运行对应验证命令。
|
3. 每阶段完成后运行对应验证命令。
|
||||||
4. 不接真实飞书 CLI/API。
|
4. RR2-2 必须覆盖附件 4 的 1-6 章一级目录和主要二级目录,不能只保留第一批 5 条 Demo 规则。
|
||||||
5. 不做规则管理前端。
|
5. 不接真实飞书 CLI/API。
|
||||||
6. 不做自动填写目标文件。
|
6. 不做规则管理前端。
|
||||||
7. 最后运行 python manage.py check 和 pytest 全量验收。
|
7. 不做自动填写目标文件。
|
||||||
|
8. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
```
|
```
|
||||||
|
|||||||
632
docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md
Normal file
632
docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,632 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表开发计划
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 详细设计文档 | docs/3.详细设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 数据库设计文档 | docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 执行方式 | 单人开发 + Codex 目标模式自动化执行 |
|
||||||
|
| 计划日期 | 2026-06-07 |
|
||||||
|
| 计划版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、开发计划目标
|
||||||
|
|
||||||
|
本开发计划用于指导 Codex 目标模式按阶段完成“产品关键信息提取与申报文件自动填表”功能开发。该功能作为独立工作流 `application_form_fill` 实现,由用户对话触发,默认复用当前对话最近成功的文件汇总批次;如本次消息带新附件,则先串联文件汇总,再执行自动填表。
|
||||||
|
|
||||||
|
本期必须完成:独立填表批次、过程产物、飞书通知记录、模板配置、注册证 `.docx` 模板填充、字段抽取与合并、冲突高亮、追溯清单、Word 下载、自动填表工作流卡片和权限校验。
|
||||||
|
|
||||||
|
本期明确不强制完成:PDF 转换、字段级数据库表、`.doc` 模板自动转换、完整安全和性能基本原则清单条目拆解。这些事项已进入 `docs/6.待办计划/第二阶段暂缓事项.md`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、已确认开发规则
|
||||||
|
|
||||||
|
| 规则项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 工作流类型 | 新增独立 `application_form_fill`,不塞入 `regulatory_review` 工作流 |
|
||||||
|
| 触发方式 | 用户对话触发,如“帮我填注册证”“给我这个内容对应的表格”“为该方案生成申报模板” |
|
||||||
|
| 模板指定 | 用户可指定模板;未指定时按注册类型生成适用模板 |
|
||||||
|
| 文件来源 | 无新附件时复用当前对话最近成功 `FileSummaryBatch`;有新附件时先自动汇总 |
|
||||||
|
| 模板配置 | 放在 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` |
|
||||||
|
| 字段抽取 | 规则/正则与 LLM 结构化抽取并行,合并处理 |
|
||||||
|
| 冲突处理 | 说明书优先;冲突字段在 Word 中黄色底色、红色字体 |
|
||||||
|
| 输出范围 | Demo 主链路优先 Word + Excel/JSON 追溯清单 |
|
||||||
|
| PDF | 数据结构预留,工作流节点可 skipped,不作为本期强验收 |
|
||||||
|
| 飞书 | 新增自动填表通知记录表,通知失败不阻断下载 |
|
||||||
|
| 数据库 | 新增三张表;字段级明细表暂缓 |
|
||||||
|
| Git 提交 | 每个阶段完成并验证通过后提交一次 |
|
||||||
|
| 测试要求 | 每阶段至少运行对应 pytest;前端阶段补卡片和渲染测试 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、总体验收标准
|
||||||
|
|
||||||
|
| 类别 | 完成标准 |
|
||||||
|
| --- | --- |
|
||||||
|
| 数据库 | `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord` 可通过 migration 落库 |
|
||||||
|
| 导出类型 | `ExportedSummaryFile.ExportType` 支持 `word`、`pdf`,并兼容既有 markdown/excel/json |
|
||||||
|
| 模块结构 | 新增 `review_agent/application_form_fill/` 独立模块 |
|
||||||
|
| 触发 | 用户说“帮我填注册证”等语句可触发 `application_form_fill` |
|
||||||
|
| 文件来源 | 无新附件时复用最近成功汇总批次;无汇总批次时提示上传资料 |
|
||||||
|
| 模板配置 | YAML 可加载、校验,并至少配置注册证格式 `.docx` 已识别字段 |
|
||||||
|
| 字段抽取 | 规则/正则与 LLM 抽取结果均可留底;LLM 失败时规则结果可继续 |
|
||||||
|
| 字段合并 | 说明书优先,冲突字段进入 `conflict_summary` 和追溯清单 |
|
||||||
|
| Word 填充 | 能按表格行名填入注册证模板字段,缺失字段留空 |
|
||||||
|
| 冲突高亮 | 冲突字段在 Word 内黄底红字 |
|
||||||
|
| 追溯清单 | 生成 Excel/JSON,记录规则结果、LLM 结果、合并字段、冲突和来源证据 |
|
||||||
|
| 下载 | 对话框提供填好 Word 和追溯清单下载链接 |
|
||||||
|
| 工作流卡片 | 前端支持 `application_form_fill` 卡片,展示准备资料、选择模板、复制模板、抽取字段、填写 Word 等节点 |
|
||||||
|
| 飞书通知 | 填表完成后写通知记录,可 mock;失败不阻断文件下载 |
|
||||||
|
| 权限 | A 对话不能查询或下载 B 对话的填表批次和导出文件 |
|
||||||
|
| 回归 | 第一批文件汇总、第二批法规核查既有测试不回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、阶段总览
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 阶段验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| AFF-0 | 准备与回归 | 创建开发分支,确认现有测试基线 | `python manage.py check` 和关键回归测试通过 |
|
||||||
|
| AFF-1 | 数据模型与通用导出扩展 | 新增三张表,扩展 word/pdf 导出类型 | migration、模型测试通过 |
|
||||||
|
| AFF-2 | 模块骨架与模板配置 | 新建独立模块、YAML 配置和配置校验 | 模板配置测试通过 |
|
||||||
|
| AFF-3 | 触发与工作流骨架 | 对话触发、批次创建、节点事件和状态查询 | 可创建并运行空工作流 |
|
||||||
|
| AFF-4 | 模板选择与文件来源 | 复用最近汇总批次,支持指定/默认模板选择 | 模板选择和来源批次测试通过 |
|
||||||
|
| AFF-5 | 字段抽取与合并 | 规则/正则 + LLM 并行抽取、冲突归并和产物留底 | 字段抽取、冲突测试通过 |
|
||||||
|
| AFF-6 | Word 填充与追溯导出 | 注册证 Word 填充、冲突高亮、Excel/JSON 追溯 | 可下载 Word 和追溯清单 |
|
||||||
|
| AFF-7 | 飞书通知与对话摘要 | 生成助手摘要、下载链接和通知记录 | 通知、摘要、下载权限测试通过 |
|
||||||
|
| AFF-8 | 前端卡片与总体验收 | 自动填表工作流卡片、状态恢复、全量回归 | 全量测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、AFF-0 准备与回归
|
||||||
|
|
||||||
|
### AFF-0-001 创建开发分支并确认现状
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | Git / 准备 |
|
||||||
|
| 前置任务 | 无 |
|
||||||
|
| 涉及文件 | 无固定文件 |
|
||||||
|
| 目标 | 从当前稳定分支创建 `codex/YYYYMMDD-申报文件自动填表` 开发分支,并确认工作区状态 |
|
||||||
|
| 开发步骤 | 1. 检查当前分支和 `git status`;2. 确认第三批设计文档存在;3. 创建开发分支;4. 记录已有未提交变更,不得回滚用户变更 |
|
||||||
|
| 验收标准 | 分支创建成功,工作区变更来源清楚 |
|
||||||
|
| 验证命令 | `git branch --show-current`; `git status --short` |
|
||||||
|
| Codex 执行提示 | 请创建第三批自动填表开发分支,检查当前工作区状态和设计文档,不要回滚用户已有变更。 |
|
||||||
|
|
||||||
|
### AFF-0-002 运行基线回归
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 回归 |
|
||||||
|
| 前置任务 | AFF-0-001 |
|
||||||
|
| 涉及文件 | 无固定文件 |
|
||||||
|
| 目标 | 确认现有文件汇总和法规核查主流程在开发前可用 |
|
||||||
|
| 开发步骤 | 1. 运行 Django check;2. 运行文件汇总测试;3. 运行法规核查测试;4. 记录失败项并先判断是否为既有问题 |
|
||||||
|
| 验收标准 | 关键回归测试通过,或记录清楚既有失败和本阶段处理策略 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_*.py tests/test_regulatory_*.py` |
|
||||||
|
| Codex 执行提示 | 请在开发前运行 Django check 和文件汇总/法规核查关键测试,确认基线稳定。若存在既有失败,请记录,不要直接改无关代码。 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、AFF-1 数据模型与通用导出扩展
|
||||||
|
|
||||||
|
### AFF-1-001 新增自动填表 ORM 模型
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 后端 |
|
||||||
|
| 前置任务 | AFF-0 |
|
||||||
|
| 涉及文件 | `review_agent/models.py` |
|
||||||
|
| 目标 | 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord` |
|
||||||
|
| 开发步骤 | 1. 定义批次状态枚举;2. 定义产物类型枚举;3. 定义通知状态和渠道枚举;4. 添加外键到 Conversation、User、Message、FileSummaryBatch、RegulatoryReviewBatch;5. 添加 JSONField、hash、路径、时间字段;6. 添加 `db_table`、索引和唯一约束 |
|
||||||
|
| 验收标准 | 模型字段、表名、索引与数据库设计一致 |
|
||||||
|
| 验证命令 | `python manage.py check` |
|
||||||
|
| Codex 执行提示 | 请按 `docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md` 新增自动填表三张表模型,模型集中放在 `review_agent/models.py`。 |
|
||||||
|
|
||||||
|
### AFF-1-002 扩展导出类型和权限查询能力
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 下载 |
|
||||||
|
| 前置任务 | AFF-1-001 |
|
||||||
|
| 涉及文件 | `review_agent/models.py`、导出下载权限相关视图 |
|
||||||
|
| 目标 | 为 `ExportedSummaryFile.ExportType` 增加 `word`、`pdf`,并确保下载权限支持 `application_form_fill` |
|
||||||
|
| 开发步骤 | 1. 扩展 `ExportType.WORD`;2. 扩展 `ExportType.PDF`;3. 检查下载接口按 workflow_type 分派权限;4. 增加 application_form_fill 反查批次的权限路径 |
|
||||||
|
| 验收标准 | Word/PDF 导出记录可创建;填表导出下载权限可追溯到当前用户 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_views.py -k download` |
|
||||||
|
| Codex 执行提示 | 请扩展 ExportedSummaryFile 支持 word/pdf,并让现有下载接口能通过 workflow_type=application_form_fill 校验填表批次权限。 |
|
||||||
|
|
||||||
|
### AFF-1-003 生成迁移并补模型测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 测试 |
|
||||||
|
| 前置任务 | AFF-1-002 |
|
||||||
|
| 涉及文件 | `review_agent/migrations/`、`tests/test_application_form_fill_models.py` |
|
||||||
|
| 目标 | 生成迁移并覆盖新增表的基础约束和权限关系 |
|
||||||
|
| 开发步骤 | 1. 运行 makemigrations;2. 检查 migration 只包含第三批相关变更;3. 运行 migrate;4. 测试批次创建;5. 测试产物 hash 字段;6. 测试通知重试字段;7. 测试 ExportedSummaryFile word 类型 |
|
||||||
|
| 验收标准 | migration 可执行,模型测试通过 |
|
||||||
|
| 验证命令 | `python manage.py makemigrations review_agent`; `python manage.py migrate`; `pytest tests/test_application_form_fill_models.py` |
|
||||||
|
| Codex 执行提示 | 请为第三批模型生成迁移并新增模型测试,覆盖批次、产物、通知记录和 word/pdf 导出类型。 |
|
||||||
|
|
||||||
|
### AFF-1 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_models.py && pytest tests/test_file_summary_views.py -k download
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、AFF-2 模块骨架与模板配置
|
||||||
|
|
||||||
|
### AFF-2-001 创建 application_form_fill 模块骨架
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模块 |
|
||||||
|
| 前置任务 | AFF-1 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/` |
|
||||||
|
| 目标 | 建立独立模块目录、常量、schemas、storage、workflow、views 和 services 包 |
|
||||||
|
| 开发步骤 | 1. 创建模块目录;2. 创建 `constants.py`;3. 创建 `schemas.py`;4. 创建 `storage.py`;5. 创建 `workflow.py`;6. 创建 `views.py`;7. 创建 services 子模块;8. 创建 templates 和 prompts 目录 |
|
||||||
|
| 验收标准 | 模块可 import,不影响既有应用启动 |
|
||||||
|
| 验证命令 | `python manage.py check` |
|
||||||
|
| Codex 执行提示 | 请新增 `review_agent/application_form_fill/` 独立模块骨架,先只放常量、schema、空服务和基础 import,不要改动法规核查模块。 |
|
||||||
|
|
||||||
|
### AFF-2-002 编写模板配置 YAML
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 配置 / 模板 |
|
||||||
|
| 前置任务 | AFF-2-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` |
|
||||||
|
| 目标 | 建立模板配置,至少覆盖注册证 `.docx` 已识别字段 |
|
||||||
|
| 开发步骤 | 1. 定义 version;2. 定义 source_dir;3. 配置 `registration_certificate`;4. 配置 `change_registration` 为 `.doc` 待转换模板;5. 配置 `essential_principles` 为 `.doc` 待转换模板;6. 为注册证配置注册人名称、注册人住所、生产地址、产品名称、包装规格、主要组成成分、预期用途、储存条件及有效期、附件等字段 |
|
||||||
|
| 验收标准 | YAML 可解析,注册证字段映射到 table_row |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_config.py` |
|
||||||
|
| Codex 执行提示 | 请新增自动填表模板配置 YAML,配置路径必须是 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml`,先完整录入注册证表格字段。 |
|
||||||
|
|
||||||
|
### AFF-2-003 实现模板配置加载与校验
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 配置 |
|
||||||
|
| 前置任务 | AFF-2-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_config.py`、`tests/test_application_form_fill_template_config.py` |
|
||||||
|
| 目标 | 读取、校验模板配置并计算 hash |
|
||||||
|
| 开发步骤 | 1. 实现 `load_template_config()`;2. 实现 `validate_template_config()`;3. 实现 `compute_config_hash()`;4. 校验 version、source_dir、templates、code 唯一、source_file 存在、target.type 支持;5. 对 `.doc` 待转换模板允许配置存在但标记运行时处理 |
|
||||||
|
| 验收标准 | 有效配置通过,缺失 source_dir 或重复 code 能被测试捕获 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_config.py` |
|
||||||
|
| Codex 执行提示 | 请实现模板配置加载和校验服务,配置错误必须返回清晰错误列表,不要在 import 时直接崩溃。 |
|
||||||
|
|
||||||
|
### AFF-2 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_template_config.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、AFF-3 触发与工作流骨架
|
||||||
|
|
||||||
|
### AFF-3-001 扩展意图路由
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 意图识别 |
|
||||||
|
| 前置任务 | AFF-2 |
|
||||||
|
| 涉及文件 | `review_agent/skill_router.py`、`review_agent/application_form_fill/constants.py`、`tests/test_application_form_fill_trigger.py` |
|
||||||
|
| 目标 | 用户话语命中自动填表意图时返回 `application_form_fill` |
|
||||||
|
| 开发步骤 | 1. 增加触发关键词;2. 支持“帮我填注册证”“对应的表格”“生成申报模板”等;3. 支持指定模板识别入口;4. 保持文件汇总和法规核查路由不回归 |
|
||||||
|
| 验收标准 | 自动填表语句触发正确,普通对话不误触发 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_trigger.py`; `pytest tests/test_regulatory_workflow.py -k router` |
|
||||||
|
| Codex 执行提示 | 请扩展现有意图路由,新增 application_form_fill 动作。不要破坏 file_summary 和 regulatory_review 的现有触发。 |
|
||||||
|
|
||||||
|
### AFF-3-002 实现批次创建和节点初始化
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | AFF-3-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/workflow.py`、`review_agent/application_form_fill/storage.py`、`tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 目标 | 创建填表批次、生成工作目录、初始化节点 |
|
||||||
|
| 开发步骤 | 1. 实现 `build_batch_no()`;2. 实现 `build_batch_work_dir()`;3. 实现 `create_application_form_fill_batch()`;4. 绑定 conversation、user、trigger_message、source_summary_batch;5. 初始化 `FORM_FILL_NODE_DEFINITIONS` 节点;6. 写 workflow_created 事件 |
|
||||||
|
| 验收标准 | 批次编号唯一,节点数量正确,工作目录在受控路径 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k create` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表批次创建和节点初始化,workflow_type 必须写 application_form_fill。 |
|
||||||
|
|
||||||
|
### AFF-3-003 实现工作流执行器骨架
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | AFF-3-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/workflow.py`、`tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 目标 | 实现节点串行执行、状态更新、事件推送和 skipped PDF 节点 |
|
||||||
|
| 开发步骤 | 1. 实现 `FormFillWorkflowExecutor.run()`;2. 实现 `_nodes()`;3. 实现 `_run_node()`;4. 每个节点写 running/success/skipped;5. `pdf_convert` 本期标记 skipped;6. 失败时写 batch.failed |
|
||||||
|
| 验收标准 | 空实现节点可完整跑到 success;PDF 节点 skipped |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k executor` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表工作流执行器骨架,先让节点状态可完整流转,PDF 转换节点本期标记 skipped。 |
|
||||||
|
|
||||||
|
### AFF-3-004 接入流式对话启动逻辑
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 对话 |
|
||||||
|
| 前置任务 | AFF-3-003 |
|
||||||
|
| 涉及文件 | `review_agent/services.py`、`review_agent/application_form_fill/views.py` |
|
||||||
|
| 目标 | 用户触发自动填表时启动工作流;有附件时先自动汇总,无附件时使用最近成功汇总批次 |
|
||||||
|
| 开发步骤 | 1. 在 stream_message 中处理 application_form_fill 路由;2. 如本次存在新附件,复用文件汇总启动逻辑;3. 无新附件时查找最近成功 `FileSummaryBatch`;4. 无来源批次时回复请上传资料;5. 返回 workflow meta |
|
||||||
|
| 验收标准 | 对话触发能创建填表批次;无汇总批次时不崩溃 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k stream` |
|
||||||
|
| Codex 执行提示 | 请把 application_form_fill 接入现有 stream_message。无附件时复用最近成功汇总批次;有新附件时先自动汇总。 |
|
||||||
|
|
||||||
|
### AFF-3 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_trigger.py tests/test_application_form_fill_workflow.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、AFF-4 模板选择与文件来源
|
||||||
|
|
||||||
|
### AFF-4-001 实现模板指定解析
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模板选择 |
|
||||||
|
| 前置任务 | AFF-3 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_select.py`、`tests/test_application_form_fill_template_select.py` |
|
||||||
|
| 目标 | 从用户话语中识别指定模板 |
|
||||||
|
| 开发步骤 | 1. 识别注册证;2. 识别变更注册备案文件;3. 识别安全和性能基本原则清单;4. 识别全部模板;5. 未指定返回空数组 |
|
||||||
|
| 验收标准 | 指定模板语句可返回正确 template_codes |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_select.py -k requested` |
|
||||||
|
| Codex 执行提示 | 请实现用户指定模板解析,支持注册证、变更注册备案文件、安全和性能基本原则清单、全部模板。 |
|
||||||
|
|
||||||
|
### AFF-4-002 实现注册类型识别和模板选择
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模板选择 |
|
||||||
|
| 前置任务 | AFF-4-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_select.py`、`tests/test_application_form_fill_template_select.py` |
|
||||||
|
| 目标 | 按用户话语、法规确认条件、文件抽取识别注册类型,并选择模板 |
|
||||||
|
| 开发步骤 | 1. 用户话语识别首次注册、变更注册、备案;2. 从 `source_regulatory_batch.condition_json` 读取 confirmed_conditions;3. 从文件抽取候选读取 registration_type;4. 未指定模板时首次注册生成注册证 + 基本原则清单;5. 变更/备案生成变更文件 + 基本原则清单;6. 指定不适用模板允许生成但写 risk_notes |
|
||||||
|
| 验收标准 | 模板选择规则与功能设计一致 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_select.py` |
|
||||||
|
| Codex 执行提示 | 请实现注册类型识别和默认模板选择,优先级是用户话语、已确认法规核查条件、文件抽取、unknown。 |
|
||||||
|
|
||||||
|
### AFF-4-003 实现模板复制服务
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模板 |
|
||||||
|
| 前置任务 | AFF-4-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_repository.py`、`review_agent/application_form_fill/storage.py`、`tests/test_application_form_fill_template_repository.py` |
|
||||||
|
| 目标 | 将原始模板复制到批次目录,原始模板只读 |
|
||||||
|
| 开发步骤 | 1. 根据 TemplateSpec 定位 source_file;2. 复制到 `work_dir/templates`;3. 记录 ApplicationFormFillArtifact(template_copy);4. `.doc` 且无工作模板时返回模板失败,不影响其他模板;5. 路径必须在受控工作目录内 |
|
||||||
|
| 验收标准 | 注册证 `.docx` 可复制;原始文件不被修改;产物 hash 写入 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_repository.py` |
|
||||||
|
| Codex 执行提示 | 请实现模板复制服务,只允许复制到批次工作目录,不能直接写原始法规材料目录。 |
|
||||||
|
|
||||||
|
### AFF-4 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_template_select.py tests/test_application_form_fill_template_repository.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、AFF-5 字段抽取与合并
|
||||||
|
|
||||||
|
### AFF-5-001 实现规则/正则字段抽取
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 字段抽取 |
|
||||||
|
| 前置任务 | AFF-4 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py`、`tests/test_application_form_fill_field_extract.py` |
|
||||||
|
| 目标 | 从说明书、产品技术要求等文本中按标签和章节抽取字段 |
|
||||||
|
| 开发步骤 | 1. 复用 `regulatory_review.services.text_extract.extract_text`;2. 识别文件角色;3. 匹配 `字段名:值` 标签行;4. 支持多行值拼接;5. 保存 source_file、source_role、evidence、confidence、extractor=rule |
|
||||||
|
| 验收标准 | 能从测试说明书文本抽取产品名称、预期用途、储存条件、有效期、包装规格 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k rules` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表规则/正则字段抽取,优先覆盖注册证模板字段,抽取结果必须包含来源文件、来源角色和证据片段。 |
|
||||||
|
|
||||||
|
### AFF-5-002 实现 LLM 结构化抽取封装
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / LLM |
|
||||||
|
| 前置任务 | AFF-5-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py`、`review_agent/application_form_fill/prompts/field_extract.md`、`tests/test_application_form_fill_field_extract.py` |
|
||||||
|
| 目标 | 调用现有 LLM 能力输出字段 JSON,失败时降级 |
|
||||||
|
| 开发步骤 | 1. 编写字段抽取 prompt;2. 输入模板字段、文件上下文和候选文本;3. 要求输出 JSON fields/checklist_items;4. 解析 JSON;5. 捕获超时和解析失败;6. 失败返回空 LLM 结果,不阻断规则抽取 |
|
||||||
|
| 验收标准 | monkeypatch LLM 后可解析结构化字段;LLM 异常时工作流继续 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k llm` |
|
||||||
|
| Codex 执行提示 | 请实现 LLM 结构化抽取封装,必须可测试、可降级。LLM 输出解析失败不能导致整个填表批次失败。 |
|
||||||
|
|
||||||
|
### AFF-5-003 实现并行抽取和产物留底
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 字段抽取 |
|
||||||
|
| 前置任务 | AFF-5-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py`、`review_agent/application_form_fill/storage.py` |
|
||||||
|
| 目标 | 并行执行规则/正则和 LLM 抽取,并保存 `field_extract_result.json` |
|
||||||
|
| 开发步骤 | 1. 使用 ThreadPoolExecutor;2. 规则和 LLM 两路并行;3. 组装 regex_results、llm_results、selected_templates、source_evidence;4. 保存 JSON;5. 写 ApplicationFormFillArtifact(field_extract_result) |
|
||||||
|
| 验收标准 | JSON 产物包含两路结果和模板列表 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k parallel` |
|
||||||
|
| Codex 执行提示 | 请实现字段并行抽取和 field_extract_result.json 产物留底,LLM 失败时也必须保存规则结果。 |
|
||||||
|
|
||||||
|
### AFF-5-004 实现字段合并与冲突检测
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 字段合并 |
|
||||||
|
| 前置任务 | AFF-5-003 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_merge.py`、`tests/test_application_form_fill_field_merge.py` |
|
||||||
|
| 目标 | 合并规则和 LLM 字段,说明书优先,并生成冲突摘要 |
|
||||||
|
| 开发步骤 | 1. 实现字段值归一化;2. 实现来源优先级排序;3. 同字段多值一致时合并;4. 不一致时选择最高优先级来源;5. 说明书与其他文件冲突时标记 conflict;6. 输出 merged_fields 和 conflicts |
|
||||||
|
| 验收标准 | 说明书优先;冲突字段包含 selected_value、selected_source、conflict_values、handling |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_merge.py` |
|
||||||
|
| Codex 执行提示 | 请实现字段合并服务,严格按说明书优先处理冲突,并把冲突列表写成可用于对话摘要和追溯清单的结构。 |
|
||||||
|
|
||||||
|
### AFF-5 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_field_extract.py tests/test_application_form_fill_field_merge.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、AFF-6 Word 填充与追溯导出
|
||||||
|
|
||||||
|
### AFF-6-001 实现 Word 表格行填充
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / Word |
|
||||||
|
| 前置任务 | AFF-5 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py`、`tests/test_application_form_fill_word_fill.py` |
|
||||||
|
| 目标 | 使用 `python-docx` 按表格行名写入注册证模板 |
|
||||||
|
| 开发步骤 | 1. 打开 docx 模板副本;2. 遍历 tables/rows/cells;3. 匹配第一列 row_label;4. 写入第二列;5. 缺失字段保持空白;6. 保存 output_path |
|
||||||
|
| 验收标准 | 产品名称、包装规格、预期用途等能写入注册证模板对应行 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k table` |
|
||||||
|
| Codex 执行提示 | 请实现 Word 表格行填充服务,先支持注册证模板的两列表格行名匹配。 |
|
||||||
|
|
||||||
|
### AFF-6-002 实现冲突高亮
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / Word |
|
||||||
|
| 前置任务 | AFF-6-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py`、`tests/test_application_form_fill_word_fill.py` |
|
||||||
|
| 目标 | 冲突字段在 Word 中黄底红字 |
|
||||||
|
| 开发步骤 | 1. 对冲突字段写入 run;2. 设置字体颜色 `FF0000`;3. 设置单元格 shading `FFFF00`;4. 非冲突字段保持原样式;5. 测试读取 docx XML 验证颜色和底色 |
|
||||||
|
| 验收标准 | 冲突字段样式可在 docx XML 中验证 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k highlight` |
|
||||||
|
| Codex 执行提示 | 请实现 Word 冲突高亮,冲突字段必须红色字体和黄色底色,测试需检查 docx XML。 |
|
||||||
|
|
||||||
|
### AFF-6-003 创建 Word 导出记录
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 导出 |
|
||||||
|
| 前置任务 | AFF-6-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py`、`review_agent/application_form_fill/workflow.py` |
|
||||||
|
| 目标 | Word 生成后写入 `ExportedSummaryFile(export_type=word)` 和产物记录 |
|
||||||
|
| 开发步骤 | 1. 按批次号、产品名、模板标签生成文件名;2. 保存到 `work_dir/filled`;3. 创建 `ApplicationFormFillArtifact(filled_template)`;4. 创建 `ExportedSummaryFile`;5. 记录模板失败时错误 |
|
||||||
|
| 验收标准 | 可查询到 word 导出记录和 filled_template 产物 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k export` |
|
||||||
|
| Codex 执行提示 | 请把 Word 填充结果保存为导出文件,export_type 使用 word,workflow_type 使用 application_form_fill。 |
|
||||||
|
|
||||||
|
### AFF-6-004 实现追溯清单 Excel/JSON
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 导出 |
|
||||||
|
| 前置任务 | AFF-6-003 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/traceability_export.py`、`tests/test_application_form_fill_traceability.py` |
|
||||||
|
| 目标 | 输出字段来源追溯清单和合并结果 JSON |
|
||||||
|
| 开发步骤 | 1. 生成“字段追溯”Sheet;2. 生成“冲突字段”Sheet;3. 生成“低置信度条目”Sheet;4. 生成“生成结果”Sheet;5. 保存 Excel;6. 保存 merged_fields.json;7. 创建导出和产物记录 |
|
||||||
|
| 验收标准 | Excel 可打开,包含字段、来源、证据、冲突、处理方式 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_traceability.py` |
|
||||||
|
| Codex 执行提示 | 请实现字段来源追溯清单导出,必须包含规则/LLM 合并结果、冲突字段和生成结果。 |
|
||||||
|
|
||||||
|
### AFF-6 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_word_fill.py tests/test_application_form_fill_traceability.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、AFF-7 飞书通知与对话摘要
|
||||||
|
|
||||||
|
### AFF-7-001 生成助手 Markdown 摘要
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 对话 |
|
||||||
|
| 前置任务 | AFF-6 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/traceability_export.py`、`review_agent/application_form_fill/workflow.py` |
|
||||||
|
| 目标 | 工作流完成后向当前对话写入下载链接和冲突摘要 |
|
||||||
|
| 开发步骤 | 1. 汇总 Word 导出;2. 汇总 PDF 状态为待增强;3. 汇总冲突字段;4. 添加追溯清单下载链接;5. 创建 assistant Message |
|
||||||
|
| 验收标准 | 对话中出现 Markdown 表格、Word 下载、追溯清单下载和冲突摘要 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k summary` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表完成后的助手 Markdown 摘要,PDF 本期显示为待增强,不作为失败。 |
|
||||||
|
|
||||||
|
### AFF-7-002 实现飞书通知记录和 mock 通知
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 通知 |
|
||||||
|
| 前置任务 | AFF-7-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/notifier.py`、`tests/test_application_form_fill_notification.py` |
|
||||||
|
| 目标 | 填表完成后记录通知,可 mock 发送,失败不阻断下载 |
|
||||||
|
| 开发步骤 | 1. 实现 `notify_completion()`;2. 默认 channel=mock;3. 写 template_codes、export_ids、message_summary;4. 支持 send_status success/failed;5. 失败时记录 error_message 和 retry_count |
|
||||||
|
| 验收标准 | 通知记录可查;通知失败不影响批次核心产物 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_notification.py` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表通知服务,先用 mock 通知记录即可。通知失败不得阻断 Word 下载。 |
|
||||||
|
|
||||||
|
### AFF-7-003 完成工作流状态归并
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | AFF-7-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/workflow.py`、`tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 目标 | 根据 Word、追溯清单、通知结果标记 success/partial_success/failed |
|
||||||
|
| 开发步骤 | 1. 所有目标 Word 成功且非关键产物无失败时 success;2. 至少一个 Word 成功,但存在部分模板失败或非关键产物(如通知)失败时 partial_success;3. 所有 Word 失败时 failed;4. PDF skipped 不导致失败;5. 发送 workflow_completed 事件 |
|
||||||
|
| 验收标准 | 批次状态符合详细设计 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k status` |
|
||||||
|
| Codex 执行提示 | 请完成自动填表工作流状态归并,PDF skipped 不影响 success,通知失败最多导致 partial_success。 |
|
||||||
|
|
||||||
|
### AFF-7 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_workflow.py tests/test_application_form_fill_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、AFF-8 前端卡片与总体验收
|
||||||
|
|
||||||
|
### AFF-8-001 后端状态接口
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 接口 |
|
||||||
|
| 前置任务 | AFF-7 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/views.py`、`review_agent/urls.py` 或相关 URL 文件 |
|
||||||
|
| 目标 | 提供自动填表启动和状态查询接口 |
|
||||||
|
| 开发步骤 | 1. 新增 start 接口;2. 新增 detail/status 接口;3. 返回 batch、nodes、conflicts、exports;4. 校验 conversation/user 权限;5. 接入 URL |
|
||||||
|
| 验收标准 | 当前用户可查自己的填表批次,不能查他人批次 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_views.py` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表启动和状态查询接口,所有查询必须校验当前用户权限。 |
|
||||||
|
|
||||||
|
### AFF-8-002 前端支持 application_form_fill 卡片
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 前端 / 工作流卡片 |
|
||||||
|
| 前置任务 | AFF-8-001 |
|
||||||
|
| 涉及文件 | `static/js/app.js`、`templates/home.html`、静态 CSS 文件 |
|
||||||
|
| 目标 | 前端展示自动填表工作流卡片,并根据 SSE 更新节点 |
|
||||||
|
| 开发步骤 | 1. 解析 workflow_type=application_form_fill;2. 定义节点文案;3. 创建卡片;4. 更新节点状态;5. PDF 节点显示待增强/跳过;6. 页面刷新后恢复 |
|
||||||
|
| 验收标准 | 自动填表卡片可显示准备资料、选择模板、复制模板、抽取字段、填写 Word、追溯清单、飞书通知 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_frontend.py` 或现有前端测试命令 |
|
||||||
|
| Codex 执行提示 | 请在现有工作流卡片逻辑中新增 application_form_fill 类型,展示自动填表节点并支持状态恢复。 |
|
||||||
|
|
||||||
|
### AFF-8-003 前端展示结果和下载链接
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 前端 / Markdown |
|
||||||
|
| 前置任务 | AFF-8-002 |
|
||||||
|
| 涉及文件 | `static/js/app.js`、模板和 CSS |
|
||||||
|
| 目标 | 对话框正常展示 Word 下载、追溯清单、冲突摘要 |
|
||||||
|
| 开发步骤 | 1. 确认助手 Markdown 渲染支持表格;2. 验证 Word 下载链接点击;3. 验证冲突摘要表格;4. PDF 列显示待增强 |
|
||||||
|
| 验收标准 | 对话结果可读、链接可用、PDF 待增强不被误判为失败 |
|
||||||
|
| 验证命令 | 前端/Playwright 对应测试 |
|
||||||
|
| Codex 执行提示 | 请验证并完善自动填表结果展示,确保 Markdown 表格、Word 下载链接、追溯清单链接和冲突摘要正常显示。 |
|
||||||
|
|
||||||
|
### AFF-8-004 总体验收与回归
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 验收 / 回归 |
|
||||||
|
| 前置任务 | AFF-8-003 |
|
||||||
|
| 涉及文件 | 全项目 |
|
||||||
|
| 目标 | 运行全量测试,确认前三批能力均不回归 |
|
||||||
|
| 开发步骤 | 1. 运行 Django check;2. 运行自动填表测试;3. 运行文件汇总测试;4. 运行法规核查测试;5. 如可用,运行前端/Playwright 测试;6. 检查 git status |
|
||||||
|
| 验收标准 | 全量测试通过;失败项均有解释;无意外文件变更 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest` |
|
||||||
|
| Codex 执行提示 | 请执行第三批自动填表总体验收,运行 Django check 和 pytest 全量回归,确认文件汇总与法规核查不回归。 |
|
||||||
|
|
||||||
|
### AFF-8 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、测试分层要求
|
||||||
|
|
||||||
|
| 层级 | 验证内容 | 建议文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 模型测试 | 三张新表、word/pdf 导出类型、权限关系 | `tests/test_application_form_fill_models.py` |
|
||||||
|
| 配置测试 | YAML 加载、模板配置校验、hash | `tests/test_application_form_fill_template_config.py` |
|
||||||
|
| 选择测试 | 触发语句、指定模板、注册类型优先级、默认模板 | `tests/test_application_form_fill_trigger.py`、`tests/test_application_form_fill_template_select.py` |
|
||||||
|
| 抽取测试 | 规则/正则、LLM 降级、并行抽取、字段合并 | `tests/test_application_form_fill_field_extract.py`、`tests/test_application_form_fill_field_merge.py` |
|
||||||
|
| Word 测试 | 表格行填充、冲突高亮、导出记录 | `tests/test_application_form_fill_word_fill.py` |
|
||||||
|
| 导出测试 | 追溯清单 Excel、JSON 产物、下载权限 | `tests/test_application_form_fill_traceability.py`、`tests/test_application_form_fill_views.py` |
|
||||||
|
| 工作流测试 | 批次创建、节点流转、状态归并、助手摘要 | `tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 通知测试 | mock 通知、失败记录、重试字段 | `tests/test_application_form_fill_notification.py` |
|
||||||
|
| 前端测试 | 卡片节点、PDF 待增强、下载链接、冲突摘要 | `tests/test_application_form_fill_frontend.py` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、Codex 自动化执行规则
|
||||||
|
|
||||||
|
| 规则 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 顺序执行 | 必须从 AFF-0 到 AFF-8 顺序执行,不得跳阶段 |
|
||||||
|
| TDD | 新行为先写失败测试,再实现 |
|
||||||
|
| 当前阶段优先 | 某阶段失败时先修复当前阶段,不继续后续阶段 |
|
||||||
|
| 回归保护 | 文件汇总和法规核查已有测试不得回归 |
|
||||||
|
| PDF 边界 | PDF 节点本期可 skipped,不为 PDF 引入强依赖 |
|
||||||
|
| 字段表边界 | 不新增字段级数据库表,后续增强已在待办计划 |
|
||||||
|
| 每阶段验证 | 每阶段完成后运行对应验证命令 |
|
||||||
|
| 每阶段提交 | 每阶段验证通过后生成提交摘要并本地提交 |
|
||||||
|
| 不覆盖变更 | 不得回滚或覆盖用户已有未提交变更 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、推荐目标模式提示词
|
||||||
|
|
||||||
|
后续可直接对 Codex 输入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md 执行第三批开发。
|
||||||
|
|
||||||
|
目标:
|
||||||
|
完成独立 application_form_fill 工作流,通过用户对话触发自动填表,复用当前对话最近成功 FileSummaryBatch,支持模板配置、注册证 Word 自动填写、规则/正则与 LLM 并行字段抽取、说明书优先冲突归并、冲突高亮、字段来源追溯清单、Word 下载、自动填表工作流卡片和飞书 mock 通知记录。
|
||||||
|
|
||||||
|
执行规则:
|
||||||
|
1. 创建 codex/YYYYMMDD-申报文件自动填表 分支。
|
||||||
|
2. 按 AFF-0 到 AFF-8 顺序执行,不跳阶段。
|
||||||
|
3. 每阶段先写测试,再实现,完成后运行对应验证命令。
|
||||||
|
4. 不实现字段级数据库表。
|
||||||
|
5. PDF 转换本期作为 skipped/待增强,不引入强制 LibreOffice 依赖。
|
||||||
|
6. 模板配置路径必须为 review_agent/application_form_fill/templates/application_form_templates_v1.yaml。
|
||||||
|
7. Word 模板优先支持注册证格式 docx,两个 doc 模板可标记待转换或部分成功。
|
||||||
|
8. 每阶段验证通过后调用 git-commit-summary 生成提交摘要并本地提交。
|
||||||
|
9. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、待执行前检查清单
|
||||||
|
|
||||||
|
| 检查项 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 第三批需求分析、功能设计、详细设计、数据库设计均已存在 | 待执行时确认 |
|
||||||
|
| 当前分支是否适合创建开发分支 | 待执行时确认 |
|
||||||
|
| 是否存在用户未提交变更 | 待执行时确认 |
|
||||||
|
| `python-docx`、`openpyxl`、`PyYAML` 是否可用 | 待执行时确认 |
|
||||||
|
| 现有文件汇总和法规核查测试是否通过 | 待执行时确认 |
|
||||||
|
| 执行机器是否提供 `git-commit-summary` skill | 待执行时确认 |
|
||||||
|
| `.doc` 模板和 PDF 转换是否保持在待办边界内 | 待执行时确认 |
|
||||||
@@ -33,10 +33,12 @@
|
|||||||
|
|
||||||
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||||
| --- | --- | --- | --- | --- |
|
| --- | --- | --- | --- | --- |
|
||||||
| TODO-FILL-001 | 产品关键信息抽取结果确认 | 原始需求 3 | P1 | 将第二阶段抽取字段转成可人工确认的信息表 |
|
| TODO-FILL-001 | 字段级数据库表 | 第三批自动填表数据库设计 | P1 | 后续新增 `ApplicationFormFillField`,支持字段级查询、人工修改、审计和统计 |
|
||||||
| TODO-FILL-002 | 自动填写目标文件 | 原始需求 3 | P1 | 将确认后的字段写入注册申报表格或对照清单 |
|
| TODO-FILL-002 | PDF 转换与版式 QA | 第三批自动填表详细设计 | P1 | 使用 LibreOffice/soffice 将填好的 Word 转 PDF,并增加页数非 0、逐页截图或版式差异检查 |
|
||||||
| TODO-FILL-003 | 填写前后差异报告 | 自动填写风控 | P1 | 输出写入前后 diff,供人工复核 |
|
| TODO-FILL-003 | `.doc` 模板预转换管理 | 第三批自动填表模板处理 | P1 | 将变更注册(备案)文件和安全和性能基本原则清单预转换为 `.docx` 工作模板,并人工确认版式 |
|
||||||
| TODO-FILL-004 | 自动填写审批确认 | 自动填写风控 | P1 | 文件写操作前必须人工确认 |
|
| TODO-FILL-004 | 安全和性能基本原则清单完整条目拆解 | 第三批自动填表模板配置 | P1 | 拆解清单条目编号、原则内容、适用性栏、证据栏和证明文件位置栏,写入 YAML 配置 |
|
||||||
|
| TODO-FILL-005 | 填写前后差异报告 | 自动填写风控 | P1 | 输出写入前后 diff,供人工复核 |
|
||||||
|
| TODO-FILL-006 | 自动填写审批确认 | 自动填写风控 | P1 | 文件写操作前支持人工确认或二次审批 |
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -7,3 +7,6 @@ xlrd>=2.0
|
|||||||
olefile>=0.47
|
olefile>=0.47
|
||||||
py7zr>=0.21
|
py7zr>=0.21
|
||||||
playwright>=1.60
|
playwright>=1.60
|
||||||
|
PyYAML>=6.0
|
||||||
|
chromadb>=0.5
|
||||||
|
httpx>=0.27
|
||||||
|
|||||||
@@ -4,7 +4,14 @@ from review_agent.models import FileSummaryBatch, WorkflowEvent
|
|||||||
|
|
||||||
|
|
||||||
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
|
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
|
||||||
return WorkflowEvent.objects.create(batch=batch, event_type=event_type, payload=payload or {})
|
return WorkflowEvent.objects.create(
|
||||||
|
batch=batch,
|
||||||
|
workflow_type="file_summary",
|
||||||
|
workflow_batch_id=batch.pk,
|
||||||
|
conversation=batch.conversation,
|
||||||
|
event_type=event_type,
|
||||||
|
payload=payload or {},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
|
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
|
||||||
|
|||||||
@@ -54,6 +54,9 @@ def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile:
|
|||||||
workbook.save(path)
|
workbook.save(path)
|
||||||
exported = ExportedSummaryFile.objects.create(
|
exported = ExportedSummaryFile.objects.create(
|
||||||
batch=batch,
|
batch=batch,
|
||||||
|
workflow_type="file_summary",
|
||||||
|
workflow_batch_id=batch.pk,
|
||||||
|
export_category="summary",
|
||||||
export_type=ExportedSummaryFile.ExportType.EXCEL,
|
export_type=ExportedSummaryFile.ExportType.EXCEL,
|
||||||
file_name=path.name,
|
file_name=path.name,
|
||||||
storage_path=str(path),
|
storage_path=str(path),
|
||||||
|
|||||||
@@ -65,6 +65,9 @@ def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFi
|
|||||||
path.write_text(content, encoding="utf-8")
|
path.write_text(content, encoding="utf-8")
|
||||||
exported = ExportedSummaryFile.objects.create(
|
exported = ExportedSummaryFile.objects.create(
|
||||||
batch=batch,
|
batch=batch,
|
||||||
|
workflow_type="file_summary",
|
||||||
|
workflow_batch_id=batch.pk,
|
||||||
|
export_category="summary",
|
||||||
export_type=ExportedSummaryFile.ExportType.MARKDOWN,
|
export_type=ExportedSummaryFile.ExportType.MARKDOWN,
|
||||||
file_name=path.name,
|
file_name=path.name,
|
||||||
storage_path=str(path),
|
storage_path=str(path),
|
||||||
|
|||||||
@@ -229,6 +229,7 @@ def batch_status(request, batch_id: int):
|
|||||||
{
|
{
|
||||||
"batch": {
|
"batch": {
|
||||||
"id": batch.pk,
|
"id": batch.pk,
|
||||||
|
"workflow_type": "file_summary",
|
||||||
"batch_no": batch.batch_no,
|
"batch_no": batch.batch_no,
|
||||||
"status": batch.status,
|
"status": batch.status,
|
||||||
"product_name": batch.product_name,
|
"product_name": batch.product_name,
|
||||||
@@ -283,11 +284,12 @@ def export_download(request, export_id: int):
|
|||||||
extra={"export_id": exported.pk, "storage_path": exported.storage_path},
|
extra={"export_id": exported.pk, "storage_path": exported.storage_path},
|
||||||
)
|
)
|
||||||
return JsonResponse({"error": "文件不存在。"}, status=404)
|
return JsonResponse({"error": "文件不存在。"}, status=404)
|
||||||
content_type = (
|
content_types = {
|
||||||
"text/markdown; charset=utf-8"
|
ExportedSummaryFile.ExportType.MARKDOWN: "text/markdown; charset=utf-8",
|
||||||
if exported.export_type == ExportedSummaryFile.ExportType.MARKDOWN
|
ExportedSummaryFile.ExportType.EXCEL: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
else "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
ExportedSummaryFile.ExportType.JSON: "application/json; charset=utf-8",
|
||||||
)
|
}
|
||||||
|
content_type = content_types.get(exported.export_type, "application/octet-stream")
|
||||||
logger.info(
|
logger.info(
|
||||||
"Export download started",
|
"Export download started",
|
||||||
extra={
|
extra={
|
||||||
|
|||||||
@@ -112,7 +112,14 @@ def create_file_summary_batch(
|
|||||||
attachment.save(update_fields=["upload_status"])
|
attachment.save(update_fields=["upload_status"])
|
||||||
|
|
||||||
for code, name, _skill_name in NODE_DEFINITIONS:
|
for code, name, _skill_name in NODE_DEFINITIONS:
|
||||||
WorkflowNodeRun.objects.create(batch=batch, node_code=code, node_name=name)
|
WorkflowNodeRun.objects.create(
|
||||||
|
batch=batch,
|
||||||
|
workflow_type="file_summary",
|
||||||
|
workflow_batch_id=batch.pk,
|
||||||
|
node_group="file_summary",
|
||||||
|
node_code=code,
|
||||||
|
node_name=name,
|
||||||
|
)
|
||||||
|
|
||||||
record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no})
|
record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no})
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ def generate_reply(conversation, user_message: str) -> str:
|
|||||||
raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
||||||
|
|
||||||
|
|
||||||
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0) -> str:
|
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
|
||||||
"""Calls the configured chat endpoint with explicit messages and returns assistant text."""
|
"""Calls the configured chat endpoint with explicit messages and returns assistant text."""
|
||||||
|
|
||||||
if not settings.LLM_API_KEY:
|
if not settings.LLM_API_KEY:
|
||||||
@@ -84,7 +84,7 @@ def generate_completion(messages: list[dict[str, str]], *, temperature: float =
|
|||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with request.urlopen(http_request, timeout=60) as response:
|
with request.urlopen(http_request, timeout=timeout) as response:
|
||||||
data = json.loads(response.read().decode("utf-8"))
|
data = json.loads(response.read().decode("utf-8"))
|
||||||
except error.HTTPError as exc:
|
except error.HTTPError as exc:
|
||||||
details = exc.read().decode("utf-8", errors="ignore")
|
details = exc.read().decode("utf-8", errors="ignore")
|
||||||
|
|||||||
15
review_agent/logging_filters.py
Normal file
15
review_agent/logging_filters.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class SuppressWorkflowStatusPollFilter(logging.Filter):
|
||||||
|
"""Hides noisy workflow status polling access logs from runserver output."""
|
||||||
|
|
||||||
|
STATUS_POLL_PATTERN = re.compile(
|
||||||
|
r'"GET /api/review-agent/(?:file-summary|regulatory-review)/\d+/status/ HTTP/[0-9.]+" 200 '
|
||||||
|
)
|
||||||
|
|
||||||
|
def filter(self, record: logging.LogRecord) -> bool:
|
||||||
|
return not self.STATUS_POLL_PATTERN.search(record.getMessage())
|
||||||
1
review_agent/management/__init__.py
Normal file
1
review_agent/management/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Management command package for review_agent."""
|
||||||
1
review_agent/management/commands/__init__.py
Normal file
1
review_agent/management/commands/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Management commands for review_agent."""
|
||||||
33
review_agent/management/commands/regulatory_rag_build.py
Normal file
33
review_agent/management/commands/regulatory_rag_build.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.services.rag_embedding import get_embedding_provider
|
||||||
|
from review_agent.regulatory_review.services.rag_index import build_chroma_index
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
|
||||||
|
help = "构建 NMPA 法规材料本地 ChromaDB RAG 索引。"
|
||||||
|
|
||||||
|
def add_arguments(self, parser):
|
||||||
|
parser.add_argument("--provider", default=None, help="覆盖 REGULATORY_RAG_PROVIDER。")
|
||||||
|
|
||||||
|
def handle(self, *args, **options):
|
||||||
|
rule_set = load_rule_file()
|
||||||
|
source_dir = Path(settings.BASE_DIR) / rule_set["source_material_dir"]
|
||||||
|
if not source_dir.exists():
|
||||||
|
raise CommandError(f"法规材料目录不存在:{source_dir}")
|
||||||
|
try:
|
||||||
|
provider = get_embedding_provider(options["provider"])
|
||||||
|
count = build_chroma_index(source_dir=source_dir, embedding_provider=provider)
|
||||||
|
except Exception as exc:
|
||||||
|
raise CommandError(str(exc)) from exc
|
||||||
|
self.stdout.write(
|
||||||
|
self.style.SUCCESS(
|
||||||
|
f"已构建法规 RAG 索引:collection={settings.REGULATORY_RAG_COLLECTION}, chunks={count}"
|
||||||
|
)
|
||||||
|
)
|
||||||
27
review_agent/management/commands/regulatory_rules_check.py
Normal file
27
review_agent/management/commands/regulatory_rules_check.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.core.management.base import BaseCommand
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import check_rule_version
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
|
||||||
|
help = "检查 NMPA 法规核查 YAML 规则与数据库版本记录。"
|
||||||
|
|
||||||
|
def add_arguments(self, parser):
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-create",
|
||||||
|
action="store_true",
|
||||||
|
help="缺少数据库记录时只报告 missing,不创建记录。",
|
||||||
|
)
|
||||||
|
|
||||||
|
def handle(self, *args, **options):
|
||||||
|
result = check_rule_version(update_missing=not options["no_create"])
|
||||||
|
self.stdout.write(
|
||||||
|
f"{result.code}: {result.status}; yaml_hash={result.current_hash}; "
|
||||||
|
f"db_hash={result.database_hash or '-'}; path={result.path}"
|
||||||
|
)
|
||||||
|
if result.status == "mismatch":
|
||||||
|
self.stdout.write(
|
||||||
|
self.style.WARNING("YAML 与数据库记录不一致,请人工确认后更新规则版本记录。")
|
||||||
|
)
|
||||||
@@ -0,0 +1,479 @@
|
|||||||
|
# Generated by Django 5.2.14 on 2026-06-06 16:22
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
(
|
||||||
|
"review_agent",
|
||||||
|
"0002_fileattachment_filesummarybatch_exportedsummaryfile_and_more",
|
||||||
|
),
|
||||||
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="RegulatoryArtifact",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.BigAutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"artifact_type",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("markdown", "Markdown"),
|
||||||
|
("excel", "Excel"),
|
||||||
|
("json", "JSON"),
|
||||||
|
("text", "文本"),
|
||||||
|
],
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("name", models.CharField(max_length=160)),
|
||||||
|
("storage_path", models.CharField(max_length=500)),
|
||||||
|
(
|
||||||
|
"content_hash",
|
||||||
|
models.CharField(blank=True, default="", max_length=128),
|
||||||
|
),
|
||||||
|
("metadata", models.JSONField(blank=True, default=dict)),
|
||||||
|
("created_at", models.DateTimeField(auto_now_add=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"db_table": "ra_regulatory_artifact",
|
||||||
|
"ordering": ["-created_at", "-id"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="RegulatoryIssue",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.BigAutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("rule_code", models.CharField(blank=True, default="", max_length=120)),
|
||||||
|
(
|
||||||
|
"category",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("completeness", "完整性"),
|
||||||
|
("structure", "章节"),
|
||||||
|
("consistency", "一致性"),
|
||||||
|
("rag", "法规依据"),
|
||||||
|
],
|
||||||
|
max_length=40,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"severity",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("blocking", "阻断项"),
|
||||||
|
("high", "高风险"),
|
||||||
|
("medium", "中风险"),
|
||||||
|
("low", "低风险"),
|
||||||
|
("info", "提示"),
|
||||||
|
],
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("title", models.CharField(max_length=255)),
|
||||||
|
("detail", models.TextField(blank=True, default="")),
|
||||||
|
("suggestion", models.TextField(blank=True, default="")),
|
||||||
|
(
|
||||||
|
"status",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("open", "待处理"),
|
||||||
|
("resolved", "已整改"),
|
||||||
|
("accepted", "已接受"),
|
||||||
|
],
|
||||||
|
default="open",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("evidence", models.JSONField(blank=True, default=dict)),
|
||||||
|
("citations", models.JSONField(blank=True, default=list)),
|
||||||
|
("created_at", models.DateTimeField(auto_now_add=True)),
|
||||||
|
("updated_at", models.DateTimeField(auto_now=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"db_table": "ra_regulatory_issue",
|
||||||
|
"ordering": ["severity", "id"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="RegulatoryNotificationRecord",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.BigAutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"channel",
|
||||||
|
models.CharField(
|
||||||
|
choices=[("mock", "模拟"), ("feishu", "飞书")],
|
||||||
|
default="mock",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("target", models.CharField(blank=True, default="", max_length=160)),
|
||||||
|
("payload", models.JSONField(blank=True, default=dict)),
|
||||||
|
(
|
||||||
|
"status",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("pending", "待发送"),
|
||||||
|
("sent", "已发送"),
|
||||||
|
("failed", "失败"),
|
||||||
|
],
|
||||||
|
default="pending",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("error_message", models.TextField(blank=True, default="")),
|
||||||
|
("created_at", models.DateTimeField(auto_now_add=True)),
|
||||||
|
("sent_at", models.DateTimeField(blank=True, null=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"db_table": "ra_regulatory_notification_record",
|
||||||
|
"ordering": ["-created_at", "-id"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="RegulatoryReviewBatch",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.BigAutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("batch_no", models.CharField(max_length=64, unique=True)),
|
||||||
|
(
|
||||||
|
"status",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("pending", "待执行"),
|
||||||
|
("running", "执行中"),
|
||||||
|
("success", "成功"),
|
||||||
|
("failed", "失败"),
|
||||||
|
],
|
||||||
|
default="pending",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("risk_summary", models.JSONField(blank=True, default=dict)),
|
||||||
|
("work_dir", models.CharField(blank=True, default="", max_length=500)),
|
||||||
|
("error_message", models.TextField(blank=True, default="")),
|
||||||
|
("created_at", models.DateTimeField(auto_now_add=True)),
|
||||||
|
("started_at", models.DateTimeField(blank=True, null=True)),
|
||||||
|
("finished_at", models.DateTimeField(blank=True, null=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"db_table": "ra_regulatory_review_batch",
|
||||||
|
"ordering": ["-created_at", "-id"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.CreateModel(
|
||||||
|
name="RegulatoryRuleVersion",
|
||||||
|
fields=[
|
||||||
|
(
|
||||||
|
"id",
|
||||||
|
models.BigAutoField(
|
||||||
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("code", models.CharField(max_length=80, unique=True)),
|
||||||
|
("name", models.CharField(max_length=160)),
|
||||||
|
("yaml_path", models.CharField(max_length=500)),
|
||||||
|
("yaml_hash", models.CharField(max_length=128)),
|
||||||
|
(
|
||||||
|
"rag_collection",
|
||||||
|
models.CharField(blank=True, default="", max_length=120),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"rag_index_version",
|
||||||
|
models.CharField(blank=True, default="", max_length=80),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"rag_index_hash",
|
||||||
|
models.CharField(blank=True, default="", max_length=128),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"status",
|
||||||
|
models.CharField(
|
||||||
|
choices=[
|
||||||
|
("active", "启用"),
|
||||||
|
("outdated", "待更新"),
|
||||||
|
("disabled", "停用"),
|
||||||
|
],
|
||||||
|
default="active",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("created_at", models.DateTimeField(auto_now_add=True)),
|
||||||
|
("updated_at", models.DateTimeField(auto_now=True)),
|
||||||
|
],
|
||||||
|
options={
|
||||||
|
"db_table": "ra_regulatory_rule_version",
|
||||||
|
"ordering": ["-updated_at", "-id"],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="exportedsummaryfile",
|
||||||
|
name="export_category",
|
||||||
|
field=models.CharField(blank=True, default="summary", max_length=40),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="exportedsummaryfile",
|
||||||
|
name="workflow_batch_id",
|
||||||
|
field=models.PositiveBigIntegerField(blank=True, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="exportedsummaryfile",
|
||||||
|
name="workflow_type",
|
||||||
|
field=models.CharField(blank=True, default="file_summary", max_length=40),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="workflowevent",
|
||||||
|
name="conversation",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="workflow_events",
|
||||||
|
to="review_agent.conversation",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="workflowevent",
|
||||||
|
name="workflow_batch_id",
|
||||||
|
field=models.PositiveBigIntegerField(blank=True, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="workflowevent",
|
||||||
|
name="workflow_type",
|
||||||
|
field=models.CharField(blank=True, default="file_summary", max_length=40),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="workflownoderun",
|
||||||
|
name="node_group",
|
||||||
|
field=models.CharField(blank=True, default="file_summary", max_length=40),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="workflownoderun",
|
||||||
|
name="workflow_batch_id",
|
||||||
|
field=models.PositiveBigIntegerField(blank=True, null=True),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="workflownoderun",
|
||||||
|
name="workflow_type",
|
||||||
|
field=models.CharField(blank=True, default="file_summary", max_length=40),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="exportedsummaryfile",
|
||||||
|
name="export_type",
|
||||||
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("markdown", "Markdown"),
|
||||||
|
("excel", "Excel"),
|
||||||
|
("json", "JSON"),
|
||||||
|
],
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="workflowevent",
|
||||||
|
name="batch",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="events",
|
||||||
|
to="review_agent.filesummarybatch",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="workflownoderun",
|
||||||
|
name="batch",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="node_runs",
|
||||||
|
to="review_agent.filesummarybatch",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="exportedsummaryfile",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["workflow_type", "workflow_batch_id"],
|
||||||
|
name="idx_ra_export_workflow",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="workflowevent",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["workflow_type", "workflow_batch_id", "id"],
|
||||||
|
name="idx_ra_event_workflow_id",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="workflownoderun",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["workflow_type", "workflow_batch_id"],
|
||||||
|
name="idx_ra_node_workflow",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="conversation",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="regulatory_review_batches",
|
||||||
|
to="review_agent.conversation",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="source_summary_batch",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.PROTECT,
|
||||||
|
related_name="regulatory_review_batches",
|
||||||
|
to="review_agent.filesummarybatch",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="trigger_message",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="triggered_regulatory_batches",
|
||||||
|
to="review_agent.message",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="user",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="review_regulatory_batches",
|
||||||
|
to=settings.AUTH_USER_MODEL,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatorynotificationrecord",
|
||||||
|
name="batch",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="notifications",
|
||||||
|
to="review_agent.regulatoryreviewbatch",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryissue",
|
||||||
|
name="batch",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="issues",
|
||||||
|
to="review_agent.regulatoryreviewbatch",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryartifact",
|
||||||
|
name="batch",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="artifacts",
|
||||||
|
to="review_agent.regulatoryreviewbatch",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryruleversion",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["code", "status"], name="idx_ra_rule_code_status"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="rule_version",
|
||||||
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="review_batches",
|
||||||
|
to="review_agent.regulatoryruleversion",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatorynotificationrecord",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["batch", "status"], name="idx_ra_rr_notify_status"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryissue",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["batch", "severity"], name="idx_ra_rr_issue_severity"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryissue",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["batch", "category"], name="idx_ra_rr_issue_category"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryartifact",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["batch", "artifact_type"], name="idx_ra_rr_artifact_type"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["conversation", "created_at"], name="idx_ra_rr_batch_conv"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["user", "created_at"], name="idx_ra_rr_batch_user"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AddIndex(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
index=models.Index(
|
||||||
|
fields=["status", "created_at"], name="idx_ra_rr_batch_status"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
# Generated by Django 5.2.14 on 2026-06-07 01:15
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("review_agent", "0003_regulatoryartifact_regulatoryissue_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AddField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="condition_json",
|
||||||
|
field=models.JSONField(blank=True, default=dict),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="regulatoryreviewbatch",
|
||||||
|
name="status",
|
||||||
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("pending", "待执行"),
|
||||||
|
("running", "执行中"),
|
||||||
|
("waiting_user", "等待用户确认"),
|
||||||
|
("success", "成功"),
|
||||||
|
("failed", "失败"),
|
||||||
|
],
|
||||||
|
default="pending",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="workflownoderun",
|
||||||
|
name="status",
|
||||||
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("pending", "等待中"),
|
||||||
|
("running", "执行中"),
|
||||||
|
("waiting_user", "等待用户确认"),
|
||||||
|
("retrying", "重试中"),
|
||||||
|
("success", "成功"),
|
||||||
|
("failed", "失败"),
|
||||||
|
("skipped", "跳过"),
|
||||||
|
],
|
||||||
|
default="pending",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
28
review_agent/migrations/0005_alter_regulatoryissue_status.py
Normal file
28
review_agent/migrations/0005_alter_regulatoryissue_status.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Generated by Django 5.2.14 on 2026-06-07 01:29
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
("review_agent", "0004_regulatoryreviewbatch_condition_json_and_more"),
|
||||||
|
]
|
||||||
|
|
||||||
|
operations = [
|
||||||
|
migrations.AlterField(
|
||||||
|
model_name="regulatoryissue",
|
||||||
|
name="status",
|
||||||
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("open", "待处理"),
|
||||||
|
("resolved", "已整改"),
|
||||||
|
("accepted", "已接受"),
|
||||||
|
("review_passed", "复核通过"),
|
||||||
|
("review_failed", "复核未通过"),
|
||||||
|
],
|
||||||
|
default="open",
|
||||||
|
max_length=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
]
|
||||||
@@ -253,6 +253,7 @@ class WorkflowNodeRun(models.Model):
|
|||||||
class Status(models.TextChoices):
|
class Status(models.TextChoices):
|
||||||
PENDING = "pending", "等待中"
|
PENDING = "pending", "等待中"
|
||||||
RUNNING = "running", "执行中"
|
RUNNING = "running", "执行中"
|
||||||
|
WAITING_USER = "waiting_user", "等待用户确认"
|
||||||
RETRYING = "retrying", "重试中"
|
RETRYING = "retrying", "重试中"
|
||||||
SUCCESS = "success", "成功"
|
SUCCESS = "success", "成功"
|
||||||
FAILED = "failed", "失败"
|
FAILED = "failed", "失败"
|
||||||
@@ -261,8 +262,13 @@ class WorkflowNodeRun(models.Model):
|
|||||||
batch = models.ForeignKey(
|
batch = models.ForeignKey(
|
||||||
FileSummaryBatch,
|
FileSummaryBatch,
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
related_name="node_runs",
|
related_name="node_runs",
|
||||||
)
|
)
|
||||||
|
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
|
||||||
|
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
|
||||||
|
node_group = models.CharField(max_length=40, blank=True, default="file_summary")
|
||||||
node_code = models.CharField(max_length=40)
|
node_code = models.CharField(max_length=40)
|
||||||
node_name = models.CharField(max_length=80)
|
node_name = models.CharField(max_length=80)
|
||||||
status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
|
status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
|
||||||
@@ -278,6 +284,10 @@ class WorkflowNodeRun(models.Model):
|
|||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
|
models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
|
||||||
|
models.Index(
|
||||||
|
fields=["workflow_type", "workflow_batch_id"],
|
||||||
|
name="idx_ra_node_workflow",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -287,8 +297,19 @@ class WorkflowEvent(models.Model):
|
|||||||
batch = models.ForeignKey(
|
batch = models.ForeignKey(
|
||||||
FileSummaryBatch,
|
FileSummaryBatch,
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
related_name="events",
|
related_name="events",
|
||||||
)
|
)
|
||||||
|
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
|
||||||
|
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
|
||||||
|
conversation = models.ForeignKey(
|
||||||
|
Conversation,
|
||||||
|
on_delete=models.CASCADE,
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
|
related_name="workflow_events",
|
||||||
|
)
|
||||||
event_type = models.CharField(max_length=40)
|
event_type = models.CharField(max_length=40)
|
||||||
payload = models.JSONField(default=dict)
|
payload = models.JSONField(default=dict)
|
||||||
created_at = models.DateTimeField(auto_now_add=True)
|
created_at = models.DateTimeField(auto_now_add=True)
|
||||||
@@ -299,6 +320,10 @@ class WorkflowEvent(models.Model):
|
|||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
|
models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
|
||||||
models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"),
|
models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"),
|
||||||
|
models.Index(
|
||||||
|
fields=["workflow_type", "workflow_batch_id", "id"],
|
||||||
|
name="idx_ra_event_workflow_id",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -308,6 +333,7 @@ class ExportedSummaryFile(models.Model):
|
|||||||
class ExportType(models.TextChoices):
|
class ExportType(models.TextChoices):
|
||||||
MARKDOWN = "markdown", "Markdown"
|
MARKDOWN = "markdown", "Markdown"
|
||||||
EXCEL = "excel", "Excel"
|
EXCEL = "excel", "Excel"
|
||||||
|
JSON = "json", "JSON"
|
||||||
|
|
||||||
class Status(models.TextChoices):
|
class Status(models.TextChoices):
|
||||||
SUCCESS = "success", "成功"
|
SUCCESS = "success", "成功"
|
||||||
@@ -318,6 +344,9 @@ class ExportedSummaryFile(models.Model):
|
|||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
related_name="exports",
|
related_name="exports",
|
||||||
)
|
)
|
||||||
|
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
|
||||||
|
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
|
||||||
|
export_category = models.CharField(max_length=40, blank=True, default="summary")
|
||||||
export_type = models.CharField(max_length=20, choices=ExportType.choices)
|
export_type = models.CharField(max_length=20, choices=ExportType.choices)
|
||||||
file_name = models.CharField(max_length=255)
|
file_name = models.CharField(max_length=255)
|
||||||
storage_path = models.CharField(max_length=500)
|
storage_path = models.CharField(max_length=500)
|
||||||
@@ -331,4 +360,214 @@ class ExportedSummaryFile(models.Model):
|
|||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"),
|
models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"),
|
||||||
models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"),
|
models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"),
|
||||||
|
models.Index(
|
||||||
|
fields=["workflow_type", "workflow_batch_id"],
|
||||||
|
name="idx_ra_export_workflow",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryRuleVersion(models.Model):
    """Tracks the local regulatory rule YAML and its matching RAG index.

    One row describes one rule set: where its YAML file lives, a hash of
    that file, and the name/version/hash of the RAG index recorded next to
    it.
    """

    class Status(models.TextChoices):
        # Display labels are Chinese UI strings: enabled / pending update / disabled.
        ACTIVE = "active", "启用"
        OUTDATED = "outdated", "待更新"
        DISABLED = "disabled", "停用"

    # Unique identifier of the rule set; also what __str__ returns.
    code = models.CharField(max_length=80, unique=True)
    name = models.CharField(max_length=160)
    # Location and hash of the rule YAML file.
    # NOTE(review): hash algorithm and path base are not visible here - confirm with the loader.
    yaml_path = models.CharField(max_length=500)
    yaml_hash = models.CharField(max_length=128)
    # RAG index bookkeeping; all three default to "" when no index is recorded.
    rag_collection = models.CharField(max_length=120, blank=True, default="")
    rag_index_version = models.CharField(max_length=80, blank=True, default="")
    rag_index_hash = models.CharField(max_length=128, blank=True, default="")
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.ACTIVE)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_regulatory_rule_version"
        # Most recently updated rule version first.
        ordering = ["-updated_at", "-id"]
        indexes = [
            # NOTE(review): `code` is already unique, so this composite index
            # may add little beyond the unique index - confirm it is needed.
            models.Index(fields=["code", "status"], name="idx_ra_rule_code_status"),
        ]

    def __str__(self) -> str:
        return self.code
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryReviewBatch(models.Model):
    """Tracks one NMPA regulatory review workflow run.

    A batch belongs to a conversation and a user, is derived from one file
    summary batch, and optionally points at the rule version it was run
    against. Issues, artifacts and notifications reference it through
    their own foreign keys.
    """

    class Status(models.TextChoices):
        # Display labels are Chinese UI strings:
        # pending / running / waiting for user confirmation / success / failed.
        PENDING = "pending", "待执行"
        RUNNING = "running", "执行中"
        WAITING_USER = "waiting_user", "等待用户确认"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    # Deleting the conversation or the user deletes the batch as well.
    conversation = models.ForeignKey(
        Conversation,
        on_delete=models.CASCADE,
        related_name="regulatory_review_batches",
    )
    # NOTE(review): this related_name ("review_regulatory_batches") uses a
    # different word order than the conversation's "regulatory_review_batches";
    # renaming would break existing callers, so it is only flagged here.
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="review_regulatory_batches",
    )
    # Optional; the reference is nulled (batch kept) if the message is deleted.
    trigger_message = models.ForeignKey(
        Message,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="triggered_regulatory_batches",
    )
    # PROTECT: a file summary batch cannot be deleted while a review batch
    # still references it.
    source_summary_batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.PROTECT,
        related_name="regulatory_review_batches",
    )
    # Optional; the reference is nulled (batch kept) if the rule version is deleted.
    rule_version = models.ForeignKey(
        RegulatoryRuleVersion,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="review_batches",
    )
    # Unique batch number; also what __str__ returns.
    batch_no = models.CharField(max_length=64, unique=True)
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
    # Free-form JSON objects, both defaulting to {}.
    # NOTE(review): their schemas are defined by the workflow code, not here.
    condition_json = models.JSONField(default=dict, blank=True)
    risk_summary = models.JSONField(default=dict, blank=True)
    work_dir = models.CharField(max_length=500, blank=True, default="")
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    # Nullable timestamps; nothing in this model sets them automatically.
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        db_table = "ra_regulatory_review_batch"
        # Newest batch first.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["conversation", "created_at"], name="idx_ra_rr_batch_conv"),
            models.Index(fields=["user", "created_at"], name="idx_ra_rr_batch_user"),
            models.Index(fields=["status", "created_at"], name="idx_ra_rr_batch_status"),
        ]

    def __str__(self) -> str:
        return self.batch_no
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryIssue(models.Model):
    """Stores one regulatory finding after risk consolidation.

    Each issue belongs to a review batch and carries its classification
    (category, severity), human-readable text, a handling status, and JSON
    evidence and citations.
    """

    class Severity(models.TextChoices):
        # Display labels are Chinese UI strings:
        # blocking item / high risk / medium risk / low risk / hint.
        BLOCKING = "blocking", "阻断项"
        HIGH = "high", "高风险"
        MEDIUM = "medium", "中风险"
        LOW = "low", "低风险"
        INFO = "info", "提示"

    class Category(models.TextChoices):
        # Display labels: completeness / chapter structure / consistency / regulatory basis.
        COMPLETENESS = "completeness", "完整性"
        STRUCTURE = "structure", "章节"
        CONSISTENCY = "consistency", "一致性"
        RAG = "rag", "法规依据"

    class Status(models.TextChoices):
        # Display labels: open / rectified / accepted / re-review passed / re-review failed.
        OPEN = "open", "待处理"
        RESOLVED = "resolved", "已整改"
        ACCEPTED = "accepted", "已接受"
        REVIEW_PASSED = "review_passed", "复核通过"
        REVIEW_FAILED = "review_failed", "复核未通过"

    # Deleting the review batch deletes its issues.
    batch = models.ForeignKey(
        RegulatoryReviewBatch,
        on_delete=models.CASCADE,
        related_name="issues",
    )
    # Code of the rule that produced the issue; may be empty.
    rule_code = models.CharField(max_length=120, blank=True, default="")
    category = models.CharField(max_length=40, choices=Category.choices)
    severity = models.CharField(max_length=20, choices=Severity.choices)
    title = models.CharField(max_length=255)
    detail = models.TextField(blank=True, default="")
    suggestion = models.TextField(blank=True, default="")
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.OPEN)
    # evidence defaults to a JSON object ({}), citations to a JSON array ([]).
    evidence = models.JSONField(default=dict, blank=True)
    citations = models.JSONField(default=list, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_regulatory_issue"
        # NOTE(review): `severity` is a CharField, so this orders by the stored
        # string (blocking, high, info, low, medium), not by risk rank - "info"
        # and "low" sort before "medium". Confirm whether callers re-sort.
        ordering = ["severity", "id"]
        indexes = [
            models.Index(fields=["batch", "severity"], name="idx_ra_rr_issue_severity"),
            models.Index(fields=["batch", "category"], name="idx_ra_rr_issue_category"),
        ]

    def __str__(self) -> str:
        return self.title
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryArtifact(models.Model):
    """Stores regulatory review intermediate and exported artifacts.

    Each row records one stored file of a review batch: its type, a display
    name, where it is stored, an optional content hash and free-form JSON
    metadata.
    """

    class ArtifactType(models.TextChoices):
        # The label of TEXT is the Chinese word for "text".
        MARKDOWN = "markdown", "Markdown"
        EXCEL = "excel", "Excel"
        JSON = "json", "JSON"
        TEXT = "text", "文本"

    # Deleting the review batch deletes its artifact rows.
    # NOTE(review): nothing here removes the file at storage_path - confirm cleanup elsewhere.
    batch = models.ForeignKey(
        RegulatoryReviewBatch,
        on_delete=models.CASCADE,
        related_name="artifacts",
    )
    artifact_type = models.CharField(max_length=20, choices=ArtifactType.choices)
    name = models.CharField(max_length=160)
    storage_path = models.CharField(max_length=500)
    # Optional; empty string when no hash was recorded.
    content_hash = models.CharField(max_length=128, blank=True, default="")
    metadata = models.JSONField(default=dict, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_regulatory_artifact"
        # Newest artifact first.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["batch", "artifact_type"], name="idx_ra_rr_artifact_type"),
        ]
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryNotificationRecord(models.Model):
    """Stores mock notification records for future Feishu integration.

    Each row is one notification attempt for a review batch: the channel,
    the target, the JSON payload, and the delivery status.
    """

    class Channel(models.TextChoices):
        # Display labels are Chinese UI strings: mock / Feishu.
        MOCK = "mock", "模拟"
        FEISHU = "feishu", "飞书"

    class Status(models.TextChoices):
        # Display labels: pending send / sent / failed.
        PENDING = "pending", "待发送"
        SENT = "sent", "已发送"
        FAILED = "failed", "失败"

    # Deleting the review batch deletes its notification records.
    batch = models.ForeignKey(
        RegulatoryReviewBatch,
        on_delete=models.CASCADE,
        related_name="notifications",
    )
    # Defaults to the mock channel.
    channel = models.CharField(max_length=20, choices=Channel.choices, default=Channel.MOCK)
    # NOTE(review): the format of `target` (user id, chat id, ...) is not defined here.
    target = models.CharField(max_length=160, blank=True, default="")
    payload = models.JSONField(default=dict, blank=True)
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    # Nullable; nothing in this model sets it automatically.
    sent_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        db_table = "ra_regulatory_notification_record"
        # Newest record first.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["batch", "status"], name="idx_ra_rr_notify_status"),
        ]
|
||||||
|
|||||||
1
review_agent/regulatory_review/__init__.py
Normal file
1
review_agent/regulatory_review/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""NMPA regulatory review workflow package."""
|
||||||
26
review_agent/regulatory_review/events.py
Normal file
26
review_agent/regulatory_review/events.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryReviewBatch, WorkflowEvent
|
||||||
|
|
||||||
|
|
||||||
|
def record_event(
    batch: RegulatoryReviewBatch,
    event_type: str,
    payload: dict | None = None,
) -> WorkflowEvent:
    """Persist one workflow event for a regulatory review batch.

    The event is stored with ``workflow_type="regulatory_review"``, the
    batch's primary key as ``workflow_batch_id`` and the batch's
    conversation. The ``batch`` foreign key of ``WorkflowEvent`` is not set
    here.

    Args:
        batch: Review batch the event belongs to.
        event_type: Short event name stored on the row.
        payload: Event data; ``None`` or an empty dict is stored as ``{}``.

    Returns:
        The newly created ``WorkflowEvent``.
    """
    event_fields = {
        "workflow_type": "regulatory_review",
        "workflow_batch_id": batch.pk,
        "conversation": batch.conversation,
        "event_type": event_type,
        # Any falsy payload collapses to a fresh empty dict.
        "payload": payload if payload else {},
    }
    return WorkflowEvent.objects.create(**event_fields)
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert a workflow event into a plain dictionary.

    Args:
        event: Event to serialize; its ``pk``, ``event_type``, ``payload``
            and ``created_at`` attributes are read.

    Returns:
        A dict with the keys ``id``, ``event_type``, ``payload`` and
        ``created_at``, where ``created_at`` is the ISO-8601 string produced
        by ``isoformat()``. The payload object is passed through as-is.
    """
    created_iso = event.created_at.isoformat()
    return dict(
        id=event.pk,
        event_type=event.event_type,
        payload=event.payload,
        created_at=created_iso,
    )
|
||||||
@@ -0,0 +1,503 @@
|
|||||||
|
code: nmpa_ivd_registration_v1
|
||||||
|
name: NMPA IVD 注册资料附件 4 对齐规则
|
||||||
|
rag_collection: nmpa_ivd_registration_v1
|
||||||
|
source_material_dir: docs/0.原始材料
|
||||||
|
attachment4_required_codes:
|
||||||
|
- "1"
|
||||||
|
- "1.1"
|
||||||
|
- "1.2"
|
||||||
|
- "1.3"
|
||||||
|
- "1.4"
|
||||||
|
- "1.5"
|
||||||
|
- "1.6"
|
||||||
|
- "1.7"
|
||||||
|
- "2"
|
||||||
|
- "2.1"
|
||||||
|
- "2.2"
|
||||||
|
- "2.3"
|
||||||
|
- "2.4"
|
||||||
|
- "2.5"
|
||||||
|
- "2.6"
|
||||||
|
- "3"
|
||||||
|
- "3.1"
|
||||||
|
- "3.2"
|
||||||
|
- "3.3"
|
||||||
|
- "3.4"
|
||||||
|
- "3.5"
|
||||||
|
- "3.6"
|
||||||
|
- "3.7"
|
||||||
|
- "3.8"
|
||||||
|
- "4"
|
||||||
|
- "4.1"
|
||||||
|
- "4.2"
|
||||||
|
- "5"
|
||||||
|
- "5.1"
|
||||||
|
- "5.2"
|
||||||
|
- "5.3"
|
||||||
|
- "5.4"
|
||||||
|
- "6"
|
||||||
|
- "6.1"
|
||||||
|
- "6.2"
|
||||||
|
- "6.3"
|
||||||
|
- "6.4"
|
||||||
|
- "6.5"
|
||||||
|
- "6.6"
|
||||||
|
- "6.7"
|
||||||
|
- "6.8"
|
||||||
|
- "6.9"
|
||||||
|
- "6.10"
|
||||||
|
requirements:
|
||||||
|
- code: attachment4_1_regulatory_info
|
||||||
|
rule_id: A4-1
|
||||||
|
attachment4_code: "1"
|
||||||
|
title: 监管信息
|
||||||
|
type: chapter
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [监管信息]
|
||||||
|
aliases: [监管资料]
|
||||||
|
suggestion: 请补充监管信息章节及其目录项。
|
||||||
|
citation_query: 附件4 监管信息 体外诊断试剂 注册申报资料
|
||||||
|
structure_required: true
|
||||||
|
- code: attachment4_1_1_toc
|
||||||
|
rule_id: A4-1.1
|
||||||
|
attachment4_code: "1.1"
|
||||||
|
title: 章节目录
|
||||||
|
type: directory
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [章节目录, 目录]
|
||||||
|
aliases: [监管信息目录]
|
||||||
|
suggestion: 请补充监管信息章节目录。
|
||||||
|
citation_query: 附件4 监管信息 章节目录
|
||||||
|
- code: attachment4_1_2_application_form
|
||||||
|
rule_id: A4-1.2
|
||||||
|
attachment4_code: "1.2"
|
||||||
|
title: 申请表
|
||||||
|
type: required
|
||||||
|
severity: blocking
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [申请表, 注册申请表]
|
||||||
|
aliases: [医疗器械注册申请表]
|
||||||
|
suggestion: 请补充注册申请表并核对注册类型、管理类别和分类编码。
|
||||||
|
citation_query: 附件4 监管信息 申请表
|
||||||
|
- code: attachment4_1_3_terms
|
||||||
|
rule_id: A4-1.3
|
||||||
|
attachment4_code: "1.3"
|
||||||
|
title: 术语/缩写词列表
|
||||||
|
type: recommended
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [术语, 缩写词, 缩略语]
|
||||||
|
suggestion: 请补充术语和缩写词列表。
|
||||||
|
citation_query: 附件4 术语 缩写词列表
|
||||||
|
- code: attachment4_1_4_product_list
|
||||||
|
rule_id: A4-1.4
|
||||||
|
attachment4_code: "1.4"
|
||||||
|
title: 产品列表
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [产品列表, 产品清单]
|
||||||
|
suggestion: 请补充申报产品列表。
|
||||||
|
citation_query: 附件4 产品列表
|
||||||
|
- code: attachment4_1_5_related_files
|
||||||
|
rule_id: A4-1.5
|
||||||
|
attachment4_code: "1.5"
|
||||||
|
title: 关联文件
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [关联文件, 关联注册, 引用文件]
|
||||||
|
suggestion: 如存在关联注册或引用资料,请补充关联文件说明。
|
||||||
|
citation_query: 附件4 关联文件
|
||||||
|
- code: attachment4_1_6_pre_submission
|
||||||
|
rule_id: A4-1.6
|
||||||
|
attachment4_code: "1.6"
|
||||||
|
title: 申报前与监管机构的联系情况和沟通记录
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [沟通记录, 监管机构, 申报前]
|
||||||
|
suggestion: 如有申报前沟通,请补充沟通记录;如无,请说明不适用。
|
||||||
|
citation_query: 附件4 申报前 监管机构 沟通记录
|
||||||
|
- code: attachment4_1_7_declaration
|
||||||
|
rule_id: A4-1.7
|
||||||
|
attachment4_code: "1.7"
|
||||||
|
title: 符合性声明
|
||||||
|
type: required
|
||||||
|
severity: blocking
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [符合性声明, 声明]
|
||||||
|
suggestion: 请补充符合性声明。
|
||||||
|
citation_query: 附件4 符合性声明
|
||||||
|
- code: attachment4_2_summary
|
||||||
|
rule_id: A4-2
|
||||||
|
attachment4_code: "2"
|
||||||
|
title: 综述资料
|
||||||
|
type: chapter
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [综述资料]
|
||||||
|
suggestion: 请补充综述资料章节。
|
||||||
|
citation_query: 附件4 综述资料
|
||||||
|
structure_required: true
|
||||||
|
- code: attachment4_2_1_toc
|
||||||
|
rule_id: A4-2.1
|
||||||
|
attachment4_code: "2.1"
|
||||||
|
title: 章节目录
|
||||||
|
type: directory
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [章节目录, 综述资料目录]
|
||||||
|
suggestion: 请补充综述资料章节目录。
|
||||||
|
citation_query: 附件4 综述资料 章节目录
|
||||||
|
- code: attachment4_2_2_overview
|
||||||
|
rule_id: A4-2.2
|
||||||
|
attachment4_code: "2.2"
|
||||||
|
title: 概述
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [概述]
|
||||||
|
suggestion: 请补充产品概述。
|
||||||
|
citation_query: 附件4 概述
|
||||||
|
- code: attachment4_2_3_product_description
|
||||||
|
rule_id: A4-2.3
|
||||||
|
attachment4_code: "2.3"
|
||||||
|
title: 产品描述
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [产品描述]
|
||||||
|
suggestion: 请补充产品描述。
|
||||||
|
citation_query: 附件4 产品描述
|
||||||
|
- code: attachment4_2_4_intended_use
|
||||||
|
rule_id: A4-2.4
|
||||||
|
attachment4_code: "2.4"
|
||||||
|
title: 预期用途
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [预期用途]
|
||||||
|
suggestion: 请补充预期用途资料。
|
||||||
|
citation_query: 附件4 预期用途
|
||||||
|
- code: attachment4_2_5_marketing_history
|
||||||
|
rule_id: A4-2.5
|
||||||
|
attachment4_code: "2.5"
|
||||||
|
title: 申报产品上市历史
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [上市历史]
|
||||||
|
suggestion: 如产品已有上市历史,请补充相关说明;如无,请说明不适用。
|
||||||
|
citation_query: 附件4 上市历史
|
||||||
|
- code: attachment4_2_6_other_summary
|
||||||
|
rule_id: A4-2.6
|
||||||
|
attachment4_code: "2.6"
|
||||||
|
title: 其他需说明的内容
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [其他需说明, 其他说明]
|
||||||
|
suggestion: 请补充其他需说明内容或不适用说明。
|
||||||
|
citation_query: 附件4 其他需说明
|
||||||
|
- code: attachment4_3_nonclinical
|
||||||
|
rule_id: A4-3
|
||||||
|
attachment4_code: "3"
|
||||||
|
title: 非临床资料
|
||||||
|
type: chapter
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [非临床资料]
|
||||||
|
suggestion: 请补充非临床资料章节。
|
||||||
|
citation_query: 附件4 非临床资料
|
||||||
|
structure_required: true
|
||||||
|
- code: attachment4_3_1_toc
|
||||||
|
rule_id: A4-3.1
|
||||||
|
attachment4_code: "3.1"
|
||||||
|
title: 章节目录
|
||||||
|
type: directory
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [章节目录, 非临床资料目录]
|
||||||
|
suggestion: 请补充非临床资料章节目录。
|
||||||
|
citation_query: 附件4 非临床资料 章节目录
|
||||||
|
- code: attachment4_3_2_risk_management
|
||||||
|
rule_id: A4-3.2
|
||||||
|
attachment4_code: "3.2"
|
||||||
|
title: 产品风险管理资料
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [产品风险管理, 风险管理资料]
|
||||||
|
suggestion: 请补充产品风险管理资料。
|
||||||
|
citation_query: 附件4 产品风险管理资料
|
||||||
|
- code: essential_principles_checklist
|
||||||
|
rule_id: A4-3.3
|
||||||
|
attachment4_code: "3.3"
|
||||||
|
title: 体外诊断试剂安全和性能基本原则清单
|
||||||
|
type: recommended
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [安全和性能基本原则, 基本原则清单]
|
||||||
|
aliases: [安全和性能基本原则清单]
|
||||||
|
suggestion: 建议补充安全和性能基本原则清单,便于审评追溯。
|
||||||
|
citation_query: 附件4 安全和性能基本原则清单
|
||||||
|
- code: product_technical_requirements
|
||||||
|
rule_id: A4-3.4
|
||||||
|
attachment4_code: "3.4"
|
||||||
|
title: 产品技术要求及检验报告
|
||||||
|
type: required
|
||||||
|
severity: blocking
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [产品技术要求, 注册检验报告, 检验报告]
|
||||||
|
aliases: [产品技术要求, 注册检验报告]
|
||||||
|
required_sections: [产品技术要求, 检验报告]
|
||||||
|
suggestion: 请补充产品技术要求及注册检验报告,并确认二者覆盖型号一致。
|
||||||
|
citation_query: 附件4 产品技术要求 检验报告
|
||||||
|
- code: registration_test_report
|
||||||
|
rule_id: A4-3.4-R
|
||||||
|
attachment4_code: "3.4"
|
||||||
|
title: 注册检验报告
|
||||||
|
type: required
|
||||||
|
severity: blocking
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [注册检验报告, 检验报告]
|
||||||
|
suggestion: 请补充注册检验报告并复核报告覆盖的产品型号。
|
||||||
|
citation_query: 附件4 注册检验报告
|
||||||
|
- code: attachment4_3_5_analytical_performance
|
||||||
|
rule_id: A4-3.5
|
||||||
|
attachment4_code: "3.5"
|
||||||
|
title: 分析性能研究
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [分析性能研究, 分析性能]
|
||||||
|
suggestion: 请补充分析性能研究资料。
|
||||||
|
citation_query: 附件4 分析性能研究
|
||||||
|
- code: attachment4_3_6_stability
|
||||||
|
rule_id: A4-3.6
|
||||||
|
attachment4_code: "3.6"
|
||||||
|
title: 稳定性研究
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [稳定性研究, 稳定性]
|
||||||
|
suggestion: 请补充稳定性研究资料。
|
||||||
|
citation_query: 附件4 稳定性研究
|
||||||
|
- code: attachment4_3_7_reference_interval
|
||||||
|
rule_id: A4-3.7
|
||||||
|
attachment4_code: "3.7"
|
||||||
|
title: 阳性判断值或参考区间研究
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [阳性判断值, 参考区间]
|
||||||
|
suggestion: 请补充阳性判断值或参考区间研究资料。
|
||||||
|
citation_query: 附件4 阳性判断值 参考区间
|
||||||
|
- code: attachment4_3_8_other_nonclinical
|
||||||
|
rule_id: A4-3.8
|
||||||
|
attachment4_code: "3.8"
|
||||||
|
title: 其他资料
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [其他资料]
|
||||||
|
suggestion: 请补充非临床其他资料或不适用说明。
|
||||||
|
citation_query: 附件4 非临床 其他资料
|
||||||
|
- code: attachment4_4_clinical_evaluation
|
||||||
|
rule_id: A4-4
|
||||||
|
attachment4_code: "4"
|
||||||
|
title: 临床评价资料
|
||||||
|
type: chapter
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [临床评价资料, 临床资料]
|
||||||
|
suggestion: 请补充临床评价资料章节。
|
||||||
|
citation_query: 附件4 临床评价资料
|
||||||
|
structure_required: true
|
||||||
|
- code: attachment4_4_1_toc
|
||||||
|
rule_id: A4-4.1
|
||||||
|
attachment4_code: "4.1"
|
||||||
|
title: 章节目录
|
||||||
|
type: directory
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [章节目录, 临床评价资料目录]
|
||||||
|
suggestion: 请补充临床评价资料章节目录。
|
||||||
|
citation_query: 附件4 临床评价资料 章节目录
|
||||||
|
- code: clinical_evaluation
|
||||||
|
rule_id: A4-4.2
|
||||||
|
attachment4_code: "4.2"
|
||||||
|
title: 临床评价资料
|
||||||
|
type: conditional
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [临床评价, 临床试验, 免临床, 同品种比对]
|
||||||
|
suggestion: 请根据适用情形补充临床评价资料或说明豁免依据。
|
||||||
|
citation_query: 附件4 临床评价资料 注册申报
|
||||||
|
- code: attachment4_5_ifu_label
|
||||||
|
rule_id: A4-5
|
||||||
|
attachment4_code: "5"
|
||||||
|
title: 产品说明书和标签样稿
|
||||||
|
type: chapter
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [产品说明书和标签样稿, 说明书, 标签样稿]
|
||||||
|
suggestion: 请补充产品说明书和标签样稿章节。
|
||||||
|
citation_query: 附件4 产品说明书 标签样稿
|
||||||
|
structure_required: true
|
||||||
|
- code: attachment4_5_1_toc
|
||||||
|
rule_id: A4-5.1
|
||||||
|
attachment4_code: "5.1"
|
||||||
|
title: 章节目录
|
||||||
|
type: directory
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [章节目录, 说明书目录, 标签目录]
|
||||||
|
suggestion: 请补充产品说明书和标签样稿章节目录。
|
||||||
|
citation_query: 附件4 说明书 标签 章节目录
|
||||||
|
- code: instructions_for_use
|
||||||
|
rule_id: A4-5.2
|
||||||
|
attachment4_code: "5.2"
|
||||||
|
title: 产品说明书
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [说明书, 产品说明书, 使用说明]
|
||||||
|
aliases: [说明书]
|
||||||
|
required_sections: [储存条件, 有效期, 样本要求]
|
||||||
|
suggestion: 请补充说明书并核对储存条件、有效期和样本要求章节。
|
||||||
|
citation_query: 附件4 产品说明书 储存条件 有效期 样本要求
|
||||||
|
- code: attachment4_5_3_label
|
||||||
|
rule_id: A4-5.3
|
||||||
|
attachment4_code: "5.3"
|
||||||
|
title: 标签样稿
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [标签样稿, 标签]
|
||||||
|
suggestion: 请补充标签样稿。
|
||||||
|
citation_query: 附件4 标签样稿
|
||||||
|
- code: attachment4_5_4_other_ifu
|
||||||
|
rule_id: A4-5.4
|
||||||
|
attachment4_code: "5.4"
|
||||||
|
title: 其他资料
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [其他资料]
|
||||||
|
suggestion: 请补充说明书和标签相关其他资料或不适用说明。
|
||||||
|
citation_query: 附件4 说明书 标签 其他资料
|
||||||
|
- code: attachment4_6_quality_system
|
||||||
|
rule_id: A4-6
|
||||||
|
attachment4_code: "6"
|
||||||
|
title: 质量管理体系文件
|
||||||
|
type: chapter
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [质量管理体系文件, 质量体系, 质量管理体系]
|
||||||
|
suggestion: 请补充质量管理体系文件章节。
|
||||||
|
citation_query: 附件4 质量管理体系文件
|
||||||
|
structure_required: true
|
||||||
|
- code: attachment4_6_1_overview
|
||||||
|
rule_id: A4-6.1
|
||||||
|
attachment4_code: "6.1"
|
||||||
|
title: 综述
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [综述]
|
||||||
|
suggestion: 请补充质量管理体系综述。
|
||||||
|
citation_query: 附件4 质量管理体系 综述
|
||||||
|
- code: attachment4_6_2_toc
|
||||||
|
rule_id: A4-6.2
|
||||||
|
attachment4_code: "6.2"
|
||||||
|
title: 章节目录
|
||||||
|
type: directory
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [章节目录, 质量管理体系目录]
|
||||||
|
suggestion: 请补充质量管理体系文件章节目录。
|
||||||
|
citation_query: 附件4 质量管理体系 章节目录
|
||||||
|
- code: attachment4_6_3_manufacturing
|
||||||
|
rule_id: A4-6.3
|
||||||
|
attachment4_code: "6.3"
|
||||||
|
title: 生产制造信息
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [生产制造信息, 生产制造]
|
||||||
|
suggestion: 请补充生产制造信息。
|
||||||
|
citation_query: 附件4 生产制造信息
|
||||||
|
- code: attachment4_6_4_qms_procedure
|
||||||
|
rule_id: A4-6.4
|
||||||
|
attachment4_code: "6.4"
|
||||||
|
title: 质量管理体系程序
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [质量管理体系程序, 质量体系程序]
|
||||||
|
suggestion: 请补充质量管理体系程序。
|
||||||
|
citation_query: 附件4 质量管理体系程序
|
||||||
|
- code: attachment4_6_5_management
|
||||||
|
rule_id: A4-6.5
|
||||||
|
attachment4_code: "6.5"
|
||||||
|
title: 管理职责程序
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [管理职责程序, 管理职责]
|
||||||
|
suggestion: 请补充管理职责程序。
|
||||||
|
citation_query: 附件4 管理职责程序
|
||||||
|
- code: attachment4_6_6_resource
|
||||||
|
rule_id: A4-6.6
|
||||||
|
attachment4_code: "6.6"
|
||||||
|
title: 资源管理程序
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [资源管理程序, 资源管理]
|
||||||
|
suggestion: 请补充资源管理程序。
|
||||||
|
citation_query: 附件4 资源管理程序
|
||||||
|
- code: attachment4_6_7_realization
|
||||||
|
rule_id: A4-6.7
|
||||||
|
attachment4_code: "6.7"
|
||||||
|
title: 产品实现程序
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [产品实现程序, 产品实现]
|
||||||
|
suggestion: 请补充产品实现程序。
|
||||||
|
citation_query: 附件4 产品实现程序
|
||||||
|
- code: attachment4_6_8_measurement
|
||||||
|
rule_id: A4-6.8
|
||||||
|
attachment4_code: "6.8"
|
||||||
|
title: 质量管理体系的测量/分析和改进程序
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [测量, 分析和改进, 改进程序]
|
||||||
|
suggestion: 请补充质量管理体系测量、分析和改进程序。
|
||||||
|
citation_query: 附件4 测量 分析 改进程序
|
||||||
|
- code: attachment4_6_9_other_qms
|
||||||
|
rule_id: A4-6.9
|
||||||
|
attachment4_code: "6.9"
|
||||||
|
title: 其他质量体系程序信息
|
||||||
|
type: conditional
|
||||||
|
severity: medium
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [其他质量体系程序, 其他质量体系]
|
||||||
|
suggestion: 请补充其他质量体系程序信息或不适用说明。
|
||||||
|
citation_query: 附件4 其他质量体系程序信息
|
||||||
|
- code: attachment4_6_10_qms_audit
|
||||||
|
rule_id: A4-6.10
|
||||||
|
attachment4_code: "6.10"
|
||||||
|
title: 质量管理体系核查文件
|
||||||
|
type: required
|
||||||
|
severity: high
|
||||||
|
category: completeness
|
||||||
|
file_keywords: [质量管理体系核查文件, 体系核查文件, 核查文件]
|
||||||
|
suggestion: 请补充质量管理体系核查文件。
|
||||||
|
citation_query: 附件4 质量管理体系核查文件
|
||||||
18
review_agent/regulatory_review/schemas.py
Normal file
18
review_agent/regulatory_review/schemas.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Finding:
    """One finding produced by a regulatory review check.

    Instances are frozen, so attributes cannot be reassigned after
    construction. The freeze is shallow: the ``evidence`` dict and the
    ``citations`` list themselves remain mutable.
    """

    # Required, positional fields.
    rule_code: str
    category: str
    severity: str
    title: str
    # Optional text fields, empty by default.
    detail: str = ""
    suggestion: str = ""
    # default_factory gives every instance its own empty container.
    evidence: dict[str, object] = field(default_factory=dict)
    citations: list[dict[str, object]] = field(default_factory=list)

    def to_dict(self) -> dict[str, object]:
        """Return the finding as a dict built by ``dataclasses.asdict``."""
        return asdict(self)
|
||||||
1
review_agent/regulatory_review/services/__init__.py
Normal file
1
review_agent/regulatory_review/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Services for NMPA regulatory review."""
|
||||||
@@ -0,0 +1,73 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch
|
||||||
|
from review_agent.regulatory_review.schemas import Finding
|
||||||
|
|
||||||
|
|
||||||
|
def run_completeness_check(
    batch: FileSummaryBatch,
    rule_set: dict,
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Report every checked requirement that no file in the batch matches.

    The batch's items are loaded once, ordered by ``file_index``. Each
    requirement whose ``type`` is one of required / conditional /
    recommended / chapter / directory is matched against every item's
    ``file_name``, ``relative_path`` and ``directory_level`` using the
    requirement's ``file_keywords`` plus ``aliases``. A requirement with no
    matching item yields one ``Finding``.

    Args:
        batch: File summary batch whose ``items`` are searched.
        rule_set: Parsed rule set; its ``requirements`` list is read. A
            missing or null list is treated as empty.
        progress_callback: Optional callable invoked once per checked
            requirement with ``processed``, ``total``, ``label`` and
            ``finding_count``.

    Returns:
        The findings, in requirement order.
    """
    checked_types = {"required", "conditional", "recommended", "chapter", "directory"}
    items = list(batch.items.order_by("file_index"))
    # Filter once up front so `total` reflects exactly what the loop visits.
    requirements = [
        requirement
        for requirement in rule_set.get("requirements") or []
        if requirement.get("type") in checked_types
    ]
    total = len(requirements)
    findings: list[Finding] = []
    for index, requirement in enumerate(requirements, start=1):
        # `or []` tolerates YAML keys that are present but null; the copy
        # keeps the finding's evidence independent of the rule set's list.
        file_keywords = list(requirement.get("file_keywords") or [])
        search_terms = [*file_keywords, *(requirement.get("aliases") or [])]
        # Only existence matters, so stop at the first matching item.
        has_match = any(
            _matches_item(
                item.file_name,
                item.relative_path,
                item.directory_level,
                search_terms,
            )
            for item in items
        )
        if not has_match:
            label = _numbered_title(requirement)
            findings.append(
                Finding(
                    rule_code=requirement["code"],
                    category=requirement.get("category", "completeness"),
                    severity=requirement.get("severity", "medium"),
                    title=f"缺少{label}",
                    detail=f"当前文件汇总批次未发现{label}。",
                    suggestion=requirement.get("suggestion", ""),
                    evidence={
                        "requirement_type": requirement.get("type"),
                        "matched_files": [],
                        "searched_keywords": file_keywords,
                        "searched_fields": ["file_name", "relative_path", "directory_level"],
                    },
                )
            )
        if progress_callback:
            progress_callback(
                {
                    "processed": index,
                    "total": total,
                    "label": _numbered_title(requirement),
                    "finding_count": len(findings),
                }
            )
    return findings
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_item(file_name: str, relative_path: str, directory_level: str, keywords: list[str]) -> bool:
|
||||||
|
haystack = f"{file_name} {relative_path} {directory_level}".lower()
|
||||||
|
return any(str(keyword).lower() in haystack for keyword in keywords)
|
||||||
|
|
||||||
|
|
||||||
|
def _numbered_title(requirement: dict) -> str:
|
||||||
|
attachment4_code = requirement.get("attachment4_code")
|
||||||
|
if not attachment4_code:
|
||||||
|
return requirement["title"]
|
||||||
|
return f"{attachment4_code}{requirement['title']}"
|
||||||
59
review_agent/regulatory_review/services/consistency_check.py
Normal file
59
review_agent/regulatory_review/services/consistency_check.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from collections import defaultdict
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.schemas import Finding
|
||||||
|
|
||||||
|
|
||||||
|
# Field label -> regex whose group 1 captures the value that follows
# "<label>:" up to the end of the line.
# The separator class accepts the ASCII colon and the full-width colon
# (U+FF1A); the previous class listed the ASCII colon twice, so values written
# with a full-width colon were never matched.
FIELDS = {
    label: label + r"[::]\s*([^\n\r]+)"
    for label in (
        "产品名称",
        "型号规格",
        "预期用途",
        "管理类别",
        "分类编码",
        "注册类型",
        "临床评价路径",
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
def run_consistency_check(
    document_texts: dict[str, str],
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Report every field in ``FIELDS`` whose value differs between documents.

    For each field the first regex match in each document is taken, normalized
    with ``_normalize`` and grouped by value. More than one distinct value
    yields one high-severity ``consistency`` finding.

    Args:
        document_texts: Mapping of file name to extracted text.
        progress_callback: Optional callable invoked once per field with a dict
            holding ``processed``, ``total``, ``label`` and ``finding_count``.

    Returns:
        The findings, in ``FIELDS`` order.
    """
    findings: list[Finding] = []
    field_patterns = tuple(FIELDS.items())
    total = len(field_patterns)
    for position, (label, pattern) in enumerate(field_patterns, start=1):
        # normalized value -> names of the files that state it
        files_by_value: dict[str, list[str]] = defaultdict(list)
        for file_name, text in document_texts.items():
            hit = re.search(pattern, text)
            if not hit:
                continue
            files_by_value[_normalize(hit.group(1))].append(file_name)

        if len(files_by_value) > 1:
            finding = Finding(
                rule_code=f"consistency:{label}",
                category="consistency",
                severity="high",
                title=f"{label}在不同文件中不一致",
                detail=f"发现 {len(files_by_value)} 个不同的{label}取值。",
                suggestion=f"请统一各注册资料中的{label}。",
                evidence={"field": label, "values": dict(files_by_value)},
            )
            findings.append(finding)

        if progress_callback:
            progress = {
                "processed": position,
                "total": total,
                "label": label,
                "finding_count": len(findings),
            }
            progress_callback(progress)
    return findings
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize(value: str) -> str:
|
||||||
|
return " ".join(value.strip().split())
|
||||||
225
review_agent/regulatory_review/services/export.py
Normal file
225
review_agent/regulatory_review/services/export.py
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
|
from review_agent.models import ExportedSummaryFile, RegulatoryIssue, RegulatoryReviewBatch
|
||||||
|
|
||||||
|
|
||||||
|
# Severity code -> display label used in the exported reports.
# Insertion order is the row order of the risk summary tables.
SEVERITY_LABELS = dict(
    blocking="阻断项",
    high="高风险",
    medium="中风险",
    low="低风险",
    info="提示",
)
|
||||||
|
|
||||||
|
|
||||||
|
def export_review_results(batch: RegulatoryReviewBatch) -> list[ExportedSummaryFile]:
    """Write the Markdown, Excel and JSON exports for ``batch``.

    Files go to ``<root>/exports`` where ``<root>`` is ``batch.work_dir`` when
    set, otherwise ``MEDIA_ROOT/regulatory_review/work/<batch_no>``. The
    directory is created if missing.

    Returns:
        The three export records, ordered Markdown, Excel, JSON.
    """
    if batch.work_dir:
        root = Path(batch.work_dir)
    else:
        root = Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no
    export_dir = root / "exports"
    export_dir.mkdir(parents=True, exist_ok=True)

    markdown_record = _create_export(
        batch,
        export_dir / f"{batch.batch_no}-regulatory-review.md",
        ExportedSummaryFile.ExportType.MARKDOWN,
        "markdown_report",
        build_markdown_report(batch),
    )
    excel_record = _create_excel_export(
        batch,
        export_dir / f"{batch.batch_no}-regulatory-issues.xlsx",
    )
    json_record = _create_export(
        batch,
        export_dir / f"{batch.batch_no}-regulatory-result.json",
        ExportedSummaryFile.ExportType.JSON,
        "result_package",
        json.dumps(build_result_payload(batch), ensure_ascii=False, indent=2),
    )
    return [markdown_record, excel_record, json_record]
|
||||||
|
|
||||||
|
|
||||||
|
def _markdown_cell(value: object) -> str:
    """Render ``value`` as text that is safe inside one Markdown table cell."""
    text = str(value)
    # A line break would end the table row and a raw pipe would open a new column.
    for line_break in ("\r\n", "\n", "\r"):
        text = text.replace(line_break, " ")
    return text.replace("|", "\\|")


def build_markdown_report(batch: RegulatoryReviewBatch) -> str:
    """Build the Markdown review report for ``batch``.

    Sections, in order: title and batch number; the regeneration source (only
    when ``condition_json`` has ``regenerated_from``); the risk summary table;
    the issue table; the review records table (only when records exist); the
    notification table (only when notifications exist).

    Table cell values are escaped so that a ``|`` or a line break inside a
    title, suggestion or target cannot break the table layout.

    Returns:
        The report as one newline-joined string.
    """
    lines = [
        "# NMPA 注册资料法规核查报告",
        "",
        f"批次号:{batch.batch_no}",
    ]

    regenerated_from = (batch.condition_json or {}).get("regenerated_from")
    if regenerated_from:
        lines.extend(
            [
                "",
                "## 复核来源",
                "",
                f"- 来源法规核查批次:{regenerated_from.get('batch_no')}",
                f"- 来源文件汇总批次:{regenerated_from.get('file_summary_batch_no')}",
            ]
        )

    lines.extend(["", "## 风险汇总", "", "| 风险等级 | 数量 |", "| --- | --- |"])
    summary = batch.risk_summary or {}
    for severity, label in SEVERITY_LABELS.items():
        lines.append(f"| {label} | {summary.get(severity, 0)} |")

    lines.extend(["", "## 问题清单", "", "| 等级 | 问题 | 状态 | 建议 |", "| --- | --- | --- | --- |"])
    for issue in batch.issues.order_by("id"):
        cells = (
            SEVERITY_LABELS.get(issue.severity, issue.severity),
            issue.title,
            issue.status,
            issue.suggestion or "-",
        )
        lines.append("| " + " | ".join(_markdown_cell(cell) for cell in cells) + " |")

    review_records = _review_records(batch)
    if review_records:
        lines.extend(["", "## 复核记录", "", "| 补充批次 | 问题数 | 通过数 | 未通过数 |", "| --- | --- | --- | --- |"])
        for record in review_records:
            items = record.get("items", [])
            passed = sum(1 for item in items if item.get("status") == RegulatoryIssue.Status.REVIEW_PASSED)
            failed = sum(1 for item in items if item.get("status") == RegulatoryIssue.Status.REVIEW_FAILED)
            lines.append(
                f"| {_markdown_cell(record.get('file_summary_batch_no'))} | {len(items)} | {passed} | {failed} |"
            )

    notifications = _notification_records(batch)
    if notifications:
        lines.extend(["", "## 通知记录", "", "| 渠道 | 对象 | 状态 | 问题 |", "| --- | --- | --- | --- |"])
        for record in notifications:
            # A non-dict payload has no title to show; fall back to the placeholder
            # instead of failing on ``.get``.
            payload = record["payload"] if isinstance(record["payload"], dict) else {}
            cells = (
                record["channel"],
                record["target"] or "-",
                record["status"],
                payload.get("title", "-"),
            )
            lines.append("| " + " | ".join(_markdown_cell(cell) for cell in cells) + " |")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def build_result_payload(batch: RegulatoryReviewBatch) -> dict[str, object]:
    """Assemble the JSON result package for ``batch``.

    Returns:
        A dict with the batch number, the source summary batch number, the
        regeneration source (or None), the risk summary, one dict per issue
        (ordered by id), the review records and the notification records.
    """
    issue_attributes = (
        "severity",
        "category",
        "rule_code",
        "title",
        "detail",
        "suggestion",
        "status",
        "evidence",
        "citations",
    )
    return {
        "batch_no": batch.batch_no,
        "source_summary_batch": batch.source_summary_batch.batch_no,
        "regenerated_from": (batch.condition_json or {}).get("regenerated_from"),
        "risk_summary": batch.risk_summary,
        "issues": [
            {attribute: getattr(issue, attribute) for attribute in issue_attributes}
            for issue in batch.issues.order_by("id")
        ],
        "review_records": _review_records(batch),
        "notifications": _notification_records(batch),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_assistant_summary(batch: RegulatoryReviewBatch, exports: list[ExportedSummaryFile]) -> str:
    """Build the short Markdown summary shown after a review completes.

    It holds a risk table limited to severities with a non-zero count, a table
    of the first eight issues ordered by id, and one download link per export
    type present in ``exports``.
    """
    exports_by_type = {}
    for exported in exports:
        exports_by_type[exported.export_type] = exported

    counts = batch.risk_summary or {}
    risk_rows = [
        f"| {label} | {counts[severity]} |"
        for severity, label in SEVERITY_LABELS.items()
        if counts.get(severity, 0)
    ]
    issue_rows = [
        f"| {SEVERITY_LABELS.get(issue.severity, issue.severity)} | {issue.title} | {issue.status} | {issue.suggestion or '-'} |"
        for issue in batch.issues.order_by("id")[:8]
    ]
    link_specs = (
        ("下载 Markdown 核查报告", ExportedSummaryFile.ExportType.MARKDOWN),
        ("下载 Excel 缺失清单", ExportedSummaryFile.ExportType.EXCEL),
        ("下载 JSON 结果包", ExportedSummaryFile.ExportType.JSON),
    )
    links = [_download_link(label, exports_by_type.get(export_type)) for label, export_type in link_specs]

    parts = [
        "已完成 NMPA 注册资料法规核查。",
        "",
        "| 风险等级 | 数量 |",
        "| --- | --- |",
        *risk_rows,
        "",
        "| 等级 | 问题 | 状态 | 建议 |",
        "| --- | --- | --- | --- |",
        *issue_rows,
        "",
        *links,
    ]
    # A missing export yields None instead of a link line; drop those.
    return "\n".join(part for part in parts if part is not None)
|
||||||
|
|
||||||
|
|
||||||
|
def _download_link(label: str, exported: ExportedSummaryFile | None) -> str | None:
|
||||||
|
if not exported:
|
||||||
|
return None
|
||||||
|
return f"[{label}](/api/review-agent/file-summary/exports/{exported.pk}/download/)"
|
||||||
|
|
||||||
|
|
||||||
|
def _create_export(
    batch: RegulatoryReviewBatch,
    path: Path,
    export_type: str,
    category: str,
    content: str,
) -> ExportedSummaryFile:
    """Write ``content`` to ``path`` as UTF-8 and record it as an exported file.

    The record is attached to the batch's source summary batch and tagged with
    the ``regulatory_review`` workflow type and this batch's primary key.
    """
    path.write_text(content, encoding="utf-8")
    record_values = {
        "batch": batch.source_summary_batch,
        "workflow_type": "regulatory_review",
        "workflow_batch_id": batch.pk,
        "export_category": category,
        "export_type": export_type,
        "file_name": path.name,
        "storage_path": str(path),
    }
    return ExportedSummaryFile.objects.create(**record_values)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_excel_export(batch: RegulatoryReviewBatch, path: Path) -> ExportedSummaryFile:
    """Write the issue checklist workbook to ``path`` and record it as an exported file.

    The sheet has one header row and one row per issue, ordered by id. The last
    two columns hold the citation sources joined with ``"; "`` and the
    ``channel:status`` pairs of the notifications whose payload refers to the
    issue.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "法规问题清单"
    sheet.append(["等级", "类别", "规则", "问题", "状态", "建议", "法规依据", "通知记录"])

    # Read the notifications once; looking them up per issue would repeat the
    # same query for every row.
    notification_pairs = [
        (record.payload.get("issue_id"), f"{record.channel}:{record.status}")
        for record in batch.notifications.all()
        if isinstance(record.payload, dict)
    ]

    for issue in batch.issues.order_by("id"):
        # Tolerate an unset citations value instead of failing on iteration.
        citations = issue.citations or []
        sheet.append(
            [
                SEVERITY_LABELS.get(issue.severity, issue.severity),
                issue.category,
                issue.rule_code,
                issue.title,
                issue.status,
                issue.suggestion,
                "; ".join(str(item.get("source", "")) for item in citations),
                "; ".join(text for issue_id, text in notification_pairs if issue_id == issue.pk),
            ]
        )
    workbook.save(path)

    return ExportedSummaryFile.objects.create(
        batch=batch.source_summary_batch,
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        export_category="issue_checklist",
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=path.name,
        storage_path=str(path),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _review_records(batch: RegulatoryReviewBatch) -> list[dict[str, object]]:
|
||||||
|
records = []
|
||||||
|
for artifact in batch.artifacts.filter(metadata__artifact="review_record").order_by("created_at", "id"):
|
||||||
|
try:
|
||||||
|
records.append(json.loads(Path(artifact.storage_path).read_text(encoding="utf-8")))
|
||||||
|
except (OSError, json.JSONDecodeError):
|
||||||
|
continue
|
||||||
|
return records
|
||||||
|
|
||||||
|
|
||||||
|
def _notification_records(batch: RegulatoryReviewBatch) -> list[dict[str, object]]:
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"channel": record.channel,
|
||||||
|
"target": record.target,
|
||||||
|
"status": record.status,
|
||||||
|
"payload": record.payload,
|
||||||
|
"sent_at": record.sent_at.isoformat() if record.sent_at else "",
|
||||||
|
}
|
||||||
|
for record in batch.notifications.order_by("created_at", "id")
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _notification_summary_for_issue(batch: RegulatoryReviewBatch, issue_id: int) -> str:
|
||||||
|
records = [
|
||||||
|
record
|
||||||
|
for record in batch.notifications.all()
|
||||||
|
if isinstance(record.payload, dict) and record.payload.get("issue_id") == issue_id
|
||||||
|
]
|
||||||
|
return "; ".join(f"{record.channel}:{record.status}" for record in records)
|
||||||
39
review_agent/regulatory_review/services/feishu_notifier.py
Normal file
39
review_agent/regulatory_review/services/feishu_notifier.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryNotificationRecord, RegulatoryReviewBatch
|
||||||
|
|
||||||
|
|
||||||
|
# Issue severities for which create_mock_notifications creates a notification
# record; issues with any other severity are skipped.
NOTIFIABLE_SEVERITIES = {"blocking", "high", "medium"}
|
||||||
|
|
||||||
|
|
||||||
|
def create_mock_notifications(batch: RegulatoryReviewBatch) -> list[RegulatoryNotificationRecord]:
    """Create one mock notification per notifiable issue that has none yet.

    An issue is notifiable when its severity is in ``NOTIFIABLE_SEVERITIES``.
    Issues already referenced by the ``issue_id`` of an existing mock
    notification of this batch are skipped.

    Returns:
        The newly created records, in issue id order.
    """
    mock_channel = RegulatoryNotificationRecord.Channel.MOCK
    stored_payloads = RegulatoryNotificationRecord.objects.filter(
        batch=batch, channel=mock_channel
    ).values_list("payload", flat=True)
    already_notified = set()
    for payload in stored_payloads:
        if isinstance(payload, dict):
            already_notified.add(payload.get("issue_id"))

    created = []
    for issue in batch.issues.order_by("id"):
        if issue.severity in NOTIFIABLE_SEVERITIES and issue.pk not in already_notified:
            notification = RegulatoryNotificationRecord.objects.create(
                batch=batch,
                channel=RegulatoryNotificationRecord.Channel.MOCK,
                target="法规整改负责人",
                status=RegulatoryNotificationRecord.Status.SENT,
                sent_at=timezone.now(),
                payload={
                    "issue_id": issue.pk,
                    "rule_code": issue.rule_code,
                    "severity": issue.severity,
                    "title": issue.title,
                    "suggestion": issue.suggestion,
                },
            )
            created.append(notification)
    return created
|
||||||
241
review_agent/regulatory_review/services/info_extract.py
Normal file
241
review_agent/regulatory_review/services/info_extract.py
Normal file
@@ -0,0 +1,241 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch, RegulatoryReviewBatch
|
||||||
|
from review_agent.regulatory_review.services.llm_review import review_condition_fields
|
||||||
|
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||||
|
|
||||||
|
|
||||||
|
# Allowed choices for each select-type condition field, in display order.
OPTION_FIELDS = dict(
    product_category=["体外诊断试剂", "医疗器械", "其他"],
    registration_type=["首次注册", "变更注册", "延续注册"],
    clinical_evaluation_path=["临床试验", "免临床", "同品种比对", "待确认"],
)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_regulatory_condition_candidates(batch: RegulatoryReviewBatch) -> dict[str, dict[str, object]]:
    """Return the condition candidates of ``batch``, refreshing them when needed.

    The stored candidates are returned unchanged unless the batch is waiting
    for user input and the candidates are incomplete. In that case they are
    detected again from the source summary batch, merged with the stored ones
    and saved back to ``condition_json``.
    """
    stored = batch.condition_json or {}
    existing = stored.get("candidates") or {}
    waiting_for_user = batch.status == RegulatoryReviewBatch.Status.WAITING_USER
    if not (waiting_for_user and _condition_candidates_incomplete(existing)):
        return existing

    detected = detect_regulatory_condition_candidates(batch.source_summary_batch)
    merged = _merge_condition_candidates(existing, detected)
    batch.condition_json = {**stored, "candidates": merged}
    batch.save(update_fields=["condition_json"])
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def detect_regulatory_condition_candidates(summary_batch: FileSummaryBatch) -> dict[str, dict[str, object]]:
    """Infer review-scope condition candidates from a file summary batch.

    A text corpus is built from the batch product name and, for each item in
    ``file_index`` order, its directory level, file name, relative path,
    extracted field values and front text. The select-type candidates are
    detected by keyword from that corpus. The text candidates take the first
    non-empty value extracted for each field across the items.

    Returns:
        A mapping of candidate key to its config (label, input type, suggested
        value, plus options or source).
    """
    corpus_parts = [summary_batch.product_name or ""]
    first_values: dict[str, str] = {}
    first_sources: dict[str, str] = {}

    for item in summary_batch.items.order_by("file_index"):
        corpus_parts += [item.directory_level, item.file_name, item.relative_path]
        review = _extract_item_fields(item)
        extracted = review.get("selected_fields", {})
        sources = review.get("selected_sources", {})
        # The first item that provides a non-empty value for a field wins.
        for key, value in extracted.items():
            if value:
                first_values.setdefault(key, value)
        for key, value in sources.items():
            if value:
                first_sources.setdefault(key, value)
        corpus_parts.extend(extracted.values())
        if review.get("front_text"):
            corpus_parts.append(str(review["front_text"]))

    corpus = "\n".join(part for part in corpus_parts if part)
    product_name = first_values.get("产品名称") or _safe_summary_product_name(summary_batch.product_name)

    select_specs = (
        ("product_category", "产品类别", _detect_product_category),
        ("registration_type", "注册类型", _detect_registration_type),
        ("clinical_evaluation_path", "临床评价路径", _detect_clinical_path),
    )
    candidates: dict[str, dict[str, object]] = {}
    for key, label, detect in select_specs:
        candidates[key] = {
            "label": label,
            "input_type": "select",
            "options": OPTION_FIELDS[key],
            "suggested": detect(corpus),
        }

    candidates["product_name"] = {
        "label": "产品名称",
        "input_type": "text",
        "suggested": product_name,
        "source": first_sources.get("产品名称", "summary" if product_name else ""),
    }
    for key, label in (("model_spec", "型号规格"), ("intended_use", "预期用途")):
        candidates[key] = {
            "label": label,
            "input_type": "text",
            "suggested": first_values.get(label, ""),
            "source": first_sources.get(label, ""),
        }
    return candidates
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_item_fields(item) -> dict[str, object]:
    """Extract and review the condition fields of one summary item.

    Returns an empty dict when the stored file does not exist or its text
    cannot be extracted. Otherwise returns the result of
    ``review_condition_fields`` with two adjustments: sources reported as
    ``"rule"`` for fields that came only from text inference are relabelled
    ``"inferred"``, and ``front_text`` is added.
    """
    file_path = Path(item.storage_path)
    if not file_path.is_absolute():
        file_path = Path(settings.MEDIA_ROOT) / item.storage_path
    if not file_path.exists():
        return {}

    extraction = extract_text(file_path)
    if extraction.status != "success" or not extraction.text:
        return {}

    lead_text = extraction.front_text or extraction.text
    extractor_fields = extraction.field_candidates or {}
    inferred = _infer_fields_from_text(lead_text)
    # Fields reported by the extractor take precedence over inferred ones.
    review = review_condition_fields(
        text=lead_text,
        rule_fields={**inferred, **extractor_fields},
        file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
    )

    sources = dict(review.get("selected_sources") or {})
    for key in inferred:
        if key not in extractor_fields and sources.get(key) == "rule":
            sources[key] = "inferred"
    review["selected_sources"] = sources
    review["front_text"] = extraction.front_text or extraction.text[:1200]
    return review
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_summary_product_name(product_name: str) -> str:
|
||||||
|
value = (product_name or "").strip()
|
||||||
|
if not value:
|
||||||
|
return ""
|
||||||
|
if any(keyword in value for keyword in ["第1章", "第2章", "监管信息", "综述资料", "非临床资料", "章节目录"]):
|
||||||
|
return ""
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_fields_from_text(text: str) -> dict[str, str]:
|
||||||
|
normalized = _normalize_text_for_inference(text)
|
||||||
|
fields = {}
|
||||||
|
product_name = _infer_product_name(normalized)
|
||||||
|
if product_name:
|
||||||
|
fields["产品名称"] = product_name
|
||||||
|
model_spec = _infer_model_spec(normalized)
|
||||||
|
if model_spec:
|
||||||
|
fields["型号规格"] = model_spec
|
||||||
|
return fields
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_text_for_inference(text: str) -> str:
|
||||||
|
value = re.sub(r"\s+", "", text or "")
|
||||||
|
value = value.replace("(", "(").replace(")", ")")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_product_name(text: str) -> str:
|
||||||
|
patterns = [
|
||||||
|
r"体外诊断试剂(?P<name>[^。;;,,]{4,120}?试剂盒\([^()]{2,30}\))产品注册",
|
||||||
|
r"(?P<name>[^。;;,,]{4,120}?试剂盒\([^()]{2,30}\))",
|
||||||
|
]
|
||||||
|
for pattern in patterns:
|
||||||
|
match = re.search(pattern, text)
|
||||||
|
if match:
|
||||||
|
return _restore_chinese_parentheses(_trim_product_name(match.group("name")))
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _trim_product_name(value: str) -> str:
|
||||||
|
prefixes = ["申请境内第三类体外诊断试剂", "申请境内第二类体外诊断试剂", "境内第三类体外诊断试剂", "境内第二类体外诊断试剂"]
|
||||||
|
result = value
|
||||||
|
for prefix in prefixes:
|
||||||
|
if prefix in result:
|
||||||
|
result = result.split(prefix, 1)[-1]
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_model_spec(text: str) -> str:
|
||||||
|
specs = sorted(set(re.findall(r"规格[A-ZA-Z]", text)))
|
||||||
|
if specs:
|
||||||
|
return "、".join(specs)
|
||||||
|
match = re.search(r"产品的包装规格(?P<spec>.{1,80}?(?:人份/盒|测试/盒|反应/盒)(?:[、,,].{1,30}?(?:人份/盒|测试/盒|反应/盒))*)", text)
|
||||||
|
if not match:
|
||||||
|
return ""
|
||||||
|
return _restore_chinese_parentheses(match.group("spec").strip("::,,。;;"))
|
||||||
|
|
||||||
|
|
||||||
|
def _restore_chinese_parentheses(value: str) -> str:
|
||||||
|
return value.replace("(", "(").replace(")", ")")
|
||||||
|
|
||||||
|
|
||||||
|
def _condition_candidates_incomplete(candidates: dict[str, dict[str, object]]) -> bool:
|
||||||
|
if not candidates:
|
||||||
|
return True
|
||||||
|
product_name = str((candidates.get("product_name") or {}).get("suggested") or "").strip()
|
||||||
|
product_category = str((candidates.get("product_category") or {}).get("suggested") or "").strip()
|
||||||
|
return not product_name or "<EFBFBD>" in product_name or product_category == "其他"
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_condition_candidates(
    current: dict[str, dict[str, object]],
    refreshed: dict[str, dict[str, object]],
) -> dict[str, dict[str, object]]:
    """Merge freshly detected candidates into the stored ones.

    A refreshed config replaces the stored one when its suggested value is
    better according to ``_is_better_condition_value``, or when the field is
    not stored yet. Stored fields that are absent from ``refreshed`` are kept.

    Returns:
        A new dict; neither argument is modified.
    """
    merged = dict(current or {})
    for field, config in (refreshed or {}).items():
        stored_value = str((merged.get(field) or {}).get("suggested") or "").strip()
        detected_value = str((config or {}).get("suggested") or "").strip()
        if _is_better_condition_value(stored_value, detected_value) or field not in merged:
            merged[field] = config
    return merged
|
||||||
|
|
||||||
|
|
||||||
|
def _is_better_condition_value(current_value: str, refreshed_value: str) -> bool:
|
||||||
|
if not refreshed_value:
|
||||||
|
return False
|
||||||
|
if "<EFBFBD>" in refreshed_value:
|
||||||
|
return False
|
||||||
|
if "<EFBFBD>" in current_value:
|
||||||
|
return True
|
||||||
|
if not current_value:
|
||||||
|
return True
|
||||||
|
if current_value == "其他" and refreshed_value != "其他":
|
||||||
|
return True
|
||||||
|
if current_value == "待确认" and refreshed_value != "待确认":
|
||||||
|
return True
|
||||||
|
return len(refreshed_value) > len(current_value) and current_value in refreshed_value
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_product_category(corpus: str) -> str:
|
||||||
|
if any(keyword in corpus for keyword in ["体外诊断", "检测试剂", "试剂盒", "IVD"]):
|
||||||
|
return "体外诊断试剂"
|
||||||
|
if "医疗器械" in corpus:
|
||||||
|
return "医疗器械"
|
||||||
|
return "其他"
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_registration_type(corpus: str) -> str:
|
||||||
|
if "延续" in corpus:
|
||||||
|
return "延续注册"
|
||||||
|
if "变更" in corpus:
|
||||||
|
return "变更注册"
|
||||||
|
return "首次注册"
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_clinical_path(corpus: str) -> str:
|
||||||
|
if "免临床" in corpus or "免于临床" in corpus:
|
||||||
|
return "免临床"
|
||||||
|
if "同品种" in corpus or "同类" in corpus:
|
||||||
|
return "同品种比对"
|
||||||
|
if "临床试验" in corpus:
|
||||||
|
return "临床试验"
|
||||||
|
return "待确认"
|
||||||
243
review_agent/regulatory_review/services/llm_review.py
Normal file
243
review_agent/regulatory_review/services/llm_review.py
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import inspect
|
||||||
|
from collections.abc import Callable
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.llm import LLMConfigurationError, LLMRequestError, generate_completion
|
||||||
|
|
||||||
|
|
||||||
|
# Field labels handled by this module, in selection order: _clean_fields and
# _select_fields iterate this list, and _condition_messages sends it to the
# LLM as "allowed_fields".
FIELD_LABELS = ["产品名称", "型号规格", "预期用途", "管理类别", "分类编码", "注册类型", "临床评价路径"]
|
||||||
|
# Type alias: a callable that takes a list of chat messages and returns a string.
# NOTE(review): the functions visible in this module annotate their
# completion_func parameter with Callable[..., str] rather than this alias —
# confirm whether it is still used elsewhere.
CompletionFunc = Callable[[list[dict[str, str]]], str]
|
||||||
|
|
||||||
|
|
||||||
|
def review_condition_fields(
    *,
    text: str,
    rule_fields: dict[str, str],
    file_context: str = "",
    completion_func: Callable[..., str] | None = None,
) -> dict[str, Any]:
    """Review rule-extracted condition fields with the LLM and pick final values.

    When the LLM should not be called the status is ``"skipped"`` and only the
    rule fields are used. When the call or the parsing of its reply fails the
    status is ``"failed"``, ``error_message`` holds the reason and the rule
    fields are used.

    Args:
        text: Source text the fields are extracted from.
        rule_fields: Field values found by the rule-based extraction.
        file_context: Extra file information passed to the LLM.
        completion_func: Completion callable to use; ``generate_completion``
            is used when omitted.

    Returns:
        A dict with ``status``, ``error_message``, ``rule_fields``,
        ``llm_fields``, ``selected_fields`` and ``selected_sources``.
    """
    llm_fields: dict[str, str] = {}
    status = "skipped"
    error_message = ""

    if _should_call_llm(completion_func):
        try:
            raw = _call_completion_with_retries(
                completion_func or generate_completion,
                _condition_messages(text, rule_fields, file_context),
            )
            payload = _parse_json_object(raw)
            candidate_fields = payload.get("fields") or payload
            if not isinstance(candidate_fields, dict):
                # The reply is model output: a list or string under "fields" used
                # to reach _clean_fields and raise an uncaught AttributeError.
                raise ValueError("LLM复核结果中的fields不是JSON对象。")
            llm_fields = _clean_fields(candidate_fields)
            status = "success"
        except (LLMConfigurationError, LLMRequestError, json.JSONDecodeError, TypeError, ValueError, OSError, TimeoutError) as exc:
            status = "failed"
            error_message = str(exc)

    selected_fields, selected_sources = _select_fields(rule_fields, llm_fields)
    return {
        "status": status,
        "error_message": error_message,
        "rule_fields": _clean_fields(rule_fields),
        "llm_fields": llm_fields,
        "selected_fields": selected_fields,
        "selected_sources": selected_sources,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def review_workflow_payload(
    *,
    stage: str,
    payload: dict[str, Any],
    completion_func: Callable[..., str] | None = None,
) -> dict[str, Any]:
    """Ask the LLM to review the rule results of one workflow stage.

    Returns:
        A dict with ``status`` (``"skipped"``, ``"success"`` or ``"failed"``),
        ``stage``, ``result`` (the parsed JSON object, empty unless successful)
        and ``error_message`` (set only on failure).
    """

    def outcome(status: str, result: dict[str, Any], error_message: str = "") -> dict[str, Any]:
        return {
            "status": status,
            "stage": stage,
            "result": result,
            "error_message": error_message,
        }

    if not _should_call_llm(completion_func):
        return outcome("skipped", {})
    try:
        raw = _call_completion_with_retries(
            completion_func or generate_completion,
            _workflow_messages(stage, payload),
        )
        parsed = _parse_json_object(raw)
    except (LLMConfigurationError, LLMRequestError, json.JSONDecodeError, TypeError, ValueError, OSError, TimeoutError) as exc:
        return outcome("failed", {}, str(exc))
    return outcome("success", parsed)
|
||||||
|
|
||||||
|
|
||||||
|
def _condition_messages(text: str, rule_fields: dict[str, str], file_context: str) -> list[dict[str, str]]:
    """Build the chat messages for the condition-field review.

    The user message is a JSON document with the file context, the rule
    fields, the first 4000 characters of ``text`` and the allowed field labels.
    """
    system_prompt = (
        "你是NMPA注册资料字段复核助手。请从附件文本中提取最合理的字段值,"
        "只返回JSON,格式为 {\"fields\": {\"产品名称\": \"...\"}}。"
        "产品名称应包含完整名称、检测对象和方法学括号;不要把章节标题当产品名称。"
    )
    request = {
        "file_context": file_context,
        "rule_fields": rule_fields,
        "text": text[:4000],
        "allowed_fields": FIELD_LABELS,
    }
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": json.dumps(request, ensure_ascii=False)}
    return [system_message, user_message]
|
||||||
|
|
||||||
|
|
||||||
|
def _workflow_messages(stage: str, payload: dict[str, Any]) -> list[dict[str, str]]:
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": (
|
||||||
|
"你是NMPA法规核查复核助手。请复核当前流程节点的规则结果,"
|
||||||
|
"指出可能误判、漏判和更合理的建议。只返回JSON。"
|
||||||
|
),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": json.dumps({"stage": stage, "payload": payload}, ensure_ascii=False)[:6000],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_object(raw: str) -> dict[str, Any]:
|
||||||
|
value = (raw or "").strip()
|
||||||
|
if value.startswith("```"):
|
||||||
|
value = re.sub(r"^```(?:json)?\s*", "", value)
|
||||||
|
value = re.sub(r"\s*```$", "", value)
|
||||||
|
start = value.find("{")
|
||||||
|
end = value.rfind("}")
|
||||||
|
if start >= 0 and end >= start:
|
||||||
|
value = value[start : end + 1]
|
||||||
|
parsed = json.loads(value)
|
||||||
|
if not isinstance(parsed, dict):
|
||||||
|
raise ValueError("LLM复核结果不是JSON对象。")
|
||||||
|
return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _call_completion_with_retries(completion_func: Callable[..., str], messages: list[dict[str, str]]) -> str:
    """Call ``completion_func`` with retries on request-level failures.

    Attempt count, delay between attempts and timeout come from the
    ``REGULATORY_LLM_REVIEW_*`` settings (defaults 3, 0.5 and 15). The timeout
    is passed only when the callable accepts it. ``LLMRequestError``,
    ``OSError`` and ``TimeoutError`` trigger a retry; anything else propagates
    at once.

    Raises:
        The last retryable error once all attempts are used up.
    """
    max_attempts = max(1, int(getattr(settings, "REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", 3) or 3))
    retry_delay = float(getattr(settings, "REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", 0.5) or 0)
    timeout = float(getattr(settings, "REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", 15) or 15)

    call_options: dict[str, float] = {"temperature": 0.0}
    if _accepts_timeout(completion_func):
        call_options["timeout"] = timeout

    failure: Exception | None = None
    for attempt in range(1, max_attempts + 1):
        try:
            return completion_func(messages, **call_options)
        except (LLMRequestError, OSError, TimeoutError) as exc:
            failure = exc
        # No pause after the final attempt.
        if attempt < max_attempts and retry_delay > 0:
            time.sleep(retry_delay)

    if failure:
        raise failure
    raise LLMRequestError("LLM复核调用失败。")
|
||||||
|
|
||||||
|
|
||||||
|
def _accepts_timeout(completion_func: Callable[..., str]) -> bool:
|
||||||
|
try:
|
||||||
|
signature = inspect.signature(completion_func)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return True
|
||||||
|
return "timeout" in signature.parameters
|
||||||
|
|
||||||
|
|
||||||
|
def _should_call_llm(completion_func: Callable[..., str] | None) -> bool:
|
||||||
|
if completion_func is not None:
|
||||||
|
return True
|
||||||
|
if os.environ.get("PYTEST_CURRENT_TEST") and not getattr(settings, "REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS", False):
|
||||||
|
return False
|
||||||
|
return bool(settings.LLM_API_KEY and settings.LLM_MODEL)
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_fields(fields: dict[str, Any]) -> dict[str, str]:
    """Keep the known string fields of ``fields`` with whitespace collapsed.

    Only labels in ``FIELD_LABELS`` are considered. Values that are not
    strings, or that are blank after trimming, are dropped.
    """
    cleaned = {}
    for label in FIELD_LABELS:
        raw = fields.get(label)
        if isinstance(raw, str):
            collapsed = " ".join(raw.strip().split())
            # NOTE(review): both replace calls map a parenthesis to itself, so
            # they change nothing as written. This presumably was meant to convert
            # between ASCII and full-width parentheses — confirm the intended
            # direction before changing it.
            collapsed = collapsed.replace("(", "(").replace(")", ")")
            if collapsed:
                cleaned[label] = collapsed
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def _select_fields(rule_fields: dict[str, str], llm_fields: dict[str, str]) -> tuple[dict[str, str], dict[str, str]]:
    """Choose a value and its source for every known field.

    Returns:
        ``(values, sources)``: two dicts keyed by field label. Fields with no
        chosen value are absent from both.
    """
    cleaned_rules = _clean_fields(rule_fields)
    chosen_values = {}
    chosen_sources = {}
    for label in FIELD_LABELS:
        value, source = _select_field(label, cleaned_rules.get(label, ""), llm_fields.get(label, ""))
        if not value:
            continue
        chosen_values[label] = value
        chosen_sources[label] = source
    return chosen_values, chosen_sources
|
||||||
|
|
||||||
|
|
||||||
|
def _select_field(label: str, rule_value: str, llm_value: str) -> tuple[str, str]:
    """Pick one value for ``label`` and name its origin.

    Returns ``(value, source)`` where ``source`` is ``"rule"``, ``"llm"`` or
    ``""`` when no value is available. The rule value wins unless the LLM value
    is valid and either the rule value is missing, the LLM product name is
    judged better, or the LLM value contains the rule value and is more than
    1.35 times as long.
    """
    if _invalid_field_value(llm_value):
        origin = "rule" if rule_value else ""
        return rule_value, origin

    if not rule_value:
        origin = "llm" if llm_value else ""
        return llm_value, origin

    if not llm_value:
        return rule_value, "rule"

    better_name = label == "产品名称" and _better_product_name(llm_value, rule_value)
    prefer_llm = better_name or (
        len(llm_value) > len(rule_value) * 1.35 and rule_value in llm_value
    )
    if prefer_llm:
        return llm_value, "llm"
    return rule_value, "rule"
|
||||||
|
|
||||||
|
|
||||||
|
def _better_product_name(candidate: str, current: str) -> bool:
|
||||||
|
if current and current in candidate and len(candidate) > len(current):
|
||||||
|
return True
|
||||||
|
product_keywords = ["试剂盒", "检测试剂", "荧光PCR法", "PCR法", "核酸检测"]
|
||||||
|
return len(candidate) > len(current) and any(keyword in candidate for keyword in product_keywords)
|
||||||
|
|
||||||
|
|
||||||
|
def _invalid_field_value(value: str) -> bool:
|
||||||
|
if not value:
|
||||||
|
return True
|
||||||
|
if "<EFBFBD>" in value:
|
||||||
|
return True
|
||||||
|
return any(keyword in value for keyword in ["第1章", "第2章", "第3章", "监管信息", "综述资料", "章节目录"])
|
||||||
57
review_agent/regulatory_review/services/rag_citation.py
Normal file
57
review_agent/regulatory_review/services/rag_citation.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from .rag_embedding import EmbeddingFunction, get_embedding_provider
|
||||||
|
|
||||||
|
|
||||||
|
class RagIndexUnavailable(RuntimeError):
    """Raised when the regulatory RAG index or collection cannot be opened."""
|
||||||
|
|
||||||
|
|
||||||
|
def retrieve_citations(
    query: str,
    *,
    embedding_provider: EmbeddingFunction | None = None,
    collection=None,
    n_results: int = 3,
) -> list[dict[str, object]]:
    """Look up regulatory passages similar to ``query``.

    Args:
        query: Text to embed and search for.
        embedding_provider: Callable turning a list of texts into vectors;
            defaults to ``get_embedding_provider()``.
        collection: Object exposing ``query(query_embeddings=..., n_results=...)``;
            defaults to the collection returned by ``_load_collection()``.
        n_results: Maximum number of hits requested.

    Returns:
        One dict per hit with ``source``, ``text`` and ``score`` (the reported
        distance, or ``None``). When nothing matches, a single placeholder
        entry is returned instead.
    """
    provider = embedding_provider or get_embedding_provider()
    if collection is None:
        collection = _load_collection()
    result = collection.query(query_embeddings=provider([query]), n_results=n_results)

    def first_batch(key: str) -> list:
        # Results are nested one list per query; tolerate missing keys and
        # None values at either nesting level.
        return (result.get(key) or [[]])[0] or []

    documents = first_batch("documents")
    metadatas = first_batch("metadatas")
    distances = first_batch("distances")
    if not documents:
        return [{"source": "原文依据待补充", "text": "RAG 无命中", "score": None}]

    citations: list[dict[str, object]] = []
    for index, document in enumerate(documents):
        # An individual metadata entry may be None; treat it as empty.
        metadata = (metadatas[index] if index < len(metadatas) else None) or {}
        distance = distances[index] if index < len(distances) else None
        citations.append(
            {
                "source": metadata.get("source", "法规材料"),
                "text": document,
                "score": distance,
            }
        )
    return citations
|
||||||
|
|
||||||
|
|
||||||
|
def _load_collection():
    """Open the persisted Chroma collection configured in Django settings.

    Raises:
        RagIndexUnavailable: if the index directory is missing, ``chromadb``
            cannot be imported, or the collection cannot be fetched.
    """
    index_dir = Path(settings.REGULATORY_RAG_CHROMA_PATH)
    if not index_dir.exists():
        raise RagIndexUnavailable("法规 RAG 索引不存在,请先运行 regulatory_rag_build。")

    try:
        import chromadb
    except ImportError as import_error:
        raise RagIndexUnavailable("chromadb 未安装,请先安装 requirements.txt。") from import_error

    client = chromadb.PersistentClient(path=str(index_dir))
    collection_name = settings.REGULATORY_RAG_COLLECTION
    try:
        collection = client.get_collection(collection_name)
    except Exception as lookup_error:
        # Any failure to fetch the collection is reported as an unavailable index.
        raise RagIndexUnavailable("法规 RAG collection 不存在,请先运行 regulatory_rag_build。") from lookup_error
    return collection
|
||||||
82
review_agent/regulatory_review/services/rag_embedding.py
Normal file
82
review_agent/regulatory_review/services/rag_embedding.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import random
|
||||||
|
from typing import Callable, Iterable
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
EmbeddingFunction = Callable[[list[str]], list[list[float]]]
|
||||||
|
|
||||||
|
|
||||||
|
class EmbeddingConfigurationError(RuntimeError):
    """Raised when an embedding provider is misconfigured or unknown."""
|
||||||
|
|
||||||
|
|
||||||
|
class SiliconFlowEmbeddingProvider:
    """Embedding provider that posts texts to ``{base_url}/embeddings``.

    Instances are callable, so they can be used wherever an
    ``EmbeddingFunction`` is expected.
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str,
        model: str,
        dimensions: int,
        timeout: float = 60.0,
    ):
        """Store the connection settings.

        Raises:
            EmbeddingConfigurationError: if ``api_key`` is empty.
        """
        if not api_key:
            raise EmbeddingConfigurationError("SILICONFLOW_API_KEY 未配置。")
        self.api_key = api_key
        # Drop trailing slashes so the endpoint path joins cleanly.
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.dimensions = dimensions
        self.timeout = timeout

    def embed(self, texts: Iterable[str]) -> list[list[float]]:
        """Return one embedding per input text, in response order.

        An empty input yields ``[]`` without issuing a request. HTTP error
        statuses propagate from ``response.raise_for_status()``.
        """
        inputs = list(texts)
        if not inputs:
            # Nothing to embed; skip the network round-trip.
            return []
        response = httpx.post(
            f"{self.base_url}/embeddings",
            headers={"Authorization": f"Bearer {self.api_key}"},
            json={
                "model": self.model,
                "input": inputs,
                "dimensions": self.dimensions,
            },
            timeout=self.timeout,
        )
        response.raise_for_status()
        payload = response.json()
        return [item["embedding"] for item in payload.get("data", [])]

    def __call__(self, texts: list[str]) -> list[list[float]]:
        """Alias for :meth:`embed`."""
        return self.embed(texts)
|
||||||
|
|
||||||
|
|
||||||
|
class DeterministicEmbeddingProvider:
    """Small local embedding substitute for tests and explicit dry runs."""

    def __init__(self, dimensions: int = 16):
        self.dimensions = dimensions

    def __call__(self, texts: list[str]) -> list[list[float]]:
        """Return a reproducible pseudo-random vector for each text.

        The first 16 hex digits of the text's SHA-256 digest seed the
        generator, so equal texts always map to equal vectors.
        """
        return [self._vector_for(text) for text in texts]

    def _vector_for(self, text: str) -> list[float]:
        """Build the vector for a single text."""
        digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        generator = random.Random(int(digest[:16], 16))
        return [generator.uniform(-1, 1) for _ in range(self.dimensions)]
|
||||||
|
|
||||||
|
|
||||||
|
def get_embedding_provider(provider_name: str | None = None) -> EmbeddingFunction:
    """Build the embedding provider selected by name.

    Args:
        provider_name: ``"siliconflow"``, ``"deterministic"`` or ``"local"``;
            defaults to ``settings.REGULATORY_RAG_PROVIDER`` when falsy.

    Raises:
        EmbeddingConfigurationError: for any other provider name.
    """
    name = provider_name or settings.REGULATORY_RAG_PROVIDER

    if name == "siliconflow":
        siliconflow_options = {
            "api_key": settings.SILICONFLOW_API_KEY,
            "base_url": settings.SILICONFLOW_BASE_URL,
            "model": settings.SILICONFLOW_EMBEDDING_MODEL,
            "dimensions": settings.SILICONFLOW_EMBEDDING_DIMENSIONS,
        }
        return SiliconFlowEmbeddingProvider(**siliconflow_options)

    if name in {"deterministic", "local"}:
        return DeterministicEmbeddingProvider()

    raise EmbeddingConfigurationError(f"不支持的 embedding provider:{name}")
|
||||||
155
review_agent/regulatory_review/services/rag_index.py
Normal file
155
review_agent/regulatory_review/services/rag_index.py
Normal file
@@ -0,0 +1,155 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from docx import Document
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
from pypdf import PdfReader
|
||||||
|
from pptx import Presentation
|
||||||
|
|
||||||
|
from .rag_embedding import EmbeddingFunction
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.regulatory_review.rag_index")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class TextChunk:
    """A slice of source text together with its indexing metadata."""

    text: str
    # As built by chunk_text: {"source": <label>, "chunk_index": <int>}.
    metadata: dict[str, object]


def chunk_text(text: str, *, source: str, chunk_size: int = 900, overlap: int = 120) -> list[TextChunk]:
    """Split ``text`` into overlapping character windows.

    Lines are stripped and blank lines dropped before windowing. Consecutive
    windows start ``max(1, chunk_size - overlap)`` characters apart.

    Args:
        text: Raw text to split.
        source: Label stored in each chunk's metadata.
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between neighbouring chunks.

    Returns:
        Chunks in document order; an empty list when no text remains after
        normalisation.
    """
    normalized = "\n".join(line.strip() for line in text.splitlines() if line.strip())
    if not normalized:
        return []

    chunks: list[TextChunk] = []
    step = max(1, chunk_size - overlap)
    total = len(normalized)
    start = 0
    index = 0
    while start < total:
        part = normalized[start : start + chunk_size].strip()
        if part:
            chunks.append(TextChunk(text=part, metadata={"source": source, "chunk_index": index}))
            index += 1
        if start + chunk_size >= total:
            # This window reached the end of the text. Any further window
            # would be fully contained in it, so stop here.
            break
        start += step
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text_from_path(path: Path) -> str:
    """Extract plain text from a file, dispatching on its extension.

    Handles ``.txt``/``.md`` (UTF-8, undecodable bytes ignored), ``.pdf``,
    ``.docx`` (paragraph text), ``.pptx`` (shape text), ``.xlsx`` (non-empty
    cells, tab-separated per row) and ``.doc`` (converted via LibreOffice).
    Any other extension yields an empty string.
    """
    suffix = path.suffix.lower()

    if suffix in {".txt", ".md"}:
        return path.read_text(encoding="utf-8", errors="ignore")

    if suffix == ".pdf":
        return "\n".join(page.extract_text() or "" for page in PdfReader(str(path)).pages)

    if suffix == ".docx":
        return "\n".join(paragraph.text for paragraph in Document(str(path)).paragraphs)

    if suffix == ".pptx":
        presentation = Presentation(str(path))
        return "\n".join(
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )

    if suffix == ".xlsx":
        workbook = load_workbook(path, data_only=True, read_only=True)
        try:
            lines = []
            for sheet in workbook.worksheets:
                for row in sheet.iter_rows(values_only=True):
                    values = [str(cell) for cell in row if cell not in {None, ""}]
                    if values:
                        lines.append("\t".join(values))
        finally:
            # A read-only workbook keeps the file open until closed explicitly.
            workbook.close()
        return "\n".join(lines)

    if suffix == ".doc":
        return _extract_legacy_doc_with_libreoffice(path)

    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_legacy_doc_with_libreoffice(path: Path) -> str:
    """Convert a legacy ``.doc`` to ``.docx`` with ``soffice`` and extract its text.

    The conversion runs in a temporary directory with a 60 second timeout.

    Raises:
        RuntimeError: if ``soffice`` is missing, fails, times out, or does not
            produce the expected ``.docx`` file.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_dir = Path(tmp_dir)
        command = [
            "soffice",
            "--headless",
            "--convert-to",
            "docx",
            "--outdir",
            str(output_dir),
            str(path),
        ]
        conversion_errors = (
            FileNotFoundError,
            subprocess.CalledProcessError,
            subprocess.TimeoutExpired,
        )
        try:
            subprocess.run(command, check=True, capture_output=True, text=True, timeout=60)
        except conversion_errors as exc:
            raise RuntimeError(f"无法通过 LibreOffice 转换法规 .doc 材料:{path.name}") from exc

        converted = output_dir / f"{path.stem}.docx"
        if converted.exists():
            # Read before the temporary directory is removed on exit.
            return extract_text_from_path(converted)
        raise RuntimeError(f"LibreOffice 未生成 docx:{path.name}")
|
||||||
|
|
||||||
|
|
||||||
|
def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
    """Extract and chunk every file found under ``source_dir``.

    Files are visited in sorted path order. A ``RuntimeError`` during
    extraction is logged and the file skipped, unless the file is the
    Attachment 4 core document, in which case the failure is re-raised.
    """
    collected: list[TextChunk] = []
    files = (entry for entry in sorted(source_dir.rglob("*")) if entry.is_file())
    for file_path in files:
        try:
            text = extract_text_from_path(file_path)
        except RuntimeError as exc:
            if _is_attachment4(file_path):
                raise RuntimeError(f"附件 4 核心法规材料抽取失败:{file_path.name}") from exc
            logger.warning(
                "Regulatory source extraction skipped",
                extra={"path": str(file_path), "error": str(exc)},
            )
            continue
        relative_name = str(file_path.relative_to(source_dir))
        collected.extend(chunk_text(text, source=relative_name))
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def _is_attachment4(path: Path) -> bool:
|
||||||
|
normalized = path.name.replace(" ", "")
|
||||||
|
return "附件4" in normalized and "体外诊断试剂注册申报资料要求及说明" in normalized
|
||||||
|
|
||||||
|
|
||||||
|
def build_chroma_index(
    *,
    source_dir: Path,
    embedding_provider: EmbeddingFunction,
    persist_path: Path | None = None,
    collection_name: str | None = None,
) -> int:
    """Chunk the files under ``source_dir`` and upsert them into Chroma.

    Args:
        source_dir: Directory holding the regulatory source files.
        embedding_provider: Callable mapping a list of texts to vectors.
        persist_path: Index directory; defaults to
            ``settings.REGULATORY_RAG_CHROMA_PATH``.
        collection_name: Target collection; defaults to
            ``settings.REGULATORY_RAG_COLLECTION``.

    Returns:
        The number of chunks written (``0`` when no text was found).

    Raises:
        RuntimeError: if ``chromadb`` is not installed.
    """
    try:
        import chromadb
    except ImportError as exc:
        raise RuntimeError("chromadb 未安装,请先安装 requirements.txt。") from exc

    index_dir = persist_path or Path(settings.REGULATORY_RAG_CHROMA_PATH)
    target_name = collection_name or settings.REGULATORY_RAG_COLLECTION
    index_dir.mkdir(parents=True, exist_ok=True)

    chunks = collect_source_chunks(source_dir)
    # The collection is created even when there is nothing to index.
    collection = chromadb.PersistentClient(path=str(index_dir)).get_or_create_collection(target_name)
    if not chunks:
        return 0

    texts = [chunk.text for chunk in chunks]
    embeddings = embedding_provider(texts)

    def chunk_id(chunk) -> str:
        # Ids derive from source and position, so rebuilding overwrites
        # the same entries instead of duplicating them.
        key = f"{chunk.metadata['source']}:{chunk.metadata['chunk_index']}"
        return hashlib.sha256(key.encode("utf-8")).hexdigest()

    collection.upsert(
        ids=[chunk_id(chunk) for chunk in chunks],
        documents=texts,
        metadatas=[chunk.metadata for chunk in chunks],
        embeddings=embeddings,
    )
    return len(chunks)
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch, RegulatoryIssue, RegulatoryReviewBatch
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
||||||
|
from review_agent.regulatory_review.storage import save_artifact
|
||||||
|
|
||||||
|
|
||||||
|
def review_missing_issues(
    *,
    batch: RegulatoryReviewBatch,
    issue_ids: list[int],
    file_summary_batch: FileSummaryBatch,
) -> dict[str, object]:
    """Re-check selected issues against a newer file summary batch.

    Each issue of ``batch`` listed in ``issue_ids`` passes when at least one
    file in ``file_summary_batch`` matches the rule's file keywords or the
    issue title. The outcome is stored on the issue, and the full record is
    saved as a JSON artifact.

    Returns:
        The review record, including the id of the saved artifact.
    """
    rule_set = load_rule_file()
    rules_by_code = {rule["code"]: rule for rule in rule_set.get("requirements", [])}
    summary_items = list(file_summary_batch.items.order_by("file_index"))
    reviewed_items: list[dict[str, object]] = []
    record = {
        "type": "review_record",
        "reviewed_at": timezone.localtime().isoformat(),
        "source_review_batch_id": batch.pk,
        "source_review_batch_no": batch.batch_no,
        "file_summary_batch_id": file_summary_batch.pk,
        "file_summary_batch_no": file_summary_batch.batch_no,
        "items": reviewed_items,
    }

    for issue in RegulatoryIssue.objects.filter(batch=batch, pk__in=issue_ids).order_by("id"):
        rule = rules_by_code.get(issue.rule_code, {})
        keywords = [*rule.get("file_keywords", []), issue.title]
        matched_files = _match_items(summary_items, keywords)

        if matched_files:
            issue.status = RegulatoryIssue.Status.REVIEW_PASSED
        else:
            issue.status = RegulatoryIssue.Status.REVIEW_FAILED

        # Keep earlier evidence and replace only the latest review entry.
        evidence = dict(issue.evidence or {})
        evidence["latest_review"] = {
            "file_summary_batch_id": file_summary_batch.pk,
            "file_summary_batch_no": file_summary_batch.batch_no,
            "matched_files": matched_files,
        }
        issue.evidence = evidence
        issue.save(update_fields=["status", "evidence", "updated_at"])

        reviewed_items.append(
            {
                "issue_id": issue.pk,
                "rule_code": issue.rule_code,
                "title": issue.title,
                "status": issue.status,
                "matched_files": matched_files,
            }
        )

    timestamp = timezone.now().strftime("%Y%m%d%H%M%S")
    artifact = save_artifact(
        batch,
        name=f"review_record_{timestamp}.json",
        artifact_type="json",
        content=json.dumps(record, ensure_ascii=False, indent=2),
        metadata={"artifact": "review_record", "file_summary_batch_id": file_summary_batch.pk},
    )
    record["artifact_id"] = artifact.pk
    return record
|
||||||
|
|
||||||
|
|
||||||
|
def _match_items(items, keywords: list[str]) -> list[dict[str, str]]:
|
||||||
|
normalized_keywords = [str(keyword).lower() for keyword in keywords if keyword]
|
||||||
|
matched = []
|
||||||
|
for item in items:
|
||||||
|
haystack = f"{item.file_name} {item.relative_path} {item.directory_level}".lower()
|
||||||
|
if any(keyword in haystack for keyword in normalized_keywords):
|
||||||
|
matched.append(
|
||||||
|
{
|
||||||
|
"file_name": item.file_name,
|
||||||
|
"relative_path": item.relative_path,
|
||||||
|
"directory_level": item.directory_level,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return matched
|
||||||
50
review_agent/regulatory_review/services/risk_assess.py
Normal file
50
review_agent/regulatory_review/services/risk_assess.py
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryIssue, RegulatoryReviewBatch
|
||||||
|
from review_agent.regulatory_review.schemas import Finding
|
||||||
|
|
||||||
|
from .rag_citation import retrieve_citations
|
||||||
|
|
||||||
|
|
||||||
|
SEVERITY_ORDER = ["blocking", "high", "medium", "low", "info"]
|
||||||
|
|
||||||
|
|
||||||
|
def persist_findings(batch: RegulatoryReviewBatch, findings: list[Finding]) -> list[RegulatoryIssue]:
    """Replace the batch's stored issues with the given findings.

    Existing issues of ``batch`` are deleted first. Findings are de-duplicated
    on ``(rule_code, category, title)``, keeping the first occurrence. Findings
    without citations get them from ``_safe_citations``. The batch's
    ``risk_summary`` is then recomputed and saved.

    Returns:
        The newly created issues, in first-seen order.
    """
    RegulatoryIssue.objects.filter(batch=batch).delete()

    deduplicated: dict[tuple, Finding] = {}
    for finding in findings:
        key = (finding.rule_code, finding.category, finding.title)
        if key not in deduplicated:
            deduplicated[key] = finding

    created = [
        RegulatoryIssue.objects.create(
            batch=batch,
            rule_code=finding.rule_code,
            category=finding.category,
            severity=finding.severity,
            title=finding.title,
            detail=finding.detail,
            suggestion=finding.suggestion,
            evidence=finding.evidence,
            citations=finding.citations or _safe_citations(finding),
        )
        for finding in deduplicated.values()
    ]

    batch.risk_summary = _risk_summary(created)
    batch.save(update_fields=["risk_summary"])
    return created
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_citations(finding: Finding) -> list[dict[str, object]]:
|
||||||
|
try:
|
||||||
|
return retrieve_citations(finding.title)
|
||||||
|
except Exception:
|
||||||
|
return [{"source": "原文依据待补充", "text": "RAG 索引不可用或无命中", "score": None}]
|
||||||
|
|
||||||
|
|
||||||
|
def _risk_summary(issues: list[RegulatoryIssue]) -> dict[str, int]:
    """Count issues per severity, listing every level in ``SEVERITY_ORDER``.

    Levels with no issues are reported as ``0``; severities outside
    ``SEVERITY_ORDER`` are not included.
    """
    tally = Counter(issue.severity for issue in issues)
    summary: dict[str, int] = {}
    for severity in SEVERITY_ORDER:
        summary[severity] = tally.get(severity, 0)
    return summary
|
||||||
127
review_agent/regulatory_review/services/rule_loader.py
Normal file
127
review_agent/regulatory_review/services/rule_loader.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryRuleVersion
|
||||||
|
|
||||||
|
|
||||||
|
DEFAULT_RULE_CODE = "nmpa_ivd_registration_v1"
|
||||||
|
DEFAULT_RULE_PATH = (
|
||||||
|
Path(settings.BASE_DIR)
|
||||||
|
/ "review_agent"
|
||||||
|
/ "regulatory_review"
|
||||||
|
/ "rules"
|
||||||
|
/ "nmpa_ivd_registration_v1.yaml"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class RuleVersionCheck:
    """Result of comparing the rule file on disk with its database record."""

    # Outcome label; check_rule_version uses "missing", "created", "mismatch" and "ok".
    status: str
    # Rule set code.
    code: str
    # Rule file that was checked.
    path: Path
    # SHA-256 of the rule file as currently on disk.
    current_hash: str
    # Hash stored in the database record, or "" when there is no record.
    database_hash: str = ""
    # The database record, when one exists or was created.
    record: RegulatoryRuleVersion | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def compute_file_sha256(path: str | Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB blocks.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with Path(path).open("rb") as stream:
        while block := stream.read(block_size):
            hasher.update(block)
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def load_rule_file(path: str | Path | None = None) -> dict:
    """Load and validate the regulatory rule YAML file.

    Args:
        path: Rule file to read; defaults to ``DEFAULT_RULE_PATH``.

    Returns:
        The parsed rule set.

    Raises:
        ValueError: if the document is not a mapping with the expected
            ``code``, has no non-empty ``requirements`` list, or fails the
            Attachment 4 validation.
    """
    rule_path = Path(path) if path else DEFAULT_RULE_PATH
    with rule_path.open("r", encoding="utf-8") as handle:
        payload = yaml.safe_load(handle) or {}

    # A YAML document whose top level is a list or scalar has no "code"; report
    # it as a validation error instead of failing with AttributeError.
    code = payload.get("code") if isinstance(payload, dict) else None
    if code != DEFAULT_RULE_CODE:
        raise ValueError(f"规则 code 必须为 {DEFAULT_RULE_CODE}")

    requirements = payload.get("requirements")
    if not isinstance(requirements, list) or not requirements:
        raise ValueError("规则文件必须包含 requirements 列表。")

    _validate_attachment4_requirements(payload)
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_attachment4_requirements(payload: dict) -> None:
|
||||||
|
requirements = payload.get("requirements") or []
|
||||||
|
required_codes = {str(code) for code in payload.get("attachment4_required_codes") or []}
|
||||||
|
by_attachment4_code: dict[str, list[dict]] = {}
|
||||||
|
for requirement in requirements:
|
||||||
|
attachment4_code = requirement.get("attachment4_code")
|
||||||
|
if attachment4_code:
|
||||||
|
by_attachment4_code.setdefault(str(attachment4_code), []).append(requirement)
|
||||||
|
for field in ["code", "rule_id", "title", "severity", "file_keywords", "citation_query"]:
|
||||||
|
if attachment4_code and not requirement.get(field):
|
||||||
|
raise ValueError(f"附件4规则 {attachment4_code} 缺少 {field}")
|
||||||
|
missing = sorted(required_codes - set(by_attachment4_code), key=_attachment4_sort_key)
|
||||||
|
if missing:
|
||||||
|
raise ValueError(f"附件4目录项缺少规则:{', '.join(missing)}")
|
||||||
|
|
||||||
|
|
||||||
|
def _attachment4_sort_key(value: str) -> tuple[int, ...]:
|
||||||
|
return tuple(int(part) for part in value.split(".") if part.isdigit())
|
||||||
|
|
||||||
|
|
||||||
|
def check_rule_version(
    *,
    path: str | Path | None = None,
    update_missing: bool = True,
) -> RuleVersionCheck:
    """Compare the rule file's hash with the stored rule version.

    Args:
        path: Rule file to check; defaults to ``DEFAULT_RULE_PATH``.
        update_missing: When no database record exists, create one (status
            ``"created"``) instead of only reporting ``"missing"``.

    Returns:
        A ``RuleVersionCheck`` whose status is ``"missing"``, ``"created"``,
        ``"mismatch"`` or ``"ok"``.

    Raises:
        ValueError: if the rule file fails validation in ``load_rule_file``.
    """
    rule_path = Path(path) if path else DEFAULT_RULE_PATH
    rule_set = load_rule_file(rule_path)
    current_hash = compute_file_sha256(rule_path)
    record = RegulatoryRuleVersion.objects.filter(code=rule_set["code"]).first()

    try:
        yaml_path = str(rule_path.relative_to(settings.BASE_DIR))
    except ValueError:
        # A rule file outside BASE_DIR has no project-relative form; store the
        # path as given rather than failing the whole check.
        yaml_path = str(rule_path)

    if record is None:
        if not update_missing:
            return RuleVersionCheck(
                status="missing",
                code=rule_set["code"],
                path=rule_path,
                current_hash=current_hash,
            )
        record = RegulatoryRuleVersion.objects.create(
            code=rule_set["code"],
            name=rule_set.get("name") or rule_set["code"],
            yaml_path=yaml_path,
            yaml_hash=current_hash,
            rag_collection=rule_set.get("rag_collection", ""),
            status=RegulatoryRuleVersion.Status.ACTIVE,
        )
        status = "created"
    elif record.yaml_hash != current_hash:
        status = "mismatch"
    else:
        status = "ok"

    return RuleVersionCheck(
        status=status,
        code=record.code,
        path=rule_path,
        current_hash=current_hash,
        database_hash=record.yaml_hash,
        record=record,
    )
|
||||||
92
review_agent/regulatory_review/services/structure_check.py
Normal file
92
review_agent/regulatory_review/services/structure_check.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from collections.abc import Callable
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.schemas import Finding
|
||||||
|
|
||||||
|
|
||||||
|
def run_structure_check(
    document_texts: dict[str, str],
    rule_set: dict,
    progress_callback: Callable[[dict[str, object]], None] | None = None,
) -> list[Finding]:
    """Check the submitted documents against the structural requirements.

    For each requirement in ``rule_set["requirements"]``:

    * if ``structure_required`` is set, the title or one of its aliases must
      appear somewhere in the combined document text;
    * if ``required_sections`` is set and some documents match the
      requirement's file keywords, each section must appear in those documents.

    Args:
        document_texts: Extracted text keyed by document name.
        rule_set: Parsed rule set.
        progress_callback: Optional hook called once per requirement with
            ``processed``, ``total``, ``label`` and ``finding_count``.

    Returns:
        All findings, in requirement order.
    """
    findings: list[Finding] = []
    everything = "\n".join(document_texts.values())
    requirements = list(rule_set.get("requirements", []))
    total = len(requirements)

    for position, requirement in enumerate(requirements, start=1):
        if requirement.get("structure_required"):
            expected_names = [requirement.get("title", ""), *requirement.get("aliases", [])]
            if not _contains_any(everything, expected_names):
                findings.append(
                    Finding(
                        rule_code=requirement["code"],
                        category="structure",
                        severity=requirement.get("severity", "medium"),
                        title=f"申报资料目录缺少{_numbered_title(requirement)}章节",
                        detail=f"未在申报资料目录或章节标题候选中发现{_numbered_title(requirement)}。",
                        suggestion=requirement.get("suggestion", ""),
                        evidence={
                            "attachment4_code": requirement.get("attachment4_code"),
                            "expected_title": requirement["title"],
                            "aliases": requirement.get("aliases", []),
                        },
                    )
                )

        sections = requirement.get("required_sections") or []
        relevant_docs = (
            _matching_documents(document_texts, requirement.get("file_keywords", []))
            if sections
            else {}
        )
        if relevant_docs:
            # Sections are only checked within the documents for this requirement.
            scoped_text = "\n".join(relevant_docs.values())
            absent = [section for section in sections if not _contains_any(scoped_text, [section])]
            for section in absent:
                findings.append(
                    Finding(
                        rule_code=f"{requirement['code']}:{section}",
                        category="structure",
                        severity=requirement.get("severity", "medium"),
                        title=f"{requirement['title']}缺少{section}章节",
                        detail=f"已匹配{requirement['title']}文件,但未发现{section}相关内容。",
                        suggestion=requirement.get("suggestion", ""),
                        evidence={"section": section, "files": list(relevant_docs)},
                    )
                )

        if progress_callback:
            progress_callback(
                {
                    "processed": position,
                    "total": total,
                    "label": _numbered_title(requirement),
                    "finding_count": len(findings),
                }
            )

    return findings
|
||||||
|
|
||||||
|
|
||||||
|
def _matching_documents(document_texts: dict[str, str], keywords: list[str]) -> dict[str, str]:
|
||||||
|
if not keywords:
|
||||||
|
return document_texts
|
||||||
|
result = {}
|
||||||
|
for name, text in document_texts.items():
|
||||||
|
haystack = f"{name}\n{text}".lower()
|
||||||
|
if any(str(keyword).lower() in haystack for keyword in keywords):
|
||||||
|
result[name] = text
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _contains_any(text: str, needles: list[str]) -> bool:
    """Return True when any non-empty needle occurs in ``text``.

    Both sides are compared after ``_normalize_title`` normalisation.
    """
    haystack = _normalize_title(text)
    for needle in needles:
        if needle and _normalize_title(needle) in haystack:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_title(value: str) -> str:
|
||||||
|
return "".join(str(value).lower().replace("/", "").replace("/", "").split())
|
||||||
|
|
||||||
|
|
||||||
|
def _numbered_title(requirement: dict) -> str:
|
||||||
|
attachment4_code = requirement.get("attachment4_code")
|
||||||
|
if not attachment4_code:
|
||||||
|
return requirement["title"]
|
||||||
|
return f"{attachment4_code}{requirement['title']}"
|
||||||
101
review_agent/regulatory_review/services/text_extract.py
Normal file
101
review_agent/regulatory_review/services/text_extract.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import re
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .rag_index import extract_text_from_path
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ExtractedText:
    """Outcome of extracting text from one file."""

    # File the text was read from.
    path: Path
    # Extracted text; empty for unsupported or failed files.
    text: str
    # extract_text sets "unsupported", "failed" or "success".
    status: str
    # SHA-256 hex digest of the UTF-8 text, or "" when there is no text.
    content_hash: str = ""
    # Exception message when extraction failed.
    error_message: str = ""
    # Leading portion of the text.
    front_text: str = ""
    # Lines that look like section headings.
    section_candidates: list[str] | None = None
    # Values captured from "label: value" lines, keyed by label.
    field_candidates: dict[str, str] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
SUPPORTED_EXTENSIONS = {".txt", ".md", ".pdf", ".docx", ".pptx", ".xlsx", ".doc"}
|
||||||
|
FIELD_LABELS = ["产品名称", "型号规格", "预期用途", "管理类别", "分类编码", "注册类型", "临床评价路径"]
|
||||||
|
|
||||||
|
|
||||||
|
def extract_text(path: str | Path) -> ExtractedText:
    """Extract text and derived hints from one file without raising.

    Returns an ``ExtractedText`` with status ``"unsupported"`` for unknown
    extensions, ``"failed"`` (with the error message) when extraction raises,
    and ``"success"`` otherwise.
    """
    file_path = Path(path)
    if file_path.suffix.lower() not in SUPPORTED_EXTENSIONS:
        return ExtractedText(path=file_path, text="", status="unsupported")

    try:
        text = extract_text_from_path(file_path)
    except Exception as exc:
        # Any extraction error becomes a "failed" result instead of propagating.
        return ExtractedText(
            path=file_path,
            text="",
            status="failed",
            error_message=str(exc),
            section_candidates=[],
            field_candidates={},
        )

    if text:
        content_hash = hashlib.sha256(text.encode("utf-8")).hexdigest()
    else:
        content_hash = ""

    return ExtractedText(
        path=file_path,
        text=text,
        status="success",
        content_hash=content_hash,
        front_text=_front_text(text),
        section_candidates=_section_candidates(text),
        field_candidates=_field_candidates(text),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _front_text(text: str, limit: int = 1200) -> str:
|
||||||
|
return text[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def _section_candidates(text: str) -> list[str]:
|
||||||
|
candidates = []
|
||||||
|
for line in text.splitlines():
|
||||||
|
normalized = line.strip()
|
||||||
|
if not normalized:
|
||||||
|
continue
|
||||||
|
if re.match(r"^([一二三四五六七八九十]+[、..]|[0-9]+(\.[0-9]+)*[、..\s])", normalized):
|
||||||
|
candidates.append(normalized[:120])
|
||||||
|
elif any(keyword in normalized for keyword in ["章节目录", "监管信息", "综述资料", "非临床资料", "临床评价资料", "质量管理体系"]):
|
||||||
|
candidates.append(normalized[:120])
|
||||||
|
return candidates[:80]
|
||||||
|
|
||||||
|
|
||||||
|
def _field_candidates(text: str) -> dict[str, str]:
    """Extract ``label: value`` pairs for the labels in ``FIELD_LABELS``.

    A value starts after the colon on the label's line and continues over the
    following lines until a blank line, another field line or a section
    heading. The first non-empty value found for a label wins; whitespace in
    values is collapsed to single spaces.
    """
    # NOTE(review): the colon class previously listed ":" twice; the second
    # was presumably the fullwidth colon (U+FF1A), lost to an encoding step.
    label_patterns = {
        label: re.compile(rf"^{re.escape(label)}[:\uff1a]\s*(.*)$") for label in FIELD_LABELS
    }
    fields: dict[str, str] = {}
    lines = text.splitlines()

    for index, line in enumerate(lines):
        normalized = line.strip()
        if not normalized:
            continue
        for label, pattern in label_patterns.items():
            if label in fields:
                continue
            match = pattern.match(normalized)
            if not match:
                continue
            value_parts = [match.group(1).strip()]
            for next_line in lines[index + 1 :]:
                continuation = next_line.strip()
                if (
                    not continuation
                    or _starts_field_line(continuation)
                    or _looks_like_section_heading(continuation)
                ):
                    break
                value_parts.append(continuation)
            value = " ".join(part for part in value_parts if part)
            if value:
                fields[label] = " ".join(value.split())
    return fields
|
||||||
|
|
||||||
|
|
||||||
|
def _starts_field_line(line: str) -> bool:
    """Return True when ``line`` begins a new ``label: value`` entry.

    Matches a known field label followed by a colon, or any 2-24 character
    token (no whitespace or colons) followed by a colon.
    """
    # NOTE(review): the colon classes previously listed ":" twice; the second
    # was presumably the fullwidth colon (U+FF1A), lost to an encoding step.
    if any(re.match(rf"^{re.escape(label)}[:\uff1a]", line) for label in FIELD_LABELS):
        return True
    return bool(re.match(r"^[^\s:\uff1a]{2,24}[:\uff1a]", line))
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_section_heading(line: str) -> bool:
|
||||||
|
return bool(re.match(r"^([一二三四五六七八九十]+[、..]|[0-9]+(\.[0-9]+)*[、..\s])", line))
|
||||||
35
review_agent/regulatory_review/storage.py
Normal file
35
review_agent/regulatory_review/storage.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryArtifact, RegulatoryReviewBatch
|
||||||
|
|
||||||
|
|
||||||
|
def save_artifact(
    batch: RegulatoryReviewBatch,
    *,
    name: str,
    content: str | bytes,
    artifact_type: str,
    metadata: dict | None = None,
) -> RegulatoryArtifact:
    """Write *content* into the batch work directory and register it.

    Args:
        batch: Review batch that owns the artifact. Its ``work_dir`` is used
            when set; otherwise the directory is derived from
            ``MEDIA_ROOT/regulatory_review/work/<batch_no>``.
        name: Requested file name. Only its final path component is used,
            so directory parts supplied by the caller are discarded.
        content: Text (stored as UTF-8) or raw bytes.
        artifact_type: Value stored on the artifact row.
        metadata: Optional metadata; an empty dict is stored when omitted.

    Returns:
        The newly created ``RegulatoryArtifact`` row.
    """
    root = Path(batch.work_dir) if batch.work_dir else Path(settings.MEDIA_ROOT) / "regulatory_review" / "work" / batch.batch_no
    root.mkdir(parents=True, exist_ok=True)
    path = root / Path(name).name

    # Encode once and write exactly those bytes. Writing text through
    # write_text() lets the platform translate newlines, which would make the
    # file on disk differ from the bytes the digest was computed over.
    payload = content if isinstance(content, bytes) else content.encode("utf-8")
    path.write_bytes(payload)
    digest = hashlib.sha256(payload).hexdigest()

    return RegulatoryArtifact.objects.create(
        batch=batch,
        artifact_type=artifact_type,
        name=path.name,
        storage_path=str(path),
        content_hash=digest,
        metadata=metadata or {},
    )
|
||||||
225
review_agent/regulatory_review/views.py
Normal file
225
review_agent/regulatory_review/views.py
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.http import Http404, JsonResponse
|
||||||
|
from django.views.decorators.http import require_http_methods
|
||||||
|
from django.contrib.auth.decorators import login_required
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
|
||||||
|
from review_agent.regulatory_review.events import record_event
|
||||||
|
from review_agent.regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
|
||||||
|
from review_agent.regulatory_review.services.rectification_review import review_missing_issues
|
||||||
|
from review_agent.regulatory_review.workflow import create_regulatory_review_batch, start_regulatory_review_workflow
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return the JSON status of one of the current user's review batches.

    The response carries a ``batch`` summary and the ordered list of
    workflow ``nodes``. While the batch is waiting for the user and
    condition candidates exist, a ``condition_confirmation`` section with
    the confirmation URL is included as well.

    Raises:
        Http404: no batch with this id belongs to the requesting user.
    """
    batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if batch is None:
        raise Http404("批次不存在。")

    condition_candidates = ensure_regulatory_condition_candidates(batch)
    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
    ).order_by("id")

    batch_info = {
        "id": batch.pk,
        "workflow_type": "regulatory_review",
        "batch_no": batch.batch_no,
        "status": batch.status,
        "source_summary_batch_id": batch.source_summary_batch_id,
        "risk_summary": batch.risk_summary,
        "risk_summary_text": _format_risk_summary(batch.risk_summary or {}),
        "error_message": batch.error_message,
    }
    node_rows = []
    for run in node_runs:
        node_rows.append(
            {
                "node_code": run.node_code,
                "node_name": run.node_name,
                "status": run.status,
                "progress": run.progress,
                "message": run.message,
            }
        )
    payload = {"batch": batch_info, "nodes": node_rows}

    awaiting_user = batch.status == RegulatoryReviewBatch.Status.WAITING_USER
    if awaiting_user and condition_candidates:
        payload["condition_confirmation"] = {
            "batch_id": batch.pk,
            "batch_no": batch.batch_no,
            "confirm_url": f"/api/review-agent/regulatory-review/{batch.pk}/conditions/",
            "candidates": condition_candidates,
        }
    return JsonResponse(payload)
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["POST"])
@login_required
def confirm_conditions(request, batch_id: int):
    """Store user-confirmed applicability conditions and resume the workflow.

    Expects a JSON object body of the form ``{"conditions": {...}}``. The
    conditions are normalised, merged into ``batch.condition_json`` with
    ``confirmed=True``, the ``condition_confirm`` node is marked successful
    and the workflow is started again.

    Returns:
        400 when the body cannot be decoded or parsed, is not a JSON object,
        or ``conditions`` is not an object; otherwise the refreshed batch
        summary.

    Raises:
        Http404: no batch with this id belongs to the requesting user.
    """
    batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")
    try:
        payload = json.loads(request.body.decode("utf-8") or "{}")
    except (UnicodeDecodeError, json.JSONDecodeError):
        # A body that is not valid UTF-8 is a client error just like bad JSON.
        return JsonResponse({"error": "请求体不是有效 JSON。"}, status=400)
    # Valid JSON need not be an object ([], null, "x"); only a dict has .get().
    conditions = payload.get("conditions") if isinstance(payload, dict) else None
    if not isinstance(conditions, dict):
        return JsonResponse({"error": "conditions 必须是对象。"}, status=400)

    # NOTE(review): the batch status is not checked here, so posting twice
    # starts the workflow twice. Confirm whether a WAITING_USER guard is wanted.
    batch.condition_json = {
        **(batch.condition_json or {}),
        "confirmed": True,
        "confirmed_conditions": _normalize_conditions(conditions),
    }
    batch.status = RegulatoryReviewBatch.Status.RUNNING
    batch.save(update_fields=["condition_json", "status"])
    WorkflowNodeRun.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        node_code="condition_confirm",
    ).update(
        status=WorkflowNodeRun.Status.SUCCESS,
        progress=100,
        message="适用条件已确认",
    )
    record_event(
        batch,
        "condition_confirmed",
        {"conditions": batch.condition_json["confirmed_conditions"], "resume_from": "rule_scope"},
    )
    start_regulatory_review_workflow(
        batch,
        async_run=getattr(settings, "REGULATORY_REVIEW_ASYNC", True),
    )
    batch.refresh_from_db()
    return JsonResponse(
        {
            "batch": {
                "id": batch.pk,
                "workflow_type": "regulatory_review",
                "batch_no": batch.batch_no,
                "status": batch.status,
                "condition_json": batch.condition_json,
            }
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["POST"])
@login_required
def start_full_review(request, batch_id: int):
    """Start a new review batch that re-checks a full package.

    The new batch is created in the same conversation as the source batch,
    points at the successful file-summary batch named by
    ``file_summary_batch_id`` in the JSON body, and reuses the conditions
    already confirmed on the source batch so no new confirmation is needed.

    Returns:
        400 when the body is not a JSON object or ``file_summary_batch_id``
        does not identify a successful summary batch of this user and
        conversation; otherwise a summary of the new batch.

    Raises:
        Http404: no batch with this id belongs to the requesting user.
    """
    source_batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not source_batch:
        raise Http404("批次不存在。")
    payload, error_response = _json_payload(request)
    if error_response:
        return error_response
    if not isinstance(payload, dict):
        # Valid JSON that is not an object cannot carry the expected keys.
        return JsonResponse({"error": "请求体必须是 JSON 对象。"}, status=400)

    # Coerce the id up front: handing an arbitrary JSON value to filter(pk=...)
    # raises inside the ORM instead of producing a clean 400.
    try:
        summary_batch_pk = int(payload.get("file_summary_batch_id"))
    except (TypeError, ValueError):
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)

    summary_batch = FileSummaryBatch.objects.filter(
        pk=summary_batch_pk,
        conversation=source_batch.conversation,
        user=request.user,
        status=FileSummaryBatch.Status.SUCCESS,
    ).first()
    if not summary_batch:
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)

    new_batch = create_regulatory_review_batch(
        conversation=source_batch.conversation,
        user=request.user,
        source_summary_batch=summary_batch,
    )
    new_batch.condition_json = {
        "source_review_batch_id": source_batch.pk,
        "regenerated_from": {
            "batch_id": source_batch.pk,
            "batch_no": source_batch.batch_no,
            "file_summary_batch_id": source_batch.source_summary_batch_id,
            "file_summary_batch_no": source_batch.source_summary_batch.batch_no,
        },
        "confirmed": True,
        # condition_json may be empty or None on the source batch; the rest of
        # this module guards it the same way.
        "confirmed_conditions": (source_batch.condition_json or {}).get("confirmed_conditions", {}),
    }
    new_batch.save(update_fields=["condition_json"])
    record_event(
        new_batch,
        "full_package_review_started",
        {"source_review_batch_id": source_batch.pk, "source_review_batch_no": source_batch.batch_no},
    )
    start_regulatory_review_workflow(
        new_batch,
        async_run=getattr(settings, "REGULATORY_REVIEW_ASYNC", True),
    )
    new_batch.refresh_from_db()
    return JsonResponse(
        {
            "batch": {
                "id": new_batch.pk,
                "workflow_type": "regulatory_review",
                "batch_no": new_batch.batch_no,
                "status": new_batch.status,
                "source_review_batch_id": source_batch.pk,
            }
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["POST"])
@login_required
def review_issues(request, batch_id: int):
    """Re-review selected issues of a batch against a newer file summary.

    Expects a JSON object with ``issue_ids`` (a list of integer ids) and
    ``file_summary_batch_id`` (a successful summary batch in the same
    conversation, owned by the requesting user).

    Returns:
        400 when the body is not a JSON object, ``issue_ids`` is not a list
        of integers, or the summary batch cannot be found; otherwise
        ``{"review_record": ...}`` with the result of
        ``review_missing_issues``.

    Raises:
        Http404: no batch with this id belongs to the requesting user.
    """
    batch = RegulatoryReviewBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")
    payload, error_response = _json_payload(request)
    if error_response:
        return error_response
    if not isinstance(payload, dict):
        # Valid JSON that is not an object cannot carry the expected keys.
        return JsonResponse({"error": "请求体必须是 JSON 对象。"}, status=400)

    issue_ids = payload.get("issue_ids")
    if not isinstance(issue_ids, list):
        return JsonResponse({"error": "issue_ids 必须是列表。"}, status=400)
    # Convert before touching the database so a stray non-numeric element is
    # reported as a client error rather than surfacing as an unhandled exception.
    try:
        normalized_issue_ids = [int(item) for item in issue_ids]
    except (TypeError, ValueError):
        return JsonResponse({"error": "issue_ids 必须是整数列表。"}, status=400)

    try:
        summary_batch_pk = int(payload.get("file_summary_batch_id"))
    except (TypeError, ValueError):
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)

    summary_batch = FileSummaryBatch.objects.filter(
        pk=summary_batch_pk,
        conversation=batch.conversation,
        user=request.user,
        status=FileSummaryBatch.Status.SUCCESS,
    ).first()
    if not summary_batch:
        return JsonResponse({"error": "file_summary_batch_id 不存在或未成功。"}, status=400)

    record = review_missing_issues(batch=batch, issue_ids=normalized_issue_ids, file_summary_batch=summary_batch)
    return JsonResponse({"review_record": record})
|
||||||
|
|
||||||
|
|
||||||
|
def _format_risk_summary(risk_summary: dict) -> str:
|
||||||
|
labels = [
|
||||||
|
("blocking", "阻断项"),
|
||||||
|
("high", "高风险"),
|
||||||
|
("medium", "中风险"),
|
||||||
|
("low", "低风险"),
|
||||||
|
("info", "提示"),
|
||||||
|
]
|
||||||
|
return " · ".join(
|
||||||
|
f"{label} {int(risk_summary.get(key) or 0)}"
|
||||||
|
for key, label in labels
|
||||||
|
if int(risk_summary.get(key) or 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_conditions(conditions: dict) -> dict[str, str]:
|
||||||
|
allowed = [
|
||||||
|
"product_category",
|
||||||
|
"registration_type",
|
||||||
|
"clinical_evaluation_path",
|
||||||
|
"product_name",
|
||||||
|
"model_spec",
|
||||||
|
"intended_use",
|
||||||
|
]
|
||||||
|
return {key: str(conditions.get(key) or "").strip() for key in allowed}
|
||||||
|
|
||||||
|
|
||||||
|
def _json_payload(request):
|
||||||
|
try:
|
||||||
|
return json.loads(request.body.decode("utf-8") or "{}"), None
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return {}, JsonResponse({"error": "请求体不是有效 JSON。"}, status=400)
|
||||||
562
review_agent/regulatory_review/workflow.py
Normal file
562
review_agent/regulatory_review/workflow.py
Normal file
@@ -0,0 +1,562 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from threading import Thread
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import transaction
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
FileSummaryBatch,
|
||||||
|
Message,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.services.completeness_check import run_completeness_check
|
||||||
|
from review_agent.regulatory_review.services.consistency_check import run_consistency_check
|
||||||
|
from review_agent.regulatory_review.services.export import build_assistant_summary, export_review_results
|
||||||
|
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
|
||||||
|
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
|
||||||
|
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
|
||||||
|
from review_agent.regulatory_review.services.risk_assess import persist_findings
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
||||||
|
from review_agent.regulatory_review.services.structure_check import run_structure_check
|
||||||
|
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||||
|
|
||||||
|
from .events import record_event
|
||||||
|
from .storage import save_artifact
|
||||||
|
|
||||||
|
|
||||||
|
# Workflow nodes as (node_code, node_name, node_group) tuples.
# create_regulatory_review_batch() creates one WorkflowNodeRun per entry in
# this order, and the executor processes the rows ordered by id.
NODE_DEFINITIONS = [
    ("prepare", "准备", "prepare"),
    ("condition_confirm", "适用条件确认", "condition_confirm"),
    ("rule_scope", "规则范围", "rule_scope"),
    ("completeness_check", "完整性核查", "completeness_check"),
    ("text_extract", "文本抽取", "text_extract"),
    ("structure_check", "章节核查", "structure_check"),
    ("consistency_check", "一致性核查", "consistency_check"),
    ("risk_assess", "风险评估", "risk_assess"),
    ("report_export", "报告输出", "report_export"),
    ("completed", "完成", "completed"),
]


# Module logger for the regulatory review workflow.
logger = logging.getLogger("review_agent.regulatory_review.workflow")


# Chapter number (as a string) -> display label. The keys are also the only
# chapter values accepted when narrowing the rule set to a single chapter.
ATTACHMENT4_CHAPTER_LABELS = {
    "1": "第1章 监管信息",
    "2": "第2章 综述资料",
    "3": "第3章 非临床资料",
    "4": "第4章 临床评价资料",
    "5": "第5章 产品说明书和标签样稿",
    "6": "第6章 质量管理体系文件",
}
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowPausedForUser(Exception):
    """Raised to stop the node loop while the batch waits for user input."""
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_no() -> str:
    """Return a new batch number: ``RR-<local timestamp>-<6 hex chars>``."""
    stamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    suffix = uuid4().hex[:6]
    return f"RR-{stamp}-{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_work_dir(batch_no: str) -> Path:
    """Return the work directory path for *batch_no* under ``MEDIA_ROOT``."""
    media_root = Path(settings.MEDIA_ROOT)
    return media_root.joinpath("regulatory_review", "work", batch_no)
|
||||||
|
|
||||||
|
|
||||||
|
def find_latest_successful_summary_batch(conversation: Conversation) -> FileSummaryBatch | None:
    """Return the newest successful file-summary batch of *conversation*.

    Batches are ranked by finish time, then creation time, then id, all
    descending. ``None`` is returned when there is no successful batch.
    """
    successful = FileSummaryBatch.objects.filter(
        conversation=conversation,
        status=FileSummaryBatch.Status.SUCCESS,
    )
    newest_first = successful.order_by("-finished_at", "-created_at", "-id")
    return newest_first.first()
|
||||||
|
|
||||||
|
|
||||||
|
@transaction.atomic
def create_regulatory_review_batch(
    *,
    conversation: Conversation,
    user,
    source_summary_batch: FileSummaryBatch,
    trigger_message: Message | None = None,
) -> RegulatoryReviewBatch:
    """Create a review batch, its work directory and its workflow node rows.

    The initial ``condition_json`` is derived from the trigger message. One
    ``WorkflowNodeRun`` is created per entry of ``NODE_DEFINITIONS`` and a
    ``workflow_created`` event is recorded. The database writes run inside
    a single transaction.
    """
    batch_no = build_batch_no()
    work_dir = build_batch_work_dir(batch_no)
    work_dir.mkdir(parents=True, exist_ok=True)

    initial_conditions = _initial_condition_json(trigger_message)
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        source_summary_batch=source_summary_batch,
        batch_no=batch_no,
        work_dir=str(work_dir),
        condition_json=initial_conditions,
    )

    for node_code, node_name, node_group in NODE_DEFINITIONS:
        WorkflowNodeRun.objects.create(
            workflow_type="regulatory_review",
            workflow_batch_id=batch.pk,
            node_group=node_group,
            node_code=node_code,
            node_name=node_name,
        )

    event_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", event_payload)
    return batch
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryWorkflowExecutor:
    """Runs the nodes of one regulatory review batch in order.

    Findings, extracted document texts, per-file extraction status and LLM
    review results are kept on the instance while the nodes run, so later
    nodes read what earlier nodes produced in the same executor.
    """

    def __init__(self, batch: RegulatoryReviewBatch):
        """Bind the executor to *batch* and reset all in-memory results."""
        self.batch = batch
        # Scoped rule set; loaded lazily by _rules() or by the rule_scope node.
        self.rule_set: dict | None = None
        # Findings accumulated by the check nodes, persisted by risk_assess.
        self.findings = []
        # file_name -> extracted text, only for files extracted successfully.
        self.document_texts: dict[str, str] = {}
        # file_name -> extraction status record for every source file.
        self.text_extract_status: dict[str, dict[str, object]] = {}
        # stage -> result of review_workflow_payload for that stage.
        self.llm_reviews: dict[str, dict[str, object]] = {}

    def run(self) -> None:
        """Execute every node that is not yet successful and finalise the batch.

        The batch is marked RUNNING first. Nodes already in SUCCESS are
        skipped, which is how a resumed run continues after the confirmation
        step. When a node pauses for the user the method returns without
        touching the batch status again. Any other exception marks the batch
        FAILED and stores the message; otherwise the batch ends in SUCCESS.
        """
        logger.info("法规核查工作流开始 batch_no=%s batch_id=%s", self.batch.batch_no, self.batch.pk)
        self.batch.status = RegulatoryReviewBatch.Status.RUNNING
        # NOTE(review): started_at is overwritten on every call, including a
        # resumed run after confirmation. Confirm that is intended.
        self.batch.started_at = timezone.now()
        self.batch.save(update_fields=["status", "started_at"])
        record_event(self.batch, "workflow_started", {"batch_id": self.batch.pk})

        try:
            for node in self._nodes():
                if node.status == WorkflowNodeRun.Status.SUCCESS:
                    continue
                self._run_node(node)
        except WorkflowPausedForUser:
            logger.info("法规核查工作流等待用户 batch_no=%s node=condition_confirm", self.batch.batch_no)
            return
        except Exception as exc:
            # NOTE(review): only the batch is updated here. The failing node's
            # row keeps the RUNNING status written by _run_node.
            logger.exception("Regulatory workflow failed", extra={"batch_id": self.batch.pk})
            self.batch.status = RegulatoryReviewBatch.Status.FAILED
            self.batch.error_message = str(exc)
            self.batch.finished_at = timezone.now()
            self.batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(self.batch, "workflow_failed", {"message": str(exc)})
            return

        self.batch.status = RegulatoryReviewBatch.Status.SUCCESS
        self.batch.finished_at = timezone.now()
        self.batch.save(update_fields=["status", "finished_at"])
        record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})
        logger.info("法规核查工作流完成 batch_no=%s findings=%s", self.batch.batch_no, len(self.findings))

    def _nodes(self):
        """Return this batch's node rows ordered by id."""
        return WorkflowNodeRun.objects.filter(
            workflow_type="regulatory_review",
            workflow_batch_id=self.batch.pk,
        ).order_by("id")

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Run one node, recording a progress event before and after it.

        The node is saved as RUNNING at 10% before its work starts and as
        SUCCESS at 100% afterwards. Exceptions from the node's work propagate
        to the caller before the success update is written.
        """
        logger.info(
            "节点开始 batch_no=%s node=%s name=%s",
            self.batch.batch_no,
            node.node_code,
            node.node_name,
        )
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
        )

        self._execute_node(node)

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
        )
        logger.info(
            "节点完成 batch_no=%s node=%s name=%s progress=%s",
            self.batch.batch_no,
            node.node_code,
            node.node_name,
            node.progress,
        )

    def _update_node_progress(
        self,
        node: WorkflowNodeRun,
        *,
        processed: int,
        total: int,
        message: str,
    ) -> None:
        """Save and publish intermediate progress for *node*.

        Progress is mapped onto the 10-95 range so that 100 stays reserved
        for completion. Nothing happens when *total* is not positive.
        """
        if total <= 0:
            return
        progress = min(95, 10 + int((max(processed, 0) / total) * 85))
        node.progress = progress
        node.message = message
        node.save(update_fields=["progress", "message"])
        record_event(
            self.batch,
            "node_progress",
            {
                "node_code": node.node_code,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
                "processed": processed,
                "total": total,
            },
        )
        logger.info(
            "节点进度 batch_no=%s node=%s progress=%s processed=%s total=%s message=%s",
            self.batch.batch_no,
            node.node_code,
            progress,
            processed,
            total,
            message,
        )

    def _execute_node(self, node: WorkflowNodeRun) -> None:
        """Dispatch on ``node.node_code`` and perform that node's work.

        Node codes without a branch below (for example ``prepare`` and
        ``completed``) do no work here and are simply marked successful by
        the caller.
        """
        node_code = node.node_code
        if node_code == "condition_confirm":
            # Returns normally only when the conditions are already confirmed.
            self._pause_for_condition_confirmation()
            return
        if node_code == "rule_scope":
            self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
            logger.info(
                "方法执行 batch_no=%s method=apply_rule_scope requirements=%s scope=%s",
                self.batch.batch_no,
                len(self.rule_set.get("requirements", [])),
                self.batch.condition_json.get("rule_scope") or {},
            )
            return
        if node_code == "completeness_check":
            findings = run_completeness_check(
                self.batch.source_summary_batch,
                self._rules(),
                progress_callback=lambda update: self._update_node_progress(
                    node,
                    processed=int(update.get("processed") or 0),
                    total=int(update.get("total") or 0),
                    message=(
                        f"完整性核查 {update.get('processed')}/{update.get('total')}:"
                        f"{update.get('label') or ''},发现{update.get('finding_count') or 0}项问题"
                    ),
                ),
            )
            self.findings.extend(findings)
            logger.info(
                "方法执行 batch_no=%s method=run_completeness_check findings=%s source_summary=%s",
                self.batch.batch_no,
                len(findings),
                self.batch.source_summary_batch.batch_no,
            )
            self._save_llm_review(
                "completeness_check",
                {
                    "findings": [finding.to_dict() for finding in findings],
                    "rules_count": len(self._rules().get("requirements", [])),
                },
            )
            return
        if node_code == "text_extract":
            self.document_texts = self._extract_source_texts(node)
            logger.info(
                "方法执行 batch_no=%s method=_extract_source_texts success_docs=%s total_files=%s",
                self.batch.batch_no,
                len(self.document_texts),
                len(self.text_extract_status),
            )
            self._save_llm_review("text_extract", {"files": self.text_extract_status})
            save_artifact(
                self.batch,
                name="text_extract_status.json",
                artifact_type="json",
                content=json.dumps(self.text_extract_status, ensure_ascii=False, indent=2),
                metadata={"artifact": "text_extract_status"},
            )
            return
        if node_code == "structure_check":
            findings = run_structure_check(
                self.document_texts,
                self._rules(),
                progress_callback=lambda update: self._update_node_progress(
                    node,
                    processed=int(update.get("processed") or 0),
                    total=int(update.get("total") or 0),
                    message=(
                        f"章节核查 {update.get('processed')}/{update.get('total')}:"
                        f"{update.get('label') or ''},发现{update.get('finding_count') or 0}项问题"
                    ),
                ),
            )
            self.findings.extend(findings)
            logger.info(
                "方法执行 batch_no=%s method=run_structure_check findings=%s docs=%s",
                self.batch.batch_no,
                len(findings),
                len(self.document_texts),
            )
            self._save_llm_review("structure_check", {"findings": [finding.to_dict() for finding in findings]})
            return
        if node_code == "consistency_check":
            findings = run_consistency_check(
                self.document_texts,
                progress_callback=lambda update: self._update_node_progress(
                    node,
                    processed=int(update.get("processed") or 0),
                    total=int(update.get("total") or 0),
                    message=(
                        f"一致性核查 {update.get('processed')}/{update.get('total')}:"
                        f"{update.get('label') or ''},发现{update.get('finding_count') or 0}项问题"
                    ),
                ),
            )
            self.findings.extend(findings)
            logger.info(
                "方法执行 batch_no=%s method=run_consistency_check findings=%s docs=%s",
                self.batch.batch_no,
                len(findings),
                len(self.document_texts),
            )
            self._save_llm_review("consistency_check", {"findings": [finding.to_dict() for finding in findings]})
            return
        if node_code == "risk_assess":
            # The LLM review runs first so its result is already in
            # self.llm_reviews when the combined artifact below is written.
            self._save_llm_review("risk_assess", {"findings": [finding.to_dict() for finding in self.findings]})
            issues = persist_findings(self.batch, self.findings)
            create_mock_notifications(self.batch)
            logger.info(
                "方法执行 batch_no=%s method=persist_findings issues=%s findings=%s",
                self.batch.batch_no,
                len(issues),
                len(self.findings),
            )
            save_artifact(
                self.batch,
                name="rag_result_json.json",
                artifact_type="json",
                content=json.dumps(
                    {
                        "batch_no": self.batch.batch_no,
                        "text_extract_status": self.text_extract_status,
                        "issues": [
                            {
                                "rule_code": issue.rule_code,
                                "title": issue.title,
                                "citations": issue.citations,
                            }
                            for issue in issues
                        ],
                        "llm_reviews": self.llm_reviews,
                    },
                    ensure_ascii=False,
                    indent=2,
                ),
                metadata={"artifact": "rag_result_json"},
            )
            return
        if node_code == "report_export":
            exports = export_review_results(self.batch)
            logger.info(
                "方法执行 batch_no=%s method=export_review_results exports=%s",
                self.batch.batch_no,
                len(exports),
            )
            # Post the summary into the conversation as an assistant message.
            Message.objects.create(
                conversation=self.batch.conversation,
                role=Message.Role.ASSISTANT,
                content=build_assistant_summary(self.batch, exports),
            )

    def _pause_for_condition_confirmation(self) -> None:
        """Stop the workflow until the user confirms the conditions.

        Returns immediately when ``condition_json`` is already confirmed.
        Otherwise it stores the detected candidates, puts the batch and the
        ``condition_confirm`` node into WAITING_USER, records a
        ``waiting_user`` event and raises.

        Raises:
            WorkflowPausedForUser: always, unless already confirmed.
        """
        if self.batch.condition_json.get("confirmed"):
            return
        candidates = detect_regulatory_condition_candidates(self.batch.source_summary_batch)
        logger.info(
            "方法执行 batch_no=%s method=detect_regulatory_condition_candidates product_category=%s product_name=%s",
            self.batch.batch_no,
            (candidates.get("product_category") or {}).get("suggested"),
            (candidates.get("product_name") or {}).get("suggested"),
        )
        self.batch.condition_json = {
            **(self.batch.condition_json or {}),
            "confirmed": False,
            "resume_from": "rule_scope",
            "candidates": candidates,
        }
        self.batch.status = RegulatoryReviewBatch.Status.WAITING_USER
        self.batch.save(update_fields=["status", "condition_json"])
        # A fresh row is loaded rather than reusing the caller's node object.
        node = WorkflowNodeRun.objects.get(
            workflow_type="regulatory_review",
            workflow_batch_id=self.batch.pk,
            node_code="condition_confirm",
        )
        node.status = WorkflowNodeRun.Status.WAITING_USER
        node.progress = 50
        node.message = "请确认产品类别、注册类型、临床评价路径等适用条件"
        node.save(update_fields=["status", "progress", "message"])
        record_event(
            self.batch,
            "waiting_user",
            {"node_code": "condition_confirm", "candidates": candidates, "resume_from": "rule_scope"},
        )
        raise WorkflowPausedForUser()

    def _rules(self) -> dict:
        """Return the scoped rule set, loading and caching it on first use."""
        if self.rule_set is None:
            self.rule_set = apply_rule_scope(load_rule_file(), self.batch.condition_json.get("rule_scope") or {})
        return self.rule_set

    def _extract_source_texts(self, node: WorkflowNodeRun | None = None) -> dict[str, str]:
        """Extract text from every file of the source summary batch.

        A status record is stored in ``self.text_extract_status`` for each
        file, including files that do not exist on disk. When *node* is
        given, progress is reported after each file.

        Returns:
            Mapping of file name to extracted text, containing only files
            whose extraction succeeded with non-empty text.
        """
        texts = {}
        items = list(self.batch.source_summary_batch.items.order_by("file_index"))
        total = len(items)
        for index, item in enumerate(items, start=1):
            path = Path(item.storage_path)
            if not path.is_absolute():
                # Relative storage paths are resolved against MEDIA_ROOT.
                path = Path(settings.MEDIA_ROOT) / item.storage_path
            if not path.exists():
                logger.info("文本抽取跳过 batch_no=%s file=%s reason=missing", self.batch.batch_no, item.file_name)
                # NOTE(review): results are keyed by file_name only, so two
                # files with the same name overwrite each other's entry.
                self.text_extract_status[item.file_name] = {
                    "status": "missing",
                    "path": str(path),
                    "content_hash": "",
                    "section_candidates": [],
                    "field_candidates": {},
                    "front_text": "",
                }
                if node:
                    self._update_node_progress(
                        node,
                        processed=index,
                        total=total,
                        message=f"文本抽取 {index}/{total}:{item.file_name}(文件不存在)",
                    )
                continue
            result = extract_text(path)
            # The field review is requested for every extracted file,
            # whatever the extraction status was.
            field_review = review_condition_fields(
                text=result.front_text or result.text,
                rule_fields=result.field_candidates or {},
                file_context=f"{item.directory_level}\n{item.file_name}\n{item.relative_path}",
            )
            self.text_extract_status[item.file_name] = {
                "status": result.status,
                "path": str(path),
                "content_hash": result.content_hash,
                "section_candidates": result.section_candidates,
                # Fields selected by the review take precedence; the extracted
                # candidates are used when the review returns no selected_fields.
                "field_candidates": field_review.get("selected_fields", result.field_candidates),
                "field_review": field_review,
                "front_text": result.front_text,
                "error_message": result.error_message,
            }
            if result.status == "success" and result.text:
                texts[item.file_name] = result.text
            logger.info(
                "文本抽取文件 batch_no=%s file=%s status=%s fields=%s chars=%s",
                self.batch.batch_no,
                item.file_name,
                result.status,
                len((field_review.get("selected_fields") or {})),
                len(result.text or ""),
            )
            if node:
                self._update_node_progress(
                    node,
                    processed=index,
                    total=total,
                    message=f"文本抽取 {index}/{total}:{item.file_name}({result.status})",
                )
        return texts

    def _save_llm_review(self, stage: str, payload: dict[str, object]) -> dict[str, object]:
        """Run the LLM review for *stage*, cache it and save it as an artifact.

        The result is stored in ``self.llm_reviews[stage]`` and written to
        ``llm_review_<stage>.json`` in the batch work directory.

        Returns:
            The review returned by ``review_workflow_payload``.
        """
        review = review_workflow_payload(stage=stage, payload=payload)
        self.llm_reviews[stage] = review
        logger.info(
            "方法执行 batch_no=%s method=review_workflow_payload stage=%s status=%s",
            self.batch.batch_no,
            stage,
            review.get("status"),
        )
        save_artifact(
            self.batch,
            name=f"llm_review_{stage}.json",
            artifact_type="json",
            content=json.dumps(review, ensure_ascii=False, indent=2),
            metadata={"artifact": "llm_review", "stage": stage},
        )
        return review
|
||||||
|
|
||||||
|
|
||||||
|
def start_regulatory_review_workflow(batch: RegulatoryReviewBatch, *, async_run: bool = True) -> None:
    """Run the review workflow for *batch*, inline or on a daemon thread.

    Args:
        batch: Batch whose nodes should be executed.
        async_run: When False the workflow runs to completion (or to its
            pause point) before this function returns. When True it is
            started on a background daemon thread and this function returns
            immediately.
    """
    executor = RegulatoryWorkflowExecutor(batch)
    if not async_run:
        executor.run()
        return

    def _run_and_release_connections() -> None:
        # Database connections are per-thread and this thread runs outside the
        # request/response cycle, so nothing else would close the connections
        # it opens. Release them explicitly once the run is over.
        from django.db import connections

        try:
            executor.run()
        finally:
            connections.close_all()

    Thread(target=_run_and_release_connections, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
|
def _initial_condition_json(trigger_message: Message | None) -> dict:
    """Build the starting ``condition_json`` for a new batch.

    When the trigger message names a chapter, the result carries that
    chapter as ``rule_scope``; otherwise it is an empty dict.
    """
    message_text = trigger_message.content if trigger_message else ""
    scope = detect_attachment4_chapter_scope(message_text)
    if scope:
        return {"rule_scope": scope}
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def detect_attachment4_chapter_scope(content: str) -> dict[str, str] | None:
    """Detect a chapter reference in *content* and describe it as a scope.

    Returns:
        ``{"attachment4_chapter": <number>, "label": <label>}`` when the
        text names one of the known chapters, otherwise ``None`` (also for
        empty or whitespace-only text).
    """
    text = (content or "").strip()
    chapter = _extract_chapter_number(text) if text else ""
    if chapter in ATTACHMENT4_CHAPTER_LABELS:
        return {"attachment4_chapter": chapter, "label": ATTACHMENT4_CHAPTER_LABELS[chapter]}
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def apply_rule_scope(rule_set: dict, rule_scope: dict) -> dict:
    """Restrict ``rule_set`` to the requirements of a single chapter.

    Args:
        rule_set: Rule definition; its ``"requirements"`` entry is filtered.
        rule_scope: Scope mapping whose ``"attachment4_chapter"`` selects the
            chapter.

    Returns:
        ``rule_set`` itself (unchanged) when the scope's chapter is not a key
        of ``ATTACHMENT4_CHAPTER_LABELS``. Otherwise a shallow copy whose
        ``"requirements"`` holds only the entries accepted by
        ``_requirement_in_chapter`` and whose ``"active_rule_scope"`` is
        ``rule_scope``. The input mapping is never mutated.
    """
    chapter = str(rule_scope.get("attachment4_chapter") or "")
    if chapter not in ATTACHMENT4_CHAPTER_LABELS:
        return rule_set

    # ``or []`` also covers a "requirements" key that is present but None,
    # which ``.get("requirements", [])`` would return as-is and then fail to
    # iterate.
    requirements = rule_set.get("requirements") or []
    return {
        **rule_set,
        "requirements": [
            requirement
            for requirement in requirements
            if _requirement_in_chapter(requirement, chapter)
        ],
        "active_rule_scope": rule_scope,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _requirement_in_chapter(requirement: dict, chapter: str) -> bool:
|
||||||
|
attachment4_code = str(requirement.get("attachment4_code") or "")
|
||||||
|
return attachment4_code == chapter or attachment4_code.startswith(f"{chapter}.")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_chapter_number(content: str) -> str:
|
||||||
|
match = re.search(r"第\s*([一二三四五六1-6])\s*[章节张]", content)
|
||||||
|
if match:
|
||||||
|
return _normalize_chapter_number(match.group(1))
|
||||||
|
match = re.search(r"(^|[^\d])([1-6])\s*[章节张]", content)
|
||||||
|
if match:
|
||||||
|
return match.group(2)
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_chapter_number(value: str) -> str:
|
||||||
|
chinese = {"一": "1", "二": "2", "三": "3", "四": "4", "五": "5", "六": "6"}
|
||||||
|
return chinese.get(value, value)
|
||||||
@@ -10,7 +10,12 @@ from django.utils import timezone
|
|||||||
from .file_summary.skills.attachment_reader import AttachmentReaderSkill
|
from .file_summary.skills.attachment_reader import AttachmentReaderSkill
|
||||||
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
|
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
|
||||||
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
|
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
|
||||||
from .models import Conversation, FileAttachment, Message
|
from .models import Conversation, FileAttachment, FileSummaryBatch, Message
|
||||||
|
from .regulatory_review.workflow import (
|
||||||
|
create_regulatory_review_batch,
|
||||||
|
find_latest_successful_summary_batch,
|
||||||
|
start_regulatory_review_workflow,
|
||||||
|
)
|
||||||
from .skill_router import route_message_intent
|
from .skill_router import route_message_intent
|
||||||
|
|
||||||
|
|
||||||
@@ -219,6 +224,85 @@ def stream_message(conversation: Conversation, content: str):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if route.starts_regulatory_review:
|
||||||
|
source_summary_batch = find_latest_successful_summary_batch(conversation)
|
||||||
|
if not source_summary_batch:
|
||||||
|
if not _has_active_attachments(conversation):
|
||||||
|
reply_content = "请先在当前对话右侧上传需要核查的文件或压缩包,我会先自动汇总再继续法规核查。"
|
||||||
|
assistant_message = append_assistant_message(conversation, reply_content)
|
||||||
|
yield sse_event("chunk", {"delta": reply_content})
|
||||||
|
yield sse_event(
|
||||||
|
"done",
|
||||||
|
{
|
||||||
|
"assistant_message_id": assistant_message.pk,
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
summary_batch = create_file_summary_batch(
|
||||||
|
conversation=conversation,
|
||||||
|
user=conversation.user,
|
||||||
|
trigger_message=user_message,
|
||||||
|
)
|
||||||
|
yield sse_event(
|
||||||
|
"workflow_started",
|
||||||
|
{
|
||||||
|
"workflow_type": "file_summary",
|
||||||
|
"batch_id": summary_batch.pk,
|
||||||
|
"batch_no": summary_batch.batch_no,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
start_file_summary_workflow(summary_batch, async_run=False)
|
||||||
|
summary_batch.refresh_from_db()
|
||||||
|
if summary_batch.status != FileSummaryBatch.Status.SUCCESS:
|
||||||
|
reply_content = f"已先启动文件目录与页数自动汇总工作流,批次号:{summary_batch.batch_no},但汇总未成功:{summary_batch.error_message or '原因待查看'}。请处理后再启动法规核查。"
|
||||||
|
assistant_message = append_assistant_message(conversation, reply_content)
|
||||||
|
yield sse_event("chunk", {"delta": reply_content})
|
||||||
|
yield sse_event(
|
||||||
|
"done",
|
||||||
|
{
|
||||||
|
"assistant_message_id": assistant_message.pk,
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
source_summary_batch = summary_batch
|
||||||
|
reply_prefix = f"已先启动文件目录与页数自动汇总工作流,批次号:{summary_batch.batch_no},汇总完成后继续法规核查。\n"
|
||||||
|
else:
|
||||||
|
reply_prefix = ""
|
||||||
|
batch = create_regulatory_review_batch(
|
||||||
|
conversation=conversation,
|
||||||
|
user=conversation.user,
|
||||||
|
trigger_message=user_message,
|
||||||
|
source_summary_batch=source_summary_batch,
|
||||||
|
)
|
||||||
|
start_regulatory_review_workflow(
|
||||||
|
batch,
|
||||||
|
async_run=getattr(settings, "REGULATORY_REVIEW_ASYNC", True),
|
||||||
|
)
|
||||||
|
reply_content = f"{reply_prefix}已启动 NMPA 注册资料法规核查工作流,批次号:{batch.batch_no}。"
|
||||||
|
assistant_message = append_assistant_message(conversation, reply_content)
|
||||||
|
yield sse_event(
|
||||||
|
"workflow_started",
|
||||||
|
{
|
||||||
|
"workflow_type": "regulatory_review",
|
||||||
|
"batch_id": batch.pk,
|
||||||
|
"batch_no": batch.batch_no,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
yield sse_event("chunk", {"delta": reply_content})
|
||||||
|
yield sse_event(
|
||||||
|
"done",
|
||||||
|
{
|
||||||
|
"assistant_message_id": assistant_message.pk,
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
stream_failed = False
|
stream_failed = False
|
||||||
stream_error = ""
|
stream_error = ""
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from .models import Conversation, FileAttachment
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"}
|
ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"}
|
||||||
|
ROUTE_ACTIONS.add("regulatory_review")
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -34,6 +35,10 @@ class SkillRoute:
|
|||||||
def starts_file_summary(self) -> bool:
|
def starts_file_summary(self) -> bool:
|
||||||
return self.action == "file_summary"
|
return self.action == "file_summary"
|
||||||
|
|
||||||
|
@property
def starts_regulatory_review(self) -> bool:
    """Whether this route's ``action`` is ``"regulatory_review"``."""
    return self.action == "regulatory_review"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_normal_chat(self) -> bool:
|
def is_normal_chat(self) -> bool:
|
||||||
return self.action == "normal_chat"
|
return self.action == "normal_chat"
|
||||||
@@ -100,7 +105,7 @@ def _route_with_llm(
|
|||||||
return SkillRoute(
|
return SkillRoute(
|
||||||
action=action,
|
action=action,
|
||||||
skill_name="attachment_reader" if action == "attachment_reader" else "",
|
skill_name="attachment_reader" if action == "attachment_reader" else "",
|
||||||
workflow_type="file_summary" if action == "file_summary" else "",
|
workflow_type=action if action in {"file_summary", "regulatory_review"} else "",
|
||||||
confidence=_float_or_zero(payload.get("confidence")),
|
confidence=_float_or_zero(payload.get("confidence")),
|
||||||
reason=str(payload.get("reason") or ""),
|
reason=str(payload.get("reason") or ""),
|
||||||
source="llm",
|
source="llm",
|
||||||
@@ -108,6 +113,15 @@ def _route_with_llm(
|
|||||||
|
|
||||||
|
|
||||||
def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute:
|
def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute:
|
||||||
|
if _matches_regulatory_review(content):
|
||||||
|
return SkillRoute(
|
||||||
|
action="regulatory_review",
|
||||||
|
workflow_type="regulatory_review",
|
||||||
|
confidence=0.7,
|
||||||
|
reason="命中法规核查关键词。",
|
||||||
|
source="rule_fallback",
|
||||||
|
)
|
||||||
|
|
||||||
file_summary = evaluate_file_summary_trigger(conversation, content)
|
file_summary = evaluate_file_summary_trigger(conversation, content)
|
||||||
if file_summary.should_start or file_summary.reason == "missing_attachment":
|
if file_summary.should_start or file_summary.reason == "missing_attachment":
|
||||||
return SkillRoute(
|
return SkillRoute(
|
||||||
@@ -148,9 +162,10 @@ def _router_system_prompt() -> str:
|
|||||||
return (
|
return (
|
||||||
"你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。"
|
"你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。"
|
||||||
"你必须只输出 JSON 对象,不要输出 Markdown。"
|
"你必须只输出 JSON 对象,不要输出 Markdown。"
|
||||||
"可选 action:normal_chat、attachment_reader、file_summary。"
|
"可选 action:normal_chat、attachment_reader、file_summary、regulatory_review。"
|
||||||
"attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。"
|
"attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。"
|
||||||
"file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。"
|
"file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。"
|
||||||
|
"regulatory_review 用于用户要求法规核查、NMPA核查、完整性核查、章节一致性核查、风险预警或整改建议。"
|
||||||
"normal_chat 用于不需要读取附件或执行工作流的一般问答。"
|
"normal_chat 用于不需要读取附件或执行工作流的一般问答。"
|
||||||
"输出字段:action、confidence、reason。"
|
"输出字段:action、confidence、reason。"
|
||||||
)
|
)
|
||||||
@@ -187,3 +202,18 @@ def _float_or_zero(value) -> float:
|
|||||||
return float(value)
|
return float(value)
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_regulatory_review(content: str) -> bool:
|
||||||
|
normalized = content.lower()
|
||||||
|
keywords = [
|
||||||
|
"法规核查",
|
||||||
|
"nmpa核查",
|
||||||
|
"nmpa 核查",
|
||||||
|
"完整性核查",
|
||||||
|
"风险预警",
|
||||||
|
"整改建议",
|
||||||
|
"章节核查",
|
||||||
|
"一致性核查",
|
||||||
|
]
|
||||||
|
return any(keyword in normalized for keyword in keywords)
|
||||||
|
|||||||
@@ -10,6 +10,12 @@ from .file_summary.views import (
|
|||||||
conversation_messages,
|
conversation_messages,
|
||||||
export_download,
|
export_download,
|
||||||
)
|
)
|
||||||
|
from .regulatory_review.views import (
|
||||||
|
batch_status as regulatory_review_batch_status,
|
||||||
|
confirm_conditions as regulatory_review_confirm_conditions,
|
||||||
|
review_issues as regulatory_review_review_issues,
|
||||||
|
start_full_review as regulatory_review_start_full_review,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
@@ -58,4 +64,24 @@ urlpatterns = [
|
|||||||
export_download,
|
export_download,
|
||||||
name="file_summary_export_download",
|
name="file_summary_export_download",
|
||||||
),
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/regulatory-review/<int:batch_id>/status/",
|
||||||
|
regulatory_review_batch_status,
|
||||||
|
name="regulatory_review_batch_status",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/regulatory-review/<int:batch_id>/conditions/",
|
||||||
|
regulatory_review_confirm_conditions,
|
||||||
|
name="regulatory_review_confirm_conditions",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/regulatory-review/<int:batch_id>/full-review/",
|
||||||
|
regulatory_review_start_full_review,
|
||||||
|
name="regulatory_review_start_full_review",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/regulatory-review/<int:batch_id>/issue-review/",
|
||||||
|
regulatory_review_review_issues,
|
||||||
|
name="regulatory_review_review_issues",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -11,7 +11,8 @@ from .services import (
|
|||||||
send_message,
|
send_message,
|
||||||
stream_message,
|
stream_message,
|
||||||
)
|
)
|
||||||
from .models import Conversation, FileAttachment, FileSummaryBatch
|
from .models import Conversation, FileAttachment, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
|
||||||
|
from .regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
|
||||||
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@@ -42,6 +43,9 @@ def workspace(request: HttpRequest) -> HttpResponse:
|
|||||||
if current is None and conversations.exists():
|
if current is None and conversations.exists():
|
||||||
current = conversations.first()
|
current = conversations.first()
|
||||||
|
|
||||||
|
workflow_cards = build_workflow_cards(current) if current else []
|
||||||
|
condition_confirmation = build_condition_confirmation(workflow_cards)
|
||||||
|
|
||||||
return render(
|
return render(
|
||||||
request,
|
request,
|
||||||
"home.html",
|
"home.html",
|
||||||
@@ -52,7 +56,8 @@ def workspace(request: HttpRequest) -> HttpResponse:
|
|||||||
"current_conversation": current,
|
"current_conversation": current,
|
||||||
"messages": current.messages.all() if current else [],
|
"messages": current.messages.all() if current else [],
|
||||||
"attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [],
|
"attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [],
|
||||||
"summary_batches": FileSummaryBatch.objects.filter(conversation=current).prefetch_related("node_runs").order_by("-created_at")[:5] if current else [],
|
"workflow_cards": workflow_cards,
|
||||||
|
"condition_confirmation": condition_confirmation,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -109,3 +114,76 @@ def stream_chat(request: HttpRequest) -> HttpResponse:
|
|||||||
response["Cache-Control"] = "no-cache"
|
response["Cache-Control"] = "no-cache"
|
||||||
response["X-Accel-Buffering"] = "no"
|
response["X-Accel-Buffering"] = "no"
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
def build_workflow_cards(conversation: Conversation) -> list[dict[str, object]]:
    """Build the most recent workflow cards shown for a conversation.

    File-summary and regulatory-review batches of ``conversation`` are turned
    into plain dicts, merged, sorted by ``created_at`` (newest first) and
    trimmed to five entries.

    Returns:
        At most five card dicts. Every card carries ``id``, ``workflow_type``,
        ``batch_no``, ``status``, ``error_message``, ``risk_label``,
        ``created_at`` and ``nodes``; regulatory-review cards additionally
        carry ``condition_json``, ``condition_candidates``,
        ``notification_count`` and ``review_record_count``.
    """
    max_cards = 5
    cards: list[dict[str, object]] = []

    # Only the newest ``max_cards`` summaries can survive the final cut, so
    # older ones are not loaded at all.
    summary_batches = (
        FileSummaryBatch.objects.filter(conversation=conversation)
        .prefetch_related("node_runs")
        .order_by("-created_at")[:max_cards]
    )
    for batch in summary_batches:
        cards.append(
            {
                "id": batch.pk,
                "workflow_type": "file_summary",
                "batch_no": batch.batch_no,
                "status": batch.status,
                "error_message": batch.error_message,
                "risk_label": "",
                "created_at": batch.created_at,
                # Sort the prefetched rows in Python: calling ``order_by`` on
                # the related manager would ignore the prefetch cache and
                # issue one extra query per batch.
                "nodes": sorted(batch.node_runs.all(), key=lambda node: node.id),
            }
        )

    # NOTE(review): every regulatory batch is still visited because
    # ensure_regulatory_condition_candidates is called for each one; whether
    # that call has side effects is not visible here, so the set of batches
    # it runs on is left unchanged.
    regulatory_batches = RegulatoryReviewBatch.objects.filter(conversation=conversation)
    for batch in regulatory_batches:
        condition_candidates = ensure_regulatory_condition_candidates(batch)
        cards.append(
            {
                "id": batch.pk,
                "workflow_type": "regulatory_review",
                "batch_no": batch.batch_no,
                "status": batch.status,
                "error_message": batch.error_message,
                "risk_label": _format_risk_label(batch.risk_summary or {}),
                "condition_json": batch.condition_json or {},
                "condition_candidates": condition_candidates,
                "notification_count": batch.notifications.count(),
                "review_record_count": batch.artifacts.filter(metadata__artifact="review_record").count(),
                "created_at": batch.created_at,
                "nodes": list(
                    WorkflowNodeRun.objects.filter(
                        workflow_type="regulatory_review",
                        workflow_batch_id=batch.pk,
                    ).order_by("id")
                ),
            }
        )

    return sorted(cards, key=lambda item: item["created_at"], reverse=True)[:max_cards]
|
||||||
|
|
||||||
|
|
||||||
|
def build_condition_confirmation(workflow_cards: list[dict[str, object]]) -> dict[str, object] | None:
|
||||||
|
for card in workflow_cards:
|
||||||
|
if (
|
||||||
|
card.get("workflow_type") == "regulatory_review"
|
||||||
|
and card.get("status") == RegulatoryReviewBatch.Status.WAITING_USER
|
||||||
|
and card.get("condition_candidates")
|
||||||
|
):
|
||||||
|
return {
|
||||||
|
"id": card["id"],
|
||||||
|
"batch_no": card["batch_no"],
|
||||||
|
"candidates": card["condition_candidates"],
|
||||||
|
}
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _format_risk_label(risk_summary: dict) -> str:
|
||||||
|
parts = []
|
||||||
|
labels = [
|
||||||
|
("blocking", "阻断项"),
|
||||||
|
("high", "高风险"),
|
||||||
|
("medium", "中风险"),
|
||||||
|
("low", "低风险"),
|
||||||
|
("info", "提示"),
|
||||||
|
]
|
||||||
|
for key, label in labels:
|
||||||
|
count = int(risk_summary.get(key) or 0)
|
||||||
|
if count:
|
||||||
|
parts.append(f"{label} {count}")
|
||||||
|
return " · ".join(parts)
|
||||||
|
|||||||
238
static/js/app.js
238
static/js/app.js
@@ -310,7 +310,7 @@
|
|||||||
|
|
||||||
function appendConversationMessage(message) {
|
function appendConversationMessage(message) {
|
||||||
if (!message || document.querySelector('.message[data-message-id="' + message.id + '"]')) {
|
if (!message || document.querySelector('.message[data-message-id="' + message.id + '"]')) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
var label = message.role === "assistant" ? "AI " : "用户 ";
|
var label = message.role === "assistant" ? "AI " : "用户 ";
|
||||||
label += document.querySelectorAll(".message").length + 1;
|
label += document.querySelectorAll(".message").length + 1;
|
||||||
@@ -320,6 +320,7 @@
|
|||||||
if (message.role === "user") {
|
if (message.role === "user") {
|
||||||
appendNode(created.article.id, label, true);
|
appendNode(created.article.id, label, true);
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function refreshConversationMessages() {
|
async function refreshConversationMessages() {
|
||||||
@@ -337,14 +338,21 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
var payload = await response.json();
|
var payload = await response.json();
|
||||||
(payload.messages || []).forEach(appendConversationMessage);
|
var appendedCount = 0;
|
||||||
|
(payload.messages || []).forEach(function (message) {
|
||||||
|
if (appendConversationMessage(message)) {
|
||||||
|
appendedCount += 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
if (payload.latest_message_id) {
|
if (payload.latest_message_id) {
|
||||||
latestMessageId = Math.max(latestMessageId, payload.latest_message_id);
|
latestMessageId = Math.max(latestMessageId, payload.latest_message_id);
|
||||||
}
|
}
|
||||||
syncNodeRailVisibility();
|
syncNodeRailVisibility();
|
||||||
bindNodeAnchorClicks();
|
bindNodeAnchorClicks();
|
||||||
setActiveNode();
|
setActiveNode();
|
||||||
|
if (appendedCount > 0) {
|
||||||
scrollChatToBottom();
|
scrollChatToBottom();
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Conversation message refresh failed", error);
|
console.error("Conversation message refresh failed", error);
|
||||||
}
|
}
|
||||||
@@ -455,6 +463,12 @@
|
|||||||
return summaryPanel.getAttribute(attributeName).replace(token, value);
|
return summaryPanel.getAttribute(attributeName).replace(token, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build the status-polling URL for a batch. Regulatory-review batches use
// their own URL template attribute; every other workflow type uses the
// file-summary status template.
function statusUrlForWorkflow(workflow_type, batchId) {
  var attributeName = "data-status-url-template";
  if (workflow_type === "regulatory_review") {
    attributeName = "data-regulatory-status-url-template";
  }
  return templateUrl(attributeName, "__batch_id__", batchId);
}
|
||||||
|
|
||||||
function renderAttachments(attachments) {
|
function renderAttachments(attachments) {
|
||||||
if (!attachmentList) {
|
if (!attachmentList) {
|
||||||
return;
|
return;
|
||||||
@@ -542,13 +556,17 @@
|
|||||||
if (empty) {
|
if (empty) {
|
||||||
empty.remove();
|
empty.remove();
|
||||||
}
|
}
|
||||||
var card = workflowCardList.querySelector('[data-batch-id="' + batch.batch_id + '"]');
|
var workflow_type = batch.workflow_type || "file_summary";
|
||||||
|
var card = workflowCardList.querySelector(
|
||||||
|
'[data-batch-id="' + batch.batch_id + '"][data-workflow-type="' + workflow_type + '"]'
|
||||||
|
);
|
||||||
if (card) {
|
if (card) {
|
||||||
return card;
|
return card;
|
||||||
}
|
}
|
||||||
card = document.createElement("article");
|
card = document.createElement("article");
|
||||||
card.className = "workflow-card";
|
card.className = "workflow-card";
|
||||||
card.setAttribute("data-batch-id", batch.batch_id);
|
card.setAttribute("data-batch-id", batch.batch_id);
|
||||||
|
card.setAttribute("data-workflow-type", workflow_type);
|
||||||
card.innerHTML =
|
card.innerHTML =
|
||||||
"<header><strong>" +
|
"<header><strong>" +
|
||||||
escapeHtml(batch.batch_no || "文件汇总") +
|
escapeHtml(batch.batch_no || "文件汇总") +
|
||||||
@@ -634,13 +652,91 @@
|
|||||||
selectWorkflowBatchIndex(activeIndex);
|
selectWorkflowBatchIndex(activeIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function refreshWorkflowCard(batchId) {
|
// Insert (once) the chat card that asks the user to confirm the applicable
// conditions of a regulatory-review batch. Cards belonging to other batches
// are removed first, so at most one confirmation card is shown. Does nothing
// when the chat container is missing or the payload carries no candidates.
function ensureConditionConfirmationCard(confirmation) {
  if (!chatScroll || !confirmation || !confirmation.candidates) {
    return;
  }
  var cardId = "condition-confirmation-" + confirmation.batch_id;
  removeStaleConditionConfirmationCards(cardId);
  if (document.getElementById(cardId)) {
    return;
  }

  // The CSRF token is copied from the composer form. `new FormData(null)`
  // throws, so fall back to an empty token when the composer is absent.
  var csrfToken = "";
  if (composer) {
    csrfToken = new FormData(composer).get("csrfmiddlewaretoken") || "";
  }

  var article = document.createElement("article");
  article.className = "message assistant";
  article.id = cardId;
  article.setAttribute("data-condition-confirmation-card", "");
  article.setAttribute("data-node-label", "AI 适用条件确认");

  var avatar = document.createElement("div");
  avatar.className = "message-avatar";
  avatar.textContent = "AI";

  var bubble = document.createElement("div");
  bubble.className = "message-bubble";

  var form = document.createElement("form");
  form.className = "condition-confirm-form";
  form.setAttribute("data-condition-confirm-form", "");
  form.setAttribute("data-batch-id", confirmation.batch_id);
  form.setAttribute("data-confirm-url", confirmation.confirm_url);
  // Every interpolated value goes through escapeHtml before reaching innerHTML.
  form.innerHTML =
    '<input type="hidden" name="csrfmiddlewaretoken" value="' +
    escapeHtml(csrfToken) +
    '">' +
    "<strong>适用条件确认</strong>" +
    "<p>请确认 " +
    escapeHtml(confirmation.batch_no || "") +
    " 的产品类别、注册类型和临床评价路径,确认后我会继续法规核查。</p>" +
    renderConditionFields(confirmation.candidates) +
    '<button type="submit">确认并继续</button>' +
    '<p class="condition-confirm-status" data-condition-confirm-status></p>';

  bubble.appendChild(form);
  article.appendChild(avatar);
  article.appendChild(bubble);
  chatScroll.appendChild(article);

  // Attach the submit handler to the freshly inserted form.
  bindConditionConfirmForms();
  scrollChatToBottom();
}
|
||||||
|
|
||||||
|
// Remove every condition-confirmation card except the one whose element id
// equals activeCardId.
function removeStaleConditionConfirmationCards(activeCardId) {
  var cards = document.querySelectorAll("[data-condition-confirmation-card]");
  // querySelectorAll returns a static list, so removing while looping is safe.
  for (var index = 0; index < cards.length; index += 1) {
    if (cards[index].id === activeCardId) {
      continue;
    }
    cards[index].remove();
  }
}
|
||||||
|
|
||||||
|
// Render one <label> per candidate field as an HTML string. A field whose
// input_type is "select" becomes a <select> with the suggested option
// pre-selected; any other field becomes a text input pre-filled with the
// suggestion. All dynamic values are passed through escapeHtml.
function renderConditionFields(candidates) {
  var fields = candidates || {};
  return Object.keys(fields)
    .map(function (field) {
      var config = fields[field] || {};
      var caption = "<label><span>" + escapeHtml(config.label || field) + "</span>";
      var control;
      if (config.input_type === "select") {
        var optionsHtml = (config.options || [])
          .map(function (option) {
            var selected = option === config.suggested ? " selected" : "";
            return '<option value="' + escapeHtml(option) + '"' + selected + ">" + escapeHtml(option) + "</option>";
          })
          .join("");
        control = '<select name="' + escapeHtml(field) + '">' + optionsHtml + "</select>";
      } else {
        control =
          '<input type="text" name="' +
          escapeHtml(field) +
          '" value="' +
          escapeHtml(config.suggested || "") +
          '">';
      }
      return caption + control + "</label>";
    })
    .join("");
}
|
||||||
|
|
||||||
|
async function refreshWorkflowCard(batchId, workflow_type) {
|
||||||
if (!summaryPanel || !batchId) {
|
if (!summaryPanel || !batchId) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
var response;
|
var response;
|
||||||
try {
|
try {
|
||||||
response = await fetch(templateUrl("data-status-url-template", "__batch_id__", batchId), {
|
response = await fetch(statusUrlForWorkflow(workflow_type || "file_summary", batchId), {
|
||||||
cache: "no-store",
|
cache: "no-store",
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -652,9 +748,13 @@
|
|||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
var payload = await response.json();
|
var payload = await response.json();
|
||||||
|
if (payload.condition_confirmation) {
|
||||||
|
ensureConditionConfirmationCard(payload.condition_confirmation);
|
||||||
|
}
|
||||||
var card = ensureWorkflowCard({
|
var card = ensureWorkflowCard({
|
||||||
batch_id: payload.batch.id,
|
batch_id: payload.batch.id,
|
||||||
batch_no: payload.batch.batch_no,
|
batch_no: payload.batch.batch_no,
|
||||||
|
workflow_type: payload.batch.workflow_type || workflow_type || "file_summary",
|
||||||
});
|
});
|
||||||
if (!card) {
|
if (!card) {
|
||||||
return payload.batch.status || "";
|
return payload.batch.status || "";
|
||||||
@@ -673,6 +773,17 @@
|
|||||||
} else if (batchError) {
|
} else if (batchError) {
|
||||||
batchError.remove();
|
batchError.remove();
|
||||||
}
|
}
|
||||||
|
var riskSummary = card.querySelector(".workflow-risk-summary");
|
||||||
|
if (payload.batch.risk_summary_text) {
|
||||||
|
if (!riskSummary) {
|
||||||
|
riskSummary = document.createElement("p");
|
||||||
|
riskSummary.className = "workflow-risk-summary";
|
||||||
|
card.insertBefore(riskSummary, card.querySelector("ol"));
|
||||||
|
}
|
||||||
|
riskSummary.textContent = payload.batch.risk_summary_text;
|
||||||
|
} else if (riskSummary) {
|
||||||
|
riskSummary.remove();
|
||||||
|
}
|
||||||
var list = card.querySelector("ol");
|
var list = card.querySelector("ol");
|
||||||
list.innerHTML = "";
|
list.innerHTML = "";
|
||||||
(payload.nodes || []).forEach(function (node) {
|
(payload.nodes || []).forEach(function (node) {
|
||||||
@@ -724,29 +835,37 @@
|
|||||||
return status === "success" || status === "failed";
|
return status === "success" || status === "failed";
|
||||||
}
|
}
|
||||||
|
|
||||||
function stopWorkflowPolling(batchId) {
|
function workflowTimerKey(batchId, workflow_type) {
|
||||||
if (!workflowPollingTimers[batchId]) {
|
return (workflow_type || "file_summary") + ":" + batchId;
|
||||||
return;
|
|
||||||
}
|
|
||||||
window.clearInterval(workflowPollingTimers[batchId]);
|
|
||||||
delete workflowPollingTimers[batchId];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function startWorkflowPolling(batchId) {
|
function stopWorkflowPolling(batchId, workflow_type) {
|
||||||
if (!batchId || workflowPollingTimers[batchId]) {
|
var key = workflowTimerKey(batchId, workflow_type);
|
||||||
|
if (!workflowPollingTimers[key]) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
workflowPollingTimers[batchId] = window.setInterval(async function () {
|
window.clearInterval(workflowPollingTimers[key]);
|
||||||
var status = await refreshWorkflowCard(batchId);
|
delete workflowPollingTimers[key];
|
||||||
|
}
|
||||||
|
|
||||||
|
function startWorkflowPolling(batchId, workflow_type) {
|
||||||
|
var card = workflowCardList ? workflowCardList.querySelector('[data-batch-id="' + batchId + '"]') : null;
|
||||||
|
workflow_type = workflow_type || (card ? card.getAttribute("data-workflow-type") || "file_summary" : "file_summary");
|
||||||
|
var key = workflowTimerKey(batchId, workflow_type);
|
||||||
|
if (!batchId || workflowPollingTimers[key]) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
workflowPollingTimers[key] = window.setInterval(async function () {
|
||||||
|
var status = await refreshWorkflowCard(batchId, workflow_type);
|
||||||
if (isWorkflowTerminalStatus(status)) {
|
if (isWorkflowTerminalStatus(status)) {
|
||||||
refreshConversationMessages();
|
refreshConversationMessages();
|
||||||
stopWorkflowPolling(batchId);
|
stopWorkflowPolling(batchId, workflow_type);
|
||||||
}
|
}
|
||||||
}, WORKFLOW_POLL_INTERVAL_MS);
|
}, WORKFLOW_POLL_INTERVAL_MS);
|
||||||
refreshWorkflowCard(batchId).then(function (status) {
|
refreshWorkflowCard(batchId, workflow_type).then(function (status) {
|
||||||
if (isWorkflowTerminalStatus(status)) {
|
if (isWorkflowTerminalStatus(status)) {
|
||||||
refreshConversationMessages();
|
refreshConversationMessages();
|
||||||
stopWorkflowPolling(batchId);
|
stopWorkflowPolling(batchId, workflow_type);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -757,14 +876,91 @@
|
|||||||
}
|
}
|
||||||
workflowCardList.querySelectorAll(".workflow-card").forEach(function (card) {
|
workflowCardList.querySelectorAll(".workflow-card").forEach(function (card) {
|
||||||
var batchId = card.getAttribute("data-batch-id");
|
var batchId = card.getAttribute("data-batch-id");
|
||||||
|
var workflow_type = card.getAttribute("data-workflow-type") || "file_summary";
|
||||||
var status = card.querySelector(".workflow-status");
|
var status = card.querySelector(".workflow-status");
|
||||||
var statusText = status ? status.textContent.trim() : "";
|
var statusText = status ? status.textContent.trim() : "";
|
||||||
if (!isWorkflowTerminalStatus(statusText)) {
|
if (!isWorkflowTerminalStatus(statusText)) {
|
||||||
startWorkflowPolling(batchId);
|
startWorkflowPolling(batchId, workflow_type);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Attach a submit handler to every condition-confirmation form that has not
// been bound yet (tracked through data-bound). The handler posts the chosen
// conditions as JSON to the form's data-confirm-url and, once the server
// accepts them, resumes polling of the regulatory-review batch.
function bindConditionConfirmForms() {
  document.querySelectorAll("[data-condition-confirm-form]").forEach(function (form) {
    if (form.dataset.bound === "true") {
      return;
    }
    form.dataset.bound = "true";
    form.addEventListener("submit", async function (event) {
      event.preventDefault();
      var batchId = form.getAttribute("data-batch-id");
      var status = form.querySelector("[data-condition-confirm-status]");
      var submitButton = form.querySelector('button[type="submit"]');
      var formData = new FormData(form);

      // Everything except the CSRF token is a condition value.
      var conditions = {};
      formData.forEach(function (value, key) {
        if (key !== "csrfmiddlewaretoken") {
          conditions[key] = value;
        }
      });

      if (submitButton) {
        submitButton.disabled = true;
      }
      if (status) {
        status.textContent = "正在恢复法规核查...";
      }

      // Only the confirmation request itself decides success or failure.
      try {
        var response = await fetch(form.getAttribute("data-confirm-url"), {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "X-CSRFToken": formData.get("csrfmiddlewaretoken"),
          },
          body: JSON.stringify({ conditions: conditions }),
        });
        if (!response.ok) {
          throw new Error("确认失败。");
        }
      } catch (error) {
        if (status) {
          status.textContent = "确认失败,请稍后重试。";
        }
        if (submitButton) {
          // Allow another attempt.
          submitButton.disabled = false;
        }
        return;
      }

      if (status) {
        status.textContent = "已确认,工作流继续执行。";
      }
      form.classList.add("confirmed");

      // The confirmation is already accepted at this point. A failure while
      // refreshing the card must not be reported as a failed confirmation or
      // re-enable the submit button, so it is only logged.
      try {
        startWorkflowPolling(batchId, "regulatory_review");
        await refreshWorkflowCard(batchId, "regulatory_review");
      } catch (refreshError) {
        console.error("Workflow card refresh failed after condition confirmation", refreshError);
      }
    });
  });
}
|
||||||
|
|
||||||
|
/**
 * Wire every `[data-rectification-action]` button (once) so that a click
 * pre-fills the chat prompt with a re-review request for the button's batch
 * number and focuses the prompt. Does nothing on click when no prompt input
 * is available.
 */
function bindRectificationActionButtons() {
  var actionButtons = document.querySelectorAll("[data-rectification-action]");

  actionButtons.forEach(function (button) {
    // Skip buttons that already carry a click handler from an earlier call.
    var alreadyBound = button.dataset.bound === "true";
    if (alreadyBound) {
      return;
    }
    button.dataset.bound = "true";

    button.addEventListener("click", function () {
      if (!promptInput) {
        return;
      }

      var batchNo = button.getAttribute("data-batch-no") || "";
      var wantsFullReview = button.getAttribute("data-rectification-action") === "full-review";

      // "full-review" asks for a whole-package re-review; any other action
      // asks for a re-review of the missing items only.
      promptInput.value = wantsFullReview
        ? "请基于新的文件汇总批次,对法规核查批次 " + batchNo + " 发起整包复核,并先确认使用哪个补充批次。"
        : "请对法规核查批次 " + batchNo + " 的缺失项发起复核,并先确认 issue_ids 和补充文件汇总批次。";

      promptInput.focus();
    });
  });
}
|
||||||
|
|
||||||
async function streamChat(event) {
|
async function streamChat(event) {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
if (!composer || !promptInput || !sendButton || !chatStage) {
|
if (!composer || !promptInput || !sendButton || !chatStage) {
|
||||||
@@ -872,7 +1068,7 @@
|
|||||||
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
|
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
|
||||||
} else if (eventName === "workflow_started") {
|
} else if (eventName === "workflow_started") {
|
||||||
ensureWorkflowCard(payload);
|
ensureWorkflowCard(payload);
|
||||||
startWorkflowPolling(payload.batch_id);
|
startWorkflowPolling(payload.batch_id, payload.workflow_type);
|
||||||
} else if (eventName === "done") {
|
} else if (eventName === "done") {
|
||||||
if (payload.assistant_message_id) {
|
if (payload.assistant_message_id) {
|
||||||
assistantMessage.article.id = "message-" + payload.assistant_message_id;
|
assistantMessage.article.id = "message-" + payload.assistant_message_id;
|
||||||
@@ -924,6 +1120,8 @@
|
|||||||
renderExistingAssistantMessages();
|
renderExistingAssistantMessages();
|
||||||
refreshWorkflowBatchCarousel(0);
|
refreshWorkflowBatchCarousel(0);
|
||||||
bindWorkflowBatchCarouselControls();
|
bindWorkflowBatchCarouselControls();
|
||||||
|
bindConditionConfirmForms();
|
||||||
|
bindRectificationActionButtons();
|
||||||
refreshRunningWorkflowCards();
|
refreshRunningWorkflowCards();
|
||||||
|
|
||||||
if (chatScroll) {
|
if (chatScroll) {
|
||||||
|
|||||||
@@ -124,6 +124,44 @@
|
|||||||
</div>
|
</div>
|
||||||
</article>
|
</article>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
{% if condition_confirmation %}
|
||||||
|
<article
|
||||||
|
class="message assistant"
|
||||||
|
id="condition-confirmation-{{ condition_confirmation.id }}"
|
||||||
|
data-condition-confirmation-card
|
||||||
|
data-node-label="AI 适用条件确认"
|
||||||
|
>
|
||||||
|
<div class="message-avatar">AI</div>
|
||||||
|
<div class="message-bubble">
|
||||||
|
<form
|
||||||
|
class="condition-confirm-form"
|
||||||
|
data-condition-confirm-form
|
||||||
|
data-batch-id="{{ condition_confirmation.id }}"
|
||||||
|
data-confirm-url="/api/review-agent/regulatory-review/{{ condition_confirmation.id }}/conditions/"
|
||||||
|
>
|
||||||
|
{% csrf_token %}
|
||||||
|
<strong>适用条件确认</strong>
|
||||||
|
<p>请确认 {{ condition_confirmation.batch_no }} 的产品类别、注册类型和临床评价路径,确认后我会继续法规核查。</p>
|
||||||
|
{% for field, config in condition_confirmation.candidates.items %}
|
||||||
|
<label>
|
||||||
|
<span>{{ config.label }}</span>
|
||||||
|
{% if config.input_type == "select" %}
|
||||||
|
<select name="{{ field }}">
|
||||||
|
{% for option in config.options %}
|
||||||
|
<option value="{{ option }}"{% if option == config.suggested %} selected{% endif %}>{{ option }}</option>
|
||||||
|
{% endfor %}
|
||||||
|
</select>
|
||||||
|
{% else %}
|
||||||
|
<input type="text" name="{{ field }}" value="{{ config.suggested|default:'' }}">
|
||||||
|
{% endif %}
|
||||||
|
</label>
|
||||||
|
{% endfor %}
|
||||||
|
<button type="submit">确认并继续</button>
|
||||||
|
<p class="condition-confirm-status" data-condition-confirm-status></p>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
{% endif %}
|
||||||
{% else %}
|
{% else %}
|
||||||
<div class="empty-state">
|
<div class="empty-state">
|
||||||
<p class="eyebrow">审核智能体</p>
|
<p class="eyebrow">审核智能体</p>
|
||||||
@@ -177,6 +215,7 @@
|
|||||||
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
|
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
|
||||||
data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/"
|
data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/"
|
||||||
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
|
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
|
||||||
|
data-regulatory-status-url-template="/api/review-agent/regulatory-review/__batch_id__/status/"
|
||||||
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
|
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
|
||||||
>
|
>
|
||||||
<section class="summary-section upload-section">
|
<section class="summary-section upload-section">
|
||||||
@@ -221,10 +260,11 @@
|
|||||||
<h3>工作流</h3>
|
<h3>工作流</h3>
|
||||||
</div>
|
</div>
|
||||||
<div class="workflow-card-list workflow-batch-carousel" id="workflowCardList" data-active-index="0">
|
<div class="workflow-card-list workflow-batch-carousel" id="workflowCardList" data-active-index="0">
|
||||||
{% for batch in summary_batches %}
|
{% for batch in workflow_cards %}
|
||||||
<article
|
<article
|
||||||
class="workflow-card{% if forloop.first %} active{% endif %}"
|
class="workflow-card{% if forloop.first %} active{% endif %}"
|
||||||
data-batch-id="{{ batch.pk }}"
|
data-batch-id="{{ batch.id }}"
|
||||||
|
data-workflow-type="{{ batch.workflow_type }}"
|
||||||
data-workflow-index="{{ forloop.counter0 }}"
|
data-workflow-index="{{ forloop.counter0 }}"
|
||||||
aria-hidden="{% if forloop.first %}false{% else %}true{% endif %}"
|
aria-hidden="{% if forloop.first %}false{% else %}true{% endif %}"
|
||||||
>
|
>
|
||||||
@@ -232,11 +272,31 @@
|
|||||||
<strong>{{ batch.batch_no }}</strong>
|
<strong>{{ batch.batch_no }}</strong>
|
||||||
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
|
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
|
||||||
</header>
|
</header>
|
||||||
|
{% if batch.risk_label %}
|
||||||
|
<p class="workflow-risk-summary">{{ batch.risk_label }}</p>
|
||||||
|
{% endif %}
|
||||||
|
{% if batch.workflow_type == "regulatory_review" %}
|
||||||
|
<div class="workflow-card-actions">
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
data-rectification-action="full-review"
|
||||||
|
data-batch-no="{{ batch.batch_no }}"
|
||||||
|
>整包复核</button>
|
||||||
|
<button
|
||||||
|
type="button"
|
||||||
|
data-rectification-action="issue-review"
|
||||||
|
data-batch-no="{{ batch.batch_no }}"
|
||||||
|
>缺失项复核</button>
|
||||||
|
</div>
|
||||||
|
<p class="workflow-record-summary">
|
||||||
|
通知 {{ batch.notification_count|default:0 }} · 复核记录 {{ batch.review_record_count|default:0 }}
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
{% if batch.error_message %}
|
{% if batch.error_message %}
|
||||||
<p class="workflow-error">{{ batch.error_message }}</p>
|
<p class="workflow-error">{{ batch.error_message }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<ol>
|
<ol>
|
||||||
{% for node in batch.node_runs.all %}
|
{% for node in batch.nodes %}
|
||||||
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
|
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
|
||||||
<div>
|
<div>
|
||||||
<span>{{ node.node_name }}</span>
|
<span>{{ node.node_name }}</span>
|
||||||
@@ -250,11 +310,11 @@
|
|||||||
{% empty %}
|
{% empty %}
|
||||||
<div class="panel-empty">暂无工作流</div>
|
<div class="panel-empty">暂无工作流</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% if summary_batches %}
|
{% if workflow_cards %}
|
||||||
<div class="workflow-batch-controls">
|
<div class="workflow-batch-controls">
|
||||||
<button type="button" class="workflow-batch-btn" data-workflow-action="prev" aria-label="上一个工作流">‹</button>
|
<button type="button" class="workflow-batch-btn" data-workflow-action="prev" aria-label="上一个工作流">‹</button>
|
||||||
<div class="workflow-batch-dots" aria-label="工作流批次">
|
<div class="workflow-batch-dots" aria-label="工作流批次">
|
||||||
{% for batch in summary_batches %}
|
{% for batch in workflow_cards %}
|
||||||
<button
|
<button
|
||||||
type="button"
|
type="button"
|
||||||
class="workflow-batch-dot{% if forloop.first %} active{% endif %}"
|
class="workflow-batch-dot{% if forloop.first %} active{% endif %}"
|
||||||
|
|||||||
8
tests/fixtures/regulatory/attachment4_outline.json
vendored
Normal file
8
tests/fixtures/regulatory/attachment4_outline.json
vendored
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
[
|
||||||
|
{"code": "1", "title": "监管信息", "children": ["章节目录", "申请表", "术语/缩写词列表", "产品列表", "关联文件", "申报前与监管机构的联系情况和沟通记录", "符合性声明"]},
|
||||||
|
{"code": "2", "title": "综述资料", "children": ["章节目录", "概述", "产品描述", "预期用途", "申报产品上市历史", "其他需说明的内容"]},
|
||||||
|
{"code": "3", "title": "非临床资料", "children": ["章节目录", "产品风险管理资料", "体外诊断试剂安全和性能基本原则清单", "产品技术要求及检验报告", "分析性能研究", "稳定性研究", "阳性判断值或参考区间研究", "其他资料"]},
|
||||||
|
{"code": "4", "title": "临床评价资料", "children": ["章节目录", "临床评价资料"]},
|
||||||
|
{"code": "5", "title": "产品说明书和标签样稿", "children": ["章节目录", "产品说明书", "标签样稿", "其他资料"]},
|
||||||
|
{"code": "6", "title": "质量管理体系文件", "children": ["综述", "章节目录", "生产制造信息", "质量管理体系程序", "管理职责程序", "资源管理程序", "产品实现程序", "质量管理体系的测量/分析和改进程序", "其他质量体系程序信息", "质量管理体系核查文件"]}
|
||||||
|
]
|
||||||
@@ -187,6 +187,16 @@ def test_frontend_refreshes_generated_workflow_messages():
|
|||||||
assert "data-message-url-template" in script
|
assert "data-message-url-template" in script
|
||||||
|
|
||||||
|
|
||||||
|
def test_frontend_only_scrolls_after_appending_new_messages():
    """The frontend script must contain the append-counting scroll guard.

    This is a static check: it only verifies that the expected source
    snippets are present in ``static/js/app.js``.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    expected_snippets = (
        "return false;",
        "return true;",
        "var appendedCount = 0;",
        "if (appendConversationMessage(message))",
        "if (appendedCount > 0)",
    )
    for snippet in expected_snippets:
        assert snippet in script
|
||||||
|
|
||||||
|
|
||||||
def test_frontend_can_replace_partial_stream_content():
|
def test_frontend_can_replace_partial_stream_content():
|
||||||
script = open("static/js/app.js", encoding="utf-8").read()
|
script = open("static/js/app.js", encoding="utf-8").read()
|
||||||
|
|
||||||
|
|||||||
31
tests/test_logging_filters.py
Normal file
31
tests/test_logging_filters.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from review_agent.logging_filters import SuppressWorkflowStatusPollFilter
|
||||||
|
|
||||||
|
|
||||||
|
def test_suppress_workflow_status_poll_filter_hides_status_poll_requests():
    """A GET access-log line for a workflow status URL is rejected by the filter."""
    poll_line = '"GET /api/review-agent/regulatory-review/7/status/ HTTP/1.1" 200 1660'
    record_fields = {
        "name": "django.server",
        "level": logging.INFO,
        "pathname": "",
        "lineno": 1,
        "msg": poll_line,
        "args": (),
        "exc_info": None,
    }
    poll_record = logging.LogRecord(**record_fields)

    verdict = SuppressWorkflowStatusPollFilter().filter(poll_record)

    assert verdict is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_suppress_workflow_status_poll_filter_keeps_other_requests():
    """A POST access-log line for the conditions URL passes through the filter."""
    other_line = '"POST /api/review-agent/regulatory-review/7/conditions/ HTTP/1.1" 200 256'
    record_fields = {
        "name": "django.server",
        "level": logging.INFO,
        "pathname": "",
        "lineno": 1,
        "msg": other_line,
        "args": (),
        "exc_info": None,
    }
    other_record = logging.LogRecord(**record_fields)

    verdict = SuppressWorkflowStatusPollFilter().filter(other_record)

    assert verdict is True
|
||||||
72
tests/test_regulatory_completeness.py
Normal file
72
tests/test_regulatory_completeness.py
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||||
|
from review_agent.regulatory_review.services.completeness_check import run_completeness_check
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_completeness_check_matches_existing_files_and_reports_missing(django_user_model):
    """Completeness check flags absent material but not material that was uploaded.

    The batch holds a technical-requirements file and an IFU file; the check is
    expected to report the registration test report as a blocking completeness
    finding and to raise nothing for the technical requirements.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    batch = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-CHECK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    # (file name, storage path) pairs; the relative path equals the file name.
    uploaded_files = [
        ("产品技术要求.docx", "x/product.docx"),
        ("说明书.docx", "x/ifu.docx"),
    ]
    for position, (name, stored_at) in enumerate(uploaded_files, start=1):
        FileSummaryItem.objects.create(
            batch=batch,
            file_index=position,
            file_name=name,
            file_type="docx",
            relative_path=name,
            storage_path=stored_at,
        )

    findings = run_completeness_check(batch, load_rule_file())

    reported_titles = [item.title for item in findings]
    assert "缺少3.4注册检验报告" in reported_titles
    assert "缺少产品技术要求" not in reported_titles

    report_finding = next(
        item for item in findings if item.rule_code == "registration_test_report"
    )
    assert report_finding.severity == "blocking"
    assert report_finding.category == "completeness"
|
||||||
|
|
||||||
|
|
||||||
|
def test_completeness_check_matches_attachment4_directory_names(django_user_model):
    """A file under the "1.2 申请表" directory satisfies the application-form rule.

    With only that file uploaded, the quality-system rule is still expected to
    be reported as a high-severity finding whose evidence lists the searched
    fields.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    batch = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-A4",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    application_form = {
        "file_index": 1,
        "directory_level": "1. 监管信息 / 1.2 申请表",
        "file_name": "注册申请表.pdf",
        "file_type": "pdf",
        "relative_path": "1.监管信息/1.2申请表/注册申请表.pdf",
        "storage_path": "x/app.pdf",
    }
    FileSummaryItem.objects.create(batch=batch, **application_form)

    findings = run_completeness_check(batch, load_rule_file())

    reported_codes = [item.rule_code for item in findings]
    assert "attachment4_1_2_application_form" not in reported_codes

    missing_qms = next(
        item for item in findings if item.rule_code == "attachment4_6_quality_system"
    )
    assert missing_qms.title == "缺少6质量管理体系文件"
    assert missing_qms.severity == "high"
    assert missing_qms.evidence["searched_fields"] == [
        "file_name",
        "relative_path",
        "directory_level",
    ]
|
||||||
306
tests/test_regulatory_condition.py
Normal file
306
tests/test_regulatory_condition.py
Normal file
@@ -0,0 +1,306 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
FileSummaryBatch,
|
||||||
|
FileSummaryItem,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
WorkflowEvent,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.services.info_extract import detect_regulatory_condition_candidates
|
||||||
|
from review_agent.regulatory_review.workflow import (
|
||||||
|
create_regulatory_review_batch,
|
||||||
|
start_regulatory_review_workflow,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_regulatory_condition_candidates_from_summary_items(django_user_model):
    """Condition candidates are suggested from a summary batch and its file listing.

    The single item is a clinical-evaluation-exemption document whose storage
    path is the non-absolute placeholder ``missing.docx``; the expected
    suggestions are listed in ``expected_suggestions`` below.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    summary = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="甲胎蛋白检测试剂盒",
    )
    FileSummaryItem.objects.create(
        batch=summary,
        file_index=1,
        directory_level="临床评价资料",
        file_name="免临床评价资料.docx",
        file_type="docx",
        relative_path="4.临床评价资料/免临床评价资料.docx",
        storage_path="missing.docx",
    )

    candidates = detect_regulatory_condition_candidates(summary)

    expected_suggestions = {
        "product_category": "体外诊断试剂",
        "registration_type": "首次注册",
        "clinical_evaluation_path": "免临床",
        "product_name": "甲胎蛋白检测试剂盒",
    }
    for field, suggestion in expected_suggestions.items():
        assert candidates[field]["suggested"] == suggestion
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_regulatory_condition_prefers_attachment_fields_over_chapter_title(settings, tmp_path, django_user_model):
    """Labelled fields in the application form win over the batch's product name.

    The batch's ``product_name`` is a chapter title, while the application form
    on disk carries labelled product name, model spec and intended use lines;
    the suggestions are expected to come from the form.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    summary = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )

    form_path = tmp_path / "application.txt"
    form_text = (
        "产品名称:甲胎蛋白检测试剂盒\n"
        "型号规格:20人份/盒\n"
        "预期用途:用于人血清中甲胎蛋白检测\n"
        "注册类型:首次注册\n"
    )
    form_path.write_text(form_text, encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(form_path),
    )

    candidates = detect_regulatory_condition_candidates(summary)

    expected_suggestions = {
        "product_name": "甲胎蛋白检测试剂盒",
        "model_spec": "20人份/盒",
        "intended_use": "用于人血清中甲胎蛋白检测",
    }
    for field, suggestion in expected_suggestions.items():
        assert candidates[field]["suggested"] == suggestion
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_regulatory_condition_keeps_wrapped_product_name(settings, tmp_path, django_user_model):
    """A product name that wraps onto a second line is kept in full.

    The application form puts the method suffix of the product name on its own
    line; the suggested name is expected to contain both parts.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    summary = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )

    form_path = tmp_path / "application.txt"
    form_lines = [
        "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n",
        "(荧光PCR法)\n",
        "型号规格:24人份/盒\n",
        "预期用途:用于呼吸道合胞病毒、肺炎支原体核酸检测\n",
    ]
    form_path.write_text("".join(form_lines), encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(form_path),
    )

    candidates = detect_regulatory_condition_candidates(summary)

    suggested_name = candidates["product_name"]["suggested"]
    suggested_spec = candidates["model_spec"]["suggested"]
    assert suggested_name == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 (荧光PCR法)"
    assert suggested_spec == "24人份/盒"
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_regulatory_condition_uses_llm_review_for_better_product_name(
    monkeypatch, settings, tmp_path, django_user_model
):
    """The product name returned by the (stubbed) LLM review is used and tagged.

    ``generate_completion`` is replaced with a stub returning a JSON payload
    whose product name includes a method suffix missing from the application
    form; the candidate is expected to carry that name with source ``"llm"``.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    summary = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )

    form_path = tmp_path / "application.txt"
    form_text = "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n" + "型号规格:24人份/盒\n"
    form_path.write_text(form_text, encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary,
        file_index=1,
        directory_level="1. 监管信息 / 1.2 申请表",
        file_name="申请表.txt",
        file_type="txt",
        relative_path="1.监管信息/申请表.txt",
        storage_path=str(form_path),
    )

    def fake_completion(messages, temperature=0.0):
        # Stand-in for the real completion call; always returns the same fields.
        reviewed = {"fields": {"产品名称": "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 (荧光PCR法)"}}
        return json.dumps(reviewed, ensure_ascii=False)

    monkeypatch.setattr(
        "review_agent.regulatory_review.services.llm_review.generate_completion",
        fake_completion,
    )

    candidates = detect_regulatory_condition_candidates(summary)

    name_candidate = candidates["product_name"]
    assert name_candidate["suggested"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 (荧光PCR法)"
    assert name_candidate["source"] == "llm"
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_regulatory_condition_infers_fields_from_unlabeled_attachment_text(
    settings, tmp_path, django_user_model
):
    """Fields are inferred from attachment text that has no "label:" lines.

    Two plain-text attachments are provided: a standards list that mentions the
    product in running text and a product list with the name and packaging
    spec on separate lines. The product name candidate is expected to be
    tagged with source ``"inferred"``.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    summary = FileSummaryBatch.objects.create(
        conversation=Conversation.objects.create(user=owner, title="会话"),
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )

    standard_list = tmp_path / "standard_list.txt"
    standard_text = "".join(
        [
            "国家药品监督管理局:\n",
            "卡尤迪生物科技宜兴有限公司申请境内第三类体外诊断试剂",
            "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)产品注册。\n",
        ]
    )
    standard_list.write_text(standard_text, encoding="utf-8")

    product_list = tmp_path / "product_list.txt"
    product_text = "".join(
        [
            "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n",
            "(荧光PCR法)\n",
            "产品的包装规格\n",
            "24人份/盒、48人份/盒\n",
        ]
    )
    product_list.write_text(product_text, encoding="utf-8")

    # Both attachments live in the same chapter directory.
    attachments = [
        ("符合标准的清单.txt", standard_list),
        ("产品列表.txt", product_list),
    ]
    for position, (name, stored_at) in enumerate(attachments, start=1):
        FileSummaryItem.objects.create(
            batch=summary,
            file_index=position,
            directory_level="第1章 监管信息",
            file_name=name,
            file_type="txt",
            relative_path="第1章 监管信息/" + name,
            storage_path=str(stored_at),
        )

    candidates = detect_regulatory_condition_candidates(summary)

    assert candidates["product_category"]["suggested"] == "体外诊断试剂"
    name_candidate = candidates["product_name"]
    assert name_candidate["suggested"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)"
    assert name_candidate["source"] == "inferred"
    assert candidates["model_spec"]["suggested"] == "24人份/盒、48人份/盒"
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_pauses_before_rule_scope_until_conditions_confirmed(settings, tmp_path, django_user_model):
    """A synchronously started workflow stops at condition confirmation.

    After the run, the batch and the ``condition_confirm`` node are expected to
    be waiting for the user, the ``rule_scope`` node is still pending, the
    candidates are stored on the batch, and a ``waiting_user`` event exists.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="甲胎蛋白检测试剂盒",
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary,
    )

    start_regulatory_review_workflow(batch, async_run=False)

    batch.refresh_from_db()
    # Lookup shared by the node-run and event queries for this batch.
    batch_scope = {"workflow_type": "regulatory_review", "workflow_batch_id": batch.pk}
    condition_node = WorkflowNodeRun.objects.get(node_code="condition_confirm", **batch_scope)
    rule_scope_node = WorkflowNodeRun.objects.get(node_code="rule_scope", **batch_scope)

    assert batch.status == RegulatoryReviewBatch.Status.WAITING_USER
    assert condition_node.status == WorkflowNodeRun.Status.WAITING_USER
    assert rule_scope_node.status == WorkflowNodeRun.Status.PENDING
    assert batch.condition_json["candidates"]["product_category"]["suggested"] == "体外诊断试剂"

    waiting_events = WorkflowEvent.objects.filter(event_type="waiting_user", **batch_scope)
    assert waiting_events.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_confirm_conditions_endpoint_resumes_workflow(client, settings, tmp_path, django_user_model):
    """POSTing confirmed conditions resumes the paused workflow to success.

    The workflow is run synchronously (``REGULATORY_REVIEW_ASYNC = False``), so
    the response is expected to report a successful batch, the confirmed
    conditions are stored on the batch, and the ``condition_confirm`` node is
    marked successful.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-COND",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="甲胎蛋白检测试剂盒",
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary,
    )
    start_regulatory_review_workflow(batch, async_run=False)
    client.force_login(owner)

    confirmed = {
        "product_category": "体外诊断试剂",
        "registration_type": "首次注册",
        "clinical_evaluation_path": "免临床",
        "product_name": "甲胎蛋白检测试剂盒",
        "model_spec": "卡型",
        "intended_use": "用于甲胎蛋白检测",
    }
    confirm_url = reverse("regulatory_review_confirm_conditions", args=[batch.pk])
    response = client.post(
        confirm_url,
        data=json.dumps({"conditions": confirmed}),
        content_type="application/json",
    )

    batch.refresh_from_db()
    assert response.status_code == 200
    assert response.json()["batch"]["status"] == RegulatoryReviewBatch.Status.SUCCESS
    assert batch.condition_json["confirmed"] is True
    assert batch.condition_json["confirmed_conditions"]["model_spec"] == "卡型"

    condition_node = WorkflowNodeRun.objects.get(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        node_code="condition_confirm",
    )
    assert condition_node.status == WorkflowNodeRun.Status.SUCCESS
|
||||||
27
tests/test_regulatory_consistency.py
Normal file
27
tests/test_regulatory_consistency.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from review_agent.regulatory_review.services.consistency_check import run_consistency_check
|
||||||
|
|
||||||
|
|
||||||
|
def test_consistency_check_reports_product_name_mismatch():
    """Two documents that differ only in product name yield exactly one finding."""
    shared_tail = "\n型号规格:20人份/盒\n预期用途:定量检测AFP"
    document_texts = {
        "说明书.docx": "产品名称:甲胎蛋白检测试剂盒" + shared_tail,
        "技术要求.docx": "产品名称:乙肝表面抗原检测试剂盒" + shared_tail,
    }

    findings = run_consistency_check(document_texts)

    assert len(findings) == 1
    only_finding = findings[0]
    assert only_finding.category == "consistency"
    assert "产品名称" in only_finding.title
|
||||||
|
|
||||||
|
|
||||||
|
def test_consistency_check_reports_registration_scope_fields():
    """Mismatched management class and clinical evaluation path are both reported."""
    application_form = "\n".join(
        ["管理类别:第二类", "分类编码:6840", "注册类型:首次注册", "临床评价路径:免临床"]
    )
    overview = "\n".join(
        ["管理类别:第三类", "分类编码:6840", "注册类型:首次注册", "临床评价路径:临床试验"]
    )
    document_texts = {"申请表.docx": application_form, "综述资料.docx": overview}

    findings = run_consistency_check(document_texts)

    reported_titles = [item.title for item in findings]
    for expected_title in ("管理类别在不同文件中不一致", "临床评价路径在不同文件中不一致"):
        assert expected_title in reported_titles
|
||||||
49
tests/test_regulatory_export.py
Normal file
49
tests/test_regulatory_export.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
FileSummaryBatch,
|
||||||
|
RegulatoryIssue,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.services.export import export_review_results
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_export_review_results_creates_markdown_excel_and_json(settings, tmp_path, django_user_model):
    """Exporting a review batch yields Markdown, Excel and JSON artifacts.

    The JSON export is read back from ``storage_path`` and is expected to carry
    the batch number and the single issue created for the batch.
    """
    settings.MEDIA_ROOT = tmp_path
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    summary = FileSummaryBatch.objects.create(conversation=conversation, user=user, batch_no="FS-OK")
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=user,
        source_summary_batch=summary,
        batch_no="RR-EXPORT",
        risk_summary={"blocking": 1},
    )
    RegulatoryIssue.objects.create(
        batch=batch,
        rule_code="registration_test_report",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.BLOCKING,
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
    )

    exports = export_review_results(batch)

    assert {export.export_type for export in exports} == {
        ExportedSummaryFile.ExportType.MARKDOWN,
        ExportedSummaryFile.ExportType.EXCEL,
        ExportedSummaryFile.ExportType.JSON,
    }
    json_export = next(export for export in exports if export.export_type == ExportedSummaryFile.ExportType.JSON)
    # Read through a context manager so the exported file is closed promptly
    # rather than leaking the handle until garbage collection.
    with open(json_export.storage_path, encoding="utf-8") as exported_file:
        payload = json.load(exported_file)
    assert payload["batch_no"] == "RR-EXPORT"
    assert payload["issues"][0]["title"] == "缺少注册检验报告"
|
||||||
232
tests/test_regulatory_frontend.py
Normal file
232
tests/test_regulatory_frontend.py
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
import pytest
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
FileSummaryBatch,
|
||||||
|
FileSummaryItem,
|
||||||
|
RegulatoryArtifact,
|
||||||
|
RegulatoryNotificationRecord,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_renders_regulatory_workflow_card(client, django_user_model):
    """Home page of a conversation with a successful review batch renders its workflow card.

    The rendered HTML must contain the batch number, the workflow-type data
    attribute, the blocking-count text, the node name and the status-URL
    template attribute.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-CARD",
        status=RegulatoryReviewBatch.Status.SUCCESS,
        risk_summary={"blocking": 1, "high": 1},
    )
    node_fields = {
        "workflow_type": "regulatory_review",
        "workflow_batch_id": review_batch.pk,
        "node_group": "regulatory_review",
        "node_code": "risk_assess",
        "node_name": "风险评估",
        "status": WorkflowNodeRun.Status.SUCCESS,
        "progress": 100,
    }
    WorkflowNodeRun.objects.create(**node_fields)
    client.force_login(owner)

    page_url = f"{reverse('home')}?conversation={chat.pk}"
    html = client.get(page_url).content.decode("utf-8")

    expected_fragments = (
        "RR-CARD",
        'data-workflow-type="regulatory_review"',
        "阻断项 1",
        "风险评估",
        "data-regulatory-status-url-template",
    )
    for fragment in expected_fragments:
        assert fragment in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_renders_condition_confirmation_form(client, django_user_model):
    """A batch waiting on the user renders the condition confirmation form.

    The form marker and the suggested candidate values must appear in the
    page, and the form must be located before the summary panel and nowhere
    inside or after it.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    candidates = {
        "product_category": {
            "label": "产品类别",
            "input_type": "select",
            "options": ["体外诊断试剂", "医疗器械", "其他"],
            "suggested": "体外诊断试剂",
        },
        "product_name": {
            "label": "产品名称",
            "input_type": "text",
            "suggested": "甲胎蛋白检测试剂盒",
        },
    }
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-WAIT",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={"confirmed": False, "candidates": candidates},
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        node_group="condition_confirm",
        node_code="condition_confirm",
        node_name="适用条件确认",
        status=WorkflowNodeRun.Status.WAITING_USER,
        progress=50,
    )
    client.force_login(owner)

    page_url = f"{reverse('home')}?conversation={chat.pk}"
    html = client.get(page_url).content.decode("utf-8")

    form_marker = "data-condition-confirm-form"
    for fragment in ("适用条件确认", form_marker, "体外诊断试剂", "甲胎蛋白检测试剂盒"):
        assert fragment in html

    # str.index raises ValueError when a marker is absent, which fails the test.
    form_at = html.index(form_marker)
    panel_at = html.index('id="summaryPanel"')
    assert form_at < panel_at
    # No second copy of the form from the summary panel onwards.
    assert html.find(form_marker, panel_at) == -1
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_refreshes_incomplete_condition_confirmation_candidates(client, settings, tmp_path, django_user_model):
    """Stored candidates with only "其他" / an empty name are replaced on render.

    The waiting batch holds placeholder candidates, while the summarized file
    on disk contains the real product category and name; the rendered page is
    expected to contain those values from the file text.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )

    application_file = tmp_path / "application.txt"
    application_text = (
        "卡尤迪生物科技宜兴有限公司申请境内第三类体外诊断试剂"
        "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)产品注册。"
    )
    application_file.write_text(application_text, encoding="utf-8")

    FileSummaryItem.objects.create(
        batch=file_batch,
        file_index=1,
        directory_level="第1章 监管信息",
        file_name="符合标准的清单.txt",
        file_type="txt",
        relative_path="第1章 监管信息/符合标准的清单.txt",
        storage_path=str(application_file),
    )

    placeholder_candidates = {
        "product_category": {"label": "产品类别", "input_type": "select", "options": ["其他"], "suggested": "其他"},
        "product_name": {"label": "产品名称", "input_type": "text", "suggested": ""},
    }
    RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-WAIT-EMPTY",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={"confirmed": False, "candidates": placeholder_candidates},
    )
    client.force_login(owner)

    page_url = f"{reverse('home')}?conversation={chat.pk}"
    html = client.get(page_url).content.decode("utf-8")

    assert "体外诊断试剂" in html
    assert "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)" in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_renders_rectification_actions_and_summaries(client, tmp_path, django_user_model):
    """A finished batch with one review record and one notification renders both actions and counts.

    The page must expose the full-review and issue-review action attributes
    and show the texts "通知 1" and "复核记录 1".
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-RECTIFY",
        status=RegulatoryReviewBatch.Status.SUCCESS,
    )

    # One persisted review record, stored as a JSON artifact on disk.
    review_record_file = tmp_path / "review_record.json"
    review_record_file.write_text('{"items":[{"status":"review_passed"}]}', encoding="utf-8")
    RegulatoryArtifact.objects.create(
        batch=review_batch,
        artifact_type=RegulatoryArtifact.ArtifactType.JSON,
        name="review_record.json",
        storage_path=str(review_record_file),
        metadata={"artifact": "review_record"},
    )

    # One already-sent notification for the same batch.
    RegulatoryNotificationRecord.objects.create(
        batch=review_batch,
        channel=RegulatoryNotificationRecord.Channel.MOCK,
        target="法规整改负责人",
        status=RegulatoryNotificationRecord.Status.SENT,
        payload={"title": "缺少申请表"},
    )
    client.force_login(owner)

    page_url = f"{reverse('home')}?conversation={chat.pk}"
    html = client.get(page_url).content.decode("utf-8")

    expected_fragments = (
        'data-rectification-action="full-review"',
        'data-rectification-action="issue-review"',
        "通知 1",
        "复核记录 1",
    )
    for fragment in expected_fragments:
        assert fragment in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_frontend_selects_status_url_by_workflow_type():
    """static/js/app.js contains the identifiers used for workflow-specific status handling.

    This is a plain substring check on the script source; the path is
    relative to the current working directory of the test run.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert "workflow_type" in script
    assert "data-regulatory-status-url-template" in script
    assert "statusUrlForWorkflow" in script
    assert "bindConditionConfirmForms" in script
    assert "data-condition-confirm-form" in script
    assert "ensureConditionConfirmationCard" in script
    assert "condition_confirmation" in script
    assert "bindRectificationActionButtons" in script
    assert "data-rectification-action" in script
|
||||||
|
|
||||||
|
|
||||||
|
def test_frontend_polls_regulatory_workflow_with_explicit_workflow_type():
    """static/js/app.js defines and calls startWorkflowPolling with a workflow type.

    This is a plain substring check on the script source; the path is
    relative to the current working directory of the test run.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert "function startWorkflowPolling(batchId, workflow_type)" in script
    assert "startWorkflowPolling(payload.batch_id, payload.workflow_type)" in script
    assert 'startWorkflowPolling(batchId, "regulatory_review")' in script
    assert 'workflow_type || (card ? card.getAttribute("data-workflow-type") || "file_summary" : "file_summary")' in script
|
||||||
|
|
||||||
|
|
||||||
|
def test_frontend_keeps_single_condition_confirmation_prompt():
    """static/js/app.js contains the identifiers for removing stale confirmation cards.

    This is a plain substring check on the script source; the path is
    relative to the current working directory of the test run.
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert "data-condition-confirmation-card" in script
    assert "removeStaleConditionConfirmationCards" in script
    assert '[data-condition-confirmation-card]' in script
|
||||||
111
tests/test_regulatory_llm_review.py
Normal file
111
tests/test_regulatory_llm_review.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.services.llm_review import review_condition_fields, review_workflow_payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_condition_fields_selects_more_complete_llm_product_name():
    """The LLM value is selected for the product name; the model/spec field stays rule-sourced.

    The stubbed LLM returns a product name that includes the method suffix
    missing from the rule-extracted name, and the same spec as the rule.
    """
    llm_answer = {
        "fields": {
            "产品名称": "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 (荧光PCR法)",
            "型号规格": "24人份/盒",
        }
    }

    def completion(messages, temperature=0.0):
        # Stand-in for the LLM call: always answers with the canned JSON.
        return json.dumps(llm_answer, ensure_ascii=False)

    rule_extracted = {"产品名称": "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒", "型号规格": "24人份/盒"}
    outcome = review_condition_fields(
        text="产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n(荧光PCR法)\n型号规格:24人份/盒",
        rule_fields=rule_extracted,
        file_context="申请表.txt",
        completion_func=completion,
    )

    chosen_values = outcome["selected_fields"]
    chosen_sources = outcome["selected_sources"]
    assert chosen_values["产品名称"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 (荧光PCR法)"
    assert chosen_sources["产品名称"] == "llm"
    assert chosen_sources["型号规格"] == "rule"
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_condition_fields_falls_back_when_llm_returns_chapter_title():
    """When the stubbed LLM answers with a chapter title, the rule-extracted name is kept."""
    llm_answer = {"fields": {"产品名称": "第1章 监管信息"}}

    def completion(messages, temperature=0.0):
        # Stand-in for the LLM call: always answers with the canned JSON.
        return json.dumps(llm_answer, ensure_ascii=False)

    outcome = review_condition_fields(
        text="产品名称:甲胎蛋白检测试剂盒",
        rule_fields={"产品名称": "甲胎蛋白检测试剂盒"},
        file_context="申请表.txt",
        completion_func=completion,
    )

    assert outcome["selected_fields"]["产品名称"] == "甲胎蛋白检测试剂盒"
    assert outcome["selected_sources"]["产品名称"] == "rule"
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_condition_fields_rejects_garbled_llm_product_name():
    """A garbled product name from the stubbed LLM is not selected; the rule value wins."""
    # The odd character sequence inside the name is the deliberately garbled input.
    llm_answer = {"fields": {"产品名称": "呼吸道合胞病毒、 <20>肺炎支原体核酸检测试剂盒 (荧光PCR法)"}}

    def completion(messages, temperature=0.0):
        # Stand-in for the LLM call: always answers with the canned JSON.
        return json.dumps(llm_answer, ensure_ascii=False)

    clean_name = "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)"
    outcome = review_condition_fields(
        text=clean_name,
        rule_fields={"产品名称": clean_name},
        file_context="产品列表.txt",
        completion_func=completion,
    )

    assert outcome["selected_fields"]["产品名称"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)"
    assert outcome["selected_sources"]["产品名称"] == "rule"
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_workflow_payload_handles_timeout_without_raising():
    """A completion function that always times out yields a "failed" result, not an exception."""

    def completion(messages, temperature=0.0):
        # Every call fails the same way.
        raise TimeoutError("The read operation timed out")

    outcome = review_workflow_payload(
        stage="completeness_check",
        payload={"findings": []},
        completion_func=completion,
    )

    assert outcome["status"] == "failed"
    assert outcome["stage"] == "completeness_check"
    assert "timed out" in outcome["error_message"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_workflow_payload_retries_timeout_before_success(settings):
    """Two timeouts followed by a valid answer end in success after exactly three calls."""
    # Zero delay keeps the retry loop from slowing the test down.
    settings.REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = 0
    call_count = 0

    def completion(messages, temperature=0.0):
        nonlocal call_count
        call_count += 1
        if call_count < 3:
            raise TimeoutError("The read operation timed out")
        return json.dumps({"reviewed": True})

    outcome = review_workflow_payload(
        stage="completeness_check",
        payload={"findings": []},
        completion_func=completion,
    )

    assert call_count == 3
    assert outcome["status"] == "success"
    assert outcome["result"]["reviewed"] is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_workflow_payload_passes_configured_timeout(settings):
    """The configured timeout setting reaches a completion function that accepts ``timeout``."""
    settings.REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = 0
    settings.REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = 7
    seen_timeouts = []

    def completion(messages, temperature=0.0, timeout=None):
        # Record whatever timeout the caller forwarded.
        seen_timeouts.append(timeout)
        return json.dumps({"reviewed": True})

    review_workflow_payload(
        stage="completeness_check",
        payload={"findings": []},
        completion_func=completion,
    )

    # The most recent call must have received the configured value.
    assert seen_timeouts[-1] == 7
|
||||||
137
tests/test_regulatory_models.py
Normal file
137
tests/test_regulatory_models.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
FileSummaryBatch,
|
||||||
|
Message,
|
||||||
|
RegulatoryArtifact,
|
||||||
|
RegulatoryIssue,
|
||||||
|
RegulatoryNotificationRecord,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
RegulatoryRuleVersion,
|
||||||
|
WorkflowEvent,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_models_store_batch_issue_artifact_and_notification(django_user_model):
    """A review batch, issue, artifact and notification can be created and get their default statuses.

    Rows created without an explicit status must report PENDING (batch and
    notification) and OPEN (issue).
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="法规核查")
    trigger_message = Message.objects.create(
        conversation=chat,
        role=Message.Role.USER,
        content="请做NMPA法规核查",
    )
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-READY",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    active_rules = RegulatoryRuleVersion.objects.create(
        code="nmpa_ivd_registration_v1",
        name="NMPA IVD 注册资料 Demo 规则",
        yaml_path="review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml",
        yaml_hash="abc123",
        rag_collection="nmpa_ivd_registration_v1",
        rag_index_version="idx-1",
        rag_index_hash="hash-1",
        status=RegulatoryRuleVersion.Status.ACTIVE,
    )

    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        trigger_message=trigger_message,
        source_summary_batch=file_batch,
        rule_version=active_rules,
        batch_no="RR-202606070001-abcdef",
    )
    blocking_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="registration_test_report",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.BLOCKING,
        title="缺少注册检验报告",
        suggestion="请补充注册检验报告并复核。",
        evidence={"matched_files": []},
        citations=[{"source": "法规.doc", "text": "注册检验报告"}],
    )
    result_artifact = RegulatoryArtifact.objects.create(
        batch=review_batch,
        artifact_type=RegulatoryArtifact.ArtifactType.JSON,
        name="结果包",
        storage_path="media/regulatory_review/result.json",
        content_hash="hash",
    )
    pending_notice = RegulatoryNotificationRecord.objects.create(
        batch=review_batch,
        channel=RegulatoryNotificationRecord.Channel.MOCK,
        target="todo-plan",
        payload={"issue_id": blocking_issue.pk},
    )

    assert review_batch.status == RegulatoryReviewBatch.Status.PENDING
    assert review_batch.source_summary_batch == file_batch
    assert blocking_issue.status == RegulatoryIssue.Status.OPEN
    assert result_artifact.artifact_type == RegulatoryArtifact.ArtifactType.JSON
    assert pending_notice.status == RegulatoryNotificationRecord.Status.PENDING
|
||||||
|
|
||||||
|
|
||||||
|
def test_generic_workflow_fields_support_file_summary_and_regulatory_batches(django_user_model):
    """Workflow rows can point at either batch kind through workflow_type / workflow_batch_id.

    A node run may be created without a ``batch`` (it then reads back as
    None) while still referencing the regulatory batch by its primary key.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-GENERIC",
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-GENERIC",
    )
    # Generic reference shared by every row that targets the regulatory batch.
    regulatory_ref = {
        "workflow_type": "regulatory_review",
        "workflow_batch_id": review_batch.pk,
    }

    summary_node = WorkflowNodeRun.objects.create(
        batch=file_batch,
        workflow_type="file_summary",
        workflow_batch_id=file_batch.pk,
        node_group="file_summary",
        node_code="inventory",
        node_name="文件扫描",
    )
    review_node = WorkflowNodeRun.objects.create(
        node_group="regulatory_review",
        node_code="prepare",
        node_name="准备",
        **regulatory_ref,
    )
    created_event = WorkflowEvent.objects.create(
        batch=file_batch,
        conversation=chat,
        event_type="workflow_created",
        payload={"batch_no": review_batch.batch_no},
        **regulatory_ref,
    )
    export_row = ExportedSummaryFile.objects.create(
        batch=file_batch,
        export_category="result_package",
        export_type=ExportedSummaryFile.ExportType.JSON,
        file_name="result.json",
        storage_path="media/regulatory_review/result.json",
        **regulatory_ref,
    )

    assert summary_node.batch == file_batch
    assert review_node.batch is None
    assert review_node.workflow_batch_id == review_batch.pk
    assert created_event.conversation == chat
    assert export_row.export_type == ExportedSummaryFile.ExportType.JSON
|
||||||
79
tests/test_regulatory_notification.py
Normal file
79
tests/test_regulatory_notification.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
FileSummaryBatch,
|
||||||
|
RegulatoryIssue,
|
||||||
|
RegulatoryNotificationRecord,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.services.export import build_markdown_report, build_result_payload
|
||||||
|
from review_agent.regulatory_review.services.feishu_notifier import create_mock_notifications
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_mock_notifications_for_medium_and_above(django_user_model):
    """Only the HIGH issue produces a mock notification; the INFO issue does not.

    The single returned record must use the MOCK channel, be in SENT status
    and carry the high-severity issue's primary key in its payload.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-NOTIFY",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-NOTIFY",
    )
    high_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="attachment4_1_2_application_form",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.HIGH,
        title="缺少申请表",
    )
    # Informational issue that must not trigger a notification.
    RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="info",
        category=RegulatoryIssue.Category.RAG,
        severity=RegulatoryIssue.Severity.INFO,
        title="提示项",
    )

    sent = create_mock_notifications(review_batch)

    assert len(sent) == 1
    only_record = sent[0]
    assert only_record.channel == RegulatoryNotificationRecord.Channel.MOCK
    assert only_record.status == RegulatoryNotificationRecord.Status.SENT
    assert only_record.payload["issue_id"] == high_issue.pk
|
||||||
|
|
||||||
|
|
||||||
|
def test_notification_records_enter_reports(django_user_model):
    """A stored notification shows up in both the Markdown report and the result payload."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    file_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-NOTIFY",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=chat,
        user=owner,
        source_summary_batch=file_batch,
        batch_no="RR-NOTIFY",
    )
    RegulatoryNotificationRecord.objects.create(
        batch=review_batch,
        channel=RegulatoryNotificationRecord.Channel.MOCK,
        target="法规整改负责人",
        status=RegulatoryNotificationRecord.Status.SENT,
        payload={"title": "缺少申请表", "severity": "high"},
    )

    markdown_report = build_markdown_report(review_batch)
    assert "通知记录" in markdown_report

    first_notification = build_result_payload(review_batch)["notifications"][0]
    assert first_notification["channel"] == "mock"
|
||||||
88
tests/test_regulatory_rag.py
Normal file
88
tests/test_regulatory_rag.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.services.rag_citation import (
|
||||||
|
RagIndexUnavailable,
|
||||||
|
retrieve_citations,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.services.rag_embedding import SiliconFlowEmbeddingProvider
|
||||||
|
from review_agent.regulatory_review.services.rag_index import chunk_text
|
||||||
|
from review_agent.regulatory_review.services.rag_index import collect_source_chunks
|
||||||
|
|
||||||
|
|
||||||
|
def test_siliconflow_embedding_provider_posts_expected_payload(monkeypatch):
    """The provider posts to ``/embeddings`` with bearer auth, model and dimensions.

    ``httpx.post`` in the embedding module is replaced by a recorder that
    returns two canned embeddings.
    """
    recorded_requests = []

    class StubResponse:
        def raise_for_status(self):
            return None

        def json(self):
            return {"data": [{"embedding": [0.1, 0.2]}, {"embedding": [0.3, 0.4]}]}

    def fake_post(url, headers, json, timeout):
        recorded_requests.append(dict(url=url, headers=headers, json=json, timeout=timeout))
        return StubResponse()

    monkeypatch.setattr("review_agent.regulatory_review.services.rag_embedding.httpx.post", fake_post)

    provider = SiliconFlowEmbeddingProvider(
        api_key="secret",
        base_url="https://api.siliconflow.cn/v1",
        model="Qwen/Qwen3-Embedding-4B",
        dimensions=1024,
    )

    vectors = provider.embed(["法规依据", "注册检验报告"])
    assert vectors == [[0.1, 0.2], [0.3, 0.4]]

    first_request = recorded_requests[0]
    assert first_request["url"] == "https://api.siliconflow.cn/v1/embeddings"
    assert first_request["headers"]["Authorization"] == "Bearer secret"
    assert first_request["json"]["model"] == "Qwen/Qwen3-Embedding-4B"
    assert first_request["json"]["dimensions"] == 1024
|
||||||
|
|
||||||
|
|
||||||
|
def test_chunk_text_preserves_source_metadata():
    """Chunking a long text yields several chunks whose first one is non-empty and keeps the source."""
    long_text = "第一段法规内容。\n" * 20

    pieces = chunk_text(long_text, source="法规.doc", chunk_size=30, overlap=5)

    assert len(pieces) > 1
    first_piece = pieces[0]
    assert first_piece.metadata["source"] == "法规.doc"
    assert first_piece.text
|
||||||
|
|
||||||
|
|
||||||
|
def test_retrieve_citations_returns_placeholder_when_no_hits():
    """With a collection that returns no documents, the first citation is the placeholder source."""

    class EmptyCollection:
        def query(self, query_embeddings, n_results):
            # Mimics a query result with zero hits.
            return {"documents": [[]], "metadatas": [[]], "distances": [[]]}

    def constant_embedding(texts):
        return [[0.1, 0.2]]

    found = retrieve_citations(
        "注册检验报告",
        embedding_provider=constant_embedding,
        collection=EmptyCollection(),
    )

    assert found[0]["source"] == "原文依据待补充"
|
||||||
|
|
||||||
|
|
||||||
|
def test_retrieve_citations_raises_when_index_missing(settings, tmp_path):
    """Pointing the Chroma path at a non-existent directory raises RagIndexUnavailable."""
    missing_index_dir = tmp_path / "missing"
    settings.REGULATORY_RAG_CHROMA_PATH = missing_index_dir

    def constant_embedding(texts):
        return [[0.1]]

    with pytest.raises(RagIndexUnavailable):
        retrieve_citations("注册检验报告", embedding_provider=constant_embedding)
|
||||||
|
|
||||||
|
|
||||||
|
def test_collect_source_chunks_requires_attachment4_extraction(monkeypatch, tmp_path):
    """If text extraction fails for the attachment-4 document, collection raises a RuntimeError naming it."""
    sources_root = tmp_path / "sources"
    sources_root.mkdir()
    legacy_doc = sources_root / "附件 4 体外诊断试剂注册申报资料要求及说明.doc"
    legacy_doc.write_bytes(b"legacy-doc")

    def fail_extract(path):
        # Simulates a failed conversion of the legacy .doc file.
        raise RuntimeError("无法通过 LibreOffice 转换法规 .doc 材料")

    monkeypatch.setattr("review_agent.regulatory_review.services.rag_index.extract_text_from_path", fail_extract)

    with pytest.raises(RuntimeError, match="附件 4"):
        collect_source_chunks(sources_root)
|
||||||
133
tests/test_regulatory_rectification.py
Normal file
133
tests/test_regulatory_rectification.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
FileSummaryBatch,
|
||||||
|
FileSummaryItem,
|
||||||
|
RegulatoryArtifact,
|
||||||
|
RegulatoryIssue,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.services.export import build_markdown_report, build_result_payload
|
||||||
|
from review_agent.regulatory_review.services.rectification_review import review_missing_issues
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def _make_review_batch(user):
    """Create a conversation, a successful summary batch and a successful review batch for *user*.

    Returns:
        A ``(conversation, original_summary, batch)`` tuple.
    """
    chat = Conversation.objects.create(user=user, title="会话")
    ownership = {"conversation": chat, "user": user}
    first_summary = FileSummaryBatch.objects.create(
        batch_no="FS-ORIGINAL",
        status=FileSummaryBatch.Status.SUCCESS,
        **ownership,
    )
    first_review = RegulatoryReviewBatch.objects.create(
        source_summary_batch=first_summary,
        batch_no="RR-ORIGINAL",
        status=RegulatoryReviewBatch.Status.SUCCESS,
        **ownership,
    )
    return chat, first_summary, first_review
|
||||||
|
|
||||||
|
|
||||||
|
def test_start_full_package_review_creates_new_traceable_batch(client, settings, tmp_path, django_user_model):
    """Starting a full review creates a second batch that records which batch it came from.

    The new batch must use the newly supplied summary batch and carry the
    original batch's id and batch number inside ``condition_json``.
    """
    settings.MEDIA_ROOT = tmp_path
    # Async is switched off for this test.
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat, _first_summary, first_review = _make_review_batch(owner)
    fresh_summary = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-NEW",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    client.force_login(owner)

    endpoint = reverse("regulatory_review_start_full_review", args=[first_review.pk])
    request_body = json.dumps({"file_summary_batch_id": fresh_summary.pk})
    response = client.post(endpoint, data=request_body, content_type="application/json")

    assert response.status_code == 200
    # .get() also enforces that exactly one additional batch exists.
    regenerated = RegulatoryReviewBatch.objects.exclude(pk=first_review.pk).get()
    assert regenerated.source_summary_batch == fresh_summary
    trace = regenerated.condition_json
    assert trace["source_review_batch_id"] == first_review.pk
    assert trace["regenerated_from"]["batch_no"] == "RR-ORIGINAL"
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_missing_issues_updates_status_and_writes_record(settings, tmp_path, django_user_model):
    """Re-reviewing a missing-label issue against a supplement containing a label file passes it.

    The issue must move to REVIEW_PASSED, the returned record must report the
    same status, and an artifact whose name starts with "review_record" must exist.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat, _first_summary, review_batch = _make_review_batch(owner)
    label_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="attachment4_5_3_label",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.HIGH,
        title="缺少标签样稿",
        suggestion="请补充标签样稿。",
    )
    supplement_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-SUPPLEMENT",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    # The supplement contains the label draft file.
    FileSummaryItem.objects.create(
        batch=supplement_batch,
        file_index=1,
        directory_level="5. 产品说明书和标签样稿",
        file_name="标签样稿.pdf",
        file_type="pdf",
        relative_path="5.3 标签样稿/标签样稿.pdf",
        storage_path="x/label.pdf",
    )

    review_record = review_missing_issues(
        batch=review_batch,
        issue_ids=[label_issue.pk],
        file_summary_batch=supplement_batch,
    )

    label_issue.refresh_from_db()
    assert label_issue.status == RegulatoryIssue.Status.REVIEW_PASSED
    assert review_record["items"][0]["status"] == "review_passed"
    stored_records = RegulatoryArtifact.objects.filter(batch=review_batch, name__startswith="review_record")
    assert stored_records.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_missing_issue_review_endpoint_and_report_output(client, settings, tmp_path, django_user_model):
    """The issue-review endpoint fails an issue when the supplement has no files, and reports it.

    The supplement batch is created without any file items. After the POST
    the issue must be REVIEW_FAILED, the result payload must reference the
    supplement batch number, and the Markdown report must contain the
    review-record heading text.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat, _first_summary, review_batch = _make_review_batch(owner)
    quality_issue = RegulatoryIssue.objects.create(
        batch=review_batch,
        rule_code="attachment4_6_quality_system",
        category=RegulatoryIssue.Category.COMPLETENESS,
        severity=RegulatoryIssue.Severity.HIGH,
        title="缺少质量管理体系文件",
        suggestion="请补充质量管理体系文件。",
    )
    empty_supplement = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-SUPPLEMENT",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    client.force_login(owner)

    endpoint = reverse("regulatory_review_review_issues", args=[review_batch.pk])
    request_body = json.dumps({"issue_ids": [quality_issue.pk], "file_summary_batch_id": empty_supplement.pk})
    response = client.post(endpoint, data=request_body, content_type="application/json")

    quality_issue.refresh_from_db()
    result_payload = build_result_payload(review_batch)
    markdown_report = build_markdown_report(review_batch)
    assert response.status_code == 200
    assert quality_issue.status == RegulatoryIssue.Status.REVIEW_FAILED
    assert result_payload["review_records"][0]["file_summary_batch_no"] == "FS-SUPPLEMENT"
    assert "复核记录" in markdown_report
|
||||||
35
tests/test_regulatory_risk_assess.py
Normal file
35
tests/test_regulatory_risk_assess.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryIssue, RegulatoryReviewBatch
|
||||||
|
from review_agent.regulatory_review.schemas import Finding
|
||||||
|
from review_agent.regulatory_review.services.risk_assess import persist_findings
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_persist_findings_deduplicates_and_updates_risk_summary(django_user_model):
    """Persisting the same finding twice yields one issue and a blocking count of one."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-OK")
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-RISK",
    )
    finding_fields = {
        "rule_code": "registration_test_report",
        "category": "completeness",
        "severity": "blocking",
        "title": "缺少注册检验报告",
        "suggestion": "请补充注册检验报告并复核。",
        "citations": [{"source": "法规.doc", "text": "注册检验报告"}],
    }
    duplicated = Finding(**finding_fields)

    # The very same finding object is submitted twice on purpose.
    persisted = persist_findings(review_batch, [duplicated, duplicated])

    review_batch.refresh_from_db()
    assert len(persisted) == 1
    assert RegulatoryIssue.objects.count() == 1
    assert review_batch.risk_summary["blocking"] == 1
|
||||||
93
tests/test_regulatory_rule_loader.py
Normal file
93
tests/test_regulatory_rule_loader.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from django.core.management import call_command
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryRuleVersion
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import (
|
||||||
|
DEFAULT_RULE_CODE,
|
||||||
|
check_rule_version,
|
||||||
|
compute_file_sha256,
|
||||||
|
load_rule_file,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_rule_file_reads_demo_requirements():
    """The default rule file carries the default rule code and the core requirement codes."""
    loaded = load_rule_file()

    available_codes = {requirement["code"] for requirement in loaded["requirements"]}
    assert loaded["code"] == DEFAULT_RULE_CODE
    expected_codes = (
        "product_technical_requirements",
        "instructions_for_use",
        "registration_test_report",
        "clinical_evaluation",
        "essential_principles_checklist",
    )
    for expected in expected_codes:
        assert expected in available_codes
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_rule_file_covers_attachment4_outline():
    """Every chapter and child heading of the attachment-4 outline fixture has a matching rule.

    Chapter rules must match on title and ``attachment4_code``, have severity
    ``high`` and a non-empty ``citation_query``. Child rules must match on
    title with an ``attachment4_code`` starting with ``"<chapter code>."`` and
    carry ``rule_id``, ``file_keywords``, an allowed severity and a
    ``citation_query``.
    """
    requirements = load_rule_file()["requirements"]
    # Resolve the fixture relative to this test module so the test does not
    # depend on the directory pytest happens to be started from.
    outline_path = Path(__file__).resolve().parent / "fixtures" / "regulatory" / "attachment4_outline.json"
    outline = json.loads(outline_path.read_text(encoding="utf-8"))

    for chapter in outline:
        chapter_code = chapter["code"]
        chapter_title = chapter["title"]
        chapter_rule = next(
            (
                item
                for item in requirements
                if item["title"] == chapter_title and item.get("attachment4_code") == chapter_code
            ),
            None,
        )
        # Fail with a readable message instead of a bare StopIteration.
        assert chapter_rule is not None, f"no rule found for chapter {chapter_code} {chapter_title}"
        assert chapter_rule["attachment4_code"] == chapter_code
        assert chapter_rule["severity"] == "high"
        assert chapter_rule["citation_query"]

        child_prefix = f"{chapter_code}."
        for child in chapter["children"]:
            child_rule = next(
                (
                    item
                    for item in requirements
                    if item["title"] == child and str(item.get("attachment4_code", "")).startswith(child_prefix)
                ),
                None,
            )
            assert child_rule is not None, f"no rule found for child heading {child!r} under chapter {chapter_code}"
            assert child_rule["rule_id"]
            assert child_rule["file_keywords"]
            assert child_rule["severity"] in {"blocking", "high", "medium"}
            assert child_rule["citation_query"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_compute_file_sha256_changes_when_file_changes(tmp_path):
    """Rewriting a file with different content changes its computed SHA-256."""
    rule_file = tmp_path / "rule.yaml"
    rule_file.write_text("code: demo\n", encoding="utf-8")
    digest_before = compute_file_sha256(rule_file)

    rule_file.write_text("code: demo2\n", encoding="utf-8")
    digest_after = compute_file_sha256(rule_file)

    assert digest_after != digest_before
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_rule_version_creates_missing_db_record():
    """With ``update_missing=True`` a missing rule-version row is created and reported as such."""
    outcome = check_rule_version(update_missing=True)

    stored = RegulatoryRuleVersion.objects.get(code=DEFAULT_RULE_CODE)
    assert outcome.status == "created"
    # The reported hash must be the one persisted on the new row.
    assert outcome.current_hash == stored.yaml_hash
    assert stored.rag_collection == "nmpa_ivd_registration_v1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_check_rule_version_reports_hash_mismatch_without_overwriting():
    """A stale stored hash is reported as a mismatch and left untouched when ``update_missing=False``."""
    initial = check_rule_version(update_missing=True)
    stored = RegulatoryRuleVersion.objects.get(code=DEFAULT_RULE_CODE)
    # Corrupt the stored hash so it no longer matches the rule file.
    stored.yaml_hash = "stale"
    stored.save(update_fields=["yaml_hash"])

    outcome = check_rule_version(update_missing=False)
    stored.refresh_from_db()

    assert outcome.status == "mismatch"
    assert outcome.database_hash == "stale"
    assert outcome.current_hash == initial.current_hash
    assert stored.yaml_hash == "stale"
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_rules_check_command_reports_status(capsys):
    """The ``regulatory_rules_check`` command prints the rule code and a ``created`` or ``ok`` status."""
    call_command("regulatory_rules_check")

    stdout = capsys.readouterr().out
    assert DEFAULT_RULE_CODE in stdout
    assert any(status_word in stdout for status_word in ("created", "ok"))
|
||||||
26
tests/test_regulatory_storage.py
Normal file
26
tests/test_regulatory_storage.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryReviewBatch
|
||||||
|
from review_agent.regulatory_review.storage import save_artifact
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_artifact_writes_file_and_records_hash(settings, tmp_path, django_user_model):
    """Saving an artifact records a content hash and writes the file under the batch work directory."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-OK")
    review_batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-ART",
    )

    saved = save_artifact(review_batch, name="raw.json", content='{"ok": true}', artifact_type="json")

    expected_file = tmp_path / "regulatory_review" / "work" / "RR-ART" / "raw.json"
    assert saved.content_hash
    assert saved.storage_path.endswith("raw.json")
    assert expected_file.exists()
|
||||||
26
tests/test_regulatory_structure.py
Normal file
26
tests/test_regulatory_structure.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from review_agent.regulatory_review.services.rule_loader import load_rule_file
|
||||||
|
from review_agent.regulatory_review.services.structure_check import run_structure_check
|
||||||
|
|
||||||
|
|
||||||
|
def test_structure_check_reports_missing_instruction_sections():
    """An IFU text without a storage-conditions section is flagged; the present sample section is not."""
    ifu_text = "产品名称:甲胎蛋白检测试剂盒\n样本要求:血清样本\n有效期:12个月"
    document_texts = {"说明书.docx": ifu_text}

    findings = run_structure_check(document_texts, load_rule_file())

    reported_codes = [finding.rule_code for finding in findings]
    assert "instructions_for_use:储存条件" in reported_codes
    assert not any("样本要求" in finding.title for finding in findings)
|
||||||
|
|
||||||
|
|
||||||
|
def test_structure_check_reports_missing_attachment4_outline_heading():
    """A table of contents lacking chapter 4 produces a structure finding for clinical evaluation."""
    toc_text = "1. 监管信息\n1.2 申请表\n2. 综述资料\n3. 非临床资料\n"
    document_texts = {"申报资料目录.txt": toc_text}

    findings = run_structure_check(document_texts, load_rule_file())

    clinical_findings = (
        finding for finding in findings if finding.rule_code == "attachment4_4_clinical_evaluation"
    )
    missing_chapter = next(clinical_findings)
    assert missing_chapter.category == "structure"
    assert missing_chapter.title == "申报资料目录缺少4临床评价资料章节"
    assert missing_chapter.evidence["expected_title"] == "临床评价资料"
|
||||||
39
tests/test_regulatory_text_extract.py
Normal file
39
tests/test_regulatory_text_extract.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_text_reads_plain_text(tmp_path):
    """A UTF-8 ``.txt`` file is extracted successfully with its text and a content hash."""
    source_file = tmp_path / "说明书.txt"
    source_file.write_text("产品名称:甲胎蛋白检测试剂盒\n储存条件:2-8℃", encoding="utf-8")

    extraction = extract_text(source_file)

    assert "甲胎蛋白" in extraction.text
    assert extraction.status == "success"
    assert extraction.content_hash
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_text_keeps_wrapped_product_name(tmp_path):
    """A product name wrapped onto a second line is joined into one field candidate."""
    wrapped_lines = (
        "产品名称:呼吸道合胞病毒、肺炎支原体核酸检测试剂盒\n",
        "(荧光PCR法)\n",
        "型号规格:24人份/盒\n",
    )
    source_file = tmp_path / "申请表.txt"
    source_file.write_text("".join(wrapped_lines), encoding="utf-8")

    extraction = extract_text(source_file)

    candidates = extraction.field_candidates
    assert candidates["产品名称"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒 (荧光PCR法)"
    assert candidates["型号规格"] == "24人份/盒"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_text_reports_unsupported_file(tmp_path):
    """A ``.png`` file is reported as unsupported and yields empty text."""
    image_file = tmp_path / "image.png"
    image_file.write_bytes(b"png")

    extraction = extract_text(image_file)

    assert extraction.status == "unsupported"
    assert extraction.text == ""
|
||||||
136
tests/test_regulatory_views.py
Normal file
136
tests/test_regulatory_views.py
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
import pytest
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_batch_status_requires_owner(client, django_user_model):
    """The batch status endpoint returns 404 to another user and batch/node data to the owner."""
    create_user = django_user_model.objects.create_user
    owner = create_user(username="owner", password="pass")
    other = create_user(username="other", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-STATUS",
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        node_group="regulatory_review",
        node_code="prepare",
        node_name="准备",
        progress=50,
    )
    status_url = reverse("regulatory_review_batch_status", args=[batch.pk])

    # A user who does not own the batch must not see it.
    client.force_login(other)
    denied = client.get(status_url)
    assert denied.status_code == 404

    # The owner gets the batch header and its node runs.
    client.force_login(owner)
    allowed = client.get(status_url)
    assert allowed.status_code == 200
    body = allowed.json()
    assert body["batch"]["workflow_type"] == "regulatory_review"
    assert body["batch"]["batch_no"] == "RR-STATUS"
    assert body["nodes"][0]["node_code"] == "prepare"
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_batch_status_exposes_condition_confirmation(client, django_user_model):
    """A batch waiting for the user exposes its condition candidates in the status payload."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    category_candidate = {
        "label": "产品类别",
        "input_type": "select",
        "options": ["体外诊断试剂", "医疗器械", "其他"],
        "suggested": "体外诊断试剂",
    }
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-WAIT",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={
            "confirmed": False,
            "candidates": {"product_category": category_candidate},
        },
    )
    client.force_login(owner)

    response = client.get(reverse("regulatory_review_batch_status", args=[batch.pk]))

    body = response.json()
    confirmation = body["condition_confirmation"]
    assert body["batch"]["status"] == RegulatoryReviewBatch.Status.WAITING_USER
    assert confirmation["batch_id"] == batch.pk
    assert confirmation["candidates"]["product_category"]["suggested"] == "体外诊断试剂"
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_batch_status_refreshes_incomplete_condition_candidates(
    client, settings, tmp_path, django_user_model
):
    """Placeholder condition candidates are replaced by values derived from the summarized file.

    The batch starts with ``product_category`` suggested as "其他" and an empty
    ``product_name``; the status payload is expected to suggest the category
    and product name found in the application text instead.
    """
    # Not part of this module's top-level imports, so it is imported locally.
    from review_agent.models import FileSummaryItem

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
        product_name="第1章 监管信息",
    )
    application_file = tmp_path / "application.txt"
    application_text = (
        "卡尤迪生物科技宜兴有限公司申请境内第三类体外诊断试剂"
        "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)产品注册。"
    )
    application_file.write_text(application_text, encoding="utf-8")

    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        directory_level="第1章 监管信息",
        file_name="符合标准的清单.txt",
        file_type="txt",
        relative_path="第1章 监管信息/符合标准的清单.txt",
        storage_path=str(application_file),
    )
    stale_candidates = {
        "product_category": {"suggested": "其他"},
        "product_name": {"suggested": ""},
    }
    batch = RegulatoryReviewBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
        batch_no="RR-WAIT-EMPTY",
        status=RegulatoryReviewBatch.Status.WAITING_USER,
        condition_json={"confirmed": False, "candidates": stale_candidates},
    )
    client.force_login(owner)

    response = client.get(reverse("regulatory_review_batch_status", args=[batch.pk]))

    refreshed = response.json()["condition_confirmation"]["candidates"]
    assert refreshed["product_category"]["suggested"] == "体外诊断试剂"
    assert refreshed["product_name"]["suggested"] == "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)"
|
||||||
502
tests/test_regulatory_workflow.py
Normal file
502
tests/test_regulatory_workflow.py
Normal file
@@ -0,0 +1,502 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
FileAttachment,
|
||||||
|
FileSummaryBatch,
|
||||||
|
FileSummaryItem,
|
||||||
|
Message,
|
||||||
|
RegulatoryIssue,
|
||||||
|
RegulatoryArtifact,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
WorkflowEvent,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_review.workflow import (
|
||||||
|
NODE_DEFINITIONS,
|
||||||
|
RegulatoryWorkflowExecutor,
|
||||||
|
create_regulatory_review_batch,
|
||||||
|
find_latest_successful_summary_batch,
|
||||||
|
start_regulatory_review_workflow,
|
||||||
|
)
|
||||||
|
from review_agent.services import stream_message
|
||||||
|
from review_agent.skill_router import SkillRoute, route_message_intent
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_rule_router_starts_regulatory_review_for_nmpa_keywords(monkeypatch, django_user_model):
    """With the LLM router raising, an NMPA message is still routed to the regulatory review."""

    def failing_llm_router(conversation, content, attachments):
        # Stand-in for the LLM router that always fails.
        raise ValueError("fallback")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    monkeypatch.setattr("review_agent.skill_router._route_with_llm", failing_llm_router)

    route = route_message_intent(conversation, "请做NMPA核查和风险预警")

    assert route.action == "regulatory_review"
    assert route.workflow_type == "regulatory_review"
    assert route.starts_regulatory_review
|
||||||
|
|
||||||
|
|
||||||
|
def test_find_latest_successful_summary_batch_ignores_failed_batches(django_user_model):
    """A failed summary batch created after a successful one is not returned."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    shared_fields = {"conversation": conversation, "user": owner}
    successful = FileSummaryBatch.objects.create(
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
        **shared_fields,
    )
    # Created second, so it is the more recent batch.
    FileSummaryBatch.objects.create(
        batch_no="FS-FAILED",
        status=FileSummaryBatch.Status.FAILED,
        **shared_fields,
    )

    found = find_latest_successful_summary_batch(conversation)

    assert found == successful
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_regulatory_review_batch_initializes_nodes(django_user_model):
    """A new batch is pending, has one node run per node definition and a creation event."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    trigger = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="法规核查")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        trigger_message=trigger,
        source_summary_batch=summary_batch,
    )

    workflow_scope = {"workflow_type": "regulatory_review", "workflow_batch_id": batch.pk}
    assert batch.status == RegulatoryReviewBatch.Status.PENDING
    node_count = WorkflowNodeRun.objects.filter(**workflow_scope).count()
    assert node_count == len(NODE_DEFINITIONS)
    created_events = WorkflowEvent.objects.filter(event_type="workflow_created", **workflow_scope)
    assert created_events.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_start_regulatory_review_workflow_runs_synchronously(django_user_model):
    """Running with ``async_run=False`` finishes the batch and records a completion event."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    # Mark the conditions as already confirmed before the workflow runs.
    confirmed_conditions = {"product_category": "体外诊断试剂"}
    batch.condition_json = {"confirmed": True, "confirmed_conditions": confirmed_conditions}
    batch.save(update_fields=["condition_json"])

    start_regulatory_review_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.status == RegulatoryReviewBatch.Status.SUCCESS
    completion_events = WorkflowEvent.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
        event_type="workflow_completed",
    )
    assert completion_events.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_continues_when_llm_review_times_out(monkeypatch, settings, django_user_model):
    """An LLM completion that raises ``TimeoutError`` does not fail the batch or set an error message."""

    def timing_out_completion(messages, temperature=0.0):
        # Replacement for generate_completion that always times out.
        raise TimeoutError("The read operation timed out")

    settings.REGULATORY_LLM_REVIEW_ALLOW_TEST_CALLS = True
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    confirmed_conditions = {"product_category": "体外诊断试剂"}
    batch.condition_json = {"confirmed": True, "confirmed_conditions": confirmed_conditions}
    batch.save(update_fields=["condition_json"])
    monkeypatch.setattr(
        "review_agent.regulatory_review.services.llm_review.generate_completion",
        timing_out_completion,
    )

    start_regulatory_review_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.status == RegulatoryReviewBatch.Status.SUCCESS
    assert batch.error_message == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_workflow_logs_node_and_method_details(caplog, django_user_model):
    """The workflow logger emits start, node start/finish and method-execution messages at INFO."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    confirmed_conditions = {"product_category": "体外诊断试剂"}
    batch.condition_json = {"confirmed": True, "confirmed_conditions": confirmed_conditions}
    batch.save(update_fields=["condition_json"])

    with caplog.at_level(logging.INFO, logger="review_agent.regulatory_review.workflow"):
        start_regulatory_review_workflow(batch, async_run=False)

    logged_lines = [record.getMessage() for record in caplog.records]

    def was_logged(*fragments):
        # True when a single log line contains every given fragment.
        return any(all(fragment in line for fragment in fragments) for line in logged_lines)

    assert was_logged("法规核查工作流开始", batch.batch_no)
    assert was_logged("节点开始", "完整性核查")
    assert was_logged("方法执行", "run_completeness_check")
    assert was_logged("节点完成", "完整性核查")
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_message_prompts_for_summary_when_missing(monkeypatch, django_user_model):
    """Without any uploaded material the stream asks for files and creates no review batch."""

    def route_to_regulatory_review(conversation, content):
        # Force the router to pick the regulatory review workflow.
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)

    streamed = "".join(list(stream_message(conversation, "请做法规核查")))

    assert "请先在当前对话右侧上传需要核查的文件或压缩包" in streamed
    assert "我会先自动汇总再继续法规核查" in streamed
    assert not RegulatoryReviewBatch.objects.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_message_auto_runs_summary_before_regulatory_review(
    monkeypatch, settings, tmp_path, django_user_model
):
    """With only an active attachment present, the stream starts the summary and then the review.

    The summary workflow is replaced by a stub that marks the batch as
    successful; the resulting review batch must carry the chapter-1 rule scope
    taken from the user message.
    """

    def route_to_regulatory_review(conversation, content):
        # Force the router to pick the regulatory review workflow.
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    def complete_summary_immediately(batch, async_run=True):
        # Stub for the file-summary workflow: just mark the batch successful.
        batch.status = FileSummaryBatch.Status.SUCCESS
        batch.save(update_fields=["status"])

    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    uploaded_file = tmp_path / "application.txt"
    uploaded_file.write_text("产品名称:甲胎蛋白检测试剂盒", encoding="utf-8")
    FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="application.txt",
        storage_path=str(uploaded_file),
        file_size=uploaded_file.stat().st_size,
        is_active=True,
    )
    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)
    monkeypatch.setattr("review_agent.services.start_file_summary_workflow", complete_summary_immediately)

    frames = list(stream_message(conversation, "进行第一章NMPA 法规核查"))
    streamed = "".join(frames)

    assert "\"workflow_type\": \"file_summary\"" in streamed
    assert "\"workflow_type\": \"regulatory_review\"" in streamed
    assert "已先启动文件目录与页数自动汇总工作流" in streamed
    successful_summaries = FileSummaryBatch.objects.filter(
        conversation=conversation,
        status=FileSummaryBatch.Status.SUCCESS,
    )
    assert successful_summaries.exists()
    review_batch = RegulatoryReviewBatch.objects.get(conversation=conversation)
    assert review_batch.condition_json["rule_scope"]["attachment4_chapter"] == "1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_message_starts_regulatory_workflow(monkeypatch, settings, django_user_model):
    """With a successful summary batch present, the stream announces and creates a review batch."""

    def route_to_regulatory_review(conversation, content):
        # Force the router to pick the regulatory review workflow.
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)

    streamed = "".join(list(stream_message(conversation, "请做法规核查")))

    assert "workflow_started" in streamed
    assert "\"workflow_type\": \"regulatory_review\"" in streamed
    assert RegulatoryReviewBatch.objects.filter(conversation=conversation).exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_message_records_attachment4_chapter_scope(monkeypatch, settings, django_user_model):
    """A message naming the first chapter stores chapter "1" and its label in the batch rule scope."""

    def route_to_regulatory_review(conversation, content):
        # Force the router to pick the regulatory review workflow.
        return SkillRoute(
            action="regulatory_review",
            workflow_type="regulatory_review",
            confidence=0.9,
        )

    settings.REGULATORY_REVIEW_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_regulatory_review)

    # Exhaust the generator so the whole streaming flow runs.
    list(stream_message(conversation, "请做第一章 NMPA 法规核查"))

    review_batch = RegulatoryReviewBatch.objects.get(conversation=conversation)
    rule_scope = review_batch.condition_json["rule_scope"]
    assert rule_scope["attachment4_chapter"] == "1"
    assert rule_scope["label"] == "第1章 监管信息"
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_chapter_scope_only_checks_selected_attachment4_chapter(settings, tmp_path, django_user_model):
    """With the rule scope set to chapter 1, every created issue code starts with ``attachment4_1``."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    # Point the RAG store at a path that is never created in this test.
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    chapter_scope = {"attachment4_chapter": "1", "label": "第1章 监管信息"}
    batch.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
        "rule_scope": chapter_scope,
    }
    batch.save(update_fields=["condition_json"])

    start_regulatory_review_workflow(batch, async_run=False)

    scoped_issues = RegulatoryIssue.objects.filter(batch=batch)
    issue_codes = list(scoped_issues.values_list("rule_code", flat=True))
    assert issue_codes
    assert all(code.startswith("attachment4_1") for code in issue_codes)
    assert not any(code.startswith("attachment4_2") for code in issue_codes)
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_generates_issues_exports_and_assistant_summary(settings, tmp_path, django_user_model):
    """A full synchronous run yields blocking issues, three exports, artifacts and an assistant message."""
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    # Point the RAG store at a path that is never created in this test.
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    ifu_file = tmp_path / "ifu.txt"
    ifu_file.write_text("产品名称:甲胎蛋白检测试剂盒\n样本要求:血清\n有效期:12个月", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(ifu_file),
    )
    batch = create_regulatory_review_batch(
        conversation=conversation,
        user=owner,
        source_summary_batch=summary_batch,
    )
    confirmed_conditions = {"product_category": "体外诊断试剂"}
    batch.condition_json = {"confirmed": True, "confirmed_conditions": confirmed_conditions}
    batch.save(update_fields=["condition_json"])

    start_regulatory_review_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.status == RegulatoryReviewBatch.Status.SUCCESS
    assert RegulatoryIssue.objects.filter(batch=batch, severity="blocking").exists()
    export_count = ExportedSummaryFile.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=batch.pk,
    ).count()
    assert export_count == 3
    for artifact_name in ("text_extract_status.json", "rag_result_json.json"):
        assert RegulatoryArtifact.objects.filter(batch=batch, name=artifact_name).exists()
    assistant_summaries = conversation.messages.filter(
        role=Message.Role.ASSISTANT,
        content__contains="已完成 NMPA",
    )
    assert assistant_summaries.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_records_llm_review_artifacts_for_review_nodes(
    monkeypatch, settings, tmp_path, django_user_model
):
    """Run the workflow synchronously with a stubbed LLM reviewer.

    After the run, an ``llm_review_<node>.json`` artifact must be recorded
    for each of the five review nodes listed at the bottom of the test.
    """
    # Keep all file output inside the per-test temp dir; the RAG path is one
    # this test never creates.
    settings.MEDIA_ROOT = tmp_path
    settings.REGULATORY_REVIEW_ASYNC = False
    settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "missing-rag"

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    # Single source document on disk, registered as item 1 of the summary batch.
    source_file = tmp_path / "ifu.txt"
    source_file.write_text("产品名称:甲胎蛋白检测试剂盒\n型号规格:20人份/盒", encoding="utf-8")
    FileSummaryItem.objects.create(
        batch=summary_batch,
        file_index=1,
        file_name="说明书.txt",
        file_type="txt",
        relative_path="说明书.txt",
        storage_path=str(source_file),
    )

    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    review_batch.condition_json = {
        "confirmed": True,
        "confirmed_conditions": {"product_category": "体外诊断试剂"},
    }
    review_batch.save(update_fields=["condition_json"])

    def fake_review(stage, payload):
        # Canned "success" reply that echoes the stage it was asked about.
        return {"status": "success", "stage": stage, "result": {"reviewed": True}, "error_message": ""}

    monkeypatch.setattr(
        "review_agent.regulatory_review.workflow.review_workflow_payload",
        fake_review,
    )

    start_regulatory_review_workflow(review_batch, async_run=False)

    recorded = RegulatoryArtifact.objects.filter(batch=review_batch).values_list("name", flat=True)
    artifact_names = set(recorded)
    expected_artifacts = (
        "llm_review_completeness_check.json",
        "llm_review_text_extract.json",
        "llm_review_structure_check.json",
        "llm_review_consistency_check.json",
        "llm_review_risk_assess.json",
    )
    for expected in expected_artifacts:
        assert expected in artifact_names
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_progress_uses_processed_file_counts(settings, tmp_path, django_user_model):
    """Extract text from three source files and check the progress bookkeeping.

    The ``text_extract`` node must end at progress 95 with a message naming
    the 3/3 count and the last file, and a ``node_progress`` event carrying
    ``processed=3`` / ``total=3`` must have been stored.
    """
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    # One small text file per name, registered in order starting at index 1.
    source_names = ["注册信息.txt", "说明书.txt", "综述.txt"]
    for position, file_name in enumerate(source_names, start=1):
        source_path = tmp_path / file_name
        source_path.write_text(f"产品名称:甲胎蛋白检测试剂盒\n文件:{file_name}", encoding="utf-8")
        FileSummaryItem.objects.create(
            batch=summary_batch,
            file_index=position,
            file_name=file_name,
            file_type="txt",
            relative_path=file_name,
            storage_path=str(source_path),
        )

    review_batch = create_regulatory_review_batch(
        conversation=chat,
        user=owner,
        source_summary_batch=summary_batch,
    )
    node_lookup = {
        "workflow_type": "regulatory_review",
        "workflow_batch_id": review_batch.pk,
        "node_code": "text_extract",
    }
    extract_node = WorkflowNodeRun.objects.get(**node_lookup)
    executor = RegulatoryWorkflowExecutor(review_batch)

    extracted = executor._extract_source_texts(extract_node)

    # Reload the node row so the assertions see what was persisted.
    extract_node.refresh_from_db()
    assert len(extracted) == 3
    assert extract_node.progress == 95
    assert "文本抽取 3/3" in extract_node.message
    assert "综述.txt" in extract_node.message

    progress_events = WorkflowEvent.objects.filter(
        workflow_type="regulatory_review",
        workflow_batch_id=review_batch.pk,
        event_type="node_progress",
        **{
            "payload__node_code": "text_extract",
            "payload__processed": 3,
            "payload__total": 3,
        },
    )
    assert progress_events.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_review_services_emit_actual_workload_progress_callbacks(django_user_model):
    """Check the final progress update emitted by each review service.

    Each service is called with a list's ``append`` as ``progress_callback``;
    the last collected update must report ``processed == total`` with the
    counts asserted below (2 requirements, or ``len(FIELDS)`` fields).
    """
    from review_agent.regulatory_review.services.completeness_check import run_completeness_check
    from review_agent.regulatory_review.services.consistency_check import FIELDS, run_consistency_check
    from review_agent.regulatory_review.services.structure_check import run_structure_check

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OK",
        status=FileSummaryBatch.Status.SUCCESS,
    )

    # Two required items, so the rule-driven services have a workload of 2.
    registration_rule = {"code": "r1", "title": "注册信息", "type": "required", "file_keywords": ["注册信息"]}
    manual_rule = {"code": "r2", "title": "说明书", "type": "required", "file_keywords": ["说明书"]}
    rule_set = {"requirements": [registration_rule, manual_rule]}

    completeness_updates = []
    structure_updates = []
    consistency_updates = []

    run_completeness_check(summary_batch, rule_set, progress_callback=completeness_updates.append)
    run_structure_check({"注册信息.txt": "注册信息"}, rule_set, progress_callback=structure_updates.append)
    run_consistency_check({"注册信息.txt": "产品名称:A"}, progress_callback=consistency_updates.append)

    last_completeness = completeness_updates[-1]
    assert last_completeness["processed"] == 2
    assert last_completeness["total"] == 2
    assert last_completeness["label"] == "说明书"

    last_structure = structure_updates[-1]
    assert last_structure["processed"] == 2
    assert last_structure["total"] == 2

    last_consistency = consistency_updates[-1]
    field_count = len(FIELDS)
    assert last_consistency["processed"] == field_count
    assert last_consistency["total"] == field_count
|
||||||
Reference in New Issue
Block a user