Compare commits
20 Commits
ccfa43645e
...
V2
| Author | SHA1 | Date | |
|---|---|---|---|
| 7def60f1b6 | |||
| 9c6cad481c | |||
| 1bf8634373 | |||
| 3bcf9647a1 | |||
| cf4f4456c4 | |||
| b728703e67 | |||
| 6d4b519f83 | |||
| dcd829e821 | |||
| dac8ce3c14 | |||
| f0286264e2 | |||
|
|
a060c23ba7 | ||
|
|
db0e94cf26 | ||
|
|
dce7045a46 | ||
| 8548b6d2b4 | |||
| 26e675e5d3 | |||
| 42187bf8e9 | |||
| 18548eb78f | |||
| 2b5093040d | |||
| d8cd95e590 | |||
| 681cb03eb9 |
4
.env
4
.env
@@ -6,7 +6,9 @@ DJANGO_ALLOWED_HOSTS=*
|
|||||||
LLM_PROVIDER=openai_compatible
|
LLM_PROVIDER=openai_compatible
|
||||||
LLM_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
LLM_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
||||||
LLM_BASE_URL=https://api.siliconflow.cn/v1
|
LLM_BASE_URL=https://api.siliconflow.cn/v1
|
||||||
LLM_MODEL=Qwen/Qwen2.5-7B-Instruct
|
LLM_MODEL=deepseek-ai/DeepSeek-V4-Pro
|
||||||
|
SILICONFLOW_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
|
||||||
|
SILICONFLOW_EMBEDDING_DIMENSIONS=4096
|
||||||
|
|
||||||
# SiliconFlow embedding model for RAG
|
# SiliconFlow embedding model for RAG
|
||||||
EMBEDDING_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
EMBEDDING_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ REGULATORY_LLM_REVIEW_MAX_ATTEMPTS = int(os.environ.get("REGULATORY_LLM_REVIEW_M
|
|||||||
REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", "0.5"))
|
REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", "0.5"))
|
||||||
REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", "15"))
|
REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", "15"))
|
||||||
SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1")
|
SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1")
|
||||||
SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", "")
|
SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", LLM_API_KEY)
|
||||||
SILICONFLOW_EMBEDDING_MODEL = os.environ.get(
|
SILICONFLOW_EMBEDDING_MODEL = os.environ.get(
|
||||||
"SILICONFLOW_EMBEDDING_MODEL",
|
"SILICONFLOW_EMBEDDING_MODEL",
|
||||||
"Qwen/Qwen3-Embedding-4B",
|
"Qwen/Qwen3-Embedding-4B",
|
||||||
|
|||||||
BIN
docs/0.原始材料/目标产品说明书.docx
Normal file
BIN
docs/0.原始材料/目标产品说明书.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息.rar
Normal file
BIN
docs/0.原始材料/第1章 监管信息.rar
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.1 符合标准的清单.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.1 符合标准的清单.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.5 真实性声明.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.5 真实性声明.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.6 符合性声明.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.6 符合性声明.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.2 监管信息目录.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.2 监管信息目录.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.4 申请表.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.4 申请表.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.5 产品列表.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.5 产品列表.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.9 产品申报前沟通的说明.doc
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.9 产品申报前沟通的说明.doc
Normal file
Binary file not shown.
450
docs/1.需求分析/5.第1章监管信息材料包生成.md
Normal file
450
docs/1.需求分析/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,450 @@
|
|||||||
|
# 第1章监管信息材料包生成需求分析
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始输入 | docs/0.原始材料/目标产品说明书.docx |
|
||||||
|
| 样例模板 | docs/0.原始材料/第1章 监管信息 |
|
||||||
|
| 法规材料 | docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告 |
|
||||||
|
| 功能主题 | 从产品说明书生成第1章监管信息材料包 |
|
||||||
|
| 工作流名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 批次号规则 | RIP-YYYYMMDDHHMMSS-abcdef |
|
||||||
|
| 分析日期 | 2026-06-10 |
|
||||||
|
| 分析版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、需求背景
|
||||||
|
|
||||||
|
体外诊断试剂注册申报资料中,第1章监管信息包含监管信息目录、申请表、产品列表、申报前沟通说明、符合标准清单、真实性声明和符合性声明等材料。注册人员通常需要根据产品说明书、企业信息和法规要求手工整理这些文件,容易出现产品名称、包装规格、组成成分、预期用途等字段重复录入、漏填、格式不一致和待补信息不醒目的问题。
|
||||||
|
|
||||||
|
本需求新增独立工作流:用户上传或选择一个产品说明书后,系统以既有 `第1章 监管信息` 样例文件作为模板,抽取说明书中的产品关键信息,生成一套类似样例目录的第1章监管信息材料包。生成结果以 zip 压缩包作为主下载入口,同时保留单文件辅助下载。
|
||||||
|
|
||||||
|
该工作流可以复用现有自动填表工作流中已拆分出的字段抽取、LLM 调用、Word 写入、导出下载、批次事件和通知能力,但不并入 `application_form_fill`,而是作为独立工作流建设。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、需求范围
|
||||||
|
|
||||||
|
### 2.1 本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 独立工作流 | 新增 `regulatory_info_package`,不复用 `application_form_fill` 的 workflow_type |
|
||||||
|
| 2 | 单说明书输入 | 本期只支持一个产品说明书作为主输入 |
|
||||||
|
| 3 | 模板复用 | 以 `docs/0.原始材料/第1章 监管信息` 下的样例文件作为生成模板 |
|
||||||
|
| 4 | 固定输出文件 | 固定生成 7 个第1章监管信息文件 |
|
||||||
|
| 5 | 代码抽取与 LLM 抽取并行 | 规则/代码抽取与 LLM 结构化抽取并行处理,合并后写入模板 |
|
||||||
|
| 6 | 尽量多填 | 对说明书中可识别的产品名称、包装规格、预期用途、组成成分、储存条件、适用仪器、样本类型、检测靶标等字段尽量填入 |
|
||||||
|
| 7 | 缺失项标记 | 系统新填入的缺失项使用 `/`,并设置黄色底色提醒负责人补充 |
|
||||||
|
| 8 | LLM-only 标记 | 代码抽取未取到但 LLM 抽取到的字段,也需要在输出文件中高亮提示人工复核 |
|
||||||
|
| 9 | 模板字段化 | 优先将样例模板整理为 Agent/代码可识别字段模板,使用内容控件 Tag 或稳定占位符,代码只填内容不手改格式 |
|
||||||
|
| 10 | doc 能力增强 | `.doc` 文档按能力驱动处理:有原生能力时优先原生写入,无原生能力时明确记录并允许 `.docx` 兜底,不静默输出未改写文件 |
|
||||||
|
| 11 | zip 主输出 | 生成 `第1章 监管信息(预生成版).zip` 作为主下载入口,单文件作为辅助下载 |
|
||||||
|
| 12 | 对话唤起提示 | 在对话框底部增加本工作流的唤起提示词 |
|
||||||
|
| 13 | LLM 意图判断 | 触发判断不能只依赖固定关键词,需要引入 LLM 判断用户是否要生成第1章监管信息材料包 |
|
||||||
|
|
||||||
|
### 2.2 非本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 多资料综合生成 | 本期不从产品技术要求、检验报告、企业证照等多文件综合生成 |
|
||||||
|
| 2 | 人工在线编辑 | 本期只生成文件并标记待确认项,不提供网页内字段编辑 |
|
||||||
|
| 3 | 自动保证法规最终准确 | 标准清单、分类编码、管理类别等无法从说明书确认的信息仍需负责人确认 |
|
||||||
|
| 4 | 自动提交监管系统 | 本期只生成申报材料包,不对接外部申报平台 |
|
||||||
|
| 5 | 版式人工校订替代 | 系统尽量保持模板版式,但最终提交前仍需人工核对 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、输入与触发
|
||||||
|
|
||||||
|
### 3.1 输入文件规则
|
||||||
|
|
||||||
|
| 场景 | 处理规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户上传一个 `.docx` 说明书 | 直接作为本次输入 |
|
||||||
|
| 用户上传多个文件 | 优先选择文件名包含“说明书”的 `.docx` |
|
||||||
|
| 多个说明书候选 | 工作流进入待确认状态,提示用户选择 |
|
||||||
|
| 未找到说明书 | 提示用户上传产品说明书 |
|
||||||
|
| 非 `.docx` 说明书 | 本期可提示格式不支持,后续扩展 `.doc`、PDF 或 OCR |
|
||||||
|
|
||||||
|
### 3.2 对话触发规则
|
||||||
|
|
||||||
|
固定提示词需要支持:
|
||||||
|
|
||||||
|
| 触发表达 | 触发结果 |
|
||||||
|
| --- | --- |
|
||||||
|
| 根据说明书生成第1章监管信息 | 启动第1章监管信息材料包生成 |
|
||||||
|
| 生成监管信息材料包 | 启动第1章监管信息材料包生成 |
|
||||||
|
| 从说明书生成第1章材料 | 启动第1章监管信息材料包生成 |
|
||||||
|
|
||||||
|
除固定表达外,系统需要引入 LLM 意图判断。当用户自然语言表达包含“根据说明书”“第1章”“监管信息”“材料包”“申请表/产品列表/声明”等意图组合时,LLM 可判断为 `regulatory_info_package`。规则命中优先,规则未命中时再进入 LLM 路由,避免只靠固定模板。
|
||||||
|
|
||||||
|
### 3.3 对话框底部唤起提示
|
||||||
|
|
||||||
|
对话框底部快捷提示词新增:
|
||||||
|
|
||||||
|
```text
|
||||||
|
根据说明书生成第1章监管信息
|
||||||
|
```
|
||||||
|
|
||||||
|
后续可追加:
|
||||||
|
|
||||||
|
```text
|
||||||
|
生成监管信息材料包
|
||||||
|
从说明书生成第1章材料
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、输出文件范围
|
||||||
|
|
||||||
|
本期固定生成与样例目录一致的 7 个文件:
|
||||||
|
|
||||||
|
| 序号 | 输出文件 | 模板来源 | 生成规则 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 1 | CH1.2 监管信息目录.docx | 样例 `CH1.2 监管信息目录.docx` | 替换产品名称,目录结构和页码沿用样例 |
|
||||||
|
| 2 | CH1.4 申请表.docx | 样例 `CH1.4 申请表.docx` | 尽量填入说明书字段,未知项填 `/` 并黄底 |
|
||||||
|
| 3 | CH1.5 产品列表.docx | 样例 `CH1.5 产品列表.docx` | 按样例表头重建产品列表,货号填 `/` 并黄底 |
|
||||||
|
| 4 | CH1.9 产品申报前沟通的说明.doc | 样例 `CH1.9 产品申报前沟通的说明.doc` | `.doc` 应支持与 `.docx` 等价的替换能力 |
|
||||||
|
| 5 | CH1.11.1 符合标准的清单.docx | 样例 `CH1.11.1 符合标准的清单.docx` | 从说明书和 RAG/法规知识库提取或推荐标准,非明确项需高亮待确认 |
|
||||||
|
| 6 | CH1.11.5 真实性声明.docx | 样例 `CH1.11.5 真实性声明.docx` | 保留样例正文结构,替换产品名称,公司名位置黄底 `/` |
|
||||||
|
| 7 | CH1.11.6 符合性声明.docx | 样例 `CH1.11.6 符合性声明.docx` | 保留样例正文结构,替换产品名称,公司名位置黄底 `/` |
|
||||||
|
|
||||||
|
### 4.1 下载形态
|
||||||
|
|
||||||
|
| 输出类型 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| zip 主入口 | 生成 `第1章 监管信息(预生成版).zip`,只包含成功或兜底成功的文件 |
|
||||||
|
| 单文件下载 | 每个生成文件均可作为辅助下载项展示 |
|
||||||
|
| 追溯清单 | 建议生成 JSON/Excel,记录字段来源、抽取方式、高亮原因和待确认项 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、字段抽取与填写规则
|
||||||
|
|
||||||
|
### 5.1 抽取字段范围
|
||||||
|
|
||||||
|
系统应从说明书中尽量抽取以下字段:
|
||||||
|
|
||||||
|
| 字段 | 示例来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | `【产品名称】` |
|
||||||
|
| 包装规格 | `【包装规格】` |
|
||||||
|
| 预期用途 | `【预期用途】` |
|
||||||
|
| 检测原理/方法原理 | `【检测原理】` |
|
||||||
|
| 主要组成成分 | `【主要组成成分】` 及其下方表格 |
|
||||||
|
| 储存条件及有效期 | `【储存条件及有效期】` |
|
||||||
|
| 样本类型 | `【样本要求】` 中的适用样本类型 |
|
||||||
|
| 检测靶标 | 预期用途或检测原理中的基因、病原体、抗原、抗体等 |
|
||||||
|
| 适用仪器 | `【适用仪器】` |
|
||||||
|
| 检验方法 | `【检验方法】` |
|
||||||
|
| 生产日期和使用期限描述 | 储存条件章节 |
|
||||||
|
|
||||||
|
字段抽取采用规则/代码抽取与 LLM 结构化抽取并行模式:
|
||||||
|
|
||||||
|
```text
|
||||||
|
读取说明书
|
||||||
|
-> 规则/代码抽取
|
||||||
|
-> LLM 结构化抽取
|
||||||
|
-> 字段合并
|
||||||
|
-> 标记字段来源和置信度
|
||||||
|
-> 写入模板
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 合并与高亮规则
|
||||||
|
|
||||||
|
| 场景 | 处理规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 代码抽取和 LLM 都命中且结果一致 | 正常写入,不强制高亮 |
|
||||||
|
| 代码抽取和 LLM 都命中但结果不一致 | 优先按规则配置选择,写入值高亮并进入追溯清单 |
|
||||||
|
| 代码抽取未命中,LLM 命中 | 写入 LLM 值,并高亮提示人工复核 |
|
||||||
|
| 代码抽取命中,LLM 未命中 | 正常写入,追溯记录代码抽取来源 |
|
||||||
|
| 两者均未命中 | 写入 `/` 并设置黄色底色 |
|
||||||
|
| 企业信息缺失 | 写入 `/` 并设置黄色底色 |
|
||||||
|
|
||||||
|
高亮含义:
|
||||||
|
|
||||||
|
| 高亮类型 | 视觉要求 | 含义 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 缺失项高亮 | 黄色底色 | 说明书无法提供,负责人需填写 |
|
||||||
|
| LLM-only 高亮 | 黄色底色,可在追溯清单标记 `llm_only` | 代码未抽到,仅 LLM 推断,需要复核 |
|
||||||
|
| 冲突高亮 | 黄色底色,可配合红色字体 | 规则结果与 LLM 结果不一致 |
|
||||||
|
|
||||||
|
仅标记系统新填入的缺失项或需复核项。样例模板中原本存在的 `/` 不统一高亮,避免整份文件过度标记。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、各文件生成规则
|
||||||
|
|
||||||
|
### 6.1 CH1.2 监管信息目录
|
||||||
|
|
||||||
|
| 项目 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | 替换为说明书抽取的产品名称 |
|
||||||
|
| 目录条目 | 沿用样例目录结构 |
|
||||||
|
| 适用情况 | 沿用样例 |
|
||||||
|
| 资料名称 | 沿用样例 |
|
||||||
|
| 页码 | 沿用样例页码 |
|
||||||
|
|
||||||
|
### 6.2 CH1.4 申请表
|
||||||
|
|
||||||
|
| 字段类型 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | 从说明书抽取 |
|
||||||
|
| 包装规格 | 从说明书抽取 |
|
||||||
|
| 主要组成成分 | 优先使用说明书组成成分摘要或附件提示 |
|
||||||
|
| 预期用途 | 从说明书抽取 |
|
||||||
|
| 产品储存条件及有效期 | 从说明书抽取 |
|
||||||
|
| 方法原理 | 从说明书检测原理抽取 |
|
||||||
|
| 产品类别 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 分类编码 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 临床评价路径 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 申请人信息 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 联系人、法定代表人、邮箱、组织机构代码 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 生产地址 | 缺失,填 `/` 并黄底 |
|
||||||
|
|
||||||
|
管理类别、分类编码、临床评价路径、UDI、国家标准品/强制标准等不得根据经验自动下结论,全部按待确认处理。
|
||||||
|
|
||||||
|
### 6.3 CH1.5 产品列表
|
||||||
|
|
||||||
|
产品列表需要转成样例表头:
|
||||||
|
|
||||||
|
| 包装规格 | 货号 | 组成 | 组分 | 主要组成成分 | 规格/数量 |
|
||||||
|
| --- | --- | --- | --- | --- | --- |
|
||||||
|
|
||||||
|
生成规则:
|
||||||
|
|
||||||
|
| 字段 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 包装规格 | 从说明书组成成分表的规格列或包装规格章节抽取 |
|
||||||
|
| 货号 | 说明书未提供,填 `/` 并黄底 |
|
||||||
|
| 组成 | 根据组分名称推断为反应液、质控品、处理液、增强剂等;无法判断则填 `/` 并黄底 |
|
||||||
|
| 组分 | 使用说明书表格中的组分名称 |
|
||||||
|
| 主要组成成分 | 使用说明书表格中的主要组成成分 |
|
||||||
|
| 规格/数量 | 使用说明书表格中的对应规格数量 |
|
||||||
|
|
||||||
|
目标产品说明书中存在规格A大包装、规格A分管包装、规格B大管包装等多个组成表,系统应尽量展开为多行产品列表。
|
||||||
|
|
||||||
|
### 6.4 CH1.9 产品申报前沟通的说明
|
||||||
|
|
||||||
|
`CH1.9` 当前为 `.doc` 格式。本工作流要求 `.doc` 文档具备与 `.docx` 等价的处理能力,即模板复制、文本定位、字段替换、高亮标记、导出和打包均应支持 `.doc`。
|
||||||
|
|
||||||
|
实现上不应只把转换作为唯一方案。可选技术路径包括:
|
||||||
|
|
||||||
|
| 路径 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原生 `.doc` 处理 | 优先探索可直接读取和写入 `.doc` 的库、COM 或二进制文档处理能力 |
|
||||||
|
| Office/COM 自动化 | Windows 环境下通过 Word COM 直接打开 `.doc` 并原格式写入保存 |
|
||||||
|
| LibreOffice UNO/API | 通过 LibreOffice API 直接处理旧版 Word,而不只作为离线预转换 |
|
||||||
|
| 转换兜底 | 当原生处理不可用时,可作为兜底手段,但不能作为需求定义中的唯一能力 |
|
||||||
|
|
||||||
|
如运行环境不具备 `.doc` 写入能力,工作流应明确给出失败原因或降级提示,不应静默输出未改写文件。
|
||||||
|
|
||||||
|
### 6.5 CH1.11.1 符合标准的清单
|
||||||
|
|
||||||
|
生成规则:
|
||||||
|
|
||||||
|
| 来源 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 说明书明确出现的标准号 | 可直接写入,并记录来源片段 |
|
||||||
|
| RAG/法规知识库命中的候选标准 | 可作为候选写入或追溯提示,但需高亮待确认 |
|
||||||
|
| 样例中的标准清单 | 不可无条件沿用 |
|
||||||
|
| 无法确认的标准 | 填 `/` 并黄底 |
|
||||||
|
|
||||||
|
法规材料目录中存在 `医疗器械注册申报资料和批准证明文件格式要求(体外诊断试剂).doc`、`体外诊断试剂注册申报资料要求及说明.doc`、`体外诊断试剂安全和性能基本原则清单.doc` 等材料。其中安全和性能基本原则清单属于第3章非临床资料,不直接等同于 `CH1.11.1 符合标准的清单`。系统应优先查询已上传 RAG/法规知识库来确认标准清单要求;未命中时不得强行套用样例标准。
|
||||||
|
|
||||||
|
### 6.6 CH1.11.5 真实性声明
|
||||||
|
|
||||||
|
| 项目 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 正文结构 | 保留样例结构 |
|
||||||
|
| 产品名称 | 替换为说明书抽取的产品名称 |
|
||||||
|
| 公司名/申请人 | 填 `/` 并黄底 |
|
||||||
|
| 日期 | 使用当天日期 |
|
||||||
|
| 材料列表 | 沿用样例材料列表 |
|
||||||
|
|
||||||
|
### 6.7 CH1.11.6 符合性声明
|
||||||
|
|
||||||
|
| 项目 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 正文结构 | 保留样例结构 |
|
||||||
|
| 产品名称 | 替换为说明书抽取的产品名称 |
|
||||||
|
| 公司名/申请人 | 填 `/` 并黄底 |
|
||||||
|
| 日期 | 使用当天日期 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、工作流设计
|
||||||
|
|
||||||
|
### 7.1 主流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户上传或选择产品说明书
|
||||||
|
-> 用户触发“根据说明书生成第1章监管信息”
|
||||||
|
-> 系统通过规则和 LLM 判断工作流意图
|
||||||
|
-> 创建 regulatory_info_package 批次
|
||||||
|
-> 校验输入说明书
|
||||||
|
-> 复制第1章监管信息样例模板到批次目录
|
||||||
|
-> 抽取说明书文本、段落和表格
|
||||||
|
-> 规则/代码抽取字段
|
||||||
|
-> LLM 结构化抽取字段
|
||||||
|
-> 合并字段并识别缺失、LLM-only 和冲突项
|
||||||
|
-> 生成 7 个目标文件
|
||||||
|
-> 对缺失项、LLM-only 项和冲突项进行高亮
|
||||||
|
-> 生成追溯清单
|
||||||
|
-> 打包第1章监管信息 zip
|
||||||
|
-> 写入导出记录
|
||||||
|
-> 对话框展示 zip 主下载入口、单文件下载和待确认摘要
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 节点建议
|
||||||
|
|
||||||
|
| 节点编码 | 节点名称 | 成功条件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| prepare | 准备资料 | 找到唯一说明书输入 |
|
||||||
|
| template_copy | 复制模板 | 7 个样例模板复制到批次目录 |
|
||||||
|
| text_extract | 抽取说明书 | 提取说明书段落和表格 |
|
||||||
|
| field_extract | 抽取字段 | 规则和 LLM 抽取结果均留底 |
|
||||||
|
| field_merge | 合并字段 | 输出最终字段、缺失项、LLM-only 项和冲突项 |
|
||||||
|
| generate_docs | 生成材料 | 7 个文件生成完成 |
|
||||||
|
| highlight_review_items | 标记待确认 | 缺失项、LLM-only、冲突项完成高亮 |
|
||||||
|
| trace_export | 追溯清单 | 生成 JSON/Excel 追溯清单 |
|
||||||
|
| zip_export | 打包下载 | 生成 `第1章 监管信息(预生成版).zip` |
|
||||||
|
| completed | 完成 | 更新批次状态并返回下载摘要 |
|
||||||
|
|
||||||
|
### 7.3 状态建议
|
||||||
|
|
||||||
|
| 状态 | 含义 |
|
||||||
|
| --- | --- |
|
||||||
|
| pending | 已创建,等待执行 |
|
||||||
|
| running | 执行中 |
|
||||||
|
| waiting_user | 多个说明书或缺少说明书,等待用户确认 |
|
||||||
|
| success | zip 和必要单文件生成成功 |
|
||||||
|
| partial_success | zip 已生成,但部分 `.doc`、追溯清单或高亮处理失败 |
|
||||||
|
| failed | 关键文件均未生成 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、数据与产物
|
||||||
|
|
||||||
|
### 8.1 批次数据
|
||||||
|
|
||||||
|
建议新增独立批次模型或等价数据结构,记录:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch_no | RIP 批次号 |
|
||||||
|
| workflow_type | regulatory_info_package |
|
||||||
|
| conversation | 所属对话 |
|
||||||
|
| user | 发起用户 |
|
||||||
|
| trigger_message | 触发消息 |
|
||||||
|
| source_instruction_file | 输入说明书 |
|
||||||
|
| product_name | 抽取到的产品名称 |
|
||||||
|
| status | 批次状态 |
|
||||||
|
| work_dir | 批次工作目录 |
|
||||||
|
| missing_fields | 缺失字段清单 |
|
||||||
|
| llm_only_fields | 仅 LLM 命中的字段 |
|
||||||
|
| conflict_fields | 冲突字段 |
|
||||||
|
| risk_notes | `.doc` 处理、标准清单待确认等风险提示 |
|
||||||
|
|
||||||
|
### 8.2 追溯清单
|
||||||
|
|
||||||
|
追溯清单至少记录:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| target_file | 目标文件 |
|
||||||
|
| target_field | 目标字段 |
|
||||||
|
| final_value | 写入值 |
|
||||||
|
| extraction_source | rule、llm、missing、rag_candidate |
|
||||||
|
| evidence | 来源片段 |
|
||||||
|
| highlight_reason | missing、llm_only、conflict、rag_candidate |
|
||||||
|
| needs_review | 是否需要负责人确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、界面与交互
|
||||||
|
|
||||||
|
### 9.1 对话回复
|
||||||
|
|
||||||
|
工作流完成后,对话框展示:
|
||||||
|
|
||||||
|
| 信息 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次号 | RIP 批次号 |
|
||||||
|
| 产品名称 | 抽取到的产品名称 |
|
||||||
|
| 主下载 | `第1章 监管信息(预生成版).zip` |
|
||||||
|
| 单文件下载 | 7 个文件列表 |
|
||||||
|
| 待确认摘要 | 缺失字段数、LLM-only 字段数、冲突字段数 |
|
||||||
|
| `.doc` 状态 | CH1.9 是否成功完成 `.doc` 写入 |
|
||||||
|
| 标准清单提示 | 标准来源和待确认说明 |
|
||||||
|
|
||||||
|
### 9.2 工作流卡片
|
||||||
|
|
||||||
|
前端需新增 `regulatory_info_package` 工作流卡片,展示节点状态和导出结果。对话框底部新增快捷唤起提示词:
|
||||||
|
|
||||||
|
```text
|
||||||
|
根据说明书生成第1章监管信息
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、异常与降级
|
||||||
|
|
||||||
|
| 异常场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 未上传说明书 | 提示用户上传产品说明书 |
|
||||||
|
| 多个说明书候选 | 进入 waiting_user,提示选择 |
|
||||||
|
| 产品名称未抽到 | 目标文件产品名位置填 `/` 并黄底 |
|
||||||
|
| 企业信息缺失 | 相关位置填 `/` 并黄底 |
|
||||||
|
| LLM 调用失败 | 使用规则抽取结果继续生成,并记录风险提示 |
|
||||||
|
| 规则抽取失败 | 使用 LLM 结果继续生成,LLM-only 字段高亮 |
|
||||||
|
| RAG/法规知识库不可用 | 标准清单不自动套用样例,写入 `/` 并黄底 |
|
||||||
|
| `.doc` 原生处理失败 | 记录失败原因并按配置尝试 `.docx` 兜底;兜底仍失败时批次标记 partial_success 或 failed,明确提示 CH1.9 处理失败原因 |
|
||||||
|
| zip 打包失败 | 保留单文件下载,并提示压缩包生成失败 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 触发识别 | 用户输入“根据说明书生成第1章监管信息”可启动 `regulatory_info_package` |
|
||||||
|
| 2 | LLM 路由 | 非固定话术但语义明确时,可由 LLM 判断进入本工作流 |
|
||||||
|
| 3 | 输入选择 | 单说明书可直接执行,多说明书进入待确认 |
|
||||||
|
| 4 | 输出文件 | 生成 7 个与样例同名或同语义的第1章文件 |
|
||||||
|
| 5 | zip 下载 | 生成 `第1章 监管信息(预生成版).zip` 作为主下载入口 |
|
||||||
|
| 6 | 单文件下载 | 7 个生成文件均可单独下载 |
|
||||||
|
| 7 | 产品名称替换 | 目录、申请表、声明类文件中的产品名称替换为说明书产品名称 |
|
||||||
|
| 8 | 产品列表 | CH1.5 使用样例表头展开说明书组成成分,货号填 `/` 并黄底 |
|
||||||
|
| 9 | 缺失项高亮 | 系统新填入的 `/` 均有黄色底色 |
|
||||||
|
| 10 | LLM-only 高亮 | 代码未抽到但 LLM 抽到的字段在文件中高亮 |
|
||||||
|
| 11 | 标准清单 | 不无条件沿用样例标准;无法确认时填 `/` 并黄底 |
|
||||||
|
| 12 | 日期 | 声明类文件日期使用当天日期 |
|
||||||
|
| 13 | `.doc` 支持 | CH1.9 `.doc` 具备与 `.docx` 等价的处理能力,失败时明确提示 |
|
||||||
|
| 14 | 追溯清单 | 输出字段来源、抽取方式和高亮原因 |
|
||||||
|
| 15 | 权限隔离 | 用户只能访问自己对话下的批次和导出文件 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、已确认结论
|
||||||
|
|
||||||
|
| 编号 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| D1 | 输出范围固定为样例第1章监管信息目录下的 7 个文件 |
|
||||||
|
| D2 | 样例文件作为模板使用,不只是效果参考 |
|
||||||
|
| D3 | 企业信息、申请人信息缺失时不沿用样例公司,填 `/` 并黄底 |
|
||||||
|
| D4 | 管理类别、分类编码、临床评价路径等无法从说明书确认的信息填 `/` 并黄底 |
|
||||||
|
| D5 | 产品列表货号说明书未提供,填 `/` 并黄底 |
|
||||||
|
| D6 | 标准清单不得无条件沿用样例,优先从说明书和 RAG/法规知识库确认 |
|
||||||
|
| D7 | 声明日期使用当天日期 |
|
||||||
|
| D8 | 新建独立工作流,可复用原自动填表工作流拆出的 skill/service |
|
||||||
|
| D9 | 需求分析文档新增为 `docs/1.需求分析/5.第1章监管信息材料包生成.md` |
|
||||||
|
| D10 | zip 作为主入口,单文件作为辅助下载 |
|
||||||
|
| D11 | 对话框底部增加工作流唤起提示词 |
|
||||||
|
| D12 | 模板优先字段化,使用内容控件 Tag 或稳定占位符服务 Agent/代码填充,行标签定位仅作为兜底 |
|
||||||
|
| D13 | `.doc` 要按能力驱动实现与 `.docx` 等价能力;原生能力不可用时允许 `.docx` 兜底并明确提示 |
|
||||||
|
| D14 | 触发判断需要引入 LLM,不只依赖固定关键词 |
|
||||||
873
docs/2.功能设计/5.第1章监管信息材料包生成.md
Normal file
873
docs/2.功能设计/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,873 @@
|
|||||||
|
# 第1章监管信息材料包生成功能设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 参考功能设计 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-10 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计目标
|
||||||
|
|
||||||
|
新增独立工作流 `regulatory_info_package`,用于根据产品说明书生成第1章监管信息材料包。用户在对话中上传或选择一个产品说明书,发送“根据说明书生成第1章监管信息”等指令后,系统复制 `docs/0.原始材料/第1章 监管信息` 下的 7 个样例模板,抽取说明书中的产品关键信息,生成一套新的第1章监管信息文件,并打包为 `第1章 监管信息(预生成版).zip` 作为主下载入口。
|
||||||
|
|
||||||
|
本功能与 `application_form_fill` 平级,不复用其 workflow_type 和批次表;但复用其已形成的服务思想和部分可拆能力,包括字段抽取、LLM 调用、Word 写入、追溯清单、导出下载、通知、工作流事件和前端卡片。
|
||||||
|
|
||||||
|
本期重点实现:
|
||||||
|
|
||||||
|
| 目标 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 新增 `regulatory_info_package` 批次、节点和卡片 |
|
||||||
|
| 单说明书输入 | 直接从当前对话 active 附件中选择唯一说明书;兼容最近成功文件汇总批次 |
|
||||||
|
| 模板驱动 | 通过 YAML 配置维护 7 个模板、字段映射和生成策略 |
|
||||||
|
| 模板字段化 | 优先使用 Word 内容控件 Tag 或稳定占位符,让代码只写字段值,最大限度保留原格式 |
|
||||||
|
| 规则 + LLM 并行抽取 | 代码抽取与 LLM 抽取并行,合并后写入模板 |
|
||||||
|
| 待确认高亮 | 系统新填入的 `/`、LLM-only 字段、冲突字段均高亮 |
|
||||||
|
| `.doc` 等价处理 | 设计 `LegacyWordDocumentService`,按能力驱动提供与 `.docx` 一致的文档操作接口;原生能力不可用时明确兜底 |
|
||||||
|
| zip 主输出 | 扩展 `ExportedSummaryFile.ExportType.ZIP`,统一下载权限 |
|
||||||
|
| LLM 意图路由 | 扩展路由 action,支持固定话术和 LLM 语义判断 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、规范依据与裁决
|
||||||
|
|
||||||
|
| 规范来源 | 命中内容 | 设计处理 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| GYRX 后端开发规范 | 服务层职责清晰、接口响应统一、记录必要日志 | Django 项目沿用现有 JsonResponse/SSE 模式;服务拆入独立模块,记录批次与节点日志 |
|
||||||
|
| GYRX 前端开发规范 | 前端样式复用、交互一致、下载图标语义 | 当前项目为 Django 模板 + 原生 JS,按现有工具 chip、工作流卡片和下载链接风格扩展 |
|
||||||
|
| 既有自动填表设计 | 独立工作流、YAML 配置、字段抽取、追溯清单、导出记录 | 复用模式,不复用批次表和 workflow_type |
|
||||||
|
| 需求分析确认 | `.doc` 不只依赖转换、zip 主入口、LLM-only 高亮 | 在服务抽象和验收标准中作为强约束 |
|
||||||
|
|
||||||
|
冲突裁决:GYRX 规范中部分 Java/Spring 约束不适用于当前 Django 项目,按当前项目既有 Django 架构落地;通用原则如服务拆分、日志、权限和前端交互一致性继续采用。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、与既有功能关系
|
||||||
|
|
||||||
|
### 3.1 复用边界
|
||||||
|
|
||||||
|
| 能力 | 处理方式 | 现有代码/模块 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 对话与消息 | 复用 | `Conversation`、`Message`、`stream_message` |
|
||||||
|
| 附件上传 | 复用 | `FileAttachment`、`file_summary.storage` |
|
||||||
|
| 文件汇总结果 | 兼容复用 | `FileSummaryBatch`、`FileSummaryItem` |
|
||||||
|
| 文本抽取 | 复用并扩展 | `regulatory_review/services/text_extract.py`、`rag_index.py` |
|
||||||
|
| LLM 调用 | 复用 | `review_agent/llm.py` |
|
||||||
|
| 知识库搜索 | 复用系统现有能力 | `knowledge_base.py`、法规 RAG 相关服务 |
|
||||||
|
| 导出下载 | 扩展复用 | `ExportedSummaryFile`、`file_summary.views.export_download` |
|
||||||
|
| 工作流事件 | 复用 | `WorkflowNodeRun`、`WorkflowEvent` |
|
||||||
|
| 通知 | 复用统一通知链路 | `review_agent.notifications` |
|
||||||
|
| 前端卡片 | 扩展复用 | `templates/home.html`、`static/js/app.js` |
|
||||||
|
|
||||||
|
### 3.2 新增边界
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立批次 | 新增 `RegulatoryInfoPackageBatch`,批次号 `RIP-...` |
|
||||||
|
| 独立产物 | 新增 `RegulatoryInfoPackageArtifact` 记录模板副本、抽取结果、生成文件、zip 和追溯清单 |
|
||||||
|
| 独立通知记录 | 新增 `RegulatoryInfoPackageNotificationRecord`,结构与自动填表通知保持一致 |
|
||||||
|
| 模板配置 | 新增 `regulatory_info_package_templates_v1.yaml` |
|
||||||
|
| 说明书选择 | 新增输入选择服务,优先从 active 附件选择,兼容文件汇总批次 |
|
||||||
|
| 材料包生成 | 新增 7 个文件的生成策略和 zip 打包服务 |
|
||||||
|
| `.doc` 适配 | 新增旧版 Word 文档适配层 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、总体架构
|
||||||
|
|
||||||
|
### 4.1 目录结构
|
||||||
|
|
||||||
|
新增模块:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/
|
||||||
|
regulatory_info_package/
|
||||||
|
__init__.py
|
||||||
|
constants.py
|
||||||
|
schemas.py
|
||||||
|
storage.py
|
||||||
|
events.py
|
||||||
|
workflow.py
|
||||||
|
views.py
|
||||||
|
services/
|
||||||
|
__init__.py
|
||||||
|
input_select.py
|
||||||
|
template_config.py
|
||||||
|
template_repository.py
|
||||||
|
instruction_extract.py
|
||||||
|
field_extract.py
|
||||||
|
field_merge.py
|
||||||
|
standard_candidates.py
|
||||||
|
document_writer.py
|
||||||
|
docx_document.py
|
||||||
|
legacy_doc_document.py
|
||||||
|
package_generate.py
|
||||||
|
traceability_export.py
|
||||||
|
zip_export.py
|
||||||
|
summary.py
|
||||||
|
notifier.py
|
||||||
|
templates/
|
||||||
|
regulatory_info_package_templates_v1.yaml
|
||||||
|
prompts/
|
||||||
|
field_extract.md
|
||||||
|
router_intent.md
|
||||||
|
standard_candidate.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 逻辑架构
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["AI 对话页"] --> B["意图路由"]
|
||||||
|
B --> C{"action = regulatory_info_package"}
|
||||||
|
C --> D["RegulatoryInfoPackageBatch"]
|
||||||
|
D --> E["RegulatoryInfoPackageWorkflowExecutor"]
|
||||||
|
E --> F["输入说明书选择"]
|
||||||
|
E --> G["模板配置 YAML"]
|
||||||
|
F --> H["说明书文本与表格抽取"]
|
||||||
|
H --> I1["规则/代码抽取"]
|
||||||
|
H --> I2["LLM 结构化抽取"]
|
||||||
|
I1 --> J["字段合并与高亮决策"]
|
||||||
|
I2 --> J
|
||||||
|
J --> K["标准候选服务"]
|
||||||
|
J --> L["材料包生成服务"]
|
||||||
|
K --> L
|
||||||
|
L --> M1["DOCX 文档适配器"]
|
||||||
|
L --> M2["Legacy DOC 文档适配器"]
|
||||||
|
M1 --> N["7 个目标文件"]
|
||||||
|
M2 --> N
|
||||||
|
N --> O["追溯清单"]
|
||||||
|
N --> P["ZIP 打包"]
|
||||||
|
O --> Q["ExportedSummaryFile"]
|
||||||
|
P --> Q
|
||||||
|
E --> R["WorkflowEvent/SSE"]
|
||||||
|
E --> S["通知服务"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 技术选型
|
||||||
|
|
||||||
|
| 设计项 | 本期方案 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Web 框架 | Django | 沿用当前项目 |
|
||||||
|
| 工作流执行 | 轻量 Executor + 后台线程 | 与文件汇总、法规核查、自动填表一致 |
|
||||||
|
| 工作流状态 | `WorkflowNodeRun`、`WorkflowEvent` | 使用 `workflow_type=regulatory_info_package` |
|
||||||
|
| 模板配置 | YAML | 便于维护 7 个模板和字段映射 |
|
||||||
|
| `.docx` 操作 | `python-docx` | 表格、段落、run、底色和字体可控 |
|
||||||
|
| `.doc` 操作 | 适配器抽象 | Python 标准库不支持 `.doc` 二进制 Word 写入;设计为 COM/UNO/第三方库适配器,能力不可用时使用可追溯的 `.docx` 兜底 |
|
||||||
|
| zip 打包 | Python `zipfile` 标准库 | 标准库可满足打包需求 |
|
||||||
|
| Excel 追溯 | `openpyxl` | 复用现有依赖 |
|
||||||
|
| LLM | `review_agent.llm.generate_completion` | 统一模型调用 |
|
||||||
|
| 知识库 | 系统现有知识库/RAG | 不新增单独 RAG 模块 |
|
||||||
|
|
||||||
|
关于 `.doc`:Python 自带库不能实现类似 Apache POI HWPF 的 Word 97-2003 二进制文档完整读写。项目依赖中有 `olefile`,可读取 OLE 复合文档结构,但不足以可靠修改 Word 文本、表格和样式。因此设计上必须使用文档适配器屏蔽实现差异,底层可选 Word COM、LibreOffice UNO、专用第三方库或受控转换兜底。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、触发与路由设计
|
||||||
|
|
||||||
|
### 5.1 action 扩展
|
||||||
|
|
||||||
|
`skill_router.py` 扩展:
|
||||||
|
|
||||||
|
| 项 | 设计 |
|
||||||
|
| --- | --- |
|
||||||
|
| 新 action | `regulatory_info_package` |
|
||||||
|
| 新属性 | `starts_regulatory_info_package` |
|
||||||
|
| ROUTE_ACTIONS | 增加 `regulatory_info_package` |
|
||||||
|
| LLM prompt | 描述该 action 用于“根据说明书生成第1章监管信息、监管信息材料包、申请表/产品列表/声明材料包” |
|
||||||
|
|
||||||
|
### 5.2 固定规则
|
||||||
|
|
||||||
|
规则预判关键词:
|
||||||
|
|
||||||
|
```python
|
||||||
|
REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS = [
|
||||||
|
"根据说明书生成第1章监管信息",
|
||||||
|
"生成监管信息材料包",
|
||||||
|
"从说明书生成第1章材料",
|
||||||
|
"第1章监管信息",
|
||||||
|
"监管信息材料包",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
规则命中时直接进入本工作流。规则未命中时,继续走 LLM 路由判断,避免自然表达漏触发。
|
||||||
|
|
||||||
|
### 5.3 对话启动
|
||||||
|
|
||||||
|
`review_agent/services.py::stream_message` 增加分支:
|
||||||
|
|
||||||
|
```text
|
||||||
|
if route.starts_regulatory_info_package:
|
||||||
|
-> 选择说明书输入
|
||||||
|
-> 创建 RegulatoryInfoPackageBatch
|
||||||
|
-> start_regulatory_info_package_workflow
|
||||||
|
-> SSE workflow_started
|
||||||
|
-> 回复“已启动第1章监管信息材料包生成工作流,批次号:RIP-...”
|
||||||
|
```
|
||||||
|
|
||||||
|
如果没有 active 附件,也没有可复用的最近文件汇总批次,则回复“请先上传产品说明书”。
|
||||||
|
如果存在多个候选说明书且用户消息无法唯一命中文件名,则不展示选择弹窗,由对话反问用户确认具体文件名后再启动工作流。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、输入选择设计
|
||||||
|
|
||||||
|
### 6.1 选择优先级
|
||||||
|
|
||||||
|
| 优先级 | 来源 | 规则 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 用户消息指定文件名 | 按 active 附件名或可复用文件名模糊匹配,唯一命中则使用 |
|
||||||
|
| 2 | 当前对话 active 附件 | 文件名包含“说明书”且扩展名为 `.docx` |
|
||||||
|
| 3 | 当前对话 active 附件 | 唯一 `.docx` 文件 |
|
||||||
|
| 4 | 最近成功 `FileSummaryBatch.items` | 文件名包含“说明书”且扩展名为 `.docx` |
|
||||||
|
| 5 | 无法唯一选择 | 对话反问用户确认使用哪个说明书;必要时批次进入 `waiting_user` |
|
||||||
|
|
||||||
|
本期直接输入只支持 `.docx` 产品说明书。`.doc`、PDF、扫描件说明书作为后续扩展;但输出模板中的 `.doc` 必须支持。
|
||||||
|
|
||||||
|
### 6.2 输入绑定
|
||||||
|
|
||||||
|
批次记录:
|
||||||
|
|
||||||
|
| 字段 | 来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| source_attachment | 直接选择的 FileAttachment |
|
||||||
|
| source_summary_batch | 可选,来自最近成功文件汇总 |
|
||||||
|
| source_summary_item | 可选,来自汇总条目 |
|
||||||
|
| source_file_name | 原始说明书文件名 |
|
||||||
|
| source_storage_path | 说明书存储路径 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、模板配置设计
|
||||||
|
|
||||||
|
配置路径:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/regulatory_info_package/templates/regulatory_info_package_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
配置结构(节选示例,仅列出 7 个模板中的 3 个,其余模板按相同结构配置):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: regulatory_info_package_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/第1章 监管信息
|
||||||
|
output_zip_name: 第1章 监管信息(预生成版).zip
|
||||||
|
templates:
|
||||||
|
- code: ch1_2_directory
|
||||||
|
output_name: CH1.2 监管信息目录.docx
|
||||||
|
source_file: CH1.2 监管信息目录.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: directory
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
targets:
|
||||||
|
- type: paragraph_contains_replace
|
||||||
|
match: 呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)
|
||||||
|
- code: ch1_4_application_form
|
||||||
|
output_name: CH1.4 申请表.docx
|
||||||
|
source_file: CH1.4 申请表.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: application_form
|
||||||
|
include_in_zip: true
|
||||||
|
- code: ch1_9_pre_submission
|
||||||
|
output_name: CH1.9 产品申报前沟通的说明.doc
|
||||||
|
source_file: CH1.9 产品申报前沟通的说明.doc
|
||||||
|
file_format: doc
|
||||||
|
strategy: pre_submission
|
||||||
|
prefer_legacy_doc_native: true
|
||||||
|
allow_docx_fallback: true
|
||||||
|
include_in_zip: true
|
||||||
|
```
|
||||||
|
|
||||||
|
字段映射优先级:
|
||||||
|
|
||||||
|
| 目标类型 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| content_control_tag | 正式模板优先,代码按 Word 内容控件 Tag 写入 |
|
||||||
|
| placeholder | 过渡方案,替换稳定占位符并保留原 run/段落格式 |
|
||||||
|
| table_row_label | 未字段化模板的兜底方案,必须保留原单元格格式 |
|
||||||
|
|
||||||
|
### 7.1 配置项说明
|
||||||
|
|
||||||
|
| 配置项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| version | 配置版本,写入批次 |
|
||||||
|
| source_dir | 样例模板目录 |
|
||||||
|
| output_zip_name | zip 主输出文件名 |
|
||||||
|
| templates | 7 个目标模板 |
|
||||||
|
| code | 模板编码 |
|
||||||
|
| output_name | 生成文件名 |
|
||||||
|
| source_file | 样例文件 |
|
||||||
|
| file_format | docx/doc |
|
||||||
|
| strategy | 生成策略 |
|
||||||
|
| include_in_zip | 是否进入 zip |
|
||||||
|
| fields | 字段映射与替换目标 |
|
||||||
|
| prefer_legacy_doc_native | `.doc` 是否优先尝试原生处理能力 |
|
||||||
|
| allow_docx_fallback | 原生 `.doc` 能力不可用或失败时是否允许 `.docx` 兜底 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、字段抽取设计
|
||||||
|
|
||||||
|
### 8.1 说明书解析
|
||||||
|
|
||||||
|
`instruction_extract.py` 输出:
|
||||||
|
|
||||||
|
| 数据 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| paragraphs | 按顺序提取段落 |
|
||||||
|
| sections | 按 `【章节名】` 切分 |
|
||||||
|
| tables | 提取表格二维数据 |
|
||||||
|
| component_tables | 识别主要组成成分表 |
|
||||||
|
| front_text | 前 4000 字,供 LLM 使用 |
|
||||||
|
|
||||||
|
### 8.2 规则抽取
|
||||||
|
|
||||||
|
规则抽取覆盖:
|
||||||
|
|
||||||
|
| 字段 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| product_name | `【产品名称】` 下一段 |
|
||||||
|
| package_specification | `【包装规格】` 到下一章节 |
|
||||||
|
| intended_use | `【预期用途】` 到下一章节 |
|
||||||
|
| detection_principle | `【检测原理】` 到下一章节 |
|
||||||
|
| main_components | `【主要组成成分】` 表格摘要 |
|
||||||
|
| storage_condition_and_validity | `【储存条件及有效期】` 到下一章节 |
|
||||||
|
| sample_type | `【样本要求】` 中“适用样本类型” |
|
||||||
|
| detection_targets | 从预期用途/检测原理中抽取基因、病原体、靶标 |
|
||||||
|
| applicable_instruments | `【适用仪器】` 到下一章节 |
|
||||||
|
| test_method | `【检验方法】` 摘要 |
|
||||||
|
| standards | 正则抽取 `GB/T`、`YY/T`、`YY`、`GB` 等标准号 |
|
||||||
|
|
||||||
|
### 8.3 LLM 抽取
|
||||||
|
|
||||||
|
LLM prompt 要求只输出 JSON:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"key": "product_name",
|
||||||
|
"label": "产品名称",
|
||||||
|
"value": "...",
|
||||||
|
"evidence": "...",
|
||||||
|
"confidence": 0.9
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"product_list_rows": [
|
||||||
|
{
|
||||||
|
"package_specification": "...",
|
||||||
|
"composition": "...",
|
||||||
|
"component_name": "...",
|
||||||
|
"main_component": "...",
|
||||||
|
"quantity": "..."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"standards": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
LLM 不允许填企业信息、分类编码、管理类别、临床评价路径等说明书无法证明的内容。
|
||||||
|
|
||||||
|
### 8.4 字段合并
|
||||||
|
|
||||||
|
`field_merge.py` 输出 `MergedField`:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| key | 字段编码 |
|
||||||
|
| label | 中文名 |
|
||||||
|
| value | 最终写入值 |
|
||||||
|
| source | rule、llm、missing、conflict |
|
||||||
|
| evidence | 来源片段 |
|
||||||
|
| confidence | 置信度 |
|
||||||
|
| highlight_reason | none、missing、llm_only、conflict、rag_candidate |
|
||||||
|
| needs_review | 是否需人工复核 |
|
||||||
|
|
||||||
|
合并规则:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| rule 与 LLM 一致 | 采用值,不高亮 |
|
||||||
|
| rule 与 LLM 不一致 | 采用规则优先或配置优先,标记 conflict |
|
||||||
|
| rule 缺失、LLM 命中 | 采用 LLM 值,标记 llm_only |
|
||||||
|
| 全部缺失 | 写 `/`,标记 missing |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、文档生成设计
|
||||||
|
|
||||||
|
### 9.1 文档适配器接口
|
||||||
|
|
||||||
|
`document_writer.py` 定义统一接口:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DocumentAdapter:
|
||||||
|
def replace_text(self, old: str, new: str, *, highlight: bool = False) -> int: ...
|
||||||
|
def fill_table_cell(self, row_label: str, value: str, *, highlight: bool = False) -> bool: ...
|
||||||
|
def replace_table(self, marker: str, rows: list[dict], *, highlight_columns: list[str] = None) -> bool: ...
|
||||||
|
def highlight_value(self, value: str, reason: str) -> int: ...
|
||||||
|
def save(self, path: Path) -> Path: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
`.docx` 使用 `DocxDocumentAdapter`。`.doc` 使用 `LegacyDocDocumentAdapter`。
|
||||||
|
|
||||||
|
### 9.2 `.docx` 处理
|
||||||
|
|
||||||
|
能力:
|
||||||
|
|
||||||
|
| 能力 | 实现 |
|
||||||
|
| --- | --- |
|
||||||
|
| 段落替换 | 遍历 paragraph runs |
|
||||||
|
| 表格行填充 | 按首列 label 定位 |
|
||||||
|
| 单元格高亮 | `w:shd` 黄色底色 |
|
||||||
|
| 字体颜色 | 冲突项可红色字体 |
|
||||||
|
| 产品列表重建 | 清空目标表格数据行后追加 |
|
||||||
|
| 声明日期替换 | 按日期正则或段落末尾替换 |
|
||||||
|
|
||||||
|
### 9.3 `.doc` 处理
|
||||||
|
|
||||||
|
设计 `LegacyDocDocumentAdapter`,对外提供与 `.docx` 一致能力。底层按可用性选择适配器:
|
||||||
|
|
||||||
|
| 适配器 | 定位 |
|
||||||
|
| --- | --- |
|
||||||
|
| `WordComDocAdapter` | Windows + Microsoft Word 环境下优先,直接打开 `.doc`、查找替换、设置高亮并保存 `.doc` |
|
||||||
|
| `LibreOfficeUnoDocAdapter` | LibreOffice UNO/API 环境下使用,直接操作文档模型 |
|
||||||
|
| `OleDocReadOnlyAdapter` | 仅可读取时用于诊断,不满足写入验收 |
|
||||||
|
| `ConversionFallbackAdapter` | 兜底路径,可转换为 `.docx` 后处理,但不能作为唯一实现 |
|
||||||
|
|
||||||
|
功能设计约束:
|
||||||
|
|
||||||
|
| 约束 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 不静默降级 | `.doc` 原生写入失败时必须记录适配器失败原因,随后尝试 `.docx` 兜底;兜底仍失败时该文件失败并触发 partial_success |
|
||||||
|
| 不只靠转换 | 转换可作为兜底,但设计主路径必须是文档适配器 |
|
||||||
|
| 能力探测 | 启动时或节点执行时检测适配器可用性 |
|
||||||
|
| 追溯记录 | 写入 `.doc` 的适配器类型和失败信息写入 artifact metadata |
|
||||||
|
|
||||||
|
### 9.4 7 个文件生成策略
|
||||||
|
|
||||||
|
| 模板 | 策略服务 | 关键动作 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| CH1.2 监管信息目录 | `generate_directory_doc` | 替换产品名称;页码沿用样例 |
|
||||||
|
| CH1.4 申请表 | `generate_application_form_doc` | 填表格行;缺失字段 `/` 黄底 |
|
||||||
|
| CH1.5 产品列表 | `generate_product_list_doc` | 使用样例表头重建产品列表;货号 `/` 黄底 |
|
||||||
|
| CH1.9 申报前沟通说明 | `generate_pre_submission_doc` | `.doc` 原生替换产品名和公司名;原生失败则输出 `.docx` 兜底文件;两者均失败才不进入 zip |
|
||||||
|
| CH1.11.1 符合标准清单 | `generate_standard_list_doc` | 说明书标准号直接写;候选/缺失高亮 |
|
||||||
|
| CH1.11.5 真实性声明 | `generate_authenticity_statement_doc` | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
| CH1.11.6 符合性声明 | `generate_compliance_statement_doc` | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
|
||||||
|
`generate_docs` 节点内部允许多线程并发处理 7 个目标文件。每个文档使用独立模板副本,子线程只返回生成结果,数据库 artifact/export 记录由主线程统一写入,避免并发写库和共享文件冲突。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、标准清单设计
|
||||||
|
|
||||||
|
系统中已有知识库/RAG 能力,不新增单独 RAG 模块。本功能只新增 `standard_candidates.py` 作为业务服务,调用既有知识库搜索能力。
|
||||||
|
|
||||||
|
处理规则:
|
||||||
|
|
||||||
|
| 来源 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 说明书明确标准号 | 写入标准清单,记录 `source=instruction` |
|
||||||
|
| 知识库候选标准 | 可写入候选区或追溯清单,标记 `rag_candidate` 并高亮 |
|
||||||
|
| 无命中 | 写 `/` 并黄底 |
|
||||||
|
| 样例标准 | 不无条件沿用 |
|
||||||
|
|
||||||
|
查询建议:
|
||||||
|
|
||||||
|
```text
|
||||||
|
体外诊断试剂 核酸扩增 检测试剂 标准 清单
|
||||||
|
新型冠状病毒 2019-nCoV 核酸检测试剂盒 荧光PCR 标准
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、zip 与导出设计
|
||||||
|
|
||||||
|
### 11.1 ExportType 扩展
|
||||||
|
|
||||||
|
`ExportedSummaryFile.ExportType` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
ZIP = "zip", "ZIP"
|
||||||
|
```
|
||||||
|
|
||||||
|
下载 content type 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
"zip": "application/zip"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 11.2 导出记录
|
||||||
|
|
||||||
|
| 文件 | export_category | export_type |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 第1章 监管信息(预生成版).zip | regulatory_info_package | zip |
|
||||||
|
| 7 个生成文件 | generated_document | word 或 legacy_word |
|
||||||
|
| 追溯清单 Excel | traceability | excel |
|
||||||
|
|
||||||
|
追溯 JSON 和抽取过程 JSON 只保存到后台 `logs/` 目录和 artifact 记录,不作为用户下载入口。用户侧只提供追溯 Excel 下载。
|
||||||
|
|
||||||
|
如果不新增 `legacy_word` export_type,则 `.doc` 也可暂用 `word`,通过文件扩展名和 content type 判断下载 MIME。功能设计建议新增 content type 映射时按扩展名兜底,避免 `.doc` 被当作 `.docx`。
|
||||||
|
|
||||||
|
### 11.3 权限
|
||||||
|
|
||||||
|
`file_summary.views._export_for_user` 增加:
|
||||||
|
|
||||||
|
```text
|
||||||
|
if exported.workflow_type == "regulatory_info_package":
|
||||||
|
查询 RegulatoryInfoPackageBatch
|
||||||
|
校验 conversation__user == request.user 且 is_deleted=False
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、数据模型设计
|
||||||
|
|
||||||
|
### 12.1 RegulatoryInfoPackageBatch
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageBatch(models.Model):
|
||||||
|
class Status(models.TextChoices):
|
||||||
|
PENDING = "pending", "待执行"
|
||||||
|
RUNNING = "running", "执行中"
|
||||||
|
WAITING_USER = "waiting_user", "等待用户"
|
||||||
|
SUCCESS = "success", "成功"
|
||||||
|
PARTIAL_SUCCESS = "partial_success", "部分成功"
|
||||||
|
FAILED = "failed", "失败"
|
||||||
|
CANCELLED = "cancelled", "已取消"
|
||||||
|
```
|
||||||
|
|
||||||
|
字段建议:
|
||||||
|
|
||||||
|
| 字段 | 类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| conversation | FK Conversation | 所属对话 |
|
||||||
|
| user | FK User | 发起用户 |
|
||||||
|
| trigger_message | FK Message | 触发消息 |
|
||||||
|
| source_attachment | FK FileAttachment | 直接选中的说明书附件 |
|
||||||
|
| source_summary_batch | FK FileSummaryBatch | 可选文件汇总批次 |
|
||||||
|
| source_summary_item_id | PositiveBigIntegerField | 可选汇总条目 ID |
|
||||||
|
| batch_no | CharField unique | RIP 批次号 |
|
||||||
|
| status | CharField | 状态 |
|
||||||
|
| source_file_name | CharField | 说明书原文件名 |
|
||||||
|
| source_storage_path | CharField | 说明书路径 |
|
||||||
|
| product_name | CharField | 抽取产品名 |
|
||||||
|
| output_zip_name | CharField | zip 文件名 |
|
||||||
|
| generated_files | JSONField | 7 个文件状态 |
|
||||||
|
| missing_fields | JSONField | 缺失项 |
|
||||||
|
| llm_only_fields | JSONField | LLM-only 项 |
|
||||||
|
| conflict_fields | JSONField | 冲突项 |
|
||||||
|
| risk_notes | JSONField | 风险提示 |
|
||||||
|
| template_config_version | CharField | 配置版本 |
|
||||||
|
| template_config_hash | CharField | 配置 hash |
|
||||||
|
| adapter_summary | JSONField | `.doc`/`.docx` 适配器信息 |
|
||||||
|
| work_dir | CharField | 工作目录 |
|
||||||
|
| error_message | TextField | 错误信息 |
|
||||||
|
| started_at/finished_at | DateTimeField | 执行时间 |
|
||||||
|
| is_deleted | BooleanField | 软删除 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引 | 字段 |
|
||||||
|
| --- | --- |
|
||||||
|
| idx_ra_rip_batch_conv_status | conversation, status |
|
||||||
|
| idx_ra_rip_batch_user_created | user, created_at |
|
||||||
|
| idx_ra_rip_batch_attachment | source_attachment |
|
||||||
|
| idx_ra_rip_batch_summary | source_summary_batch |
|
||||||
|
|
||||||
|
### 12.2 RegulatoryInfoPackageArtifact
|
||||||
|
|
||||||
|
产物类型:
|
||||||
|
|
||||||
|
| 类型 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| template_copy | 模板副本 |
|
||||||
|
| instruction_extract | 说明书抽取结果 |
|
||||||
|
| field_extract_result | 字段抽取结果 |
|
||||||
|
| merged_fields | 合并字段 |
|
||||||
|
| generated_document | 生成文件 |
|
||||||
|
| traceability | 追溯清单 |
|
||||||
|
| zip_package | zip 包 |
|
||||||
|
| notification_record | 通知记录 |
|
||||||
|
|
||||||
|
字段与 `ApplicationFormFillArtifact` 保持一致:`batch`、`artifact_type`、`file_format`、`name`、`file_name`、`storage_path`、`file_size`、`content_hash`、`metadata`、`created_by_node`、`is_deleted`。
|
||||||
|
|
||||||
|
`file_format` 增加 `DOC`、`ZIP`。
|
||||||
|
|
||||||
|
### 12.3 RegulatoryInfoPackageNotificationRecord
|
||||||
|
|
||||||
|
结构对齐 `ApplicationFormFillNotificationRecord`:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 所属 RIP 批次 |
|
||||||
|
| recipient | 通知对象 |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| export_ids | 导出 ID |
|
||||||
|
| message_summary | 通知摘要 |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
| retry_count | 重试次数 |
|
||||||
|
| external_message_id | 外部消息 ID |
|
||||||
|
| error_message | 错误 |
|
||||||
|
| sent_at | 发送时间 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、工作流设计
|
||||||
|
|
||||||
|
### 13.1 节点定义
|
||||||
|
|
||||||
|
| 节点编码 | 节点名称 | 触发服务 | 成功条件 | 失败处理 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| prepare | 准备资料 | `RegulatoryInfoPackageWorkflowExecutor` | 找到唯一说明书 | 缺失或多候选进入 waiting_user |
|
||||||
|
| template_copy | 复制模板 | `TemplateRepository` | 7 个模板进入批次目录 | 缺关键模板则 failed |
|
||||||
|
| text_extract | 抽取说明书 | `InstructionExtractService` | 提取文本、章节和表格 | 失败则 failed |
|
||||||
|
| field_extract | 抽取字段 | `FieldExtractionService` | 规则/LLM 结果留底 | LLM 失败可继续 |
|
||||||
|
| field_merge | 合并字段 | `FieldMergeService` | 输出 merged_fields | 无产品名仍继续,产品名 `/` |
|
||||||
|
| generate_docs | 生成材料 | `PackageGenerateService` | 生成 7 个文件 | 单文件失败可 partial_success |
|
||||||
|
| highlight_review_items | 标记待确认 | 文档适配器 | 缺失/LLM-only/冲突完成高亮 | 失败则对应文件失败 |
|
||||||
|
| trace_export | 追溯清单 | `TraceabilityExportService` | 生成 Excel/JSON | 不阻断 zip |
|
||||||
|
| zip_export | 打包下载 | `ZipExportService` | 生成 zip 并创建导出记录 | zip 失败则保留单文件 |
|
||||||
|
| notify | 通知 | `Notifier` | 写通知记录 | 不阻断下载 |
|
||||||
|
| completed | 完成 | Executor | 状态落定、摘要写入对话 | - |
|
||||||
|
|
||||||
|
### 13.2 状态落定
|
||||||
|
|
||||||
|
| 结果 | 批次状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 7 个文件、zip、追溯清单均成功 | success |
|
||||||
|
| zip 成功但部分单文件/追溯/通知失败 | partial_success |
|
||||||
|
| 单文件成功但 zip 失败 | partial_success |
|
||||||
|
| 关键输入或模板缺失 | failed 或 waiting_user |
|
||||||
|
| 所有目标文件生成失败 | failed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、接口设计
|
||||||
|
|
||||||
|
### 14.1 URL
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/regulatory-info-package/health/
|
||||||
|
POST /api/review-agent/regulatory-info-package/start/
|
||||||
|
GET /api/review-agent/regulatory-info-package/<batch_id>/status/
|
||||||
|
POST /api/review-agent/regulatory-info-package/<batch_id>/select-input/
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14.2 start
|
||||||
|
|
||||||
|
请求:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"conversation_id": 1,
|
||||||
|
"attachment_id": 10,
|
||||||
|
"file_summary_batch_id": 20,
|
||||||
|
"source_summary_item_id": 30
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
响应:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"batch_id": 1,
|
||||||
|
"workflow_type": "regulatory_info_package",
|
||||||
|
"batch_no": "RIP-20260610153000-abcdef",
|
||||||
|
"status": "pending"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14.3 status
|
||||||
|
|
||||||
|
响应包含:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 批次基础信息、产品名、缺失数、LLM-only 数、冲突数 |
|
||||||
|
| nodes | 工作流节点 |
|
||||||
|
| generated_files | 7 个文件状态 |
|
||||||
|
| exports | zip、单文件、追溯清单下载 |
|
||||||
|
| missing_fields | 缺失项摘要 |
|
||||||
|
| llm_only_fields | LLM-only 摘要 |
|
||||||
|
| conflict_fields | 冲突摘要 |
|
||||||
|
| risk_notes | 风险提示 |
|
||||||
|
| notifications | 通知记录 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、前端设计
|
||||||
|
|
||||||
|
### 15.1 对话框底部快捷提示
|
||||||
|
|
||||||
|
`templates/home.html` 增加 tool chip:
|
||||||
|
|
||||||
|
```text
|
||||||
|
根据说明书生成第1章监管信息
|
||||||
|
```
|
||||||
|
|
||||||
|
点击后填入 prompt,不自动发送,保持现有交互一致。
|
||||||
|
|
||||||
|
### 15.2 工作流卡片
|
||||||
|
|
||||||
|
`build_workflow_cards()` 增加 RIP 批次,前端复用现有卡片样式,展示:
|
||||||
|
|
||||||
|
| 信息 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次号 | RIP-... |
|
||||||
|
| 状态 | pending/running/waiting_user/success/partial_success/failed/cancelled |
|
||||||
|
| 风险摘要 | 缺失字段 N、LLM复核 N、提示 N |
|
||||||
|
| 节点 | RIP 节点 |
|
||||||
|
|
||||||
|
### 15.3 状态轮询
|
||||||
|
|
||||||
|
`summaryPanel` 增加:
|
||||||
|
|
||||||
|
```html
|
||||||
|
data-regulatory-info-package-status-url-template="/api/review-agent/regulatory-info-package/__batch_id__/status/"
|
||||||
|
```
|
||||||
|
|
||||||
|
`static/js/app.js` 在工作流类型判断中增加 `regulatory_info_package`。
|
||||||
|
|
||||||
|
### 15.4 结果展示
|
||||||
|
|
||||||
|
状态 payload 中 `exports` 按类别展示:
|
||||||
|
|
||||||
|
| 类别 | 展示 |
|
||||||
|
| --- | --- |
|
||||||
|
| regulatory_info_package(export_type=zip) | 主下载按钮 |
|
||||||
|
| generated_document | 单文件下载列表 |
|
||||||
|
| traceability | 追溯清单下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、通知设计
|
||||||
|
|
||||||
|
复用统一通知服务,新增 `build_regulatory_info_package_context(batch)`:
|
||||||
|
|
||||||
|
| 摘要项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 工作流 | 第1章监管信息材料包生成 |
|
||||||
|
| 批次号 | RIP-... |
|
||||||
|
| 产品名称 | 抽取产品名 |
|
||||||
|
| 导出文件 | zip + 单文件数量 |
|
||||||
|
| 待确认 | 缺失项、LLM-only、冲突项数量 |
|
||||||
|
| 下载提示 | 进入系统下载 zip |
|
||||||
|
|
||||||
|
通知失败不影响下载。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、异常与降级
|
||||||
|
|
||||||
|
| 异常 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 未找到说明书 | 返回提示;不创建批次,或创建 waiting_user 批次等待用户补充说明书 |
|
||||||
|
| 多说明书候选 | waiting_user,等待选择 |
|
||||||
|
| YAML 配置错误 | failed,提示配置错误 |
|
||||||
|
| 样例模板缺失 | failed,列出缺失模板 |
|
||||||
|
| LLM 失败 | 使用规则抽取继续,写 risk_notes |
|
||||||
|
| 规则抽取为空 | 使用 LLM-only 继续并高亮 |
|
||||||
|
| 知识库不可用 | 标准清单填 `/` 并高亮,写 risk_notes |
|
||||||
|
| `.doc` 适配器不可用 | 记录适配器失败原因后尝试 `.docx` 兜底;兜底仍失败则 CH1.9 失败,批次 partial_success 或 failed,明确原因 |
|
||||||
|
| zip 打包失败 | 保留单文件下载,状态 partial_success |
|
||||||
|
| 下载文件不存在 | 返回 404,记录日志 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、安全与权限
|
||||||
|
|
||||||
|
| 控制点 | 设计 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次访问 | `conversation__user == request.user` |
|
||||||
|
| 附件访问 | 附件必须属于当前对话和当前用户 |
|
||||||
|
| 汇总批次访问 | 批次必须属于当前对话和当前用户 |
|
||||||
|
| 导出下载 | `workflow_type=regulatory_info_package` 时反查 RIP 批次 |
|
||||||
|
| 工作目录 | `media/regulatory_info_package/{user_id}/{conversation_id}/{batch_no}` |
|
||||||
|
| 路径安全 | 所有复制/输出路径必须校验位于批次工作目录内 |
|
||||||
|
| 原始模板保护 | 只读复制,不允许覆盖 `docs/0.原始材料` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、测试设计
|
||||||
|
|
||||||
|
| 测试文件 | 覆盖 |
|
||||||
|
| --- | --- |
|
||||||
|
| `tests/test_regulatory_info_package_models.py` | 批次、产物、通知、zip 导出类型 |
|
||||||
|
| `tests/test_regulatory_info_package_trigger.py` | 固定规则与 LLM 路由 |
|
||||||
|
| `tests/test_regulatory_info_package_input_select.py` | 说明书选择、多候选 waiting_user |
|
||||||
|
| `tests/test_regulatory_info_package_template_config.py` | YAML 加载、模板存在性校验 |
|
||||||
|
| `tests/test_regulatory_info_package_field_extract.py` | 说明书字段、表格、标准号抽取 |
|
||||||
|
| `tests/test_regulatory_info_package_field_merge.py` | missing、llm_only、conflict 高亮决策 |
|
||||||
|
| `tests/test_regulatory_info_package_docx_writer.py` | docx 替换、表格填充、黄底 |
|
||||||
|
| `tests/test_regulatory_info_package_legacy_doc.py` | `.doc` 适配器能力探测和失败提示 |
|
||||||
|
| `tests/test_regulatory_info_package_zip.py` | zip 只包含 success/fallback_success 文件 |
|
||||||
|
| `tests/test_regulatory_info_package_workflow.py` | 工作流节点和状态落定 |
|
||||||
|
| `tests/test_regulatory_info_package_views.py` | start/status/权限 |
|
||||||
|
| `tests/test_regulatory_info_package_frontend.py` | 卡片、快捷提示、状态 URL |
|
||||||
|
|
||||||
|
回归测试:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_*.py tests/test_file_summary_views.py tests/test_regulatory_info_package_*.py
|
||||||
|
```
|
||||||
|
|
||||||
|
实际执行时按项目现有测试命名拆分运行。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、实施顺序建议
|
||||||
|
|
||||||
|
| 阶段 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| RIP-1 | 模型、迁移、ExportType.ZIP、下载权限 |
|
||||||
|
| RIP-2 | 模块骨架、YAML 配置、输入说明书选择 |
|
||||||
|
| RIP-3 | 路由 action、对话启动、工作流节点 |
|
||||||
|
| RIP-4 | 说明书文本/表格抽取、规则 + LLM 字段抽取 |
|
||||||
|
| RIP-5 | docx 文档生成、黄底高亮、产品列表重建 |
|
||||||
|
| RIP-6 | `.doc` 适配器、CH1.9 处理能力 |
|
||||||
|
| RIP-7 | 追溯清单、zip 导出、助手摘要 |
|
||||||
|
| RIP-8 | 前端卡片、快捷提示、状态轮询 |
|
||||||
|
| RIP-9 | 通知、权限、全量回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十一、待确认与风险
|
||||||
|
|
||||||
|
| 风险 | 说明 | 建议 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `.doc` 原生写入难度 | Python 标准库不支持 Word `.doc` 完整写入 | 优先调研 Word COM 或 LibreOffice UNO;无原生能力时允许可追溯 `.docx` 兜底 |
|
||||||
|
| 模板字段化工作量 | 需要先把样例模板整理为代码可识别字段 | 优先覆盖 CH1.4、CH1.5 和声明类关键字段;缺少 Tag 时通过模板审计提前暴露 |
|
||||||
|
| 样例模板文本碎片 | Word run 拆分可能导致简单字符串替换失败 | 文档写入服务需支持跨 run 替换 |
|
||||||
|
| 产品列表结构复杂 | 说明书表格可能存在合并单元格和多规格 | 先覆盖目标说明书结构,再扩展通用表格归一化 |
|
||||||
|
| 标准清单准确性 | 说明书未必包含标准号,知识库候选不能直接作为结论 | 候选全部高亮并进入追溯清单 |
|
||||||
|
| LLM-only 风险 | LLM 推断可能过度补全 | 写入但高亮,追溯清单标记需复核 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十二、设计结论
|
||||||
|
|
||||||
|
| 编号 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| D1 | 功能设计文档新增为 `docs/2.功能设计/5.第1章监管信息材料包生成.md` |
|
||||||
|
| D2 | 新增独立模块 `review_agent/regulatory_info_package/` |
|
||||||
|
| D3 | 新建独立批次、产物、通知三张表 |
|
||||||
|
| D4 | 输入选择以 active 附件为主,兼容最近成功文件汇总批次 |
|
||||||
|
| D5 | `ExportedSummaryFile.ExportType` 扩展 `zip` |
|
||||||
|
| D6 | 采用 YAML 配置驱动 7 个模板 |
|
||||||
|
| D7 | 模板字段优先使用内容控件 Tag 或稳定占位符,行标签定位仅作为兜底 |
|
||||||
|
| D8 | `.doc` 通过 `LegacyDocDocumentAdapter` 适配器实现与 `.docx` 等价接口,原生能力不可用时允许可追溯兜底 |
|
||||||
|
| D9 | 标准候选复用系统已有知识库/RAG,不新增独立 RAG |
|
||||||
|
| D10 | 前端只扩展现有对话页、工作流卡片、快捷提示和状态轮询 |
|
||||||
|
| D11 | 本轮先产出功能设计;数据库设计先在本文档中给出,后续可拆成正式数据库设计文档 |
|
||||||
590
docs/3.数据库设计/5.第1章监管信息材料包生成.md
Normal file
590
docs/3.数据库设计/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,590 @@
|
|||||||
|
# 第1章监管信息材料包生成数据库设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 数据库类型 | SQLite / Django ORM |
|
||||||
|
| 表名前缀 | ra_ |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 设计日期 | 2026-06-10 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流批次 | 第1章监管信息材料包生成使用独立批次表,不复用自动填表批次 |
|
||||||
|
| 附件优先 | 输入说明书优先绑定 `FileAttachment`,兼容最近成功 `FileSummaryBatch` 与 `FileSummaryItem` |
|
||||||
|
| 过程产物文件化 | 大 JSON、追溯清单、模板副本、生成文件和 zip 均保存为文件,数据库只保存路径、hash、摘要 |
|
||||||
|
| 导出记录复用 | zip、单文件、追溯清单继续写入 `ExportedSummaryFile`,统一下载权限 |
|
||||||
|
| 工作流通用表复用 | 节点状态和 SSE 事件复用 `WorkflowNodeRun`、`WorkflowEvent` |
|
||||||
|
| 通知独立留痕 | 新增专项通知记录表,结构与自动填表通知记录保持一致 |
|
||||||
|
| SQLite 兼容 | 使用 Django ORM 常规字段和 JSONField,避免数据库特定语法 |
|
||||||
|
| 原始模板保护 | 数据库只记录批次工作目录产物,不记录对原始模板的写操作 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、ER 图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
AUTH_USER ||--o{ CONVERSATION : owns
|
||||||
|
CONVERSATION ||--o{ MESSAGE : contains
|
||||||
|
CONVERSATION ||--o{ RA_FILE_ATTACHMENT : has
|
||||||
|
CONVERSATION ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : has
|
||||||
|
AUTH_USER ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : runs
|
||||||
|
MESSAGE ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : triggers
|
||||||
|
RA_FILE_ATTACHMENT ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : provides_instruction
|
||||||
|
RA_FILE_SUMMARY_BATCH ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : optionally_feeds
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_REGULATORY_INFO_PACKAGE_ARTIFACT : keeps
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_REGULATORY_INFO_PACKAGE_NOTIFICATION_RECORD : sends
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_EXPORTED_SUMMARY_FILE : exports
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_WORKFLOW_NODE_RUN : tracks
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_WORKFLOW_EVENT : emits
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:`ra_workflow_node_run`、`ra_workflow_event`、`ra_exported_summary_file` 通过 `workflow_type` 与 `workflow_batch_id` 支持多工作流。本功能统一使用 `workflow_type=regulatory_info_package`。
|
||||||
|
|
||||||
|
现状补充:当前通用节点表已有的 `batch + node_code` 唯一约束主要服务于文件汇总批次。RIP 批次不应强依赖指向 `FileSummaryBatch` 的 `batch` 外键,因此实现时必须为 `workflow_type + workflow_batch_id + node_code` 增加数据库唯一约束,或在创建节点时使用同等的幂等逻辑,避免同一 RIP 批次重复初始化节点。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、表结构设计
|
||||||
|
|
||||||
|
### 3.1 ra_regulatory_info_package_batch
|
||||||
|
|
||||||
|
一次第1章监管信息材料包生成工作流批次。记录触发来源、输入说明书、产品名称、生成状态、待确认摘要、zip 名称、配置版本和工作目录。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| conversation_id | ForeignKey | bigint | 是 | 所属对话 |
|
||||||
|
| user_id | ForeignKey | bigint | 是 | 发起用户 |
|
||||||
|
| trigger_message_id | ForeignKey | bigint | 否 | 触发本工作流的用户消息 |
|
||||||
|
| source_attachment_id | ForeignKey | bigint | 否 | 直接选中的说明书附件 |
|
||||||
|
| source_summary_batch_id | ForeignKey | bigint | 否 | 可选,最近成功文件汇总批次 |
|
||||||
|
| source_summary_item_id | PositiveBigIntegerField | integer | 否 | 可选,文件汇总条目 ID |
|
||||||
|
| batch_no | CharField(64) | varchar(64) | 是 | 批次编号,格式 `RIP-YYYYMMDDHHMMSS-abcdef`,唯一 |
|
||||||
|
| status | CharField(30) | varchar(30) | 是 | pending、running、waiting_user、success、partial_success、failed、cancelled |
|
||||||
|
| source_file_name | CharField(255) | varchar(255) | 否 | 说明书原文件名 |
|
||||||
|
| source_storage_path | CharField(500) | varchar(500) | 否 | 说明书存储路径 |
|
||||||
|
| product_name | CharField(200) | varchar(200) | 否 | 抽取到的产品名称 |
|
||||||
|
| output_zip_name | CharField(255) | varchar(255) | 否 | 主输出 zip 文件名,默认 `第1章 监管信息(预生成版).zip` |
|
||||||
|
| generated_files | JSONField | text/json | 是 | 7 个文件生成状态摘要 |
|
||||||
|
| missing_fields | JSONField | text/json | 是 | 缺失并填 `/` 的字段 |
|
||||||
|
| llm_only_fields | JSONField | text/json | 是 | 仅 LLM 命中的字段 |
|
||||||
|
| conflict_fields | JSONField | text/json | 是 | 规则和 LLM 冲突字段 |
|
||||||
|
| risk_notes | JSONField | text/json | 是 | `.doc` 适配器、知识库不可用、zip 失败等提示 |
|
||||||
|
| template_config_version | CharField(80) | varchar(80) | 否 | 模板配置版本 |
|
||||||
|
| template_config_hash | CharField(128) | varchar(128) | 否 | 模板配置 hash |
|
||||||
|
| adapter_summary | JSONField | text/json | 是 | docx/doc 适配器使用情况 |
|
||||||
|
| work_dir | CharField(500) | varchar(500) | 否 | 批次工作目录 |
|
||||||
|
| error_message | TextField | text | 否 | 批次异常说明 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| started_at | DateTimeField | datetime | 否 | 开始时间 |
|
||||||
|
| finished_at | DateTimeField | datetime | 否 | 完成时间 |
|
||||||
|
| archived_at | DateTimeField | datetime | 否 | 归档时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
唯一约束:
|
||||||
|
|
||||||
|
| 约束名 | 字段 |
|
||||||
|
| --- | --- |
|
||||||
|
| uq_ra_rip_batch_no | batch_no |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_rip_batch_conv_status | conversation_id, status | 查询对话下材料包批次状态 |
|
||||||
|
| idx_ra_rip_batch_user_created | user_id, created_at | 查询用户发起历史 |
|
||||||
|
| idx_ra_rip_batch_attachment | source_attachment_id | 查询某说明书附件生成历史 |
|
||||||
|
| idx_ra_rip_batch_summary | source_summary_batch_id | 查询文件汇总关联的材料包批次 |
|
||||||
|
| idx_ra_rip_batch_created | created_at | 后台按时间排查 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.2 ra_regulatory_info_package_artifact
|
||||||
|
|
||||||
|
第1章监管信息材料包生成过程产物表。仅保存文件元数据,不保存大文本正文。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属材料包批次 |
|
||||||
|
| artifact_type | CharField(60) | varchar(60) | 是 | template_copy、instruction_extract、field_extract_result、merged_fields、generated_document、traceability、zip_package、notification_record |
|
||||||
|
| file_format | CharField(20) | varchar(20) | 是 | json、excel、docx、doc、zip、markdown |
|
||||||
|
| name | CharField(160) | varchar(160) | 是 | 产物名称 |
|
||||||
|
| file_name | CharField(255) | varchar(255) | 是 | 文件名 |
|
||||||
|
| storage_path | CharField(500) | varchar(500) | 是 | 文件存储路径 |
|
||||||
|
| file_size | BigIntegerField | bigint | 是 | 文件大小 |
|
||||||
|
| content_hash | CharField(128) | varchar(128) | 否 | 文件 SHA-256 hash |
|
||||||
|
| metadata | JSONField | text/json | 是 | 模板编码、生成状态、高亮数量、适配器、错误摘要等 |
|
||||||
|
| created_by_node | CharField(60) | varchar(60) | 否 | 生成该产物的工作流节点 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_rip_artifact_batch_type | batch_id, artifact_type | 查询批次过程产物 |
|
||||||
|
| idx_ra_rip_artifact_format | file_format | 按文件格式查询 |
|
||||||
|
| idx_ra_rip_artifact_created | created_at | 按时间追溯 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.3 ra_regulatory_info_package_notification_record
|
||||||
|
|
||||||
|
第1章监管信息材料包生成通知记录表。通知失败不阻断下载,但需要留痕和支持后续重试。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属材料包批次 |
|
||||||
|
| recipient_id | ForeignKey(User) | bigint | 是 | 通知对象,默认发起人 |
|
||||||
|
| channel | CharField(30) | varchar(30) | 是 | feishu_cli、feishu_api、mock |
|
||||||
|
| export_ids | JSONField | text/json | 是 | 本次通知关联导出文件 ID |
|
||||||
|
| message_summary | TextField | text | 是 | 通知摘要 |
|
||||||
|
| send_status | CharField(20) | varchar(20) | 是 | pending、success、failed |
|
||||||
|
| retry_count | PositiveIntegerField | integer | 是 | 已重试次数 |
|
||||||
|
| external_message_id | CharField(120) | varchar(120) | 否 | 飞书外部消息 ID |
|
||||||
|
| error_message | TextField | text | 否 | 失败原因 |
|
||||||
|
| sent_at | DateTimeField | datetime | 否 | 发送成功时间 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_rip_notify_batch | batch_id, created_at | 查询批次通知 |
|
||||||
|
| idx_ra_rip_notify_recipient | recipient_id, send_status | 查询用户通知状态 |
|
||||||
|
| idx_ra_rip_notify_status | send_status, retry_count | 查询待重试通知 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、既有表扩展
|
||||||
|
|
||||||
|
### 4.1 ra_exported_summary_file
|
||||||
|
|
||||||
|
继续复用导出文件表,新增 zip 导出类型,并支持 `regulatory_info_package` 权限反查。
|
||||||
|
|
||||||
|
| 字段/枚举 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| export_type | 增加 `zip` |
|
||||||
|
| workflow_type | 使用 `regulatory_info_package` |
|
||||||
|
| workflow_batch_id | 记录 `RegulatoryInfoPackageBatch.id` |
|
||||||
|
| export_category | 使用 `regulatory_info_package`、`generated_document`、`traceability` |
|
||||||
|
|
||||||
|
导出类型枚举:
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| markdown | Markdown | 既有报告 |
|
||||||
|
| excel | Excel | 追溯清单 |
|
||||||
|
| json | JSON | 抽取结果、合并字段 |
|
||||||
|
| word | Word | 生成的 Word 文件,包含 `.docx` 和可下载 `.doc` |
|
||||||
|
| pdf | PDF | 既有预留 |
|
||||||
|
| zip | ZIP | 第1章监管信息材料包主下载 |
|
||||||
|
|
||||||
|
下载 MIME 规则:
|
||||||
|
|
||||||
|
| 条件 | content_type |
|
||||||
|
| --- | --- |
|
||||||
|
| export_type=zip | application/zip |
|
||||||
|
| export_type=word 且文件名后缀 `.doc` | application/msword |
|
||||||
|
| export_type=word 且文件名后缀 `.docx` | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
|
||||||
|
|
||||||
|
### 4.2 ra_workflow_node_run
|
||||||
|
|
||||||
|
本功能使用通用工作流节点表:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | regulatory_info_package |
|
||||||
|
| workflow_batch_id | RegulatoryInfoPackageBatch.id |
|
||||||
|
| node_group | regulatory_info_package |
|
||||||
|
| batch_id | 可为空;即使为兼容旧查询,也不建议绑定文件汇总批次 |
|
||||||
|
|
||||||
|
幂等约束建议:
|
||||||
|
|
||||||
|
| 约束/策略 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| uq_ra_node_workflow_batch_code | workflow_type, workflow_batch_id, node_code | 推荐新增数据库唯一约束,防止同一 RIP 批次重复节点 |
|
||||||
|
| get_or_create 幂等 | workflow_type, workflow_batch_id, node_code | 若暂不改通用表约束,节点初始化必须使用该组合做代码层幂等 |
|
||||||
|
|
||||||
|
建议新增节点:
|
||||||
|
|
||||||
|
```text
|
||||||
|
prepare, template_copy, text_extract, field_extract, field_merge,
|
||||||
|
generate_docs, highlight_review_items, trace_export, zip_export, notify, completed
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 ra_workflow_event
|
||||||
|
|
||||||
|
本功能事件写入:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | regulatory_info_package |
|
||||||
|
| workflow_batch_id | RegulatoryInfoPackageBatch.id |
|
||||||
|
| conversation_id | 当前对话 ID |
|
||||||
|
| payload | 节点状态、文件生成状态、导出 ID、待确认摘要等 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、枚举设计
|
||||||
|
|
||||||
|
### 5.1 RegulatoryInfoPackageBatch.status
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| pending | 待执行 | 批次已创建,等待执行 |
|
||||||
|
| running | 执行中 | 工作流正在执行 |
|
||||||
|
| waiting_user | 等待用户 | 未找到唯一说明书,需要用户选择 |
|
||||||
|
| success | 成功 | 7 个文件、zip 和必要追溯产物生成成功 |
|
||||||
|
| partial_success | 部分成功 | zip 或主要文件已生成,但部分单文件、`.doc` 原生处理、`.docx` 兜底、追溯或通知存在失败 |
|
||||||
|
| failed | 失败 | 关键输入、模板或全部目标文件生成失败 |
|
||||||
|
| cancelled | 已取消 | 用户或系统取消执行 |
|
||||||
|
|
||||||
|
### 5.2 RegulatoryInfoPackageArtifact.artifact_type
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| template_copy | 模板副本 |
|
||||||
|
| instruction_extract | 说明书文本、章节、表格抽取结果 |
|
||||||
|
| field_extract_result | 规则与 LLM 抽取原始结果 |
|
||||||
|
| merged_fields | 合并字段、高亮决策、标准候选 |
|
||||||
|
| generated_document | 生成后的单个目标文件 |
|
||||||
|
| traceability | 追溯清单 |
|
||||||
|
| zip_package | 主下载 zip 包 |
|
||||||
|
| notification_record | 通知记录产物 |
|
||||||
|
|
||||||
|
### 5.3 RegulatoryInfoPackageArtifact.file_format
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| json | JSON 产物 |
|
||||||
|
| excel | Excel 追溯清单 |
|
||||||
|
| docx | Word OpenXML 文件 |
|
||||||
|
| doc | Word 97-2003 文件 |
|
||||||
|
| zip | 压缩包 |
|
||||||
|
| markdown | Markdown 摘要或报告 |
|
||||||
|
|
||||||
|
### 5.4 通知枚举
|
||||||
|
|
||||||
|
| 字段 | value |
|
||||||
|
| --- | --- |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、JSON 字段结构
|
||||||
|
|
||||||
|
### 6.1 generated_files
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"template_code": "ch1_4_application_form",
|
||||||
|
"file_name": "CH1.4 申请表.docx",
|
||||||
|
"status": "success",
|
||||||
|
"artifact_id": 12,
|
||||||
|
"export_id": 34,
|
||||||
|
"highlight_count": 8,
|
||||||
|
"missing_count": 5,
|
||||||
|
"llm_only_count": 2,
|
||||||
|
"error_message": ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 missing_fields
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"target_file": "CH1.4 申请表.docx",
|
||||||
|
"field_key": "applicant_name",
|
||||||
|
"field_label": "申请人名称",
|
||||||
|
"final_value": "/",
|
||||||
|
"highlight_reason": "missing",
|
||||||
|
"needs_review": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 llm_only_fields
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"target_file": "CH1.4 申请表.docx",
|
||||||
|
"field_key": "detection_targets",
|
||||||
|
"field_label": "检测靶标",
|
||||||
|
"final_value": "ORF1ab、N基因",
|
||||||
|
"evidence": "预期用途和检测原理章节",
|
||||||
|
"highlight_reason": "llm_only",
|
||||||
|
"needs_review": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 conflict_fields
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"field_key": "package_specification",
|
||||||
|
"field_label": "包装规格",
|
||||||
|
"rule_value": "规格A:24人份/盒、48人份/盒、96人份/盒",
|
||||||
|
"llm_value": "规格A、规格B均为24/48/96人份",
|
||||||
|
"selected_value": "规格A:24人份/盒、48人份/盒、96人份/盒",
|
||||||
|
"handling": "规则优先,写入值高亮并进入追溯清单"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.5 risk_notes
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "legacy_doc_adapter_unavailable",
|
||||||
|
"message": "CH1.9 为 .doc 文件,当前环境未检测到可写入适配器。",
|
||||||
|
"template_code": "ch1_9_pre_submission"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "knowledge_base_unavailable",
|
||||||
|
"message": "标准清单知识库查询不可用,未自动写入候选标准。"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.6 adapter_summary
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"docx": {
|
||||||
|
"adapter": "DocxDocumentAdapter",
|
||||||
|
"status": "available"
|
||||||
|
},
|
||||||
|
"doc": {
|
||||||
|
"adapter": "WordComDocAdapter",
|
||||||
|
"status": "available",
|
||||||
|
"fallback_used": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.7 artifact.metadata
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_code": "ch1_5_product_list",
|
||||||
|
"strategy": "product_list",
|
||||||
|
"source_template": "CH1.5 产品列表.docx",
|
||||||
|
"generated_status": "success",
|
||||||
|
"highlight_count": 12,
|
||||||
|
"missing_count": 6,
|
||||||
|
"llm_only_count": 1,
|
||||||
|
"adapter": "DocxDocumentAdapter",
|
||||||
|
"created_by_node": "generate_docs"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、存储路径设计
|
||||||
|
|
||||||
|
批次目录:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/regulatory_info_package/{user_id}/{conversation_id}/{batch_no}/
|
||||||
|
```
|
||||||
|
|
||||||
|
目录结构:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/regulatory_info_package/12/1001/RIP-20260610153000-abcdef/
|
||||||
|
templates/
|
||||||
|
ch1_2_directory.source.docx
|
||||||
|
ch1_9_pre_submission.source.doc
|
||||||
|
extracted/
|
||||||
|
instruction_extract.json
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
generated/
|
||||||
|
CH1.2 监管信息目录.docx
|
||||||
|
CH1.4 申请表.docx
|
||||||
|
CH1.5 产品列表.docx
|
||||||
|
CH1.9 产品申报前沟通的说明.doc
|
||||||
|
CH1.11.1 符合标准的清单.docx
|
||||||
|
CH1.11.5 真实性声明.docx
|
||||||
|
CH1.11.6 符合性声明.docx
|
||||||
|
exports/
|
||||||
|
traceability.xlsx
|
||||||
|
第1章 监管信息(预生成版).zip
|
||||||
|
logs/
|
||||||
|
instruction_extract.json
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
traceability.json
|
||||||
|
doc_adapter_result.json
|
||||||
|
```
|
||||||
|
|
||||||
|
路径安全要求:
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 输出目录校验 | 所有输出路径必须位于当前批次 `work_dir` 下 |
|
||||||
|
| 原始模板只读 | 不允许覆盖 `docs/0.原始材料` |
|
||||||
|
| 导出路径 | `ExportedSummaryFile.storage_path` 保存实际文件路径,下载时校验权限 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、权限关系
|
||||||
|
|
||||||
|
### 8.1 批次权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
RegulatoryInfoPackageBatch.conversation.user_id == request.user.id
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 输入附件权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
FileAttachment.conversation_id == batch.conversation_id
|
||||||
|
FileAttachment.user_id == batch.user_id
|
||||||
|
FileAttachment.upload_status != deleted
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.3 导出下载权限
|
||||||
|
|
||||||
|
`ExportedSummaryFile` 下载时按 `workflow_type` 分支:
|
||||||
|
|
||||||
|
```text
|
||||||
|
workflow_type == "regulatory_info_package"
|
||||||
|
-> workflow_batch_id 反查 RegulatoryInfoPackageBatch
|
||||||
|
-> conversation__user == request.user
|
||||||
|
-> is_deleted == false
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、迁移设计
|
||||||
|
|
||||||
|
建议新增一个迁移文件,包含:
|
||||||
|
|
||||||
|
| 变更 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 新增 `RegulatoryInfoPackageBatch` | 批次表 |
|
||||||
|
| 新增 `RegulatoryInfoPackageArtifact` | 产物表 |
|
||||||
|
| 新增 `RegulatoryInfoPackageNotificationRecord` | 通知记录表 |
|
||||||
|
| 扩展 `ExportedSummaryFile.ExportType` | 增加 `zip` 枚举 |
|
||||||
|
|
||||||
|
Django 模型建议仍集中放在 `review_agent/models.py`,业务逻辑放入 `review_agent/regulatory_info_package/`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、DDL 参考
|
||||||
|
|
||||||
|
以下 DDL 为 SQLite / Django ORM 参考,实际以 migration 生成为准。
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE ra_regulatory_info_package_batch (
|
||||||
|
id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||||
|
conversation_id bigint NOT NULL REFERENCES review_agent_conversation(id),
|
||||||
|
user_id bigint NOT NULL REFERENCES auth_user(id),
|
||||||
|
trigger_message_id bigint NULL REFERENCES review_agent_message(id),
|
||||||
|
source_attachment_id bigint NULL REFERENCES ra_file_attachment(id),
|
||||||
|
source_summary_batch_id bigint NULL REFERENCES ra_file_summary_batch(id),
|
||||||
|
source_summary_item_id integer NULL,
|
||||||
|
batch_no varchar(64) NOT NULL UNIQUE,
|
||||||
|
status varchar(30) NOT NULL,
|
||||||
|
source_file_name varchar(255) NOT NULL DEFAULT '',
|
||||||
|
source_storage_path varchar(500) NOT NULL DEFAULT '',
|
||||||
|
product_name varchar(200) NOT NULL DEFAULT '',
|
||||||
|
output_zip_name varchar(255) NOT NULL DEFAULT '',
|
||||||
|
generated_files text NOT NULL DEFAULT '[]',
|
||||||
|
missing_fields text NOT NULL DEFAULT '[]',
|
||||||
|
llm_only_fields text NOT NULL DEFAULT '[]',
|
||||||
|
conflict_fields text NOT NULL DEFAULT '[]',
|
||||||
|
risk_notes text NOT NULL DEFAULT '[]',
|
||||||
|
template_config_version varchar(80) NOT NULL DEFAULT '',
|
||||||
|
template_config_hash varchar(128) NOT NULL DEFAULT '',
|
||||||
|
adapter_summary text NOT NULL DEFAULT '{}',
|
||||||
|
work_dir varchar(500) NOT NULL DEFAULT '',
|
||||||
|
error_message text NOT NULL DEFAULT '',
|
||||||
|
created_at datetime NOT NULL,
|
||||||
|
started_at datetime NULL,
|
||||||
|
finished_at datetime NULL,
|
||||||
|
archived_at datetime NULL,
|
||||||
|
is_deleted bool NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_ra_rip_batch_conv_status
|
||||||
|
ON ra_regulatory_info_package_batch(conversation_id, status);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_user_created
|
||||||
|
ON ra_regulatory_info_package_batch(user_id, created_at);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_attachment
|
||||||
|
ON ra_regulatory_info_package_batch(source_attachment_id);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_summary
|
||||||
|
ON ra_regulatory_info_package_batch(source_summary_batch_id);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_created
|
||||||
|
ON ra_regulatory_info_package_batch(created_at);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、实现注意事项
|
||||||
|
|
||||||
|
| 注意事项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| JSONField 默认值 | 使用 `default=list` 或 `default=dict`,禁止使用可变对象字面量 |
|
||||||
|
| 外键删除策略 | conversation/user 使用 CASCADE;输入附件和文件汇总批次建议 PROTECT 或 SET_NULL,避免历史批次断链 |
|
||||||
|
| `source_summary_item_id` | 当前没有强制外键到 `FileSummaryItem`,可先保存 ID,后续需要强约束时再改 FK |
|
||||||
|
| 工作流节点幂等 | RIP 节点不得只依赖 `WorkflowNodeRun.batch + node_code` 唯一约束;必须使用 `workflow_type + workflow_batch_id + node_code` 保证幂等 |
|
||||||
|
| `.doc` 失败记录 | `.doc` 原生适配器不可用或执行失败时必须写入 `risk_notes` 和 artifact metadata;若 `.docx` 兜底成功则 generated_files 状态为 `fallback_success` |
|
||||||
|
| zip 主入口 | zip 导出记录的 `export_category` 固定为 `regulatory_info_package` |
|
||||||
|
| 单文件下载 | 7 个生成文件也写入 `ExportedSummaryFile`,作为辅助下载 |
|
||||||
|
| 软删除 | 批次和产物使用 `is_deleted`,下载权限需过滤软删除批次 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 模型创建 | 三张 RIP 专项表可通过 migration 创建 |
|
||||||
|
| 2 | 批次编号 | `batch_no` 唯一,符合 `RIP-...` 格式 |
|
||||||
|
| 3 | 附件关联 | 批次可绑定直接说明书附件 |
|
||||||
|
| 4 | 汇总兼容 | 批次可选绑定 `FileSummaryBatch` 与 `source_summary_item_id` |
|
||||||
|
| 5 | 产物留痕 | 模板副本、抽取结果、生成文件、zip、追溯清单均可写 artifact |
|
||||||
|
| 6 | zip 导出 | `ExportedSummaryFile` 支持 `export_type=zip` |
|
||||||
|
| 7 | 下载权限 | 非批次所属用户不能下载 RIP 导出 |
|
||||||
|
| 8 | 节点事件 | `WorkflowNodeRun` 和 `WorkflowEvent` 可通过 `workflow_type=regulatory_info_package` 查询 |
|
||||||
|
| 9 | 节点幂等 | 同一 `workflow_type + workflow_batch_id + node_code` 不会重复创建节点 |
|
||||||
|
| 10 | 通知记录 | 通知成功、失败和重试次数可落库 |
|
||||||
|
| 11 | JSON 摘要 | 缺失项、LLM-only、冲突项、风险提示结构符合本文约定 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、规范依据与裁决
|
||||||
|
|
||||||
|
| 规范来源 | 命中规则 | 本设计裁决 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| GYRX 数据库设计流程 | 项目规范优先,未命中时回退基线规范 | 当前项目为 Django/SQLite,沿用既有数据库设计文档风格 |
|
||||||
|
| 既有自动填表数据库设计 | 独立批次、产物、通知三表;大 JSON 文件化;通用导出表复用 | 本功能按同样模式新增 RIP 三表 |
|
||||||
|
| 自动汇总数据库设计 | 对话隔离、多版本附件、工作流事件留痕 | 输入附件和批次权限沿用该关系 |
|
||||||
|
| 飞书通知数据库设计 | 通知摘要入库、失败不阻断主流程 | RIP 通知表结构与自动填表通知对齐 |
|
||||||
|
|
||||||
|
冲突裁决:技能规范中的低代码/Java 表达不适用于当前 Django 项目,数据库设计以当前项目 ORM、SQLite 兼容和既有 `ra_` 表风格为准。
|
||||||
963
docs/4.详细设计/5.第1章监管信息材料包生成.md
Normal file
963
docs/4.详细设计/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,963 @@
|
|||||||
|
# 第1章监管信息材料包生成详细设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 数据库设计文档 | docs/3.数据库设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 参考详细设计 | docs/4.详细设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-10 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、详细设计目标
|
||||||
|
|
||||||
|
本详细设计用于指导 `regulatory_info_package` 独立工作流开发落地。系统根据用户上传或指定的产品说明书,抽取产品关键信息,基于 `docs/0.原始材料/第1章 监管信息` 下的样例模板生成第1章监管信息材料包,并以 `第1章 监管信息(预生成版).zip` 作为对话摘要首位下载入口。
|
||||||
|
|
||||||
|
核心约束:
|
||||||
|
|
||||||
|
| 约束 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 使用 `workflow_type=regulatory_info_package`,拥有独立批次、产物、通知和卡片 |
|
||||||
|
| 独立模块 | 新增 `review_agent/regulatory_info_package/`,与 `application_form_fill` 平级 |
|
||||||
|
| 模型集中 | Django 模型仍集中放在 `review_agent/models.py` |
|
||||||
|
| 节点幂等 | `WorkflowNodeRun` 必须按 `workflow_type + workflow_batch_id + node_code` 幂等创建或加唯一约束 |
|
||||||
|
| 输入优先级 | 用户消息指定文件名优先;其次 active 附件;再兼容最近成功文件汇总 |
|
||||||
|
| 模板固定 | 固定处理第1章监管信息 7 个模板 |
|
||||||
|
| 模板字段化 | 生成逻辑优先写 Word 内容控件 Tag 或稳定占位符,不以手工调整表格格式为前提 |
|
||||||
|
| 规则优先可演示 | 规则抽取可独立跑通;LLM 抽取最多尝试 3 次(含首次),全部失败后继续使用规则结果 |
|
||||||
|
| 文档并发生成 | 工作流整体串行,`generate_docs` 节点内部每个文档可独立线程并发处理 |
|
||||||
|
| `.doc` 兜底 | 能力驱动:有 Word COM/UNO 时优先原生 `.doc`;无原生能力或原生失败时允许生成 `.docx` 兜底文件 |
|
||||||
|
| zip 只含成功文件 | zip 只打包成功或兜底成功的文件;失败文件不进入 zip |
|
||||||
|
| 高亮规则 | 缺失和 LLM-only 黄底;冲突黄底红字 |
|
||||||
|
| 追溯输出 | 用户下载 Excel;JSON 仅保存到后台 logs 目录 |
|
||||||
|
| 前端最小接入 | 不做多说明书选择 UI;不确定时通过对话反问 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、代码结构设计
|
||||||
|
|
||||||
|
### 2.1 目录结构
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/
|
||||||
|
models.py
|
||||||
|
services.py
|
||||||
|
skill_router.py
|
||||||
|
regulatory_info_package/
|
||||||
|
__init__.py
|
||||||
|
constants.py
|
||||||
|
schemas.py
|
||||||
|
storage.py
|
||||||
|
events.py
|
||||||
|
workflow.py
|
||||||
|
views.py
|
||||||
|
services/
|
||||||
|
__init__.py
|
||||||
|
input_select.py
|
||||||
|
template_config.py
|
||||||
|
template_repository.py
|
||||||
|
instruction_extract.py
|
||||||
|
field_extract.py
|
||||||
|
field_merge.py
|
||||||
|
standard_candidates.py
|
||||||
|
document_writer.py
|
||||||
|
docx_document.py
|
||||||
|
legacy_doc_document.py
|
||||||
|
package_generate.py
|
||||||
|
traceability_export.py
|
||||||
|
zip_export.py
|
||||||
|
summary.py
|
||||||
|
notifier.py
|
||||||
|
templates/
|
||||||
|
regulatory_info_package_templates_v1.yaml
|
||||||
|
prompts/
|
||||||
|
field_extract.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 文件职责
|
||||||
|
|
||||||
|
| 文件 | 职责 |
|
||||||
|
| --- | --- |
|
||||||
|
| constants.py | 工作流编码、节点定义、触发关键词、模板编码、状态常量 |
|
||||||
|
| schemas.py | dataclass 数据结构,如 `TemplateSpec`、`InstructionExtractResult`、`MergedField`、`GeneratedFileResult` |
|
||||||
|
| storage.py | 批次目录、子目录、hash、产物创建、路径安全校验 |
|
||||||
|
| events.py | 记录与序列化 `WorkflowEvent` |
|
||||||
|
| workflow.py | `RegulatoryInfoPackageWorkflowExecutor`、批次创建、工作流启动 |
|
||||||
|
| views.py | health、start、status、select-input 接口 |
|
||||||
|
| input_select.py | 根据用户消息、active 附件、文件汇总选择说明书 |
|
||||||
|
| template_config.py | YAML 加载、校验、hash |
|
||||||
|
| template_repository.py | 定位样例模板、复制到批次目录、审计字段 Tag/占位符 |
|
||||||
|
| instruction_extract.py | 说明书段落、章节、表格和组成成分表解析 |
|
||||||
|
| field_extract.py | 规则抽取与 LLM 抽取并行执行,LLM 最多尝试 3 次(含首次) |
|
||||||
|
| field_merge.py | 合并字段,输出缺失、LLM-only、冲突和高亮决策 |
|
||||||
|
| standard_candidates.py | 从说明书抽标准号,调用现有知识库搜索候选 |
|
||||||
|
| document_writer.py | 文档适配器接口与通用高亮策略 |
|
||||||
|
| docx_document.py | `DocxDocumentAdapter`,处理 `.docx` |
|
||||||
|
| legacy_doc_document.py | `LegacyDocDocumentAdapter`,处理 `.doc` 原生写入与 `.docx` 兜底 |
|
||||||
|
| package_generate.py | 7 个文档生成策略,多线程生成文件 |
|
||||||
|
| traceability_export.py | 生成 `exports/traceability.xlsx` 和 `logs/traceability.json` |
|
||||||
|
| zip_export.py | 生成主下载 zip,只包含成功文件 |
|
||||||
|
| summary.py | 构造助手回显,zip 链接排首位 |
|
||||||
|
| notifier.py | 写专项通知记录,并调用统一通知服务 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、数据模型详细设计
|
||||||
|
|
||||||
|
模型放在 `review_agent/models.py`。
|
||||||
|
|
||||||
|
### 3.1 RegulatoryInfoPackageBatch
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageBatch(models.Model):
|
||||||
|
class Status(models.TextChoices):
|
||||||
|
PENDING = "pending", "待执行"
|
||||||
|
RUNNING = "running", "执行中"
|
||||||
|
WAITING_USER = "waiting_user", "等待用户"
|
||||||
|
SUCCESS = "success", "成功"
|
||||||
|
PARTIAL_SUCCESS = "partial_success", "部分成功"
|
||||||
|
FAILED = "failed", "失败"
|
||||||
|
CANCELLED = "cancelled", "已取消"
|
||||||
|
```
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| conversation | 所属对话 |
|
||||||
|
| user | 发起用户 |
|
||||||
|
| trigger_message | 触发消息 |
|
||||||
|
| source_attachment | 直接选中的说明书附件,可空 |
|
||||||
|
| source_summary_batch | 兼容文件汇总批次,可空 |
|
||||||
|
| source_summary_item_id | 文件汇总条目 ID,可空 |
|
||||||
|
| batch_no | `RIP-YYYYMMDDHHMMSS-abcdef` |
|
||||||
|
| source_file_name | 说明书原文件名 |
|
||||||
|
| source_storage_path | 说明书存储路径 |
|
||||||
|
| product_name | 抽取产品名称 |
|
||||||
|
| output_zip_name | `第1章 监管信息(预生成版).zip` |
|
||||||
|
| generated_files | 7 个文件状态 |
|
||||||
|
| missing_fields | 缺失字段 |
|
||||||
|
| llm_only_fields | LLM-only 字段 |
|
||||||
|
| conflict_fields | 冲突字段 |
|
||||||
|
| risk_notes | 风险和降级提示 |
|
||||||
|
| adapter_summary | doc/docx 适配器实际执行摘要 |
|
||||||
|
| template_config_version/hash | 模板配置版本和 hash |
|
||||||
|
| work_dir | 批次工作目录 |
|
||||||
|
| is_deleted | 软删除 |
|
||||||
|
|
||||||
|
### 3.2 RegulatoryInfoPackageArtifact
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageArtifact(models.Model):
|
||||||
|
class ArtifactType(models.TextChoices):
|
||||||
|
TEMPLATE_COPY = "template_copy", "模板副本"
|
||||||
|
INSTRUCTION_EXTRACT = "instruction_extract", "说明书抽取结果"
|
||||||
|
FIELD_EXTRACT_RESULT = "field_extract_result", "字段抽取结果"
|
||||||
|
MERGED_FIELDS = "merged_fields", "合并字段"
|
||||||
|
GENERATED_DOCUMENT = "generated_document", "生成文件"
|
||||||
|
TRACEABILITY = "traceability", "追溯清单"
|
||||||
|
ZIP_PACKAGE = "zip_package", "ZIP包"
|
||||||
|
NOTIFICATION_RECORD = "notification_record", "通知记录"
|
||||||
|
```
|
||||||
|
|
||||||
|
`file_format` 包含:`json`、`excel`、`docx`、`doc`、`zip`、`markdown`。
|
||||||
|
|
||||||
|
### 3.3 RegulatoryInfoPackageNotificationRecord
|
||||||
|
|
||||||
|
字段对齐自动填表通知记录:`batch`、`recipient`、`channel`、`export_ids`、`message_summary`、`send_status`、`retry_count`、`external_message_id`、`error_message`、`sent_at`、`is_deleted`。
|
||||||
|
|
||||||
|
### 3.4 ExportedSummaryFile 扩展
|
||||||
|
|
||||||
|
`ExportedSummaryFile.ExportType` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
ZIP = "zip", "ZIP"
|
||||||
|
```
|
||||||
|
|
||||||
|
下载 MIME 按扩展名兜底:
|
||||||
|
|
||||||
|
| 条件 | MIME |
|
||||||
|
| --- | --- |
|
||||||
|
| .zip | application/zip |
|
||||||
|
| .doc | application/msword |
|
||||||
|
| .docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、常量设计
|
||||||
|
|
||||||
|
### 4.1 工作流常量
|
||||||
|
|
||||||
|
```python
|
||||||
|
WORKFLOW_TYPE = "regulatory_info_package"
|
||||||
|
DEFAULT_ZIP_NAME = "第1章 监管信息(预生成版).zip"
|
||||||
|
|
||||||
|
REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS = [
|
||||||
|
("prepare", "准备资料", "regulatory_info_package"),
|
||||||
|
("template_copy", "复制模板", "regulatory_info_package"),
|
||||||
|
("text_extract", "抽取说明书", "regulatory_info_package"),
|
||||||
|
("field_extract", "抽取字段", "regulatory_info_package"),
|
||||||
|
("field_merge", "合并字段", "regulatory_info_package"),
|
||||||
|
("generate_docs", "生成材料", "regulatory_info_package"),
|
||||||
|
("highlight_review_items", "标记待确认", "regulatory_info_package"),
|
||||||
|
("trace_export", "追溯清单", "regulatory_info_package"),
|
||||||
|
("zip_export", "打包下载", "regulatory_info_package"),
|
||||||
|
("notify", "通知", "regulatory_info_package"),
|
||||||
|
("completed", "完成", "completed"),
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 触发关键词
|
||||||
|
|
||||||
|
```python
|
||||||
|
REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS = [
|
||||||
|
"根据说明书生成第1章监管信息",
|
||||||
|
"生成监管信息材料包",
|
||||||
|
"从说明书生成第1章材料",
|
||||||
|
"第1章监管信息",
|
||||||
|
"监管信息材料包",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 文件状态
|
||||||
|
|
||||||
|
```python
|
||||||
|
GENERATED_FILE_SUCCESS = "success"
|
||||||
|
GENERATED_FILE_FALLBACK_SUCCESS = "fallback_success"
|
||||||
|
GENERATED_FILE_FAILED = "failed"
|
||||||
|
GENERATED_FILE_SKIPPED = "skipped"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、核心数据结构
|
||||||
|
|
||||||
|
### 5.1 TemplateSpec
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class TemplateSpec:
|
||||||
|
code: str
|
||||||
|
output_name: str
|
||||||
|
source_file: str
|
||||||
|
file_format: str
|
||||||
|
strategy: str
|
||||||
|
include_in_zip: bool
|
||||||
|
prefer_legacy_doc_native: bool = False
|
||||||
|
allow_docx_fallback: bool = True
|
||||||
|
fields: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 InstructionExtractResult
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class InstructionExtractResult:
|
||||||
|
source_file_name: str
|
||||||
|
paragraphs: list[str]
|
||||||
|
sections: dict[str, str]
|
||||||
|
tables: list[list[list[str]]]
|
||||||
|
component_tables: list["ComponentTable"]
|
||||||
|
front_text: str
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 ProductListRow
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class ProductListRow:
|
||||||
|
package_specification: str
|
||||||
|
item_no: str
|
||||||
|
composition: str
|
||||||
|
component_name: str
|
||||||
|
main_component: str
|
||||||
|
quantity: str
|
||||||
|
source_table_title: str
|
||||||
|
needs_review_fields: list[str] = field(default_factory=list)
|
||||||
|
```
|
||||||
|
|
||||||
|
其中 `item_no` 对应货号,本期固定 `/` 并黄底。
|
||||||
|
|
||||||
|
### 5.4 MergedField
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class MergedField:
|
||||||
|
key: str
|
||||||
|
label: str
|
||||||
|
value: str
|
||||||
|
source: str
|
||||||
|
evidence: str
|
||||||
|
confidence: float
|
||||||
|
highlight_reason: str = "none"
|
||||||
|
needs_review: bool = False
|
||||||
|
rule_value: str = ""
|
||||||
|
llm_value: str = ""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.5 GeneratedFileResult
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class GeneratedFileResult:
|
||||||
|
template_code: str
|
||||||
|
file_name: str
|
||||||
|
requested_format: str
|
||||||
|
actual_format: str
|
||||||
|
status: str
|
||||||
|
path: str = ""
|
||||||
|
artifact_id: int | None = None
|
||||||
|
export_id: int | None = None
|
||||||
|
highlight_count: int = 0
|
||||||
|
missing_count: int = 0
|
||||||
|
llm_only_count: int = 0
|
||||||
|
error_message: str = ""
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、存储目录设计
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/regulatory_info_package/{user_id}/{conversation_id}/{batch_no}/
|
||||||
|
templates/
|
||||||
|
logs/
|
||||||
|
instruction_extract.json
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
doc_adapter_result.json
|
||||||
|
traceability.json
|
||||||
|
generated/
|
||||||
|
CH1.2 监管信息目录.docx
|
||||||
|
CH1.4 申请表.docx
|
||||||
|
CH1.5 产品列表.docx
|
||||||
|
CH1.9 产品申报前沟通的说明.doc(原生写入成功时;兜底成功时为 .docx)
|
||||||
|
CH1.11.1 符合标准的清单.docx
|
||||||
|
CH1.11.5 真实性声明.docx
|
||||||
|
CH1.11.6 符合性声明.docx
|
||||||
|
exports/
|
||||||
|
traceability.xlsx
|
||||||
|
第1章 监管信息(预生成版).zip
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:
|
||||||
|
|
||||||
|
| 目录 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| templates | 模板副本 |
|
||||||
|
| logs | 后台 JSON 产物,不作为用户主下载 |
|
||||||
|
| generated | 生成成功或兜底成功的单文件 |
|
||||||
|
| exports | 用户可下载的追溯 Excel 和 zip |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、输入选择详细设计
|
||||||
|
|
||||||
|
### 7.1 选择优先级
|
||||||
|
|
||||||
|
`input_select.py` 的选择顺序:
|
||||||
|
|
||||||
|
1. 用户消息显式指定文件名时,按 active 附件名模糊匹配。
|
||||||
|
2. 当前对话 active 附件中文件名包含“说明书”的 `.docx`。
|
||||||
|
3. 当前对话 active 附件中唯一 `.docx`。
|
||||||
|
4. 最近成功 `FileSummaryBatch.items` 中包含“说明书”的 `.docx`。
|
||||||
|
5. 多候选或无候选时返回 `InputSelectionResult(status="waiting_user")`。
|
||||||
|
|
||||||
|
### 7.2 多候选处理
|
||||||
|
|
||||||
|
本期不新增在线选择弹窗。多候选时:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户消息可模糊匹配唯一附件 | 直接选择 |
|
||||||
|
| 多个候选且无法确定 | 对话反问用户确认哪个说明书 |
|
||||||
|
| 无说明书 | 提示上传产品说明书 |
|
||||||
|
|
||||||
|
反问示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
我找到多个说明书候选,请回复要使用的文件名:A.docx、B.docx。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、模板配置详细设计
|
||||||
|
|
||||||
|
配置路径:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/regulatory_info_package/templates/regulatory_info_package_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
必须包含 7 个模板:
|
||||||
|
|
||||||
|
| code | source_file | strategy |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| ch1_2_directory | CH1.2 监管信息目录.docx | directory |
|
||||||
|
| ch1_4_application_form | CH1.4 申请表.docx | application_form |
|
||||||
|
| ch1_5_product_list | CH1.5 产品列表.docx | product_list |
|
||||||
|
| ch1_9_pre_submission | CH1.9 产品申报前沟通的说明.doc | pre_submission |
|
||||||
|
| ch1_11_1_standard_list | CH1.11.1 符合标准的清单.docx | standard_list |
|
||||||
|
| ch1_11_5_authenticity | CH1.11.5 真实性声明.docx | authenticity_statement |
|
||||||
|
| ch1_11_6_compliance | CH1.11.6 符合性声明.docx | compliance_statement |
|
||||||
|
|
||||||
|
校验规则:
|
||||||
|
|
||||||
|
| 校验 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| version 必填 | 写入批次 |
|
||||||
|
| source_dir 存在 | 指向样例目录 |
|
||||||
|
| code 唯一 | 防止覆盖产物 |
|
||||||
|
| source_file 存在 | 缺失则配置错误 |
|
||||||
|
| strategy 合法 | 必须命中生成策略 |
|
||||||
|
| doc 模板标记 | `.doc` 模板需声明 `prefer_legacy_doc_native`,并通过 `allow_docx_fallback` 配置是否允许 `.docx` 兜底 |
|
||||||
|
|
||||||
|
### 8.1 模板字段化约定
|
||||||
|
|
||||||
|
为避免生成时破坏 Word 表格、复选框、字号、缩进和合并单元格,本工作流优先使用字段化模板:
|
||||||
|
|
||||||
|
| 方式 | 使用场景 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Word 内容控件 Tag | 正式模板优先 | 在 Word 中为产品名、申请人、复选框、日期、说明文字等填写区设置稳定 Tag,代码按 Tag 写入 |
|
||||||
|
| 稳定占位符 | 过渡方案 | 使用 `{{ product_name }}` 等不会影响版式的占位符,代码替换占位符所在 run |
|
||||||
|
| 行标签定位 | 兜底方案 | 仅用于未字段化的旧模板,必须保留原单元格、段落和 run 格式 |
|
||||||
|
|
||||||
|
模板配置中的字段目标优先级:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
targets:
|
||||||
|
- type: content_control_tag
|
||||||
|
tag: product_name
|
||||||
|
- type: placeholder
|
||||||
|
marker: "{{ product_name }}"
|
||||||
|
- type: table_row_label
|
||||||
|
label: 产品名称
|
||||||
|
```
|
||||||
|
|
||||||
|
模板加载时必须执行字段审计:关键字段缺少 Tag/占位符时给出清晰错误或降级说明;不得静默使用会破坏格式的整格重建策略。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、字段抽取详细设计
|
||||||
|
|
||||||
|
### 9.1 规则抽取
|
||||||
|
|
||||||
|
规则抽取必须独立可用,覆盖:
|
||||||
|
|
||||||
|
| 字段 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| product_name | `【产品名称】` 下一段 |
|
||||||
|
| package_specification | `【包装规格】` 至下一章节 |
|
||||||
|
| intended_use | `【预期用途】` 至下一章节 |
|
||||||
|
| detection_principle | `【检测原理】` 至下一章节 |
|
||||||
|
| main_components | `【主要组成成分】` 下方表格摘要 |
|
||||||
|
| storage_condition_and_validity | `【储存条件及有效期】` 至下一章节 |
|
||||||
|
| sample_type | 样本要求章节中的“适用样本类型” |
|
||||||
|
| detection_targets | 预期用途/检测原理中的基因、病原体、靶标 |
|
||||||
|
| applicable_instruments | `【适用仪器】` 至下一章节 |
|
||||||
|
| test_method | `【检验方法】` 摘要 |
|
||||||
|
| standards | 正则抽取标准号 |
|
||||||
|
|
||||||
|
### 9.2 LLM 抽取与重试
|
||||||
|
|
||||||
|
`field_extract.py` 并行执行规则抽取和 LLM 抽取:
|
||||||
|
|
||||||
|
```text
|
||||||
|
ThreadPoolExecutor(max_workers=2)
|
||||||
|
-> rule_extract()
|
||||||
|
-> llm_extract_with_retry(max_attempts=3)
|
||||||
|
```
|
||||||
|
|
||||||
|
LLM 重试策略:
|
||||||
|
|
||||||
|
| 次数 | 间隔 |
|
||||||
|
| --- | --- |
|
||||||
|
| 第 1 次 | 立即 |
|
||||||
|
| 第 2 次 | 等待 1 秒 |
|
||||||
|
| 第 3 次 | 等待 2 秒 |
|
||||||
|
|
||||||
|
三次失败后:
|
||||||
|
|
||||||
|
| 产物 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| risk_notes | 增加 `llm_extract_failed` |
|
||||||
|
| logs/field_extract_result.json | 记录每次错误摘要 |
|
||||||
|
| 工作流 | 继续使用规则结果 |
|
||||||
|
|
||||||
|
LLM 不允许填企业信息、分类编码、管理类别、临床评价路径等说明书无法证明的内容。
|
||||||
|
|
||||||
|
### 9.3 字段合并
|
||||||
|
|
||||||
|
| 场景 | 写入值 | 高亮 | needs_review |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| rule 与 LLM 一致 | rule/LLM 值 | 否 | 否 |
|
||||||
|
| rule 与 LLM 冲突 | 规则优先或配置优先 | 黄底红字 | 是 |
|
||||||
|
| rule 缺失、LLM 命中 | LLM 值 | 黄底 | 是 |
|
||||||
|
| 全部缺失 | `/` | 黄底 | 是 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、文档适配器详细设计
|
||||||
|
|
||||||
|
### 10.1 统一接口
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DocumentAdapter(Protocol):
|
||||||
|
def replace_text(self, old: str, new: str, *, highlight: bool = False, conflict: bool = False) -> int: ...
|
||||||
|
def fill_table_cell(self, row_label: str, value: str, *, highlight: bool = False, conflict: bool = False) -> bool: ...
|
||||||
|
def replace_table(self, marker: str, rows: list[ProductListRow], *, highlight_columns: list[str] | None = None) -> bool: ...
|
||||||
|
def save(self, path: Path) -> Path: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
高亮规则:
|
||||||
|
|
||||||
|
| 类型 | 视觉 |
|
||||||
|
| --- | --- |
|
||||||
|
| missing | 黄色底色 |
|
||||||
|
| llm_only | 黄色底色 |
|
||||||
|
| conflict | 黄色底色 + 红色字体 |
|
||||||
|
|
||||||
|
### 10.2 DocxDocumentAdapter
|
||||||
|
|
||||||
|
实现能力:
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| replace_text | 支持段落与表格中的文本替换,需处理 run 拆分 |
|
||||||
|
| fill_content_control | 按内容控件 Tag 填写文本、日期或复选框 |
|
||||||
|
| replace_placeholder | 按稳定占位符替换文本,保留占位符所在 run/段落格式 |
|
||||||
|
| fill_table_cell | 按行标签定位目标单元格,仅作为未字段化模板的兜底 |
|
||||||
|
| replace_table | 重建 CH1.5 产品列表表格 |
|
||||||
|
| apply_highlight | 使用 `w:shd` 设置黄色底色 |
|
||||||
|
| apply_conflict_style | 黄色底色 + 红字 |
|
||||||
|
|
||||||
|
### 10.3 LegacyDocDocumentAdapter
|
||||||
|
|
||||||
|
接口:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class AdapterCapability:
|
||||||
|
adapter_name: str
|
||||||
|
supports_native_doc_write: bool
|
||||||
|
supports_docx_fallback: bool
|
||||||
|
status: str
|
||||||
|
error_message: str = ""
|
||||||
|
|
||||||
|
class LegacyDocDocumentAdapter:
|
||||||
|
@staticmethod
|
||||||
|
def detect_available_adapter() -> AdapterCapability: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
执行顺序:
|
||||||
|
|
||||||
|
1. 执行能力探测:Word COM、LibreOffice UNO 或其他可写 `.doc` 能力。
|
||||||
|
2. 有原生能力时优先尝试原生打开 `.doc` 并保存 `.doc`。
|
||||||
|
3. 无原生能力或原生失败时,尝试生成同语义 `.docx` 兜底文件,再交给 `DocxDocumentAdapter`。
|
||||||
|
4. 兜底成功时,输出 `CH1.9 产品申报前沟通的说明.docx`,状态为 `fallback_success`。
|
||||||
|
5. 原生和兜底均失败时,该文件状态为 `failed`,不进入 zip。
|
||||||
|
|
||||||
|
兜底成功 `adapter_summary.doc`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"requested_format": "doc",
|
||||||
|
"actual_format": "docx",
|
||||||
|
"adapter": "ConversionFallbackAdapter",
|
||||||
|
"status": "fallback_success"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、材料生成详细设计
|
||||||
|
|
||||||
|
### 11.1 generate_docs 节点并发
|
||||||
|
|
||||||
|
工作流节点仍串行执行,但 `generate_docs` 内部并发生成单文件:
|
||||||
|
|
||||||
|
```python
|
||||||
|
with ThreadPoolExecutor(max_workers=min(7, len(specs))) as executor:
|
||||||
|
futures = [executor.submit(generate_one_document, spec, context) for spec in specs]
|
||||||
|
```
|
||||||
|
|
||||||
|
并发注意事项:
|
||||||
|
|
||||||
|
| 注意事项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 每个文档使用独立模板副本 | 避免并发写同一文件 |
|
||||||
|
| 共享字段只读 | `merged_fields`、`product_list_rows` 不在子线程修改 |
|
||||||
|
| 数据库写入集中处理 | 子线程返回 `GeneratedFileResult`,主线程统一写 artifact/export |
|
||||||
|
| 异常隔离 | 单文件失败不影响其他文件 |
|
||||||
|
|
||||||
|
### 11.2 7 个生成策略
|
||||||
|
|
||||||
|
| 模板 | 输出规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| CH1.2 | 替换产品名;页码沿用样例 |
|
||||||
|
| CH1.4 | 填产品名、包装规格、预期用途、组成、储存有效期、方法原理;企业/分类等缺失项 `/` 黄底 |
|
||||||
|
| CH1.5 | 按样例表头重建,货号 `/` 黄底 |
|
||||||
|
| CH1.9 | 优先 `.doc` 原生写入;失败则 `.docx` 兜底;兜底失败则不输出 |
|
||||||
|
| CH1.11.1 | 说明书标准号直接写;知识库候选只作为待确认高亮/追溯 |
|
||||||
|
| CH1.11.5 | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
| CH1.11.6 | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
|
||||||
|
### 11.3 产品名缺失
|
||||||
|
|
||||||
|
规则和 LLM 都抽不到产品名称时:
|
||||||
|
|
||||||
|
| 项 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件内容 | 产品名位置写 `/` 并黄底 |
|
||||||
|
| 批次状态 | 至少 `partial_success` |
|
||||||
|
| zip | 仍生成,包含成功文件 |
|
||||||
|
| 摘要 | 明确提示产品名称待确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、追溯与 zip 设计
|
||||||
|
|
||||||
|
### 12.1 追溯 Excel
|
||||||
|
|
||||||
|
用户可下载:
|
||||||
|
|
||||||
|
```text
|
||||||
|
exports/traceability.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
创建导出记录:
|
||||||
|
|
||||||
|
```text
|
||||||
|
export_category = traceability
|
||||||
|
export_type = excel
|
||||||
|
```
|
||||||
|
|
||||||
|
字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| target_file | 目标文件 |
|
||||||
|
| target_field | 目标字段 |
|
||||||
|
| final_value | 写入值 |
|
||||||
|
| extraction_source | rule、llm、missing、knowledge_candidate |
|
||||||
|
| evidence | 来源片段 |
|
||||||
|
| highlight_reason | none、missing、llm_only、conflict、rag_candidate |
|
||||||
|
| needs_review | 是否需复核 |
|
||||||
|
|
||||||
|
### 12.2 后台 JSON
|
||||||
|
|
||||||
|
JSON 产物仅写入 `logs/`,按需从后台查看:
|
||||||
|
|
||||||
|
```text
|
||||||
|
logs/instruction_extract.json
|
||||||
|
logs/field_extract_result.json
|
||||||
|
logs/merged_fields.json
|
||||||
|
logs/traceability.json
|
||||||
|
logs/doc_adapter_result.json
|
||||||
|
```
|
||||||
|
|
||||||
|
这些 JSON 产物写入 `RegulatoryInfoPackageArtifact`,但不作为用户主下载。
|
||||||
|
|
||||||
|
### 12.3 zip 打包
|
||||||
|
|
||||||
|
zip 文件名:
|
||||||
|
|
||||||
|
```text
|
||||||
|
第1章 监管信息(预生成版).zip
|
||||||
|
```
|
||||||
|
|
||||||
|
规则:
|
||||||
|
|
||||||
|
| 场景 | 是否进入 zip |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件状态 `success` | 是 |
|
||||||
|
| 文件状态 `fallback_success` | 是 |
|
||||||
|
| 文件状态 `failed` | 否 |
|
||||||
|
| 文件状态 `skipped` | 否 |
|
||||||
|
|
||||||
|
若 `CH1.9 .doc` 兜底 `.docx` 成功,zip 中放入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
CH1.9 产品申报前沟通的说明.docx
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、工作流详细设计
|
||||||
|
|
||||||
|
### 13.1 批次创建
|
||||||
|
|
||||||
|
```python
|
||||||
|
def create_regulatory_info_package_batch(
|
||||||
|
*,
|
||||||
|
conversation: Conversation,
|
||||||
|
user,
|
||||||
|
trigger_message: Message | None = None,
|
||||||
|
source_attachment: FileAttachment | None = None,
|
||||||
|
source_summary_batch: FileSummaryBatch | None = None,
|
||||||
|
source_summary_item_id: int | None = None,
|
||||||
|
) -> RegulatoryInfoPackageBatch:
|
||||||
|
```
|
||||||
|
|
||||||
|
创建后初始化 `REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS`。
|
||||||
|
|
||||||
|
### 13.2 执行器
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageWorkflowExecutor:
|
||||||
|
def run(self) -> None: ...
|
||||||
|
def _nodes(self): ...
|
||||||
|
def _run_node(self, node: WorkflowNodeRun) -> None: ...
|
||||||
|
def _execute_node(self, node: WorkflowNodeRun) -> None: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
节点执行:
|
||||||
|
|
||||||
|
| 节点 | 关键动作 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 确认说明书,或 waiting_user |
|
||||||
|
| template_copy | 复制 7 个模板 |
|
||||||
|
| template_audit | 审计模板字段 Tag/占位符,记录缺失和降级策略 |
|
||||||
|
| text_extract | 抽取说明书章节和表格 |
|
||||||
|
| field_extract | 规则 + LLM 并行抽取 |
|
||||||
|
| field_merge | 合并字段、高亮决策 |
|
||||||
|
| generate_docs | 多线程生成单文件 |
|
||||||
|
| highlight_review_items | 若生成策略已完成高亮,该节点记录确认结果即可 |
|
||||||
|
| trace_export | 写 Excel 和 logs JSON |
|
||||||
|
| zip_export | 打包成功/兜底成功文件 |
|
||||||
|
| notify | 写专项通知并调用统一通知 |
|
||||||
|
| completed | 写助手摘要 |
|
||||||
|
|
||||||
|
### 13.3 状态落定
|
||||||
|
|
||||||
|
| 条件 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| zip 成功且 7 个文件均 success/fallback_success | success |
|
||||||
|
| zip 成功但有 failed/skipped | partial_success |
|
||||||
|
| zip 失败但至少一个单文件成功 | partial_success |
|
||||||
|
| 全部文件失败或关键输入缺失 | failed |
|
||||||
|
| 多说明书候选等待确认 | waiting_user |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、路由与接口详细设计
|
||||||
|
|
||||||
|
### 14.1 skill_router.py
|
||||||
|
|
||||||
|
增加:
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| ROUTE_ACTIONS | 加入 `regulatory_info_package` |
|
||||||
|
| SkillRoute 属性 | `starts_regulatory_info_package` |
|
||||||
|
| deterministic route | 命中触发关键词直接返回 |
|
||||||
|
| LLM prompt | action 列表加入 `regulatory_info_package` |
|
||||||
|
|
||||||
|
### 14.2 services.py
|
||||||
|
|
||||||
|
`stream_message` 增加分支:
|
||||||
|
|
||||||
|
1. 调用 `select_instruction_input(conversation, content)`。
|
||||||
|
2. 若多候选,回复反问,不启动工作流。
|
||||||
|
3. 若无候选,回复请上传说明书。
|
||||||
|
4. 若唯一候选,创建批次并启动工作流。
|
||||||
|
5. SSE 发送 `workflow_started`。
|
||||||
|
|
||||||
|
### 14.3 views.py
|
||||||
|
|
||||||
|
接口:
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/regulatory-info-package/health/
|
||||||
|
POST /api/review-agent/regulatory-info-package/start/
|
||||||
|
GET /api/review-agent/regulatory-info-package/<batch_id>/status/
|
||||||
|
POST /api/review-agent/regulatory-info-package/<batch_id>/select-input/
|
||||||
|
```
|
||||||
|
|
||||||
|
`status` 返回:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 状态、产品名、缺失/LLM-only/冲突数量 |
|
||||||
|
| nodes | 节点状态 |
|
||||||
|
| generated_files | 7 个文件成功/失败/兜底状态 |
|
||||||
|
| exports | zip、单文件、Excel 下载 |
|
||||||
|
| risk_notes | 风险提示 |
|
||||||
|
| notifications | 通知 |
|
||||||
|
|
||||||
|
zip 不需要 `is_primary` 字段,前端或摘要按返回顺序把 zip 放首位。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、助手摘要设计
|
||||||
|
|
||||||
|
完成消息结构:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成第1章监管信息材料包。
|
||||||
|
|
||||||
|
批次号:RIP-...
|
||||||
|
产品名称:...
|
||||||
|
状态:success / partial_success
|
||||||
|
|
||||||
|
主下载:[第1章 监管信息(预生成版).zip](...)
|
||||||
|
|
||||||
|
| 文件 | 状态 | 下载/原因 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| CH1.2 监管信息目录.docx | 成功 | 下载 |
|
||||||
|
| CH1.9 产品申报前沟通的说明.docx | 兜底成功 | 下载 |
|
||||||
|
| CH1.11.1 符合标准的清单.docx | 失败 | 失败原因 |
|
||||||
|
|
||||||
|
待确认:缺失项 X 个,LLM复核项 Y 个,冲突项 Z 个。
|
||||||
|
```
|
||||||
|
|
||||||
|
要求:
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| zip 首位 | zip 链接必须在单文件列表之前 |
|
||||||
|
| 失败可见 | 失败文件展示状态和原因,无下载链接 |
|
||||||
|
| 兜底提示 | `.doc -> .docx` 时显示“兜底成功” |
|
||||||
|
| 待确认摘要 | 展示 missing、llm_only、conflict 数量 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、前端详细设计
|
||||||
|
|
||||||
|
### 16.1 模板
|
||||||
|
|
||||||
|
`templates/home.html` 增加工具 chip:
|
||||||
|
|
||||||
|
```html
|
||||||
|
<button
|
||||||
|
class="tool-chip"
|
||||||
|
type="button"
|
||||||
|
data-prompt-template="根据说明书生成第1章监管信息"
|
||||||
|
>第1章监管信息</button>
|
||||||
|
```
|
||||||
|
|
||||||
|
`summaryPanel` 增加:
|
||||||
|
|
||||||
|
```html
|
||||||
|
data-regulatory-info-package-status-url-template="/api/review-agent/regulatory-info-package/__batch_id__/status/"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 16.2 app.js
|
||||||
|
|
||||||
|
增加:
|
||||||
|
|
||||||
|
| 位置 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow type 判断 | 支持 `regulatory_info_package` |
|
||||||
|
| 状态 URL 选择 | 使用 `data-regulatory-info-package-status-url-template` |
|
||||||
|
| 终态判断 | success、partial_success、failed、waiting_user |
|
||||||
|
| 导出展示 | 直接按 exports 返回顺序展示,zip 在后端排首位 |
|
||||||
|
|
||||||
|
### 16.3 不做选择 UI
|
||||||
|
|
||||||
|
多说明书候选时,本期不做弹窗。通过对话反问用户确认文件名。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、导出下载权限
|
||||||
|
|
||||||
|
`file_summary.views._export_for_user` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
if exported.workflow_type == "regulatory_info_package":
|
||||||
|
allowed = RegulatoryInfoPackageBatch.objects.filter(
|
||||||
|
pk=exported.workflow_batch_id,
|
||||||
|
conversation__user=user,
|
||||||
|
is_deleted=False,
|
||||||
|
).exists()
|
||||||
|
return exported if allowed else None
|
||||||
|
```
|
||||||
|
|
||||||
|
下载 content type 增加 zip 和 `.doc` 后缀判断。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、通知详细设计
|
||||||
|
|
||||||
|
`notifier.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def notify_completion(batch: RegulatoryInfoPackageBatch, exports: list[ExportedSummaryFile]) -> RegulatoryInfoPackageNotificationRecord: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
处理:
|
||||||
|
|
||||||
|
| 步骤 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 创建专项通知记录 | 写 `RegulatoryInfoPackageNotificationRecord` |
|
||||||
|
| 调用统一通知 | `dispatch_workflow_notification(build_regulatory_info_package_context(batch))` |
|
||||||
|
| 捕获异常 | 通知失败写记录和 risk_notes,不影响批次下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、测试详细设计
|
||||||
|
|
||||||
|
| 测试文件 | 覆盖 |
|
||||||
|
| --- | --- |
|
||||||
|
| test_regulatory_info_package_models.py | 三张表、zip export type、基础关联 |
|
||||||
|
| test_regulatory_info_package_trigger.py | 固定关键词与 LLM action |
|
||||||
|
| test_regulatory_info_package_input_select.py | 文件名模糊匹配、active 附件、多候选反问 |
|
||||||
|
| test_regulatory_info_package_template_config.py | YAML 加载、模板缺失、code 唯一 |
|
||||||
|
| test_regulatory_info_package_instruction_extract.py | 说明书章节和组成表抽取 |
|
||||||
|
| test_regulatory_info_package_field_extract.py | 规则抽取、LLM 三次重试、失败降级 |
|
||||||
|
| test_regulatory_info_package_field_merge.py | missing、llm_only、conflict |
|
||||||
|
| test_regulatory_info_package_docx_writer.py | 替换、表格填充、黄底、红字 |
|
||||||
|
| test_regulatory_info_package_legacy_doc.py | adapter 探测、docx 兜底、失败状态 |
|
||||||
|
| test_regulatory_info_package_package_generate.py | 7 文件生成结果、多线程异常隔离 |
|
||||||
|
| test_regulatory_info_package_traceability.py | Excel 追溯和 logs JSON |
|
||||||
|
| test_regulatory_info_package_zip.py | zip 只包含 success/fallback_success |
|
||||||
|
| test_regulatory_info_package_workflow.py | 节点流转、partial_success、waiting_user |
|
||||||
|
| test_regulatory_info_package_views.py | start/status/download 权限 |
|
||||||
|
| test_regulatory_info_package_frontend.py | chip、卡片、状态 URL |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、异常处理矩阵
|
||||||
|
|
||||||
|
| 异常 | 批次状态 | 处理 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 无说明书 | waiting_user 或不创建批次 | 提示上传说明书 |
|
||||||
|
| 多候选无法匹配 | waiting_user 或不创建批次 | 反问确认文件名 |
|
||||||
|
| 模板缺失 | failed | 列出缺失模板 |
|
||||||
|
| 规则抽取失败 | partial_success/continue | 使用 LLM 结果 |
|
||||||
|
| LLM 三次失败 | continue | 使用规则结果,写 risk_notes |
|
||||||
|
| 产品名缺失 | partial_success | 写 `/` 黄底,继续生成 zip |
|
||||||
|
| 单个 docx 文件生成失败 | partial_success | 不进入 zip,摘要展示失败 |
|
||||||
|
| CH1.9 doc 原生失败但 docx 兜底成功 | success/partial_success | 状态 fallback_success,进入 zip |
|
||||||
|
| CH1.9 doc 和 docx 兜底均失败 | partial_success | 不进入 zip,摘要展示失败 |
|
||||||
|
| traceability.xlsx 失败 | partial_success | 不阻断 zip |
|
||||||
|
| zip 失败 | partial_success | 保留单文件下载 |
|
||||||
|
| 通知失败 | 不影响主状态 | 写通知失败和 risk_notes |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十一、设计结论
|
||||||
|
|
||||||
|
| 编号 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| D1 | 详细设计文档路径为 `docs/4.详细设计/5.第1章监管信息材料包生成.md` |
|
||||||
|
| D2 | 模型集中在 `review_agent/models.py`,业务模块为 `review_agent/regulatory_info_package/` |
|
||||||
|
| D3 | `.doc` 采用能力驱动策略:探测 Word COM/UNO 等原生能力,有能力时优先原生处理 |
|
||||||
|
| D4 | `.doc` 无原生能力或原生失败时允许 `.docx` 兜底;兜底文件名为 `CH1.9 产品申报前沟通的说明.docx` |
|
||||||
|
| D5 | zip 只包含成功或兜底成功文件,失败文件不进入 zip |
|
||||||
|
| D6 | LLM 最多重试 3 次,失败后使用规则结果继续 |
|
||||||
|
| D7 | 缺失和 LLM-only 黄底,冲突黄底红字 |
|
||||||
|
| D8 | 产品列表使用 `ProductListRow`,货号固定 `/` 黄底 |
|
||||||
|
| D9 | 标准清单只复用现有知识库能力,不新增独立 RAG 流程 |
|
||||||
|
| D10 | 前端最小接入,不做说明书选择弹窗 |
|
||||||
|
| D11 | 追溯 Excel 可下载,JSON 只放后台 logs |
|
||||||
|
| D12 | 本期不新增字段级数据库表 |
|
||||||
|
| D13 | 工作流串行,文档生成节点内部可多线程 |
|
||||||
|
| D14 | 模板优先字段化,正式填充路径使用内容控件 Tag 或稳定占位符,行标签定位仅作为兜底 |
|
||||||
|
| D15 | 本轮只产出详细设计,不写代码、不生成迁移 |
|
||||||
622
docs/5.开发计划/5.第1章监管信息材料包生成.md
Normal file
622
docs/5.开发计划/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,622 @@
|
|||||||
|
# 第1章监管信息材料包生成开发计划
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 数据库设计文档 | docs/3.数据库设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 详细设计文档 | docs/4.详细设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 参考开发计划 | docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 批次号规则 | RIP-YYYYMMDDHHMMSS-abcdef |
|
||||||
|
| 计划日期 | 2026-06-10 |
|
||||||
|
| 计划版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、开发计划目标
|
||||||
|
|
||||||
|
本开发计划面向 Codex 执行,目标是把 `regulatory_info_package` 独立工作流按可验证、可回滚、可阶段验收的方式落地。计划以现有自动填表工作流 `application_form_fill` 为主要参考,但保持独立模块、独立批次、独立产物、独立通知和独立前端卡片。
|
||||||
|
|
||||||
|
现状裁决:当前最新代码中尚未存在 `regulatory_info_package` 正式工作流,本计划按“新建正式材料包工作流”执行;不得把该功能并入或改造 `application_form_fill`。
|
||||||
|
|
||||||
|
开发完成后,用户可在对话中上传或指定产品说明书,并通过“根据说明书生成第1章监管信息”触发工作流。系统基于 `docs/0.原始材料/第1章 监管信息` 样例模板生成 7 个监管信息文件,以 `第1章 监管信息(预生成版).zip` 作为首位下载入口,同时提供单文件和追溯 Excel 辅助下载。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、已确认开发规则
|
||||||
|
|
||||||
|
| 规则 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 工作流独立 | 新增 `workflow_type=regulatory_info_package`,不并入 `application_form_fill` |
|
||||||
|
| 模块独立 | 新增 `review_agent/regulatory_info_package/`,服务与自动填表平级 |
|
||||||
|
| 模型集中 | Django 模型继续放在 `review_agent/models.py` |
|
||||||
|
| 节点幂等 | RIP 节点必须基于 `workflow_type + workflow_batch_id + node_code` 做幂等创建或数据库唯一约束 |
|
||||||
|
| 单说明书输入 | 用户消息指定文件名优先,其次 active 附件,再兼容最近成功文件汇总 |
|
||||||
|
| 多候选处理 | 不做选择弹窗,通过对话反问用户确认说明书文件名 |
|
||||||
|
| 模板固定 | 固定处理第1章监管信息 7 个模板 |
|
||||||
|
| 模板字段化 | 优先把模板整理为 Agent/代码可识别的字段模板,使用内容控件 Tag 或稳定占位符;代码只填字段,不依赖手工改格式 |
|
||||||
|
| 抽取策略 | 规则抽取和 LLM 抽取并行,LLM 最多重试 3 次,失败后规则结果继续 |
|
||||||
|
| 文档生成 | 工作流节点串行,`generate_docs` 节点内部每个文档独立线程处理 |
|
||||||
|
| `.doc` 策略 | CH1.9 能力驱动:探测到 Word COM/UNO 时优先原生 `.doc`,无原生能力时明确记录并允许 `.docx` 兜底 |
|
||||||
|
| zip 策略 | zip 只包含成功或兜底成功文件,失败文件不进入 zip |
|
||||||
|
| 高亮策略 | 缺失项 `/` 黄底;LLM-only 黄底;冲突黄底红字 |
|
||||||
|
| 追溯策略 | 用户下载 Excel;JSON 只写后台 logs 目录 |
|
||||||
|
| 前端策略 | 只做最小接入,不单独建设新页面或独立样式体系 |
|
||||||
|
| TDD | 新行为先写失败测试,再实现 |
|
||||||
|
| Git 提交 | 每阶段验证通过后生成提交摘要;是否本地提交由用户确认 |
|
||||||
|
| 用户变更保护 | 不回滚、不覆盖用户已有未提交变更 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、规范依据与裁决
|
||||||
|
|
||||||
|
| 规范来源 | 命中内容 | 本计划裁决 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| GYRX 后端开发规范 | 接口响应、日志、增量规范 | 状态接口、下载权限、异常降级和日志留痕按现有 Django 模式实现 |
|
||||||
|
| GYRX 前端开发规范 | 样式复用、组件接入、下载图标建议 | 复用现有对话页和工作流卡片样式,必要时只补少量语义化样式 |
|
||||||
|
| 既有自动填表开发计划 | 阶段拆分、测试先行、每阶段验证 | 本计划沿用阶段结构和 Codex 执行提示粒度 |
|
||||||
|
| 第1章监管信息详细设计 | 独立模块、7 模板、doc 兜底、zip 首位 | 作为本计划最高优先级依据 |
|
||||||
|
|
||||||
|
未发现规范冲突。项目专项设计优先于通用规范。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、总体验收标准
|
||||||
|
|
||||||
|
| 类别 | 完成标准 |
|
||||||
|
| --- | --- |
|
||||||
|
| 触发 | 固定提示词和 LLM 路由均可触发 `regulatory_info_package` |
|
||||||
|
| 输入选择 | 能按用户指定文件名、active 附件、最近文件汇总选择说明书;多候选可反问 |
|
||||||
|
| 批次 | 能创建 `RegulatoryInfoPackageBatch`,节点和事件可查询 |
|
||||||
|
| 模板 | 能加载并校验 7 个模板配置,模板复制只写批次目录 |
|
||||||
|
| 抽取 | 规则抽取可独立跑通,LLM 失败不阻断主链路 |
|
||||||
|
| 合并 | missing、llm_only、conflict 均有可追溯结构和高亮决策 |
|
||||||
|
| docx 生成 | 6 个 `.docx` 文件能按模板生成并保留基本版式 |
|
||||||
|
| doc 处理 | CH1.9 优先 `.doc` 原生处理,失败时 `.docx` 兜底,状态可见 |
|
||||||
|
| ZIP | `第1章 监管信息(预生成版).zip` 排在助手回显首位,只包含成功/兜底成功文件 |
|
||||||
|
| 单文件 | 成功文件有辅助下载,失败文件显示原因且无下载链接 |
|
||||||
|
| 追溯 | 用户可下载 `traceability.xlsx`,JSON 写入 `logs/` |
|
||||||
|
| 前端 | 对话快捷入口、工作流卡片、状态轮询和下载列表正常 |
|
||||||
|
| 权限 | 非批次所属用户不能下载 RIP 产物 |
|
||||||
|
| 回归 | `python manage.py check` 和相关 pytest 通过,既有文件汇总/自动填表/法规核查不回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、阶段总览
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 阶段验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| RIP-0 | 准备与基线回归 | 创建开发分支,确认依赖和既有测试状态 | 基线命令结果已记录 |
|
||||||
|
| RIP-1 | 数据模型与导出扩展 | 新增三个模型,扩展 zip 下载能力 | migration、模型和下载权限测试通过 |
|
||||||
|
| RIP-2 | 模块骨架与模板配置 | 新建模块、schema、YAML 配置和存储服务 | 配置加载和路径安全测试通过 |
|
||||||
|
| RIP-3 | 触发与工作流骨架 | 接入路由、批次创建、节点流转和状态接口 | 可创建并运行空工作流 |
|
||||||
|
| RIP-4 | 输入选择与说明书解析 | 选择说明书,解析 docx 段落、章节和表格 | 输入选择和说明书解析测试通过 |
|
||||||
|
| RIP-5 | 字段抽取与合并 | 规则 + LLM 并行抽取、重试、合并和高亮决策 | 抽取、重试、冲突合并测试通过 |
|
||||||
|
| RIP-6 | DOCX 文档生成 | 实现 6 个 docx 模板生成、产品列表重建和高亮 | docx 生成和 XML 高亮测试通过 |
|
||||||
|
| RIP-7 | CH1.9 DOC 适配 | 实现 `.doc` 原生适配探测和 `.docx` 兜底 | doc 兜底、失败隔离测试通过 |
|
||||||
|
| RIP-8 | 追溯、ZIP 与下载权限 | 生成 Excel、logs JSON、ZIP 和导出记录 | ZIP 内容、追溯、权限测试通过 |
|
||||||
|
| RIP-9 | 摘要、通知与状态归并 | 生成助手摘要,写通知记录,落定批次状态 | partial_success 等状态测试通过 |
|
||||||
|
| RIP-10 | 前端接入与总体验收 | 接入快捷入口、卡片、状态轮询和下载展示 | 前端回归和全量后端测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、RIP-0 准备与基线回归
|
||||||
|
|
||||||
|
### RIP-0-001 创建开发分支并确认工作区
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 创建本功能开发分支,确认当前工作区已有变更 |
|
||||||
|
| 修改范围 | Git 分支,不修改业务代码 |
|
||||||
|
| 验收标准 | 分支名符合 `codex/` 前缀;记录已有未提交变更,不回滚用户变更 |
|
||||||
|
| Codex 执行提示 | 请创建 `codex/regulatory-info-package` 开发分支,运行 `git status --short`,确认设计文档和目录重排状态,不要回滚无关变更。 |
|
||||||
|
|
||||||
|
### RIP-0-002 确认依赖与基线测试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 确认 Django、python-docx、openpyxl、PyYAML、可选 Word COM 环境状态 |
|
||||||
|
| 修改范围 | 不修改业务代码 |
|
||||||
|
| 验收标准 | `python manage.py check` 可执行;关键依赖可 import;既有失败需记录 |
|
||||||
|
| Codex 执行提示 | 请运行 Django check 和关键回归测试,确认依赖可用。若发现既有失败,只记录并继续按计划隔离,不改无关代码。 |
|
||||||
|
|
||||||
|
### RIP-0 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_file_summary_views.py -k download
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、RIP-1 数据模型与导出扩展
|
||||||
|
|
||||||
|
### RIP-1-001 新增监管信息材料包 ORM 模型
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 新增 `RegulatoryInfoPackageBatch`、`RegulatoryInfoPackageArtifact`、`RegulatoryInfoPackageNotificationRecord` |
|
||||||
|
| 修改范围 | `review_agent/models.py` |
|
||||||
|
| 验收标准 | 字段、枚举、索引、软删除、关联关系符合数据库设计 |
|
||||||
|
| Codex 执行提示 | 请按 `docs/3.数据库设计/5.第1章监管信息材料包生成.md` 新增三个模型,模型集中放在 `review_agent/models.py`,不要新增字段级数据库表。 |
|
||||||
|
|
||||||
|
### RIP-1-002 扩展导出类型和下载 MIME
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | `ExportedSummaryFile.ExportType` 增加 `zip`,下载 MIME 支持 `.zip`、`.doc`、`.docx` |
|
||||||
|
| 修改范围 | `review_agent/models.py`、`review_agent/file_summary/views.py` |
|
||||||
|
| 验收标准 | zip 可下载;doc/docx MIME 正确;原有导出不回归 |
|
||||||
|
| Codex 执行提示 | 请扩展 `ExportedSummaryFile` 导出类型,并在下载接口按 workflow_type 和文件后缀处理权限与 content type。 |
|
||||||
|
|
||||||
|
### RIP-1-003 生成迁移并补模型测试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成数据库迁移并覆盖基础模型行为 |
|
||||||
|
| 修改范围 | `review_agent/migrations/`、`tests/` |
|
||||||
|
| 验收标准 | migration 可应用;模型测试覆盖批次号、状态、artifact、通知、zip export type |
|
||||||
|
| Codex 执行提示 | 请生成迁移并新增 `tests/test_regulatory_info_package_models.py`,优先覆盖模型字段默认值、导出类型,以及 `WorkflowNodeRun` 在 RIP 批次下的幂等/唯一节点创建。 |
|
||||||
|
|
||||||
|
### RIP-1 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_models.py && pytest tests/test_file_summary_views.py -k download
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、RIP-2 模块骨架与模板配置
|
||||||
|
|
||||||
|
### RIP-2-001 创建 regulatory_info_package 模块骨架
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 新增独立模块目录和基础文件 |
|
||||||
|
| 修改范围 | `review_agent/regulatory_info_package/` |
|
||||||
|
| 验收标准 | 模块可 import;不影响现有 `application_form_fill` |
|
||||||
|
| Codex 执行提示 | 请创建详细设计中的模块骨架,先放常量、schema、storage、events、workflow 空实现和 service 包,不提前写复杂业务。 |
|
||||||
|
|
||||||
|
### RIP-2-002 编写模板配置 YAML
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 配置 7 个样例模板、输出文件名、策略、字段 Tag/占位符映射和 `.doc` 标记 |
|
||||||
|
| 修改范围 | `review_agent/regulatory_info_package/templates/regulatory_info_package_templates_v1.yaml` |
|
||||||
|
| 验收标准 | 7 个模板完整;zip 名称为 `第1章 监管信息(预生成版).zip`;字段映射优先使用内容控件 Tag 或稳定占位符 |
|
||||||
|
| Codex 执行提示 | 请按详细设计录入模板配置,source_dir 指向样例目录,字段 targets 优先写 content_control_tag 或 placeholder;CH1.9 声明 `prefer_legacy_doc_native: true` 且允许 docx fallback。 |
|
||||||
|
|
||||||
|
### RIP-2-003 实现配置加载、模板仓库和存储目录
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 实现 YAML 加载校验、模板复制、批次目录创建、路径安全检查 |
|
||||||
|
| 修改范围 | `template_config.py`、`template_repository.py`、`storage.py` |
|
||||||
|
| 验收标准 | 配置错误可返回清晰错误;模板只复制到批次目录;不写原始材料目录;能审计模板是否包含所需 Tag/占位符 |
|
||||||
|
| Codex 执行提示 | 请实现配置加载、模板复制和模板字段审计服务,所有路径必须校验位于批次工作目录内,原始模板目录只读。 |
|
||||||
|
|
||||||
|
### RIP-2-004 模板字段化整理与审计
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 将样例模板升级为代码友好的字段模板,不手工改生成文件格式 |
|
||||||
|
| 修改范围 | `docs/0.原始材料/第1章 监管信息` 的模板副本或 `review_agent/regulatory_info_package/templates/field_manifest.yaml` |
|
||||||
|
| 验收标准 | CH1.4 关键字段、复选框、声明类产品名/申请人位置有稳定 Tag 或占位符;模板审计发现字段缺失时,对应测试必须失败 |
|
||||||
|
| Codex 执行提示 | 请优先使用 Word 内容控件 Tag;若暂不具备内容控件编辑能力,则使用不会影响版式的稳定占位符,并在配置中记录字段与目标位置。 |
|
||||||
|
|
||||||
|
### RIP-2 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_template_config.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、RIP-3 触发与工作流骨架
|
||||||
|
|
||||||
|
### RIP-3-001 扩展意图路由
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 新增 `regulatory_info_package` action,支持固定关键词和 LLM 路由 |
|
||||||
|
| 修改范围 | `review_agent/skill_router.py` |
|
||||||
|
| 验收标准 | 固定提示词直接命中;LLM action 列表包含本工作流;原路由不回归 |
|
||||||
|
| Codex 执行提示 | 请扩展意图路由,新增 `starts_regulatory_info_package` 标记,避免破坏 file_summary、regulatory_review 和 application_form_fill。 |
|
||||||
|
|
||||||
|
### RIP-3-002 实现批次创建和节点初始化
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 创建批次、生成节点、记录事件 |
|
||||||
|
| 修改范围 | `workflow.py`、`events.py`、`constants.py` |
|
||||||
|
| 验收标准 | 可创建 `RIP-...` 批次;节点按定义初始化;事件可查询 |
|
||||||
|
| Codex 执行提示 | 请实现批次创建和节点初始化,workflow_type 必须写 `regulatory_info_package`。 |
|
||||||
|
|
||||||
|
### RIP-3-003 实现执行器骨架和状态接口
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 工作流节点可完整流转,status 接口可返回批次、节点、导出和风险信息 |
|
||||||
|
| 修改范围 | `workflow.py`、`views.py`、`urls.py` 或现有 URL 注册文件 |
|
||||||
|
| 验收标准 | 空工作流可从 pending 到 completed;状态接口校验用户权限 |
|
||||||
|
| Codex 执行提示 | 请先实现可运行的空工作流骨架,业务节点可以临时 no-op,但状态流转和权限必须真实。 |
|
||||||
|
|
||||||
|
### RIP-3-004 接入对话启动逻辑
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | `stream_message` 能启动本工作流或返回说明书反问 |
|
||||||
|
| 修改范围 | `review_agent/services.py` |
|
||||||
|
| 验收标准 | 触发后发送 `workflow_started`;无输入或多候选时不误启动 |
|
||||||
|
| Codex 执行提示 | 请在 `stream_message` 增加 regulatory_info_package 分支,先调用输入选择服务,再决定启动、提示上传或反问。 |
|
||||||
|
|
||||||
|
### RIP-3 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_trigger.py tests/test_regulatory_info_package_workflow.py tests/test_regulatory_info_package_views.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、RIP-4 输入选择与说明书解析
|
||||||
|
|
||||||
|
### RIP-4-001 实现说明书输入选择
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 按用户消息、active 附件、最近汇总批次选择说明书 |
|
||||||
|
| 修改范围 | `services/input_select.py` |
|
||||||
|
| 验收标准 | 文件名模糊匹配、唯一 docx、多个说明书、无说明书均有明确结果 |
|
||||||
|
| Codex 执行提示 | 请实现 `select_instruction_input`,多候选返回 waiting_user 语义,由对话反问用户确认具体文件名。 |
|
||||||
|
|
||||||
|
### RIP-4-002 实现说明书 docx 解析
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 读取说明书段落、章节、表格、组成成分表和 front_text |
|
||||||
|
| 修改范围 | `services/instruction_extract.py` |
|
||||||
|
| 验收标准 | 能解析 `目标产品说明书.docx` 的产品名称、章节和主要表格结构 |
|
||||||
|
| Codex 执行提示 | 请使用结构化 Word 解析能力,不用脆弱的纯字符串拼接;解析结果写入可序列化 schema。 |
|
||||||
|
|
||||||
|
### RIP-4-003 写入说明书抽取日志产物
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 保存 `logs/instruction_extract.json` 并创建 artifact |
|
||||||
|
| 修改范围 | `workflow.py`、`storage.py`、`instruction_extract.py` |
|
||||||
|
| 验收标准 | JSON 只在后台 logs 目录,不进入用户下载列表 |
|
||||||
|
| Codex 执行提示 | 请在 text_extract 节点保存说明书抽取 JSON,artifact 可记录,但不要创建 ExportedSummaryFile。 |
|
||||||
|
|
||||||
|
### RIP-4 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_input_select.py tests/test_regulatory_info_package_instruction_extract.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、RIP-5 字段抽取与合并
|
||||||
|
|
||||||
|
### RIP-5-001 实现规则字段抽取
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 从说明书章节和表格中抽取产品名称、包装规格、预期用途、组成、储存条件、样本类型、适用仪器、标准号等 |
|
||||||
|
| 修改范围 | `services/field_extract.py` |
|
||||||
|
| 验收标准 | 不依赖 LLM 时可抽取关键字段并支撑 demo |
|
||||||
|
| Codex 执行提示 | 请优先实现规则抽取,抽取结果包含 value、evidence、confidence 和 source。 |
|
||||||
|
|
||||||
|
### RIP-5-002 实现 LLM 抽取封装和三次重试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | LLM 结构化抽取,失败最多重试 3 次,失败后不阻断 |
|
||||||
|
| 修改范围 | `services/field_extract.py`、`prompts/field_extract.md` |
|
||||||
|
| 验收标准 | 0s/1s/2s 重试;解析失败可记录错误;规则结果继续 |
|
||||||
|
| Codex 执行提示 | 请封装 LLM 调用为可 mock 的函数,测试中不要真实调用外部模型。 |
|
||||||
|
|
||||||
|
### RIP-5-003 实现规则与 LLM 并行抽取
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 使用线程并行执行规则抽取和 LLM 抽取 |
|
||||||
|
| 修改范围 | `services/field_extract.py` |
|
||||||
|
| 验收标准 | 任一分支失败不影响另一分支结果;输出 `field_extract_result.json` |
|
||||||
|
| Codex 执行提示 | 请使用 `ThreadPoolExecutor(max_workers=2)`,不要在子线程直接写数据库。 |
|
||||||
|
|
||||||
|
### RIP-5-004 实现字段合并和高亮决策
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 输出 missing、llm_only、conflict 和最终写入值 |
|
||||||
|
| 修改范围 | `services/field_merge.py` |
|
||||||
|
| 验收标准 | 全缺失写 `/` 黄底;LLM-only 黄底;冲突黄底红字;合并结果可追溯 |
|
||||||
|
| Codex 执行提示 | 请实现 `MergedField` 结构,合并结果写 `logs/merged_fields.json`,并同步批次摘要字段。 |
|
||||||
|
|
||||||
|
### RIP-5 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_field_extract.py tests/test_regulatory_info_package_field_merge.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、RIP-6 DOCX 文档生成
|
||||||
|
|
||||||
|
### RIP-6-001 实现 DocxDocumentAdapter
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 支持段落/表格替换、表格单元格填充、黄色底色、红字 |
|
||||||
|
| 修改范围 | `services/document_writer.py`、`services/docx_document.py` |
|
||||||
|
| 验收标准 | 可处理 run 拆分;测试可检查 docx XML 高亮和红字 |
|
||||||
|
| Codex 执行提示 | 请优先支持本模板需要的替换和表格填充场景,复杂通用 Word 引擎不要过度设计。 |
|
||||||
|
|
||||||
|
### RIP-6-002 实现 6 个 DOCX 文件生成策略
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成 CH1.2、CH1.4、CH1.5、CH1.11.1、CH1.11.5、CH1.11.6 |
|
||||||
|
| 修改范围 | `services/package_generate.py`、`services/standard_candidates.py` |
|
||||||
|
| 验收标准 | 6 个 docx 文件可生成;缺失/LLM-only/冲突样式正确 |
|
||||||
|
| Codex 执行提示 | 请先完成 docx 主链路。CH1.5 产品列表必须转成样例表头:包装规格、货号、组成、组分、主要组成成分、规格/数量,其中货号 `/` 黄底。 |
|
||||||
|
|
||||||
|
### RIP-6-003 实现 generate_docs 内部并发
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 每个文档独立线程生成,主线程统一写 artifact/export |
|
||||||
|
| 修改范围 | `services/package_generate.py`、`workflow.py` |
|
||||||
|
| 验收标准 | 单个文件失败不影响其他文件;返回 `GeneratedFileResult` 列表 |
|
||||||
|
| Codex 执行提示 | 请使用独立模板副本,子线程不要写数据库;所有异常转成文件级 failed 状态。 |
|
||||||
|
|
||||||
|
### RIP-6 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_docx_writer.py tests/test_regulatory_info_package_package_generate.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、RIP-7 CH1.9 DOC 适配
|
||||||
|
|
||||||
|
### RIP-7-001 实现 LegacyDocDocumentAdapter 能力探测
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 探测 Word COM、LibreOffice UNO 或可用兜底能力 |
|
||||||
|
| 修改范围 | `services/legacy_doc_document.py` |
|
||||||
|
| 验收标准 | 当前环境无原生能力时返回清晰 capability,不崩溃;测试不要求本机必须安装 Word 或 LibreOffice |
|
||||||
|
| Codex 执行提示 | 请先实现能力探测和接口骨架,Windows Word COM/LibreOffice UNO 可作为原生能力;不可用时明确进入 docx 兜底。 |
|
||||||
|
|
||||||
|
### RIP-7-002 实现 CH1.9 原生写入与 docx 兜底
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | CH1.9 优先 `.doc` 输出,失败时生成同语义 `.docx` |
|
||||||
|
| 修改范围 | `legacy_doc_document.py`、`package_generate.py` |
|
||||||
|
| 验收标准 | 有原生能力时原生成功状态 success;无原生能力或原生失败但兜底成功时状态 fallback_success;两者失败不进入 zip |
|
||||||
|
| Codex 执行提示 | 请把能力探测、原生失败和兜底失败都写入 `adapter_summary` 和 `risk_notes`,不要静默转换。 |
|
||||||
|
|
||||||
|
### RIP-7-003 补充 doc 适配器测试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 覆盖 capability、兜底成功、失败隔离 |
|
||||||
|
| 修改范围 | `tests/test_regulatory_info_package_legacy_doc.py` |
|
||||||
|
| 验收标准 | 测试不依赖本机必须安装 Word;用 mock 覆盖原生成功/失败 |
|
||||||
|
| Codex 执行提示 | 请用 mock 模拟 Word COM 可用和不可用场景,保证 CI 或本地无 Word 时测试仍稳定。 |
|
||||||
|
|
||||||
|
### RIP-7 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_legacy_doc.py tests/test_regulatory_info_package_package_generate.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、RIP-8 追溯、ZIP 与下载权限
|
||||||
|
|
||||||
|
### RIP-8-001 实现追溯 Excel 和后台 JSON
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成 `exports/traceability.xlsx` 和 `logs/traceability.json` |
|
||||||
|
| 修改范围 | `services/traceability_export.py` |
|
||||||
|
| 验收标准 | Excel 可下载;JSON 不进入用户下载列表 |
|
||||||
|
| Codex 执行提示 | 请用 openpyxl 生成 Excel,字段包含 target_file、target_field、final_value、extraction_source、evidence、highlight_reason、needs_review。 |
|
||||||
|
|
||||||
|
### RIP-8-002 实现 zip 打包
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成 `第1章 监管信息(预生成版).zip` |
|
||||||
|
| 修改范围 | `services/zip_export.py` |
|
||||||
|
| 验收标准 | zip 只包含 success/fallback_success 文件;失败文件不入包 |
|
||||||
|
| Codex 执行提示 | 请用 Python 标准库 `zipfile` 打包,zip 中保留最终输出文件名。CH1.9 兜底成功时放入 `.docx` 文件。 |
|
||||||
|
|
||||||
|
### RIP-8-003 创建导出记录和下载权限
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | zip、单文件、Excel 均写 `ExportedSummaryFile`;下载接口校验用户权限 |
|
||||||
|
| 修改范围 | `file_summary/views.py`、`storage.py`、`zip_export.py` |
|
||||||
|
| 验收标准 | 非批次用户不能下载;zip 在 exports 返回顺序中排首位 |
|
||||||
|
| Codex 执行提示 | 请按 `workflow_type=regulatory_info_package` 反查批次所属 conversation/user,软删除批次不可下载。 |
|
||||||
|
|
||||||
|
### RIP-8 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_traceability.py tests/test_regulatory_info_package_zip.py tests/test_regulatory_info_package_views.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、RIP-9 摘要、通知与状态归并
|
||||||
|
|
||||||
|
### RIP-9-001 实现助手 Markdown 摘要
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 完成后返回 zip 首位、单文件列表、失败原因、待确认摘要 |
|
||||||
|
| 修改范围 | `services/summary.py`、`workflow.py` |
|
||||||
|
| 验收标准 | zip 链接在回复首位;失败文件显示原因且无下载;待确认数量准确 |
|
||||||
|
| Codex 执行提示 | 请严格按详细设计生成助手摘要,partial_success 时也要展示可下载 zip 和失败文件原因。 |
|
||||||
|
|
||||||
|
### RIP-9-002 实现通知记录和统一通知接入
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 写 `RegulatoryInfoPackageNotificationRecord`,调用统一通知服务 |
|
||||||
|
| 修改范围 | `services/notifier.py`、`workflow.py` |
|
||||||
|
| 验收标准 | 通知失败不阻断下载;失败写 `risk_notes` |
|
||||||
|
| Codex 执行提示 | 请复用已有通知模式,先保证本地测试可 mock;不要让外部通知失败影响批次主状态。 |
|
||||||
|
|
||||||
|
### RIP-9-003 完成状态归并
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 根据生成结果、zip、追溯、通知落定 success/partial_success/failed/waiting_user |
|
||||||
|
| 修改范围 | `workflow.py` |
|
||||||
|
| 验收标准 | zip 成功且 7 个文件均为 success/fallback_success 时为 success;部分文件失败但有 zip,或 zip 失败但至少一个单文件成功时为 partial_success;全部文件失败或关键输入缺失为 failed;多说明书候选等待确认时为 waiting_user |
|
||||||
|
| Codex 执行提示 | 请把状态归并集中在一个函数,测试覆盖 docx 兜底、zip 失败、通知失败、产品名缺失。 |
|
||||||
|
|
||||||
|
### RIP-9 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_workflow.py tests/test_regulatory_info_package_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、RIP-10 前端接入与总体验收
|
||||||
|
|
||||||
|
### RIP-10-001 增加对话快捷入口
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 对话框底部增加“第1章监管信息”快捷提示 |
|
||||||
|
| 修改范围 | `templates/home.html` |
|
||||||
|
| 验收标准 | 点击后填入或发送 `根据说明书生成第1章监管信息` |
|
||||||
|
| Codex 执行提示 | 请复用现有 tool-chip 样式,不单独创建新前端样式文件,除非现有结构无法展示。 |
|
||||||
|
|
||||||
|
### RIP-10-002 工作流卡片和状态轮询支持
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 前端识别 `regulatory_info_package`,使用新 status URL 轮询 |
|
||||||
|
| 修改范围 | `static/js/app.js`、`templates/home.html` |
|
||||||
|
| 验收标准 | 卡片能展示节点、状态、风险和导出列表;终态识别 success/partial_success/failed/waiting_user |
|
||||||
|
| Codex 执行提示 | 请在现有工作流卡片逻辑中增量接入,不复制一套新卡片实现。 |
|
||||||
|
|
||||||
|
### RIP-10-003 下载展示和失败文件展示
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | zip 首位展示,单文件辅助下载,失败文件展示原因 |
|
||||||
|
| 修改范围 | `static/js/app.js` |
|
||||||
|
| 验收标准 | exports 返回顺序被保留;失败文件无下载按钮;traceability.xlsx 可下载 |
|
||||||
|
| Codex 执行提示 | 请以后端 exports 顺序为准,不新增 `is_primary` 字段;zip 已由后端排首位。 |
|
||||||
|
|
||||||
|
### RIP-10-004 总体验收与回归
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 全链路验证和回归保护 |
|
||||||
|
| 修改范围 | 测试、必要的 bug fix |
|
||||||
|
| 验收标准 | Django check、RIP 测试、关键既有测试通过;能用样例说明书生成材料包 |
|
||||||
|
| Codex 执行提示 | 请用 `docs/0.原始材料/目标产品说明书.docx` 做端到端验证,确认 zip、单文件、Excel、logs 和摘要均符合设计。 |
|
||||||
|
|
||||||
|
### RIP-10 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_frontend.py
|
||||||
|
pytest tests/test_regulatory_info_package_models.py tests/test_regulatory_info_package_trigger.py tests/test_regulatory_info_package_input_select.py tests/test_regulatory_info_package_template_config.py tests/test_regulatory_info_package_instruction_extract.py tests/test_regulatory_info_package_field_extract.py tests/test_regulatory_info_package_field_merge.py tests/test_regulatory_info_package_docx_writer.py tests/test_regulatory_info_package_legacy_doc.py tests/test_regulatory_info_package_package_generate.py tests/test_regulatory_info_package_traceability.py tests/test_regulatory_info_package_zip.py tests/test_regulatory_info_package_workflow.py tests/test_regulatory_info_package_views.py tests/test_regulatory_info_package_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、测试分层要求
|
||||||
|
|
||||||
|
| 测试层 | 覆盖内容 | 建议文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 模型测试 | 批次、产物、通知、zip 导出类型 | `tests/test_regulatory_info_package_models.py` |
|
||||||
|
| 路由测试 | 固定关键词、LLM action、对话启动分支 | `tests/test_regulatory_info_package_trigger.py` |
|
||||||
|
| 输入测试 | 文件名匹配、active 附件、多候选反问 | `tests/test_regulatory_info_package_input_select.py` |
|
||||||
|
| 配置测试 | YAML 加载、模板缺失、code 唯一 | `tests/test_regulatory_info_package_template_config.py` |
|
||||||
|
| 解析测试 | 说明书章节、表格、组成成分表 | `tests/test_regulatory_info_package_instruction_extract.py` |
|
||||||
|
| 抽取测试 | 规则抽取、LLM 重试、失败降级 | `tests/test_regulatory_info_package_field_extract.py` |
|
||||||
|
| 合并测试 | missing、llm_only、conflict | `tests/test_regulatory_info_package_field_merge.py` |
|
||||||
|
| 文档测试 | docx 替换、表格、高亮、红字 | `tests/test_regulatory_info_package_docx_writer.py` |
|
||||||
|
| doc 测试 | adapter 探测、docx 兜底、失败状态 | `tests/test_regulatory_info_package_legacy_doc.py` |
|
||||||
|
| 生成测试 | 7 文件并发生成、异常隔离 | `tests/test_regulatory_info_package_package_generate.py` |
|
||||||
|
| 追溯测试 | Excel 下载、logs JSON | `tests/test_regulatory_info_package_traceability.py` |
|
||||||
|
| zip 测试 | 只打包 success/fallback_success | `tests/test_regulatory_info_package_zip.py` |
|
||||||
|
| 工作流测试 | 节点流转、状态归并、partial_success | `tests/test_regulatory_info_package_workflow.py` |
|
||||||
|
| 接口测试 | start/status/download 权限 | `tests/test_regulatory_info_package_views.py` |
|
||||||
|
| 通知测试 | 通知记录、通知失败降级 | `tests/test_regulatory_info_package_notification.py` |
|
||||||
|
| 前端测试 | chip、卡片、状态 URL、下载展示 | `tests/test_regulatory_info_package_frontend.py` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、Codex 自动化执行规则
|
||||||
|
|
||||||
|
| 规则 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 顺序执行 | 必须从 RIP-0 到 RIP-10 顺序执行,不得跳阶段 |
|
||||||
|
| 阶段聚焦 | 当前阶段失败时先修复当前阶段,不继续后续阶段 |
|
||||||
|
| TDD | 新行为先写失败测试,再实现 |
|
||||||
|
| 小步修改 | 每次只修改当前阶段相关文件,避免顺手重构 |
|
||||||
|
| 用户变更保护 | 不得回滚或覆盖用户已有未提交变更 |
|
||||||
|
| 过程日志 | 每阶段记录关键命令结果和既有失败 |
|
||||||
|
| 阶段验证 | 每阶段完成后运行对应验证命令 |
|
||||||
|
| 阶段提交 | 每阶段验证通过后生成提交摘要;是否执行 `git commit` 由用户确认 |
|
||||||
|
| 回归保护 | 文件汇总、法规核查、自动填表现有测试不得回归 |
|
||||||
|
| doc 风险隔离 | `.doc` 原生能力不可用或原生处理失败不得阻断其他 6 个 docx 文件生成 |
|
||||||
|
| 外部依赖隔离 | LLM、通知、Word COM 均需可 mock,测试不依赖真实外部服务 |
|
||||||
|
| 下载安全 | 所有导出下载必须通过所属用户权限校验 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、推荐目标模式提示词
|
||||||
|
|
||||||
|
后续可直接对 Codex 输入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/5.第1章监管信息材料包生成.md 执行开发。
|
||||||
|
|
||||||
|
执行要求:
|
||||||
|
1. 严格按 RIP-0 到 RIP-10 顺序推进,不跳阶段。
|
||||||
|
2. 每阶段先读对应需求、功能、数据库、详细设计文档。
|
||||||
|
3. 每阶段先写或补充测试,再实现代码。
|
||||||
|
4. 每阶段只修改当前阶段相关文件,不做无关重构。
|
||||||
|
5. 不回滚、不覆盖用户已有未提交变更。
|
||||||
|
6. LLM、通知、Word COM 等外部能力必须可 mock。
|
||||||
|
7. 每阶段完成后运行该阶段验证命令。
|
||||||
|
8. 验证通过后生成提交摘要,是否本地提交等待用户确认。
|
||||||
|
9. 最后使用 docs/0.原始材料/目标产品说明书.docx 做端到端验收。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、待执行前检查清单
|
||||||
|
|
||||||
|
| 检查项 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析、功能设计、数据库设计、详细设计均已存在 | 待执行时确认 |
|
||||||
|
| 当前分支是否适合创建开发分支 | 待执行时确认 |
|
||||||
|
| 是否存在用户未提交变更 | 待执行时确认 |
|
||||||
|
| `python-docx`、`openpyxl`、`PyYAML` 是否可用 | 待执行时确认 |
|
||||||
|
| Word COM 或 LibreOffice UNO 是否可用 | 待执行时确认,非阻塞 |
|
||||||
|
| 目标说明书 `docs/0.原始材料/目标产品说明书.docx` 是否存在 | 待执行时确认 |
|
||||||
|
| 样例模板目录 `docs/0.原始材料/第1章 监管信息` 是否完整 | 待执行时确认 |
|
||||||
|
| 现有文件汇总、法规核查、自动填表测试是否通过 | 待执行时确认 |
|
||||||
|
|
||||||
@@ -1,115 +1,175 @@
|
|||||||
# 架构搭建思路汇报稿(基于 Demo 版)
|
# 架构搭建思路汇报稿(基于 Demo 版)
|
||||||
|
|
||||||
## 一、汇报开场
|
## 一、设计路径:先锁规格,再实现代码
|
||||||
|
|
||||||
各位老师好,我本次 Demo 搭建的是一个面向体外诊断试剂注册资料准备与审核的智能体原型。
|
各位老师好,我本次 Demo 搭建的是一个面向体外诊断试剂注册资料准备与审核的智能体原型。
|
||||||
|
|
||||||
这个 Demo 的目标不是简单做文件上传、文件解析或问答,而是把注册资料审核中几个高频、耗时、容易出错的环节串成一个可追溯的智能工作流,包括文件目录汇总、法规完整性核查、产品关键信息提取、申报表自动填充,以及异常风险预警。
|
这次开发没有直接从代码开始,而是采用“文档先行、规格锁定、再实现代码”的路径。原因是注册资料审核不是一个简单问答场景,它涉及文件解析、法规规则、RAG 依据、工作流状态、导出文件、人工确认和整改闭环。如果一开始就写代码,很容易出现功能能跑但边界不清、结果不可追溯、后续难维护的问题。
|
||||||
|
|
||||||
从整体定位上看,它更像是一个“注册资料审核助手”:用户上传一批申报资料后,系统能够先把资料包结构化,再对照法规规则做核查,之后输出风险清单和整改建议,并把抽取到的产品信息继续复用到申报模板填表中。
|
所以整体设计路径分为四步:
|
||||||
|
|
||||||
## 二、Demo 运行结果展示
|
|
||||||
|
|
||||||
本次 Demo 目前可以展示四类核心运行结果。
|
|
||||||
|
|
||||||
### 1. 文件目录汇总表
|
|
||||||
|
|
||||||
用户上传注册资料文件夹、散装文件或压缩包后,系统会自动完成附件固化、压缩包解压、文件扫描和页数统计。
|
|
||||||
|
|
||||||
最终系统会生成 Markdown 汇总报告和 Excel 文件明细表,主要字段包括:
|
|
||||||
|
|
||||||
| 字段 | 说明 |
|
|
||||||
| --- | --- |
|
|
||||||
| 序号 | 文件在批次中的顺序 |
|
|
||||||
| 目录层级 | 文件所在的相对目录 |
|
|
||||||
| 文件名 | 原始文件名 |
|
|
||||||
| 类型 | PDF、Word、Excel、PPT 等文件类型 |
|
|
||||||
| 页数 | PDF 页数、Word 页数、PPT 幻灯片数或 Excel 工作表数 |
|
|
||||||
| 路径 | 文件在批次工作目录中的相对路径 |
|
|
||||||
| 状态 | success、failed、unsupported、uncertain 等 |
|
|
||||||
| 重试次数 | 页数统计失败时的重试记录 |
|
|
||||||
| 异常说明 | 不支持、不可确定或解析失败的原因 |
|
|
||||||
|
|
||||||
这个结果解决的是资料包进入系统后的第一步问题:先把杂乱的文件夹变成结构化的文件清单。
|
|
||||||
|
|
||||||
### 2. 法规完整性报告
|
|
||||||
|
|
||||||
在文件汇总结果基础上,系统会调用法规核查工作流,对照 NMPA 体外诊断试剂注册申报资料要求进行完整性检查。
|
|
||||||
|
|
||||||
Demo 中使用 `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml` 作为结构化规则文件。规则文件中配置了附件 4 的资料要求,例如监管信息、综述资料、非临床资料、临床评价资料、说明书和标签样稿、质量管理体系文件等。
|
|
||||||
|
|
||||||
系统会检查是否缺少关键资料,例如:
|
|
||||||
|
|
||||||
| 检查对象 | 风险示例 |
|
|
||||||
| --- | --- |
|
|
||||||
| 注册申请表 | 缺失时生成阻断项或高风险 |
|
|
||||||
| 符合性声明 | 缺失时生成阻断项 |
|
|
||||||
| 产品技术要求 | 缺失时生成阻断项 |
|
|
||||||
| 注册检验报告 | 缺失时生成阻断项 |
|
|
||||||
| 产品说明书 | 缺失或章节不完整时生成高风险 |
|
|
||||||
| 标签样稿 | 缺失时生成高风险 |
|
|
||||||
| 临床评价资料 | 按适用条件生成条件性风险 |
|
|
||||||
| 质量管理体系文件 | 缺失时生成高风险 |
|
|
||||||
|
|
||||||
最终输出包括 Markdown 法规核查报告、Excel 问题清单和 JSON 结构化结果包。
|
|
||||||
|
|
||||||
### 3. 信息提取对照表
|
|
||||||
|
|
||||||
系统会从说明书、产品技术要求、注册检验报告、申请表等文件中抽取产品关键信息。
|
|
||||||
|
|
||||||
当前 Demo 中重点抽取的字段包括:
|
|
||||||
|
|
||||||
| 字段 | 用途 |
|
|
||||||
| --- | --- |
|
|
||||||
| 产品名称 | 用于一致性核查和申报表填充 |
|
|
||||||
| 型号规格 | 用于跨文件比对 |
|
|
||||||
| 预期用途 | 用于法规适用条件和模板填充 |
|
|
||||||
| 管理类别 | 用于法规判断 |
|
|
||||||
| 分类编码 | 用于注册资料核对 |
|
|
||||||
| 注册类型 | 用于模板选择和法规规则裁剪 |
|
|
||||||
| 临床评价路径 | 用于临床资料适用性判断 |
|
|
||||||
|
|
||||||
每个抽取结果都会保留来源文件、来源角色、证据片段、抽取方式和置信度。这样后续生成的填表内容不是黑盒结果,而是能够回溯到原始文件。
|
|
||||||
|
|
||||||
### 4. 异常预警列表
|
|
||||||
|
|
||||||
系统会把完整性缺失、章节异常、字段冲突、文本抽取失败、页数不可确定、通知失败等问题统一沉淀为风险项。
|
|
||||||
|
|
||||||
风险等级目前分为:
|
|
||||||
|
|
||||||
| 风险等级 | 含义 |
|
|
||||||
| --- | --- |
|
|
||||||
| 阻断项 | 影响注册资料完整性或关键合规判断,需要优先整改 |
|
|
||||||
| 高风险 | 可能影响审评,需要重点关注 |
|
|
||||||
| 中风险 | 建议整改或补充说明 |
|
|
||||||
| 低风险 | 轻微问题或格式提示 |
|
|
||||||
| 提示项 | 不直接影响结论,但建议人工确认 |
|
|
||||||
|
|
||||||
例如,如果系统发现不同文件中的“产品名称”或“型号规格”不一致,会生成一致性风险;如果缺少注册检验报告,会生成阻断项,并给出补充注册检验报告的整改建议。
|
|
||||||
|
|
||||||
## 三、智能体整体工作流
|
|
||||||
|
|
||||||
结合当前 Demo 的实现,智能体整体工作流可以概括为:
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
文件扫描
|
需求拆解
|
||||||
-> 目录汇总
|
-> 生成需求分析、功能设计、详细设计、数据库设计和开发计划
|
||||||
-> 法规匹配
|
-> 用文档锁定实现规格
|
||||||
-> 信息提取
|
-> 按规格实现 Django 代码、工作流、前端页面和测试
|
||||||
-> 一致性核查
|
|
||||||
-> 风险预警
|
|
||||||
-> 报告导出
|
|
||||||
-> 通知与整改复核
|
|
||||||
```
|
```
|
||||||
|
|
||||||
从代码实现上看,系统拆成三条主链路。
|
当前仓库中可以看到完整的规格文档链路:
|
||||||
|
|
||||||
|
| 阶段 | 产物 | 作用 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 需求分析 | `docs/1.需求分析` | 明确业务目标、用户动作、输入输出和异常场景 |
|
||||||
|
| 功能设计 | `docs/2.功能设计` | 把需求拆成文件汇总、法规核查、自动填表、飞书通知等模块 |
|
||||||
|
| 详细设计 | `docs/3.详细设计` | 锁定工作流节点、字段结构、状态流转和服务边界 |
|
||||||
|
| 数据库设计 | `docs/4.数据库设计` | 锁定批次、附件、节点、风险项、导出文件等模型 |
|
||||||
|
| 开发计划 | `docs/5.开发计划` | 将实现拆成可验证的开发任务和前端线框图 |
|
||||||
|
|
||||||
|
因此,这个 Demo 的核心不是“让大模型临时回答一个问题”,而是先用文档定义清楚系统应该如何工作,再把这些规格落实到代码、数据库、前端和测试中。最终形成的是一个可追溯、可复核、可继续扩展的审核工作台。
|
||||||
|
|
||||||
|
## 二、系统定位和 Demo 目标
|
||||||
|
|
||||||
|
这个 Demo 的目标不是简单做文件上传、文件解析或法规问答,而是把注册资料审核中几个高频、耗时、容易出错的环节串成一个智能工作流,包括:
|
||||||
|
|
||||||
|
```text
|
||||||
|
资料上传
|
||||||
|
-> 文件目录和页数汇总
|
||||||
|
-> NMPA 法规完整性核查
|
||||||
|
-> 法规依据 RAG 检索
|
||||||
|
-> 产品关键信息抽取
|
||||||
|
-> 一致性核查和风险预警
|
||||||
|
-> 申报文件自动填表
|
||||||
|
-> 报告导出和整改复核
|
||||||
|
```
|
||||||
|
|
||||||
|
从产品形态上看,它更像是一个“注册资料审核工作台”。用户上传一批申报资料后,系统先把资料包结构化,再按法规规则做核查,然后输出风险清单、整改建议、证据来源和导出文件。后续还可以继续复用抽取到的产品信息,自动填入申报模板。
|
||||||
|
|
||||||
|
## 三、技术栈和总体架构
|
||||||
|
|
||||||
|
本 Demo 采用轻量、可本地运行、便于测试和可解释的技术栈。
|
||||||
|
|
||||||
|
| 层级 | 技术/工具 | 作用 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Web 框架 | Django | 路由、视图、模板、认证、ORM 和后台能力 |
|
||||||
|
| 数据库 | SQLite / Django ORM | Demo 阶段保存会话、附件、批次、节点、风险项和导出文件 |
|
||||||
|
| 前端 | Django Template + 原生 JS + CSS | 实现首页工作台、审核智能体、知识库管理、附件管理和流式对话 |
|
||||||
|
| 文件解析 | `pypdf`、`python-docx`、`python-pptx`、`openpyxl`、`xlrd`、`py7zr`、`zipfile` | 解析 PDF、Word、PPT、Excel、压缩包和旧 Office 文件 |
|
||||||
|
| 规则配置 | YAML | 维护 NMPA 体外诊断试剂注册资料核查规则 |
|
||||||
|
| RAG | ChromaDB + embedding provider | 构建法规材料向量索引,检索法规依据片段 |
|
||||||
|
| LLM | SiliconFlow / 可配置大模型接口 | 做意图路由、低置信度抽取、自然语言总结和辅助复核 |
|
||||||
|
| 流式交互 | SSE | 将工作流启动、节点进度和模型回复实时推给前端 |
|
||||||
|
| 自动化验证 | pytest + Django test client | 验证路由、页面、模型、工作流和导出结果 |
|
||||||
|
|
||||||
|
整体架构可以概括为:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户界面
|
||||||
|
-> Django 视图层
|
||||||
|
-> 对话服务和 Skill 路由器
|
||||||
|
-> 文件汇总 / 法规核查 / 自动填表工作流
|
||||||
|
-> ORM 状态记录和导出文件
|
||||||
|
-> RAG/LLM/规则服务
|
||||||
|
-> 前端工作流卡片和报告下载
|
||||||
|
```
|
||||||
|
|
||||||
|
这里的关键设计原则是:规则判断要稳定,RAG 负责补证据,LLM 做辅助,不把高风险合规结论完全交给大模型自由发挥。
|
||||||
|
|
||||||
|
## 四、对话流程:先识别意图,再决定 RAG 或工作流
|
||||||
|
|
||||||
|
审核智能体页面不是单纯把用户输入直接发给大模型,而是有一层对话编排流程。
|
||||||
|
|
||||||
|
一次用户消息进入系统后,大致会经历以下步骤:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户输入
|
||||||
|
-> 保存用户消息
|
||||||
|
-> Skill Router 判断意图
|
||||||
|
-> 根据意图选择普通问答、附件读取或工作流
|
||||||
|
-> 必要时先检查附件和前置批次
|
||||||
|
-> 启动对应工作流或执行 RAG 问答
|
||||||
|
-> 保存助手回复和工作流事件
|
||||||
|
-> 前端通过 SSE 展示增量内容和节点状态
|
||||||
|
```
|
||||||
|
|
||||||
|
当前路由动作包括:
|
||||||
|
|
||||||
|
| action | 场景 | 后续动作 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `normal_chat` | 普通法规问答或项目问答 | 先检索知识库,再把 RAG 片段放入大模型上下文 |
|
||||||
|
| `attachment_reader` | 用户要求阅读、提取、总结上传附件 | 调用附件读取 Skill,返回文件内容摘要 |
|
||||||
|
| `file_summary` | 用户要求汇总文件目录、页数、清单 | 启动文件汇总工作流 |
|
||||||
|
| `regulatory_review` | 用户要求法规核查、完整性核查、风险预警、整改建议 | 必要时先生成文件汇总批次,再启动法规核查工作流 |
|
||||||
|
| `application_form_fill` | 用户要求申报文件填表、模板填充、安全和性能清单 | 必要时先生成文件汇总批次,再启动自动填表工作流 |
|
||||||
|
|
||||||
|
也就是说,普通问题是“先 RAG,再回答”;工作流问题是“先路由,再检查前置条件,再启动工作流”。例如用户问“注册检验报告要求是什么”,系统会走 RAG 问答;用户说“请对当前资料做法规核查”,系统会进入法规核查工作流。
|
||||||
|
|
||||||
|
## 五、Skill 调用方式:路由器统一调度工具能力
|
||||||
|
|
||||||
|
Demo 中的 Skill 不是一个单独页面,而是对话服务后面的工具调用机制。用户不需要手动选择复杂功能,系统会根据用户话语和当前附件状态判断是否调用某个 Skill 或工作流。
|
||||||
|
|
||||||
|
当前实现中,`review_agent/skill_router.py` 负责意图路由。它采用三层判断:
|
||||||
|
|
||||||
|
```text
|
||||||
|
确定性规则预判
|
||||||
|
-> LLM 路由判断
|
||||||
|
-> 规则兜底
|
||||||
|
```
|
||||||
|
|
||||||
|
第一层是确定性规则。例如用户输入中包含“法规核查”“NMPA 核查”“风险预警”“自动填表”“申报模板”等明确关键词,系统可以直接判断要启动对应工作流。这样可以避免每次都依赖大模型判断。
|
||||||
|
|
||||||
|
第二层是 LLM 路由。系统会把用户消息和当前 active 附件列表发给路由模型,让模型只输出结构化 JSON:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"action": "regulatory_review",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"reason": "用户要求对当前注册资料进行法规完整性核查"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
第三层是规则兜底。如果 LLM 不可用、配置缺失或返回异常,系统会退回关键词和附件状态判断,保证 Demo 在本地环境也能稳定运行。
|
||||||
|
|
||||||
|
这个设计的好处是:用户体验上像是在和一个智能体对话,技术实现上则是由路由器把对话分发到不同工具、不同工作流和不同数据服务。
|
||||||
|
|
||||||
|
## 六、RAG 方式:法规依据和用户知识库共同参与
|
||||||
|
|
||||||
|
RAG 在 Demo 中有两类来源:
|
||||||
|
|
||||||
|
| 来源 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 内置法规材料 | 来自 `docs/0.原始材料` 和 NMPA 相关法规文件,用于法规依据检索 |
|
||||||
|
| 用户管理知识库 | 由用户在“知识库管理”页面上传,可作为当前账号所有对话的补充知识 |
|
||||||
|
|
||||||
|
法规材料会被切分为文本块,写入 ChromaDB 向量库。每个 chunk 保留来源文件、chunk 编号、文本片段和元数据。embedding 支持真实语义 embedding,也支持 deterministic/local embedding,后者主要用于测试和 dry run。
|
||||||
|
|
||||||
|
RAG 在系统中的定位有两种:
|
||||||
|
|
||||||
|
### 1. 普通问答中的 RAG
|
||||||
|
|
||||||
|
如果用户提出普通问题,系统会先检索知识库,把命中的法规片段或用户知识库片段拼入上下文,再调用大模型回答。这样回答不会只依赖模型记忆,而是带有本地法规材料和用户资料依据。
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户问题
|
||||||
|
-> 知识库检索
|
||||||
|
-> 过滤和排序相关片段
|
||||||
|
-> 组装为知识上下文
|
||||||
|
-> 调用 LLM 生成回答
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 工作流中的 RAG
|
||||||
|
|
||||||
|
在法规核查工作流里,RAG 不直接决定是否合规,而是为规则判断补充法规依据。例如结构化规则已经判断“缺少注册检验报告”,RAG 再检索相关法规要求,给出来源文件和依据片段。
|
||||||
|
|
||||||
|
这种方式避免了“让大模型自由判断合规”的不稳定性,同时让报告具备可解释依据。
|
||||||
|
|
||||||
|
## 七、三条核心工作流
|
||||||
|
|
||||||
|
当前 Demo 拆成三条主链路:文件汇总、法规核查、自动填表。
|
||||||
|
|
||||||
### 1. 文件汇总链路
|
### 1. 文件汇总链路
|
||||||
|
|
||||||
对应模块:`review_agent/file_summary`
|
对应模块:`review_agent/file_summary`
|
||||||
|
|
||||||
主要流程为:
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
文件上传
|
文件上传
|
||||||
-> 附件固化
|
-> 附件固化
|
||||||
@@ -117,17 +177,17 @@ Demo 中使用 `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.ya
|
|||||||
-> 文件扫描
|
-> 文件扫描
|
||||||
-> 页数统计
|
-> 页数统计
|
||||||
-> 产品名识别
|
-> 产品名识别
|
||||||
-> 报告输出
|
-> Markdown/Excel 报告输出
|
||||||
```
|
```
|
||||||
|
|
||||||
这个链路的核心作用是把原始资料包转换成结构化数据。系统会生成 `FileSummaryBatch` 和 `FileSummaryItem`,后续法规核查和自动填表都复用这套文件清单,不再重复扫描文件。
|
这个链路负责把原始资料包转换成结构化文件清单。系统会生成 `FileSummaryBatch` 和 `FileSummaryItem`,后续法规核查和自动填表都复用这套文件清单,不再重复扫描资料。
|
||||||
|
|
||||||
|
输出字段包括序号、目录层级、文件名、文件类型、页数、相对路径、统计状态、重试次数和异常说明。
|
||||||
|
|
||||||
### 2. 法规核查链路
|
### 2. 法规核查链路
|
||||||
|
|
||||||
对应模块:`review_agent/regulatory_review`
|
对应模块:`review_agent/regulatory_review`
|
||||||
|
|
||||||
主要流程为:
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
准备资料
|
准备资料
|
||||||
-> 适用条件确认
|
-> 适用条件确认
|
||||||
@@ -136,20 +196,20 @@ Demo 中使用 `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.ya
|
|||||||
-> 文本抽取
|
-> 文本抽取
|
||||||
-> 章节核查
|
-> 章节核查
|
||||||
-> 一致性核查
|
-> 一致性核查
|
||||||
|
-> RAG 法规依据补充
|
||||||
-> 风险评估
|
-> 风险评估
|
||||||
-> 报告输出
|
-> 报告输出
|
||||||
|
-> 整改复核
|
||||||
```
|
```
|
||||||
|
|
||||||
这条链路的核心设计原则是:规则优先,RAG 补依据,LLM 做辅助。
|
这条链路使用 `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml` 作为结构化规则文件。规则中配置了附件 4 的资料要求,包括监管信息、综述资料、非临床资料、临床评价资料、说明书和标签样稿、质量管理体系文件等。
|
||||||
|
|
||||||
也就是说,法规结论不直接交给大模型自由判断,而是优先由结构化规则文件决定;RAG 负责检索法规依据和原文片段;LLM 主要用于低置信度字段抽取、自然语言条件解析和结果复核。
|
系统会检查是否缺少关键资料,例如注册申请表、符合性声明、产品技术要求、注册检验报告、说明书、标签样稿、临床评价资料和质量管理体系文件。缺失项会转成 `RegulatoryIssue`,并按阻断项、高风险、中风险、低风险和提示项分级。
|
||||||
|
|
||||||
### 3. 自动填表链路
|
### 3. 自动填表链路
|
||||||
|
|
||||||
对应模块:`review_agent/application_form_fill`
|
对应模块:`review_agent/application_form_fill`
|
||||||
|
|
||||||
主要流程为:
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
准备资料
|
准备资料
|
||||||
-> 模板选择
|
-> 模板选择
|
||||||
@@ -161,173 +221,91 @@ Demo 中使用 `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.ya
|
|||||||
-> 结果通知
|
-> 结果通知
|
||||||
```
|
```
|
||||||
|
|
||||||
这条链路会复用前面抽取到的产品信息,自动选择申报模板,并将字段填入 Word 模板。对于冲突字段,Demo 中采用“说明书优先”的策略,同时在结果中保留冲突摘要和来源追溯。
|
这条链路会复用前面抽取到的产品信息,自动选择申报模板,并将字段填入 Word 模板。对于冲突字段,Demo 中采用明确的归并策略,同时在结果中保留冲突摘要和来源追溯。
|
||||||
|
|
||||||
## 四、Demo 实际调用的关键工具和库
|
## 八、页面和数据工作台
|
||||||
|
|
||||||
本 Demo 在工具选型上以轻量、可本地运行、可解释、便于测试为原则。
|
前端目前包括四个主要页面:
|
||||||
|
|
||||||
### 1. 文件解析类工具
|
| 页面 | URL | 作用 |
|
||||||
|
|
||||||
| 工具/库 | Demo 中的用途 | 选用理由 |
|
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| `pypdf` | PDF 页数统计和文本抽取 | 轻量、安装简单,适合 Demo 阶段快速处理 PDF |
|
| 首页工作台 | `/` | 展示对话、附件、知识库、批次状态和最近处理记录 |
|
||||||
| `python-docx` | DOCX 文本读取、Word 模板填充 | 可读取段落和表格,也能写入 Word 模板 |
|
| 审核智能体 | `/chat/` | 对话、上传附件、启动工作流、查看节点进度 |
|
||||||
| `python-pptx` | PPTX 幻灯片数量统计和文本读取 | 适合统计幻灯片数量和抽取文本 |
|
| 知识库管理 | `/knowledge-base/` | 管理用户上传知识库、查看内置法规材料和索引状态 |
|
||||||
| `openpyxl` | XLSX 工作表统计、Excel 报告导出 | 同时支持读取和生成 Excel |
|
| 附件管理 | `/attachments/` | 管理不同对话下的上传附件、版本、启用状态和下载 |
|
||||||
| `xlrd` | 旧版 XLS 文件读取 | 补充对历史 Excel 格式的支持 |
|
|
||||||
| `olefile` | 判断老 Office 文件 OLE 结构 | 用于 doc、xls、ppt 等老格式的兜底识别 |
|
|
||||||
| `py7zr` | 7z 压缩包解压 | 支持常见资料包压缩格式 |
|
|
||||||
| Python `zipfile` | ZIP 压缩包解压 | 标准库能力,无额外依赖 |
|
|
||||||
|
|
||||||
Demo 中没有选择重型 OCR 或复杂版式引擎,是因为当前阶段重点是打通审核链路和规则闭环。对于扫描件、图片 PDF、复杂版式 PDF,后续可以再接入 OCR 和更强的版式解析能力。
|
首页工作台重点不是营销展示,而是运行态数据,包括:
|
||||||
|
|
||||||
### 2. 规则和正则
|
|
||||||
|
|
||||||
系统使用 YAML 维护法规规则,例如 `nmpa_ivd_registration_v1.yaml`。每条规则包含规则编码、附件 4 编码、标题、资料类型、风险等级、匹配关键词、整改建议和 RAG 检索查询词。
|
|
||||||
|
|
||||||
正则表达式用于抽取结构化字段,例如:
|
|
||||||
|
|
||||||
```text
|
```text
|
||||||
产品名称:xxx
|
对话总数
|
||||||
型号规格:xxx
|
附件总数
|
||||||
预期用途:xxx
|
知识库材料数
|
||||||
管理类别:xxx
|
执行中批次
|
||||||
分类编码:xxx
|
已处理批次
|
||||||
|
成功批次
|
||||||
|
等待确认批次
|
||||||
|
失败批次
|
||||||
|
最近处理记录
|
||||||
```
|
```
|
||||||
|
|
||||||
选用规则和正则的原因是:这类注册资料中有大量固定标题和固定字段,使用确定性规则可以提高可解释性,也便于定位问题来源。
|
知识库材料中同时统计用户管理文档和内置法规材料,避免把“知识库”误解成只包含用户上传文件。
|
||||||
|
|
||||||
### 3. RAG 和向量检索
|
## 九、过程留痕和可追溯设计
|
||||||
|
|
||||||
Demo 使用 ChromaDB 构建本地法规 RAG 索引。法规原文材料会被切分为文本块,并保存来源文件、chunk 编号等元数据。
|
审核类系统不能只输出一个结论,还必须说明结论从哪里来。因此 Demo 对关键过程都做了结构化留痕。
|
||||||
|
|
||||||
向量 embedding 支持两种模式:
|
|
||||||
|
|
||||||
| 模式 | 用途 |
|
|
||||||
| --- | --- |
|
|
||||||
| SiliconFlow embedding | 用于真实语义检索 |
|
|
||||||
| deterministic/local embedding | 用于测试和 dry run |
|
|
||||||
|
|
||||||
RAG 在系统中的定位不是直接判断合规,而是为风险问题补充法规依据。例如完整性规则已经判断“缺少注册检验报告”,RAG 再检索相关法规条款,输出来源文件和依据片段,增强报告的可解释性。
|
|
||||||
|
|
||||||
### 4. LLM 调用
|
|
||||||
|
|
||||||
LLM 在 Demo 中主要承担辅助角色,包括:
|
|
||||||
|
|
||||||
| 场景 | LLM 作用 |
|
|
||||||
| --- | --- |
|
|
||||||
| 自然语言适用条件解析 | 将用户输入转换为结构化字段 |
|
|
||||||
| 低置信度字段抽取 | 正则抽取不足时补充结构化 JSON |
|
|
||||||
| 工作流结果复核 | 对中间结果做总结和校验 |
|
|
||||||
| 整改建议润色 | 在规则模板基础上优化表达 |
|
|
||||||
|
|
||||||
风险等级、法规结论和完整性判断不直接交给 LLM 决定,而是由规则引擎和风险评估服务控制。
|
|
||||||
|
|
||||||
### 5. 工作流和状态管理
|
|
||||||
|
|
||||||
系统使用 Django ORM 保存批次、节点、事件和导出文件。
|
|
||||||
|
|
||||||
关键模型包括:
|
|
||||||
|
|
||||||
| 模型 | 作用 |
|
|
||||||
| --- | --- |
|
|
||||||
| `FileSummaryBatch` | 文件汇总批次 |
|
|
||||||
| `FileSummaryItem` | 文件明细 |
|
|
||||||
| `RegulatoryReviewBatch` | 法规核查批次 |
|
|
||||||
| `RegulatoryIssue` | 法规问题和风险项 |
|
|
||||||
| `RegulatoryArtifact` | 法规核查过程产物 |
|
|
||||||
| `ApplicationFormFillBatch` | 自动填表批次 |
|
|
||||||
| `WorkflowNodeRun` | 工作流节点状态 |
|
|
||||||
| `WorkflowEvent` | SSE 事件和进度记录 |
|
|
||||||
| `ExportedSummaryFile` | Markdown、Excel、JSON、Word 等导出文件 |
|
|
||||||
|
|
||||||
前端通过 SSE 事件实时展示工作流卡片状态,使用户能够看到每个节点是否正在执行、是否成功、是否等待确认或失败。
|
|
||||||
|
|
||||||
## 五、难点规则处理方式
|
|
||||||
|
|
||||||
### 1. 文件完整性检测
|
|
||||||
|
|
||||||
文件完整性检测的难点在于:注册资料不是固定文件名,企业可能用不同命名方式组织材料。
|
|
||||||
|
|
||||||
Demo 的处理方式是使用多层匹配:
|
|
||||||
|
|
||||||
```text
|
|
||||||
规则要求项
|
|
||||||
-> 文件名关键词匹配
|
|
||||||
-> 相对路径匹配
|
|
||||||
-> 目录层级匹配
|
|
||||||
-> 必要时结合首页文本和字段候选
|
|
||||||
```
|
|
||||||
|
|
||||||
例如规则中要求“注册检验报告”,系统不仅查找文件名中是否包含“注册检验报告”,也会查找路径和目录中是否包含“检验报告”“检测报告”等别名。
|
|
||||||
|
|
||||||
如果没有匹配到文件,系统会生成 `Finding`,再由风险评估服务转换为 `RegulatoryIssue`。这样完整性问题既能被结构化记录,也能进入最终风险报告。
|
|
||||||
|
|
||||||
### 2. 信息一致性核查
|
|
||||||
|
|
||||||
一致性核查的难点在于:同一个字段可能散落在说明书、注册检验报告、产品技术要求、申请表等多个文件中。
|
|
||||||
|
|
||||||
Demo 的处理方式是:
|
|
||||||
|
|
||||||
```text
|
|
||||||
文本抽取
|
|
||||||
-> 字段正则识别
|
|
||||||
-> 同字段归并
|
|
||||||
-> 不同取值比对
|
|
||||||
-> 生成一致性风险
|
|
||||||
```
|
|
||||||
|
|
||||||
例如系统会从多个文件中抽取“产品名称”“型号规格”“预期用途”等字段。如果同一字段出现多个不同值,系统会生成高风险问题,并在证据中记录每个取值对应的来源文件。
|
|
||||||
|
|
||||||
这类结果可以直接辅助人工审核人员定位冲突来源。
|
|
||||||
|
|
||||||
### 3. 法规条款匹配
|
|
||||||
|
|
||||||
法规条款匹配的难点在于:法规原文长、条款多,直接让大模型判断容易不稳定,纯规则又缺少解释能力。
|
|
||||||
|
|
||||||
Demo 采用“双层法规能力”:
|
|
||||||
|
|
||||||
| 层级 | 职责 |
|
|
||||||
| --- | --- |
|
|
||||||
| 结构化规则库 | 负责判断应有哪些文件、哪些章节、哪些字段,以及风险等级 |
|
|
||||||
| RAG 法规依据索引 | 负责检索法规原文片段,补充依据说明 |
|
|
||||||
|
|
||||||
这种设计的好处是:判断逻辑稳定,报告解释充分,后续规则也可以由法规人员维护。
|
|
||||||
|
|
||||||
### 4. 过程留痕和可追溯
|
|
||||||
|
|
||||||
审核类系统不能只输出一个结论,还必须说明结论从哪里来。
|
|
||||||
|
|
||||||
Demo 中对关键过程都做了留痕:
|
|
||||||
|
|
||||||
| 过程 | 留痕内容 |
|
| 过程 | 留痕内容 |
|
||||||
| --- | --- |
|
| --- | --- |
|
||||||
| 文件汇总 | 文件路径、页数、统计状态、异常说明 |
|
| 对话 | 用户消息、助手消息、会话标题、更新时间 |
|
||||||
| 文本抽取 | 文本 hash、首页文本、章节候选、字段候选 |
|
| 附件 | 原始文件名、版本号、启用状态、存储路径、文件大小 |
|
||||||
| 完整性核查 | 规则编码、匹配关键词、命中文件或缺失证据 |
|
| 文件汇总 | 批次号、文件明细、页数、统计状态、异常说明 |
|
||||||
| 一致性核查 | 字段值、来源文件、冲突取值 |
|
| 工作流节点 | 节点编码、节点名称、进度、状态、错误信息 |
|
||||||
| RAG 检索 | 法规来源、片段文本、检索分数 |
|
| 法规核查 | 规则编码、缺失项、风险等级、证据、整改建议 |
|
||||||
| 报告导出 | Markdown、Excel、JSON 结果包 |
|
| RAG 检索 | 来源文件、片段文本、相似度、chunk 元数据 |
|
||||||
| 自动填表 | 字段来源、冲突摘要、追溯清单 |
|
| 自动填表 | 字段来源、冲突摘要、模板选择、追溯清单 |
|
||||||
|
| 导出文件 | Markdown、Excel、JSON、Word 等结果文件 |
|
||||||
|
|
||||||
这保证了 Demo 输出的结果不是一次性回答,而是可以复核、下载、整改和继续追踪的过程资产。
|
这保证了 Demo 输出的结果不是一次性回答,而是可以复核、下载、整改和继续追踪的过程资产。
|
||||||
|
|
||||||
## 六、总结
|
## 十、Demo 可展示结果
|
||||||
|
|
||||||
|
本次 Demo 可以展示以下核心结果:
|
||||||
|
|
||||||
|
### 1. 文件目录汇总表
|
||||||
|
|
||||||
|
用户上传注册资料文件夹、散装文件或压缩包后,系统自动完成附件固化、解压、扫描和页数统计,最终生成 Markdown 汇总报告和 Excel 明细表。
|
||||||
|
|
||||||
|
### 2. 法规完整性报告
|
||||||
|
|
||||||
|
系统基于文件汇总结果和 NMPA 规则库做完整性核查,输出 Markdown 法规核查报告、Excel 问题清单和 JSON 结构化结果包。
|
||||||
|
|
||||||
|
### 3. 产品关键信息提取对照表
|
||||||
|
|
||||||
|
系统从说明书、产品技术要求、注册检验报告、申请表等文件中抽取产品名称、型号规格、预期用途、管理类别、分类编码、注册类型和临床评价路径,并保留来源文件和证据片段。
|
||||||
|
|
||||||
|
### 4. 风险预警列表
|
||||||
|
|
||||||
|
系统把完整性缺失、章节异常、字段冲突、文本抽取失败、页数不可确定、通知失败等问题统一沉淀为风险项,并按阻断项、高风险、中风险、低风险和提示项分级。
|
||||||
|
|
||||||
|
### 5. 申报文件自动填表结果
|
||||||
|
|
||||||
|
系统根据资料内容和适用条件选择模板,自动填充 Word 文件,并导出字段追溯清单,说明每个字段来自哪个文件、哪个证据片段。
|
||||||
|
|
||||||
|
## 十一、总结
|
||||||
|
|
||||||
整体来看,本 Demo 的架构搭建思路可以概括为:
|
整体来看,本 Demo 的架构搭建思路可以概括为:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
先结构化资料
|
先用文档锁定规格
|
||||||
再匹配法规
|
再用规则结构化审核逻辑
|
||||||
再抽取字段
|
再用 RAG 补充法规依据
|
||||||
再核查一致性
|
再用 Skill Router 调度工具和工作流
|
||||||
再输出风险和报告
|
再用 ORM 和导出文件沉淀过程资产
|
||||||
最后支持填表和整改闭环
|
最后通过工作台页面呈现状态和结果
|
||||||
```
|
```
|
||||||
|
|
||||||
它体现的是一个“资料输入、规则判断、证据追溯、风险输出、整改闭环”的智能体原型。
|
它体现的是一个“资料输入、规则判断、证据追溯、风险输出、整改闭环”的智能体原型。
|
||||||
|
|
||||||
当前 Demo 已经完成了文件汇总、法规完整性核查、信息抽取、风险预警、报告导出和自动填表主链路。后续如果继续增强,可以重点补充 OCR、扫描件识别、复杂 PDF 版式解析、规则后台维护、人工确认界面、飞书真实消息闭环,以及更完整的多智能体编排能力。
|
当前 Demo 已经完成了首页工作台、审核智能体对话、附件管理、知识库管理、文件汇总、法规核查、RAG 依据检索、风险预警、报告导出和自动填表主链路。后续如果继续增强,可以重点补充 OCR、扫描件识别、复杂 PDF 版式解析、规则后台维护、人工确认界面、飞书真实消息闭环,以及更完整的多智能体编排能力。
|
||||||
|
|
||||||
最终希望这个智能体能够从一个 Demo 原型,逐步演进为注册资料准备和审核过程中的智能协作平台。
|
最终希望这个智能体能够从一个 Demo 原型,逐步演进为注册资料准备和审核过程中的智能协作平台。
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
from django.contrib.auth.decorators import login_required
|
from django.contrib.auth.decorators import login_required
|
||||||
|
from django.db import transaction
|
||||||
from django.db.models import Count, Q
|
from django.db.models import Count, Q
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -7,7 +8,15 @@ from pathlib import Path
|
|||||||
from django.http import FileResponse, Http404, JsonResponse
|
from django.http import FileResponse, Http404, JsonResponse
|
||||||
from django.views.decorators.http import require_http_methods
|
from django.views.decorators.http import require_http_methods
|
||||||
|
|
||||||
from review_agent.models import ApplicationFormFillBatch, Conversation, ExportedSummaryFile, FileAttachment, Message
|
from review_agent.models import (
|
||||||
|
ApplicationFormFillBatch,
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
FileAttachment,
|
||||||
|
Message,
|
||||||
|
RegulatoryInfoPackageBatch,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
)
|
||||||
from review_agent.models import FileSummaryBatch, WorkflowEvent
|
from review_agent.models import FileSummaryBatch, WorkflowEvent
|
||||||
from review_agent.notifications.presenter import serialize_notification_records
|
from review_agent.notifications.presenter import serialize_notification_records
|
||||||
from .events import serialize_event
|
from .events import serialize_event
|
||||||
@@ -152,6 +161,9 @@ def conversation_list(request):
|
|||||||
@login_required
|
@login_required
|
||||||
def conversation_detail(request, conversation_id: int):
|
def conversation_detail(request, conversation_id: int):
|
||||||
conversation = _conversation_for_user(request.user, conversation_id)
|
conversation = _conversation_for_user(request.user, conversation_id)
|
||||||
|
with transaction.atomic():
|
||||||
|
ApplicationFormFillBatch.objects.filter(conversation=conversation).delete()
|
||||||
|
RegulatoryReviewBatch.objects.filter(conversation=conversation).delete()
|
||||||
conversation.delete()
|
conversation.delete()
|
||||||
return JsonResponse({"ok": True, "conversation_id": conversation_id})
|
return JsonResponse({"ok": True, "conversation_id": conversation_id})
|
||||||
|
|
||||||
@@ -293,14 +305,20 @@ def export_download(request, export_id: int):
|
|||||||
extra={"export_id": exported.pk, "storage_path": exported.storage_path},
|
extra={"export_id": exported.pk, "storage_path": exported.storage_path},
|
||||||
)
|
)
|
||||||
return JsonResponse({"error": "文件不存在。"}, status=404)
|
return JsonResponse({"error": "文件不存在。"}, status=404)
|
||||||
|
suffix = Path(exported.file_name).suffix.lower()
|
||||||
content_types = {
|
content_types = {
|
||||||
ExportedSummaryFile.ExportType.MARKDOWN: "text/markdown; charset=utf-8",
|
ExportedSummaryFile.ExportType.MARKDOWN: "text/markdown; charset=utf-8",
|
||||||
ExportedSummaryFile.ExportType.EXCEL: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
ExportedSummaryFile.ExportType.EXCEL: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
ExportedSummaryFile.ExportType.JSON: "application/json; charset=utf-8",
|
ExportedSummaryFile.ExportType.JSON: "application/json; charset=utf-8",
|
||||||
ExportedSummaryFile.ExportType.WORD: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
ExportedSummaryFile.ExportType.WORD: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
ExportedSummaryFile.ExportType.PDF: "application/pdf",
|
ExportedSummaryFile.ExportType.PDF: "application/pdf",
|
||||||
|
ExportedSummaryFile.ExportType.ZIP: "application/zip",
|
||||||
}
|
}
|
||||||
content_type = content_types.get(exported.export_type, "application/octet-stream")
|
content_type = content_types.get(exported.export_type, "application/octet-stream")
|
||||||
|
if exported.export_type == ExportedSummaryFile.ExportType.WORD and suffix == ".doc":
|
||||||
|
content_type = "application/msword"
|
||||||
|
elif exported.export_type == ExportedSummaryFile.ExportType.WORD and suffix == ".docx":
|
||||||
|
content_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
logger.info(
|
logger.info(
|
||||||
"Export download started",
|
"Export download started",
|
||||||
extra={
|
extra={
|
||||||
@@ -331,6 +349,17 @@ def _export_for_user(user, export_id: int) -> ExportedSummaryFile | None:
|
|||||||
is_deleted=False,
|
is_deleted=False,
|
||||||
).exists()
|
).exists()
|
||||||
return exported if allowed else None
|
return exported if allowed else None
|
||||||
|
if exported.workflow_type == "regulatory_info_package":
|
||||||
|
if not exported.workflow_batch_id:
|
||||||
|
return None
|
||||||
|
allowed = RegulatoryInfoPackageBatch.objects.filter(
|
||||||
|
pk=exported.workflow_batch_id,
|
||||||
|
conversation__user=user,
|
||||||
|
is_deleted=False,
|
||||||
|
).exists()
|
||||||
|
return exported if allowed else None
|
||||||
|
if exported.batch_id is None:
|
||||||
|
return None
|
||||||
if exported.batch.user_id != user.pk:
|
if exported.batch.user_id != user.pk:
|
||||||
return None
|
return None
|
||||||
return exported
|
return exported
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ from django.core.files.uploadedfile import UploadedFile
|
|||||||
|
|
||||||
from review_agent.models import KnowledgeBaseDocument
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
from review_agent.regulatory_review.services.rag_citation import RagIndexUnavailable, retrieve_citations
|
from review_agent.regulatory_review.services.rag_citation import RagIndexUnavailable, retrieve_citations
|
||||||
from review_agent.regulatory_review.services.rag_embedding import DeterministicEmbeddingProvider
|
from review_agent.regulatory_review.services.rag_embedding import get_embedding_provider
|
||||||
from review_agent.regulatory_review.services.rag_index import chunk_text, extract_text_from_path
|
from review_agent.regulatory_review.services.rag_index import chunk_text, extract_text_from_path, is_excluded_source_path
|
||||||
from review_agent.regulatory_review.services.rule_loader import DEFAULT_RULE_PATH, compute_file_sha256, load_rule_file
|
from review_agent.regulatory_review.services.rule_loader import DEFAULT_RULE_PATH, compute_file_sha256, load_rule_file
|
||||||
|
|
||||||
|
|
||||||
@@ -78,6 +78,8 @@ def list_source_documents(source_dir: Path) -> list[dict[str, Any]]:
|
|||||||
continue
|
continue
|
||||||
suffix = path.suffix.lower()
|
suffix = path.suffix.lower()
|
||||||
relative_path = str(path.relative_to(source_dir))
|
relative_path = str(path.relative_to(source_dir))
|
||||||
|
if is_excluded_source_path(relative_path):
|
||||||
|
continue
|
||||||
indexed_chunk_count = source_chunk_counts.get(relative_path, 0)
|
indexed_chunk_count = source_chunk_counts.get(relative_path, 0)
|
||||||
documents.append(
|
documents.append(
|
||||||
{
|
{
|
||||||
@@ -101,7 +103,7 @@ def search_knowledge_base(query: str, *, n_results: int = 3) -> dict[str, Any]:
|
|||||||
try:
|
try:
|
||||||
results = retrieve_citations(
|
results = retrieve_citations(
|
||||||
normalized,
|
normalized,
|
||||||
embedding_provider=DeterministicEmbeddingProvider(),
|
embedding_provider=get_embedding_provider(),
|
||||||
n_results=n_results,
|
n_results=n_results,
|
||||||
)
|
)
|
||||||
except RagIndexUnavailable as exc:
|
except RagIndexUnavailable as exc:
|
||||||
@@ -151,6 +153,7 @@ def create_document_from_upload(
|
|||||||
|
|
||||||
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
|
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
|
||||||
update_fields = []
|
update_fields = []
|
||||||
|
active_changed = False
|
||||||
if "display_name" in payload:
|
if "display_name" in payload:
|
||||||
document.display_name = str(payload.get("display_name") or "").strip() or document.original_name
|
document.display_name = str(payload.get("display_name") or "").strip() or document.original_name
|
||||||
update_fields.append("display_name")
|
update_fields.append("display_name")
|
||||||
@@ -158,12 +161,21 @@ def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) ->
|
|||||||
document.description = str(payload.get("description") or "").strip()
|
document.description = str(payload.get("description") or "").strip()
|
||||||
update_fields.append("description")
|
update_fields.append("description")
|
||||||
if "is_active" in payload:
|
if "is_active" in payload:
|
||||||
document.is_active = bool(payload.get("is_active"))
|
next_is_active = bool(payload.get("is_active"))
|
||||||
document.status = KnowledgeBaseDocument.Status.ACTIVE if document.is_active else KnowledgeBaseDocument.Status.DISABLED
|
active_changed = document.is_active != next_is_active
|
||||||
|
document.is_active = next_is_active
|
||||||
|
document.status = KnowledgeBaseDocument.Status.ACTIVE if next_is_active else KnowledgeBaseDocument.Status.DISABLED
|
||||||
update_fields.extend(["is_active", "status"])
|
update_fields.extend(["is_active", "status"])
|
||||||
|
if not next_is_active:
|
||||||
|
remove_managed_document_from_index(document)
|
||||||
|
document.indexed_chunk_count = 0
|
||||||
|
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
|
||||||
|
update_fields.extend(["indexed_chunk_count", "metadata"])
|
||||||
if update_fields:
|
if update_fields:
|
||||||
update_fields.append("updated_at")
|
update_fields.append("updated_at")
|
||||||
document.save(update_fields=update_fields)
|
document.save(update_fields=update_fields)
|
||||||
|
if active_changed and document.is_active:
|
||||||
|
index_managed_document(document)
|
||||||
return document
|
return document
|
||||||
|
|
||||||
|
|
||||||
@@ -196,6 +208,12 @@ def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]:
|
|||||||
|
|
||||||
|
|
||||||
def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
||||||
|
if document.status != KnowledgeBaseDocument.Status.ACTIVE or not document.is_active:
|
||||||
|
remove_managed_document_from_index(document)
|
||||||
|
document.indexed_chunk_count = 0
|
||||||
|
document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
|
||||||
|
document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"])
|
||||||
|
return 0
|
||||||
path = Path(document.storage_path)
|
path = Path(document.storage_path)
|
||||||
if not path.is_absolute():
|
if not path.is_absolute():
|
||||||
path = Path(settings.MEDIA_ROOT) / document.storage_path
|
path = Path(settings.MEDIA_ROOT) / document.storage_path
|
||||||
@@ -210,7 +228,7 @@ def index_managed_document(document: KnowledgeBaseDocument) -> int:
|
|||||||
return 0
|
return 0
|
||||||
collection = _load_chroma_collection()
|
collection = _load_chroma_collection()
|
||||||
texts = [chunk.text for chunk in chunks]
|
texts = [chunk.text for chunk in chunks]
|
||||||
embeddings = DeterministicEmbeddingProvider()(texts)
|
embeddings = get_embedding_provider()(texts)
|
||||||
ids = [
|
ids = [
|
||||||
hashlib.sha256(f"managed:{document.pk}:{chunk.metadata['chunk_index']}".encode("utf-8")).hexdigest()
|
hashlib.sha256(f"managed:{document.pk}:{chunk.metadata['chunk_index']}".encode("utf-8")).hexdigest()
|
||||||
for chunk in chunks
|
for chunk in chunks
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class Command(BaseCommand):
|
|||||||
raise CommandError(f"法规材料目录不存在:{source_dir}")
|
raise CommandError(f"法规材料目录不存在:{source_dir}")
|
||||||
try:
|
try:
|
||||||
provider = get_embedding_provider(options["provider"])
|
provider = get_embedding_provider(options["provider"])
|
||||||
count = build_chroma_index(source_dir=source_dir, embedding_provider=provider)
|
count = build_chroma_index(source_dir=source_dir, embedding_provider=provider, reset=True)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise CommandError(str(exc)) from exc
|
raise CommandError(str(exc)) from exc
|
||||||
self.stdout.write(
|
self.stdout.write(
|
||||||
|
|||||||
@@ -0,0 +1,388 @@
|
|||||||
|
# Generated by Django 5.2.14 on 2026-06-10 11:12
|
||||||
|
|
||||||
|
import django.db.models.deletion
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Schema migration for the regulatory information package workflow.

    Creates the artifact, batch and notification-record tables, redefines
    ``ExportedSummaryFile.batch`` as a nullable foreign key, adds ``zip`` to
    the ``ExportedSummaryFile.export_type`` choices, adds a second unique
    constraint on ``WorkflowNodeRun`` and finally wires up the foreign keys
    and indexes of the three new models.
    """

    dependencies = [
        ("review_agent", "0008_knowledgebasedocument"),
        # The new models reference the project's configured user model.
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # --- New table: intermediate and generated files of a package batch ---
        migrations.CreateModel(
            name="RegulatoryInfoPackageArtifact",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "artifact_type",
                    models.CharField(
                        choices=[
                            ("template_copy", "模板副本"),
                            ("instruction_extract", "说明书抽取结果"),
                            ("field_extract_result", "字段抽取结果"),
                            ("merged_fields", "合并字段"),
                            ("generated_document", "生成文件"),
                            ("traceability", "追溯清单"),
                            ("zip_package", "ZIP包"),
                            ("notification_record", "通知记录"),
                        ],
                        max_length=60,
                    ),
                ),
                (
                    "file_format",
                    models.CharField(
                        choices=[
                            ("json", "JSON"),
                            ("excel", "Excel"),
                            ("docx", "DOCX"),
                            ("doc", "DOC"),
                            ("zip", "ZIP"),
                            ("markdown", "Markdown"),
                        ],
                        max_length=20,
                    ),
                ),
                ("name", models.CharField(max_length=160)),
                ("file_name", models.CharField(max_length=255)),
                ("storage_path", models.CharField(max_length=500)),
                ("file_size", models.BigIntegerField(default=0)),
                (
                    "content_hash",
                    models.CharField(blank=True, default="", max_length=128),
                ),
                ("metadata", models.JSONField(blank=True, default=dict)),
                (
                    "created_by_node",
                    models.CharField(blank=True, default="", max_length=60),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("is_deleted", models.BooleanField(default=False)),
            ],
            options={
                "db_table": "ra_regulatory_info_package_artifact",
                "ordering": ["-created_at", "-id"],
            },
        ),
        # --- New table: one row per package workflow run ---
        # Foreign keys of this model are added further down via AddField.
        migrations.CreateModel(
            name="RegulatoryInfoPackageBatch",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "source_summary_item_id",
                    models.PositiveBigIntegerField(blank=True, null=True),
                ),
                ("batch_no", models.CharField(max_length=64, unique=True)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "待执行"),
                            ("running", "执行中"),
                            ("waiting_user", "等待用户"),
                            ("success", "成功"),
                            ("partial_success", "部分成功"),
                            ("failed", "失败"),
                            ("cancelled", "已取消"),
                        ],
                        default="pending",
                        max_length=30,
                    ),
                ),
                (
                    "source_file_name",
                    models.CharField(blank=True, default="", max_length=255),
                ),
                (
                    "source_storage_path",
                    models.CharField(blank=True, default="", max_length=500),
                ),
                (
                    "product_name",
                    models.CharField(blank=True, default="", max_length=200),
                ),
                (
                    "output_zip_name",
                    models.CharField(
                        blank=True,
                        default="第1章 监管信息(预生成版).zip",
                        max_length=255,
                    ),
                ),
                ("generated_files", models.JSONField(blank=True, default=list)),
                ("missing_fields", models.JSONField(blank=True, default=list)),
                ("llm_only_fields", models.JSONField(blank=True, default=list)),
                ("conflict_fields", models.JSONField(blank=True, default=list)),
                ("risk_notes", models.JSONField(blank=True, default=list)),
                (
                    "template_config_version",
                    models.CharField(blank=True, default="", max_length=80),
                ),
                (
                    "template_config_hash",
                    models.CharField(blank=True, default="", max_length=128),
                ),
                ("adapter_summary", models.JSONField(blank=True, default=dict)),
                ("work_dir", models.CharField(blank=True, default="", max_length=500)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                ("archived_at", models.DateTimeField(blank=True, null=True)),
                ("is_deleted", models.BooleanField(default=False)),
            ],
            options={
                "db_table": "ra_regulatory_info_package_batch",
                "ordering": ["-created_at", "-id"],
            },
        ),
        # --- New table: notification attempts for a package batch ---
        migrations.CreateModel(
            name="RegulatoryInfoPackageNotificationRecord",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "channel",
                    models.CharField(
                        choices=[
                            ("feishu_cli", "飞书 CLI"),
                            ("feishu_api", "飞书 API"),
                            ("mock", "模拟"),
                        ],
                        default="mock",
                        max_length=30,
                    ),
                ),
                ("export_ids", models.JSONField(blank=True, default=list)),
                ("message_summary", models.TextField(blank=True, default="")),
                (
                    "send_status",
                    models.CharField(
                        choices=[
                            ("pending", "待发送"),
                            ("success", "成功"),
                            ("failed", "失败"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("retry_count", models.PositiveIntegerField(default=0)),
                (
                    "external_message_id",
                    models.CharField(blank=True, default="", max_length=120),
                ),
                ("error_message", models.TextField(blank=True, default="")),
                ("sent_at", models.DateTimeField(blank=True, null=True)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                ("is_deleted", models.BooleanField(default=False)),
            ],
            options={
                "db_table": "ra_regulatory_info_package_notification_record",
                "ordering": ["-created_at", "-id"],
            },
        ),
        # --- Changes to existing models ---
        # ExportedSummaryFile.batch is redefined as an optional (NULL-able) FK.
        migrations.AlterField(
            model_name="exportedsummaryfile",
            name="batch",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.CASCADE,
                related_name="exports",
                to="review_agent.filesummarybatch",
            ),
        ),
        # The export_type choices now include "zip".
        migrations.AlterField(
            model_name="exportedsummaryfile",
            name="export_type",
            field=models.CharField(
                choices=[
                    ("markdown", "Markdown"),
                    ("excel", "Excel"),
                    ("json", "JSON"),
                    ("word", "Word"),
                    ("pdf", "PDF"),
                    ("zip", "ZIP"),
                ],
                max_length=20,
            ),
        ),
        # NOTE(review): this fails on databases that already hold duplicate
        # (workflow_type, workflow_batch_id, node_code) rows — verify existing
        # data before deploying.
        migrations.AddConstraint(
            model_name="workflownoderun",
            constraint=models.UniqueConstraint(
                fields=("workflow_type", "workflow_batch_id", "node_code"),
                name="uq_ra_node_workflow_batch_code",
            ),
        ),
        # --- Foreign keys of RegulatoryInfoPackageBatch ---
        migrations.AddField(
            model_name="regulatoryinfopackagebatch",
            name="conversation",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="regulatory_info_package_batches",
                to="review_agent.conversation",
            ),
        ),
        migrations.AddField(
            model_name="regulatoryinfopackagebatch",
            name="source_attachment",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="regulatory_info_package_batches",
                to="review_agent.fileattachment",
            ),
        ),
        migrations.AddField(
            model_name="regulatoryinfopackagebatch",
            name="source_summary_batch",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="regulatory_info_package_batches",
                to="review_agent.filesummarybatch",
            ),
        ),
        migrations.AddField(
            model_name="regulatoryinfopackagebatch",
            name="trigger_message",
            field=models.ForeignKey(
                blank=True,
                null=True,
                on_delete=django.db.models.deletion.SET_NULL,
                related_name="triggered_regulatory_info_package_batches",
                to="review_agent.message",
            ),
        ),
        migrations.AddField(
            model_name="regulatoryinfopackagebatch",
            name="user",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="review_regulatory_info_package_batches",
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        # --- Foreign keys of the artifact and notification models ---
        migrations.AddField(
            model_name="regulatoryinfopackageartifact",
            name="batch",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="artifacts",
                to="review_agent.regulatoryinfopackagebatch",
            ),
        ),
        migrations.AddField(
            model_name="regulatoryinfopackagenotificationrecord",
            name="batch",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="notifications",
                to="review_agent.regulatoryinfopackagebatch",
            ),
        ),
        migrations.AddField(
            model_name="regulatoryinfopackagenotificationrecord",
            name="recipient",
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="regulatory_info_package_notifications",
                to=settings.AUTH_USER_MODEL,
            ),
        ),
        # --- Indexes (names match the Meta.indexes of the three new models) ---
        migrations.AddIndex(
            model_name="regulatoryinfopackagebatch",
            index=models.Index(
                fields=["conversation", "status"], name="idx_ra_rip_batch_conv_status"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagebatch",
            index=models.Index(
                fields=["user", "created_at"], name="idx_ra_rip_batch_user_created"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagebatch",
            index=models.Index(
                fields=["source_attachment"], name="idx_ra_rip_batch_attachment"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagebatch",
            index=models.Index(
                fields=["source_summary_batch"], name="idx_ra_rip_batch_summary"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagebatch",
            index=models.Index(fields=["created_at"], name="idx_ra_rip_batch_created"),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackageartifact",
            index=models.Index(
                fields=["batch", "artifact_type"], name="idx_ra_rip_artifact_batch_type"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackageartifact",
            index=models.Index(
                fields=["file_format"], name="idx_ra_rip_artifact_format"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackageartifact",
            index=models.Index(
                fields=["created_at"], name="idx_ra_rip_artifact_created"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagenotificationrecord",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_rip_notify_batch"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagenotificationrecord",
            index=models.Index(
                fields=["recipient", "send_status"], name="idx_ra_rip_notify_recipient"
            ),
        ),
        migrations.AddIndex(
            model_name="regulatoryinfopackagenotificationrecord",
            index=models.Index(
                fields=["send_status", "retry_count"], name="idx_ra_rip_notify_status"
            ),
        ),
    ]
|
||||||
@@ -280,7 +280,11 @@ class WorkflowNodeRun(models.Model):
|
|||||||
class Meta:
|
class Meta:
|
||||||
db_table = "ra_workflow_node_run"
|
db_table = "ra_workflow_node_run"
|
||||||
constraints = [
|
constraints = [
|
||||||
models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code")
|
models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code"),
|
||||||
|
models.UniqueConstraint(
|
||||||
|
fields=["workflow_type", "workflow_batch_id", "node_code"],
|
||||||
|
name="uq_ra_node_workflow_batch_code",
|
||||||
|
),
|
||||||
]
|
]
|
||||||
indexes = [
|
indexes = [
|
||||||
models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
|
models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
|
||||||
@@ -336,6 +340,7 @@ class ExportedSummaryFile(models.Model):
|
|||||||
JSON = "json", "JSON"
|
JSON = "json", "JSON"
|
||||||
WORD = "word", "Word"
|
WORD = "word", "Word"
|
||||||
PDF = "pdf", "PDF"
|
PDF = "pdf", "PDF"
|
||||||
|
ZIP = "zip", "ZIP"
|
||||||
|
|
||||||
class Status(models.TextChoices):
|
class Status(models.TextChoices):
|
||||||
SUCCESS = "success", "成功"
|
SUCCESS = "success", "成功"
|
||||||
@@ -345,6 +350,8 @@ class ExportedSummaryFile(models.Model):
|
|||||||
FileSummaryBatch,
|
FileSummaryBatch,
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
related_name="exports",
|
related_name="exports",
|
||||||
|
null=True,
|
||||||
|
blank=True,
|
||||||
)
|
)
|
||||||
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
|
workflow_type = models.CharField(max_length=40, blank=True, default="file_summary")
|
||||||
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
|
workflow_batch_id = models.PositiveBigIntegerField(null=True, blank=True)
|
||||||
@@ -524,6 +531,87 @@ class ApplicationFormFillBatch(models.Model):
|
|||||||
return self.batch_no
|
return self.batch_no
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryInfoPackageBatch(models.Model):
    """Tracks one Chapter 1 regulatory information package workflow run."""

    # Values stored in ``status``; the second element is the Chinese display label.
    class Status(models.TextChoices):
        PENDING = "pending", "待执行"
        RUNNING = "running", "执行中"
        WAITING_USER = "waiting_user", "等待用户"
        SUCCESS = "success", "成功"
        PARTIAL_SUCCESS = "partial_success", "部分成功"
        FAILED = "failed", "失败"
        CANCELLED = "cancelled", "已取消"

    # Required owners: deleting the conversation or the user deletes the batch.
    conversation = models.ForeignKey(
        Conversation,
        on_delete=models.CASCADE,
        related_name="regulatory_info_package_batches",
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="review_regulatory_info_package_batches",
    )
    # Optional links: the batch survives deletion of the referenced row (SET_NULL).
    trigger_message = models.ForeignKey(
        Message,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="triggered_regulatory_info_package_batches",
    )
    source_attachment = models.ForeignKey(
        FileAttachment,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="regulatory_info_package_batches",
    )
    source_summary_batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="regulatory_info_package_batches",
    )
    # Plain integer, not a ForeignKey, so the database does not enforce that
    # the referenced row exists.
    source_summary_item_id = models.PositiveBigIntegerField(null=True, blank=True)
    # Unique batch identifier; also used as the string representation.
    batch_no = models.CharField(max_length=64, unique=True)
    status = models.CharField(max_length=30, choices=Status.choices, default=Status.PENDING)
    source_file_name = models.CharField(max_length=255, blank=True, default="")
    source_storage_path = models.CharField(max_length=500, blank=True, default="")
    product_name = models.CharField(max_length=200, blank=True, default="")
    output_zip_name = models.CharField(max_length=255, blank=True, default="第1章 监管信息(预生成版).zip")
    # JSON list columns; each defaults to a fresh empty list per row.
    generated_files = models.JSONField(default=list, blank=True)
    missing_fields = models.JSONField(default=list, blank=True)
    llm_only_fields = models.JSONField(default=list, blank=True)
    conflict_fields = models.JSONField(default=list, blank=True)
    risk_notes = models.JSONField(default=list, blank=True)
    template_config_version = models.CharField(max_length=80, blank=True, default="")
    template_config_hash = models.CharField(max_length=128, blank=True, default="")
    adapter_summary = models.JSONField(default=dict, blank=True)
    work_dir = models.CharField(max_length=500, blank=True, default="")
    error_message = models.TextField(blank=True, default="")
    # created_at is set once on insert; the other timestamps stay NULL until
    # something writes them.
    created_at = models.DateTimeField(auto_now_add=True)
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)
    archived_at = models.DateTimeField(null=True, blank=True)
    # NOTE(review): presumably a soft-delete flag — confirm against the queries that filter on it.
    is_deleted = models.BooleanField(default=False)

    class Meta:
        db_table = "ra_regulatory_info_package_batch"
        # Newest first; id breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["conversation", "status"], name="idx_ra_rip_batch_conv_status"),
            models.Index(fields=["user", "created_at"], name="idx_ra_rip_batch_user_created"),
            models.Index(fields=["source_attachment"], name="idx_ra_rip_batch_attachment"),
            models.Index(fields=["source_summary_batch"], name="idx_ra_rip_batch_summary"),
            models.Index(fields=["created_at"], name="idx_ra_rip_batch_created"),
        ]

    def __str__(self) -> str:
        return self.batch_no
|
||||||
|
|
||||||
|
|
||||||
class RegulatoryReviewBatch(models.Model):
|
class RegulatoryReviewBatch(models.Model):
|
||||||
"""Tracks one NMPA regulatory review workflow run."""
|
"""Tracks one NMPA regulatory review workflow run."""
|
||||||
|
|
||||||
@@ -745,6 +833,54 @@ class ApplicationFormFillArtifact(models.Model):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryInfoPackageArtifact(models.Model):
    """Stores regulatory information package intermediate and generated files."""

    # Values stored in ``artifact_type``; the second element is the Chinese display label.
    class ArtifactType(models.TextChoices):
        TEMPLATE_COPY = "template_copy", "模板副本"
        INSTRUCTION_EXTRACT = "instruction_extract", "说明书抽取结果"
        FIELD_EXTRACT_RESULT = "field_extract_result", "字段抽取结果"
        MERGED_FIELDS = "merged_fields", "合并字段"
        GENERATED_DOCUMENT = "generated_document", "生成文件"
        TRACEABILITY = "traceability", "追溯清单"
        ZIP_PACKAGE = "zip_package", "ZIP包"
        NOTIFICATION_RECORD = "notification_record", "通知记录"

    # Values stored in ``file_format``.
    class FileFormat(models.TextChoices):
        JSON = "json", "JSON"
        EXCEL = "excel", "Excel"
        DOCX = "docx", "DOCX"
        DOC = "doc", "DOC"
        ZIP = "zip", "ZIP"
        MARKDOWN = "markdown", "Markdown"

    # Owning batch; artifacts are deleted together with it (CASCADE) and are
    # reachable from the batch as ``batch.artifacts``.
    batch = models.ForeignKey(
        RegulatoryInfoPackageBatch,
        on_delete=models.CASCADE,
        related_name="artifacts",
    )
    artifact_type = models.CharField(max_length=60, choices=ArtifactType.choices)
    file_format = models.CharField(max_length=20, choices=FileFormat.choices)
    name = models.CharField(max_length=160)
    file_name = models.CharField(max_length=255)
    storage_path = models.CharField(max_length=500)
    file_size = models.BigIntegerField(default=0)
    # Optional; empty string when no hash was recorded.
    content_hash = models.CharField(max_length=128, blank=True, default="")
    metadata = models.JSONField(default=dict, blank=True)
    # NOTE(review): presumably the workflow node code that produced the file — confirm with the writers.
    created_by_node = models.CharField(max_length=60, blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    # NOTE(review): presumably a soft-delete flag — confirm against the queries that filter on it.
    is_deleted = models.BooleanField(default=False)

    class Meta:
        db_table = "ra_regulatory_info_package_artifact"
        # Newest first; id breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["batch", "artifact_type"], name="idx_ra_rip_artifact_batch_type"),
            models.Index(fields=["file_format"], name="idx_ra_rip_artifact_format"),
            models.Index(fields=["created_at"], name="idx_ra_rip_artifact_created"),
        ]
|
||||||
|
|
||||||
|
|
||||||
class ApplicationFormFillNotificationRecord(models.Model):
|
class ApplicationFormFillNotificationRecord(models.Model):
|
||||||
"""Stores mock/Feishu notification records for application-form auto-fill."""
|
"""Stores mock/Feishu notification records for application-form auto-fill."""
|
||||||
|
|
||||||
@@ -795,6 +931,55 @@ class ApplicationFormFillNotificationRecord(models.Model):
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryInfoPackageNotificationRecord(models.Model):
    """Stores mock/Feishu notification records for regulatory info packages."""

    # Values stored in ``channel``; the second element is the Chinese display label.
    class Channel(models.TextChoices):
        FEISHU_CLI = "feishu_cli", "飞书 CLI"
        FEISHU_API = "feishu_api", "飞书 API"
        MOCK = "mock", "模拟"

    # Values stored in ``send_status``.
    class SendStatus(models.TextChoices):
        PENDING = "pending", "待发送"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    # Both links cascade: deleting the batch or the recipient user deletes the record.
    batch = models.ForeignKey(
        RegulatoryInfoPackageBatch,
        on_delete=models.CASCADE,
        related_name="notifications",
    )
    recipient = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="regulatory_info_package_notifications",
    )
    # Defaults to the mock channel.
    channel = models.CharField(max_length=30, choices=Channel.choices, default=Channel.MOCK)
    # JSON list column, empty by default.
    # NOTE(review): presumably ids of exported files referenced by the message — confirm with the sender code.
    export_ids = models.JSONField(default=list, blank=True)
    message_summary = models.TextField(blank=True, default="")
    send_status = models.CharField(
        max_length=20,
        choices=SendStatus.choices,
        default=SendStatus.PENDING,
    )
    retry_count = models.PositiveIntegerField(default=0)
    external_message_id = models.CharField(max_length=120, blank=True, default="")
    error_message = models.TextField(blank=True, default="")
    # NULL until a send time is recorded.
    sent_at = models.DateTimeField(null=True, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed automatically on every save().
    updated_at = models.DateTimeField(auto_now=True)
    # NOTE(review): presumably a soft-delete flag — confirm against the queries that filter on it.
    is_deleted = models.BooleanField(default=False)

    class Meta:
        db_table = "ra_regulatory_info_package_notification_record"
        # Newest first; id breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["batch", "created_at"], name="idx_ra_rip_notify_batch"),
            models.Index(fields=["recipient", "send_status"], name="idx_ra_rip_notify_recipient"),
            models.Index(fields=["send_status", "retry_count"], name="idx_ra_rip_notify_status"),
        ]
|
||||||
|
|
||||||
|
|
||||||
class FeishuUserMapping(models.Model):
|
class FeishuUserMapping(models.Model):
|
||||||
"""Maps a system user to Feishu identifiers maintained by Admin."""
|
"""Maps a system user to Feishu identifiers maintained by Admin."""
|
||||||
|
|
||||||
|
|||||||
2
review_agent/regulatory_info_package/__init__.py
Normal file
2
review_agent/regulatory_info_package/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
"""Chapter 1 regulatory information package workflow."""
|
||||||
|
|
||||||
30
review_agent/regulatory_info_package/constants.py
Normal file
30
review_agent/regulatory_info_package/constants.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Identifier under which this workflow's events are recorded.
WORKFLOW_TYPE = "regulatory_info_package"
# File name used for the packaged ZIP output by default.
DEFAULT_ZIP_NAME = "第1章 监管信息(预生成版).zip"

# Phrases associated with triggering the package workflow.
REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS = [
    "根据说明书生成第1章监管信息",
    "生成监管信息材料包",
    "从说明书生成第1章材料",
    "第1章监管信息",
    "监管信息材料包",
]

# Ordered (node_code, label, group) triples. Every working node shares the
# same third element; only the terminal "completed" node carries its own.
REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS = [
    *(
        (node_code, node_label, "regulatory_info_package")
        for node_code, node_label in (
            ("prepare", "准备资料"),
            ("template_copy", "复制模板"),
            ("text_extract", "抽取说明书"),
            ("field_extract", "抽取字段"),
            ("field_merge", "合并字段"),
            ("generate_docs", "生成材料"),
            ("highlight_review_items", "标记待确认"),
            ("trace_export", "追溯清单"),
            ("zip_export", "打包下载"),
            ("notify", "通知"),
        )
    ),
    ("completed", "完成", "completed"),
]

# Status strings for individual generated files.
GENERATED_FILE_SUCCESS = "success"
GENERATED_FILE_FALLBACK_SUCCESS = "fallback_success"
GENERATED_FILE_FAILED = "failed"
GENERATED_FILE_SKIPPED = "skipped"
|
||||||
|
|
||||||
15
review_agent/regulatory_info_package/events.py
Normal file
15
review_agent/regulatory_info_package/events.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.constants import WORKFLOW_TYPE
|
||||||
|
from review_agent.models import RegulatoryInfoPackageBatch, WorkflowEvent
|
||||||
|
|
||||||
|
|
||||||
|
def record_event(batch: RegulatoryInfoPackageBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Persist a workflow event for a regulatory information package batch.

    Args:
        batch: Batch the event belongs to; its primary key and conversation
            are copied onto the event.
        event_type: Event type string stored on the event.
        payload: Optional event data. ``None`` or an empty dict is stored
            as a new empty dict.

    Returns:
        The newly created ``WorkflowEvent`` row.
    """
    event_values = {
        "workflow_type": WORKFLOW_TYPE,
        "workflow_batch_id": batch.pk,
        "conversation": batch.conversation,
        "event_type": event_type,
        "payload": payload if payload else {},
    }
    return WorkflowEvent.objects.create(**event_values)
|
||||||
|
|
||||||
58
review_agent/regulatory_info_package/schemas.py
Normal file
58
review_agent/regulatory_info_package/schemas.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class TemplateSpec:
    """Immutable description of one template entry.

    Instances are frozen, so attributes cannot be reassigned after
    construction. The ``fields`` list itself is still mutable, and because it
    takes part in the generated hash, instances are not hashable.
    """

    # NOTE(review): the field meanings below are inferred from names only — confirm with the config loader.
    code: str  # presumably the template identifier (e.g. "ch1_5_product_list")
    output_name: str  # presumably the file name of the generated document
    source_file: str  # presumably the template file to copy from
    file_format: str
    strategy: str
    include_in_zip: bool
    prefer_legacy_doc_native: bool = False
    allow_docx_fallback: bool = True
    # default_factory gives every instance its own list.
    fields: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class InstructionExtractResult:
    """Container for content extracted from one source instruction file.

    All attributes are required; there are no defaults.
    """

    source_file_name: str
    paragraphs: list[str]
    sections: dict[str, str]  # presumably section title -> section text — TODO confirm
    tables: list[list[list[str]]]  # presumably table -> row -> cell text — TODO confirm
    component_tables: list[dict[str, Any]]
    front_text: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MergedField:
    """One resolved field value together with its provenance and review flags."""

    key: str
    label: str
    value: str
    source: str
    evidence: str
    confidence: float
    # "none" means no highlighting. The DOCX writer in this package also
    # checks for "missing", "llm_only" and "conflict".
    highlight_reason: str = "none"
    needs_review: bool = False
    # NOTE(review): presumably the rule-based and LLM candidates that were merged into ``value`` — confirm.
    rule_value: str = ""
    llm_value: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class GeneratedFileResult:
    """Outcome of generating one output file from a template."""

    template_code: str
    file_name: str
    requested_format: str
    actual_format: str
    # NOTE(review): presumably one of the GENERATED_FILE_* status constants — confirm with the producers.
    status: str
    path: str = ""
    # Left as None until ids are assigned.
    # NOTE(review): presumably database ids of the stored artifact/export rows — confirm.
    artifact_id: int | None = None
    export_id: int | None = None
    highlight_count: int = 0
    missing_count: int = 0
    llm_only_count: int = 0
    error_message: str = ""
|
||||||
|
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
"""Services for the regulatory information package workflow."""
|
||||||
|
|
||||||
322
review_agent/regulatory_info_package/services/docx_document.py
Normal file
322
review_agent/regulatory_info_package/services/docx_document.py
Normal file
@@ -0,0 +1,322 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from docx import Document
|
||||||
|
from docx.enum.text import WD_COLOR_INDEX
|
||||||
|
from docx.shared import RGBColor
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import MergedField
|
||||||
|
|
||||||
|
|
||||||
|
# Matches ``{{key}}`` placeholders; group 1 captures the key, which may contain
# only ASCII letters, digits and underscores.
PLACEHOLDER_RE = re.compile(r"\{\{([a-zA-Z0-9_]+)\}\}")
|
||||||
|
|
||||||
|
|
||||||
|
def write_docx_from_template(
    source_path: str | Path,
    output_path: str | Path,
    merged_fields: dict[str, MergedField],
    *,
    template_code: str = "",
    directory_page_numbers: dict[str, str] | None = None,
) -> tuple[int, int, int]:
    """Fill a DOCX template with merged field values and save the result.

    Args:
        source_path: Template file. If it does not exist, an empty document
            is used instead.
        output_path: Destination file; missing parent directories are created.
        merged_fields: Field key -> merged field, used both for the
            template-specific replacements and for ``{{key}}`` placeholders.
        template_code: Template identifier. ``"ch1_5_product_list"`` and
            ``"ch1_2_directory"`` trigger extra template-specific processing.
        directory_page_numbers: Page numbers keyed by directory code; only
            used for the ``"ch1_2_directory"`` template.

    Returns:
        ``(highlight_count, missing_count, llm_only_count)``. The highlight
        count sums the template-specific pass and the placeholder pass; the
        other two come from the placeholder pass only.
    """
    template_file = Path(source_path)
    target_file = Path(output_path)
    target_file.parent.mkdir(parents=True, exist_ok=True)

    document = Document(template_file) if template_file.exists() else Document()

    # Map the literal "{{key}}" token to its field for the placeholder pass.
    replacements = {f"{{{{{key}}}}}": merged for key, merged in merged_fields.items()}

    # Template-specific passes run before the generic placeholder pass.
    known_highlights = _apply_known_template_replacements(document, merged_fields, template_code=template_code)
    if template_code == "ch1_5_product_list":
        _rebuild_product_list_table(document, merged_fields)
    if template_code == "ch1_2_directory":
        _apply_directory_page_numbers(document, directory_page_numbers or {})

    placeholder_counts = _replace_placeholders(document, replacements, merged_fields)
    highlight_count = known_highlights + placeholder_counts[0]
    missing_count = placeholder_counts[1]
    llm_only_count = placeholder_counts[2]

    document.save(target_file)
    return highlight_count, missing_count, llm_only_count
|
||||||
|
|
||||||
|
|
||||||
|
def _replace_paragraph_text(paragraph, text: str, field: MergedField) -> None:
    """Replace a paragraph's text with a single new run, styled by review state.

    Every existing run is blanked and ``text`` is appended as a new run. When
    ``field.highlight_reason`` is not ``"none"`` the new run gets a yellow
    highlight; for ``"conflict"`` its font colour is also set to red.

    Args:
        paragraph: Paragraph object exposing ``runs`` and ``add_run``.
        text: Final paragraph text.
        field: Field whose ``highlight_reason`` selects the styling.
    """
    for existing_run in paragraph.runs:
        existing_run.text = ""
    new_run = paragraph.add_run(text)

    # Unflagged fields keep the run's default appearance.
    if field.highlight_reason == "none":
        return

    new_run.font.highlight_color = WD_COLOR_INDEX.YELLOW
    if field.highlight_reason == "conflict":
        new_run.font.color.rgb = RGBColor(255, 0, 0)
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_directory_page_numbers(document, page_numbers: dict[str, str]) -> None:
    """Write page numbers into the first directory table of the document.

    The directory table is the first table whose header row has at least
    five cells, starts with ``RPS目录`` and has ``页码`` in the fifth cell.
    For each body row whose first cell matches a key of ``page_numbers``,
    the fifth cell is overwritten with the mapped value.
    """
    for table in document.tables:
        if not table.rows:
            continue
        header_texts = [cell.text.strip() for cell in table.rows[0].cells]
        is_directory = (
            len(header_texts) >= 5
            and header_texts[0] == "RPS目录"
            and header_texts[4] == "页码"
        )
        if not is_directory:
            continue
        for body_row in table.rows[1:]:
            directory_code = body_row.cells[0].text.strip()
            if directory_code in page_numbers:
                body_row.cells[4].text = page_numbers[directory_code]
        # Only the first matching table is treated as the directory.
        return
|
||||||
|
|
||||||
|
|
||||||
|
def _replace_placeholders(
    document,
    replacements: dict[str, MergedField],
    merged_fields: dict[str, MergedField],
) -> tuple[int, int, int]:
    """Substitute ``{{key}}`` placeholders in every paragraph of the document.

    Args:
        document: Document whose body and table paragraphs are scanned.
        replacements: Full placeholder text (``"{{key}}"``) -> field.
        merged_fields: All merged fields, used to build a default field
            for placeholders missing from ``replacements``.

    Returns:
        ``(highlight_count, missing_count, llm_only_count)`` counted per
        substituted placeholder.
    """
    highlight_count = 0
    missing_count = 0
    llm_only_count = 0

    for paragraph in _iter_paragraphs(document):
        original_text = paragraph.text
        if "{{" not in original_text or "}}" not in original_text:
            continue

        substituted: list[MergedField] = []

        def substitute(match: re.Match[str]) -> str:
            resolved = replacements.get(match.group(0)) or _default_placeholder_field(
                match.group(1), merged_fields
            )
            substituted.append(resolved)
            return resolved.value

        rendered = PLACEHOLDER_RE.sub(substitute, original_text)
        if rendered == original_text:
            continue

        # The whole paragraph is styled after the first flagged field, or
        # after the first substituted field when none is flagged.
        flagged = [item for item in substituted if item.highlight_reason != "none"]
        style_source = flagged[0] if flagged else substituted[0]
        _replace_paragraph_text(paragraph, rendered, style_source)

        highlight_count += len(flagged)
        missing_count += sum(1 for item in substituted if item.highlight_reason == "missing")
        llm_only_count += sum(1 for item in substituted if item.highlight_reason == "llm_only")

    return highlight_count, missing_count, llm_only_count
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_paragraphs(document):
    """Yield body paragraphs, then the paragraphs of every top-level table cell.

    Tables are walked in document order, row by row and cell by cell.
    """
    for body_paragraph in document.paragraphs:
        yield body_paragraph
    for table in document.tables:
        for table_row in table.rows:
            for table_cell in table_row.cells:
                for cell_paragraph in table_cell.paragraphs:
                    yield cell_paragraph
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_known_template_replacements(document, merged_fields: dict[str, MergedField], *, template_code: str = "") -> int:
    """Replace sample text baked into the template with real values.

    Date placeholders and sample dates become today's local date. Unless
    ``template_code`` starts with ``ch1_11``, the sample product and company
    names are replaced by the merged ``product_name`` / ``applicant_name``.

    Returns:
        Number of paragraphs (body and table cells) whose text changed.
    """
    product = _field_value(merged_fields, "product_name")
    applicant = _field_value(merged_fields, "applicant_name")
    today = timezone.localdate().strftime("%Y年%m月%d日")

    literal_map = {
        "xxxx年xx月xx日": today,
        "XXXX年XX月XX日": today,
        "xxxx 年 xx 月 xx 日": today,
        "XXXX 年 XX 月 XX 日": today,
        "2023年09月20日": today,
        # First eight characters of ``today`` are the "YYYY年MM月" part.
        "2023 年 10 月": today[:8],
    }
    keeps_sample_names = template_code.startswith("ch1_11")
    if not keeps_sample_names:
        literal_map["呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)"] = product
        literal_map["呼吸道合胞病毒、肺炎支原体核酸检测试剂盒"] = product
        literal_map["呼吸道合胞病毒 、肺炎支产品名称: 原体核酸检测试剂盒(荧"] = f"产品名称:{product}"
        literal_map["光PCR法)"] = ""
        literal_map["卡尤迪生物科技宜兴有限公司"] = applicant

    return sum(
        _replace_text_in_paragraph(paragraph, literal_map, merged_fields)
        for paragraph in _iter_paragraphs(document)
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _default_placeholder_field(key: str, merged_fields: dict[str, MergedField]) -> MergedField:
    """Build the field used for a placeholder that has no merged value.

    ``declaration_date`` resolves to today's local date as a plain field.
    Any other key becomes a ``missing`` field with value ``"/"``, labelled
    after a merged field with the same ``key`` when one exists.
    """
    if key == "declaration_date":
        today = timezone.localdate().strftime("%Y年%m月%d日")
        return _plain_field(key, "日期", today)

    label = next(
        (candidate.label for candidate in merged_fields.values() if candidate.key == key),
        key,
    )
    return MergedField(
        key=key,
        label=label,
        value="/",
        source="missing",
        evidence="模板字段未从说明书中抽取到",
        confidence=0.0,
        highlight_reason="missing",
        needs_review=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _substitute_literals(text: str, replacements: dict[str, str]) -> str:
    """Apply all literal replacements to ``text`` in one left-to-right pass.

    Inserted text is never re-scanned, so a replacement value that contains
    another key (for example a product name ending in "光PCR法)") is not
    mangled by a later rule. When several keys match at the same position,
    the one listed first in ``replacements`` wins. Empty keys are ignored.
    """
    needles = [old for old in replacements if old]
    if not needles:
        return text
    pattern = re.compile("|".join(re.escape(old) for old in needles))
    return pattern.sub(lambda match: replacements[match.group(0)], text)


def _replace_text_in_paragraph(paragraph, replacements: dict[str, str], merged_fields: dict[str, MergedField]) -> int:
    """Replace known literal strings inside one paragraph.

    Args:
        paragraph: Paragraph whose text is inspected and possibly rewritten.
        replacements: Literal text -> replacement text.
        merged_fields: Merged fields; ``product_name`` (when present)
            decides how the rewritten paragraph is highlighted.

    Returns:
        1 when the paragraph text changed, otherwise 0.
    """
    text = paragraph.text
    new_text = _substitute_literals(text, replacements)
    if new_text == text:
        return 0
    field = merged_fields.get("product_name") or MergedField(
        key="product_name",
        label="产品名称",
        value=new_text,
        source="rule",
        evidence="",
        confidence=0.0,
    )
    _replace_paragraph_text(paragraph, new_text, field)
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
def _rebuild_product_list_table(document, merged_fields: dict[str, MergedField]) -> None:
    """Regenerate the product-list chapter's intro text and tables.

    Rewrites the intro and spec-difference paragraphs, refills the product
    table (header ``包装规格 / 货号 / 组成 / 组分 / 主要组成成分 / 规格/数量``)
    and, when a component table was extracted, the component comparison
    table as well.
    """
    product = _field_value(merged_fields, "product_name")
    package_specification = _field_value(merged_fields, "package_specification")
    component_table = _component_table_payload(merged_fields)
    component_notes = _field_value(merged_fields, "component_notes")

    for paragraph in document.paragraphs:
        if "的包装规格、货号、组分及主要组成成分见下表" in paragraph.text:
            intro_field = merged_fields.get("product_name") or _plain_field("product_name", "产品名称", product)
            _replace_paragraph_text(
                paragraph,
                f"{product}的包装规格、货号、组分及主要组成成分见下表:",
                intro_field,
            )
        if "规格A和规格B的区别" in paragraph.text and component_notes != "/":
            notes_field = merged_fields.get("component_notes") or _plain_field(
                "component_notes", "主要组成成分备注", component_notes
            )
            _replace_paragraph_text(paragraph, component_notes, notes_field)

    expected_header = ["包装规格", "货号", "组成", "组分", "主要组成成分", "规格/数量"]
    target = None
    for candidate in document.tables:
        if not candidate.rows:
            continue
        header = [cell.text.strip() for cell in candidate.rows[0].cells]
        if header[:6] == expected_header:
            target = candidate
            break

    specs = _component_specs(component_table)
    if not specs:
        # No spec columns in the component table: fall back to the
        # semicolon-separated package specification (no column index).
        pieces = package_specification.replace(";", ";").split(";")
        specs = [(piece.strip(), None) for piece in pieces if piece.strip()]

    if target is not None:
        _clear_table_body(target)
        if component_table:
            _fill_product_component_table(target, component_table, specs)
        else:
            if not specs:
                specs = [("/", None)]
            composition = _field_value(merged_fields, "composition")
            component_name = _field_value(merged_fields, "component_name")
            main_component = _field_value(merged_fields, "main_component")
            quantity = _field_value(merged_fields, "quantity")
            # At most eight specification rows are written.
            for spec, _index in specs[:8]:
                cells = target.add_row().cells
                cells[0].text = spec
                cells[1].text = "/"
                cells[2].text = composition
                cells[3].text = component_name
                cells[4].text = main_component
                cells[5].text = quantity

    if component_table:
        _rebuild_component_comparison_table(document, component_table, specs)
|
||||||
|
|
||||||
|
|
||||||
|
def _field_value(merged_fields: dict[str, MergedField], key: str) -> str:
    """Return the value of field ``key``, or ``"/"`` when absent or empty."""
    entry = merged_fields.get(key)
    value = entry.value if entry else None
    return value if value else "/"
|
||||||
|
|
||||||
|
|
||||||
|
def _plain_field(key: str, label: str, value: str) -> MergedField:
    """Create a rule-sourced field with no evidence and zero confidence."""
    attributes = {
        "key": key,
        "label": label,
        "value": value,
        "source": "rule",
        "evidence": "",
        "confidence": 0.0,
    }
    return MergedField(**attributes)
|
||||||
|
|
||||||
|
|
||||||
|
def _component_table_payload(merged_fields: dict[str, MergedField]) -> dict:
    """Decode the ``component_table`` field into ``{"header", "rows"}``.

    Returns ``{}`` when the field is absent, empty, ``"/"``, not valid JSON,
    not a JSON object, has a non-list header/rows, or has no rows. Treating
    a row-less table as "no payload" lets callers fall back to the plain
    specification rows instead of emitting an empty table body.
    """
    field = merged_fields.get("component_table")
    if not field or not field.value or field.value == "/":
        return {}
    try:
        payload = json.loads(field.value)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a value that is not str/bytes.
        return {}
    if not isinstance(payload, dict):
        return {}
    rows = payload.get("rows") or []
    header = payload.get("header") or []
    if not isinstance(header, list) or not isinstance(rows, list):
        return {}
    if not rows:
        return {}
    return {"header": header, "rows": rows}
|
||||||
|
|
||||||
|
|
||||||
|
def _component_specs(component_table: dict) -> list[tuple[str, int]]:
    """List ``(spec_label, column_index)`` for the table's spec columns.

    Spec columns are the non-empty header cells from the third column on.
    A header wrapped as ``规格(...)`` (full- or half-width parentheses) is
    unwrapped to its inner text. Only that wrapper is removed, so labels
    that merely contain or end with parentheses keep them balanced.
    """
    header = component_table.get("header") or []
    specs: list[tuple[str, int]] = []
    for index, value in enumerate(header[2:], start=2):
        label = str(value or "").strip()
        if not label:
            continue
        for prefix in ("规格(", "规格("):
            if label.startswith(prefix):
                label = label[len(prefix):]
                # Drop exactly one closing parenthesis: the wrapper's own.
                if label.endswith((")", ")")):
                    label = label[:-1]
                label = label.strip()
                break
        specs.append((label, index))
    return specs
|
||||||
|
|
||||||
|
|
||||||
|
def _clear_table_body(table) -> None:
    """Remove every row except the first (header) row from ``table``."""
    # Snapshot the rows first; removal mutates the underlying XML element.
    for body_row in list(table.rows)[1:]:
        table._tbl.remove(body_row._tr)
|
||||||
|
|
||||||
|
|
||||||
|
def _fill_product_component_table(table, component_table: dict, specs: list[tuple[str, int | None]]) -> None:
    """Append one product-table row per (specification, component) pair.

    Columns written: spec label, ``"/"`` (item number), ``"/"``
    (composition), component name (payload column 0), main component
    (payload column 1) and the quantity from the spec's payload column.
    When a spec has no payload column (index ``None``), the quantity is
    written as ``"/"`` rather than repeating the component name.
    """
    rows = component_table.get("rows") or []
    for spec_label, spec_index in specs:
        for row in rows:
            cells = table.add_row().cells
            cells[0].text = spec_label
            cells[1].text = "/"
            cells[2].text = "/"
            cells[3].text = _row_value(row, 0)
            cells[4].text = _row_value(row, 1)
            cells[5].text = "/" if spec_index is None else _row_value(row, spec_index)
|
||||||
|
|
||||||
|
|
||||||
|
def _rebuild_component_comparison_table(document, component_table: dict, specs: list[tuple[str, int | None]]) -> None:
    """Refill the table whose first header cell is ``组分名称``.

    The header becomes ``组分名称`` followed by the spec labels (padded with
    ``备注`` up to the table width). Each payload row yields one table row:
    component name, then one quantity per spec, then ``"/"`` in any
    remaining cells. Does nothing when no such table exists.

    Specs without a payload column (index ``None``, produced by the
    package-specification fallback) are written as ``"/"`` instead of
    raising on the lookup.
    """
    target = None
    for table in document.tables:
        header = [cell.text.strip() for cell in table.rows[0].cells] if table.rows else []
        if header and header[0] == "组分名称":
            target = table
            break
    if target is None:
        return

    _clear_table_body(target)
    header_cells = target.rows[0].cells
    labels = ["组分名称", *[spec for spec, _index in specs[: len(header_cells) - 1]]]
    while len(labels) < len(header_cells):
        labels.append("备注")
    for index, label in enumerate(labels[: len(header_cells)]):
        header_cells[index].text = label

    for row in component_table.get("rows") or []:
        cells = target.add_row().cells
        cells[0].text = _row_value(row, 0)
        row_specs = specs[: len(cells) - 1]
        for cell_index, (_spec_label, spec_index) in enumerate(row_specs, start=1):
            cells[cell_index].text = "/" if spec_index is None else _row_value(row, spec_index)
        for cell_index in range(len(row_specs) + 1, len(cells)):
            cells[cell_index].text = "/"
|
||||||
|
|
||||||
|
|
||||||
|
def _row_value(row, index: int | None) -> str:
    """Return the stripped text of ``row[index]``, or ``"/"`` when unavailable.

    ``"/"`` is returned when ``row`` is not a list, ``index`` is ``None`` or
    beyond the end of the row, or the cell is empty. ``None`` is accepted
    because fallback specs carry no column index.
    """
    if index is None or not isinstance(row, list) or index >= len(row):
        return "/"
    value = str(row[index] or "").strip()
    return value or "/"
|
||||||
171
review_agent/regulatory_info_package/services/field_extract.py
Normal file
171
review_agent/regulatory_info_package/services/field_extract.py
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Callable
|
||||||
|
|
||||||
|
from review_agent.llm import generate_completion
|
||||||
|
from review_agent.regulatory_info_package.schemas import InstructionExtractResult
|
||||||
|
|
||||||
|
|
||||||
|
# Rule-based extraction table: field key -> (display label, regex).
# Group 1 of each regex is the raw value. For labelled fields it is the rest
# of the line after the label and any colons/whitespace; for ``standard_no``
# it is the standard code itself. Several fields accept synonym labels.
FIELD_PATTERNS = {
    "product_name": ("产品名称", r"产品名称[::\s]*([^\n\r]+)"),
    "applicant_name": ("申请人名称", r"(?:申请人名称|注册人/售后服务单位名称|注册人名称|售后服务单位名称|生产企业名称)[::\s]*([^\n\r]+)"),
    "manufacturer_name": ("生产企业名称", r"生产企业名称[::\s]*([^\n\r]+)"),
    "applicant_address": ("申请人住所", r"(?:申请人住所|注册人住所|生产企业住所)[::\s]*([^\n\r]+)"),
    "applicant_contact": ("申请人联系方式", r"(?:联系方式|联系电话|电话)[::\s]*([^\n\r]+)"),
    "production_address": ("生产地址", r"生产地址[::\s]*([^\n\r]+)"),
    "storage_condition": ("储存条件", r"(?:储存条件|贮存条件|保存条件)[::\s]*([^\n\r]+)"),
    "intended_use": ("预期用途", r"预期用途[::\s]*([^\n\r]+)"),
    "package_specification": ("包装规格", r"(?:包装规格|规格)[::\s]*([^\n\r]+)"),
    "sample_type": ("样本类型", r"样本类型[::\s]*([^\n\r]+)"),
    "applicable_instrument": ("适用仪器", r"适用仪器[::\s]*([^\n\r]+)"),
    "standard_no": ("标准号", r"((?:GB|YY|WS|T/C[A-Z0-9]*)[ /T0-9.\-—]+)"),
}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_fields_by_rules(instruction: InstructionExtractResult) -> dict[str, dict]:
    """Extract fields from the parsed instruction with label and regex rules.

    For each entry of ``FIELD_PATTERNS`` the paragraph following a
    stand-alone label paragraph is preferred (confidence 0.82). Otherwise
    the regex is searched over the front text, paragraphs and section
    bodies (confidence 0.75). All regex hits are tried in order and the
    first one whose cleaned value is non-empty is used, so an early hit on
    a bare heading does not hide a later, valid one.

    The largest component table and the notes of the main-composition
    section are added as ``component_table`` / ``component_notes``.

    Returns:
        Field key -> ``{label, value, evidence, confidence, source}``.
    """
    text = "\n".join([instruction.front_text, *instruction.paragraphs, *instruction.sections.values()])
    results: dict[str, dict] = {}
    for key, (label, pattern) in FIELD_PATTERNS.items():
        section_value = _value_after_label_paragraph(instruction.paragraphs, label)
        if section_value:
            results[key] = {
                "label": label,
                "value": section_value,
                "evidence": f"【{label}】\n{section_value}",
                "confidence": 0.82,
                "source": "rule",
            }
            continue
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            value = _clean_value(match.group(1))
            if not value:
                continue
            results[key] = {
                "label": label,
                "value": value,
                # Evidence is capped at 240 characters.
                "evidence": match.group(0)[:240],
                "confidence": 0.75,
                "source": "rule",
            }
            break

    component_table = _best_component_table(instruction.component_tables)
    if component_table:
        results["component_table"] = {
            "label": "主要组成成分",
            "value": json.dumps(component_table, ensure_ascii=False),
            "evidence": "说明书【主要组成成分】表格",
            "confidence": 0.86,
            "source": "rule",
        }
    component_notes = _component_notes(instruction.sections)
    if component_notes:
        results["component_notes"] = {
            "label": "主要组成成分备注",
            "value": component_notes,
            "evidence": "说明书【主要组成成分】段落",
            "confidence": 0.8,
            "source": "rule",
        }
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def extract_fields_with_llm(instruction: InstructionExtractResult) -> dict[str, dict]:
    """Ask the LLM to extract fields from the instruction's front text.

    Only the first 6000 characters of ``front_text`` are sent. The reply is
    parsed as a JSON object and entries whose value is not a dict are
    dropped.

    Returns:
        Field key -> dict as returned by the model.
    """
    task_description = (
        "请从体外诊断试剂产品说明书中抽取字段,输出 JSON 对象,字段包括 "
        "product_name、storage_condition、intended_use、package_specification、sample_type、applicable_instrument、standard_no。"
        "每个字段值为 {label,value,evidence,confidence}。\n\n"
    )
    prompt = task_description + instruction.front_text[:6000]
    messages = [{"role": "user", "content": prompt}]
    raw = generate_completion(messages, temperature=0.0)

    extracted: dict[str, dict] = {}
    for key, value in _parse_json_object(raw).items():
        if isinstance(value, dict):
            extracted[key] = value
    return extracted
|
||||||
|
|
||||||
|
|
||||||
|
def run_llm_extract_with_retry(
    instruction: InstructionExtractResult,
    *,
    llm_extract_func: Callable[[InstructionExtractResult], dict[str, dict]] | None = None,
    sleep_func: Callable[[float], None] = time.sleep,
) -> dict[str, dict]:
    """Run the LLM extraction with up to three attempts.

    The attempts are preceded by pauses of 0, 1 and 2 seconds (the zero
    pause is skipped).

    Args:
        instruction: Parsed instruction passed to the extractor.
        llm_extract_func: Extractor to call; defaults to
            ``extract_fields_with_llm``.
        sleep_func: Sleep implementation, injectable for tests.

    Returns:
        The first successful extraction result.

    Raises:
        Exception: The error of the last attempt when all attempts fail.
    """
    extractor = llm_extract_func or extract_fields_with_llm
    failure: Exception | None = None
    for pause in (0, 1, 2):
        if pause:
            sleep_func(pause)
        try:
            outcome = extractor(instruction)
        except Exception as exc:
            failure = exc
        else:
            return outcome
    if failure:
        raise failure
    return {}
|
||||||
|
|
||||||
|
|
||||||
|
def run_parallel_extract(
    instruction: InstructionExtractResult,
    *,
    llm_extract_func: Callable[[InstructionExtractResult], dict[str, dict]] | None = None,
) -> dict:
    """Run rule-based and LLM extraction concurrently on two threads.

    A rule-extraction error propagates to the caller. An LLM error is
    captured as text in ``llm_error`` and leaves ``llm_results`` empty.

    Returns:
        ``{"regex_results": ..., "llm_results": ..., "llm_error": ...}``.
    """
    llm_results: dict = {}
    llm_error = ""
    with ThreadPoolExecutor(max_workers=2) as pool:
        rules_job = pool.submit(extract_fields_by_rules, instruction)
        llm_job = pool.submit(run_llm_extract_with_retry, instruction, llm_extract_func=llm_extract_func)
        regex_results = rules_job.result()
        try:
            llm_results = llm_job.result()
        except Exception as exc:
            llm_error = str(exc)
    return {"regex_results": regex_results, "llm_results": llm_results, "llm_error": llm_error}
|
||||||
|
|
||||||
|
|
||||||
|
def save_field_extract_result(path: str | Path, payload: dict) -> Path:
    """Write ``payload`` as indented UTF-8 JSON and return the file path.

    Parent directories are created when missing; non-ASCII text is written
    unescaped.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return destination
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_value(value: str) -> str:
    """Normalise a raw captured value to its first clause.

    Leading label residue (a closing bracket of the label, colons and
    whitespace, e.g. the "】:" left over from "【产品名称】:") is removed, then
    the text is cut at the first full stop or semicolon. A value made only
    of such residue yields ``""``.
    """
    cleaned = value.strip().lstrip("】]:: \t\u3000")
    if not cleaned:
        return ""
    return re.split(r"[。;;]", cleaned)[0].strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _value_after_label_paragraph(paragraphs: list[str], label: str) -> str:
    """Return the cleaned paragraph that follows a stand-alone label.

    A label paragraph is exactly ``label``, ``【label】`` or ``[label]``
    after stripping. Returns ``""`` when no label paragraph has a
    successor.
    """
    accepted_forms = {f"【{label}】", f"[{label}]", label}
    for current, following in zip(paragraphs, paragraphs[1:]):
        if current.strip() in accepted_forms:
            return _clean_value(following)
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_object(raw: str) -> dict:
    """Parse the JSON object embedded in an LLM reply.

    Markdown code fences and a leading ``json`` tag are stripped, then the
    text from the first ``{`` to the last ``}`` is decoded.

    Returns:
        The decoded object, or ``{}`` when the reply contains no braces.

    Raises:
        json.JSONDecodeError: If the delimited text is not valid JSON.
    """
    body = (raw or "").strip()
    if body.startswith("```"):
        body = body.strip("`").strip()
        if body.lower().startswith("json"):
            body = body[4:].strip()
    opening, closing = body.find("{"), body.rfind("}")
    if opening == -1 or closing == -1:
        return {}
    return json.loads(body[opening : closing + 1])
|
||||||
|
|
||||||
|
|
||||||
|
def _best_component_table(component_tables: list[dict]) -> dict:
    """Return the component table with the most rows, or ``{}`` if none.

    On a tie the earliest table wins.
    """

    def row_count(table: dict) -> int:
        return len(table.get("rows") or [])

    return max(component_tables or [], key=row_count, default={})
|
||||||
|
|
||||||
|
|
||||||
|
def _component_notes(sections: dict[str, str]) -> str:
    """Return the stripped body of the first section whose title contains ``主要组成``.

    Returns ``""`` when there is no such section.
    """
    matching_bodies = (body.strip() for title, body in sections.items() if "主要组成" in title)
    return next(matching_bodies, "")
|
||||||
115
review_agent/regulatory_info_package/services/field_merge.py
Normal file
115
review_agent/regulatory_info_package/services/field_merge.py
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import MergedField
|
||||||
|
|
||||||
|
|
||||||
|
# Fields that must always appear in the merged result: key -> display label.
# A required field that neither extractor produced is merged as "missing".
REQUIRED_FIELDS = {
    "product_name": "产品名称",
    "applicant_name": "申请人名称",
    "package_specification": "包装规格",
    "intended_use": "预期用途",
    "storage_condition": "储存条件",
}
|
||||||
|
|
||||||
|
|
||||||
|
def _coerce_confidence(raw) -> float:
    """Convert an extractor-supplied confidence to float; 0.0 if absent or unparseable."""
    try:
        return float(raw or 0.0)
    except (TypeError, ValueError):
        # Model output may be free text such as "high".
        return 0.0


def merge_fields(rule_results: dict[str, dict], llm_results: dict[str, dict]) -> tuple[dict[str, MergedField], dict[str, list[dict]]]:
    """Merge rule-based and LLM extraction results field by field.

    Per key (required fields plus every key either extractor produced):

    * both values present and different -> rule value wins, marked
      ``conflict`` and recorded in ``conflict_fields``;
    * rule value present -> plain rule field;
    * only LLM value present -> marked ``llm_only`` and recorded;
    * neither -> value ``"/"``, marked ``missing`` and recorded.

    Confidence values that cannot be parsed as numbers are treated as 0.0
    so one malformed model answer does not abort the merge.

    Returns:
        ``(merged, summary)`` where ``summary`` has the keys
        ``missing_fields``, ``llm_only_fields`` and ``conflict_fields``.
    """
    merged: dict[str, MergedField] = {}
    missing_fields: list[dict] = []
    llm_only_fields: list[dict] = []
    conflict_fields: list[dict] = []
    keys = set(REQUIRED_FIELDS) | set(rule_results) | set(llm_results)
    # Sorted for a deterministic field order in the output.
    for key in sorted(keys):
        rule = rule_results.get(key) or {}
        llm = llm_results.get(key) or {}
        rule_value = str(rule.get("value") or "").strip()
        llm_value = str(llm.get("value") or "").strip()
        label = str(rule.get("label") or llm.get("label") or REQUIRED_FIELDS.get(key) or key)
        if rule_value and llm_value and rule_value != llm_value:
            field = MergedField(
                key=key,
                label=label,
                value=rule_value,
                source="rule_conflict",
                evidence=str(rule.get("evidence") or ""),
                confidence=_coerce_confidence(rule.get("confidence")),
                highlight_reason="conflict",
                needs_review=True,
                rule_value=rule_value,
                llm_value=llm_value,
            )
            conflict_fields.append(
                {
                    "field_key": key,
                    "field_label": label,
                    "rule_value": rule_value,
                    "llm_value": llm_value,
                    "selected_value": rule_value,
                    "handling": "规则优先,写入值高亮并进入追溯清单",
                }
            )
        elif rule_value:
            field = MergedField(
                key=key,
                label=label,
                value=rule_value,
                source="rule",
                evidence=str(rule.get("evidence") or ""),
                confidence=_coerce_confidence(rule.get("confidence")),
            )
        elif llm_value:
            field = MergedField(
                key=key,
                label=label,
                value=llm_value,
                source="llm",
                evidence=str(llm.get("evidence") or ""),
                confidence=_coerce_confidence(llm.get("confidence")),
                highlight_reason="llm_only",
                needs_review=True,
                llm_value=llm_value,
            )
            llm_only_fields.append(_review_dict(field))
        else:
            field = MergedField(
                key=key,
                label=label,
                value="/",
                source="missing",
                evidence="",
                confidence=0.0,
                highlight_reason="missing",
                needs_review=True,
            )
            missing_fields.append(_review_dict(field))
        merged[key] = field
    return merged, {
        "missing_fields": missing_fields,
        "llm_only_fields": llm_only_fields,
        "conflict_fields": conflict_fields,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def save_merged_fields(path: str | Path, merged: dict[str, MergedField], summary: dict[str, list[dict]]) -> Path:
    """Write merged fields and the review summary as indented UTF-8 JSON.

    The file holds a ``fields`` object (each field's attribute dict)
    followed by the entries of ``summary``. Parent directories are created.

    Returns:
        The path that was written.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    document = {"fields": {key: vars(field) for key, field in merged.items()}}
    document.update(summary)
    destination.write_text(json.dumps(document, ensure_ascii=False, indent=2), encoding="utf-8")
    return destination
|
||||||
|
|
||||||
|
|
||||||
|
def _review_dict(field: MergedField) -> dict:
    """Build the review-list entry for ``field``; ``target_file`` starts empty."""
    return dict(
        target_file="",
        field_key=field.key,
        field_label=field.label,
        final_value=field.value,
        highlight_reason=field.highlight_reason,
        needs_review=field.needs_review,
    )
|
||||||
|
|
||||||
105
review_agent/regulatory_info_package/services/input_select.py
Normal file
105
review_agent/regulatory_info_package/services/input_select.py
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileAttachment, FileSummaryBatch, FileSummaryItem
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class InstructionInputSelection:
    """Outcome of choosing the instruction docx for a conversation."""

    # Selection state; this module produces "selected", "waiting_user"
    # and "missing".
    status: str
    # Name and storage location of the chosen file (set when selected).
    file_name: str = ""
    storage_path: str = ""
    # Set when the file was chosen from the conversation's attachments.
    attachment: FileAttachment | None = None
    # Set when the choice came from a file-summary batch.
    source_summary_batch: FileSummaryBatch | None = None
    source_summary_item_id: int | None = None
    # File names the user must choose between (status "waiting_user").
    candidates: list[str] = field(default_factory=list)
    # User-facing prompt for the "waiting_user" and "missing" states.
    message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def select_instruction_input(conversation: Conversation, message: str) -> InstructionInputSelection:
    """Choose the instruction docx to use for this conversation.

    Resolution order over the active docx attachments: a single file named
    in ``message``; else a single file whose name contains ``说明书``; else
    the only attachment. With several candidates the user is asked to
    confirm. With no attachments the latest successful summary batch is
    consulted, and finally a ``missing`` result is returned.
    """
    attachments = _active_docx_attachments(conversation)

    named = _match_by_message(attachments, message)
    if len(named) == 1:
        return _selection_from_attachment(named[0])

    manuals = [item for item in attachments if "说明书" in item.original_name]
    for pool in (manuals, attachments):
        if len(pool) == 1:
            return _selection_from_attachment(pool[0])

    if len(manuals) > 1 or len(attachments) > 1:
        # Prefer listing only the instruction-like names when there are any.
        names = [item.original_name for item in (manuals or attachments)]
        return InstructionInputSelection(
            status="waiting_user",
            candidates=names,
            message="请确认用于生成第1章监管信息的说明书文件名:" + "、".join(names),
        )

    from_summary = _select_from_latest_summary(conversation, message)
    if from_summary:
        return from_summary
    return InstructionInputSelection(status="missing", message="请先上传产品说明书 docx 文件。")
|
||||||
|
|
||||||
|
|
||||||
|
def _active_docx_attachments(conversation: Conversation) -> list[FileAttachment]:
    """List the conversation's active, non-deleted ``.docx`` attachments.

    Results are ordered by name, then by descending version number.
    """
    queryset = FileAttachment.objects.filter(conversation=conversation, is_active=True)
    queryset = queryset.exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    queryset = queryset.filter(original_name__iendswith=".docx")
    return list(queryset.order_by("original_name", "-version_no"))
|
||||||
|
|
||||||
|
|
||||||
|
def _compact_text(text: str | None) -> str:
    """Lower-case ``text`` and remove all whitespace."""
    return "".join((text or "").lower().split())


def _match_by_message(candidates: list[FileAttachment], message: str) -> list[FileAttachment]:
    """Return the attachments whose file name is mentioned in ``message``.

    Both the message and the file names (with and without extension) are
    compared lower-cased with all whitespace removed, so names containing
    spaces match however the user spaced them.
    """
    compact_message = _compact_text(message)
    matched = []
    for attachment in candidates:
        stem = _compact_text(Path(attachment.original_name).stem)
        name = _compact_text(attachment.original_name)
        # Empty names are skipped: "" is a substring of every message.
        if (stem and stem in compact_message) or (name and name in compact_message):
            matched.append(attachment)
    return matched
|
||||||
|
|
||||||
|
|
||||||
|
def _selection_from_attachment(attachment: FileAttachment) -> InstructionInputSelection:
    """Wrap an attachment in a ``selected`` result carrying its name and path."""
    details = {
        "file_name": attachment.original_name,
        "storage_path": attachment.storage_path,
        "attachment": attachment,
    }
    return InstructionInputSelection(status="selected", **details)
|
||||||
|
|
||||||
|
|
||||||
|
def _select_from_latest_summary(conversation: Conversation, message: str) -> InstructionInputSelection | None:
    """Choose the instruction docx from the latest successful summary batch.

    Items named in ``message`` are preferred; otherwise items whose name
    contains ``说明书`` are used. Names are compared lower-cased with all
    whitespace removed on both sides, and empty names never match.

    Returns:
        A ``selected`` result for exactly one candidate, a ``waiting_user``
        result for several, or ``None`` when there is no batch or no
        candidate.
    """
    batch = (
        FileSummaryBatch.objects.filter(conversation=conversation, status=FileSummaryBatch.Status.SUCCESS)
        .order_by("-finished_at", "-created_at", "-id")
        .first()
    )
    if not batch:
        return None
    items = list(batch.items.filter(file_name__iendswith=".docx").order_by("file_name", "id"))

    def compact(text) -> str:
        return "".join((text or "").lower().split())

    compact_message = compact(message)
    named = []
    for item in items:
        stem = compact(Path(item.file_name).stem)
        name = compact(item.file_name)
        if (stem and stem in compact_message) or (name and name in compact_message):
            named.append(item)

    candidates = named or [item for item in items if "说明书" in item.file_name]
    if len(candidates) == 1:
        item = candidates[0]
        return InstructionInputSelection(
            status="selected",
            file_name=item.file_name,
            storage_path=item.storage_path,
            source_summary_batch=batch,
            source_summary_item_id=item.pk,
        )
    if len(candidates) > 1:
        return InstructionInputSelection(
            status="waiting_user",
            source_summary_batch=batch,
            candidates=[item.file_name for item in candidates],
            message="请确认用于生成第1章监管信息的说明书文件名:" + "、".join(item.file_name for item in candidates),
        )
    return None
|
||||||
|
|
||||||
@@ -0,0 +1,77 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import InstructionExtractResult
|
||||||
|
|
||||||
|
|
||||||
|
def parse_instruction_docx(path: str | Path) -> InstructionExtractResult:
    """Parse an instruction docx into paragraphs, sections and tables.

    Blank paragraphs are dropped and cell text has its whitespace
    collapsed. ``front_text`` is the first 30 non-empty paragraphs joined
    by newlines.
    """
    file_path = Path(path)
    document = Document(file_path)

    paragraphs = []
    for paragraph in document.paragraphs:
        stripped = paragraph.text.strip()
        if stripped:
            paragraphs.append(stripped)

    tables = []
    for table in document.tables:
        grid = [[" ".join(cell.text.split()) for cell in row.cells] for row in table.rows]
        if grid:
            tables.append(grid)

    return InstructionExtractResult(
        source_file_name=file_path.name,
        paragraphs=paragraphs,
        sections=_build_sections(paragraphs),
        tables=tables,
        component_tables=_component_tables(tables),
        front_text="\n".join(paragraphs[:30]),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def save_instruction_extract_json(path: str | Path, result: InstructionExtractResult) -> Path:
    """Write the parsed instruction as indented UTF-8 JSON and return the path.

    Parent directories are created when missing.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    exported_attributes = (
        "source_file_name",
        "paragraphs",
        "sections",
        "tables",
        "component_tables",
        "front_text",
    )
    payload = {name: getattr(result, name) for name in exported_attributes}
    destination.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    return destination
|
||||||
|
|
||||||
|
|
||||||
|
def _build_sections(paragraphs: list[str]) -> dict[str, str]:
    """Group paragraphs under the heading that precedes them.

    Text before the first heading goes under ``"front"``. Heading titles
    are capped at 80 characters and sections without body text are
    omitted.

    Returns:
        Section title -> body paragraphs joined by newlines.
    """
    grouped: dict[str, list[str]] = {}
    heading = "front"
    for text in paragraphs:
        if _looks_like_heading(text):
            heading = text[:80]
            grouped.setdefault(heading, [])
        else:
            grouped.setdefault(heading, []).append(text)
    return {title: "\n".join(body).strip() for title, body in grouped.items() if body}
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_heading(text: str) -> bool:
    """Tell whether a paragraph looks like a section heading.

    A heading is at most 40 characters (after stripping) and either starts
    with ``【``, ``产品名称``, ``预期用途`` or ``主要组成``, or starts with a
    Chinese numeral followed by ``、``. Any numeral built from 一 to 十 is
    accepted, so sections numbered beyond six are not folded into the
    preceding one.
    """
    compact = text.strip()
    if len(compact) > 40:
        return False
    if compact.startswith(("【", "产品名称", "预期用途", "主要组成")):
        return True
    numeral, separator, _rest = compact.partition("、")
    return bool(separator) and bool(numeral) and all(char in "一二三四五六七八九十" for char in numeral)
|
||||||
|
|
||||||
|
|
||||||
|
def _component_tables(tables: list[list[list[str]]]) -> list[dict]:
    """Pick the tables whose header row mentions composition keywords.

    A table qualifies when its joined header cells contain ``组成``, ``组分``
    or ``成分``.

    Returns:
        One ``{"header": first_row, "rows": remaining_rows}`` per match.
    """
    keywords = ("组成", "组分", "成分")

    def is_component_header(header: list[str]) -> bool:
        header_text = "".join(header)
        return any(keyword in header_text for keyword in keywords)

    return [
        {"header": table[0], "rows": table[1:]}
        for table in tables
        if table and is_component_header(table[0])
    ]
|
||||||
|
|
||||||
@@ -0,0 +1,81 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import MergedField
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class LegacyDocCapability:
    """Immutable result of probing for native ``.doc`` writing support."""

    # "available" or "unavailable" in the code of this module.
    status: str
    # Name of the adapter that corresponds to the status.
    adapter: str
    # Human-readable detail, e.g. the reason Word COM is unavailable.
    message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def detect_legacy_doc_capability() -> LegacyDocCapability:
    """Report whether Word COM automation can be imported.

    The probe only attempts ``import win32com.client``. Any failure yields
    an ``unavailable`` result naming the exception type.
    """
    try:
        import win32com.client  # noqa: F401
    except Exception as exc:
        failure_reason = f"Word COM 不可用:{type(exc).__name__}"
        return LegacyDocCapability(
            status="unavailable",
            adapter="UnavailableLegacyDocAdapter",
            message=failure_reason,
        )
    else:
        return LegacyDocCapability(status="available", adapter="WordComDocAdapter", message="Word COM 可用")
|
||||||
|
|
||||||
|
|
||||||
|
def write_legacy_doc_or_fallback(
    source_path: str | Path,
    output_path: str | Path,
    merged_fields: dict[str, MergedField],
) -> tuple[Path, str, dict]:
    """Write the legacy ``.doc`` output natively, or fall back to a generated ``.docx``.

    Native writing is attempted only when the
    ``REGULATORY_INFO_PACKAGE_ENABLE_WORD_COM_NATIVE`` setting is truthy, the
    Word COM bindings import successfully and ``source_path`` exists. In that
    case the source is copied to ``output_path`` and the field summary is
    appended through Word. In every other case, and when the native write
    raises, a new ``.docx`` listing all merged fields is created next to
    ``output_path``.

    Args:
        source_path: Template file to copy for the native path.
        output_path: Requested output location; its parent directory is created.
        merged_fields: Fields whose ``label`` and ``value`` are written out.

    Returns:
        ``(path, status, summary)`` where ``status`` is ``"success"`` for a
        native write and ``"fallback_success"`` for the ``.docx`` fallback, and
        ``summary`` describes the capability and whether the fallback was used.
    """
    source = Path(source_path)
    output = Path(output_path)
    output.parent.mkdir(parents=True, exist_ok=True)
    capability = detect_legacy_doc_capability()
    native_enabled = bool(getattr(settings, "REGULATORY_INFO_PACKAGE_ENABLE_WORD_COM_NATIVE", False))
    if native_enabled and capability.status == "available" and source.exists():
        shutil.copy2(source, output)
        try:
            _append_doc_summary_with_word_com(output, merged_fields)
            return output, "success", {"doc": capability.__dict__, "fallback_used": False, "native_write": True}
        except Exception as exc:
            capability = LegacyDocCapability(
                status="unavailable",
                adapter="UnavailableLegacyDocAdapter",
                message=f"Word COM 写入失败:{exc}",
            )
            # The copy made above is an unfinished native output. Remove it so
            # the output directory only holds the fallback file. Removal is
            # best-effort: Word may still hold the file open.
            try:
                output.unlink()
            except OSError:
                pass
    fallback = output.with_suffix(".docx")
    document = Document()
    heading = document.add_paragraph()
    heading.add_run(output.stem).bold = True
    document.add_paragraph("【预生成版】当前未启用 .doc 原生写入,已生成 docx 兜底文件。")
    for field in merged_fields.values():
        document.add_paragraph(f"{field.label}:{field.value}")
    document.save(fallback)
    return fallback, "fallback_success", {"doc": capability.__dict__, "fallback_used": True, "native_enabled": native_enabled}
|
||||||
|
|
||||||
|
|
||||||
|
def _append_doc_summary_with_word_com(path: Path, merged_fields: dict[str, MergedField]) -> None:
    """Append the pre-filled field summary to the end of a Word document via COM.

    Args:
        path: Document to open, modify and save in place.
        merged_fields: Fields whose ``label`` and ``value`` become one line each.

    Raises:
        Exception: Import or COM failures propagate to the caller.
    """
    import pythoncom
    import win32com.client

    lines = ["", "【预生成版】以下字段由系统根据说明书预填,请人工复核。"]
    lines.extend(f"{field.label}:{field.value}" for field in merged_fields.values())

    # COM has to be initialised on the calling thread. This function may be
    # invoked from worker threads, where that has not happened yet.
    pythoncom.CoInitialize()
    try:
        # DispatchEx starts a dedicated Word process, so the Quit() below can
        # never close a Word instance that was already running.
        word = win32com.client.DispatchEx("Word.Application")
        try:
            word.Visible = False
            document = word.Documents.Open(str(path.resolve()))
            try:
                # Collapsed range just before the final paragraph mark.
                end_range = document.Range(document.Content.End - 1, document.Content.End - 1)
                end_range.InsertAfter("\r".join(lines))
                document.Save()
            finally:
                document.Close(False)
        finally:
            # Runs even when Close() fails, so no Word process is left behind.
            word.Quit()
    finally:
        pythoncom.CoUninitialize()
|
||||||
@@ -0,0 +1,186 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from pathlib import Path
|
||||||
|
from zipfile import ZipFile
|
||||||
|
from xml.etree import ElementTree
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryInfoPackageBatch
|
||||||
|
from review_agent.regulatory_info_package.constants import GENERATED_FILE_FAILED
|
||||||
|
from review_agent.regulatory_info_package.schemas import GeneratedFileResult, MergedField, TemplateSpec
|
||||||
|
from review_agent.regulatory_info_package.services.docx_document import write_docx_from_template
|
||||||
|
from review_agent.regulatory_info_package.services.legacy_doc_document import write_legacy_doc_or_fallback
|
||||||
|
from review_agent.regulatory_info_package.services.template_repository import copy_template_to_batch, template_specs
|
||||||
|
from review_agent.regulatory_info_package.storage import ensure_batch_subdir
|
||||||
|
|
||||||
|
|
||||||
|
def generate_package_documents(
    batch: RegulatoryInfoPackageBatch,
    config: dict,
    merged_fields: dict[str, MergedField],
) -> list[GeneratedFileResult]:
    """Generate every configured document of the package.

    Content templates are generated concurrently (at most four at a time).
    The directory template (code ``ch1_2_directory``) is generated afterwards
    because it needs the page counts of the content documents.

    Returns:
        One result per template: content documents in configuration order,
        followed by the directory document(s).
    """
    directory_specs: list[TemplateSpec] = []
    content_specs: list[TemplateSpec] = []
    for spec in template_specs(config):
        (directory_specs if spec.code == "ch1_2_directory" else content_specs).append(spec)

    results: list[GeneratedFileResult] = []
    with ThreadPoolExecutor(max_workers=min(4, len(content_specs) or 1)) as executor:
        futures = [executor.submit(_generate_one, batch, config, spec, merged_fields) for spec in content_specs]
        # Collect in submission order rather than completion order so the
        # result list is the same on every run.
        results.extend(future.result() for future in futures)
    page_numbers = _directory_page_numbers(results)
    for spec in directory_specs:
        results.append(_generate_one(batch, config, spec, merged_fields, directory_page_numbers=page_numbers))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _generate_one(
    batch: RegulatoryInfoPackageBatch,
    config: dict,
    spec: TemplateSpec,
    merged_fields: dict[str, MergedField],
    *,
    directory_page_numbers: dict[str, str] | None = None,
) -> GeneratedFileResult:
    """Generate one output document and describe the outcome.

    Templates whose ``file_format`` is ``"doc"`` go through the legacy writer
    (which may fall back to ``.docx``); everything else is rendered from the
    ``.docx`` template. Any exception is converted into a
    ``GENERATED_FILE_FAILED`` result carrying ``str(exc)``, so this function
    does not raise to its caller.
    """
    try:
        source_copy = copy_template_to_batch(batch, config, spec)
        destination = ensure_batch_subdir(batch, "generated") / spec.output_name
        if spec.file_format != "doc":
            highlight_count, missing_count, llm_only_count = write_docx_from_template(
                source_copy,
                destination,
                merged_fields,
                template_code=spec.code,
                directory_page_numbers=directory_page_numbers,
            )
            written_path, written_format, outcome = destination, "docx", "success"
        else:
            written_path, outcome, _adapter_summary = write_legacy_doc_or_fallback(
                source_copy, destination, merged_fields
            )
            # The legacy writer may have produced a .docx fallback instead.
            written_format = written_path.suffix.lower().lstrip(".")
            highlight_count = missing_count = llm_only_count = 0
        return GeneratedFileResult(
            template_code=spec.code,
            file_name=written_path.name,
            requested_format=spec.file_format,
            actual_format=written_format,
            status=outcome,
            path=str(written_path),
            highlight_count=highlight_count,
            missing_count=missing_count,
            llm_only_count=llm_only_count,
        )
    except Exception as exc:
        return GeneratedFileResult(
            template_code=spec.code,
            file_name=spec.output_name,
            requested_format=spec.file_format,
            actual_format=spec.file_format,
            status=GENERATED_FILE_FAILED,
            error_message=str(exc),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def _directory_page_numbers(results: list[GeneratedFileResult]) -> dict[str, str]:
|
||||||
|
page_numbers = {"CH1.2": "1"}
|
||||||
|
for result in results:
|
||||||
|
if result.status not in {"success", "fallback_success"} or not result.path:
|
||||||
|
continue
|
||||||
|
code = _directory_code_from_file_name(result.file_name)
|
||||||
|
if not code:
|
||||||
|
continue
|
||||||
|
page_numbers[code] = str(count_document_pages(result.path))
|
||||||
|
return page_numbers
|
||||||
|
|
||||||
|
|
||||||
|
def _directory_code_from_file_name(file_name: str) -> str:
|
||||||
|
stem = Path(file_name).stem.strip()
|
||||||
|
return stem.split()[0] if stem.startswith("CH") else ""
|
||||||
|
|
||||||
|
|
||||||
|
def count_document_pages(path: str | Path) -> int:
    """Return the page count of a document, defaulting to 1.

    The strategies below are tried in order and the first non-zero answer
    wins. A missing file, or no strategy succeeding, yields 1.
    """
    target = Path(path)
    if target.exists():
        strategies = (
            _count_pages_from_docx_properties,
            _count_pages_with_pywin32,
            _count_pages_with_powershell_word,
        )
        for strategy in strategies:
            found = strategy(target)
            if found:
                return found
    return 1
|
||||||
|
|
||||||
|
|
||||||
|
def _count_pages_from_docx_properties(file_path: Path) -> int:
|
||||||
|
if file_path.suffix.lower() != ".docx":
|
||||||
|
return 0
|
||||||
|
try:
|
||||||
|
with ZipFile(file_path) as archive:
|
||||||
|
root = ElementTree.fromstring(archive.read("docProps/app.xml"))
|
||||||
|
namespace = {"ep": "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"}
|
||||||
|
pages = root.find("ep:Pages", namespace)
|
||||||
|
return max(int((pages.text or "").strip()), 1) if pages is not None else 0
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _count_pages_with_pywin32(file_path: Path) -> int:
|
||||||
|
try:
|
||||||
|
import win32com.client
|
||||||
|
|
||||||
|
word = win32com.client.DispatchEx("Word.Application")
|
||||||
|
word.Visible = False
|
||||||
|
document = None
|
||||||
|
try:
|
||||||
|
document = word.Documents.Open(str(file_path.resolve()), ReadOnly=True)
|
||||||
|
document.Repaginate()
|
||||||
|
return max(int(document.ComputeStatistics(2)), 1)
|
||||||
|
finally:
|
||||||
|
if document is not None:
|
||||||
|
document.Close(False)
|
||||||
|
word.Quit()
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
def _count_pages_with_powershell_word(file_path: Path) -> int:
|
||||||
|
script = r"""
|
||||||
|
param([string]$Path)
|
||||||
|
$word = $null
|
||||||
|
$doc = $null
|
||||||
|
try {
|
||||||
|
$word = New-Object -ComObject Word.Application
|
||||||
|
$word.Visible = $false
|
||||||
|
$doc = $word.Documents.Open($Path, $false, $true)
|
||||||
|
$doc.Repaginate()
|
||||||
|
[Console]::Out.Write($doc.ComputeStatistics(2))
|
||||||
|
exit 0
|
||||||
|
} catch {
|
||||||
|
[Console]::Error.Write($_.Exception.Message)
|
||||||
|
exit 1
|
||||||
|
} finally {
|
||||||
|
if ($doc -ne $null) { $doc.Close($false) | Out-Null }
|
||||||
|
if ($word -ne $null) { $word.Quit() | Out-Null }
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
completed = subprocess.run(
|
||||||
|
["powershell.exe", "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", script, str(file_path.resolve())],
|
||||||
|
capture_output=True,
|
||||||
|
check=False,
|
||||||
|
text=True,
|
||||||
|
timeout=8,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
return 0
|
||||||
|
if completed.returncode != 0:
|
||||||
|
return 0
|
||||||
|
try:
|
||||||
|
return max(int(completed.stdout.strip()), 1)
|
||||||
|
except ValueError:
|
||||||
|
return 0
|
||||||
12
review_agent/regulatory_info_package/services/summary.py
Normal file
12
review_agent/regulatory_info_package/services/summary.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def build_assistant_summary(*, batch_no: str, exports: list[dict], failed_files: list[dict]) -> str:
    """Build the Markdown summary message for a finished batch.

    The text starts with a completion line containing ``batch_no`` and a blank
    line, then one download link per export (zip exports first, the rest in
    their given order), then one line per failed file.

    Args:
        batch_no: Batch number shown in the first line.
        exports: Dicts providing ``file_name`` and ``download_url``; an entry
            is a zip export when ``export_type`` is ``"zip"`` or the file name
            ends with ``.zip``.
        failed_files: Dicts read via ``file_name`` and ``error_message``.
    """

    def _is_zip(entry: dict) -> bool:
        return entry.get("export_type") == "zip" or str(entry.get("file_name", "")).endswith(".zip")

    archives = [entry for entry in exports if _is_zip(entry)]
    others = [entry for entry in exports if not _is_zip(entry)]

    summary = [f"已完成第1章监管信息材料包生成,批次号:{batch_no}。", ""]
    summary += [f"- [{entry['file_name']}]({entry['download_url']})" for entry in (*archives, *others)]
    summary += [
        f"- {entry.get('file_name')}:生成失败,{entry.get('error_message') or '原因待查看'}"
        for entry in failed_files
    ]
    return "\n".join(summary)
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
# Default template configuration, located in the "templates" directory one
# level above this module's directory.
CONFIG_PATH = Path(__file__).resolve().parents[1] / "templates" / "regulatory_info_package_templates_v1.yaml"


def load_template_config(path: str | Path | None = None) -> dict:
    """Load the template configuration YAML.

    Args:
        path: Configuration file to read; ``CONFIG_PATH`` when falsy.

    Returns:
        The parsed mapping (``{}`` for an empty file). A non-empty
        ``source_dir`` is rewritten to an absolute path resolved against
        ``settings.BASE_DIR``.
    """
    config_path = CONFIG_PATH if not path else Path(path)
    with config_path.open("r", encoding="utf-8") as stream:
        loaded = yaml.safe_load(stream)
    config = loaded or {}
    relative_source = config.get("source_dir")
    if relative_source:
        config["source_dir"] = str((Path(settings.BASE_DIR) / relative_source).resolve())
    return config
|
||||||
|
|
||||||
|
|
||||||
|
def compute_config_hash(path: str | Path | None = None) -> str:
    """Return the SHA-256 hex digest of the configuration file's bytes.

    Args:
        path: Configuration file to hash; ``CONFIG_PATH`` when falsy.
    """
    target = CONFIG_PATH if not path else Path(path)
    return hashlib.sha256(target.read_bytes()).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def validate_template_config(config: dict) -> list[str]:
    """Validate a loaded template configuration.

    Checks that the source directory is configured and exists, that exactly
    six templates are declared, that template codes are present and unique,
    and that every template names an existing source file and an output name.

    Returns:
        Human-readable error messages; an empty list means the config is valid.
    """
    errors: list[str] = []
    raw_source_dir = config.get("source_dir") or ""
    source_dir = Path(raw_source_dir)
    # Path("") is the current directory, which always exists, so an
    # unconfigured source directory has to be rejected explicitly.
    if not raw_source_dir:
        source_dir_ok = False
        errors.append("模板源目录未配置。")
    else:
        source_dir_ok = source_dir.exists()
        if not source_dir_ok:
            errors.append(f"模板源目录不存在:{source_dir}")

    templates = config.get("templates") or []
    if len(templates) != 6:
        errors.append("第1章监管信息模板配置必须包含 6 个模板。")

    seen: set[str] = set()
    for template in templates:
        code = str(template.get("code") or "")
        if not code:
            errors.append("模板 code 不能为空。")
        elif code in seen:
            errors.append(f"模板 code 重复:{code}")
        seen.add(code)

        source_file = str(template.get("source_file") or "")
        output_name = str(template.get("output_name") or "")
        if not source_file:
            errors.append(f"模板 {code} 缺少 source_file。")
        elif source_dir_ok and not (source_dir / source_file).exists():
            # Only meaningful when the directory itself is usable.
            errors.append(f"模板源文件不存在:{source_file}")
        if not output_name:
            errors.append(f"模板 {code} 缺少 output_name。")
    return errors
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import TemplateSpec
|
||||||
|
from review_agent.regulatory_info_package.storage import ensure_batch_subdir
|
||||||
|
from review_agent.models import RegulatoryInfoPackageBatch
|
||||||
|
|
||||||
|
|
||||||
|
def template_specs(config: dict) -> list[TemplateSpec]:
    """Convert the ``templates`` entries of a config into ``TemplateSpec`` objects.

    ``code``, ``output_name`` and ``source_file`` are required keys. The
    remaining attributes fall back to the defaults visible below
    (``strategy`` defaults to the template code).
    """
    specs: list[TemplateSpec] = []
    for entry in config.get("templates") or []:
        code = entry["code"]
        specs.append(
            TemplateSpec(
                code=code,
                output_name=entry["output_name"],
                source_file=entry["source_file"],
                file_format=entry.get("file_format", "docx"),
                strategy=entry.get("strategy", code),
                include_in_zip=bool(entry.get("include_in_zip", True)),
                prefer_legacy_doc_native=bool(entry.get("prefer_legacy_doc_native", False)),
                allow_docx_fallback=bool(entry.get("allow_docx_fallback", True)),
                fields=entry.get("fields") or [],
            )
        )
    return specs
|
||||||
|
|
||||||
|
|
||||||
|
def copy_template_to_batch(batch: RegulatoryInfoPackageBatch, config: dict, spec: TemplateSpec) -> Path:
    """Copy a template's source file into the batch's ``templates`` subdirectory.

    The copy is named ``<spec.code>.source<original suffix>``.

    Returns:
        Path of the copied file.
    """
    template_file = Path(config["source_dir"]) / spec.source_file
    destination_dir = ensure_batch_subdir(batch, "templates")
    destination = destination_dir / f"{spec.code}.source{template_file.suffix}"
    shutil.copy2(template_file, destination)
    return destination
|
||||||
|
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import MergedField
|
||||||
|
|
||||||
|
|
||||||
|
# Column order of the traceability sheet; also the keys of every exported row.
HEADERS = [
    "target_file",
    "target_field",
    "final_value",
    "extraction_source",
    "evidence",
    "highlight_reason",
    "needs_review",
]


def save_traceability_exports(root: str | Path, merged_fields: dict[str, MergedField]) -> tuple[Path, Path]:
    """Write the field traceability table as Excel and JSON.

    The workbook goes to ``<root>/exports/traceability.xlsx`` and the JSON to
    ``<root>/logs/traceability.json``; both directories are created on demand.

    Returns:
        ``(excel_path, json_path)``.
    """
    base = Path(root)
    exports_dir = base / "exports"
    logs_dir = base / "logs"
    for directory in (exports_dir, logs_dir):
        directory.mkdir(parents=True, exist_ok=True)

    rows = []
    for field in merged_fields.values():
        rows.append(
            {
                "target_file": "",
                "target_field": field.label,
                "final_value": field.value,
                "extraction_source": field.source,
                "evidence": field.evidence,
                "highlight_reason": field.highlight_reason,
                "needs_review": field.needs_review,
            }
        )

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "traceability"
    sheet.append(HEADERS)
    for row in rows:
        sheet.append([row.get(column, "") for column in HEADERS])
    excel_path = exports_dir / "traceability.xlsx"
    workbook.save(excel_path)

    json_path = logs_dir / "traceability.json"
    json_path.write_text(json.dumps(rows, ensure_ascii=False, indent=2), encoding="utf-8")
    return excel_path, json_path
|
||||||
|
|
||||||
23
review_agent/regulatory_info_package/services/zip_export.py
Normal file
23
review_agent/regulatory_info_package/services/zip_export.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
from zipfile import ZIP_DEFLATED, ZipFile
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.constants import DEFAULT_ZIP_NAME, GENERATED_FILE_FALLBACK_SUCCESS, GENERATED_FILE_SUCCESS
|
||||||
|
from review_agent.regulatory_info_package.schemas import GeneratedFileResult
|
||||||
|
|
||||||
|
|
||||||
|
def create_zip_package(root: str | Path, generated_files: list[GeneratedFileResult], zip_name: str = DEFAULT_ZIP_NAME) -> Path:
    """Bundle the successfully generated files into ``<root>/exports/<zip_name>``.

    A result is included when its status is success or fallback-success, it
    has a path, and that path exists on disk. Entries are stored flat under
    the result's ``file_name``.

    Returns:
        Path of the written zip archive.
    """
    exports_dir = Path(root) / "exports"
    exports_dir.mkdir(parents=True, exist_ok=True)
    zip_path = exports_dir / zip_name
    packable = {GENERATED_FILE_SUCCESS, GENERATED_FILE_FALLBACK_SUCCESS}
    with ZipFile(zip_path, "w", compression=ZIP_DEFLATED) as archive:
        for item in generated_files:
            if item.status in packable and item.path:
                candidate = Path(item.path)
                if candidate.exists():
                    archive.write(candidate, arcname=item.file_name)
    return zip_path
|
||||||
71
review_agent/regulatory_info_package/storage.py
Normal file
71
review_agent/regulatory_info_package/storage.py
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import RegulatoryInfoPackageArtifact, RegulatoryInfoPackageBatch
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_work_dir(batch: RegulatoryInfoPackageBatch | None = None, *, batch_no: str = "") -> Path:
    """Compute the working directory of a batch under ``settings.MEDIA_ROOT``.

    With a batch the layout is
    ``regulatory_info_package/<user_id>/<conversation_id>/<batch_no>``;
    without one it is ``regulatory_info_package/<batch_no>`` using the
    ``batch_no`` argument. Nothing is created on disk.
    """
    base = Path(settings.MEDIA_ROOT) / "regulatory_info_package"
    if not batch:
        return base / batch_no
    return base / str(batch.user_id) / str(batch.conversation_id) / batch.batch_no
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_batch_subdir(batch: RegulatoryInfoPackageBatch, name: str) -> Path:
    """Create (if needed) and return a direct subdirectory of the batch work dir.

    Only the final path component of ``name`` is used, so the result is always
    an immediate child of the work directory.

    Raises:
        ValueError: If the target is rejected by ``ensure_within_work_dir``.
    """
    base = build_batch_work_dir(batch) if not batch.work_dir else Path(batch.work_dir)
    subdir = base / Path(name).name
    ensure_within_work_dir(batch, subdir)
    subdir.mkdir(parents=True, exist_ok=True)
    return subdir
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_within_work_dir(batch: RegulatoryInfoPackageBatch, path: str | Path) -> Path:
    """Resolve ``path`` and verify it lies inside the batch's work directory.

    The work directory is ``batch.work_dir`` when set, otherwise the directory
    computed by ``build_batch_work_dir(batch)``. This is the same fallback
    ``ensure_batch_subdir`` uses, so an empty ``work_dir`` is no longer
    compared against the process's current directory.

    Returns:
        The resolved absolute path.

    Raises:
        ValueError: If the resolved path is neither the work directory itself
            nor located below it.
    """
    base = Path(batch.work_dir) if batch.work_dir else build_batch_work_dir(batch)
    root = base.resolve()
    target = Path(path).resolve()
    if root != target and root not in target.parents:
        raise ValueError("输出路径必须位于当前材料包批次工作目录内。")
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def compute_file_sha256(path: str | Path) -> str:
    """Return the SHA-256 hex digest of a file, read in 1 MiB chunks."""
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with Path(path).open("rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if not block:
                break
            hasher.update(block)
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def create_artifact_for_file(
    batch: RegulatoryInfoPackageBatch,
    *,
    path: str | Path,
    artifact_type: str,
    file_format: str,
    name: str = "",
    metadata: dict | None = None,
    created_by_node: str = "",
) -> RegulatoryInfoPackageArtifact:
    """Create the artifact record for a file inside the batch work directory.

    File size and SHA-256 are stored when the file exists; otherwise ``0`` and
    ``""`` are recorded. ``name`` defaults to the file's stem.

    Raises:
        ValueError: If ``path`` lies outside the batch work directory.
    """
    file_path = ensure_within_work_dir(batch, path)
    if file_path.exists():
        size = file_path.stat().st_size
        digest = compute_file_sha256(file_path)
    else:
        size = 0
        digest = ""
    return RegulatoryInfoPackageArtifact.objects.create(
        batch=batch,
        artifact_type=artifact_type,
        file_format=file_format,
        name=name or file_path.stem,
        file_name=file_path.name,
        storage_path=str(file_path),
        file_size=size,
        content_hash=digest,
        metadata=metadata or {},
        created_by_node=created_by_node,
    )
|
||||||
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,64 @@
|
|||||||
|
version: regulatory_info_package_templates_v1
|
||||||
|
source_dir: review_agent/regulatory_info_package/templates/clean
|
||||||
|
zip_name: 第1章 监管信息(预生成版).zip
|
||||||
|
templates:
|
||||||
|
- code: ch1_2_directory
|
||||||
|
source_file: CH1.2 监管信息目录 - 页码版.docx
|
||||||
|
output_name: CH1.2 监管信息目录.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: directory
|
||||||
|
include_in_zip: true
|
||||||
|
fields: []
|
||||||
|
- code: ch1_4_application_form
|
||||||
|
source_file: CH1.4 申请表 - 复选框调整版.docx
|
||||||
|
output_name: CH1.4 申请表.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: application_form
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
placeholder: "{{product_name}}"
|
||||||
|
- key: applicant_name
|
||||||
|
label: 申请人名称
|
||||||
|
placeholder: "{{applicant_name}}"
|
||||||
|
- code: ch1_5_product_list
|
||||||
|
source_file: CH1.5 产品列表.docx
|
||||||
|
output_name: CH1.5 产品列表.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: product_list
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: package_specification
|
||||||
|
label: 包装规格
|
||||||
|
placeholder: "{{package_specification}}"
|
||||||
|
- code: ch1_11_1_standards
|
||||||
|
source_file: CH1.11.1 符合标准的清单.docx
|
||||||
|
output_name: CH1.11.1 符合标准的清单.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: standards
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: standard_no
|
||||||
|
label: 标准号
|
||||||
|
placeholder: "{{standard_no}}"
|
||||||
|
- code: ch1_11_5_authenticity
|
||||||
|
source_file: CH1.11.5 真实性声明.docx
|
||||||
|
output_name: CH1.11.5 真实性声明.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: authenticity
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
placeholder: "{{product_name}}"
|
||||||
|
- code: ch1_11_6_conformity
|
||||||
|
source_file: CH1.11.6 符合性声明.docx
|
||||||
|
output_name: CH1.11.6 符合性声明.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: conformity
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
placeholder: "{{product_name}}"
|
||||||
127
review_agent/regulatory_info_package/views.py
Normal file
127
review_agent/regulatory_info_package/views.py
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from django.contrib.auth.decorators import login_required
|
||||||
|
from django.conf import settings
|
||||||
|
from django.http import Http404, JsonResponse
|
||||||
|
from django.views.decorators.http import require_http_methods
|
||||||
|
|
||||||
|
from review_agent.models import ExportedSummaryFile, RegulatoryInfoPackageBatch, WorkflowNodeRun
|
||||||
|
from review_agent.regulatory_info_package.constants import WORKFLOW_TYPE
|
||||||
|
from review_agent.regulatory_info_package.services.input_select import select_instruction_input
|
||||||
|
from review_agent.regulatory_info_package.workflow import (
|
||||||
|
create_regulatory_info_package_batch,
|
||||||
|
start_regulatory_info_package_workflow,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
def health(request):
    """Return a static JSON payload naming the workflow type and marking it available."""
    body = {"workflow_type": WORKFLOW_TYPE, "status": "available"}
    return JsonResponse(body)
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["POST"])
def start(request):
    """Start the regulatory information package workflow for a conversation.

    Expects a JSON object with ``conversation_id`` and an optional ``message``.

    Returns:
        400 when the body is not valid UTF-8 JSON or not a JSON object, or
        when no instruction input could be selected; otherwise a JSON payload
        with the new batch id, workflow type and batch status.

    Raises:
        Http404: If the conversation does not exist or is not owned by the
            requesting user.
    """
    try:
        payload = json.loads(request.body.decode("utf-8") or "{}")
    except (UnicodeDecodeError, json.JSONDecodeError):
        return JsonResponse({"error": "JSON 格式错误。"}, status=400)
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (list, string, number) cannot
        # carry the expected keys.
        return JsonResponse({"error": "JSON 格式错误。"}, status=400)
    from review_agent.models import Conversation

    try:
        conversation = Conversation.objects.filter(pk=payload.get("conversation_id"), user=request.user).first()
    except (TypeError, ValueError):
        # A malformed id (e.g. "abc" or a nested object) is treated like an
        # unknown conversation.
        conversation = None
    if not conversation:
        raise Http404("对话不存在。")
    selection = select_instruction_input(conversation, str(payload.get("message") or ""))
    if selection.status != "selected":
        return JsonResponse(
            {"status": selection.status, "message": selection.message, "candidates": selection.candidates},
            status=400,
        )
    batch = create_regulatory_info_package_batch(
        conversation=conversation,
        user=request.user,
        source_attachment=selection.attachment,
        source_summary_batch=selection.source_summary_batch,
        source_summary_item_id=selection.source_summary_item_id,
        source_file_name=selection.file_name,
        source_storage_path=selection.storage_path,
    )
    start_regulatory_info_package_workflow(batch, async_run=getattr(settings, "REGULATORY_INFO_PACKAGE_ASYNC", True))
    return JsonResponse({"batch_id": batch.pk, "workflow_type": WORKFLOW_TYPE, "status": batch.status})
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["GET"])
def batch_status(request, batch_id: int):
    """Return batch details, node progress, exports, failed files and notifications as JSON.

    Raises:
        Http404: If no non-deleted batch with ``batch_id`` belongs to a
            conversation owned by the requesting user.
    """
    owned_batches = RegulatoryInfoPackageBatch.objects.filter(
        pk=batch_id,
        conversation__user=request.user,
        is_deleted=False,
    )
    batch = owned_batches.first()
    if not batch:
        raise Http404("材料包批次不存在。")

    export_rows = ExportedSummaryFile.objects.filter(
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
    ).order_by("-export_type", "id")
    # Stable sort: zip exports first, database order kept within each group.
    ordered_exports = sorted(
        export_rows,
        key=lambda row: row.export_type != ExportedSummaryFile.ExportType.ZIP,
    )

    batch_payload = {
        "id": batch.pk,
        "workflow_type": WORKFLOW_TYPE,
        "batch_no": batch.batch_no,
        "status": batch.status,
        "product_name": batch.product_name,
        "risk_summary_text": _risk_summary_text(batch),
        "error_message": batch.error_message,
    }

    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
    ).order_by("id")
    nodes_payload = [
        {
            "node_code": run.node_code,
            "node_name": run.node_name,
            "status": run.status,
            "progress": run.progress,
            "message": run.message,
        }
        for run in node_runs
    ]

    exports_payload = [
        {
            "id": row.pk,
            "export_type": row.export_type,
            "export_category": row.export_category,
            "file_name": row.file_name,
            "download_url": f"/api/review-agent/file-summary/exports/{row.pk}/download/",
        }
        for row in ordered_exports
    ]

    failed_payload = [entry for entry in batch.generated_files if entry.get("status") == "failed"]

    notification_rows = batch.notifications.filter(is_deleted=False).order_by("-created_at", "-id")
    notifications_payload = [
        {
            "id": record.pk,
            "channel": record.channel,
            "send_status": record.send_status,
            "status_label": "通知已记录" if record.send_status == "success" else record.send_status,
            "error_message": record.error_message,
        }
        for record in notification_rows
    ]

    return JsonResponse(
        {
            "batch": batch_payload,
            "nodes": nodes_payload,
            "exports": exports_payload,
            "failed_files": failed_payload,
            "notifications": notifications_payload,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _risk_summary_text(batch: RegulatoryInfoPackageBatch) -> str:
|
||||||
|
parts = []
|
||||||
|
if batch.missing_fields:
|
||||||
|
parts.append(f"缺失字段 {len(batch.missing_fields)}")
|
||||||
|
if batch.llm_only_fields:
|
||||||
|
parts.append(f"LLM-only {len(batch.llm_only_fields)}")
|
||||||
|
if batch.conflict_fields:
|
||||||
|
parts.append(f"冲突字段 {len(batch.conflict_fields)}")
|
||||||
|
if batch.risk_notes:
|
||||||
|
parts.append(f"提示 {len(batch.risk_notes)}")
|
||||||
|
return " · ".join(parts)
|
||||||
375
review_agent/regulatory_info_package/workflow.py
Normal file
375
review_agent/regulatory_info_package/workflow.py
Normal file
@@ -0,0 +1,375 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from threading import Thread
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import transaction
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.file_summary.paths import resolve_storage_path
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
Message,
|
||||||
|
RegulatoryInfoPackageArtifact,
|
||||||
|
RegulatoryInfoPackageBatch,
|
||||||
|
RegulatoryInfoPackageNotificationRecord,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_info_package.constants import (
|
||||||
|
DEFAULT_ZIP_NAME,
|
||||||
|
REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS,
|
||||||
|
WORKFLOW_TYPE,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_info_package.events import record_event
|
||||||
|
from review_agent.regulatory_info_package.services.template_config import (
|
||||||
|
compute_config_hash,
|
||||||
|
load_template_config,
|
||||||
|
validate_template_config,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_info_package.services.field_extract import run_parallel_extract, save_field_extract_result
|
||||||
|
from review_agent.regulatory_info_package.services.field_merge import merge_fields, save_merged_fields
|
||||||
|
from review_agent.regulatory_info_package.services.instruction_extract import parse_instruction_docx, save_instruction_extract_json
|
||||||
|
from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
|
||||||
|
from review_agent.regulatory_info_package.services.summary import build_assistant_summary
|
||||||
|
from review_agent.regulatory_info_package.services.traceability_export import save_traceability_exports
|
||||||
|
from review_agent.regulatory_info_package.services.zip_export import create_zip_package
|
||||||
|
from review_agent.regulatory_info_package.schemas import GeneratedFileResult, InstructionExtractResult, MergedField
|
||||||
|
from review_agent.regulatory_info_package.storage import build_batch_work_dir
|
||||||
|
from review_agent.regulatory_info_package.storage import create_artifact_for_file, ensure_batch_subdir
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.regulatory_info_package.workflow")
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_no() -> str:
    """Build a batch number of the form ``RIP-<YYYYmmddHHMMSS>-<6 hex chars>``.

    The timestamp is the current local time as given by
    ``timezone.localtime()``; the suffix comes from a random UUID.
    """
    timestamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    suffix = uuid4().hex[:6]
    return f"RIP-{timestamp}-{suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
@transaction.atomic
def create_regulatory_info_package_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
    source_attachment=None,
    source_summary_batch=None,
    source_summary_item_id: int | None = None,
    source_file_name: str = "",
    source_storage_path: str = "",
    existing_batch: RegulatoryInfoPackageBatch | None = None,
) -> RegulatoryInfoPackageBatch:
    """Create a batch (or reuse ``existing_batch``) and make sure its node runs exist.

    For a new batch a batch number is generated, its work directory is created
    on disk and the batch row is inserted. File name and storage path fall
    back to the attachment's ``original_name`` / ``storage_path`` when not
    given. In both cases one ``WorkflowNodeRun`` per node definition is
    ensured and a ``workflow_created`` event is recorded.

    Returns:
        The created or reused batch.
    """
    if existing_batch is not None:
        batch = existing_batch
    else:
        new_batch_no = build_batch_no()
        batch_dir = build_batch_work_dir(batch_no=new_batch_no)
        batch_dir.mkdir(parents=True, exist_ok=True)
        batch = RegulatoryInfoPackageBatch.objects.create(
            conversation=conversation,
            user=user,
            trigger_message=trigger_message,
            source_attachment=source_attachment,
            source_summary_batch=source_summary_batch,
            source_summary_item_id=source_summary_item_id,
            source_file_name=source_file_name or getattr(source_attachment, "original_name", ""),
            source_storage_path=source_storage_path or getattr(source_attachment, "storage_path", ""),
            batch_no=new_batch_no,
            output_zip_name=DEFAULT_ZIP_NAME,
            work_dir=str(batch_dir),
        )

    for node_code, node_name, node_group in REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS:
        # get_or_create keeps existing node runs untouched when a batch is reused.
        WorkflowNodeRun.objects.get_or_create(
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=batch.pk,
            node_code=node_code,
            defaults={"node_group": node_group, "node_name": node_name},
        )

    record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no})
    return batch
|
||||||
|
|
||||||
|
|
||||||
|
class RegulatoryInfoPackageWorkflowExecutor:
    """Runs the Chapter 1 regulatory information package workflow.

    The executor walks the batch's ``WorkflowNodeRun`` rows in ``id`` order
    and dispatches each one to a per-node handler keyed by ``node_code``.
    Results of earlier nodes are kept on the instance so that later nodes can
    read them.
    """

    def __init__(self, batch: RegulatoryInfoPackageBatch):
        self.batch = batch
        # State shared between nodes; each is filled by the node named below.
        self.template_config: dict = {}  # "prepare"
        self.instruction: InstructionExtractResult | None = None  # "text_extract"
        self.extract_payload: dict = {}  # "field_extract"
        self.merged_fields: dict[str, MergedField] = {}  # "field_merge"
        self.merge_summary: dict[str, list[dict]] = {}  # "field_merge"
        self.generation_results: list[GeneratedFileResult] = []  # "generate_docs"
        self.exports: list[ExportedSummaryFile] = []  # export-producing nodes

    def run(self) -> None:
        """Execute every pending node and record the final batch status.

        Nodes already in SUCCESS or SKIPPED state are not re-run. Any
        exception raised by a node marks the batch FAILED, stores the message
        and records a ``workflow_failed`` event; the exception is not
        re-raised. On success the batch is marked SUCCESS, a completion
        message is appended to the conversation and a ``workflow_completed``
        event is recorded.
        """
        logger.info("监管信息材料包工作流开始 batch_no=%s batch_id=%s", self.batch.batch_no, self.batch.pk)
        self.batch.status = RegulatoryInfoPackageBatch.Status.RUNNING
        self.batch.started_at = timezone.now()
        self.batch.save(update_fields=["status", "started_at"])
        record_event(self.batch, "workflow_started", {"batch_id": self.batch.pk})
        try:
            for node in self._nodes():
                if node.status in {WorkflowNodeRun.Status.SUCCESS, WorkflowNodeRun.Status.SKIPPED}:
                    continue
                self._run_node(node)
        except Exception as exc:
            logger.exception("Regulatory info package workflow failed", extra={"batch_id": self.batch.pk})
            self.batch.status = RegulatoryInfoPackageBatch.Status.FAILED
            self.batch.error_message = str(exc)
            self.batch.finished_at = timezone.now()
            self.batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(self.batch, "workflow_failed", {"message": str(exc)})
            return
        self.batch.status = RegulatoryInfoPackageBatch.Status.SUCCESS
        self.batch.finished_at = timezone.now()
        self.batch.save(update_fields=["status", "finished_at"])
        self._append_completion_message()
        record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})

    def _nodes(self):
        """Return this batch's node rows ordered by primary key."""
        return WorkflowNodeRun.objects.filter(
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=self.batch.pk,
        ).order_by("id")

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Mark ``node`` running, execute it, then mark it successful.

        A ``node_progress`` event is recorded before and after execution.
        """
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        record_event(self.batch, "node_progress", {"node_code": node.node_code, "status": node.status})
        # NOTE(review): if the handler raises, the node row keeps the RUNNING
        # status saved above; only the batch is marked failed by run().
        self._execute_node(node)
        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        record_event(self.batch, "node_progress", {"node_code": node.node_code, "status": node.status})

    def _execute_node(self, node: WorkflowNodeRun) -> None:
        """Dispatch ``node`` to the handler registered for its ``node_code``.

        Unknown codes remain a no-op (the node is still reported successful
        by the caller), but are logged so that a definition/handler mismatch
        is visible.
        """
        handler = self._node_handlers().get(node.node_code)
        if handler is None:
            logger.warning("No handler for regulatory info package node_code=%s", node.node_code)
            return
        handler(node)

    def _node_handlers(self) -> dict:
        """Map each known ``node_code`` to its bound handler method."""
        return {
            "prepare": self._node_prepare,
            "template_copy": self._node_noop,
            "text_extract": self._node_text_extract,
            "field_extract": self._node_field_extract,
            "field_merge": self._node_field_merge,
            "generate_docs": self._node_generate_docs,
            "highlight_review_items": self._node_noop,
            "trace_export": self._node_trace_export,
            "zip_export": self._node_zip_export,
            "notify": self._node_notify,
        }

    def _node_noop(self, node: WorkflowNodeRun) -> None:
        """Handler for nodes that currently perform no work."""
        return

    def _node_prepare(self, node: WorkflowNodeRun) -> None:
        """Load and validate the template config and stamp it on the batch.

        Raises:
            ValueError: when validation reports errors (joined with ";").
        """
        self.template_config = load_template_config()
        errors = validate_template_config(self.template_config)
        if errors:
            raise ValueError(";".join(errors))
        self.batch.template_config_version = str(self.template_config.get("version") or "")
        self.batch.template_config_hash = compute_config_hash()
        self.batch.save(update_fields=["template_config_version", "template_config_hash"])

    def _node_text_extract(self, node: WorkflowNodeRun) -> None:
        """Parse the source instruction docx and store the result as a JSON artifact.

        With no source storage path on the batch, the instruction is reset to
        ``None`` and nothing is written.
        """
        if not self.batch.source_storage_path:
            self.instruction = None
            return
        path = resolve_storage_path(self.batch.source_storage_path)
        self.instruction = parse_instruction_docx(path)
        json_path = ensure_batch_subdir(self.batch, "logs") / "instruction_extract.json"
        save_instruction_extract_json(json_path, self.instruction)
        self._register_artifact(
            node,
            json_path,
            RegulatoryInfoPackageArtifact.ArtifactType.INSTRUCTION_EXTRACT,
            RegulatoryInfoPackageArtifact.FileFormat.JSON,
        )

    def _node_field_extract(self, node: WorkflowNodeRun) -> None:
        """Run field extraction on the parsed instruction and persist the payload.

        Without a parsed instruction an empty payload is stored in memory and
        no artifact is written.
        """
        if not self.instruction:
            self.extract_payload = {"regex_results": {}, "llm_results": {}, "llm_error": ""}
            return
        # The LLM extractor is stubbed out here: it always yields no fields.
        self.extract_payload = run_parallel_extract(self.instruction, llm_extract_func=lambda _instruction: {})
        json_path = ensure_batch_subdir(self.batch, "logs") / "field_extract_result.json"
        save_field_extract_result(json_path, self.extract_payload)
        self._register_artifact(
            node,
            json_path,
            RegulatoryInfoPackageArtifact.ArtifactType.FIELD_EXTRACT_RESULT,
            RegulatoryInfoPackageArtifact.FileFormat.JSON,
        )

    def _node_field_merge(self, node: WorkflowNodeRun) -> None:
        """Merge regex and LLM results, update batch summary fields, save JSON."""
        self.merged_fields, self.merge_summary = merge_fields(
            self.extract_payload.get("regex_results") or {},
            self.extract_payload.get("llm_results") or {},
        )
        product = self.merged_fields.get("product_name")
        # "/" is treated as "no value" and does not overwrite the product name.
        if product and product.value and product.value != "/":
            self.batch.product_name = product.value
        self.batch.missing_fields = self.merge_summary.get("missing_fields", [])
        self.batch.llm_only_fields = self.merge_summary.get("llm_only_fields", [])
        self.batch.conflict_fields = self.merge_summary.get("conflict_fields", [])
        self.batch.save(update_fields=["product_name", "missing_fields", "llm_only_fields", "conflict_fields"])
        json_path = ensure_batch_subdir(self.batch, "logs") / "merged_fields.json"
        save_merged_fields(json_path, self.merged_fields, self.merge_summary)
        self._register_artifact(
            node,
            json_path,
            RegulatoryInfoPackageArtifact.ArtifactType.MERGED_FIELDS,
            RegulatoryInfoPackageArtifact.FileFormat.JSON,
        )

    def _node_generate_docs(self, node: WorkflowNodeRun) -> None:
        """Generate the package documents and register artifacts/exports for them.

        Every result (including ones without a file) is appended to the
        batch's ``generated_files``.
        """
        self.generation_results = generate_package_documents(self.batch, self.template_config, self.merged_fields)
        generated_files = []
        for result in self.generation_results:
            if result.path:
                artifact = self._register_artifact(
                    node,
                    result.path,
                    RegulatoryInfoPackageArtifact.ArtifactType.GENERATED_DOCUMENT,
                    result.actual_format,
                    name=result.template_code,
                    metadata=result.__dict__,
                )
                result.artifact_id = artifact.pk
                # NOTE(review): export creation is kept under the path check,
                # since it needs a file path — confirm against the original layout.
                if result.status in {"success", "fallback_success"}:
                    export = self._create_export(
                        path=result.path,
                        export_type=ExportedSummaryFile.ExportType.WORD,
                        export_category="generated_document",
                    )
                    result.export_id = export.pk
                    self.exports.append(export)
            generated_files.append(result.__dict__)
        self.batch.generated_files = generated_files
        self.batch.save(update_fields=["generated_files"])

    def _node_trace_export(self, node: WorkflowNodeRun) -> None:
        """Write traceability Excel/JSON files and expose the Excel file as an export."""
        excel_path, json_path = save_traceability_exports(self.batch.work_dir, self.merged_fields)
        self._register_artifact(
            node,
            json_path,
            RegulatoryInfoPackageArtifact.ArtifactType.TRACEABILITY,
            RegulatoryInfoPackageArtifact.FileFormat.JSON,
        )
        artifact = self._register_artifact(
            node,
            excel_path,
            RegulatoryInfoPackageArtifact.ArtifactType.TRACEABILITY,
            RegulatoryInfoPackageArtifact.FileFormat.EXCEL,
        )
        export = self._create_export(
            path=str(excel_path),
            export_type=ExportedSummaryFile.ExportType.EXCEL,
            export_category="traceability",
        )
        self.exports.append(export)
        self._link_artifact_to_export(artifact, export)

    def _node_zip_export(self, node: WorkflowNodeRun) -> None:
        """Build the zip package and register it as the first export."""
        zip_path = create_zip_package(self.batch.work_dir, self.generation_results, self.batch.output_zip_name)
        artifact = self._register_artifact(
            node,
            zip_path,
            RegulatoryInfoPackageArtifact.ArtifactType.ZIP_PACKAGE,
            RegulatoryInfoPackageArtifact.FileFormat.ZIP,
        )
        export = self._create_export(
            path=str(zip_path),
            export_type=ExportedSummaryFile.ExportType.ZIP,
            export_category="regulatory_info_package",
        )
        # The zip goes to the front so it is listed before the other exports.
        self.exports.insert(0, export)
        self._link_artifact_to_export(artifact, export)

    def _node_notify(self, node: WorkflowNodeRun) -> None:
        """Store a notification record summarising the exports created so far."""
        RegulatoryInfoPackageNotificationRecord.objects.create(
            batch=self.batch,
            recipient=self.batch.user,
            export_ids=[export.pk for export in self.exports],
            message_summary=self._build_summary(self.exports),
            send_status=RegulatoryInfoPackageNotificationRecord.SendStatus.SUCCESS,
        )

    def _append_completion_message(self) -> None:
        """Append one assistant message with the download summary to the conversation.

        Nothing is added when the conversation already holds an assistant
        message containing both the batch number and the zip name.
        """
        already_posted = (
            Message.objects.filter(
                conversation=self.batch.conversation,
                role=Message.Role.ASSISTANT,
                content__contains=self.batch.batch_no,
            )
            .filter(content__contains=self.batch.output_zip_name)
            .exists()
        )
        if already_posted:
            return
        exports = list(
            ExportedSummaryFile.objects.filter(
                workflow_type=WORKFLOW_TYPE,
                workflow_batch_id=self.batch.pk,
            )
        )
        # Stable sort: zip exports first, everything else keeps its order.
        exports = sorted(exports, key=lambda export: 0 if export.export_type == ExportedSummaryFile.ExportType.ZIP else 1)
        Message.objects.create(
            conversation=self.batch.conversation,
            role=Message.Role.ASSISTANT,
            content=self._build_summary(exports),
        )

    def _build_summary(self, exports) -> str:
        """Build the assistant summary text for ``exports`` and the failed files."""
        return build_assistant_summary(
            batch_no=self.batch.batch_no,
            exports=[
                {
                    "file_name": export.file_name,
                    "download_url": f"/api/review-agent/file-summary/exports/{export.pk}/download/",
                    "export_type": export.export_type,
                }
                for export in exports
            ],
            failed_files=[item for item in self.batch.generated_files if item.get("status") == "failed"],
        )

    def _register_artifact(self, node: WorkflowNodeRun, path, artifact_type, file_format, **extra):
        """Create an artifact row for ``path`` attributed to ``node`` and return it."""
        return create_artifact_for_file(
            self.batch,
            path=path,
            artifact_type=artifact_type,
            file_format=file_format,
            created_by_node=node.node_code,
            **extra,
        )

    @staticmethod
    def _link_artifact_to_export(artifact, export) -> None:
        """Replace the artifact's metadata with a reference to ``export``."""
        artifact.metadata = {"export_id": export.pk}
        artifact.save(update_fields=["metadata"])

    def _create_export(self, *, path: str, export_type: str, export_category: str) -> ExportedSummaryFile:
        """Create an ``ExportedSummaryFile`` row for ``path`` tied to this batch."""
        from pathlib import Path

        resolved = Path(path)
        return ExportedSummaryFile.objects.create(
            batch=None,
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=self.batch.pk,
            export_category=export_category,
            export_type=export_type,
            file_name=resolved.name,
            storage_path=str(resolved),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def start_regulatory_info_package_workflow(
    batch: RegulatoryInfoPackageBatch,
    *,
    async_run: bool | None = None,
) -> None:
    """Run the workflow for ``batch``, in a daemon thread or inline.

    When ``async_run`` is ``None`` the mode comes from
    ``settings.REGULATORY_INFO_PACKAGE_ASYNC`` (default ``True``).
    """
    in_background = (
        getattr(settings, "REGULATORY_INFO_PACKAGE_ASYNC", True) if async_run is None else async_run
    )
    runner = RegulatoryInfoPackageWorkflowExecutor(batch)
    if not in_background:
        runner.run()
        return
    worker = Thread(target=runner.run, daemon=True)
    worker.start()
|
||||||
@@ -23,6 +23,8 @@ from .rag_embedding import EmbeddingFunction
|
|||||||
|
|
||||||
logger = logging.getLogger("review_agent.regulatory_review.rag_index")
|
logger = logging.getLogger("review_agent.regulatory_review.rag_index")
|
||||||
|
|
||||||
|
EXCLUDED_SOURCE_KEYWORDS = ("模拟题二", "试剂盒临床注册文件准备与审核Agent")
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class TextChunk:
|
class TextChunk:
|
||||||
@@ -227,6 +229,8 @@ def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
|
|||||||
for path in sorted(source_dir.rglob("*")):
|
for path in sorted(source_dir.rglob("*")):
|
||||||
if not path.is_file():
|
if not path.is_file():
|
||||||
continue
|
continue
|
||||||
|
if is_excluded_source_path(path.relative_to(source_dir)):
|
||||||
|
continue
|
||||||
try:
|
try:
|
||||||
text = extract_text_from_path(path)
|
text = extract_text_from_path(path)
|
||||||
except RuntimeError as exc:
|
except RuntimeError as exc:
|
||||||
@@ -238,6 +242,11 @@ def collect_source_chunks(source_dir: Path) -> list[TextChunk]:
|
|||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def is_excluded_source_path(path: Path | str) -> bool:
    """Return True when the path's string form contains an excluded keyword.

    Keywords come from ``EXCLUDED_SOURCE_KEYWORDS`` and are matched as plain
    substrings anywhere in ``str(path)``.
    """
    path_text = str(path)
    for keyword in EXCLUDED_SOURCE_KEYWORDS:
        if keyword in path_text:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
def _is_attachment4(path: Path) -> bool:
|
def _is_attachment4(path: Path) -> bool:
|
||||||
normalized = path.name.replace(" ", "")
|
normalized = path.name.replace(" ", "")
|
||||||
return "附件4" in normalized and "体外诊断试剂注册申报资料要求及说明" in normalized
|
return "附件4" in normalized and "体外诊断试剂注册申报资料要求及说明" in normalized
|
||||||
@@ -249,6 +258,7 @@ def build_chroma_index(
|
|||||||
embedding_provider: EmbeddingFunction,
|
embedding_provider: EmbeddingFunction,
|
||||||
persist_path: Path | None = None,
|
persist_path: Path | None = None,
|
||||||
collection_name: str | None = None,
|
collection_name: str | None = None,
|
||||||
|
reset: bool = False,
|
||||||
) -> int:
|
) -> int:
|
||||||
try:
|
try:
|
||||||
import chromadb
|
import chromadb
|
||||||
@@ -259,7 +269,22 @@ def build_chroma_index(
|
|||||||
collection_name = collection_name or settings.REGULATORY_RAG_COLLECTION
|
collection_name = collection_name or settings.REGULATORY_RAG_COLLECTION
|
||||||
persist_path.mkdir(parents=True, exist_ok=True)
|
persist_path.mkdir(parents=True, exist_ok=True)
|
||||||
chunks = collect_source_chunks(source_dir)
|
chunks = collect_source_chunks(source_dir)
|
||||||
|
try:
|
||||||
client = chromadb.PersistentClient(path=str(persist_path))
|
client = chromadb.PersistentClient(path=str(persist_path))
|
||||||
|
except Exception:
|
||||||
|
if not reset:
|
||||||
|
raise
|
||||||
|
clear_chroma_system_cache()
|
||||||
|
clear_chroma_index_dir(persist_path)
|
||||||
|
persist_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
client = chromadb.PersistentClient(path=str(persist_path))
|
||||||
|
if reset:
|
||||||
|
try:
|
||||||
|
client.delete_collection(collection_name)
|
||||||
|
clear_chroma_system_cache()
|
||||||
|
client = chromadb.PersistentClient(path=str(persist_path))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
collection = client.get_or_create_collection(collection_name)
|
collection = client.get_or_create_collection(collection_name)
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return 0
|
return 0
|
||||||
@@ -276,3 +301,22 @@ def build_chroma_index(
|
|||||||
embeddings=embeddings,
|
embeddings=embeddings,
|
||||||
)
|
)
|
||||||
return len(chunks)
|
return len(chunks)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_chroma_index_dir(persist_path: Path | str | None = None) -> None:
    """Delete the Chroma index directory, if it exists.

    Args:
        persist_path: Directory to remove; defaults to
            ``settings.REGULATORY_RAG_CHROMA_PATH`` when falsy.

    Raises:
        RuntimeError: when the resolved directory is not strictly inside
            ``settings.MEDIA_ROOT``.
    """
    chroma_path = Path(persist_path or settings.REGULATORY_RAG_CHROMA_PATH).resolve()
    media_root = Path(settings.MEDIA_ROOT).resolve()
    # ``relative_to`` also accepts chroma_path == media_root, which would let
    # rmtree wipe the whole media root. ``parents`` excludes the path itself,
    # so this requires a strict descendant.
    if media_root not in chroma_path.parents:
        raise RuntimeError("法规 RAG 索引目录必须位于 MEDIA_ROOT 内。")
    if chroma_path.exists():
        shutil.rmtree(chroma_path)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_chroma_system_cache() -> None:
    """Clear chromadb's shared system client cache when chromadb is importable.

    Any failure while importing ``SharedSystemClient`` makes this a no-op.
    """
    try:
        from chromadb.api.shared_system_client import SharedSystemClient as shared_client
    except Exception:
        # Import failed, so there is nothing to clear.
        pass
    else:
        shared_client.clear_system_cache()
|
||||||
|
|||||||
@@ -19,6 +19,12 @@ from .application_form_fill.workflow import (
|
|||||||
find_latest_successful_summary_batch as find_latest_successful_form_fill_summary_batch,
|
find_latest_successful_summary_batch as find_latest_successful_form_fill_summary_batch,
|
||||||
start_application_form_fill_workflow,
|
start_application_form_fill_workflow,
|
||||||
)
|
)
|
||||||
|
from .regulatory_info_package.constants import WORKFLOW_TYPE as REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE
|
||||||
|
from .regulatory_info_package.services.input_select import select_instruction_input
|
||||||
|
from .regulatory_info_package.workflow import (
|
||||||
|
create_regulatory_info_package_batch,
|
||||||
|
start_regulatory_info_package_workflow,
|
||||||
|
)
|
||||||
from .regulatory_review.workflow import (
|
from .regulatory_review.workflow import (
|
||||||
create_regulatory_review_batch,
|
create_regulatory_review_batch,
|
||||||
find_latest_successful_summary_batch,
|
find_latest_successful_summary_batch,
|
||||||
@@ -108,6 +114,9 @@ def send_message(conversation: Conversation, content: str) -> tuple[Message, Mes
|
|||||||
|
|
||||||
user_message = append_user_message(conversation, content)
|
user_message = append_user_message(conversation, content)
|
||||||
knowledge_context = build_knowledge_context(content)
|
knowledge_context = build_knowledge_context(content)
|
||||||
|
if should_refuse_ungrounded_chat(conversation, content, knowledge_context):
|
||||||
|
reply_content = out_of_scope_reply()
|
||||||
|
else:
|
||||||
try:
|
try:
|
||||||
reply_content = generate_reply(conversation, content, knowledge_context=knowledge_context)
|
reply_content = generate_reply(conversation, content, knowledge_context=knowledge_context)
|
||||||
except (LLMConfigurationError, LLMRequestError) as exc:
|
except (LLMConfigurationError, LLMRequestError) as exc:
|
||||||
@@ -127,6 +136,31 @@ def stream_message(conversation: Conversation, content: str):
|
|||||||
|
|
||||||
user_message = append_user_message(conversation, content)
|
user_message = append_user_message(conversation, content)
|
||||||
assistant_parts: list[str] = []
|
assistant_parts: list[str] = []
|
||||||
|
knowledge_context = build_knowledge_context(content)
|
||||||
|
|
||||||
|
if should_refuse_ungrounded_chat(conversation, content, knowledge_context):
|
||||||
|
reply_content = out_of_scope_reply()
|
||||||
|
assistant_message = append_assistant_message(conversation, reply_content)
|
||||||
|
yield sse_event(
|
||||||
|
"meta",
|
||||||
|
{
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title or build_conversation_title(content),
|
||||||
|
"user_message_id": user_message.pk,
|
||||||
|
"user_message": user_message.content,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
yield sse_event("chunk", {"delta": reply_content})
|
||||||
|
yield sse_event(
|
||||||
|
"done",
|
||||||
|
{
|
||||||
|
"assistant_message_id": assistant_message.pk,
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
route = route_message_intent(conversation, content)
|
route = route_message_intent(conversation, content)
|
||||||
logger.info(
|
logger.info(
|
||||||
"Stream message started",
|
"Stream message started",
|
||||||
@@ -314,6 +348,56 @@ def stream_message(conversation: Conversation, content: str):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if route.starts_regulatory_info_package:
|
||||||
|
selection = select_instruction_input(conversation, content)
|
||||||
|
if selection.status != "selected":
|
||||||
|
reply_content = selection.message or "请先在当前对话右侧上传产品说明书 docx 文件,然后再发送第1章监管信息生成指令。"
|
||||||
|
assistant_message = append_assistant_message(conversation, reply_content)
|
||||||
|
yield sse_event("chunk", {"delta": reply_content})
|
||||||
|
yield sse_event(
|
||||||
|
"done",
|
||||||
|
{
|
||||||
|
"assistant_message_id": assistant_message.pk,
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
batch = create_regulatory_info_package_batch(
|
||||||
|
conversation=conversation,
|
||||||
|
user=conversation.user,
|
||||||
|
trigger_message=user_message,
|
||||||
|
source_attachment=selection.attachment,
|
||||||
|
source_summary_batch=selection.source_summary_batch,
|
||||||
|
source_summary_item_id=selection.source_summary_item_id,
|
||||||
|
source_file_name=selection.file_name,
|
||||||
|
source_storage_path=selection.storage_path,
|
||||||
|
)
|
||||||
|
start_regulatory_info_package_workflow(
|
||||||
|
batch,
|
||||||
|
async_run=getattr(settings, "REGULATORY_INFO_PACKAGE_ASYNC", True),
|
||||||
|
)
|
||||||
|
reply_content = f"已启动第1章监管信息材料包生成工作流,批次号:{batch.batch_no}。"
|
||||||
|
assistant_message = append_assistant_message(conversation, reply_content)
|
||||||
|
yield sse_event(
|
||||||
|
"workflow_started",
|
||||||
|
{
|
||||||
|
"workflow_type": REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE,
|
||||||
|
"batch_id": batch.pk,
|
||||||
|
"batch_no": batch.batch_no,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
yield sse_event("chunk", {"delta": reply_content})
|
||||||
|
yield sse_event(
|
||||||
|
"done",
|
||||||
|
{
|
||||||
|
"assistant_message_id": assistant_message.pk,
|
||||||
|
"conversation_id": conversation.pk,
|
||||||
|
"title": conversation.title,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
if route.starts_regulatory_review:
|
if route.starts_regulatory_review:
|
||||||
source_summary_batch = find_latest_successful_summary_batch(conversation)
|
source_summary_batch = find_latest_successful_summary_batch(conversation)
|
||||||
if not source_summary_batch:
|
if not source_summary_batch:
|
||||||
@@ -395,7 +479,6 @@ def stream_message(conversation: Conversation, content: str):
|
|||||||
|
|
||||||
stream_failed = False
|
stream_failed = False
|
||||||
stream_error = ""
|
stream_error = ""
|
||||||
knowledge_context = build_knowledge_context(content)
|
|
||||||
try:
|
try:
|
||||||
for chunk in stream_reply(conversation, content, knowledge_context=knowledge_context):
|
for chunk in stream_reply(conversation, content, knowledge_context=knowledge_context):
|
||||||
assistant_parts.append(chunk)
|
assistant_parts.append(chunk)
|
||||||
@@ -497,6 +580,76 @@ def build_knowledge_context(content: str, *, n_results: int = 5) -> str:
|
|||||||
return "\n\n".join(lines)
|
return "\n\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def should_refuse_ungrounded_chat(
    conversation: Conversation,
    content: str,
    knowledge_context: str = "",
) -> bool:
    """Decide whether a chat message should get the out-of-scope refusal.

    The message is refused only when all three hold: no non-blank knowledge
    context was found, the text is not business related, and the conversation
    has no active attachments. The checks run in that order and stop at the
    first one that allows answering.
    """
    may_answer = (
        bool((knowledge_context or "").strip())
        or _is_business_related_question(content)
        or _has_active_attachments(conversation)
    )
    return not may_answer
|
||||||
|
|
||||||
|
|
||||||
|
def out_of_scope_reply() -> str:
    """Return the fixed refusal text used for ungrounded, off-topic questions."""
    segments = (
        "没有在当前启用的知识库材料中找到可依据的内容,且这个问题与当前主营业务无关。",
        "为避免编造,我不能直接回答。请先上传或启用相关知识库材料,或改问体外诊断试剂注册资料审核、",
        "文件汇总、法规核查、申报填表等业务范围内的问题。",
    )
    return "".join(segments)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_business_related_question(content: str) -> bool:
|
||||||
|
normalized = (content or "").lower()
|
||||||
|
compact = "".join(normalized.split())
|
||||||
|
if not compact:
|
||||||
|
return True
|
||||||
|
business_keywords = [
|
||||||
|
"审核智能体",
|
||||||
|
"体外诊断",
|
||||||
|
"ivd",
|
||||||
|
"nmpa",
|
||||||
|
"cmde",
|
||||||
|
"医疗器械",
|
||||||
|
"注册资料",
|
||||||
|
"注册申报",
|
||||||
|
"注册检验",
|
||||||
|
"注册证",
|
||||||
|
"申报资料",
|
||||||
|
"申报文件",
|
||||||
|
"法规",
|
||||||
|
"核查",
|
||||||
|
"审评",
|
||||||
|
"审核",
|
||||||
|
"整改",
|
||||||
|
"风险",
|
||||||
|
"说明书",
|
||||||
|
"临床",
|
||||||
|
"性能",
|
||||||
|
"安全",
|
||||||
|
"适用范围",
|
||||||
|
"预期用途",
|
||||||
|
"附件",
|
||||||
|
"文件",
|
||||||
|
"压缩包",
|
||||||
|
"目录",
|
||||||
|
"页数",
|
||||||
|
"清单",
|
||||||
|
"汇总",
|
||||||
|
"模板",
|
||||||
|
"填表",
|
||||||
|
"知识库",
|
||||||
|
"检索",
|
||||||
|
"报告",
|
||||||
|
"材料",
|
||||||
|
"资料",
|
||||||
|
]
|
||||||
|
return any(keyword in compact for keyword in business_keywords)
|
||||||
|
|
||||||
|
|
||||||
def build_filename_matched_document_context(query: str, *, max_chars: int = 12000) -> str:
|
def build_filename_matched_document_context(query: str, *, max_chars: int = 12000) -> str:
|
||||||
terms = _knowledge_query_terms(query)
|
terms = _knowledge_query_terms(query)
|
||||||
if not terms:
|
if not terms:
|
||||||
|
|||||||
@@ -11,6 +11,10 @@ from .file_summary.workflow_trigger import (
|
|||||||
from .application_form_fill.constants import FORM_FILL_TRIGGER_KEYWORDS, WORKFLOW_TYPE as FORM_FILL_WORKFLOW_TYPE
|
from .application_form_fill.constants import FORM_FILL_TRIGGER_KEYWORDS, WORKFLOW_TYPE as FORM_FILL_WORKFLOW_TYPE
|
||||||
from .llm import LLMConfigurationError, LLMRequestError, generate_completion
|
from .llm import LLMConfigurationError, LLMRequestError, generate_completion
|
||||||
from .models import Conversation, FileAttachment
|
from .models import Conversation, FileAttachment
|
||||||
|
from .regulatory_info_package.constants import (
|
||||||
|
REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS,
|
||||||
|
WORKFLOW_TYPE as REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -18,6 +22,7 @@ logger = logging.getLogger(__name__)
|
|||||||
ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"}
|
ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"}
|
||||||
ROUTE_ACTIONS.add("regulatory_review")
|
ROUTE_ACTIONS.add("regulatory_review")
|
||||||
ROUTE_ACTIONS.add(FORM_FILL_WORKFLOW_TYPE)
|
ROUTE_ACTIONS.add(FORM_FILL_WORKFLOW_TYPE)
|
||||||
|
ROUTE_ACTIONS.add(REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@@ -45,6 +50,10 @@ class SkillRoute:
|
|||||||
def starts_application_form_fill(self) -> bool:
|
def starts_application_form_fill(self) -> bool:
|
||||||
return self.action == FORM_FILL_WORKFLOW_TYPE
|
return self.action == FORM_FILL_WORKFLOW_TYPE
|
||||||
|
|
||||||
|
@property
def starts_regulatory_info_package(self) -> bool:
    """Whether this route's action equals the regulatory info package workflow type."""
    return self.action == REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_normal_chat(self) -> bool:
|
def is_normal_chat(self) -> bool:
|
||||||
return self.action == "normal_chat"
|
return self.action == "normal_chat"
|
||||||
@@ -80,6 +89,14 @@ def route_message_intent(conversation: Conversation, content: str) -> SkillRoute
|
|||||||
|
|
||||||
|
|
||||||
def _deterministic_workflow_route(conversation: Conversation, content: str) -> SkillRoute | None:
|
def _deterministic_workflow_route(conversation: Conversation, content: str) -> SkillRoute | None:
|
||||||
|
if _matches_regulatory_info_package(content):
|
||||||
|
return SkillRoute(
|
||||||
|
action=REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE,
|
||||||
|
workflow_type=REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE,
|
||||||
|
confidence=0.9,
|
||||||
|
reason="命中明确第1章监管信息材料包生成关键词。",
|
||||||
|
source="rule_preflight",
|
||||||
|
)
|
||||||
if _matches_application_form_fill(content):
|
if _matches_application_form_fill(content):
|
||||||
return SkillRoute(
|
return SkillRoute(
|
||||||
action=FORM_FILL_WORKFLOW_TYPE,
|
action=FORM_FILL_WORKFLOW_TYPE,
|
||||||
@@ -144,7 +161,9 @@ def _route_with_llm(
|
|||||||
return SkillRoute(
|
return SkillRoute(
|
||||||
action=action,
|
action=action,
|
||||||
skill_name="attachment_reader" if action == "attachment_reader" else "",
|
skill_name="attachment_reader" if action == "attachment_reader" else "",
|
||||||
workflow_type=action if action in {"file_summary", "regulatory_review", FORM_FILL_WORKFLOW_TYPE} else "",
|
workflow_type=action
|
||||||
|
if action in {"file_summary", "regulatory_review", FORM_FILL_WORKFLOW_TYPE, REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE}
|
||||||
|
else "",
|
||||||
confidence=_float_or_zero(payload.get("confidence")),
|
confidence=_float_or_zero(payload.get("confidence")),
|
||||||
reason=str(payload.get("reason") or ""),
|
reason=str(payload.get("reason") or ""),
|
||||||
source="llm",
|
source="llm",
|
||||||
@@ -152,6 +171,15 @@ def _route_with_llm(
|
|||||||
|
|
||||||
|
|
||||||
def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute:
|
def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute:
|
||||||
|
if _matches_regulatory_info_package(content):
|
||||||
|
return SkillRoute(
|
||||||
|
action=REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE,
|
||||||
|
workflow_type=REGULATORY_INFO_PACKAGE_WORKFLOW_TYPE,
|
||||||
|
confidence=0.7,
|
||||||
|
reason="命中第1章监管信息材料包生成关键词。",
|
||||||
|
source="rule_fallback",
|
||||||
|
)
|
||||||
|
|
||||||
if _matches_application_form_fill(content):
|
if _matches_application_form_fill(content):
|
||||||
return SkillRoute(
|
return SkillRoute(
|
||||||
action=FORM_FILL_WORKFLOW_TYPE,
|
action=FORM_FILL_WORKFLOW_TYPE,
|
||||||
@@ -210,11 +238,12 @@ def _router_system_prompt() -> str:
|
|||||||
return (
|
return (
|
||||||
"你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。"
|
"你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。"
|
||||||
"你必须只输出 JSON 对象,不要输出 Markdown。"
|
"你必须只输出 JSON 对象,不要输出 Markdown。"
|
||||||
"可选 action:normal_chat、attachment_reader、file_summary、regulatory_review、application_form_fill。"
|
"可选 action:normal_chat、attachment_reader、file_summary、regulatory_review、application_form_fill、regulatory_info_package。"
|
||||||
"attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。"
|
"attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。"
|
||||||
"file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。"
|
"file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。"
|
||||||
"regulatory_review 用于用户要求法规核查、NMPA核查、完整性核查、章节一致性核查、风险预警或整改建议。"
|
"regulatory_review 用于用户要求法规核查、NMPA核查、完整性核查、章节一致性核查、风险预警或整改建议。"
|
||||||
"application_form_fill 用于用户要求填注册证、生成申报模板、填写对应表格、安全和性能基本原则清单或自动填表。"
|
"application_form_fill 用于用户要求填注册证、生成申报模板、填写对应表格、安全和性能基本原则清单或自动填表。"
|
||||||
|
"regulatory_info_package 用于用户要求根据说明书生成第1章监管信息、监管信息材料包、申请表、产品列表或声明材料包。"
|
||||||
"normal_chat 用于不需要读取附件或执行工作流的一般问答。"
|
"normal_chat 用于不需要读取附件或执行工作流的一般问答。"
|
||||||
"输出字段:action、confidence、reason。"
|
"输出字段:action、confidence、reason。"
|
||||||
)
|
)
|
||||||
@@ -268,6 +297,11 @@ def _matches_regulatory_review(content: str) -> bool:
|
|||||||
return any(keyword in normalized for keyword in keywords)
|
return any(keyword in normalized for keyword in keywords)
|
||||||
|
|
||||||
|
|
||||||
|
def _matches_regulatory_info_package(content: str) -> bool:
|
||||||
|
normalized = "".join((content or "").lower().split())
|
||||||
|
return any("".join(keyword.lower().split()) in normalized for keyword in REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS)
|
||||||
|
|
||||||
|
|
||||||
def _matches_application_form_fill(content: str) -> bool:
|
def _matches_application_form_fill(content: str) -> bool:
|
||||||
normalized = content.lower()
|
normalized = content.lower()
|
||||||
return any(keyword.lower() in normalized for keyword in FORM_FILL_TRIGGER_KEYWORDS)
|
return any(keyword.lower() in normalized for keyword in FORM_FILL_TRIGGER_KEYWORDS)
|
||||||
|
|||||||
@@ -21,10 +21,15 @@ from .application_form_fill.views import (
|
|||||||
batch_status as application_form_fill_batch_status,
|
batch_status as application_form_fill_batch_status,
|
||||||
start as application_form_fill_start,
|
start as application_form_fill_start,
|
||||||
)
|
)
|
||||||
|
from .regulatory_info_package.views import (
|
||||||
|
batch_status as regulatory_info_package_batch_status,
|
||||||
|
start as regulatory_info_package_start,
|
||||||
|
)
|
||||||
from .views import (
|
from .views import (
|
||||||
knowledge_base_document_detail,
|
knowledge_base_document_detail,
|
||||||
knowledge_base_document_index,
|
knowledge_base_document_index,
|
||||||
knowledge_base_documents,
|
knowledge_base_documents,
|
||||||
|
knowledge_base_rebuild_index,
|
||||||
knowledge_base_search,
|
knowledge_base_search,
|
||||||
knowledge_base_status,
|
knowledge_base_status,
|
||||||
)
|
)
|
||||||
@@ -111,6 +116,16 @@ urlpatterns = [
|
|||||||
application_form_fill_batch_status,
|
application_form_fill_batch_status,
|
||||||
name="application_form_fill_batch_status",
|
name="application_form_fill_batch_status",
|
||||||
),
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/regulatory-info-package/start/",
|
||||||
|
regulatory_info_package_start,
|
||||||
|
name="regulatory_info_package_start",
|
||||||
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/regulatory-info-package/<int:batch_id>/status/",
|
||||||
|
regulatory_info_package_batch_status,
|
||||||
|
name="regulatory_info_package_batch_status",
|
||||||
|
),
|
||||||
path(
|
path(
|
||||||
"api/review-agent/knowledge-base/status/",
|
"api/review-agent/knowledge-base/status/",
|
||||||
knowledge_base_status,
|
knowledge_base_status,
|
||||||
@@ -121,6 +136,11 @@ urlpatterns = [
|
|||||||
knowledge_base_search,
|
knowledge_base_search,
|
||||||
name="knowledge_base_search",
|
name="knowledge_base_search",
|
||||||
),
|
),
|
||||||
|
path(
|
||||||
|
"api/review-agent/knowledge-base/rebuild-index/",
|
||||||
|
knowledge_base_rebuild_index,
|
||||||
|
name="knowledge_base_rebuild_index",
|
||||||
|
),
|
||||||
path(
|
path(
|
||||||
"api/review-agent/knowledge-base/documents/",
|
"api/review-agent/knowledge-base/documents/",
|
||||||
knowledge_base_documents,
|
knowledge_base_documents,
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
from django.contrib.auth.decorators import login_required
|
from django.contrib.auth.decorators import login_required
|
||||||
|
from django.conf import settings
|
||||||
from django.db.models import Count, Q, Sum
|
from django.db.models import Count, Q, Sum
|
||||||
import json
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from django.http import HttpRequest, HttpResponse, JsonResponse, StreamingHttpResponse
|
from django.http import HttpRequest, HttpResponse, JsonResponse, StreamingHttpResponse
|
||||||
from django.shortcuts import redirect, render
|
from django.shortcuts import redirect, render
|
||||||
@@ -14,7 +16,15 @@ from .services import (
|
|||||||
send_message,
|
send_message,
|
||||||
stream_message,
|
stream_message,
|
||||||
)
|
)
|
||||||
from .models import ApplicationFormFillBatch, Conversation, FileAttachment, FileSummaryBatch, RegulatoryReviewBatch, WorkflowNodeRun
|
from .models import (
|
||||||
|
ApplicationFormFillBatch,
|
||||||
|
Conversation,
|
||||||
|
FileAttachment,
|
||||||
|
FileSummaryBatch,
|
||||||
|
RegulatoryInfoPackageBatch,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
from .knowledge_base import build_knowledge_base_context, search_knowledge_base
|
from .knowledge_base import build_knowledge_base_context, search_knowledge_base
|
||||||
from .knowledge_base import (
|
from .knowledge_base import (
|
||||||
build_knowledge_base_context_for_user,
|
build_knowledge_base_context_for_user,
|
||||||
@@ -27,6 +37,9 @@ from .knowledge_base import (
|
|||||||
)
|
)
|
||||||
from .models import KnowledgeBaseDocument
|
from .models import KnowledgeBaseDocument
|
||||||
from .regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
|
from .regulatory_review.services.info_extract import ensure_regulatory_condition_candidates
|
||||||
|
from .regulatory_review.services.rag_embedding import get_embedding_provider
|
||||||
|
from .regulatory_review.services.rag_index import build_chroma_index
|
||||||
|
from .regulatory_review.services.rule_loader import load_rule_file
|
||||||
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@@ -151,6 +164,24 @@ def knowledge_base_status(request: HttpRequest) -> JsonResponse:
|
|||||||
return JsonResponse(build_knowledge_base_context_for_user(request.user))
|
return JsonResponse(build_knowledge_base_context_for_user(request.user))
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
|
||||||
|
@require_http_methods(["POST"])
|
||||||
|
def knowledge_base_rebuild_index(request: HttpRequest) -> JsonResponse:
|
||||||
|
payload = rebuild_knowledge_base_index()
|
||||||
|
return JsonResponse({"knowledge_base": build_knowledge_base_context_for_user(request.user), **payload})
|
||||||
|
|
||||||
|
|
||||||
|
def rebuild_knowledge_base_index() -> dict[str, object]:
|
||||||
|
rule_set = load_rule_file()
|
||||||
|
source_dir = Path(settings.BASE_DIR) / rule_set["source_material_dir"]
|
||||||
|
chunk_count = build_chroma_index(
|
||||||
|
source_dir=source_dir,
|
||||||
|
embedding_provider=get_embedding_provider(),
|
||||||
|
reset=True,
|
||||||
|
)
|
||||||
|
return {"chunk_count": chunk_count}
|
||||||
|
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@require_http_methods(["POST"])
|
@require_http_methods(["POST"])
|
||||||
def knowledge_base_search(request: HttpRequest) -> JsonResponse:
|
def knowledge_base_search(request: HttpRequest) -> JsonResponse:
|
||||||
@@ -306,6 +337,25 @@ def build_workflow_cards(conversation: Conversation) -> list[dict[str, object]]:
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
rip_batches = RegulatoryInfoPackageBatch.objects.filter(conversation=conversation, is_deleted=False)
|
||||||
|
for batch in rip_batches:
|
||||||
|
cards.append(
|
||||||
|
{
|
||||||
|
"id": batch.pk,
|
||||||
|
"workflow_type": "regulatory_info_package",
|
||||||
|
"batch_no": batch.batch_no,
|
||||||
|
"status": batch.status,
|
||||||
|
"error_message": batch.error_message,
|
||||||
|
"risk_label": _format_regulatory_info_package_label(batch),
|
||||||
|
"created_at": batch.created_at,
|
||||||
|
"nodes": list(
|
||||||
|
WorkflowNodeRun.objects.filter(
|
||||||
|
workflow_type="regulatory_info_package",
|
||||||
|
workflow_batch_id=batch.pk,
|
||||||
|
).order_by("id")
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
return sorted(cards, key=lambda item: item["created_at"], reverse=True)[:5]
|
return sorted(cards, key=lambda item: item["created_at"], reverse=True)[:5]
|
||||||
|
|
||||||
|
|
||||||
@@ -351,6 +401,20 @@ def _format_form_fill_label(batch: ApplicationFormFillBatch) -> str:
|
|||||||
return " · ".join(parts)
|
return " · ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_regulatory_info_package_label(batch: RegulatoryInfoPackageBatch) -> str:
|
||||||
|
parts = []
|
||||||
|
if batch.product_name:
|
||||||
|
parts.append(batch.product_name)
|
||||||
|
if batch.generated_files:
|
||||||
|
success_count = sum(1 for item in batch.generated_files if item.get("status") in {"success", "fallback_success"})
|
||||||
|
parts.append(f"生成 {success_count}/7")
|
||||||
|
if batch.missing_fields:
|
||||||
|
parts.append(f"缺失 {len(batch.missing_fields)}")
|
||||||
|
if batch.conflict_fields:
|
||||||
|
parts.append(f"冲突 {len(batch.conflict_fields)}")
|
||||||
|
return " · ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
def build_home_dashboard_context(user) -> dict[str, object]:
|
def build_home_dashboard_context(user) -> dict[str, object]:
|
||||||
conversations = Conversation.objects.filter(user=user)
|
conversations = Conversation.objects.filter(user=user)
|
||||||
active_attachments = FileAttachment.objects.filter(user=user).exclude(
|
active_attachments = FileAttachment.objects.filter(user=user).exclude(
|
||||||
|
|||||||
@@ -517,6 +517,8 @@
|
|||||||
attributeName = "data-regulatory-status-url-template";
|
attributeName = "data-regulatory-status-url-template";
|
||||||
} else if (workflow_type === "application_form_fill") {
|
} else if (workflow_type === "application_form_fill") {
|
||||||
attributeName = "data-application-form-fill-status-url-template";
|
attributeName = "data-application-form-fill-status-url-template";
|
||||||
|
} else if (workflow_type === "regulatory_info_package") {
|
||||||
|
attributeName = "data-regulatory-info-package-status-url-template";
|
||||||
}
|
}
|
||||||
return templateUrl(attributeName, "__batch_id__", batchId);
|
return templateUrl(attributeName, "__batch_id__", batchId);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,8 @@
|
|||||||
var sourceTable = document.getElementById("knowledgeSourceTable");
|
var sourceTable = document.getElementById("knowledgeSourceTable");
|
||||||
var documentFileInput = document.getElementById("knowledgeDocumentFile");
|
var documentFileInput = document.getElementById("knowledgeDocumentFile");
|
||||||
var uploadDropzone = document.getElementById("knowledgeUploadDropzone");
|
var uploadDropzone = document.getElementById("knowledgeUploadDropzone");
|
||||||
|
var rebuildButton = document.getElementById("knowledgeRebuildIndexButton");
|
||||||
|
var rebuildStatus = document.getElementById("knowledgeRebuildStatus");
|
||||||
|
|
||||||
function csrfToken() {
|
function csrfToken() {
|
||||||
var cookie = document.cookie.split("; ").find(function (item) {
|
var cookie = document.cookie.split("; ").find(function (item) {
|
||||||
@@ -68,6 +70,17 @@
|
|||||||
return response.json();
|
return response.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function rebuildIndex() {
|
||||||
|
var response = await fetch(page.getAttribute("data-rebuild-url"), {
|
||||||
|
method: "POST",
|
||||||
|
headers: { "X-CSRFToken": csrfToken() },
|
||||||
|
});
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error("法规索引重建失败。");
|
||||||
|
}
|
||||||
|
return response.json();
|
||||||
|
}
|
||||||
|
|
||||||
function renderResults(payload) {
|
function renderResults(payload) {
|
||||||
if (!results) {
|
if (!results) {
|
||||||
return;
|
return;
|
||||||
@@ -196,6 +209,59 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function handleRebuild(trigger) {
|
||||||
|
if (!page.getAttribute("data-rebuild-url")) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
var originalText = trigger ? trigger.textContent : "";
|
||||||
|
if (trigger) {
|
||||||
|
trigger.disabled = true;
|
||||||
|
trigger.textContent = "入库中";
|
||||||
|
}
|
||||||
|
if (rebuildButton && trigger !== rebuildButton) {
|
||||||
|
rebuildButton.disabled = true;
|
||||||
|
}
|
||||||
|
if (rebuildStatus) {
|
||||||
|
rebuildStatus.textContent = "正在重建法规 RAG 索引...";
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
var payload = await rebuildIndex();
|
||||||
|
if (rebuildStatus) {
|
||||||
|
rebuildStatus.textContent = "重建完成,入库片段 " + (payload.chunk_count || 0) + " 个。";
|
||||||
|
}
|
||||||
|
window.setTimeout(function () {
|
||||||
|
window.location.reload();
|
||||||
|
}, 600);
|
||||||
|
} catch (error) {
|
||||||
|
if (rebuildStatus) {
|
||||||
|
rebuildStatus.textContent = error.message || "法规索引重建失败。";
|
||||||
|
}
|
||||||
|
if (trigger) {
|
||||||
|
trigger.disabled = false;
|
||||||
|
trigger.textContent = originalText;
|
||||||
|
}
|
||||||
|
if (rebuildButton) {
|
||||||
|
rebuildButton.disabled = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rebuildButton) {
|
||||||
|
rebuildButton.addEventListener("click", function () {
|
||||||
|
handleRebuild(rebuildButton);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sourceTable) {
|
||||||
|
sourceTable.addEventListener("click", function (event) {
|
||||||
|
var button = event.target.closest("[data-source-action='index']");
|
||||||
|
if (!button) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
handleRebuild(button);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (searchForm && queryInput) {
|
if (searchForm && queryInput) {
|
||||||
searchForm.addEventListener("submit", async function (event) {
|
searchForm.addEventListener("submit", async function (event) {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
|
|||||||
@@ -225,6 +225,11 @@
|
|||||||
type="button"
|
type="button"
|
||||||
data-prompt-template="请基于当前对话最近成功汇总的产品资料,自动提取产品关键信息并填入申报文件模板"
|
data-prompt-template="请基于当前对话最近成功汇总的产品资料,自动提取产品关键信息并填入申报文件模板"
|
||||||
>申报文件填表</button>
|
>申报文件填表</button>
|
||||||
|
<button
|
||||||
|
class="tool-chip"
|
||||||
|
type="button"
|
||||||
|
data-prompt-template="根据说明书生成第1章监管信息"
|
||||||
|
>第1章监管信息</button>
|
||||||
</div>
|
</div>
|
||||||
<button class="send-button" type="submit" id="sendButton">发送</button>
|
<button class="send-button" type="submit" id="sendButton">发送</button>
|
||||||
</div>
|
</div>
|
||||||
@@ -241,6 +246,7 @@
|
|||||||
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
|
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
|
||||||
data-regulatory-status-url-template="/api/review-agent/regulatory-review/__batch_id__/status/"
|
data-regulatory-status-url-template="/api/review-agent/regulatory-review/__batch_id__/status/"
|
||||||
data-application-form-fill-status-url-template="/api/review-agent/application-form-fill/__batch_id__/status/"
|
data-application-form-fill-status-url-template="/api/review-agent/application-form-fill/__batch_id__/status/"
|
||||||
|
data-regulatory-info-package-status-url-template="/api/review-agent/regulatory-info-package/__batch_id__/status/"
|
||||||
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
|
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
|
||||||
>
|
>
|
||||||
<section class="summary-section upload-section">
|
<section class="summary-section upload-section">
|
||||||
|
|||||||
@@ -32,6 +32,7 @@
|
|||||||
class="knowledge-page"
|
class="knowledge-page"
|
||||||
data-document-url="{% url 'knowledge_base_document_list' %}"
|
data-document-url="{% url 'knowledge_base_document_list' %}"
|
||||||
data-search-url="{% url 'knowledge_base_search' %}"
|
data-search-url="{% url 'knowledge_base_search' %}"
|
||||||
|
data-rebuild-url="{% url 'knowledge_base_rebuild_index' %}"
|
||||||
>
|
>
|
||||||
<header class="attachment-manager-hero attachment-manager-toolbar">
|
<header class="attachment-manager-hero attachment-manager-toolbar">
|
||||||
<div>
|
<div>
|
||||||
@@ -96,9 +97,10 @@
|
|||||||
</div>
|
</div>
|
||||||
</dl>
|
</dl>
|
||||||
<p class="knowledge-panel-note">{{ knowledge_base.status.message }}</p>
|
<p class="knowledge-panel-note">{{ knowledge_base.status.message }}</p>
|
||||||
|
<p class="upload-status" id="knowledgeRebuildStatus"></p>
|
||||||
<div class="knowledge-form-actions">
|
<div class="knowledge-form-actions">
|
||||||
<button type="button" onclick="window.location.reload()">刷新状态</button>
|
<button type="button" onclick="window.location.reload()">刷新状态</button>
|
||||||
<button type="button" disabled>重建索引</button>
|
<button type="button" id="knowledgeRebuildIndexButton">重建索引</button>
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
@@ -182,6 +184,7 @@
|
|||||||
<th>类型</th>
|
<th>类型</th>
|
||||||
<th>大小</th>
|
<th>大小</th>
|
||||||
<th>索引</th>
|
<th>索引</th>
|
||||||
|
<th>操作</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
@@ -192,10 +195,13 @@
|
|||||||
<td>{{ source.suffix }}</td>
|
<td>{{ source.suffix }}</td>
|
||||||
<td>{{ source.size }} bytes</td>
|
<td>{{ source.size }} bytes</td>
|
||||||
<td>{{ source.indexed_label }}</td>
|
<td>{{ source.indexed_label }}</td>
|
||||||
|
<td class="attachment-actions">
|
||||||
|
<button type="button" data-source-action="index">手动入库</button>
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% empty %}
|
{% empty %}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="5" class="table-empty">暂无法规材料</td>
|
<td colspan="6" class="table-empty">暂无法规材料</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
@@ -209,5 +215,5 @@
|
|||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
<script src="{% static 'js/knowledge_base.js' %}?v=20260608-kb5"></script>
|
<script src="{% static 'js/knowledge_base.js' %}?v=20260608-kb6"></script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
8
tests/conftest.py
Normal file
8
tests/conftest.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def mock_regulatory_info_package_page_count(monkeypatch):
|
||||||
|
from review_agent.regulatory_info_package.services import package_generate
|
||||||
|
|
||||||
|
monkeypatch.setattr(package_generate, "count_document_pages", lambda _path: 1)
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from review_agent.models import KnowledgeBaseDocument
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
from review_agent.services import build_knowledge_context
|
from review_agent.services import build_knowledge_context, send_message, stream_message
|
||||||
|
|
||||||
|
|
||||||
pytestmark = pytest.mark.django_db
|
pytestmark = pytest.mark.django_db
|
||||||
@@ -57,3 +57,67 @@ def test_build_knowledge_context_uses_full_document_when_name_matches(settings,
|
|||||||
assert "全文材料" in context
|
assert "全文材料" in context
|
||||||
assert "来源:用户知识库/孙之烨-260510.txt" in context
|
assert "来源:用户知识库/孙之烨-260510.txt" in context
|
||||||
assert "完整经历:曾组织技术分享并带队参加竞赛" in context
|
assert "完整经历:曾组织技术分享并带队参加竞赛" in context
|
||||||
|
|
||||||
|
|
||||||
|
def test_send_message_refuses_out_of_scope_answer_without_knowledge_context(monkeypatch, django_user_model):
|
||||||
|
from review_agent.models import Conversation
|
||||||
|
|
||||||
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
|
conversation = Conversation.objects.create(user=user, title="会话")
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.search_knowledge_base",
|
||||||
|
lambda query, n_results=5: {"query": query, "results": [], "error_message": ""},
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.generate_reply",
|
||||||
|
lambda *args, **kwargs: pytest.fail("out-of-scope answer without knowledge context must not call LLM"),
|
||||||
|
)
|
||||||
|
|
||||||
|
_, assistant_message = send_message(conversation, "孙之烨是谁")
|
||||||
|
|
||||||
|
assert "没有在当前启用的知识库材料中找到" in assistant_message.content
|
||||||
|
assert "与当前主营业务无关" in assistant_message.content
|
||||||
|
|
||||||
|
|
||||||
|
def test_stream_message_refuses_out_of_scope_answer_without_knowledge_context(monkeypatch, django_user_model):
|
||||||
|
from review_agent.models import Conversation
|
||||||
|
|
||||||
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
|
conversation = Conversation.objects.create(user=user, title="会话")
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.search_knowledge_base",
|
||||||
|
lambda query, n_results=5: {"query": query, "results": [], "error_message": ""},
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.stream_reply",
|
||||||
|
lambda *args, **kwargs: pytest.fail("out-of-scope answer without knowledge context must not call streaming LLM"),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.generate_reply",
|
||||||
|
lambda *args, **kwargs: pytest.fail("out-of-scope answer without knowledge context must not call fallback LLM"),
|
||||||
|
)
|
||||||
|
|
||||||
|
frames = list(stream_message(conversation, "给我一份红烧肉菜谱"))
|
||||||
|
|
||||||
|
assert any("没有在当前启用的知识库材料中找到" in frame for frame in frames)
|
||||||
|
assert any("与当前主营业务无关" in frame for frame in frames)
|
||||||
|
assert any("done" in frame for frame in frames)
|
||||||
|
|
||||||
|
|
||||||
|
def test_business_question_without_knowledge_context_can_use_llm(monkeypatch, django_user_model):
|
||||||
|
from review_agent.models import Conversation
|
||||||
|
|
||||||
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
|
conversation = Conversation.objects.create(user=user, title="会话")
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.search_knowledge_base",
|
||||||
|
lambda query, n_results=5: {"query": query, "results": [], "error_message": ""},
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.services.generate_reply",
|
||||||
|
lambda *args, **kwargs: "注册检验报告通常用于证明产品性能符合要求。",
|
||||||
|
)
|
||||||
|
|
||||||
|
_, assistant_message = send_message(conversation, "注册检验报告有什么作用")
|
||||||
|
|
||||||
|
assert "注册检验报告" in assistant_message.content
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ from review_agent.models import (
|
|||||||
FileAttachment,
|
FileAttachment,
|
||||||
FileSummaryBatch,
|
FileSummaryBatch,
|
||||||
Message,
|
Message,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
WorkflowNodeRun,
|
WorkflowNodeRun,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -269,6 +270,39 @@ def test_conversation_delete_api_removes_owned_conversation(client, django_user_
|
|||||||
assert Conversation.objects.filter(pk=other_conversation.pk).exists()
|
assert Conversation.objects.filter(pk=other_conversation.pk).exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_conversation_delete_api_removes_protected_workflow_dependents(client, django_user_model):
|
||||||
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
|
conversation = Conversation.objects.create(user=user, title="待删除")
|
||||||
|
summary_batch = FileSummaryBatch.objects.create(
|
||||||
|
conversation=conversation,
|
||||||
|
user=user,
|
||||||
|
batch_no="FS-DELETE-PROTECTED",
|
||||||
|
)
|
||||||
|
regulatory_batch = RegulatoryReviewBatch.objects.create(
|
||||||
|
conversation=conversation,
|
||||||
|
user=user,
|
||||||
|
source_summary_batch=summary_batch,
|
||||||
|
batch_no="RR-DELETE-PROTECTED",
|
||||||
|
)
|
||||||
|
form_batch = ApplicationFormFillBatch.objects.create(
|
||||||
|
conversation=conversation,
|
||||||
|
user=user,
|
||||||
|
source_summary_batch=summary_batch,
|
||||||
|
source_regulatory_batch=regulatory_batch,
|
||||||
|
batch_no="AFF-DELETE-PROTECTED",
|
||||||
|
)
|
||||||
|
client.force_login(user)
|
||||||
|
|
||||||
|
response = client.delete(reverse("review_agent_conversation_detail", args=[conversation.pk]))
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.json()["ok"] is True
|
||||||
|
assert not Conversation.objects.filter(pk=conversation.pk).exists()
|
||||||
|
assert not FileSummaryBatch.objects.filter(pk=summary_batch.pk).exists()
|
||||||
|
assert not RegulatoryReviewBatch.objects.filter(pk=regulatory_batch.pk).exists()
|
||||||
|
assert not ApplicationFormFillBatch.objects.filter(pk=form_batch.pk).exists()
|
||||||
|
|
||||||
|
|
||||||
def test_conversation_delete_api_rejects_unowned_conversation(client, django_user_model):
|
def test_conversation_delete_api_rejects_unowned_conversation(client, django_user_model):
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
other = django_user_model.objects.create_user(username="other", password="pass")
|
other = django_user_model.objects.create_user(username="other", password="pass")
|
||||||
|
|||||||
@@ -286,7 +286,7 @@ def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkey
|
|||||||
lambda conversation, content, knowledge_context="": "非流式完整回复",
|
lambda conversation, content, knowledge_context="": "非流式完整回复",
|
||||||
)
|
)
|
||||||
|
|
||||||
frames = list(stream_message(conversation, "普通问题"))
|
frames = list(stream_message(conversation, "注册检验报告审核要点有哪些"))
|
||||||
|
|
||||||
joined = "".join(frames)
|
joined = "".join(frames)
|
||||||
assert "已生成部分内容" in joined
|
assert "已生成部分内容" in joined
|
||||||
|
|||||||
@@ -2,7 +2,14 @@ import pytest
|
|||||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||||
from django.urls import reverse
|
from django.urls import reverse
|
||||||
|
|
||||||
from review_agent.knowledge_base import build_knowledge_base_context, delete_document, search_knowledge_base
|
from review_agent.knowledge_base import (
|
||||||
|
build_knowledge_base_context,
|
||||||
|
delete_document,
|
||||||
|
index_managed_document,
|
||||||
|
search_knowledge_base,
|
||||||
|
update_document,
|
||||||
|
)
|
||||||
|
from review_agent.views import rebuild_knowledge_base_index
|
||||||
from review_agent.models import KnowledgeBaseDocument
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
|
|
||||||
|
|
||||||
@@ -16,6 +23,7 @@ def test_knowledge_base_context_reports_rule_and_sources():
|
|||||||
assert context["rule"]["requirement_count"] > 0
|
assert context["rule"]["requirement_count"] > 0
|
||||||
assert context["source_count"] > 0
|
assert context["source_count"] > 0
|
||||||
assert context["collection_name"] == "nmpa_ivd_registration_v1"
|
assert context["collection_name"] == "nmpa_ivd_registration_v1"
|
||||||
|
assert not any("模拟题二" in source["relative_path"] for source in context["sources"])
|
||||||
|
|
||||||
|
|
||||||
def test_knowledge_base_page_requires_login(client):
|
def test_knowledge_base_page_requires_login(client):
|
||||||
@@ -36,6 +44,11 @@ def test_knowledge_base_page_renders_for_user(client, django_user_model):
|
|||||||
content = response.content.decode("utf-8")
|
content = response.content.decode("utf-8")
|
||||||
tabbar = content[content.index('<div class="tabbar"') : content.index("</div>", content.index('<div class="tabbar"'))]
|
tabbar = content[content.index('<div class="tabbar"') : content.index("</div>", content.index('<div class="tabbar"'))]
|
||||||
assert tabbar.index("审核智能体") < tabbar.index("知识库管理") < tabbar.index("附件管理")
|
assert tabbar.index("审核智能体") < tabbar.index("知识库管理") < tabbar.index("附件管理")
|
||||||
|
assert "data-rebuild-url=" in content
|
||||||
|
assert 'id="knowledgeRebuildIndexButton"' in content
|
||||||
|
assert "重建索引" in content
|
||||||
|
assert 'data-source-action="index"' in content
|
||||||
|
assert "手动入库" in content
|
||||||
|
|
||||||
|
|
||||||
def test_knowledge_base_status_api(client, django_user_model):
|
def test_knowledge_base_status_api(client, django_user_model):
|
||||||
@@ -48,6 +61,53 @@ def test_knowledge_base_status_api(client, django_user_model):
|
|||||||
assert response.json()["rule"]["code"] == "nmpa_ivd_registration_v1"
|
assert response.json()["rule"]["code"] == "nmpa_ivd_registration_v1"
|
||||||
|
|
||||||
|
|
||||||
|
def test_knowledge_base_rebuild_index_api(client, django_user_model, monkeypatch):
|
||||||
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
|
client.force_login(user)
|
||||||
|
calls = []
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.views.rebuild_knowledge_base_index",
|
||||||
|
lambda: calls.append("rebuild") or {"chunk_count": 12},
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.post(reverse("knowledge_base_rebuild_index"))
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert response.json()["chunk_count"] == 12
|
||||||
|
assert response.json()["knowledge_base"]["collection"]["count"] >= 0
|
||||||
|
assert calls == ["rebuild"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_rebuild_knowledge_base_index_requests_reset(settings, tmp_path, monkeypatch):
|
||||||
|
settings.MEDIA_ROOT = tmp_path
|
||||||
|
settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "chroma"
|
||||||
|
settings.REGULATORY_RAG_CHROMA_PATH.mkdir()
|
||||||
|
stale_file = settings.REGULATORY_RAG_CHROMA_PATH / "chroma.sqlite3"
|
||||||
|
stale_file.write_text("stale", encoding="utf-8")
|
||||||
|
calls = []
|
||||||
|
|
||||||
|
monkeypatch.setattr("review_agent.views.load_rule_file", lambda: {"source_material_dir": "docs/0.原始材料"})
|
||||||
|
monkeypatch.setattr("review_agent.views.get_embedding_provider", lambda: "provider")
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"review_agent.views.build_chroma_index",
|
||||||
|
lambda source_dir, embedding_provider, reset=False: calls.append(
|
||||||
|
{
|
||||||
|
"source_dir": source_dir,
|
||||||
|
"embedding_provider": embedding_provider,
|
||||||
|
"reset": reset,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
or 8,
|
||||||
|
)
|
||||||
|
|
||||||
|
payload = rebuild_knowledge_base_index()
|
||||||
|
|
||||||
|
assert payload["chunk_count"] == 8
|
||||||
|
assert calls[0]["embedding_provider"] == "provider"
|
||||||
|
assert calls[0]["reset"] is True
|
||||||
|
|
||||||
|
|
||||||
def test_knowledge_base_search_rejects_blank_query():
|
def test_knowledge_base_search_rejects_blank_query():
|
||||||
payload = search_knowledge_base("")
|
payload = search_knowledge_base("")
|
||||||
|
|
||||||
@@ -103,6 +163,8 @@ def test_knowledge_base_search_api_returns_payload(client, django_user_model):
|
|||||||
|
|
||||||
def test_knowledge_base_document_crud_api(client, settings, tmp_path, django_user_model):
|
def test_knowledge_base_document_crud_api(client, settings, tmp_path, django_user_model):
|
||||||
settings.MEDIA_ROOT = tmp_path
|
settings.MEDIA_ROOT = tmp_path
|
||||||
|
settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "chroma"
|
||||||
|
settings.REGULATORY_RAG_PROVIDER = "deterministic"
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
client.force_login(user)
|
client.force_login(user)
|
||||||
|
|
||||||
@@ -176,6 +238,67 @@ def test_delete_document_removes_managed_chunks_from_index(monkeypatch, django_u
|
|||||||
assert deleted_filters == [{"document_id": document.pk}]
|
assert deleted_filters == [{"document_id": document.pk}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_disabling_document_removes_managed_chunks_from_index(monkeypatch, django_user_model):
    """Deactivate an indexed document and check its chunks are deleted by document id.

    The Chroma collection loader is replaced with a stand-in that records
    every ``delete`` filter it receives.
    """
    removed = []

    class RecordingCollection:
        """Stand-in collection that remembers each ``where`` filter."""

        def delete(self, where):
            removed.append(where)

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.ACTIVE,
        is_active=True,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    monkeypatch.setattr(
        "review_agent.knowledge_base._load_chroma_collection",
        lambda: RecordingCollection(),
    )

    update_document(document, {"is_active": False})
    document.refresh_from_db()

    assert document.status == KnowledgeBaseDocument.Status.DISABLED
    assert document.is_active is False
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "disabled"
    assert removed == [{"document_id": document.pk}]
|
||||||
|
|
||||||
|
|
||||||
|
def test_inactive_document_manual_index_clears_existing_chunks(monkeypatch, django_user_model):
    """Manually index a disabled document and check its old chunks are cleared.

    The document starts disabled but still reports 7 indexed chunks; the
    Chroma collection loader is replaced with a recording stand-in.
    """
    removed = []

    class RecordingCollection:
        """Stand-in collection that remembers each ``where`` filter."""

        def delete(self, where):
            removed.append(where)

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    document = KnowledgeBaseDocument.objects.create(
        user=owner,
        display_name="孙之烨简历",
        original_name="孙之烨-260510.pdf",
        storage_path="knowledge_base/resume.pdf",
        file_size=1,
        status=KnowledgeBaseDocument.Status.DISABLED,
        is_active=False,
        indexed_chunk_count=7,
        metadata={"index_status": "indexed", "index_error": ""},
    )
    monkeypatch.setattr(
        "review_agent.knowledge_base._load_chroma_collection",
        lambda: RecordingCollection(),
    )

    chunk_count = index_managed_document(document)
    document.refresh_from_db()

    assert chunk_count == 0
    assert document.indexed_chunk_count == 0
    assert document.metadata["index_status"] == "disabled"
    assert removed == [{"document_id": document.pk}]
|
||||||
|
|
||||||
|
|
||||||
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
|
def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_model):
|
||||||
owner = django_user_model.objects.create_user(username="owner", password="pass")
|
owner = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
other = django_user_model.objects.create_user(username="other", password="pass")
|
other = django_user_model.objects.create_user(username="other", password="pass")
|
||||||
@@ -199,6 +322,8 @@ def test_knowledge_base_document_api_is_scoped_to_owner(client, django_user_mode
|
|||||||
|
|
||||||
def test_knowledge_base_document_manual_index_api(client, settings, tmp_path, django_user_model):
|
def test_knowledge_base_document_manual_index_api(client, settings, tmp_path, django_user_model):
|
||||||
settings.MEDIA_ROOT = tmp_path
|
settings.MEDIA_ROOT = tmp_path
|
||||||
|
settings.REGULATORY_RAG_CHROMA_PATH = tmp_path / "chroma"
|
||||||
|
settings.REGULATORY_RAG_PROVIDER = "deterministic"
|
||||||
user = django_user_model.objects.create_user(username="owner", password="pass")
|
user = django_user_model.objects.create_user(username="owner", password="pass")
|
||||||
client.force_login(user)
|
client.force_login(user)
|
||||||
source_path = tmp_path / "manual.md"
|
source_path = tmp_path / "manual.md"
|
||||||
|
|||||||
88
tests/test_regulatory_info_package_field_extract.py
Normal file
88
tests/test_regulatory_info_package_field_extract.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import InstructionExtractResult
|
||||||
|
from review_agent.regulatory_info_package.services.field_extract import extract_fields_by_rules, run_parallel_extract
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_fields_by_rules_finds_product_name_and_storage():
    """Rule extraction returns the product name and storage condition from labelled lines."""
    extracted = extract_fields_by_rules(
        InstructionExtractResult(
            source_file_name="目标产品说明书.docx",
            paragraphs=["产品名称:新型冠状病毒检测试剂盒", "储存条件:2-8℃保存"],
            sections={},
            tables=[],
            component_tables=[],
            front_text="产品名称:新型冠状病毒检测试剂盒\n储存条件:2-8℃保存",
        )
    )

    assert extracted["product_name"]["value"] == "新型冠状病毒检测试剂盒"
    assert extracted["storage_condition"]["value"] == "2-8℃保存"
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_fields_by_rules_uses_registrant_or_manufacturer_for_applicant():
    """Applicant and manufacturer fields are filled from registrant/manufacturer lines."""
    source_lines = [
        "注册人/售后服务单位名称:卡尤迪生物科技宜兴有限公司",
        "生产企业名称:卡尤迪生物科技宜兴有限公司",
        "生产企业住所:宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "联系方式: 0510-80330909, 0510-80330919",
        "生产地址:江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    ]
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=source_lines,
        sections={},
        tables=[],
        component_tables=[],
        front_text="",
    )

    result = extract_fields_by_rules(instruction)

    # Checked in the same order as the fields are listed here.
    expected_values = {
        "applicant_name": "卡尤迪生物科技宜兴有限公司",
        "manufacturer_name": "卡尤迪生物科技宜兴有限公司",
        "applicant_address": "宜兴经济技术开发区杏里路10号宜兴光电产业园4幢101室、102室",
        "applicant_contact": "0510-80330909, 0510-80330919",
        "production_address": "江苏省宜兴经济技术开发区杏里路10号宜兴光电产业园4幢102室",
    }
    for field_key, expected in expected_values.items():
        assert result[field_key]["value"] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_fields_by_rules_serializes_component_table_and_notes():
    """The component table is emitted as JSON and the section text becomes the notes."""
    component_table = {
        "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=[],
        sections={"【主要组成成分】": "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"},
        tables=[],
        component_tables=[component_table],
        front_text="",
    )

    result = extract_fields_by_rules(instruction)
    # The component table value is a JSON string; decode it before inspecting.
    decoded = json.loads(result["component_table"]["value"])

    assert decoded["header"][0:2] == ["组分", "主要组成成分"]
    assert decoded["rows"][0][0] == "PCR反应液 I"
    assert result["component_notes"]["value"] == "表1 规格A大包装试剂盒组成成分\n注:不同批号试剂盒中各组分不得互换使用。"
|
||||||
|
|
||||||
|
|
||||||
|
def test_run_parallel_extract_keeps_rule_result_when_llm_fails():
    """A raising LLM extractor leaves the rule results intact and reports an error."""

    def failing_llm(_instruction):
        # Always fails, standing in for a broken LLM call.
        raise ValueError("bad llm")

    instruction = InstructionExtractResult(
        source_file_name="目标产品说明书.docx",
        paragraphs=["产品名称:测试产品"],
        sections={},
        tables=[],
        component_tables=[],
        front_text="产品名称:测试产品",
    )

    outcome = run_parallel_extract(instruction, llm_extract_func=failing_llm)

    assert outcome["regex_results"]["product_name"]["value"] == "测试产品"
    assert outcome["llm_results"] == {}
    assert outcome["llm_error"]
|
||||||
24
tests/test_regulatory_info_package_field_merge.py
Normal file
24
tests/test_regulatory_info_package_field_merge.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
from review_agent.regulatory_info_package.services.field_merge import merge_fields
|
||||||
|
|
||||||
|
|
||||||
|
def test_merge_fields_marks_missing_llm_only_and_conflict():
    """Merging flags empty, LLM-only and disagreeing fields with the matching reason."""
    rule_fields = {
        "product_name": {"value": "规则产品", "evidence": "说明书", "confidence": 0.8, "label": "产品名称"},
        "applicant_name": {"value": "", "evidence": "", "confidence": 0.0, "label": "申请人名称"},
        "package_specification": {"value": "24人份/盒", "evidence": "表格", "confidence": 0.7, "label": "包装规格"},
    }
    llm_fields = {
        "intended_use": {"value": "用于检测", "evidence": "LLM", "confidence": 0.6, "label": "预期用途"},
        "package_specification": {"value": "48人份/盒", "evidence": "LLM", "confidence": 0.6, "label": "包装规格"},
    }

    merged, summary = merge_fields(rule_fields, llm_fields)

    applicant = merged["applicant_name"]
    assert applicant.value == "/"
    assert applicant.highlight_reason == "missing"
    assert merged["intended_use"].highlight_reason == "llm_only"
    specification = merged["package_specification"]
    assert specification.value == "24人份/盒"
    assert specification.highlight_reason == "conflict"
    assert any(item["field_key"] == "applicant_name" for item in summary["missing_fields"])
    assert len(summary["llm_only_fields"]) == 1
    assert len(summary["conflict_fields"]) == 1
|
||||||
45
tests/test_regulatory_info_package_frontend.py
Normal file
45
tests/test_regulatory_info_package_frontend.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
import pytest
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, RegulatoryInfoPackageBatch, WorkflowNodeRun
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_workspace_renders_regulatory_info_package_chip_and_card(client, django_user_model):
    """The chat page for a conversation with a finished batch contains the package markers."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-CARD",
        status=RegulatoryInfoPackageBatch.Status.SUCCESS,
        generated_files=[{"status": "success"} for _ in range(7)],
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_info_package",
        workflow_batch_id=batch.pk,
        node_group="regulatory_info_package",
        node_code="zip_export",
        node_name="打包下载",
        status=WorkflowNodeRun.Status.SUCCESS,
        progress=100,
    )
    client.force_login(owner)

    chat_url = f"{reverse('chat')}?conversation={conversation.pk}"
    page = client.get(chat_url).content.decode("utf-8")

    # Each marker must appear somewhere in the rendered HTML.
    expected_markers = (
        "第1章监管信息",
        'data-workflow-type="regulatory_info_package"',
        "data-regulatory-info-package-status-url-template",
        "RIP-CARD",
    )
    for marker in expected_markers:
        assert marker in page
|
||||||
|
|
||||||
|
|
||||||
|
def test_frontend_selects_regulatory_info_package_status_url():
    """Check that ``static/js/app.js`` references the regulatory-info-package workflow.

    The script is read as UTF-8 text (the path is relative to the current
    working directory) and searched for the workflow-type comparison and the
    status-URL template attribute name.
    """
    # Read through a context manager so the file handle is closed
    # deterministically rather than left open until garbage collection.
    with open("static/js/app.js", encoding="utf-8") as script_file:
        script = script_file.read()

    assert 'workflow_type === "regulatory_info_package"' in script
    assert "data-regulatory-info-package-status-url-template" in script
|
||||||
|
|
||||||
48
tests/test_regulatory_info_package_input_select.py
Normal file
48
tests/test_regulatory_info_package_input_select.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileAttachment
|
||||||
|
from review_agent.regulatory_info_package.services.input_select import select_instruction_input
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_select_instruction_input_prefers_message_filename(django_user_model):
    """With two attachments, the one whose name appears in the message is selected."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    shared = {"conversation": conversation, "user": owner}
    mentioned = FileAttachment.objects.create(
        original_name="目标产品说明书.docx",
        storage_path="uploads/target.docx",
        **shared,
    )
    FileAttachment.objects.create(
        original_name="其他说明书.docx",
        storage_path="uploads/other.docx",
        **shared,
    )

    selection = select_instruction_input(conversation, "请使用目标产品说明书生成第1章监管信息")

    assert selection.status == "selected"
    assert selection.attachment == mentioned
    assert selection.file_name == "目标产品说明书.docx"
|
||||||
|
|
||||||
|
|
||||||
|
def test_select_instruction_input_waits_on_multiple_candidates(django_user_model):
    """With two attachments and no file named in the message, selection waits for the user."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    candidate_names = ["A说明书.docx", "B说明书.docx"]
    for file_name in candidate_names:
        FileAttachment.objects.create(
            conversation=conversation,
            user=owner,
            original_name=file_name,
            storage_path=f"uploads/{file_name}",
        )

    selection = select_instruction_input(conversation, "生成第1章监管信息")

    assert selection.status == "waiting_user"
    assert selection.candidates == ["A说明书.docx", "B说明书.docx"]
|
||||||
|
|
||||||
16
tests/test_regulatory_info_package_instruction_extract.py
Normal file
16
tests/test_regulatory_info_package_instruction_extract.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.services.instruction_extract import parse_instruction_docx
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_instruction_docx_extracts_paragraphs_and_tables():
    """Parsing the sample instruction document yields its name, paragraphs and front text.

    The sample path is relative to the current working directory.
    """
    sample_path = Path("docs/0.原始材料/目标产品说明书.docx")

    parsed = parse_instruction_docx(sample_path)

    assert parsed.source_file_name == "目标产品说明书.docx"
    assert parsed.paragraphs
    assert isinstance(parsed.sections, dict)
    assert isinstance(parsed.tables, list)
    assert parsed.front_text
|
||||||
|
|
||||||
9
tests/test_regulatory_info_package_legacy_doc.py
Normal file
9
tests/test_regulatory_info_package_legacy_doc.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from review_agent.regulatory_info_package.services.legacy_doc_document import detect_legacy_doc_capability
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_legacy_doc_capability_is_stable():
    """Capability detection reports one of the two known statuses and adapters."""
    known_statuses = {"available", "unavailable"}
    known_adapters = {"WordComDocAdapter", "UnavailableLegacyDocAdapter"}

    detected = detect_legacy_doc_capability()

    assert detected.status in known_statuses
    assert detected.adapter in known_adapters
|
||||||
|
|
||||||
109
tests/test_regulatory_info_package_models.py
Normal file
109
tests/test_regulatory_info_package_models.py
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
import pytest
|
||||||
|
from django.db import IntegrityError
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
FileAttachment,
|
||||||
|
RegulatoryInfoPackageArtifact,
|
||||||
|
RegulatoryInfoPackageBatch,
|
||||||
|
RegulatoryInfoPackageNotificationRecord,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_info_package_batch_defaults(django_user_model):
    """A batch created with only required fields gets the expected default values."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    attachment = FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="目标产品说明书.docx",
        storage_path="uploads/instruction.docx",
    )

    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        source_attachment=attachment,
        batch_no="RIP-20260610153000-abcdef",
        source_file_name=attachment.original_name,
        source_storage_path=attachment.storage_path,
    )

    assert batch.status == RegulatoryInfoPackageBatch.Status.PENDING
    assert batch.output_zip_name == "第1章 监管信息(预生成版).zip"
    # These list-valued fields are all expected to start out empty.
    list_fields = (
        "generated_files",
        "missing_fields",
        "llm_only_fields",
        "conflict_fields",
        "risk_notes",
    )
    for field_name in list_fields:
        assert getattr(batch, field_name) == []
    assert batch.adapter_summary == {}
    assert str(batch) == "RIP-20260610153000-abcdef"
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_info_package_artifact_and_notification(django_user_model):
    """Artifact and notification records can be created and carry the expected defaults."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610153100-abcdef",
    )

    artifact_fields = {
        "batch": batch,
        "artifact_type": RegulatoryInfoPackageArtifact.ArtifactType.ZIP_PACKAGE,
        "file_format": RegulatoryInfoPackageArtifact.FileFormat.ZIP,
        "name": "主下载包",
        "file_name": "第1章 监管信息(预生成版).zip",
        "storage_path": "media/regulatory_info_package/package.zip",
    }
    artifact = RegulatoryInfoPackageArtifact.objects.create(**artifact_fields)
    notification_fields = {
        "batch": batch,
        "recipient": owner,
        "export_ids": [1, 2],
        "message_summary": "材料包已生成",
        "send_status": RegulatoryInfoPackageNotificationRecord.SendStatus.SUCCESS,
    }
    notification = RegulatoryInfoPackageNotificationRecord.objects.create(**notification_fields)

    assert artifact.metadata == {}
    assert artifact.is_deleted is False
    assert notification.channel == RegulatoryInfoPackageNotificationRecord.Channel.MOCK
    assert notification.retry_count == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_exported_summary_file_supports_zip_type():
    """``"zip"`` is one of the values offered by ``ExportedSummaryFile.ExportType``."""
    export_values = set()
    for value, _label in ExportedSummaryFile.ExportType.choices:
        export_values.add(value)

    assert "zip" in export_values
|
||||||
|
|
||||||
|
|
||||||
|
def test_workflow_node_run_unique_for_workflow_batch(django_user_model):
    """Creating the same node run twice for one batch raises ``IntegrityError``."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610153200-abcdef",
    )
    # Both inserts use exactly the same field values.
    node_fields = {
        "workflow_type": "regulatory_info_package",
        "workflow_batch_id": batch.pk,
        "node_group": "regulatory_info_package",
        "node_code": "prepare",
        "node_name": "准备资料",
    }

    WorkflowNodeRun.objects.create(**node_fields)

    with pytest.raises(IntegrityError):
        WorkflowNodeRun.objects.create(**node_fields)
|
||||||
17
tests/test_regulatory_info_package_notification.py
Normal file
17
tests/test_regulatory_info_package_notification.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, RegulatoryInfoPackageBatch, RegulatoryInfoPackageNotificationRecord
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_info_package_notification_record_defaults(django_user_model):
    """A notification record created with only batch and recipient is MOCK / PENDING."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-NOTIFY",
    )

    record = RegulatoryInfoPackageNotificationRecord.objects.create(
        batch=batch,
        recipient=owner,
    )

    record_type = RegulatoryInfoPackageNotificationRecord
    assert record.channel == record_type.Channel.MOCK
    assert record.send_status == record_type.SendStatus.PENDING
|
||||||
281
tests/test_regulatory_info_package_package_generate.py
Normal file
281
tests/test_regulatory_info_package_package_generate.py
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from docx import Document
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.utils import timezone
|
||||||
|
from review_agent.models import Conversation, RegulatoryInfoPackageBatch
|
||||||
|
from review_agent.regulatory_info_package.services.field_merge import merge_fields
|
||||||
|
from review_agent.regulatory_info_package.services import package_generate
|
||||||
|
from review_agent.regulatory_info_package.services.package_generate import generate_package_documents
|
||||||
|
from review_agent.regulatory_info_package.services.template_config import load_template_config
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_template_config_uses_clean_internal_templates():
    """The template config points at the in-package ``templates/clean`` directory.

    It must list six templates, each of whose source files exists on disk.
    """
    config = load_template_config()
    source_dir = Path(config["source_dir"])
    templates = config["templates"]
    expected_dir = settings.BASE_DIR / "review_agent" / "regulatory_info_package" / "templates" / "clean"

    assert source_dir == expected_dir
    assert source_dir.exists()
    assert len(templates) == 6
    for template in templates:
        assert (source_dir / template["source_file"]).exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_templates_expose_stable_fill_placeholders():
    """Every configured template contains the placeholders expected for its code."""
    product_only = {"{{product_name}}"}
    required_placeholders = {
        "ch1_2_directory": product_only,
        "ch1_4_application_form": {"{{product_name}}", "{{applicant_name}}"},
        "ch1_5_product_list": product_only,
        "ch1_11_1_standards": product_only,
        "ch1_11_5_authenticity": product_only,
        "ch1_11_6_conformity": product_only,
    }
    config = load_template_config()
    source_dir = Path(config["source_dir"])

    for template in config["templates"]:
        template_text = _document_text(Document(source_dir / template["source_file"]))
        for placeholder in required_placeholders[template["code"]]:
            assert placeholder in template_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_directory_template_includes_page_numbers():
    """The directory template's first table has "1" in column 4 of every data row."""
    config = load_template_config()
    directory_item = next(
        template for template in config["templates"] if template["code"] == "ch1_2_directory"
    )
    template_path = Path(config["source_dir"]) / directory_item["source_file"]
    first_table = Document(template_path).tables[0]

    # Skip the first row and collect the stripped text of the fifth cell.
    page_numbers = []
    for row in first_table.rows[1:]:
        page_numbers.append(row.cells[4].text.strip())

    assert page_numbers == ["1", "1", "1", "1", "1", "1"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_application_form_template_uses_real_checkbox_symbols():
    """The application-form template uses checkbox glyphs, not checkbox placeholders."""
    config = load_template_config()
    form_item = next(
        template for template in config["templates"] if template["code"] == "ch1_4_application_form"
    )
    form_path = Path(config["source_dir"]) / form_item["source_file"]
    form_text = _document_text(Document(form_path))

    for forbidden in ("{{复选框}}", "{{}}"):
        assert forbidden not in form_text
    for glyph in ("☐", "☑"):
        assert glyph in form_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_generate_package_documents_creates_six_results(django_user_model, tmp_path):
    """Generation returns six results, each successful and each with a path."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154000-abcdef",
        work_dir=str(tmp_path),
    )
    rule_fields = {"product_name": {"value": "测试产品", "label": "产品名称"}}
    merged, _summary = merge_fields(rule_fields, {})

    results = generate_package_documents(batch, load_template_config(), merged)

    accepted_statuses = {"success", "fallback_success"}
    assert len(results) == 6
    # On failure, show every result's code, status and error message.
    assert all(result.status in accepted_statuses for result in results), [
        (result.template_code, result.status, result.error_message) for result in results
    ]
    assert all(result.path for result in results)
|
||||||
|
|
||||||
|
|
||||||
|
def test_directory_is_generated_last_with_real_page_counts(django_user_model, tmp_path, monkeypatch):
    """The directory is the last result and lists the page counts of the other files.

    ``package_generate.count_document_pages`` is replaced with a stub that
    returns a fixed page count per file name and records which files it saw.
    """
    stub_page_counts = {
        "CH1.4 申请表.docx": 3,
        "CH1.5 产品列表.docx": 5,
        "CH1.11.1 符合标准的清单.docx": 2,
        "CH1.11.5 真实性声明.docx": 4,
        "CH1.11.6 符合性声明.docx": 6,
    }
    seen_names = []

    def fake_count(path):
        file_name = Path(path).name
        seen_names.append(file_name)
        return stub_page_counts[file_name]

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154010-abcdef",
        work_dir=str(tmp_path),
    )
    merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
    monkeypatch.setattr(package_generate, "count_document_pages", fake_count, raising=False)

    results = generate_package_documents(batch, load_template_config(), merged)

    last_result = results[-1]
    assert last_result.template_code == "ch1_2_directory"
    assert set(seen_names) == set(stub_page_counts)
    directory_doc = Document(last_result.path)
    # Map the first cell of each data row to its fifth cell (first row skipped).
    directory_pages = {}
    for row in directory_doc.tables[0].rows[1:]:
        directory_pages[row.cells[0].text.strip()] = row.cells[4].text.strip()
    assert directory_pages == {
        "CH1.2": "1",
        "CH1.4": "3",
        "CH1.5": "5",
        "CH1.11.1": "2",
        "CH1.11.5": "4",
        "CH1.11.6": "6",
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_generated_docx_does_not_add_prefill_or_audit_blocks(django_user_model, tmp_path):
    """Generated documents contain the product name and none of the audit markers."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154100-abcdef",
        work_dir=str(tmp_path),
    )
    merged, _summary = merge_fields({"product_name": {"value": "测试产品", "label": "产品名称"}}, {})
    unwanted_markers = ("预生成版", "预生成字段", "component_table", '"header"')

    results = generate_package_documents(batch, load_template_config(), merged)

    # NOTE(review): the assertions are assumed to run once per generated
    # document; confirm against the original indentation.
    for result in results:
        generated_text = _document_text(Document(result.path))

        for marker in unwanted_markers:
            assert marker not in generated_text
        assert "测试产品" in generated_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_generated_docx_replaces_sample_case_content(django_user_model, tmp_path):
    """Generated documents no longer contain the sample case's product data.

    Also checks the first data row of the product-list table.
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154200-abcdef",
        work_dir=str(tmp_path),
    )
    rule_fields = {
        "product_name": {"value": "测试产品", "label": "产品名称"},
        "package_specification": {"value": "24人份/盒;48人份/盒", "label": "包装规格"},
    }
    merged, _summary = merge_fields(rule_fields, {})

    results = generate_package_documents(batch, load_template_config(), merged)

    # NOTE(review): the sample-name assertion is assumed to run once per
    # docx result; confirm against the original indentation.
    for result in results:
        if result.actual_format != "docx":
            continue
        document = Document(result.path)
        # Paragraph text first, then one tab-joined line per table row.
        pieces = ["\n".join(paragraph.text for paragraph in document.paragraphs)]
        for table in document.tables:
            pieces.extend("\t".join(cell.text for cell in row.cells) for row in table.rows)
        text = "\n".join(pieces)
        assert "呼吸道合胞病毒、肺炎支原体核酸检测试剂盒" not in text

    product_list = next(result for result in results if result.template_code == "ch1_5_product_list")
    table = Document(product_list.path).tables[0]
    first_data_row = table.rows[1]
    assert first_data_row.cells[0].text == "24人份/盒"
    assert first_data_row.cells[1].text == "/"
    all_cell_text = "\n".join(cell.text for row in table.rows for cell in row.cells)
    assert "6018003102" not in all_cell_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_generated_docs_fill_clean_template_body(django_user_model, tmp_path):
    """Generated documents have placeholders replaced by merged values and today's date."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="RIP-20260610154300-abcdef",
        work_dir=str(tmp_path),
    )
    rule_fields = {
        "product_name": {"value": "甲型流感病毒核酸检测试剂盒", "label": "产品名称"},
        "applicant_name": {"value": "星河医疗科技有限公司", "label": "申请人名称"},
        "package_specification": {"value": "24人份/盒;48人份/盒", "label": "包装规格"},
        "standard_no": {"value": "GB/T 29791.1-2013", "label": "标准号"},
    }
    merged, _summary = merge_fields(rule_fields, {})

    results = generate_package_documents(batch, load_template_config(), merged)

    def text_for(template_code):
        # Text of the generated document for one template code.
        match = next(item for item in results if item.template_code == template_code)
        return _document_text(Document(match.path))

    # NOTE(review): loop membership of the assertions below is inferred;
    # confirm against the original indentation.
    named_codes = ["ch1_2_directory", "ch1_4_application_form", "ch1_11_5_authenticity", "ch1_11_6_conformity"]
    for code in named_codes:
        text = text_for(code)
        assert "甲型流感病毒核酸检测试剂盒" in text
        if code == "ch1_4_application_form":
            assert "星河医疗科技有限公司" in text
        assert "{{" not in text
        assert "}}" not in text

    today = timezone.localdate().strftime("%Y年%m月%d日")
    dated_codes = ["ch1_11_1_standards", "ch1_11_5_authenticity", "ch1_11_6_conformity"]
    for code in dated_codes:
        text = text_for(code)
        assert today in text
        assert "xxxx年xx月xx日" not in text
        assert "星河医疗科技有限公司" not in text

    product_text = text_for("ch1_5_product_list")
    assert "24人份/盒" in product_text
    assert "48人份/盒" in product_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_product_list_uses_component_table_from_instruction(django_user_model, tmp_path):
    """The CH1.5 product list is filled from the supplied component table.

    A JSON-encoded component table and a notes field are merged and passed to
    ``generate_package_documents``; the generated product-list document must
    contain the supplied rows and notes, and must not contain the strings
    "RSV&MP" or "6018003102" (presumably leftover template sample data --
    confirm against the template).
    """
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610154400-abcdef",
        work_dir=str(tmp_path),
    )

    # Key order ("header" then "rows") is kept so the serialized JSON matches.
    component_payload = {
        "header": ["组分", "主要组成成分", "规格(24人份/盒)", "规格(48人份/盒)"],
        "rows": [
            ["PCR反应液 I", "逆转录酶、Taq酶", "840μL/管×1管", "840μL/管×2管"],
            ["阳性对照品", "含目的片段的假病毒", "600μL/管×2管", "1200μL/管×2管"],
        ],
    }
    extracted_fields = {
        "product_name": {"value": "新型冠状病毒核酸检测试剂盒", "label": "产品名称"},
        "package_specification": {"value": "24人份/盒;48人份/盒", "label": "包装规格"},
        "component_table": {
            "value": json.dumps(component_payload, ensure_ascii=False),
            "label": "主要组成成分",
        },
        "component_notes": {
            "value": "注:不同批号试剂盒中各组分不得互换使用。",
            "label": "主要组成成分备注",
        },
    }
    merged, _summary = merge_fields(extracted_fields, {})

    generated = generate_package_documents(batch, load_template_config(), merged)
    product_list = next(
        entry for entry in generated if entry.template_code == "ch1_5_product_list"
    )
    rendered = _document_text(Document(product_list.path))

    for expected in (
        "PCR反应液 I",
        "840μL/管×1管",
        "840μL/管×2管",
        "注:不同批号试剂盒中各组分不得互换使用。",
    ):
        assert expected in rendered
    for unexpected in ("RSV&MP", "6018003102"):
        assert unexpected not in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def _document_text(document: Document) -> str:
    """Flatten a Word document into plain text for substring assertions.

    Args:
        document: Object exposing ``paragraphs`` (items with ``.text``) and
            ``tables`` (each with ``rows`` whose ``cells`` have ``.text``).

    Returns:
        The paragraph texts joined by newlines, followed by one
        newline-prefixed line per table row with cell texts separated by tabs.
    """
    body = "\n".join(paragraph.text for paragraph in document.paragraphs)
    # Every row line carries its own leading newline, so a document without
    # paragraphs still yields a leading "\n" before the first row.
    row_lines = [
        "\n" + "\t".join(cell.text for cell in row.cells)
        for table in document.tables
        for row in table.rows
    ]
    # Single join instead of repeated ``+=`` inside nested loops.
    return body + "".join(row_lines)
|
||||||
13
tests/test_regulatory_info_package_summary.py
Normal file
13
tests/test_regulatory_info_package_summary.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
from review_agent.regulatory_info_package.services.summary import build_assistant_summary
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_assistant_summary_puts_zip_first():
    """The ZIP export appears before the individual document in the summary text."""
    docx_name = "CH1.4 申请表.docx"
    zip_name = "第1章 监管信息(预生成版).zip"

    # The ZIP entry is deliberately listed second in the input.
    summary = build_assistant_summary(
        batch_no="RIP-1",
        exports=[
            {"file_name": docx_name, "download_url": "/docx"},
            {"file_name": zip_name, "download_url": "/zip", "export_type": "zip"},
        ],
        failed_files=[],
    )

    zip_position = summary.index(zip_name)
    docx_position = summary.index(docx_name)
    assert zip_position < docx_position
|
||||||
|
|
||||||
46
tests/test_regulatory_info_package_template_config.py
Normal file
46
tests/test_regulatory_info_package_template_config.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.constants import DEFAULT_ZIP_NAME
|
||||||
|
from review_agent.regulatory_info_package.services.template_config import (
|
||||||
|
compute_config_hash,
|
||||||
|
load_template_config,
|
||||||
|
validate_template_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_template_config_loads_six_templates():
    """The loaded template config has the expected version, ZIP name and six template codes."""
    expected_codes = {
        "ch1_2_directory",
        "ch1_4_application_form",
        "ch1_5_product_list",
        "ch1_11_1_standards",
        "ch1_11_5_authenticity",
        "ch1_11_6_conformity",
    }

    config = load_template_config()
    templates = config["templates"]

    assert config["version"] == "regulatory_info_package_templates_v1"
    assert config["zip_name"] == DEFAULT_ZIP_NAME
    assert len(templates) == 6
    assert {entry["code"] for entry in templates} == expected_codes
    # A valid config produces an empty error list and a truthy hash.
    assert validate_template_config(config) == []
    assert compute_config_hash()
|
||||||
|
|
||||||
|
|
||||||
|
def test_template_config_rejects_duplicate_codes():
    """Validation reports an error containing "重复" when a template is duplicated."""
    config = load_template_config()
    templates = config["templates"]
    # Append a shallow copy of the first template so its code occurs twice.
    duplicate = dict(templates[0])
    templates.append(duplicate)

    problems = validate_template_config(config)

    assert any("重复" in problem for problem in problems)
|
||||||
|
|
||||||
|
|
||||||
|
def test_template_config_sources_exist():
    """The configured source directory and every template's source file exist on disk."""
    config = load_template_config()
    root = Path(config["source_dir"])

    assert root.exists()
    for entry in config["templates"]:
        template_file = root / entry["source_file"]
        assert template_file.exists()
|
||||||
28
tests/test_regulatory_info_package_traceability.py
Normal file
28
tests/test_regulatory_info_package_traceability.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import MergedField
|
||||||
|
from review_agent.regulatory_info_package.services.traceability_export import save_traceability_exports
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_traceability_exports_writes_excel_and_json(tmp_path):
    """Traceability export returns an .xlsx and a .json path with the expected names.

    Also checks that the JSON file exists and that cell A1 of the workbook's
    active sheet is "target_file".
    """
    product_name = MergedField(
        key="product_name",
        label="产品名称",
        value="测试产品",
        source="rule",
        evidence="说明书",
        confidence=0.9,
    )

    excel_path, json_path = save_traceability_exports(
        tmp_path, {"product_name": product_name}
    )

    assert excel_path.name == "traceability.xlsx"
    assert json_path.name == "traceability.json"
    assert json_path.exists()
    sheet = load_workbook(excel_path).active
    assert sheet["A1"].value == "target_file"
|
||||||
|
|
||||||
19
tests/test_regulatory_info_package_trigger.py
Normal file
19
tests/test_regulatory_info_package_trigger.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation
|
||||||
|
from review_agent.skill_router import route_message_intent
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_fixed_keyword_routes_to_regulatory_info_package(django_user_model):
    """A message asking to generate chapter 1 regulatory info routes to that workflow."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    decision = route_message_intent(chat, "请根据说明书生成第1章监管信息")

    expected = "regulatory_info_package"
    assert decision.action == expected
    assert decision.workflow_type == expected
    assert decision.starts_regulatory_info_package is True
|
||||||
|
|
||||||
140
tests/test_regulatory_info_package_views.py
Normal file
140
tests/test_regulatory_info_package_views.py
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
RegulatoryInfoPackageBatch,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_info_package_export_download_checks_owner(client, django_user_model, tmp_path):
    """Only the batch owner can download a regulatory-info-package export.

    Another logged-in user gets 404 for the export; the owner gets 200 with an
    ``application/zip`` content type.
    """
    users = django_user_model.objects
    owner = users.create_user(username="owner", password="pass")
    other = users.create_user(username="other", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="RIP-20260610153300-abcdef",
    )

    archive = tmp_path / "第1章 监管信息(预生成版).zip"
    archive.write_bytes(b"zip-content")
    # The export is linked through workflow_type/workflow_batch_id, not ``batch``.
    exported = ExportedSummaryFile.objects.create(
        batch=None,
        workflow_type="regulatory_info_package",
        workflow_batch_id=batch.pk,
        export_category="regulatory_info_package",
        export_type=ExportedSummaryFile.ExportType.ZIP,
        file_name=archive.name,
        storage_path=str(archive),
    )
    download_url = f"/api/review-agent/file-summary/exports/{exported.pk}/download/"

    client.force_login(other)
    denied = client.get(download_url)
    assert denied.status_code == 404

    client.force_login(owner)
    allowed = client.get(download_url)
    assert allowed.status_code == 200
    assert allowed["Content-Type"] == "application/zip"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("file_name", "export_type", "expected"),
    [
        ("CH1.9 产品申报前沟通的说明.doc", ExportedSummaryFile.ExportType.WORD, "application/msword"),
        (
            "CH1.4 申请表.docx",
            ExportedSummaryFile.ExportType.WORD,
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        ),
        ("第1章 监管信息(预生成版).zip", ExportedSummaryFile.ExportType.ZIP, "application/zip"),
    ],
)
def test_regulatory_info_package_download_mime_by_extension(
    client,
    django_user_model,
    tmp_path,
    file_name,
    export_type,
    expected,
):
    """The download endpoint returns the parametrized Content-Type for each file name."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    # The file extension (or "zip" when there is none) is the batch_no suffix.
    suffix = Path(file_name).suffix[1:] or "zip"
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no=f"RIP-20260610153400-{suffix}",
    )

    stored_file = tmp_path / file_name
    stored_file.write_bytes(b"content")
    exported = ExportedSummaryFile.objects.create(
        batch=None,
        workflow_type="regulatory_info_package",
        workflow_batch_id=batch.pk,
        export_category="generated_document",
        export_type=export_type,
        file_name=file_name,
        storage_path=str(stored_file),
    )
    client.force_login(owner)

    download_url = f"/api/review-agent/file-summary/exports/{exported.pk}/download/"
    response = client.get(download_url)

    assert response.status_code == 200
    assert response["Content-Type"] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_regulatory_info_package_status_returns_nodes_and_zip_first(client, django_user_model, tmp_path):
    """The status endpoint reports the batch, its node runs, and lists the ZIP export first.

    One successful ``zip_export`` node run and two exports (a Word document
    created first, then the ZIP) are stored for a successful batch. The
    response must identify the workflow type, return the node, and put the
    ZIP export at index 0 of ``exports``.
    """
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    batch = RegulatoryInfoPackageBatch.objects.create(
        conversation=conversation,
        user=user,
        batch_no="RIP-20260610153500-abcdef",
        status=RegulatoryInfoPackageBatch.Status.SUCCESS,
    )
    WorkflowNodeRun.objects.create(
        workflow_type="regulatory_info_package",
        workflow_batch_id=batch.pk,
        node_group="regulatory_info_package",
        node_code="zip_export",
        node_name="打包下载",
        status=WorkflowNodeRun.Status.SUCCESS,
        progress=100,
    )
    doc = tmp_path / "CH1.4 申请表.docx"
    zip_file = tmp_path / "第1章 监管信息(预生成版).zip"
    doc.write_bytes(b"doc")
    zip_file.write_bytes(b"zip")
    # The Word export is created before the ZIP, so creation order alone
    # would not put the ZIP first in the response.
    ExportedSummaryFile.objects.create(
        batch=None,
        workflow_type="regulatory_info_package",
        workflow_batch_id=batch.pk,
        export_category="generated_document",
        export_type=ExportedSummaryFile.ExportType.WORD,
        file_name=doc.name,
        storage_path=str(doc),
    )
    ExportedSummaryFile.objects.create(
        batch=None,
        workflow_type="regulatory_info_package",
        workflow_batch_id=batch.pk,
        export_category="regulatory_info_package",
        export_type=ExportedSummaryFile.ExportType.ZIP,
        file_name=zip_file.name,
        storage_path=str(zip_file),
    )
    client.force_login(user)

    response = client.get(f"/api/review-agent/regulatory-info-package/{batch.pk}/status/")

    # Check the status first so an error response fails here rather than as
    # a KeyError or JSON decode error below.
    assert response.status_code == 200
    payload = response.json()
    assert payload["batch"]["workflow_type"] == "regulatory_info_package"
    assert payload["nodes"][0]["node_code"] == "zip_export"
    assert payload["exports"][0]["export_type"] == "zip"
|
||||||
92
tests/test_regulatory_info_package_workflow.py
Normal file
92
tests/test_regulatory_info_package_workflow.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileAttachment, Message, RegulatoryInfoPackageBatch, WorkflowNodeRun
|
||||||
|
from review_agent.regulatory_info_package.constants import (
|
||||||
|
REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS,
|
||||||
|
WORKFLOW_TYPE,
|
||||||
|
)
|
||||||
|
from review_agent.regulatory_info_package.workflow import (
|
||||||
|
create_regulatory_info_package_batch,
|
||||||
|
start_regulatory_info_package_workflow,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
pytestmark = pytest.mark.django_db
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_regulatory_info_package_batch_initializes_nodes(django_user_model):
    """Creating a batch sets batch_no/work_dir and creates one node run per definition, in order."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    batch = create_regulatory_info_package_batch(conversation=chat, user=owner)

    assert batch.batch_no.startswith("RIP-")
    assert batch.work_dir

    expected_codes = [
        node_code for node_code, _label, _group in REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS
    ]
    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
    ).order_by("id")
    actual_codes = [run.node_code for run in node_runs]
    assert actual_codes == expected_codes
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_regulatory_info_package_batch_is_node_idempotent(django_user_model):
    """Calling the factory again with ``existing_batch`` does not add more node runs."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = create_regulatory_info_package_batch(conversation=chat, user=owner)

    # Second call re-uses the batch created above.
    create_regulatory_info_package_batch(conversation=chat, user=owner, existing_batch=batch)

    node_count = WorkflowNodeRun.objects.filter(
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
    ).count()
    assert node_count == len(REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS)
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_workflow_skeleton_completes(django_user_model, settings):
    """A batch created without a source file runs synchronously to SUCCESS on every node."""
    settings.REGULATORY_INFO_PACKAGE_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = create_regulatory_info_package_batch(conversation=chat, user=owner)

    start_regulatory_info_package_workflow(batch, async_run=False)
    batch.refresh_from_db()

    assert batch.status == RegulatoryInfoPackageBatch.Status.SUCCESS
    succeeded_nodes = WorkflowNodeRun.objects.filter(
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
        status=WorkflowNodeRun.Status.SUCCESS,
    )
    assert succeeded_nodes.count() == len(REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS)
|
||||||
|
|
||||||
|
|
||||||
|
def test_completed_workflow_appends_download_summary_message(django_user_model, settings):
    """A completed workflow posts an assistant message with the ZIP name and a download link.

    The workflow runs synchronously against the sample instruction document
    under ``docs/``. Afterwards the latest assistant message mentioning the
    batch number must contain the ZIP file name and an export download URL
    prefix.
    """
    settings.REGULATORY_INFO_PACKAGE_ASYNC = False
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    trigger = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="根据说明书生成第1章监管信息")
    # Anchor the fixture to the repository root (the parent of tests/) rather
    # than the current working directory, so the test works from any cwd.
    repo_root = Path(__file__).resolve().parents[1]
    source = repo_root / "docs" / "0.原始材料" / "目标产品说明书.docx"
    attachment = FileAttachment.objects.create(
        conversation=conversation,
        user=user,
        original_name="目标产品说明书.docx",
        storage_path=str(source),
        file_size=source.stat().st_size,
    )
    batch = create_regulatory_info_package_batch(
        conversation=conversation,
        user=user,
        trigger_message=trigger,
        source_attachment=attachment,
        source_file_name=attachment.original_name,
        source_storage_path=attachment.storage_path,
    )

    start_regulatory_info_package_workflow(batch, async_run=False)

    message = conversation.messages.filter(role=Message.Role.ASSISTANT, content__contains=batch.batch_no).latest("id")
    assert "第1章 监管信息(预生成版).zip" in message.content
    assert "/api/review-agent/file-summary/exports/" in message.content
|
||||||
22
tests/test_regulatory_info_package_zip.py
Normal file
22
tests/test_regulatory_info_package_zip.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
import zipfile
|
||||||
|
|
||||||
|
from review_agent.regulatory_info_package.schemas import GeneratedFileResult
|
||||||
|
from review_agent.regulatory_info_package.services.zip_export import create_zip_package
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_zip_package_includes_only_success_files(tmp_path):
    """Only results with status "success" end up in the ZIP archive."""
    ok_file = tmp_path / "ok.docx"
    bad_file = tmp_path / "bad.docx"
    ok_file.write_bytes(b"ok")
    bad_file.write_bytes(b"bad")

    # Both files exist on disk; only the result status differs.
    generated = [
        GeneratedFileResult("ok", "ok.docx", "docx", "docx", "success", path=str(ok_file)),
        GeneratedFileResult("bad", "bad.docx", "docx", "docx", "failed", path=str(bad_file)),
    ]
    zip_path = create_zip_package(tmp_path, generated)

    with zipfile.ZipFile(zip_path) as archive:
        assert archive.namelist() == ["ok.docx"]
|
||||||
@@ -1,3 +1,5 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from review_agent.regulatory_review.services.rag_citation import (
|
from review_agent.regulatory_review.services.rag_citation import (
|
||||||
@@ -7,6 +9,7 @@ from review_agent.regulatory_review.services.rag_citation import (
|
|||||||
from review_agent.regulatory_review.services.rag_embedding import SiliconFlowEmbeddingProvider
|
from review_agent.regulatory_review.services.rag_embedding import SiliconFlowEmbeddingProvider
|
||||||
from review_agent.regulatory_review.services.rag_index import chunk_text
|
from review_agent.regulatory_review.services.rag_index import chunk_text
|
||||||
from review_agent.regulatory_review.services.rag_index import collect_source_chunks
|
from review_agent.regulatory_review.services.rag_index import collect_source_chunks
|
||||||
|
from review_agent.regulatory_review.services.rag_index import build_chroma_index
|
||||||
|
|
||||||
|
|
||||||
def test_siliconflow_embedding_provider_posts_expected_payload(monkeypatch):
|
def test_siliconflow_embedding_provider_posts_expected_payload(monkeypatch):
|
||||||
@@ -86,3 +89,141 @@ def test_collect_source_chunks_requires_attachment4_extraction(monkeypatch, tmp_
|
|||||||
|
|
||||||
with pytest.raises(RuntimeError, match="附件 4"):
|
with pytest.raises(RuntimeError, match="附件 4"):
|
||||||
collect_source_chunks(source_dir)
|
collect_source_chunks(source_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def test_collect_source_chunks_excludes_demo_agent_materials(monkeypatch, tmp_path):
    """Chunks are collected, and none has a source containing "模拟题二".

    The source directory holds a demo sub-directory with a markdown file, a
    demo .docx next to it, and one real "附件 4" document. Text extraction is
    stubbed out.
    """
    demo_name = "【模拟题二】试剂盒临床注册文件准备与审核Agent"
    source_dir = tmp_path / "sources"
    source_dir.mkdir()
    demo_dir = source_dir / demo_name
    demo_dir.mkdir()
    (demo_dir / f"{demo_name}.md").write_text("题目材料", encoding="utf-8")
    (source_dir / f"{demo_name}.docx").write_bytes(b"demo")
    real_source = source_dir / "附件 4 体外诊断试剂注册申报资料要求及说明.doc"
    real_source.write_bytes(b"rule")

    def fake_extract(path):
        # Only the real document yields the expected text.
        if path == real_source:
            return "附件4 正文"
        return "不应被抽取"

    monkeypatch.setattr(
        "review_agent.regulatory_review.services.rag_index.extract_text_from_path",
        fake_extract,
    )

    chunks = collect_source_chunks(source_dir)

    assert chunks
    assert all("模拟题二" not in piece.metadata["source"] for piece in chunks)
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_chroma_index_reset_recreates_collection_without_deleting_index_dir(settings, monkeypatch, tmp_path):
    """With a working client, ``reset=True`` deletes the collection but keeps index files.

    Fake ``chromadb`` modules record each client construction and cache clear
    together with whether the pre-existing file in the persist directory
    still exists at that moment.
    """
    settings.MEDIA_ROOT = tmp_path
    persist_path = tmp_path / "chroma"
    persist_path.mkdir()
    stale_file = persist_path / "chroma.sqlite3"
    stale_file.write_text("stale", encoding="utf-8")
    source_dir = tmp_path / "sources"
    source_dir.mkdir()
    (source_dir / "rule.md").write_text("注册检验报告要求", encoding="utf-8")

    observed = []
    removed_collections = []

    def snapshot(label):
        # Record the event label and whether the stale file is still present.
        observed.append({"path": label, "stale_exists": stale_file.exists()})

    class FakeCollection:
        def upsert(self, **kwargs):
            return None

    class FakeClient:
        def __init__(self, path):
            snapshot(path)

        def delete_collection(self, name):
            removed_collections.append(name)

        def get_or_create_collection(self, name):
            return FakeCollection()

    class FakeSharedSystemClient:
        @staticmethod
        def clear_system_cache():
            snapshot("cache-cleared")

    def fake_embeddings(texts):
        return [[0.1, 0.2] for _ in texts]

    chroma_module = type("FakeChromaModule", (), {"PersistentClient": FakeClient})
    shared_client_module = type(
        "FakeSharedSystemClientModule",
        (),
        {"SharedSystemClient": FakeSharedSystemClient},
    )
    monkeypatch.setitem(sys.modules, "chromadb", chroma_module)
    monkeypatch.setitem(sys.modules, "chromadb.api.shared_system_client", shared_client_module)

    count = build_chroma_index(
        source_dir=source_dir,
        embedding_provider=fake_embeddings,
        persist_path=persist_path,
        collection_name="test",
        reset=True,
    )

    assert count == 1
    # Client built, cache cleared, client built again; the file survives throughout.
    assert observed == [
        {"path": str(persist_path), "stale_exists": True},
        {"path": "cache-cleared", "stale_exists": True},
        {"path": str(persist_path), "stale_exists": True},
    ]
    assert stale_file.exists()
    assert removed_collections == ["test"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_chroma_index_reset_clears_bad_index_dir_after_chroma_cache_reset(settings, monkeypatch, tmp_path):
    """When the first client construction fails, the index directory is cleared before retrying.

    The fake client raises ``ValueError`` on its first construction. The
    recorded events must show the cache being cleared while the stale file
    still exists, and the stale file gone by the second construction.
    """
    settings.MEDIA_ROOT = tmp_path
    persist_path = tmp_path / "chroma"
    persist_path.mkdir()
    stale_file = persist_path / "chroma.sqlite3"
    stale_file.write_text("stale", encoding="utf-8")
    source_dir = tmp_path / "sources"
    source_dir.mkdir()
    (source_dir / "rule.md").write_text("注册检验报告要求", encoding="utf-8")

    events = []

    class FakeCollection:
        def upsert(self, **kwargs):
            return None

    class BrokenThenFreshClient:
        attempts = 0

        def __init__(self, path):
            BrokenThenFreshClient.attempts += 1
            attempt = BrokenThenFreshClient.attempts
            events.append(("client", attempt, stale_file.exists()))
            # Only the very first construction fails.
            if attempt == 1:
                raise ValueError("Could not connect to tenant default_tenant")

        def get_or_create_collection(self, name):
            return FakeCollection()

    class FakeSharedSystemClient:
        @staticmethod
        def clear_system_cache():
            events.append(("clear_cache", stale_file.exists()))

    def fake_embeddings(texts):
        return [[0.1, 0.2] for _ in texts]

    fake_chromadb = type("FakeChromaModule", (), {"PersistentClient": BrokenThenFreshClient})
    fake_shared_module = type(
        "FakeSharedSystemClientModule",
        (),
        {"SharedSystemClient": FakeSharedSystemClient},
    )
    monkeypatch.setitem(sys.modules, "chromadb", fake_chromadb)
    monkeypatch.setitem(sys.modules, "chromadb.api.shared_system_client", fake_shared_module)

    count = build_chroma_index(
        source_dir=source_dir,
        embedding_provider=fake_embeddings,
        persist_path=persist_path,
        collection_name="test",
        reset=True,
    )

    assert count == 1
    expected_events = [
        ("client", 1, True),
        ("clear_cache", True),
        ("client", 2, False),
    ]
    assert events == expected_events
    assert not stale_file.exists()
|
||||||
|
|||||||
Reference in New Issue
Block a user