Compare commits
133 Commits
b96ab1303a
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 8f7d1482ed | |||
| 1d0b5338a8 | |||
| bb2fbe272f | |||
| ccd6e8ef4d | |||
| 64d09ec30f | |||
| 7def60f1b6 | |||
| 9c6cad481c | |||
| 1bf8634373 | |||
| 3bcf9647a1 | |||
| cf4f4456c4 | |||
| b728703e67 | |||
| 6d4b519f83 | |||
| dcd829e821 | |||
| dac8ce3c14 | |||
| f0286264e2 | |||
|
|
a060c23ba7 | ||
|
|
db0e94cf26 | ||
|
|
dce7045a46 | ||
| 8548b6d2b4 | |||
| 26e675e5d3 | |||
| 42187bf8e9 | |||
| 18548eb78f | |||
| 2b5093040d | |||
| d8cd95e590 | |||
| 681cb03eb9 | |||
| ccfa43645e | |||
| ef0a9ee13e | |||
| 2244b69d62 | |||
| 5ecf78c5d6 | |||
| e6fa738fd5 | |||
| 1f56247978 | |||
| 90144c42ac | |||
| f23e403eb8 | |||
| bd9b2e872e | |||
| be7fbab0a0 | |||
| 1a1b3ee9d4 | |||
| cbc7493df8 | |||
| 820069f558 | |||
| bdc1d58c22 | |||
| da81ce24d0 | |||
| 003ff59268 | |||
| d640ced748 | |||
| 30bdcdbc9c | |||
| 57f9181d58 | |||
| 0ccd69d3f4 | |||
| 13b543c99d | |||
| ac5cf8bf7e | |||
| 82c33e513f | |||
| 228f30a697 | |||
| 4ac9c04dbf | |||
| 9be10ef990 | |||
| f35a3ba9b4 | |||
| a48f778e09 | |||
| 72890783b3 | |||
| 8694f2d43e | |||
| e48d44f832 | |||
| 74cbe349a8 | |||
| f48277aea7 | |||
| 56225f40d9 | |||
| 3e8720e521 | |||
| 32d258bb75 | |||
| 0f9fb980f2 | |||
| 9e27c4c684 | |||
| 1b4a10b5ba | |||
| 911e5378e8 | |||
| 8f16675a92 | |||
| 945669b9c2 | |||
| a34684e490 | |||
| 72f18167c5 | |||
| b8d711729d | |||
| f46d9c5be6 | |||
| 462d3ec5f5 | |||
| 12b476a8ef | |||
| 48d94884b9 | |||
| 4e46f27c28 | |||
| d39e3fe2d5 | |||
| d88d642f6a | |||
| 1bdc7322cf | |||
| bbd2d3532a | |||
| bd805203f1 | |||
| 4c28466fe4 | |||
| ec89e62661 | |||
| 44d31d2a14 | |||
| 26490f7c46 | |||
| 2a4dd6cfab | |||
| f52dcc197d | |||
| 665403735a | |||
| f179749cfb | |||
| e58da66853 | |||
| df3f393dd2 | |||
| 0fca20756b | |||
| 3c6ec67371 | |||
| 7e561ea213 | |||
| daa0642142 | |||
| c78ff3a1fd | |||
| 460d418921 | |||
| 54c37edf19 | |||
| fa77c68d77 | |||
| 47b5ad1054 | |||
| fd88ff4652 | |||
| b1a336d019 | |||
| 311eb1b129 | |||
| 77db0d978a | |||
| 684682f86d | |||
| a917a18ca1 | |||
| 61bd31790b | |||
| 18d045d487 | |||
| 51e7c0c007 | |||
| eb87d9040d | |||
| 855afcdee3 | |||
| e8c2a591fe | |||
| 1056bf62d9 | |||
| 0de6f6b2ff | |||
| 43196f79e6 | |||
| 322c161818 | |||
| ccfe5eb667 | |||
| f68b44f325 | |||
| f7e0d8e4d8 | |||
| c57ab2f194 | |||
| 81f17319ff | |||
| c2b3a3b4f7 | |||
| 905067277a | |||
| df45a89eb1 | |||
| ba3f5fc584 | |||
| 5c9718ddb1 | |||
| 4a831ee2c5 | |||
| 7a6c110103 | |||
| 35b80929b0 | |||
| 6291940734 | |||
| b5ed5b6faa | |||
| e24d9804ba | |||
| d4a236d0db | |||
| 569542bdea |
10
.env
10
.env
@@ -6,7 +6,9 @@ DJANGO_ALLOWED_HOSTS=*
|
|||||||
LLM_PROVIDER=openai_compatible
|
LLM_PROVIDER=openai_compatible
|
||||||
LLM_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
LLM_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
||||||
LLM_BASE_URL=https://api.siliconflow.cn/v1
|
LLM_BASE_URL=https://api.siliconflow.cn/v1
|
||||||
LLM_MODEL=Qwen/Qwen2.5-7B-Instruct
|
LLM_MODEL=deepseek-ai/DeepSeek-V4-Pro
|
||||||
|
SILICONFLOW_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B
|
||||||
|
SILICONFLOW_EMBEDDING_DIMENSIONS=4096
|
||||||
|
|
||||||
# SiliconFlow embedding model for RAG
|
# SiliconFlow embedding model for RAG
|
||||||
EMBEDDING_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
EMBEDDING_API_KEY=sk-pgvkjondmmrlyxmrfhotgpuirgbtgzrpjpweorhwruflxmxw
|
||||||
@@ -17,3 +19,9 @@ SCENARIO_CONFIG_DIR=configs
|
|||||||
GOVERNANCE_CONFIG_PATH=configs/governance.yaml
|
GOVERNANCE_CONFIG_PATH=configs/governance.yaml
|
||||||
UPLOAD_ROOT=data/uploads
|
UPLOAD_ROOT=data/uploads
|
||||||
CHROMA_PATH=data/chroma
|
CHROMA_PATH=data/chroma
|
||||||
|
FEISHU_NOTIFY_ENABLED=true
|
||||||
|
FEISHU_APP_ID=cli_aaafcc59f4b85bc2
|
||||||
|
FEISHU_APP_SECRET=OO8GKpjqTO3bHAUwCiSmRgW4FqsNB5Qa
|
||||||
|
FEISHU_DEFAULT_USER_OPEN_ID=ou_a6015773781a117eb7d8995efa5e4590
|
||||||
|
FEISHU_DEFAULT_TARGET_NAME=bruce
|
||||||
|
PUBLIC_BASE_URL=http://127.0.0.1:8000
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,4 +6,5 @@ db.sqlite3
|
|||||||
staticfiles/
|
staticfiles/
|
||||||
media/
|
media/
|
||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
|
.tmp/
|
||||||
.idea/
|
.idea/
|
||||||
|
|||||||
65
AGENTS.md
Normal file
65
AGENTS.md
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# Agent Collaboration Guide
|
||||||
|
|
||||||
|
This guide is for Codex or other coding agents working in this repository.
|
||||||
|
|
||||||
|
## Project Summary
|
||||||
|
|
||||||
|
DEMO-AGENT V2 is a Django application for IVD registration document review. The main app is `review_agent`, with workflow modules for file summaries, regulatory review, application form filling, regulatory information package generation, knowledge-base management, and Feishu notification/question handling.
|
||||||
|
|
||||||
|
The current `master` branch is intended to match `V2`.
|
||||||
|
|
||||||
|
## Important Paths
|
||||||
|
|
||||||
|
| Path | Purpose |
|
||||||
|
| --- | --- |
|
||||||
|
| `config/settings.py` | Django settings and environment loading |
|
||||||
|
| `config/urls.py` | Page routes and included API routes |
|
||||||
|
| `review_agent/models.py` | Shared Django models |
|
||||||
|
| `review_agent/urls.py` | Review-agent API routes |
|
||||||
|
| `review_agent/file_summary/` | Attachment handling, file inventory, page count, exports |
|
||||||
|
| `review_agent/regulatory_review/` | NMPA review workflow, rules, RAG, risk and issue review |
|
||||||
|
| `review_agent/application_form_fill/` | Application form field extraction and Word filling |
|
||||||
|
| `review_agent/regulatory_info_package/` | Chapter 1 regulatory information package generation |
|
||||||
|
| `review_agent/notifications/` | Notification dispatch and Feishu adapters |
|
||||||
|
| `templates/` | Django templates |
|
||||||
|
| `static/` | Frontend CSS and JavaScript |
|
||||||
|
| `docs/` | Requirements, designs, plans, source materials |
|
||||||
|
| `tests/` | pytest suite |
|
||||||
|
|
||||||
|
## Development Rules
|
||||||
|
|
||||||
|
- Prefer the existing Django patterns in `review_agent` before introducing new abstractions.
|
||||||
|
- Keep workflow modules independent. Do not fold regulatory package, application form fill, or regulatory review logic into unrelated modules.
|
||||||
|
- Preserve user data and generated artifacts. Do not delete `media/`, `.tmp/`, `db.sqlite3`, or `.env` unless explicitly asked.
|
||||||
|
- Treat `.env` as environment-specific configuration. It is currently tracked because this project needs a complete V2 state, but do not print secret values in logs or docs.
|
||||||
|
- For Word/PDF/Excel handling, use structured libraries already in the project instead of ad hoc text parsing when possible.
|
||||||
|
- For frontend work, keep the current workbench style: restrained, task-focused, evidence-first, and consistent with existing templates and CSS.
|
||||||
|
|
||||||
|
## Common Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
python manage.py migrate
|
||||||
|
python manage.py runserver
|
||||||
|
pytest
|
||||||
|
pytest tests -k regulatory_info_package
|
||||||
|
pytest tests/test_feishu_*.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## Verification Notes
|
||||||
|
|
||||||
|
Before claiming a code change is complete, run at least the narrow test set for the touched workflow. For broad changes, run `python manage.py check` and `pytest`.
|
||||||
|
|
||||||
|
Known current state:
|
||||||
|
|
||||||
|
- `python manage.py check` passes.
|
||||||
|
- `pytest tests -k regulatory_info_package` passes.
|
||||||
|
- Full `pytest` may still include a few historical failures unrelated to the latest regulatory-info-package merge; report exact failures if they remain.
|
||||||
|
|
||||||
|
## Git Notes
|
||||||
|
|
||||||
|
- Check `git status --short --branch` before editing.
|
||||||
|
- Do not reset or revert user changes unless explicitly asked.
|
||||||
|
- Keep commits grouped by logical concern: docs, feature behavior, tests, cleanup.
|
||||||
|
- When merging `V2` and `master`, remember these histories were unrelated before the merge. Prefer preserving the V2 tree when the goal is to keep `master` as the complete V2 state.
|
||||||
|
|
||||||
55
PRODUCT.md
Normal file
55
PRODUCT.md
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# Product
|
||||||
|
|
||||||
|
## Product Name
|
||||||
|
|
||||||
|
DEMO-AGENT V2
|
||||||
|
|
||||||
|
## Users
|
||||||
|
|
||||||
|
注册资料准备人员、法规审核人员、项目管理人员和演示评审人员。用户通常需要在资料量大、文件格式复杂、法规要求多、证据链容易断裂的情况下快速完成资料整理、核查、整改和申报文件准备。
|
||||||
|
|
||||||
|
## Product Purpose
|
||||||
|
|
||||||
|
DEMO-AGENT V2 是一个体外诊断试剂注册资料审核工作台。它把上传资料、文件汇总、法规规则核查、RAG 依据检索、风险预警、整改复核、申报表填充和第 1 章监管信息材料包生成组织成可追溯的工作流。
|
||||||
|
|
||||||
|
产品目标不是替代法规负责人作最终判断,而是把机械整理、跨文件检索、字段预填、问题归类和证据追溯做扎实,让负责人把精力放在判断和确认上。
|
||||||
|
|
||||||
|
## Core Workflows
|
||||||
|
|
||||||
|
| 工作流 | 目标产物 |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件汇总 | 文件目录、页数、类型、批次状态、Markdown/Excel 导出 |
|
||||||
|
| 法规核查 | 缺失项、风险项、一致性问题、整改建议、复核记录 |
|
||||||
|
| 知识库管理 | 用户资料索引、内置法规资料检索、引用片段 |
|
||||||
|
| 申报表填充 | 预填申报表、字段来源、冲突和缺失提示 |
|
||||||
|
| 第 1 章监管信息材料包 | CH1.2、CH1.4、CH1.5、CH1.11 等 docx 文件和 zip |
|
||||||
|
| 飞书通知与问答 | 批次完成通知、问题模拟查询、系统入口链接 |
|
||||||
|
|
||||||
|
## Brand Personality
|
||||||
|
|
||||||
|
克制、可信、清晰。界面应服务审核任务,优先呈现状态、证据和下一步动作。
|
||||||
|
|
||||||
|
## Anti-References
|
||||||
|
|
||||||
|
避免营销页式大标题、装饰性卡片堆叠、过度动画、过亮渐变和不必要的视觉噪声。不要把审核工作台做成展示型官网,也不要隐藏关键状态或证据来源。
|
||||||
|
|
||||||
|
## Design Principles
|
||||||
|
|
||||||
|
- 证据优先:每个结论都应能回到来源文件、规则或检索片段。
|
||||||
|
- 状态清楚:批次、节点、风险、异常和导出结果要一眼可辨。
|
||||||
|
- 操作克制:页面提供必要动作,不把审核工作做成复杂后台。
|
||||||
|
- 人工确认:系统负责预处理和提示,法规负责人保留最终确认权。
|
||||||
|
- 可追溯:导出文件、消息、节点事件和问题状态都应能回到批次。
|
||||||
|
- 复用现有模式:新增页面沿用当前工作台导航、面板、表格和按钮体系。
|
||||||
|
|
||||||
|
## Accessibility & Inclusion
|
||||||
|
|
||||||
|
默认按 WCAG AA 方向处理对比度、键盘可访问和清晰标签。动效仅用于状态反馈,并尊重减少动态效果需求。
|
||||||
|
|
||||||
|
## Operational Boundaries
|
||||||
|
|
||||||
|
- `.env` 可用于本地和演示环境,但包含密钥时应限制分发范围。
|
||||||
|
- LLM、飞书、Word COM、7z、RAG 索引等外部能力必须允许 mock 或降级。
|
||||||
|
- 生成的申报和监管信息文件是预生成结果,需要人工复核后再用于正式申报。
|
||||||
|
- 默认存储使用 SQLite 和本地 `media/`,生产环境应迁移到持久化卷和受控备份。
|
||||||
|
|
||||||
113
README.md
113
README.md
@@ -1,6 +1,49 @@
|
|||||||
# DEMO-AGENT V2
|
# DEMO-AGENT V2
|
||||||
|
|
||||||
V2 是一个重置后的最小 Django 项目,仅保留基础配置和登录页面。
|
DEMO-AGENT V2 是一个面向体外诊断试剂注册资料准备与审核的 Django 工作台。系统把资料上传、文件目录汇总、法规核查、知识库检索、风险提示、整改复核、申报表自动填充和第 1 章监管信息材料包生成组织到同一个可追溯的审核流程中。
|
||||||
|
|
||||||
|
当前 `master` 已与 `V2` 内容对齐,是项目主线。
|
||||||
|
|
||||||
|
## 核心能力
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 审核工作台 | 登录后进入首页,查看对话、附件、知识库、批次和处理状态 |
|
||||||
|
| 对话式工作流 | 在 `/chat/` 中围绕当前对话上传资料、触发汇总、法规核查和生成任务 |
|
||||||
|
| 文件汇总 | 读取 PDF、Word、Excel、PowerPoint、压缩包等资料,生成目录、页数、类型和导出结果 |
|
||||||
|
| NMPA 法规核查 | 基于规则、文本抽取、RAG 检索和 LLM 复核生成问题、风险和整改建议 |
|
||||||
|
| 知识库管理 | 上传管理资料、重建索引、检索引用片段,并过滤已停用或删除文档 |
|
||||||
|
| 申报表填充 | 从说明书和资料中抽取关键字段,生成预填申报表和追溯结果 |
|
||||||
|
| 第 1 章监管信息材料包 | 生成 CH1.2、CH1.4、CH1.5、CH1.11 等监管信息文件和 zip 产物 |
|
||||||
|
| 飞书通知与问答 | 支持企业自建应用消息通知,并预留飞书问答模拟命令 |
|
||||||
|
|
||||||
|
## 页面入口
|
||||||
|
|
||||||
|
| 页面 | 路径 |
|
||||||
|
| --- | --- |
|
||||||
|
| 登录页 | `http://127.0.0.1:8000/login/` |
|
||||||
|
| 首页 | `http://127.0.0.1:8000/` |
|
||||||
|
| 审核智能体 | `http://127.0.0.1:8000/chat/` |
|
||||||
|
| 知识库管理 | `http://127.0.0.1:8000/knowledge-base/` |
|
||||||
|
| 附件管理 | `http://127.0.0.1:8000/attachments/` |
|
||||||
|
| 管理后台 | `http://127.0.0.1:8000/admin/` |
|
||||||
|
|
||||||
|
## 项目结构
|
||||||
|
|
||||||
|
```text
|
||||||
|
config/ Django 配置和总路由
|
||||||
|
review_agent/ 核心业务应用
|
||||||
|
application_form_fill/ 申报表自动填充
|
||||||
|
file_summary/ 文件汇总、附件和导出
|
||||||
|
regulatory_review/ 法规核查与整改复核
|
||||||
|
regulatory_info_package/ 第 1 章监管信息材料包生成
|
||||||
|
notifications/ 飞书通知和消息适配
|
||||||
|
feishu_questions/ 飞书问答预留能力
|
||||||
|
static/ 前端脚本和样式
|
||||||
|
templates/ Django 模板
|
||||||
|
docs/ 需求、设计、开发计划和原始材料
|
||||||
|
tests/ pytest 测试
|
||||||
|
```
|
||||||
|
|
||||||
## 本地运行
|
## 本地运行
|
||||||
|
|
||||||
@@ -13,8 +56,68 @@ python manage.py createsuperuser
|
|||||||
python manage.py runserver
|
python manage.py runserver
|
||||||
```
|
```
|
||||||
|
|
||||||
访问:
|
项目会自动读取仓库根目录 `.env`。当前仓库保留了 V2 的 `.env` 文件;后续如果要面向外部协作,请先确认其中没有不应公开的密钥。
|
||||||
|
|
||||||
|
## 常用环境变量
|
||||||
|
|
||||||
|
| 变量名 | 用途 |
|
||||||
|
| --- | --- |
|
||||||
|
| `DJANGO_SECRET_KEY` | Django secret key |
|
||||||
|
| `DJANGO_DEBUG` | 是否开启调试模式 |
|
||||||
|
| `DJANGO_ALLOWED_HOSTS` | 允许访问的主机列表 |
|
||||||
|
| `LLM_PROVIDER` | LLM provider 选择 |
|
||||||
|
| `LLM_API_KEY` | LLM API key |
|
||||||
|
| `LLM_BASE_URL` | OpenAI 兼容 LLM API 地址 |
|
||||||
|
| `LLM_MODEL` | 默认对话/抽取模型 |
|
||||||
|
| `SILICONFLOW_API_KEY` | SiliconFlow API key,默认可复用 `LLM_API_KEY` |
|
||||||
|
| `SILICONFLOW_EMBEDDING_MODEL` | 法规 RAG 使用的 embedding 模型 |
|
||||||
|
| `SILICONFLOW_EMBEDDING_DIMENSIONS` | embedding 维度 |
|
||||||
|
| `REGULATORY_RAG_CHROMA_PATH` | 法规 RAG Chroma 存储路径 |
|
||||||
|
| `REGULATORY_RAG_COLLECTION` | 法规 RAG collection 名称 |
|
||||||
|
| `FEISHU_NOTIFY_ENABLED` | 是否启用真实飞书通知 |
|
||||||
|
| `FEISHU_APP_ID` | 飞书应用 App ID |
|
||||||
|
| `FEISHU_APP_SECRET` | 飞书应用 App Secret |
|
||||||
|
| `FEISHU_DEFAULT_USER_OPEN_ID` | 默认飞书接收人 open_id |
|
||||||
|
| `PUBLIC_BASE_URL` | 飞书消息中的系统入口根地址 |
|
||||||
|
|
||||||
|
## 外部依赖
|
||||||
|
|
||||||
|
Python 依赖见 `requirements.txt`,主要包括:
|
||||||
|
|
||||||
|
- Django
|
||||||
|
- PyYAML
|
||||||
|
- httpx
|
||||||
|
- chromadb
|
||||||
|
- pypdf
|
||||||
|
- python-docx
|
||||||
|
- python-pptx
|
||||||
|
- openpyxl / xlrd
|
||||||
|
- py7zr
|
||||||
|
- playwright
|
||||||
|
|
||||||
|
文件汇总如需处理 `.7z` 和 `.rar` 压缩包,运行环境还需要提供可用的 `7z`/`p7zip` 命令。LibreOffice 不是必需依赖,仅作为后续增强老格式文档处理能力的可选项。
|
||||||
|
|
||||||
|
## 常用命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest
|
||||||
|
pytest tests -k regulatory_info_package
|
||||||
|
pytest tests/test_feishu_*.py
|
||||||
|
python manage.py send_test_feishu_notification --username owner
|
||||||
|
python manage.py feishu_question_simulate --username owner "查最新法规核查"
|
||||||
|
```
|
||||||
|
|
||||||
|
已知情况:当前全量 `pytest` 中仍有少量历史测试与当前页面/LLM 调用策略不完全一致;监管信息材料包主链路测试已通过。
|
||||||
|
|
||||||
|
## 文档入口
|
||||||
|
|
||||||
|
- [产品说明](PRODUCT.md)
|
||||||
|
- [Agent 协作约定](AGENTS.md)
|
||||||
|
- [docs 文档索引](docs/README.md)
|
||||||
|
- [需求分析](docs/1.需求分析)
|
||||||
|
- [功能设计](docs/2.功能设计)
|
||||||
|
- [数据库设计](docs/3.数据库设计)
|
||||||
|
- [详细设计](docs/4.详细设计)
|
||||||
|
- [开发计划](docs/5.开发计划)
|
||||||
|
|
||||||
- 登录页:http://127.0.0.1:8000/login/
|
|
||||||
- 首页:http://127.0.0.1:8000/
|
|
||||||
- 管理后台:http://127.0.0.1:8000/admin/
|
|
||||||
|
|||||||
@@ -92,6 +92,8 @@ USE_TZ = True
|
|||||||
|
|
||||||
STATIC_URL = "static/"
|
STATIC_URL = "static/"
|
||||||
STATICFILES_DIRS = [BASE_DIR / "static"]
|
STATICFILES_DIRS = [BASE_DIR / "static"]
|
||||||
|
MEDIA_ROOT = BASE_DIR / "media"
|
||||||
|
MEDIA_URL = "media/"
|
||||||
|
|
||||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||||
|
|
||||||
@@ -102,3 +104,76 @@ LOGOUT_REDIRECT_URL = "login"
|
|||||||
LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
|
LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
|
||||||
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1")
|
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1")
|
||||||
LLM_MODEL = os.environ.get("LLM_MODEL", "")
|
LLM_MODEL = os.environ.get("LLM_MODEL", "")
|
||||||
|
|
||||||
|
REGULATORY_RAG_PROVIDER = os.environ.get("REGULATORY_RAG_PROVIDER", "siliconflow")
|
||||||
|
REGULATORY_RAG_CHROMA_PATH = os.environ.get(
|
||||||
|
"REGULATORY_RAG_CHROMA_PATH",
|
||||||
|
str(MEDIA_ROOT / "regulatory_review" / "rag" / "chroma"),
|
||||||
|
)
|
||||||
|
REGULATORY_RAG_COLLECTION = os.environ.get(
|
||||||
|
"REGULATORY_RAG_COLLECTION",
|
||||||
|
"nmpa_ivd_registration_v1",
|
||||||
|
)
|
||||||
|
REGULATORY_REVIEW_ASYNC = os.environ.get("REGULATORY_REVIEW_ASYNC", "true").lower() == "true"
|
||||||
|
REGULATORY_LLM_REVIEW_MAX_ATTEMPTS = int(os.environ.get("REGULATORY_LLM_REVIEW_MAX_ATTEMPTS", "3"))
|
||||||
|
REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_RETRY_DELAY_SECONDS", "0.5"))
|
||||||
|
REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS = float(os.environ.get("REGULATORY_LLM_REVIEW_TIMEOUT_SECONDS", "15"))
|
||||||
|
SILICONFLOW_BASE_URL = os.environ.get("SILICONFLOW_BASE_URL", "https://api.siliconflow.cn/v1")
|
||||||
|
SILICONFLOW_API_KEY = os.environ.get("SILICONFLOW_API_KEY", LLM_API_KEY)
|
||||||
|
SILICONFLOW_EMBEDDING_MODEL = os.environ.get(
|
||||||
|
"SILICONFLOW_EMBEDDING_MODEL",
|
||||||
|
"Qwen/Qwen3-Embedding-4B",
|
||||||
|
)
|
||||||
|
SILICONFLOW_EMBEDDING_DIMENSIONS = int(os.environ.get("SILICONFLOW_EMBEDDING_DIMENSIONS", "1024"))
|
||||||
|
|
||||||
|
FEISHU_NOTIFY_ENABLED = os.environ.get("FEISHU_NOTIFY_ENABLED", "false").lower() == "true"
|
||||||
|
FEISHU_NOTIFY_CHANNEL = os.environ.get("FEISHU_NOTIFY_CHANNEL", "feishu_api")
|
||||||
|
FEISHU_APP_ID = os.environ.get("FEISHU_APP_ID", "")
|
||||||
|
FEISHU_APP_SECRET = os.environ.get("FEISHU_APP_SECRET", "")
|
||||||
|
FEISHU_DEFAULT_USER_OPEN_ID = os.environ.get("FEISHU_DEFAULT_USER_OPEN_ID", "")
|
||||||
|
FEISHU_DEFAULT_USER_ID = os.environ.get("FEISHU_DEFAULT_USER_ID", "")
|
||||||
|
FEISHU_DEFAULT_TARGET_NAME = os.environ.get("FEISHU_DEFAULT_TARGET_NAME", "默认飞书接收人")
|
||||||
|
FEISHU_TENANT_TOKEN_CACHE_SECONDS = int(os.environ.get("FEISHU_TENANT_TOKEN_CACHE_SECONDS", "6600"))
|
||||||
|
FEISHU_TOKEN_API_URL = os.environ.get(
|
||||||
|
"FEISHU_TOKEN_API_URL",
|
||||||
|
"https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal",
|
||||||
|
)
|
||||||
|
FEISHU_MESSAGE_API_URL = os.environ.get(
|
||||||
|
"FEISHU_MESSAGE_API_URL",
|
||||||
|
"https://open.feishu.cn/open-apis/im/v1/messages",
|
||||||
|
)
|
||||||
|
PUBLIC_BASE_URL = os.environ.get("PUBLIC_BASE_URL", "http://127.0.0.1:8000")
|
||||||
|
|
||||||
|
LOGGING = {
|
||||||
|
"version": 1,
|
||||||
|
"disable_existing_loggers": False,
|
||||||
|
"filters": {
|
||||||
|
"suppress_workflow_status_poll": {
|
||||||
|
"()": "review_agent.logging_filters.SuppressWorkflowStatusPollFilter",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"handlers": {
|
||||||
|
"console": {
|
||||||
|
"class": "logging.StreamHandler",
|
||||||
|
"formatter": "verbose",
|
||||||
|
"filters": ["suppress_workflow_status_poll"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"formatters": {
|
||||||
|
"verbose": {
|
||||||
|
"format": "%(asctime)s %(levelname)s %(name)s %(message)s",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"loggers": {
|
||||||
|
"review_agent": {
|
||||||
|
"handlers": ["console"],
|
||||||
|
"level": os.environ.get("REVIEW_AGENT_LOG_LEVEL", "INFO"),
|
||||||
|
"propagate": True,
|
||||||
|
},
|
||||||
|
"django.server": {
|
||||||
|
"handlers": ["console"],
|
||||||
|
"level": "INFO",
|
||||||
|
"propagate": False,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,11 +1,15 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
|
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
|
||||||
from django.urls import path
|
from django.urls import include, path
|
||||||
|
|
||||||
from review_agent.views import stream_chat, workspace
|
from review_agent.views import attachment_manager, home_dashboard, knowledge_base_manager, stream_chat, workspace
|
||||||
|
|
||||||
urlpatterns = [
|
urlpatterns = [
|
||||||
path("", workspace, name="home"),
|
path("", home_dashboard, name="home"),
|
||||||
|
path("chat/", workspace, name="chat"),
|
||||||
|
path("knowledge-base/", knowledge_base_manager, name="knowledge_base_manager"),
|
||||||
|
path("attachments/", attachment_manager, name="attachment_manager"),
|
||||||
|
path("", include("review_agent.urls")),
|
||||||
path("chat/stream/", stream_chat, name="chat_stream"),
|
path("chat/stream/", stream_chat, name="chat_stream"),
|
||||||
path(
|
path(
|
||||||
"login/",
|
"login/",
|
||||||
|
|||||||
BIN
docs/0.原始材料/目标产品说明书.docx
Normal file
BIN
docs/0.原始材料/目标产品说明书.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息.rar
Normal file
BIN
docs/0.原始材料/第1章 监管信息.rar
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.1 符合标准的清单.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.1 符合标准的清单.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.5 真实性声明.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.5 真实性声明.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.6 符合性声明.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.11.6 符合性声明.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.2 监管信息目录.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.2 监管信息目录.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.4 申请表.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.4 申请表.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.5 产品列表.docx
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.5 产品列表.docx
Normal file
Binary file not shown.
BIN
docs/0.原始材料/第1章 监管信息/CH1.9 产品申报前沟通的说明.doc
Normal file
BIN
docs/0.原始材料/第1章 监管信息/CH1.9 产品申报前沟通的说明.doc
Normal file
Binary file not shown.
BIN
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
Normal file
BIN
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
Normal file
Binary file not shown.
394
docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md
Normal file
394
docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,394 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表需求分析
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始材料 | docs/0.原始材料/【模拟题二】试剂盒临床注册文件准备与审核Agent.docx |
|
||||||
|
| 法规模板来源 | docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告 |
|
||||||
|
| 功能主题 | 从产品文件中提取关键信息并自动填写至指定申报模板 |
|
||||||
|
| 分析日期 | 2026-06-07 |
|
||||||
|
| 分析版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、需求背景
|
||||||
|
|
||||||
|
试剂盒及体外诊断试剂注册申报过程中,注册人员需要将同一批产品关键信息重复填写到注册证格式文件、变更注册或备案文件、安全和性能基本原则清单等申报材料中。人工复制粘贴容易出现字段遗漏、表述不一致、来源不可追溯和模板误改等问题。
|
||||||
|
|
||||||
|
原始任务中的第 3 条能力要求系统能够“从产品文件中提取关键信息并自动填写至目标文件”。本功能目标是:系统基于用户上传的产品说明书、产品技术要求、检测报告、性能研究资料等文件,自动抽取产品名称、检测靶标、适用范围、储存条件、性能指标等核心信息,复制指定法规模板生成可填写副本,将抽取结果写入模板,并输出 Word 与 PDF 两种下载文件。
|
||||||
|
|
||||||
|
本功能是前两批能力的后续增强:依赖第一批文件汇总结果定位产品文件,复用第二批文本抽取、适用条件确认和一致性核查能力,同时新增“模板识别、字段映射、模板填充、冲突高亮、PDF 转换、来源追溯”能力。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、需求范围
|
||||||
|
|
||||||
|
### 2.1 本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 目标模板复制 | 从原始法规资料中复制指定模板,不覆盖原始文件 |
|
||||||
|
| 2 | 注册类型选择 | 首次注册填写注册证格式;变更注册或备案填写变更注册(备案)文件格式 |
|
||||||
|
| 3 | 安全和性能基本原则清单填写 | 无论首次注册或变更注册,均生成并填写安全和性能基本原则清单 |
|
||||||
|
| 4 | 产品信息提取 | 从产品说明书、产品技术要求、检测报告、性能研究资料等文件中抽取模板所需字段 |
|
||||||
|
| 5 | 模板字段识别 | 读取目标模板中的表格、段落、占位栏位和清单条目,建立字段映射 |
|
||||||
|
| 6 | 自动填入模板 | 将抽取字段写入模板副本,缺失字段保持留空 |
|
||||||
|
| 7 | 冲突标记 | 同一字段在多个文件中不一致时,以说明书为准填写,并在模板中以黄色底色、红色字体标记 |
|
||||||
|
| 8 | 冲突摘要展示 | AI 对话框展示冲突字段、采用值、冲突来源和待用户下载确认提示 |
|
||||||
|
| 9 | Word 导出 | 输出填好的 `.docx` 或可编辑 Word 文件 |
|
||||||
|
| 10 | PDF 导出 | 将填好的 Word 转换为 PDF,尽量与原 Word 模板版式保持一致,可用于正式提交前预览 |
|
||||||
|
| 11 | 来源追溯 | 允许额外输出字段来源追溯清单,记录字段来源文件、文本片段、冲突状态和填入目标 |
|
||||||
|
|
||||||
|
### 2.2 非本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 直接覆盖原始法规模板 | 原始材料只作为模板来源,不允许被改写 |
|
||||||
|
| 2 | 自动代替人工最终确认 | 系统生成带标记文件,用户自行下载核对确认 |
|
||||||
|
| 3 | 在线提交 NMPA 系统 | 本期只生成申报文件,不对接外部申报系统 |
|
||||||
|
| 4 | 全部法规表单覆盖 | 本期仅覆盖用户指定的三个目标模板 |
|
||||||
|
| 5 | 复杂版式人工校订 | 系统尽量保持模板版式,复杂错位仍需人工最终复核 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、目标模板
|
||||||
|
|
||||||
|
本期一共处理三个目标模板。用户此前重复提到“体外诊断试剂安全和性能基本原则清单”,经确认属于误填,实际只有一个该清单模板。
|
||||||
|
|
||||||
|
| 序号 | 模板名称 | 原始文件 | 使用条件 | 输出要求 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| 1 | 中华人民共和国医疗器械注册证(体外诊断试剂)(格式) | 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx | 首次注册 | Word + PDF |
|
||||||
|
| 2 | 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式) | 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式).doc | 变更注册或备案 | Word + PDF |
|
||||||
|
| 3 | 体外诊断试剂安全和性能基本原则清单 | 体外诊断试剂安全和性能基本原则清单.doc | 首次注册、变更注册、备案均适用 | Word + PDF |
|
||||||
|
|
||||||
|
### 3.1 已识别注册证模板字段
|
||||||
|
|
||||||
|
从 `中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx` 中已识别到以下表格栏目:
|
||||||
|
|
||||||
|
| 字段 | 填写规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 注册人名称 | 从申请人、注册人、企业信息类文件中抽取 |
|
||||||
|
| 注册人住所 | 从申请人、注册人、企业信息类文件中抽取 |
|
||||||
|
| 生产地址 | 从注册资料、说明书、质量体系或生产信息文件中抽取 |
|
||||||
|
| 代理人名称 | 进口体外诊断试剂适用,境内产品可留空 |
|
||||||
|
| 代理人住所 | 进口体外诊断试剂适用,境内产品可留空 |
|
||||||
|
| 产品名称 | 优先取说明书字段 |
|
||||||
|
| 包装规格 | 对应型号规格、包装规格 |
|
||||||
|
| 主要组成成分 | 优先取说明书和产品技术要求 |
|
||||||
|
| 预期用途 | 对应适用范围、预期用途 |
|
||||||
|
| 产品储存条件及有效期 | 对应储存条件、有效期 |
|
||||||
|
| 附件 | 默认包含产品技术要求、说明书,可根据实际文件匹配补充 |
|
||||||
|
| 其他内容 | 未识别或需人工确认时留空 |
|
||||||
|
| 备注 | 未识别或需人工确认时留空 |
|
||||||
|
|
||||||
|
### 3.2 模板解析约束
|
||||||
|
|
||||||
|
变更注册(备案)文件格式和安全和性能基本原则清单当前为 `.doc` 格式。系统实施时需要支持以下任一方案:
|
||||||
|
|
||||||
|
| 方案 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| LibreOffice 转换 | 使用 LibreOffice/soffice 将 `.doc` 转为 `.docx` 后识别和填写 |
|
||||||
|
| 预转换模板 | 项目内预先保存经人工确认的 `.docx` 模板副本 |
|
||||||
|
| OOXML/COM 方案 | 在 Windows 环境通过 Office 自动化读取和转换模板 |
|
||||||
|
|
||||||
|
无论采用哪种方式,转换后的模板必须保留原文件表格结构、分页、字体和版式,PDF 导出需以填好的 Word 为来源。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、用户角色与使用场景
|
||||||
|
|
||||||
|
| 角色 | 诉求 | 典型场景 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册人员 | 减少重复填表,提高字段一致性 | 上传注册资料包后生成已填注册证格式和基本原则清单 |
|
||||||
|
| 变更注册负责人 | 根据变更类型生成变更注册或备案文件 | 上传变更资料后生成已填变更注册(备案)文件 |
|
||||||
|
| 审核人员 | 快速定位字段来源和冲突 | 下载带冲突高亮的 Word/PDF,并查看 AI 对话框冲突摘要 |
|
||||||
|
| 系统管理员 | 维护模板版本和转换能力 | 更新法规模板、检查 PDF 转换服务和导出记录 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、业务流程分析
|
||||||
|
|
||||||
|
### 5.1 主流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户上传产品注册资料
|
||||||
|
-> 系统执行文件目录与页数汇总
|
||||||
|
-> 系统执行法规核查前置文本抽取
|
||||||
|
-> 系统识别注册类型:首次注册、变更注册或备案
|
||||||
|
-> 系统选择本次适用目标模板
|
||||||
|
-> 系统复制原始模板到批次工作目录
|
||||||
|
-> 系统读取目标模板栏目和清单条目
|
||||||
|
-> 系统从产品文件中抽取模板所需字段
|
||||||
|
-> 系统按字段优先级合并抽取结果
|
||||||
|
-> 如字段存在跨文件冲突,系统以说明书为准填入,并做黄色底色、红色字体标记
|
||||||
|
-> 缺失字段保持留空
|
||||||
|
-> 系统逐条判断安全和性能基本原则清单的适用性、符合性证据和证明文件位置
|
||||||
|
-> 系统生成已填 Word 文件
|
||||||
|
-> 系统将已填 Word 转换为 PDF
|
||||||
|
-> 系统生成来源追溯清单
|
||||||
|
-> AI 对话框展示生成结果、冲突字段摘要和下载链接
|
||||||
|
-> 用户下载 Word/PDF 自行确认
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 注册类型分支
|
||||||
|
|
||||||
|
| 注册类型 | 生成文件 |
|
||||||
|
| --- | --- |
|
||||||
|
| 首次注册 | 注册证格式 Word/PDF;安全和性能基本原则清单 Word/PDF |
|
||||||
|
| 变更注册 | 变更注册(备案)文件 Word/PDF;安全和性能基本原则清单 Word/PDF |
|
||||||
|
| 备案 | 变更注册(备案)文件 Word/PDF;安全和性能基本原则清单 Word/PDF |
|
||||||
|
| 注册类型无法识别 | AI 对话框提示待确认;默认不生成注册证或变更文件,只可生成带待确认标记的草稿版本 |
|
||||||
|
|
||||||
|
### 5.3 异常流程
|
||||||
|
|
||||||
|
| 异常场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板文件不存在 | 批次标记失败,对话框提示缺少目标模板 |
|
||||||
|
| `.doc` 模板无法转换 | 对应模板导出失败,其他模板继续生成 |
|
||||||
|
| 字段未提取到 | 目标栏位留空,来源追溯清单记录为空 |
|
||||||
|
| 字段冲突 | 以说明书为准填入,模板内高亮标记,对话框展示冲突摘要 |
|
||||||
|
| PDF 转换失败 | 保留 Word 下载,提示 PDF 生成失败原因 |
|
||||||
|
| 模板版式明显错位 | 标记为需人工复核,不阻断 Word 文件下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、信息提取与字段规则
|
||||||
|
|
||||||
|
### 6.1 字段范围
|
||||||
|
|
||||||
|
字段范围不固定写死,应以三个目标模板的实际栏目和清单条目为准动态建立。Demo 阶段优先覆盖以下字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | 产品标准名称 |
|
||||||
|
| 检测靶标 | 被检测物、基因、抗原、抗体、病原体或生物标志物 |
|
||||||
|
| 适用范围/预期用途 | 适用人群、样本类型、检测目的、临床用途 |
|
||||||
|
| 储存条件 | 温度、避光、防潮等保存条件 |
|
||||||
|
| 性能指标 | 分析灵敏度、特异性、重复性、准确度、检出限等 |
|
||||||
|
| 型号规格/包装规格 | 规格型号、包装规格、人份数或测试数 |
|
||||||
|
| 样本类型 | 血清、血浆、全血、咽拭子等 |
|
||||||
|
| 有效期 | 产品有效期或稳定性期限 |
|
||||||
|
| 主要组成成分 | 试剂、校准品、质控品、耗材等组成 |
|
||||||
|
| 检验原理 | 反应原理、方法学或检测平台 |
|
||||||
|
| 注册人/申请人 | 注册申请主体 |
|
||||||
|
| 生产地址 | 生产场所地址 |
|
||||||
|
|
||||||
|
### 6.2 来源文件优先级
|
||||||
|
|
||||||
|
| 优先级 | 文件类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 说明书 | 字段冲突时默认以说明书为准 |
|
||||||
|
| 2 | 产品技术要求 | 用于补充性能指标、检验方法、组成成分等字段 |
|
||||||
|
| 3 | 注册检验报告/检测报告 | 用于补充性能指标、样本信息、检验依据和结论 |
|
||||||
|
| 4 | 性能研究资料 | 用于补充安全和性能基本原则清单证据 |
|
||||||
|
| 5 | 其他注册资料 | 用于补充申请人、生产地址、附件清单等信息 |
|
||||||
|
|
||||||
|
### 6.3 冲突处理规则
|
||||||
|
|
||||||
|
| 场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 说明书与其他文件字段不一致 | 按说明书值填入模板 |
|
||||||
|
| 多个非说明书文件不一致,说明书缺失 | 目标字段留空或取最高优先级来源,具体规则由实现阶段配置 |
|
||||||
|
| 字段被高亮标记 | 黄色底色、红色字体,提示用户下载后确认 |
|
||||||
|
| AI 对话框展示 | 展示字段名、采用值、冲突值、来源文件和目标模板 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、安全和性能基本原则清单填写规则
|
||||||
|
|
||||||
|
安全和性能基本原则清单不只填写基础产品信息,还需要根据产品文件内容逐条判断清单条目的适用性、符合性证据和证明文件位置。
|
||||||
|
|
||||||
|
| 填写项 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 适用/不适用 | 根据产品特性、检测方法、样本类型、是否含仪器/软件/灭菌/生物材料等信息判断 |
|
||||||
|
| 符合性说明 | 从产品技术要求、说明书、风险管理、性能研究、稳定性研究等文件中提取证据摘要 |
|
||||||
|
| 证明文件位置 | 填写证据文件名、章节、页码或可定位文本片段 |
|
||||||
|
| 无法判断 | 留空或标记待人工确认,来源追溯清单记录原因 |
|
||||||
|
| 冲突证据 | 如不同文件对同一条款适用性或证据描述冲突,保留高亮并在对话框列出 |
|
||||||
|
|
||||||
|
逐条判断结果需要可追溯,不能只输出“适用”或“不适用”结论。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、输出要求
|
||||||
|
|
||||||
|
### 8.1 文件命名
|
||||||
|
|
||||||
|
文件命名规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
批次号-产品名称-注册证格式.docx
|
||||||
|
批次号-产品名称-注册证格式.pdf
|
||||||
|
批次号-产品名称-变更注册备案文件.docx
|
||||||
|
批次号-产品名称-变更注册备案文件.pdf
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.docx
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.pdf
|
||||||
|
批次号-产品名称-字段来源追溯清单.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
产品名称为空时,可使用 `未识别产品名称` 作为文件名占位。
|
||||||
|
|
||||||
|
### 8.2 AI 对话框摘要
|
||||||
|
|
||||||
|
AI 对话框应展示生成结果、下载链接和冲突字段摘要。
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成申报模板自动填表文件。
|
||||||
|
|
||||||
|
| 文件 | Word | PDF |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册证格式 | 下载 | 下载 |
|
||||||
|
| 安全和性能基本原则清单 | 下载 | 下载 |
|
||||||
|
|
||||||
|
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.3 Word 输出
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板副本 | 从原始法规模板复制生成,不覆盖原始文件 |
|
||||||
|
| 版式保持 | 保留原模板表格、段落、分页、字体和标题结构 |
|
||||||
|
| 冲突高亮 | 黄色底色、红色字体 |
|
||||||
|
| 缺失字段 | 留空,不填“待补充” |
|
||||||
|
| 可编辑 | 用户可下载后继续人工修改 |
|
||||||
|
|
||||||
|
### 8.4 PDF 输出
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 来源 | 由填好的 Word 转换生成 |
|
||||||
|
| 版式 | 尽量与原 Word 模板一致 |
|
||||||
|
| 用途 | 可作为正式提交前预览 |
|
||||||
|
| 失败处理 | PDF 失败不影响 Word 下载 |
|
||||||
|
|
||||||
|
### 8.5 来源追溯清单
|
||||||
|
|
||||||
|
来源追溯清单允许额外生成,建议至少包含:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标模板 | 字段填入哪个模板 |
|
||||||
|
| 目标栏位/条目 | 字段对应的表格栏位或清单条目 |
|
||||||
|
| 填入值 | 实际写入模板的值 |
|
||||||
|
| 来源文件 | 取值来源文件 |
|
||||||
|
| 来源片段 | 支撑取值的文本片段 |
|
||||||
|
| 是否冲突 | 是/否 |
|
||||||
|
| 冲突值 | 其他文件中的不同值 |
|
||||||
|
| 处理方式 | 采用说明书、留空、高亮、待人工确认等 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、功能模块梳理
|
||||||
|
|
||||||
|
| 序号 | 功能名称 | 功能描述 | 优先级 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 1 | 模板管理 | 维护三个目标模板路径、版本和适用注册类型 | P0 |
|
||||||
|
| 2 | 模板副本生成 | 将原始模板复制到批次工作目录 | P0 |
|
||||||
|
| 3 | 模板结构识别 | 识别模板中的表格字段、段落占位、清单条目 | P0 |
|
||||||
|
| 4 | 产品字段抽取 | 从上传文件中抽取模板所需产品字段 | P0 |
|
||||||
|
| 5 | 字段合并与冲突检测 | 按说明书优先级合并字段,并识别跨文件冲突 | P0 |
|
||||||
|
| 6 | Word 模板填充 | 将字段写入 Word 模板副本 | P0 |
|
||||||
|
| 7 | 冲突高亮 | 对冲突字段应用黄色底色和红色字体 | P0 |
|
||||||
|
| 8 | 基本原则逐条判断 | 判断安全和性能条目的适用性、符合性证据和证明文件位置 | P0 |
|
||||||
|
| 9 | PDF 转换 | 将填好的 Word 转为 PDF | P0 |
|
||||||
|
| 10 | 下载链接生成 | 在 AI 对话框提供 Word/PDF 下载链接 | P0 |
|
||||||
|
| 11 | 来源追溯清单导出 | 输出字段来源、冲突和填入目标 | P1 |
|
||||||
|
| 12 | 版式 QA | 对 Word/PDF 版式进行自动或人工可见检查 | P1 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、数据实体分析
|
||||||
|
|
||||||
|
| 实体名称 | 字段说明 | 关联实体 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 自动填表批次 | 批次编号、用户、会话、注册类型、产品名称、状态、错误信息、创建时间、完成时间 | 文件汇总批次、法规核查批次 |
|
||||||
|
| 模板副本 | 模板名称、模板类型、原始模板路径、副本路径、模板版本、适用条件 | 自动填表批次 |
|
||||||
|
| 提取字段 | 字段名、填入值、来源文件、来源片段、来源优先级、是否冲突、冲突详情 | 自动填表批次 |
|
||||||
|
| 填表结果文件 | 文件类型、文件名、Word 路径、PDF 路径、下载状态 | 自动填表批次 |
|
||||||
|
| 清单条目判断 | 条目编号、条目内容、适用性、符合性证据、证明文件位置、判断来源 | 自动填表批次 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、非功能性需求
|
||||||
|
|
||||||
|
### 11.1 可追溯性
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 字段来源可追溯 | 每个填入字段应能追溯到来源文件和文本片段 |
|
||||||
|
| 模板版本可追溯 | 每次生成记录原始模板文件名、版本和路径 |
|
||||||
|
| 冲突处理可追溯 | 冲突字段记录采用值、冲突值和处理规则 |
|
||||||
|
| 输出文件可追溯 | Word/PDF 文件关联批次、用户和会话 |
|
||||||
|
|
||||||
|
### 11.2 安全要求
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始模板保护 | 不允许覆盖或修改原始法规资料目录中的模板 |
|
||||||
|
| 下载权限 | Word/PDF/追溯清单仅允许当前会话授权用户下载 |
|
||||||
|
| 敏感信息保护 | 对话框只展示必要冲突摘要,不展示大段敏感原文 |
|
||||||
|
| 文件隔离 | 不同用户、不同批次的模板副本和导出文件隔离存储 |
|
||||||
|
|
||||||
|
### 11.3 版式要求
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| Word 版式 | 尽量保持原模板表格、字体、分页和段落结构 |
|
||||||
|
| PDF 版式 | 与填好后的 Word 一致,可用于正式提交前预览 |
|
||||||
|
| 高亮可见 | 冲突字段在 Word 和 PDF 中均应能被用户识别 |
|
||||||
|
| 缺失字段不污染模板 | 未提取字段留空,不填入系统提示语 |
|
||||||
|
|
||||||
|
### 11.4 性能要求
|
||||||
|
|
||||||
|
| 场景 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 小批次资料 | 50 个文件以内,应在 1 分钟内完成字段抽取和模板生成 |
|
||||||
|
| 中等批次资料 | 200 个文件以内支持后台异步处理和进度提示 |
|
||||||
|
| 单个模板失败 | 不影响其他适用模板生成 |
|
||||||
|
| 单个字段失败 | 不影响整份模板生成,字段留空并记录原因 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、待后续确认事项
|
||||||
|
|
||||||
|
| 序号 | 待确认项 | 当前建议 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | `.doc` 模板转换方案 | 优先使用 LibreOffice/soffice 转 docx;无法部署时预置人工确认版 docx 模板 |
|
||||||
|
| 2 | 变更注册(备案)文件字段清单 | 需在模板可解析后补充字段映射 |
|
||||||
|
| 3 | 安全和性能基本原则清单条目结构 | 需在模板可解析后拆解条目编号、要求、适用性和证据栏 |
|
||||||
|
| 4 | 说明书识别规则 | 需明确如何从上传资料中判定哪份文件是说明书 |
|
||||||
|
| 5 | PDF 转换质量标准 | 需明确是否要求逐页渲染检查、页数一致和关键表格不跨页错位 |
|
||||||
|
| 6 | 注册类型无法识别时是否允许生成草稿 | 建议允许生成安全和性能基本原则清单,注册证或变更文件等待确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 验收标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 模板复制 | 系统生成模板副本,不修改原始法规模板 |
|
||||||
|
| 2 | 首次注册文件选择 | 首次注册场景生成注册证格式和安全和性能基本原则清单 |
|
||||||
|
| 3 | 变更注册/备案文件选择 | 变更注册或备案场景生成变更注册(备案)文件和安全和性能基本原则清单 |
|
||||||
|
| 4 | 字段自动填写 | 产品名称、预期用途、储存条件、包装规格等字段能自动写入对应栏目 |
|
||||||
|
| 5 | 缺失字段留空 | 未提取到的字段保持空白 |
|
||||||
|
| 6 | 冲突字段高亮 | 字段冲突时按说明书值填入,并在 Word/PDF 中黄色底色、红色字体标记 |
|
||||||
|
| 7 | 冲突摘要展示 | AI 对话框展示冲突字段、采用值、冲突来源和处理方式 |
|
||||||
|
| 8 | 基本原则清单判断 | 系统能逐条输出适用/不适用、符合性证据和证明文件位置 |
|
||||||
|
| 9 | Word 下载 | 对话框提供填好后的 Word 下载链接 |
|
||||||
|
| 10 | PDF 下载 | 对话框提供由 Word 转换生成的 PDF 下载链接 |
|
||||||
|
| 11 | 来源追溯 | 可导出字段来源追溯清单,记录字段来源和冲突情况 |
|
||||||
|
| 12 | 异常不中断 | 单个字段、单个模板或 PDF 转换失败时,其他结果仍可正常输出 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、下一步建议
|
||||||
|
|
||||||
|
1. 将两个 `.doc` 原始模板转换为可解析的 `.docx` 工作模板,并人工确认版式无明显变化。
|
||||||
|
2. 拆解三个模板的字段、表格和清单条目,形成模板字段映射配置。
|
||||||
|
3. 扩展产品信息抽取字段,优先覆盖注册证模板已识别字段和安全和性能基本原则清单证据字段。
|
||||||
|
4. 设计冲突高亮写入规则,确保 Word 与 PDF 中均可见。
|
||||||
|
5. 接入 Word 到 PDF 转换能力,并建立页数、版式和关键表格的转换质量检查。
|
||||||
527
docs/1.需求分析/4.飞书通知与问答接入.md
Normal file
527
docs/1.需求分析/4.飞书通知与问答接入.md
Normal file
@@ -0,0 +1,527 @@
|
|||||||
|
# 飞书通知与问答接入需求分析
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 功能主题 | 飞书通知链路与飞书内问答能力接入 |
|
||||||
|
| 关联工作流 | 自动汇总、NMPA 注册资料法规核查与整改闭环、产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 分析日期 | 2026-06-07 |
|
||||||
|
| 分析版本 | V1.0 |
|
||||||
|
| 短期目标 | 流程结束后同步飞书提醒 |
|
||||||
|
| 终极目标 | 用户可在飞书内向 Agent 提问并获得基于系统数据的回答 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、需求背景
|
||||||
|
|
||||||
|
当前系统已经具备多个注册资料处理工作流,包括文件自动汇总、法规核查与整改闭环、产品关键信息提取与申报文件自动填表。系统内已经存在模拟通知记录和通知节点,但尚未接入真实飞书发送链路。
|
||||||
|
|
||||||
|
在实际业务协作中,注册人员、审核人员和整改负责人往往以飞书群或飞书私聊作为日常沟通入口。如果工作流只在系统页面内展示结果,用户需要主动返回系统查看状态,容易造成流程完成后无人跟进、整改项遗漏、生成文件未及时下载等问题。
|
||||||
|
|
||||||
|
因此需要引入飞书接入能力,分阶段实现:
|
||||||
|
|
||||||
|
1. 流程结束后自动向飞书发送提醒,完成从“系统内闭环”到“协作通知闭环”的升级。
|
||||||
|
2. 后续支持用户在飞书内与 Agent 对话,查询批次状态、风险项、生成文件、整改建议等信息。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、接入方案调研摘要
|
||||||
|
|
||||||
|
### 2.1 主方案:飞书官方智能体/应用机器人 + 消息 API
|
||||||
|
|
||||||
|
飞书开放平台支持创建飞书智能体应用或企业自建应用机器人。系统通过 `App ID` 和 `App Secret` 获取 `tenant_access_token`,再调用飞书消息 API 向指定个人账号(后续可扩展到群聊)发送流程完成提醒;后续通过事件订阅接收用户私聊机器人或群内 @ 机器人的消息,实现飞书内问答。
|
||||||
|
|
||||||
|
该方案同时覆盖短期“流程结束后提醒”和终极“飞书内问答”,避免先接自定义 Webhook、后续再迁移到应用机器人的重复建设。
|
||||||
|
|
||||||
|
核心能力:
|
||||||
|
|
||||||
|
| 能力 | 用途 |
|
||||||
|
| --- | --- |
|
||||||
|
| 飞书智能体/应用机器人 | 允许 Agent 以机器人身份进入飞书 |
|
||||||
|
| tenant_access_token | 使用 App ID、App Secret 换取应用访问令牌 |
|
||||||
|
| 发送消息 API | 主动向用户或群聊发送文本、富文本、卡片、文件等消息 |
|
||||||
|
| 事件订阅 | 接收用户私聊机器人或群里 @ 机器人的消息 |
|
||||||
|
| 权限配置 | 申请发送消息、接收消息、读取用户或群组信息等权限 |
|
||||||
|
|
||||||
|
### 2.2 备选方案:飞书自定义机器人 Webhook
|
||||||
|
|
||||||
|
飞书自定义机器人 Webhook 适合只做固定群主动推送,但不适合飞书内问答、私聊回复和统一身份权限管理。本项目不将 Webhook 作为主接入方案,仅作为后续极简部署或故障降级备选。
|
||||||
|
|
||||||
|
### 2.3 参考官方文档
|
||||||
|
|
||||||
|
| 主题 | 参考地址 |
|
||||||
|
| --- | --- |
|
||||||
|
| 一键创建飞书智能体应用 | https://open.feishu.cn/document/mcp_open_tools/integrating-agents-with-feishu/overview |
|
||||||
|
| 机器人概述 | https://open.feishu.cn/document/client-docs/bot-v3/bot-overview?lang=zh-CN |
|
||||||
|
| 自建应用获取 tenant_access_token | https://open.feishu.cn/document/server-docs/authentication-management/access-token/tenant_access_token_internal?lang=zh-CN |
|
||||||
|
| 发送消息 API | https://open.feishu.cn/document/server-docs/im-v1/message/create?lang=zh-CN |
|
||||||
|
| 事件订阅概述 | https://open.feishu.cn/document/server-docs/event-subscription-guide/overview?lang=zh-CN |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、总体目标
|
||||||
|
|
||||||
|
### 3.1 短期目标:流程结束后同步提醒到指定个人账号
|
||||||
|
|
||||||
|
当系统中的工作流执行结束后,自动通过飞书智能体向指定个人账号发送一条结构化私聊提醒。Demo 阶段先与当前系统负责人账号单独对接,不接入外部群聊。提醒内容应帮助用户快速判断:
|
||||||
|
|
||||||
|
| 信息项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 哪个流程完成 | 例如自动汇总、法规核查、自动填表 |
|
||||||
|
| 哪个批次完成 | 展示批次编号、会话标题或上传文件摘要 |
|
||||||
|
| 当前状态 | 成功、部分成功、失败、需人工确认 |
|
||||||
|
| 核心结果 | 风险数量、阻断项数量、生成文件数量、冲突字段数量等 |
|
||||||
|
| 下一步动作 | 查看报告、下载文件、处理整改项、回到系统确认 |
|
||||||
|
| 系统入口 | 提供可点击链接,跳转到对应批次或会话页面 |
|
||||||
|
| 被提醒人 | 首期固定发送给已配置的个人飞书账号 |
|
||||||
|
|
||||||
|
### 3.2 中期目标:按流程和责任人分发
|
||||||
|
|
||||||
|
在个人账号通知跑通后,逐步支持更精细的通知策略:
|
||||||
|
|
||||||
|
| 通知策略 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 按发起人私聊 | 根据系统用户映射发送给流程发起人 |
|
||||||
|
| 按工作流分群 | 不同工作流通知到不同群 |
|
||||||
|
| 按项目分群 | 同一项目或产品线通知到指定群 |
|
||||||
|
| 按负责人私聊 | 将待处理事项发送给上传人、审核人或整改负责人 |
|
||||||
|
| 风险分级通知 | 阻断项和高风险立即通知,低风险可汇总通知 |
|
||||||
|
|
||||||
|
### 3.3 终极目标:飞书内问答
|
||||||
|
|
||||||
|
用户可以在飞书内向 Agent 提问,系统根据用户消息识别意图,查询本地业务数据和已生成结果,返回回答。
|
||||||
|
|
||||||
|
示例问题:
|
||||||
|
|
||||||
|
| 问题 | 预期回答 |
|
||||||
|
| --- | --- |
|
||||||
|
| “最近一个法规核查批次结果怎么样?” | 返回最近批次状态、风险数量和报告入口 |
|
||||||
|
| “RR-20260607-001 有哪些阻断项?” | 返回阻断项标题、法规依据、整改建议 |
|
||||||
|
| “自动填表生成的 Word 在哪里?” | 返回生成文件列表和下载入口 |
|
||||||
|
| “这个批次还缺哪些资料?” | 返回缺失文件清单和对应建议 |
|
||||||
|
| “帮我解释第 3 个风险项” | 返回风险说明、证据文件、整改建议和注意事项 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、需求范围
|
||||||
|
|
||||||
|
### 4.1 本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 真实飞书通知通道 | 接入飞书官方智能体/应用机器人消息 API |
|
||||||
|
| 2 | 通知开关 | 通过环境变量控制是否启用真实飞书通知 |
|
||||||
|
| 3 | 保留 mock 通道 | 默认可回退到 mock,不影响本地开发和自动化测试 |
|
||||||
|
| 4 | 工作流完成通知 | 流程成功、部分成功或失败后发送飞书提醒 |
|
||||||
|
| 5 | 通知记录落库 | 记录通道、目标、发送状态、发送时间、错误信息和 payload 摘要(不保存完整富文本 payload) |
|
||||||
|
| 6 | 失败不阻断主流程 | 飞书发送失败只记录错误,不让业务工作流失败,首期不自动重试 |
|
||||||
|
| 7 | 消息模板 | 输出清晰的富文本消息,包含批次、状态、摘要和系统链接 |
|
||||||
|
| 8 | 安全配置 | App ID、App Secret、事件订阅密钥等敏感配置不得写入代码库 |
|
||||||
|
| 9 | 基础测试 | 覆盖成功、失败、未启用、配置缺失、发送超时等场景 |
|
||||||
|
|
||||||
|
### 4.2 非本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 飞书内问答完整实现 | 本期只为后续问答预留架构,不直接实现复杂对话 |
|
||||||
|
| 2 | 飞书审批流 | 不接入飞书审批或表单能力 |
|
||||||
|
| 3 | 飞书文档写入 | 不自动创建或更新飞书文档 |
|
||||||
|
| 4 | 企业级组织架构同步 | 不做通讯录全量同步 |
|
||||||
|
| 5 | 多租户飞书应用管理 | Demo 阶段只考虑单企业或单环境配置 |
|
||||||
|
| 6 | 复杂交互式卡片操作 | 本期优先文本或简单卡片,不实现按钮回调闭环 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、用户角色与使用场景
|
||||||
|
|
||||||
|
| 角色 | 诉求 | 典型场景 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册人员 | 及时知道批次完成并下载结果 | 自动汇总或自动填表完成后在飞书收到提醒 |
|
||||||
|
| 审核人员 | 快速查看法规核查风险摘要 | 法规核查结束后查看阻断项和高风险数量 |
|
||||||
|
| 整改负责人 | 及时处理缺失资料和风险项 | 飞书提醒中看到整改入口和主要问题 |
|
||||||
|
| 系统管理员 | 维护通知配置并排查发送失败 | 查看通知记录、错误信息和配置状态 |
|
||||||
|
| 后续飞书用户 | 不打开系统也能查询结果 | 在飞书中向机器人提问批次状态或风险项 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、业务流程
|
||||||
|
|
||||||
|
### 6.1 短期通知流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户发起业务工作流
|
||||||
|
-> 系统执行自动汇总、法规核查或自动填表
|
||||||
|
-> 工作流进入完成、部分成功或失败状态
|
||||||
|
-> 系统生成通知摘要
|
||||||
|
-> 系统判断飞书真实通知是否启用
|
||||||
|
-> 未启用:写入 mock 通知记录
|
||||||
|
-> 已启用:使用 App ID/App Secret 获取或复用 tenant_access_token
|
||||||
|
-> 调用飞书消息 API 向指定个人账号发送富文本消息
|
||||||
|
-> 发送成功:写入成功通知记录和 sent_at
|
||||||
|
-> 发送失败:写入失败通知记录、错误信息和重试次数
|
||||||
|
-> 主工作流继续完成,不因通知失败回滚业务结果
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 终极问答流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户在飞书私聊机器人或群里 @ 机器人提问
|
||||||
|
-> 飞书通过事件订阅将消息推送到系统回调地址
|
||||||
|
-> 系统校验事件来源和签名
|
||||||
|
-> 系统解析用户身份、会话位置和消息内容
|
||||||
|
-> 系统执行意图识别
|
||||||
|
-> 系统根据意图查询批次、文件、报告、风险项或生成结果
|
||||||
|
-> 系统组织回答内容
|
||||||
|
-> 系统通过飞书消息 API 回复用户或群聊
|
||||||
|
-> 系统记录问答日志、引用数据和错误信息
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、功能需求
|
||||||
|
|
||||||
|
### 7.1 通知触发点
|
||||||
|
|
||||||
|
| 工作流 | 触发节点 | 通知时机 | 初始优先级 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 自动汇总 | 文件汇总完成 | 成功、部分成功、失败 | 高 |
|
||||||
|
| 法规核查与整改闭环 | 风险分级和报告生成后 | 成功、部分成功、失败;阻断项和高风险优先展示 | 高 |
|
||||||
|
| 自动填表 | Word/PDF 和追溯清单生成后 | 成功、部分成功、失败 | 高 |
|
||||||
|
|
||||||
|
首期三个业务工作流均接入飞书通知:自动汇总、法规核查与整改闭环、产品关键信息提取与申报文件自动填表。
|
||||||
|
|
||||||
|
### 7.2 通知内容模板
|
||||||
|
|
||||||
|
通知消息首期采用富文本格式,需支持换行和重点信息突出展示。通知消息应至少包含:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 标题 | 工作流名称 + 状态 |
|
||||||
|
| 批次编号 | 例如 RR-NOTIFY、AFF-NOTIFY |
|
||||||
|
| 发起人 | 当前系统用户 |
|
||||||
|
| 完成时间 | 工作流完成时间 |
|
||||||
|
| 结果摘要 | 风险数量、文件数量、导出文件数量、冲突字段数量等 |
|
||||||
|
| 下一步 | 查看报告、下载结果、处理整改项、重新复核 |
|
||||||
|
| 系统链接 | 首期使用本地地址拼接系统内批次或会话页面链接,例如 `http://127.0.0.1:8000/...` |
|
||||||
|
|
||||||
|
发送策略:
|
||||||
|
|
||||||
|
| 策略项 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 通知状态 | 成功、部分成功、失败均发送飞书通知 |
|
||||||
|
| 重复发送 | 同一批次、同一工作流、同一状态只发送一次,避免重复点击或重复运行造成刷屏 |
|
||||||
|
| 失败重试 | 首期不自动重试,只记录失败状态和错误信息 |
|
||||||
|
| 主流程影响 | 通知失败不阻断业务工作流完成 |
|
||||||
|
|
||||||
|
消息内容粒度:
|
||||||
|
|
||||||
|
| 粒度项 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 基础信息 | 工作流名称、状态、批次编号、发起人、完成时间 |
|
||||||
|
| 结果摘要 | 自动汇总展示文件数和异常数;法规核查展示风险总数、阻断项、高风险数量;自动填表展示导出文件数、冲突字段数和失败原因概述 |
|
||||||
|
| 详细清单 | 首期不在飞书私聊中展开完整风险项、缺失项或文件明细,避免消息过长 |
|
||||||
|
| 系统入口 | 首期使用本地地址拼接系统内批次或会话链接,部署后再升级为可配置外部域名 |
|
||||||
|
|
||||||
|
### 7.3 通知状态记录
|
||||||
|
|
||||||
|
通知发送后必须落库,便于排查和审计。
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| channel | mock、feishu_api 等 |
|
||||||
|
| target | 指定个人 open_id、user_id 等 |
|
||||||
|
| status | pending、sent、failed 或 success、failed |
|
||||||
|
| payload | 发送内容摘要和业务上下文 |
|
||||||
|
| external_message_id | 飞书返回的消息 ID,Webhook 无返回时可为空 |
|
||||||
|
| error_message | 失败原因 |
|
||||||
|
| retry_count | 重试次数 |
|
||||||
|
| sent_at | 成功发送时间 |
|
||||||
|
|
||||||
|
### 7.4 配置需求
|
||||||
|
|
||||||
|
环境变量建议:
|
||||||
|
|
||||||
|
| 配置项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| FEISHU_NOTIFY_ENABLED | 是否启用真实飞书通知 |
|
||||||
|
| FEISHU_NOTIFY_CHANNEL | 通知通道,首期为 feishu_api |
|
||||||
|
| FEISHU_APP_ID | 飞书智能体/企业自建应用 App ID |
|
||||||
|
| FEISHU_APP_SECRET | 飞书智能体/企业自建应用 App Secret |
|
||||||
|
| FEISHU_DEFAULT_USER_OPEN_ID | 首期指定接收人的飞书 open_id |
|
||||||
|
| FEISHU_DEFAULT_USER_ID | 首期指定接收人的飞书 user_id,可作为 open_id 的备选 |
|
||||||
|
| FEISHU_DEFAULT_TARGET_NAME | 默认通知目标名称,用于记录展示 |
|
||||||
|
| FEISHU_TENANT_TOKEN_CACHE_SECONDS | tenant_access_token 本地缓存秒数 |
|
||||||
|
| FEISHU_EVENT_VERIFY_TOKEN | 事件订阅校验 Token,后续问答使用 |
|
||||||
|
| FEISHU_EVENT_ENCRYPT_KEY | 事件订阅加密 Key,后续问答使用 |
|
||||||
|
|
||||||
|
敏感配置不得提交到代码库,只能通过本地 `.env`、部署环境变量或密钥管理系统注入。
|
||||||
|
|
||||||
|
首期配置维护方式:
|
||||||
|
|
||||||
|
| 配置类型 | 维护方式 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 飞书 App ID | 环境变量 | 属于敏感信息,不进入数据库和代码库 |
|
||||||
|
| 飞书 App Secret | 环境变量 | 属于敏感信息,不进入数据库和代码库 |
|
||||||
|
| 指定接收人 open_id/user_id | 环境变量 | 首期固定发送到一个个人账号 |
|
||||||
|
| 通知开关 | 环境变量 | 便于本地、测试、部署环境切换 |
|
||||||
|
| 系统用户与飞书用户映射 | Django Admin | 便于非开发人员维护发起人和飞书用户标识 |
|
||||||
|
|
||||||
|
### 7.5 系统用户与飞书用户映射
|
||||||
|
|
||||||
|
首期采用手工配置表维护系统用户与飞书用户之间的映射关系。后续升级为按发起人私聊或群内 @ 通知时,系统根据批次 `user` 字段找到流程发起人或上传人,再从映射表中读取对应的飞书用户标识。
|
||||||
|
|
||||||
|
建议字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| system_username | 系统登录用户名 |
|
||||||
|
| system_user_id | 系统用户 ID,可选 |
|
||||||
|
| feishu_display_name | 飞书展示名称,便于管理员识别 |
|
||||||
|
| feishu_mobile | 飞书手机号,可选 |
|
||||||
|
| feishu_open_id | 飞书 open_id,可选 |
|
||||||
|
| feishu_user_id | 飞书 user_id,可选 |
|
||||||
|
| is_active | 是否启用该映射 |
|
||||||
|
| remark | 备注 |
|
||||||
|
|
||||||
|
首期实现时,系统优先将通知发送给环境变量中配置的指定个人账号。用户映射表仍保留,用于后续从“固定个人账号”升级为“按流程发起人私聊”。若指定接收人未配置,系统不发送真实飞书消息,只记录配置缺失失败。
|
||||||
|
|
||||||
|
当同一个系统用户配置了多个飞书标识时,首期按以下优先级选择 @ 标识:
|
||||||
|
|
||||||
|
```text
|
||||||
|
feishu_open_id -> feishu_user_id -> feishu_mobile
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.6 通知记录展示
|
||||||
|
|
||||||
|
首期需要在对应批次详情页展示通知状态,帮助用户和管理员判断飞书提醒是否已发送。
|
||||||
|
|
||||||
|
| 展示项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 通知通道 | mock、feishu_api 等 |
|
||||||
|
| 通知目标 | 指定个人账号名称或配置名称 |
|
||||||
|
| 接收人 | 首期指定接收人;后续可展示发起人/上传人的飞书展示名称 |
|
||||||
|
| 发送状态 | 成功、失败、待发送或未启用 |
|
||||||
|
| 发送时间 | 成功发送时间 |
|
||||||
|
| 失败原因 | 发送失败或配置异常时展示摘要 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、飞书内问答需求预留
|
||||||
|
|
||||||
|
### 8.1 问答入口
|
||||||
|
|
||||||
|
| 入口 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 私聊机器人 | 首期入口,用户直接向机器人询问自己的批次、文件和报告 |
|
||||||
|
| 群聊 @ 机器人 | 群内成员 @ 机器人询问某个批次或风险项 |
|
||||||
|
| 通知消息引用 | 用户收到通知后,基于批次编号继续提问 |
|
||||||
|
|
||||||
|
### 8.2 问答能力边界
|
||||||
|
|
||||||
|
第一阶段飞书问答不应直接执行高风险写操作,只提供查询和解释:
|
||||||
|
|
||||||
|
| 能力 | 是否纳入首期问答 |
|
||||||
|
| --- | --- |
|
||||||
|
| 查询批次状态 | 是 |
|
||||||
|
| 查询风险项摘要 | 是 |
|
||||||
|
| 查询缺失项摘要 | 是 |
|
||||||
|
| 查询生成文件摘要 | 是 |
|
||||||
|
| 解释整改建议 | 否,作为后续增强 |
|
||||||
|
| 重新发起工作流 | 否 |
|
||||||
|
| 删除文件或记录 | 否 |
|
||||||
|
| 自动关闭风险项 | 否 |
|
||||||
|
| 修改申报文件 | 否 |
|
||||||
|
|
||||||
|
### 8.3 权限原则
|
||||||
|
|
||||||
|
飞书内问答必须解决用户身份和数据权限问题:
|
||||||
|
|
||||||
|
| 场景 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 私聊查询 | 普通用户只能查询自己发起或上传的批次;管理员可以查询全部批次 |
|
||||||
|
| 群内查询 | 只返回适合在群内公开的信息,敏感文件链接需谨慎 |
|
||||||
|
| 未绑定用户 | 提示先完成系统用户与飞书用户绑定 |
|
||||||
|
| 无权限数据 | 返回无权限提示,不泄露批次是否存在以外的敏感信息 |
|
||||||
|
|
||||||
|
### 8.4 首期问答交互规则
|
||||||
|
|
||||||
|
首期私聊问答支持两类批次定位方式:
|
||||||
|
|
||||||
|
| 方式 | 示例 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 明确批次号 | “查 RR-20260607-001” | 系统按批次编号精确查询 |
|
||||||
|
| 自然指代 | “查最近一个法规核查批次”“最新自动填表结果怎么样” | 系统在用户可访问范围内查找最近批次 |
|
||||||
|
|
||||||
|
问答回复规则:
|
||||||
|
|
||||||
|
| 规则 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 链接返回 | 只有用户具备对应批次访问权限时才返回系统链接 |
|
||||||
|
| 无权限结果 | 提示无权限或无法访问,不返回敏感摘要和链接 |
|
||||||
|
| 回答粒度 | 返回批次状态、风险摘要、缺失摘要、导出摘要和下一步建议 |
|
||||||
|
| 日志留痕 | 记录用户问题、识别意图、查询对象、回答摘要、错误信息和处理时间,不保存完整回答正文 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、异常与安全要求
|
||||||
|
|
||||||
|
| 场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| App ID/App Secret 或指定接收人未配置 | 自动回退 mock 或只记录未发送状态 |
|
||||||
|
| tenant_access_token 获取失败 | 记录失败,不发送消息,不阻断主流程 |
|
||||||
|
| 飞书接口超时 | 记录失败,不阻断主流程 |
|
||||||
|
| 飞书返回错误 | 记录错误码和错误信息,便于排查 |
|
||||||
|
| 消息过长 | 自动截断摘要,系统链接保留完整结果;首期不发送详细风险项或缺失项清单 |
|
||||||
|
| 重复触发 | 同一批次、同一工作流、同一状态只发送一次 |
|
||||||
|
| 敏感信息 | 通知正文避免包含完整文件内容、密钥、个人敏感信息 |
|
||||||
|
| 外部链接 | 首期使用本地地址;部署环境应升级为可信域名配置 |
|
||||||
|
| 回调伪造 | 后续事件订阅必须校验来源、签名、Token 或加密参数 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、验收标准
|
||||||
|
|
||||||
|
### 10.1 短期通知验收
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 配置关闭 | 未启用飞书通知时,工作流仍可正常完成并记录 mock 通知 |
|
||||||
|
| 2 | 配置开启 | 配置 App ID、App Secret 和指定个人 open_id/user_id 后,流程完成会向个人飞书账号发送提醒 |
|
||||||
|
| 3 | 成功记录 | 发送成功后通知记录状态为成功,并记录发送时间 |
|
||||||
|
| 4 | 失败记录 | token 获取失败、消息 API 错误、超时或配置错误时记录失败原因 |
|
||||||
|
| 5 | 不阻断主流程 | 通知失败不会导致工作流失败 |
|
||||||
|
| 6 | 内容完整 | 飞书消息包含工作流、批次、状态、摘要和系统入口 |
|
||||||
|
| 7 | 自动化测试 | 有单元测试覆盖通知构造、发送成功、发送失败、配置关闭 |
|
||||||
|
| 8 | token 管理 | 系统能获取并缓存 tenant_access_token,token 失效后可重新获取 |
|
||||||
|
| 9 | 后台映射 | 管理员可在 Django Admin 维护系统用户与飞书用户映射 |
|
||||||
|
|
||||||
|
### 10.2 终极问答验收
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 消息接收 | 系统能接收飞书私聊或群 @ 机器人消息 |
|
||||||
|
| 2 | 身份识别 | 能识别飞书用户并关联系统用户 |
|
||||||
|
| 3 | 意图识别 | 能区分批次查询、风险查询、文件查询、解释类问题 |
|
||||||
|
| 4 | 权限控制 | 普通用户只能查询自己发起或上传的批次;管理员可查询全部批次 |
|
||||||
|
| 5 | 消息回复 | 系统能通过飞书消息 API 回复用户 |
|
||||||
|
| 6 | 日志留痕 | 用户问题、意图、查询对象、回答摘要和错误信息可追溯,不保存完整回答正文 |
|
||||||
|
| 7 | 批次定位 | 支持明确批次号和“最近一个/最新批次”等自然说法 |
|
||||||
|
| 8 | 链接控制 | 只有用户有权限访问时才返回系统链接 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、阶段规划
|
||||||
|
|
||||||
|
### 阶段一:指定个人账号完成提醒
|
||||||
|
|
||||||
|
目标:使用飞书官方智能体/应用机器人消息 API 将流程完成提醒发送到指定个人账号。Demo 阶段先与当前系统负责人账号单独对接,暂不接入外部群聊。
|
||||||
|
|
||||||
|
建议内容:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 通知通道抽象 | 将 mock 和 feishu_api 封装为可切换通道 |
|
||||||
|
| 消息模板 | 输出流程完成摘要 |
|
||||||
|
| 指定接收人 | 根据环境变量配置的 open_id/user_id 发送给指定个人账号 |
|
||||||
|
| token 管理 | 使用 App ID/App Secret 获取并缓存 tenant_access_token |
|
||||||
|
| 消息 API | 使用指定个人 open_id/user_id 调用飞书发送消息 API |
|
||||||
|
| 通知记录 | 发送结果落库 |
|
||||||
|
| 配置开关 | 环境变量控制启用与否 |
|
||||||
|
| 测试覆盖 | 不依赖真实飞书也能测试发送逻辑 |
|
||||||
|
| 批次详情展示 | 在批次详情页展示通知状态和失败原因 |
|
||||||
|
|
||||||
|
### 阶段一附加:飞书问答预留
|
||||||
|
|
||||||
|
目标:在不实现飞书事件回调和私聊问答的前提下,为后续问答 MVP 预留必要的数据结构、服务边界和权限规则。
|
||||||
|
|
||||||
|
建议内容:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户映射复用 | 飞书用户映射模型同时服务 @ 通知和后续私聊身份识别 |
|
||||||
|
| 查询服务边界 | 预留按批次号、最近批次、工作流类型查询结果摘要的服务接口 |
|
||||||
|
| 权限过滤规则 | 查询服务内置管理员全查、普通用户查自己批次的权限规则 |
|
||||||
|
| 问答日志模型预留 | 可先设计模型或接口,不要求首期接收飞书消息 |
|
||||||
|
|
||||||
|
### 阶段二:按流程或项目分群
|
||||||
|
|
||||||
|
目标:支持不同流程、项目或业务线配置不同飞书目标。
|
||||||
|
|
||||||
|
建议内容:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 通知路由 | 根据 workflow_type、project、batch 等选择目标 |
|
||||||
|
| 通知策略 | 风险等级、完成状态、失败状态决定是否通知 |
|
||||||
|
| 消息降噪 | 避免同一批次重复刷屏 |
|
||||||
|
|
||||||
|
### 阶段三:事件订阅与私聊问答
|
||||||
|
|
||||||
|
建议内容:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 事件回调 | 接收飞书私聊消息事件 |
|
||||||
|
| 用户绑定 | 使用飞书 open_id/user_id 映射系统用户 |
|
||||||
|
| 问答处理 | 查询批次状态、风险摘要、缺失摘要和导出摘要 |
|
||||||
|
| 回复消息 | 继续使用消息 API 回复用户 |
|
||||||
|
|
||||||
|
### 阶段四:飞书内问答
|
||||||
|
|
||||||
|
目标:通过事件订阅接收用户消息,并调用系统 Agent 能力回答问题。
|
||||||
|
|
||||||
|
建议内容:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 事件回调 | 接收私聊和群 @ 消息 |
|
||||||
|
| 意图识别 | 解析查询对象和问题类型 |
|
||||||
|
| 数据查询 | 查询批次、风险、文件、报告和通知记录 |
|
||||||
|
| 回答生成 | 返回简洁、可追溯、带链接的回答 |
|
||||||
|
| 安全审计 | 记录问答日志和权限判断 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、待确认问题
|
||||||
|
|
||||||
|
| 编号 | 问题 | 推荐选项 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Q1 | 短期通知发到哪里?固定飞书群、按业务群区分、还是按个人私聊? | 已调整:先发送到指定个人账号,暂不接入外部群聊 |
|
||||||
|
| Q2 | 首期接入哪些工作流?自动填表、法规核查、自动汇总是否都通知? | 已确认:三个流程都通知 |
|
||||||
|
| Q3 | 通知格式用普通文本、富文本还是飞书消息卡片? | 已确认:首期使用富文本 |
|
||||||
|
| Q4 | 系统链接使用本地地址还是部署域名? | Demo 本地,部署后改域名 |
|
||||||
|
| Q5 | 是否需要 @ 指定人员? | 已调整:首期为个人私聊通知,不需要群内 @ |
|
||||||
|
| Q6 | 是否需要失败重试? | 已确认:首期不自动重试,只记录失败 |
|
||||||
|
| Q7 | 飞书内问答优先支持私聊还是群 @? | 先私聊,后群 @ |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、已确认决策
|
||||||
|
|
||||||
|
| 编号 | 决策 | 影响 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| D1 | 短期通知发送到指定个人账号,暂不接入外部群聊 | 首期需要配置个人 open_id/user_id;后续再扩展群聊、按发起人私聊和责任矩阵 |
|
||||||
|
| D2 | 首期接收人为配置中的固定个人账号 | 通知服务不再依赖批次 `user` 解析接收人;批次 `user` 仍用于摘要展示和后续按发起人私聊 |
|
||||||
|
| D3 | 首期采用手工配置表维护系统用户与飞书用户映射 | 避免首期被通讯录权限、用户自动绑定和开放平台审核阻塞;后续可升级为自动绑定 |
|
||||||
|
| D4 | 首期三个流程均发送飞书完成通知 | 自动汇总、法规核查、自动填表都需要接入统一通知服务;消息发送到指定个人账号 |
|
||||||
|
| D5 | 首期通知格式采用飞书富文本 | 消息构造需支持富文本结构、换行、重点字段和 @ 用户标签;暂不实现消息卡片按钮 |
|
||||||
|
| D6 | 成功、部分成功、失败三类状态均发送通知 | 消息模板需要按状态展示不同摘要和下一步动作 |
|
||||||
|
| D7 | 同一批次、同一工作流、同一状态只发送一次 | 通知记录需要保存可判重的业务键,发送前先检查历史成功或已发送记录 |
|
||||||
|
| D8 | 首期飞书发送失败不自动重试 | 通知失败只落库并暴露错误信息,不引入异步重试队列 |
|
||||||
|
| D9 | 飞书消息链接首期使用本地地址 | 满足本机 Demo;部署环境后续升级为可信域名配置 |
|
||||||
|
| D10 | 飞书消息采用摘要级内容粒度 | 私聊通知展示核心结果摘要和入口链接,不展开完整风险项、缺失项或文件明细 |
|
||||||
|
| D11 | 指定个人接收人未配置时不发送真实飞书消息 | 记录配置缺失失败或回退 mock;用户映射缺失不影响首期固定个人通知 |
|
||||||
|
| D12 | 通知记录只保存发送摘要,不保存完整富文本 payload | 降低记录冗余和敏感信息留存风险;排查时依赖摘要、状态、错误信息和业务上下文 |
|
||||||
|
| D13 | App ID、App Secret、指定个人 open_id/user_id 等敏感配置通过环境变量维护,用户映射通过 Django Admin 维护 | 兼顾安全性和运维便利性;用户映射服务于后续按发起人私聊和问答身份识别 |
|
||||||
|
| D14 | 首期使用 tenant_access_token + 飞书消息 API 发送通知 | 通知客户端需要实现 token 获取、缓存、失效重取和消息 API 错误处理 |
|
||||||
|
| D15 | 飞书内问答首期入口为私聊机器人 | 优先解决个人查询场景,降低群聊权限泄露风险 |
|
||||||
|
| D16 | 飞书内问答首期回答批次状态、风险摘要、缺失摘要和导出摘要 | 不在首期做具体风险解释和复杂整改建议生成 |
|
||||||
|
| D17 | 私聊问答支持明确批次号和“最近/最新”自然说法 | 问答解析需要支持批次编号识别和按工作流类型查询最近可访问批次 |
|
||||||
|
| D18 | 问答权限为管理员可查全部,普通用户只能查自己发起或上传的批次 | 需要识别系统管理员身份,并在查询层统一做权限过滤 |
|
||||||
|
| D19 | 问答回复仅在用户有权限时返回系统链接 | 链接生成必须在权限校验之后执行 |
|
||||||
|
| D20 | 问答日志记录问题、意图、查询对象和回答摘要,不保存完整回答 | 兼顾审计排查与敏感信息最小留存 |
|
||||||
|
| D21 | 首期实现指定个人私聊通知,并预留飞书问答数据模型和服务边界 | 不在首期实现飞书事件回调和交互式问答,降低一次性交付风险 |
|
||||||
|
| D22 | 批次详情页需要展示通知状态 | 用户无需进入数据库或 Admin 即可确认飞书提醒是否发送成功 |
|
||||||
|
| D23 | 多个飞书标识的 @ 优先级为 `open_id > user_id > mobile` | 优先使用稳定标识,手机号作为兜底 |
|
||||||
|
| D24 | 本需求文档版本升级为 V1.0 | 当前决策已足够进入功能设计阶段 |
|
||||||
450
docs/1.需求分析/5.第1章监管信息材料包生成.md
Normal file
450
docs/1.需求分析/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,450 @@
|
|||||||
|
# 第1章监管信息材料包生成需求分析
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始输入 | docs/0.原始材料/目标产品说明书.docx |
|
||||||
|
| 样例模板 | docs/0.原始材料/第1章 监管信息 |
|
||||||
|
| 法规材料 | docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告 |
|
||||||
|
| 功能主题 | 从产品说明书生成第1章监管信息材料包 |
|
||||||
|
| 工作流名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 批次号规则 | RIP-YYYYMMDDHHMMSS-abcdef |
|
||||||
|
| 分析日期 | 2026-06-10 |
|
||||||
|
| 分析版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、需求背景
|
||||||
|
|
||||||
|
体外诊断试剂注册申报资料中,第1章监管信息包含监管信息目录、申请表、产品列表、申报前沟通说明、符合标准清单、真实性声明和符合性声明等材料。注册人员通常需要根据产品说明书、企业信息和法规要求手工整理这些文件,容易出现产品名称、包装规格、组成成分、预期用途等字段重复录入、漏填、格式不一致和待补信息不醒目的问题。
|
||||||
|
|
||||||
|
本需求新增独立工作流:用户上传或选择一个产品说明书后,系统以既有 `第1章 监管信息` 样例文件作为模板,抽取说明书中的产品关键信息,生成一套类似样例目录的第1章监管信息材料包。生成结果以 zip 压缩包作为主下载入口,同时保留单文件辅助下载。
|
||||||
|
|
||||||
|
该工作流可以复用现有自动填表工作流中已拆分出的字段抽取、LLM 调用、Word 写入、导出下载、批次事件和通知能力,但不并入 `application_form_fill`,而是作为独立工作流建设。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、需求范围
|
||||||
|
|
||||||
|
### 2.1 本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 独立工作流 | 新增 `regulatory_info_package`,不复用 `application_form_fill` 的 workflow_type |
|
||||||
|
| 2 | 单说明书输入 | 本期只支持一个产品说明书作为主输入 |
|
||||||
|
| 3 | 模板复用 | 以 `docs/0.原始材料/第1章 监管信息` 下的样例文件作为生成模板 |
|
||||||
|
| 4 | 固定输出文件 | 固定生成 7 个第1章监管信息文件 |
|
||||||
|
| 5 | 代码抽取与 LLM 抽取并行 | 规则/代码抽取与 LLM 结构化抽取并行处理,合并后写入模板 |
|
||||||
|
| 6 | 尽量多填 | 对说明书中可识别的产品名称、包装规格、预期用途、组成成分、储存条件、适用仪器、样本类型、检测靶标等字段尽量填入 |
|
||||||
|
| 7 | 缺失项标记 | 系统新填入的缺失项使用 `/`,并设置黄色底色提醒负责人补充 |
|
||||||
|
| 8 | LLM-only 标记 | 代码抽取未取到但 LLM 抽取到的字段,也需要在输出文件中高亮提示人工复核 |
|
||||||
|
| 9 | 模板字段化 | 优先将样例模板整理为 Agent/代码可识别字段模板,使用内容控件 Tag 或稳定占位符,代码只填内容不手改格式 |
|
||||||
|
| 10 | doc 能力增强 | `.doc` 文档按能力驱动处理:有原生能力时优先原生写入,无原生能力时明确记录并允许 `.docx` 兜底,不静默输出未改写文件 |
|
||||||
|
| 11 | zip 主输出 | 生成 `第1章 监管信息(预生成版).zip` 作为主下载入口,单文件作为辅助下载 |
|
||||||
|
| 12 | 对话唤起提示 | 在对话框底部增加本工作流的唤起提示词 |
|
||||||
|
| 13 | LLM 意图判断 | 触发判断不能只依赖固定关键词,需要引入 LLM 判断用户是否要生成第1章监管信息材料包 |
|
||||||
|
|
||||||
|
### 2.2 非本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 多资料综合生成 | 本期不从产品技术要求、检验报告、企业证照等多文件综合生成 |
|
||||||
|
| 2 | 人工在线编辑 | 本期只生成文件并标记待确认项,不提供网页内字段编辑 |
|
||||||
|
| 3 | 自动保证法规最终准确 | 标准清单、分类编码、管理类别等无法从说明书确认的信息仍需负责人确认 |
|
||||||
|
| 4 | 自动提交监管系统 | 本期只生成申报材料包,不对接外部申报平台 |
|
||||||
|
| 5 | 版式人工校订替代 | 系统尽量保持模板版式,但最终提交前仍需人工核对 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、输入与触发
|
||||||
|
|
||||||
|
### 3.1 输入文件规则
|
||||||
|
|
||||||
|
| 场景 | 处理规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户上传一个 `.docx` 说明书 | 直接作为本次输入 |
|
||||||
|
| 用户上传多个文件 | 优先选择文件名包含“说明书”的 `.docx` |
|
||||||
|
| 多个说明书候选 | 工作流进入待确认状态,提示用户选择 |
|
||||||
|
| 未找到说明书 | 提示用户上传产品说明书 |
|
||||||
|
| 非 `.docx` 说明书 | 本期可提示格式不支持,后续扩展 `.doc`、PDF 或 OCR |
|
||||||
|
|
||||||
|
### 3.2 对话触发规则
|
||||||
|
|
||||||
|
固定提示词需要支持:
|
||||||
|
|
||||||
|
| 触发表达 | 触发结果 |
|
||||||
|
| --- | --- |
|
||||||
|
| 根据说明书生成第1章监管信息 | 启动第1章监管信息材料包生成 |
|
||||||
|
| 生成监管信息材料包 | 启动第1章监管信息材料包生成 |
|
||||||
|
| 从说明书生成第1章材料 | 启动第1章监管信息材料包生成 |
|
||||||
|
|
||||||
|
除固定表达外,系统需要引入 LLM 意图判断。当用户自然语言表达包含“根据说明书”“第1章”“监管信息”“材料包”“申请表/产品列表/声明”等意图组合时,LLM 可判断为 `regulatory_info_package`。规则命中优先,规则未命中时再进入 LLM 路由,避免只靠固定模板。
|
||||||
|
|
||||||
|
### 3.3 对话框底部唤起提示
|
||||||
|
|
||||||
|
对话框底部快捷提示词新增:
|
||||||
|
|
||||||
|
```text
|
||||||
|
根据说明书生成第1章监管信息
|
||||||
|
```
|
||||||
|
|
||||||
|
后续可追加:
|
||||||
|
|
||||||
|
```text
|
||||||
|
生成监管信息材料包
|
||||||
|
从说明书生成第1章材料
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、输出文件范围
|
||||||
|
|
||||||
|
本期固定生成与样例目录一致的 7 个文件:
|
||||||
|
|
||||||
|
| 序号 | 输出文件 | 模板来源 | 生成规则 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 1 | CH1.2 监管信息目录.docx | 样例 `CH1.2 监管信息目录.docx` | 替换产品名称,目录结构和页码沿用样例 |
|
||||||
|
| 2 | CH1.4 申请表.docx | 样例 `CH1.4 申请表.docx` | 尽量填入说明书字段,未知项填 `/` 并黄底 |
|
||||||
|
| 3 | CH1.5 产品列表.docx | 样例 `CH1.5 产品列表.docx` | 按样例表头重建产品列表,货号填 `/` 并黄底 |
|
||||||
|
| 4 | CH1.9 产品申报前沟通的说明.doc | 样例 `CH1.9 产品申报前沟通的说明.doc` | `.doc` 应支持与 `.docx` 等价替换能力 |
|
||||||
|
| 5 | CH1.11.1 符合标准的清单.docx | 样例 `CH1.11.1 符合标准的清单.docx` | 从说明书和 RAG/法规知识库提取或推荐标准,非明确项需高亮待确认 |
|
||||||
|
| 6 | CH1.11.5 真实性声明.docx | 样例 `CH1.11.5 真实性声明.docx` | 保留样例正文结构,替换产品名称,公司名位置黄底 `/` |
|
||||||
|
| 7 | CH1.11.6 符合性声明.docx | 样例 `CH1.11.6 符合性声明.docx` | 保留样例正文结构,替换产品名称,公司名位置黄底 `/` |
|
||||||
|
|
||||||
|
### 4.1 下载形态
|
||||||
|
|
||||||
|
| 输出类型 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| zip 主入口 | 生成 `第1章 监管信息(预生成版).zip`,只包含成功或兜底成功的文件 |
|
||||||
|
| 单文件下载 | 每个生成文件均可作为辅助下载项展示 |
|
||||||
|
| 追溯清单 | 建议生成 JSON/Excel,记录字段来源、抽取方式、高亮原因和待确认项 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、字段抽取与填写规则
|
||||||
|
|
||||||
|
### 5.1 抽取字段范围
|
||||||
|
|
||||||
|
系统应从说明书中尽量抽取以下字段:
|
||||||
|
|
||||||
|
| 字段 | 示例来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | `【产品名称】` |
|
||||||
|
| 包装规格 | `【包装规格】` |
|
||||||
|
| 预期用途 | `【预期用途】` |
|
||||||
|
| 检测原理/方法原理 | `【检测原理】` |
|
||||||
|
| 主要组成成分 | `【主要组成成分】` 及其下方表格 |
|
||||||
|
| 储存条件及有效期 | `【储存条件及有效期】` |
|
||||||
|
| 样本类型 | `【样本要求】` 中的适用样本类型 |
|
||||||
|
| 检测靶标 | 预期用途或检测原理中的基因、病原体、抗原、抗体等 |
|
||||||
|
| 适用仪器 | `【适用仪器】` |
|
||||||
|
| 检验方法 | `【检验方法】` |
|
||||||
|
| 生产日期和使用期限描述 | 储存条件章节 |
|
||||||
|
|
||||||
|
字段抽取采用规则/代码抽取与 LLM 结构化抽取并行模式:
|
||||||
|
|
||||||
|
```text
|
||||||
|
读取说明书
|
||||||
|
-> 规则/代码抽取
|
||||||
|
-> LLM 结构化抽取
|
||||||
|
-> 字段合并
|
||||||
|
-> 标记字段来源和置信度
|
||||||
|
-> 写入模板
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 合并与高亮规则
|
||||||
|
|
||||||
|
| 场景 | 处理规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 代码抽取和 LLM 都命中且结果一致 | 正常写入,不强制高亮 |
|
||||||
|
| 代码抽取和 LLM 都命中但结果不一致 | 优先按规则配置选择,写入值高亮并进入追溯清单 |
|
||||||
|
| 代码抽取未命中,LLM 命中 | 写入 LLM 值,并高亮提示人工复核 |
|
||||||
|
| 代码抽取命中,LLM 未命中 | 正常写入,追溯记录代码抽取来源 |
|
||||||
|
| 两者均未命中 | 写入 `/` 并设置黄色底色 |
|
||||||
|
| 企业信息缺失 | 写入 `/` 并设置黄色底色 |
|
||||||
|
|
||||||
|
高亮含义:
|
||||||
|
|
||||||
|
| 高亮类型 | 视觉要求 | 含义 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 缺失项高亮 | 黄色底色 | 说明书无法提供,负责人需填写 |
|
||||||
|
| LLM-only 高亮 | 黄色底色,可在追溯清单标记 `llm_only` | 代码未抽到,仅 LLM 推断,需要复核 |
|
||||||
|
| 冲突高亮 | 黄色底色,可配合红色字体 | 规则结果与 LLM 结果不一致 |
|
||||||
|
|
||||||
|
仅标记系统新填入的缺失项或需复核项。样例模板中原本存在的 `/` 不统一高亮,避免整份文件过度标记。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、各文件生成规则
|
||||||
|
|
||||||
|
### 6.1 CH1.2 监管信息目录
|
||||||
|
|
||||||
|
| 项目 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | 替换为说明书抽取的产品名称 |
|
||||||
|
| 目录条目 | 沿用样例目录结构 |
|
||||||
|
| 适用情况 | 沿用样例 |
|
||||||
|
| 资料名称 | 沿用样例 |
|
||||||
|
| 页码 | 沿用样例页码 |
|
||||||
|
|
||||||
|
### 6.2 CH1.4 申请表
|
||||||
|
|
||||||
|
| 字段类型 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品名称 | 从说明书抽取 |
|
||||||
|
| 包装规格 | 从说明书抽取 |
|
||||||
|
| 主要组成成分 | 优先使用说明书组成成分摘要或附件提示 |
|
||||||
|
| 预期用途 | 从说明书抽取 |
|
||||||
|
| 产品储存条件及有效期 | 从说明书抽取 |
|
||||||
|
| 方法原理 | 从说明书检测原理抽取 |
|
||||||
|
| 产品类别 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 分类编码 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 临床评价路径 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 申请人信息 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 联系人、法定代表人、邮箱、组织机构代码 | 缺失,填 `/` 并黄底 |
|
||||||
|
| 生产地址 | 缺失,填 `/` 并黄底 |
|
||||||
|
|
||||||
|
管理类别、分类编码、临床评价路径、UDI、国家标准品/强制标准等不得根据经验自动下结论,全部按待确认处理。
|
||||||
|
|
||||||
|
### 6.3 CH1.5 产品列表
|
||||||
|
|
||||||
|
产品列表需要转成样例表头:
|
||||||
|
|
||||||
|
| 包装规格 | 货号 | 组成 | 组分 | 主要组成成分 | 规格/数量 |
|
||||||
|
| --- | --- | --- | --- | --- | --- |
|
||||||
|
|
||||||
|
生成规则:
|
||||||
|
|
||||||
|
| 字段 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 包装规格 | 从说明书组成成分表的规格列或包装规格章节抽取 |
|
||||||
|
| 货号 | 说明书未提供,填 `/` 并黄底 |
|
||||||
|
| 组成 | 根据组分名称推断为反应液、质控品、处理液、增强剂等;无法判断则填 `/` 并黄底 |
|
||||||
|
| 组分 | 使用说明书表格中的组分名称 |
|
||||||
|
| 主要组成成分 | 使用说明书表格中的主要组成成分 |
|
||||||
|
| 规格/数量 | 使用说明书表格中的对应规格数量 |
|
||||||
|
|
||||||
|
目标产品说明书中存在规格A大包装、规格A分管包装、规格B大管包装等多个组成表,系统应尽量展开为多行产品列表。
|
||||||
|
|
||||||
|
### 6.4 CH1.9 产品申报前沟通的说明
|
||||||
|
|
||||||
|
`CH1.9` 当前为 `.doc` 格式。本工作流要求 `.doc` 文档具备与 `.docx` 等价的处理能力,即模板复制、文本定位、字段替换、高亮标记、导出和打包均应支持 `.doc`。
|
||||||
|
|
||||||
|
实现上不应把格式转换作为唯一方案。可选技术路径包括:
|
||||||
|
|
||||||
|
| 路径 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原生 `.doc` 处理 | 优先探索可直接读取和写入 `.doc` 的库、COM 或二进制文档处理能力 |
|
||||||
|
| Office/COM 自动化 | Windows 环境下通过 Word COM 直接打开 `.doc` 并原格式写入保存 |
|
||||||
|
| LibreOffice UNO/API | 通过 LibreOffice API 直接处理旧版 Word,而不只作为离线预转换 |
|
||||||
|
| 转换兜底 | 当原生处理不可用时,可作为兜底手段,但不能作为需求定义中的唯一能力 |
|
||||||
|
|
||||||
|
如运行环境不具备 `.doc` 写入能力,工作流应明确失败原因或降级提示,不应静默输出未改写文件。
|
||||||
|
|
||||||
|
### 6.5 CH1.11.1 符合标准的清单
|
||||||
|
|
||||||
|
生成规则:
|
||||||
|
|
||||||
|
| 来源 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 说明书明确出现的标准号 | 可直接写入,并记录来源片段 |
|
||||||
|
| RAG/法规知识库命中的候选标准 | 可作为候选写入或追溯提示,但需高亮待确认 |
|
||||||
|
| 样例中的标准清单 | 不可无条件沿用 |
|
||||||
|
| 无法确认的标准 | 填 `/` 并黄底 |
|
||||||
|
|
||||||
|
法规材料目录中存在 `医疗器械注册申报资料和批准证明文件格式要求(体外诊断试剂).doc`、`体外诊断试剂注册申报资料要求及说明.doc`、`体外诊断试剂安全和性能基本原则清单.doc` 等材料。其中安全和性能基本原则清单属于第3章非临床资料,不直接等同于 `CH1.11.1 符合标准的清单`。系统应优先查询已上传 RAG/法规知识库来确认标准清单要求;未命中时不得强行套用样例标准。
|
||||||
|
|
||||||
|
### 6.6 CH1.11.5 真实性声明
|
||||||
|
|
||||||
|
| 项目 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 正文结构 | 保留样例结构 |
|
||||||
|
| 产品名称 | 替换为说明书抽取的产品名称 |
|
||||||
|
| 公司名/申请人 | 填 `/` 并黄底 |
|
||||||
|
| 日期 | 使用当天日期 |
|
||||||
|
| 材料列表 | 沿用样例材料列表 |
|
||||||
|
|
||||||
|
### 6.7 CH1.11.6 符合性声明
|
||||||
|
|
||||||
|
| 项目 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 正文结构 | 保留样例结构 |
|
||||||
|
| 产品名称 | 替换为说明书抽取的产品名称 |
|
||||||
|
| 公司名/申请人 | 填 `/` 并黄底 |
|
||||||
|
| 日期 | 使用当天日期 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、工作流设计
|
||||||
|
|
||||||
|
### 7.1 主流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户上传或选择产品说明书
|
||||||
|
-> 用户触发“根据说明书生成第1章监管信息”
|
||||||
|
-> 系统通过规则和 LLM 判断工作流意图
|
||||||
|
-> 创建 regulatory_info_package 批次
|
||||||
|
-> 校验输入说明书
|
||||||
|
-> 复制第1章监管信息样例模板到批次目录
|
||||||
|
-> 抽取说明书文本、段落和表格
|
||||||
|
-> 规则/代码抽取字段
|
||||||
|
-> LLM 结构化抽取字段
|
||||||
|
-> 合并字段并识别缺失、LLM-only 和冲突项
|
||||||
|
-> 生成 7 个目标文件
|
||||||
|
-> 对缺失项、LLM-only 项和冲突项进行高亮
|
||||||
|
-> 生成追溯清单
|
||||||
|
-> 打包第1章监管信息 zip
|
||||||
|
-> 写入导出记录
|
||||||
|
-> 对话框展示 zip 主下载入口、单文件下载和待确认摘要
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 节点建议
|
||||||
|
|
||||||
|
| 节点编码 | 节点名称 | 成功条件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| prepare | 准备资料 | 找到唯一说明书输入 |
|
||||||
|
| template_copy | 复制模板 | 7 个样例模板复制到批次目录 |
|
||||||
|
| text_extract | 抽取说明书 | 提取说明书段落和表格 |
|
||||||
|
| field_extract | 抽取字段 | 规则和 LLM 抽取结果均留底 |
|
||||||
|
| field_merge | 合并字段 | 输出最终字段、缺失项、LLM-only 项和冲突项 |
|
||||||
|
| generate_docs | 生成材料 | 7 个文件生成完成 |
|
||||||
|
| highlight_review_items | 标记待确认 | 缺失项、LLM-only、冲突项完成高亮 |
|
||||||
|
| trace_export | 追溯清单 | 生成 JSON/Excel 追溯清单 |
|
||||||
|
| zip_export | 打包下载 | 生成 `第1章 监管信息(预生成版).zip` |
|
||||||
|
| completed | 完成 | 更新批次状态并返回下载摘要 |
|
||||||
|
|
||||||
|
### 7.3 状态建议
|
||||||
|
|
||||||
|
| 状态 | 含义 |
|
||||||
|
| --- | --- |
|
||||||
|
| pending | 已创建,等待执行 |
|
||||||
|
| running | 执行中 |
|
||||||
|
| waiting_user | 多个说明书或缺少说明书,等待用户确认 |
|
||||||
|
| success | zip 和必要单文件生成成功 |
|
||||||
|
| partial_success | zip 已生成,但部分 `.doc`、追溯清单或高亮处理失败 |
|
||||||
|
| failed | 关键文件均未生成 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、数据与产物
|
||||||
|
|
||||||
|
### 8.1 批次数据
|
||||||
|
|
||||||
|
建议新增独立批次模型或等价数据结构,记录:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch_no | RIP 批次号 |
|
||||||
|
| workflow_type | regulatory_info_package |
|
||||||
|
| conversation | 所属对话 |
|
||||||
|
| user | 发起用户 |
|
||||||
|
| trigger_message | 触发消息 |
|
||||||
|
| source_instruction_file | 输入说明书 |
|
||||||
|
| product_name | 抽取到的产品名称 |
|
||||||
|
| status | 批次状态 |
|
||||||
|
| work_dir | 批次工作目录 |
|
||||||
|
| missing_fields | 缺失字段清单 |
|
||||||
|
| llm_only_fields | 仅 LLM 命中的字段 |
|
||||||
|
| conflict_fields | 冲突字段 |
|
||||||
|
| risk_notes | `.doc` 处理、标准清单待确认等风险提示 |
|
||||||
|
|
||||||
|
### 8.2 追溯清单
|
||||||
|
|
||||||
|
追溯清单至少记录:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| target_file | 目标文件 |
|
||||||
|
| target_field | 目标字段 |
|
||||||
|
| final_value | 写入值 |
|
||||||
|
| extraction_source | rule、llm、missing、rag_candidate |
|
||||||
|
| evidence | 来源片段 |
|
||||||
|
| highlight_reason | missing、llm_only、conflict、rag_candidate |
|
||||||
|
| needs_review | 是否需要负责人确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、界面与交互
|
||||||
|
|
||||||
|
### 9.1 对话回复
|
||||||
|
|
||||||
|
工作流完成后,对话框展示:
|
||||||
|
|
||||||
|
| 信息 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次号 | RIP 批次号 |
|
||||||
|
| 产品名称 | 抽取到的产品名称 |
|
||||||
|
| 主下载 | `第1章 监管信息(预生成版).zip` |
|
||||||
|
| 单文件下载 | 7 个文件列表 |
|
||||||
|
| 待确认摘要 | 缺失字段数、LLM-only 字段数、冲突字段数 |
|
||||||
|
| `.doc` 状态 | CH1.9 是否成功完成 `.doc` 写入 |
|
||||||
|
| 标准清单提示 | 标准来源和待确认说明 |
|
||||||
|
|
||||||
|
### 9.2 工作流卡片
|
||||||
|
|
||||||
|
前端需新增 `regulatory_info_package` 工作流卡片,展示节点状态和导出结果。对话框底部新增快捷唤起提示词:
|
||||||
|
|
||||||
|
```text
|
||||||
|
根据说明书生成第1章监管信息
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、异常与降级
|
||||||
|
|
||||||
|
| 异常场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 未上传说明书 | 提示用户上传产品说明书 |
|
||||||
|
| 多个说明书候选 | 进入 waiting_user,提示选择 |
|
||||||
|
| 产品名称未抽到 | 目标文件产品名位置填 `/` 并黄底 |
|
||||||
|
| 企业信息缺失 | 相关位置填 `/` 并黄底 |
|
||||||
|
| LLM 调用失败 | 使用规则抽取结果继续生成,并记录风险提示 |
|
||||||
|
| 规则抽取失败 | 使用 LLM 结果继续生成,LLM-only 字段高亮 |
|
||||||
|
| RAG/法规知识库不可用 | 标准清单不自动套用样例,写入 `/` 并黄底 |
|
||||||
|
| `.doc` 原生处理失败 | 批次标记 partial_success 或 failed,明确提示 CH1.9 处理失败原因 |
|
||||||
|
| zip 打包失败 | 保留单文件下载,并提示压缩包生成失败 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 触发识别 | 用户输入“根据说明书生成第1章监管信息”可启动 `regulatory_info_package` |
|
||||||
|
| 2 | LLM 路由 | 非固定话术但语义明确时,可由 LLM 判断进入本工作流 |
|
||||||
|
| 3 | 输入选择 | 单说明书可直接执行,多说明书进入待确认 |
|
||||||
|
| 4 | 输出文件 | 生成 7 个与样例同名或同语义的第1章文件 |
|
||||||
|
| 5 | zip 下载 | 生成 `第1章 监管信息(预生成版).zip` 作为主下载入口 |
|
||||||
|
| 6 | 单文件下载 | 7 个生成文件均可单独下载 |
|
||||||
|
| 7 | 产品名称替换 | 目录、申请表、声明类文件中的产品名称替换为说明书产品名称 |
|
||||||
|
| 8 | 产品列表 | CH1.5 使用样例表头展开说明书组成成分,货号填 `/` 并黄底 |
|
||||||
|
| 9 | 缺失项高亮 | 系统新填入的 `/` 均有黄色底色 |
|
||||||
|
| 10 | LLM-only 高亮 | 代码未抽到但 LLM 抽到的字段在文件中高亮 |
|
||||||
|
| 11 | 标准清单 | 不无条件沿用样例标准;无法确认时填 `/` 并黄底 |
|
||||||
|
| 12 | 日期 | 声明类文件日期使用当天日期 |
|
||||||
|
| 13 | `.doc` 支持 | CH1.9 `.doc` 具备与 `.docx` 等价的处理能力,失败时明确提示 |
|
||||||
|
| 14 | 追溯清单 | 输出字段来源、抽取方式和高亮原因 |
|
||||||
|
| 15 | 权限隔离 | 用户只能访问自己对话下的批次和导出文件 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、已确认结论
|
||||||
|
|
||||||
|
| 编号 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| D1 | 输出范围固定为样例第1章监管信息目录下的 7 个文件 |
|
||||||
|
| D2 | 样例文件作为模板使用,不只是效果参考 |
|
||||||
|
| D3 | 企业信息、申请人信息缺失时不沿用样例公司,填 `/` 并黄底 |
|
||||||
|
| D4 | 管理类别、分类编码、临床评价路径等无法从说明书确认的信息填 `/` 并黄底 |
|
||||||
|
| D5 | 产品列表货号不填写具体值,填 `/` 并黄底 |
|
||||||
|
| D6 | 标准清单不得无条件沿用样例,优先从说明书和 RAG/法规知识库确认 |
|
||||||
|
| D7 | 声明日期使用当天日期 |
|
||||||
|
| D8 | 新建独立工作流,可复用原自动填表工作流拆出的 skill/service |
|
||||||
|
| D9 | 需求分析文档新增为 `docs/1.需求分析/5.第1章监管信息材料包生成.md` |
|
||||||
|
| D10 | zip 作为主入口,单文件作为辅助下载 |
|
||||||
|
| D11 | 对话框底部增加工作流唤起提示词 |
|
||||||
|
| D12 | 模板优先字段化,使用内容控件 Tag 或稳定占位符服务 Agent/代码填充,行标签定位仅作为兜底 |
|
||||||
|
| D13 | `.doc` 要按能力驱动实现与 `.docx` 等价能力;原生能力不可用时允许 `.docx` 兜底并明确提示 |
|
||||||
|
| D14 | 触发判断需要引入 LLM,不只依赖固定关键词 |
|
||||||
816
docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
816
docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,816 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表功能设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 依赖功能设计 | docs/2.功能设计/1.自动汇总.md;docs/2.功能设计/2.NMPA注册资料法规核查与整改闭环.md |
|
||||||
|
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计目标
|
||||||
|
|
||||||
|
本功能作为独立工作流 `application_form_fill` 建设,由用户在 AI 对话中触发,例如“帮我填注册证”“给我这个内容对应的表格”“为我该方案生成申报模板”“生成安全和性能基本原则清单”“把产品信息填到申报模板里”等。用户可以明确指定目标模板;未指定时,系统根据识别出的注册类型生成当前注册类型适用的全部模板。
|
||||||
|
|
||||||
|
本功能复用第一批文件汇总结果作为文件来源,复用第二批法规核查中的文本抽取、适用条件识别、LLM 调用、飞书通知和导出下载能力,但拥有独立批次、独立工作流卡片和独立过程产物。系统复制原始法规模板到批次工作目录,不覆盖原始文件;随后按模板配置识别应填字段,使用规则/正则抽取与 LLM 结构化抽取并行处理,合并字段、识别冲突、写入 Word 模板,并在 AI 对话框和飞书通知中提示生成结果与冲突摘要。
|
||||||
|
|
||||||
|
Demo 阶段优先保证 Word 模板自动填写和下载。PDF 转换作为待办增强项:功能设计保留 PDF 导出节点和数据结构,实施时可先返回 Word 与追溯清单,并在待办清单记录 PDF 转换能力。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、与既有功能的关系
|
||||||
|
|
||||||
|
### 2.1 复用边界
|
||||||
|
|
||||||
|
| 能力 | 处理方式 | 现有代码/模型 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 对话与用户权限 | 复用 | `Conversation`、`Message` |
|
||||||
|
| 附件上传与文件绑定 | 复用 | `FileAttachment`、`FileSummaryBatchAttachment` |
|
||||||
|
| 文件汇总与页数统计 | 复用 | `FileSummaryBatch`、`FileSummaryItem`、`file_summary.workflow` |
|
||||||
|
| 文本抽取 | 复用并扩展 | `regulatory_review/services/text_extract.py`、`rag_index.py` |
|
||||||
|
| 适用条件候选 | 复用并扩展 | `regulatory_review/services/info_extract.py` |
|
||||||
|
| LLM 调用 | 复用 | `review_agent/llm.py`、`regulatory_review/services/llm_review.py` |
|
||||||
|
| 导出记录与下载 | 扩展复用 | `ExportedSummaryFile` |
|
||||||
|
| 过程产物 | 复用 | `RegulatoryArtifact` 或新增填表过程产物 |
|
||||||
|
| 飞书通知 | 复用并扩展 | `regulatory_review/services/feishu_notifier.py` |
|
||||||
|
| SSE 工作流事件 | 复用 | `WorkflowNodeRun`、`WorkflowEvent` |
|
||||||
|
|
||||||
|
### 2.2 新增边界
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立填表批次 | 新增 `ApplicationFormFillBatch`,不强绑法规核查批次 |
|
||||||
|
| 模板配置 | 新增 YAML 配置,维护模板路径、适用条件、字段映射和输出规则 |
|
||||||
|
| 模板选择 | 根据用户指定模板和注册类型选择生成范围 |
|
||||||
|
| 规则/正则与 LLM 并行抽取 | 两路抽取并行执行,最后统一合并 |
|
||||||
|
| 字段冲突归并 | 按来源文件优先级处理,说明书优先;冲突字段高亮 |
|
||||||
|
| Word 模板填充 | 使用 `python-docx` 对 `.docx` 表格、段落和占位字段写入 |
|
||||||
|
| `.doc` 模板转换 | 使用 LibreOffice/soffice 或预转换 `.docx` 模板 |
|
||||||
|
| 字段来源追溯 | 输出 Excel/JSON 追溯清单,记录抽取、合并和冲突证据 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、总体架构
|
||||||
|
|
||||||
|
### 3.1 架构原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 填表流程拥有独立批次、节点和卡片,workflow_type 为 `application_form_fill` |
|
||||||
|
| 复用文件汇总 | 填表不重新实现上传扫描,默认使用当前对话最近成功的 `FileSummaryBatch` |
|
||||||
|
| 用户指令优先 | 用户明确指定模板或注册类型时,优先使用用户指令 |
|
||||||
|
| 配置驱动 | 模板路径、字段映射、适用条件和输出规则写入 YAML 配置 |
|
||||||
|
| Word 优先 | Demo 阶段优先生成可编辑 Word,PDF 作为增强项进入待办 |
|
||||||
|
| 可追溯 | 规则抽取、LLM 抽取、合并结果、冲突列表和来源证据均留底 |
|
||||||
|
| 失败隔离 | 单字段、单模板或 PDF 转换失败不影响其他模板输出 |
|
||||||
|
| 通知可控 | 填表完成后可通过飞书通知上传人,通知内容只包含摘要和下载提示 |
|
||||||
|
|
||||||
|
### 3.2 逻辑架构
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["AI 对话页"] --> B["意图识别 application_form_fill"]
|
||||||
|
B --> C{"本次消息是否带附件"}
|
||||||
|
C -->|"是"| D["先执行文件汇总工作流"]
|
||||||
|
C -->|"否"| E["查找最近成功 FileSummaryBatch"]
|
||||||
|
D --> E
|
||||||
|
E --> F["ApplicationFormFillBatch"]
|
||||||
|
F --> G["FormFillWorkflowExecutor"]
|
||||||
|
G --> H["模板配置 YAML"]
|
||||||
|
G --> I["模板选择服务"]
|
||||||
|
G --> J["文本抽取服务"]
|
||||||
|
J --> K1["规则/正则抽取"]
|
||||||
|
J --> K2["LLM 结构化抽取"]
|
||||||
|
K1 --> L["字段合并与冲突归并"]
|
||||||
|
K2 --> L
|
||||||
|
L --> M["Word 模板填充服务"]
|
||||||
|
M --> N["追溯清单导出"]
|
||||||
|
M --> O["PDF 转换服务 P1"]
|
||||||
|
N --> P["ExportedSummaryFile"]
|
||||||
|
O --> P
|
||||||
|
G --> Q["WorkflowEvent/SSE"]
|
||||||
|
Q --> R["自动填表工作流卡片"]
|
||||||
|
G --> S["FeishuNotifier"]
|
||||||
|
S --> T["上传人通知"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.3 技术选型
|
||||||
|
|
||||||
|
| 设计项 | Demo 方案 | 后续演进 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Web 框架 | Django,沿用当前 `review_agent` 应用 | 保持 Django,必要时拆分独立 app |
|
||||||
|
| 工作流编排 | 新增轻量 `FormFillWorkflowExecutor` | 接入 LangGraph 子图 |
|
||||||
|
| 后台执行 | Django 后台线程,沿用现有工作流方式 | Celery/RQ + Redis |
|
||||||
|
| 工作流状态 | `WorkflowNodeRun` + `WorkflowEvent`,新增 workflow_type | 独立工作流事件中心 |
|
||||||
|
| 模板配置 | YAML,建议路径 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` | 数据库模板管理后台 |
|
||||||
|
| Word 处理 | `python-docx` 写入 `.docx` 表格和段落,高亮冲突字段 | OOXML 精细 patch、内容控件 SDT |
|
||||||
|
| `.doc` 转换 | LibreOffice/soffice headless 转 `.docx`;无法部署时预置 `.docx` 工作模板 | 模板入库前统一转换和人工校验 |
|
||||||
|
| PDF 导出 | P1 待办:LibreOffice/soffice headless 转 PDF | 逐页渲染 QA、版式差异检测 |
|
||||||
|
| Excel 追溯清单 | `openpyxl` | 增加多 Sheet 审核视图 |
|
||||||
|
| 文本抽取 | 复用 `text_extract.py`、`rag_index.py` | OCR、文档文本缓存 |
|
||||||
|
| 字段抽取 | 规则/正则与 LLM 结构化抽取并行,合并后输出 | 可配置抽取器和置信度模型 |
|
||||||
|
| 飞书通知 | 复用 `FeishuNotifier`,Demo 可 mock 或 CLI | 飞书 Webhook/API |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、触发与模板选择设计
|
||||||
|
|
||||||
|
### 4.1 意图识别
|
||||||
|
|
||||||
|
填表工作流通过用户对话触发。意图识别可先采用关键词规则,必要时调用现有 LLM 路由能力。
|
||||||
|
|
||||||
|
| 触发表达 | 触发结果 |
|
||||||
|
| --- | --- |
|
||||||
|
| 帮我填注册证 | 触发填表,指定注册证格式 |
|
||||||
|
| 给我这个内容对应的表格 | 触发填表,未指定模板 |
|
||||||
|
| 为我该方案生成申报模板 | 触发填表,未指定模板 |
|
||||||
|
| 生成安全和性能基本原则清单 | 触发填表,指定安全和性能基本原则清单 |
|
||||||
|
| 把产品信息填到申报模板里 | 触发填表,未指定模板 |
|
||||||
|
| 只生成变更注册备案文件 | 触发填表,指定变更注册(备案)文件 |
|
||||||
|
|
||||||
|
### 4.2 文件来源选择
|
||||||
|
|
||||||
|
| 场景 | 处理方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 本次消息带新附件 | 先自动执行文件汇总,汇总成功后启动填表 |
|
||||||
|
| 本次消息无附件 | 默认使用当前对话最近一次成功 `FileSummaryBatch` |
|
||||||
|
| 无成功汇总批次 | 对话框提示用户先上传资料或补充附件 |
|
||||||
|
| 用户明确指定历史批次 | 校验批次属于当前对话和当前用户后使用 |
|
||||||
|
|
||||||
|
### 4.3 注册类型识别优先级
|
||||||
|
|
||||||
|
注册类型用于决定默认生成哪些模板。优先级如下:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户话语明确指定
|
||||||
|
-> 当前对话已确认的法规核查条件
|
||||||
|
-> 上传文件内容抽取结果
|
||||||
|
-> 无法识别
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.4 模板选择规则
|
||||||
|
|
||||||
|
| 场景 | 生成模板 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户未指定模板,注册类型为首次注册 | 注册证格式;安全和性能基本原则清单 |
|
||||||
|
| 用户未指定模板,注册类型为变更注册或备案 | 变更注册(备案)文件;安全和性能基本原则清单 |
|
||||||
|
| 用户未指定模板,注册类型无法识别 | 安全和性能基本原则清单;注册证/变更文件进入待确认提示 |
|
||||||
|
| 用户明确指定模板且与注册类型一致 | 只生成用户指定模板 |
|
||||||
|
| 用户明确指定模板但与注册类型不一致 | 允许生成,并在摘要和追溯清单提示“与识别注册类型不一致,需人工确认” |
|
||||||
|
| 用户指定“全部模板” | 生成三个目标模板,并提示用户核对注册类型适用性 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、工作流设计
|
||||||
|
|
||||||
|
### 5.1 节点图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
N1["准备资料"] --> N2["选择模板"]
|
||||||
|
N2 --> N3["复制模板"]
|
||||||
|
N3 --> N4["抽取字段"]
|
||||||
|
N4 --> N5["冲突归并"]
|
||||||
|
N5 --> N6["填写 Word"]
|
||||||
|
N6 --> N7["转换 PDF P1"]
|
||||||
|
N6 --> N8["追溯清单"]
|
||||||
|
N7 --> N9["输出下载"]
|
||||||
|
N8 --> N9
|
||||||
|
N9 --> N10["飞书通知"]
|
||||||
|
N10 --> N11["完成"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 节点定义
|
||||||
|
|
||||||
|
| 节点编码 | 节点名称 | 触发服务 | 成功条件 | 失败处理 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| prepare | 准备资料 | `FormFillWorkflowExecutor` | 找到或生成成功的 `FileSummaryBatch` | 无文件汇总则暂停提示上传 |
|
||||||
|
| template_select | 选择模板 | `TemplateSelectionService` | 输出本次目标模板列表 | 无适用模板则失败 |
|
||||||
|
| template_copy | 复制模板 | `TemplateRepository` | 模板副本进入批次工作目录 | 单模板失败不影响其他模板 |
|
||||||
|
| field_extract | 抽取字段 | `FieldExtractionService` | 规则/正则与 LLM 结果留底 | 单文件失败记录并继续 |
|
||||||
|
| conflict_merge | 冲突归并 | `FieldMergeService` | 输出最终字段和冲突列表 | 无字段时仍生成空模板 |
|
||||||
|
| word_fill | 填写 Word | `WordTemplateFillService` | 生成填好后的 Word 文件 | 单模板失败记录失败 |
|
||||||
|
| pdf_convert | 转换 PDF | `PdfConversionService` | P1:生成 PDF 文件 | PDF 失败标记 partial_success |
|
||||||
|
| trace_export | 追溯清单 | `TraceabilityExportService` | 生成 Excel/JSON 追溯清单 | 失败不影响 Word |
|
||||||
|
| output_export | 输出下载 | `FormFillExportService` | 写入 `ExportedSummaryFile` 并生成下载链接 | 关键 Word 失败则批次失败 |
|
||||||
|
| notify | 飞书通知 | `FeishuNotifier` | 通知上传人生成完成 | 通知失败不影响下载 |
|
||||||
|
| completed | 完成 | 工作流执行器 | 更新批次状态和对话消息 | - |
|
||||||
|
|
||||||
|
### 5.3 状态设计
|
||||||
|
|
||||||
|
| 状态 | 含义 |
|
||||||
|
| --- | --- |
|
||||||
|
| pending | 已创建,等待执行 |
|
||||||
|
| running | 执行中 |
|
||||||
|
| waiting_user | 缺少文件或关键条件,等待用户补充 |
|
||||||
|
| success | Word 和必要追溯产物生成成功 |
|
||||||
|
| partial_success | Word 已生成,但部分模板、PDF、追溯清单或通知失败 |
|
||||||
|
| failed | 所有目标 Word 模板均生成失败 |
|
||||||
|
| skipped | 当前节点不适用,例如 Demo 阶段跳过 PDF |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、模板配置设计
|
||||||
|
|
||||||
|
### 6.1 配置文件路径
|
||||||
|
|
||||||
|
建议新增:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/application_form_fill/templates/application_form_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 配置结构
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: application_form_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||||
|
templates:
|
||||||
|
- code: registration_certificate
|
||||||
|
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
|
||||||
|
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
|
||||||
|
output_label: 注册证格式
|
||||||
|
applies_when:
|
||||||
|
registration_type: ["首次注册"]
|
||||||
|
file_format: docx
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 产品名称
|
||||||
|
sources: ["说明书", "产品技术要求", "注册检验报告"]
|
||||||
|
- key: package_specification
|
||||||
|
label: 包装规格
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 包装规格
|
||||||
|
sources: ["说明书", "产品技术要求"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 模板配置项
|
||||||
|
|
||||||
|
| 配置项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| code | 模板编码,用于用户指定和导出分类 |
|
||||||
|
| name | 模板中文名称 |
|
||||||
|
| source_file | 原始模板文件名 |
|
||||||
|
| working_template | 可选,预转换 `.docx` 工作模板 |
|
||||||
|
| output_label | 文件命名中的模板标签 |
|
||||||
|
| applies_when | 默认适用注册类型 |
|
||||||
|
| fields | 字段映射列表 |
|
||||||
|
| checklist_items | 安全和性能基本原则清单条目映射 |
|
||||||
|
| conversion | `.doc` 转 `.docx` 和 PDF 的转换策略 |
|
||||||
|
|
||||||
|
### 6.4 已知模板字段
|
||||||
|
|
||||||
|
注册证格式当前已从 `.docx` 表格识别到以下字段:注册人名称、注册人住所、生产地址、代理人名称、代理人住所、产品名称、包装规格、主要组成成分、预期用途、产品储存条件及有效期、附件、其他内容、备注。
|
||||||
|
|
||||||
|
变更注册(备案)文件和安全和性能基本原则清单当前为 `.doc`,实施前需通过 LibreOffice/soffice 转换或预置人工确认版 `.docx` 工作模板,再补齐字段映射。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、字段抽取与合并设计
|
||||||
|
|
||||||
|
### 7.1 三层提取链路
|
||||||
|
|
||||||
|
```text
|
||||||
|
模板字段配置
|
||||||
|
-> 文档字段候选提取
|
||||||
|
-> 规则/正则抽取与 LLM 结构化抽取并行
|
||||||
|
-> 字段归一化
|
||||||
|
-> 来源优先级合并
|
||||||
|
-> 冲突识别
|
||||||
|
-> 最终字段包
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 规则/正则抽取
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 标签字段识别 | 识别 `产品名称:`、`预期用途:`、`储存条件:` 等标签行 |
|
||||||
|
| 表格字段识别 | 从 Word/Excel 表格中识别左侧字段名、右侧字段值 |
|
||||||
|
| 章节范围识别 | 从说明书、产品技术要求中按章节提取连续文本 |
|
||||||
|
| 文件类型识别 | 根据文件名、目录名和首页标题判断说明书、产品技术要求、检验报告 |
|
||||||
|
| 证据片段截取 | 保存字段前后上下文,用于追溯清单 |
|
||||||
|
|
||||||
|
### 7.3 LLM 结构化抽取
|
||||||
|
|
||||||
|
LLM 输入为模板字段清单、文件上下文和候选文本片段,输出严格 JSON:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"key": "storage_condition",
|
||||||
|
"label": "产品储存条件及有效期",
|
||||||
|
"value": "2-8℃保存,有效期12个月",
|
||||||
|
"source_file": "说明书.docx",
|
||||||
|
"evidence": "产品储存条件:2-8℃保存...",
|
||||||
|
"confidence": 0.86
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"checklist_items": [
|
||||||
|
{
|
||||||
|
"item_code": "A1",
|
||||||
|
"applicability": "适用",
|
||||||
|
"compliance_evidence": "产品技术要求中规定了性能指标和检验方法",
|
||||||
|
"proof_location": "产品技术要求.docx 第2章"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.4 并行合并规则
|
||||||
|
|
||||||
|
| 场景 | 处理规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 规则和 LLM 值一致 | 合并为同一字段,提高置信度 |
|
||||||
|
| 规则和 LLM 值不一致,且来源文件不同 | 按来源文件优先级处理,说明书优先 |
|
||||||
|
| 规则和 LLM 值不一致,来源文件相同 | 标记冲突,模板中高亮 |
|
||||||
|
| 说明书与其他文件冲突 | 采用说明书值,黄色底色、红色字体标记 |
|
||||||
|
| 说明书缺失,多个来源冲突 | 取最高优先级文件值并标记冲突;无法判断则留空 |
|
||||||
|
| 字段缺失 | 模板留空,追溯清单记录未提取 |
|
||||||
|
|
||||||
|
### 7.5 过程产物留底
|
||||||
|
|
||||||
|
字段抽取结果保存为 `field_extract_result.json`,至少包含:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| regex_results | 规则/正则抽取结果 |
|
||||||
|
| llm_results | LLM 结构化抽取结果 |
|
||||||
|
| merged_fields | 合并后的最终字段 |
|
||||||
|
| conflicts | 冲突字段列表 |
|
||||||
|
| source_evidence | 来源文件和文本片段 |
|
||||||
|
| selected_templates | 本次选择的模板 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、安全和性能基本原则清单设计
|
||||||
|
|
||||||
|
### 8.1 判断策略
|
||||||
|
|
||||||
|
安全和性能基本原则清单采用“候选判断 + 高置信度写入”策略。
|
||||||
|
|
||||||
|
| 步骤 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 条目拆解 | 从模板配置中读取条目编号、原则内容、适用性栏、证据栏、证明文件位置栏 |
|
||||||
|
| 候选判断 | 规则和 LLM 均可给出适用/不适用候选 |
|
||||||
|
| 证据匹配 | 从产品技术要求、说明书、性能研究、稳定性研究、风险管理资料中匹配证明文件 |
|
||||||
|
| 高置信度写入 | 仅将高置信度判断写入 Word |
|
||||||
|
| 低置信度留空 | 证据不足或判断不一致时 Word 留空,追溯清单记录候选判断 |
|
||||||
|
| 冲突提示 | 冲突条目在对话框和追溯清单中提示,不强行填入 |
|
||||||
|
|
||||||
|
### 8.2 输出字段
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 条目编号 | 基本原则清单中的条目编码 |
|
||||||
|
| 条目内容 | 原始原则或要求 |
|
||||||
|
| 适用性 | 适用/不适用,低置信度留空 |
|
||||||
|
| 符合性证据 | 高置信度证据摘要 |
|
||||||
|
| 证明文件位置 | 文件名、章节、页码或文本定位 |
|
||||||
|
| 置信度 | 用于判断是否写入 Word |
|
||||||
|
| 候选来源 | 规则、LLM 或两者一致 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、Word 与 PDF 生成设计
|
||||||
|
|
||||||
|
### 9.1 Word 模板填充
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板副本 | 原始模板复制到批次工作目录后再写入 |
|
||||||
|
| 表格行填充 | 根据行首字段名定位目标单元格 |
|
||||||
|
| 段落占位填充 | 支持 `{{field_key}}` 等占位符 |
|
||||||
|
| 清单条目填充 | 按条目编号和配置列写入适用性、证据和证明位置 |
|
||||||
|
| 冲突高亮 | 冲突字段使用黄色底色和红色字体 |
|
||||||
|
| 缺失字段 | 保持空白,不写“待补充” |
|
||||||
|
| 版式保持 | 尽量不改变表格结构、分页和字体 |
|
||||||
|
|
||||||
|
### 9.2 PDF 转换
|
||||||
|
|
||||||
|
PDF 转换作为 P1 待办增强项设计:
|
||||||
|
|
||||||
|
| 阶段 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| Demo 主链路 | 优先生成 Word,不因 PDF 能力缺失阻断工作流 |
|
||||||
|
| P1 增强 | 使用 LibreOffice/soffice headless 将 Word 转为 PDF |
|
||||||
|
| 失败处理 | Word 已生成但 PDF 失败时,批次状态为 `partial_success` |
|
||||||
|
| QA 增强 | 后续增加 PDF 页数非 0、逐页截图或版式差异检查 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、输出与下载设计
|
||||||
|
|
||||||
|
### 10.1 输出文件
|
||||||
|
|
||||||
|
| 文件 | Demo 阶段 | P1/P2 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 填好后的 Word | 必须生成 | 持续支持 |
|
||||||
|
| PDF 预览 | 待办增强 | LibreOffice 转换生成 |
|
||||||
|
| 字段来源追溯清单 Excel | 允许生成,建议实现 | 增加多 Sheet |
|
||||||
|
| 字段抽取 JSON | 过程产物留底 | 支持下载或调试查看 |
|
||||||
|
|
||||||
|
### 10.2 文件命名
|
||||||
|
|
||||||
|
```text
|
||||||
|
批次号-产品名称-注册证格式.docx
|
||||||
|
批次号-产品名称-注册证格式.pdf
|
||||||
|
批次号-产品名称-变更注册备案文件.docx
|
||||||
|
批次号-产品名称-变更注册备案文件.pdf
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.docx
|
||||||
|
批次号-产品名称-安全和性能基本原则清单.pdf
|
||||||
|
批次号-产品名称-字段来源追溯清单.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.3 ExportedSummaryFile 扩展
|
||||||
|
|
||||||
|
继续复用 `ExportedSummaryFile`,但需要扩展 `ExportType`:
|
||||||
|
|
||||||
|
| export_type | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| markdown | 既有 Markdown 报告 |
|
||||||
|
| excel | Excel 追溯清单 |
|
||||||
|
| json | 字段抽取 JSON 或结果包 |
|
||||||
|
| word | 填好的 Word 文件,新增 |
|
||||||
|
| pdf | Word 转换后的 PDF,新增 |
|
||||||
|
|
||||||
|
填表工作流导出记录建议:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | `application_form_fill` |
|
||||||
|
| workflow_batch_id | `ApplicationFormFillBatch.id` |
|
||||||
|
| export_category | `filled_template`、`traceability`、`extract_result` |
|
||||||
|
| export_type | `word`、`pdf`、`excel`、`json` |
|
||||||
|
|
||||||
|
导出服务入参应包含目标输出类型列表,例如:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"output_types": ["word", "pdf", "excel"],
|
||||||
|
"template_codes": ["registration_certificate", "essential_principles"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
系统根据入参决定生成哪些类型的内容。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、数据模型设计
|
||||||
|
|
||||||
|
### 11.1 ApplicationFormFillBatch
|
||||||
|
|
||||||
|
新增自动填表批次表。
|
||||||
|
|
||||||
|
| 字段 | 类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| id | BigAutoField | 主键 |
|
||||||
|
| conversation | ForeignKey(Conversation) | 绑定对话 |
|
||||||
|
| user | ForeignKey(User) | 发起用户 |
|
||||||
|
| source_summary_batch | ForeignKey(FileSummaryBatch) | 文件来源批次 |
|
||||||
|
| source_regulatory_batch | ForeignKey(RegulatoryReviewBatch, null=True) | 可选,复用已确认法规条件 |
|
||||||
|
| batch_no | CharField | 填表批次号,如 AFF-YYYYMMDDHHMMSS |
|
||||||
|
| status | CharField | pending、running、waiting_user、success、partial_success、failed |
|
||||||
|
| trigger_message | ForeignKey(Message, null=True) | 触发消息 |
|
||||||
|
| requested_templates | JSONField | 用户指定模板 |
|
||||||
|
| selected_templates | JSONField | 实际生成模板 |
|
||||||
|
| output_types | JSONField | 请求输出类型,如 word、pdf、excel |
|
||||||
|
| registration_type | CharField | 注册类型 |
|
||||||
|
| product_name | CharField | 产品名称 |
|
||||||
|
| conflict_summary | JSONField | 冲突摘要 |
|
||||||
|
| risk_notes | JSONField | 不适用模板、低置信度等提示 |
|
||||||
|
| work_dir | CharField | 批次工作目录 |
|
||||||
|
| error_message | TextField | 异常说明 |
|
||||||
|
| created_at | DateTimeField | 创建时间 |
|
||||||
|
| started_at | DateTimeField | 开始时间 |
|
||||||
|
| finished_at | DateTimeField | 完成时间 |
|
||||||
|
|
||||||
|
### 11.2 ApplicationFormFillArtifact
|
||||||
|
|
||||||
|
可新增独立过程产物表,也可复用 `RegulatoryArtifact`。考虑到这是独立工作流,建议新增轻量产物表,结构与 `RegulatoryArtifact` 保持一致。
|
||||||
|
|
||||||
|
| 字段 | 类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| id | BigAutoField | 主键 |
|
||||||
|
| batch | ForeignKey(ApplicationFormFillBatch) | 所属填表批次 |
|
||||||
|
| artifact_type | CharField | template_copy、field_extract_result、merged_fields、traceability、notification_record |
|
||||||
|
| file_format | CharField | json、excel、docx、pdf |
|
||||||
|
| name | CharField | 产物名称 |
|
||||||
|
| storage_path | CharField | 存储路径 |
|
||||||
|
| metadata | JSONField | 模板编码、输出类型、生成状态等 |
|
||||||
|
| content_hash | CharField | 文件 hash |
|
||||||
|
| created_at | DateTimeField | 创建时间 |
|
||||||
|
|
||||||
|
### 11.3 与既有模型关系
|
||||||
|
|
||||||
|
```text
|
||||||
|
Conversation 1:N ApplicationFormFillBatch
|
||||||
|
FileSummaryBatch 1:N ApplicationFormFillBatch
|
||||||
|
RegulatoryReviewBatch 0..1:N ApplicationFormFillBatch
|
||||||
|
ApplicationFormFillBatch 1:N ApplicationFormFillArtifact
|
||||||
|
ApplicationFormFillBatch 1:N WorkflowNodeRun
|
||||||
|
ApplicationFormFillBatch 1:N ExportedSummaryFile
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、后端服务设计
|
||||||
|
|
||||||
|
### 12.1 FormFillWorkflowExecutor
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| run(batch) | 串行执行自动填表节点 |
|
||||||
|
| run_node(node) | 执行单节点并记录进度 |
|
||||||
|
| resolve_source_summary_batch() | 根据本次附件或最近成功批次确定来源 |
|
||||||
|
| emit_event() | 写入 `WorkflowEvent` |
|
||||||
|
| complete_or_partial() | 根据 Word/PDF/通知结果更新批次状态 |
|
||||||
|
|
||||||
|
### 12.2 TemplateSelectionService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| parse_requested_templates(message) | 从用户话语中识别指定模板 |
|
||||||
|
| detect_registration_type() | 按用户话语、法规确认条件、文件抽取识别注册类型 |
|
||||||
|
| select_templates() | 根据注册类型和用户指令输出模板列表 |
|
||||||
|
|
||||||
|
### 12.3 TemplateRepository
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| load_config() | 读取 YAML 模板配置 |
|
||||||
|
| resolve_source_template(code) | 找到原始模板或预转换模板 |
|
||||||
|
| copy_to_work_dir(code, batch) | 复制模板到批次目录 |
|
||||||
|
| convert_doc_to_docx(path) | `.doc` 转 `.docx` |
|
||||||
|
|
||||||
|
### 12.4 FieldExtractionService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| extract_by_rules(texts, template_fields) | 规则/正则抽取 |
|
||||||
|
| extract_by_llm(texts, template_fields) | LLM 结构化抽取 |
|
||||||
|
| run_parallel() | 并行执行两路抽取 |
|
||||||
|
| save_extract_artifact() | 保存 `field_extract_result.json` |
|
||||||
|
|
||||||
|
### 12.5 FieldMergeService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| normalize_fields() | 字段名、单位、空白和同义词归一 |
|
||||||
|
| rank_sources() | 按说明书、产品技术要求、检验报告等来源排序 |
|
||||||
|
| merge() | 输出最终字段 |
|
||||||
|
| detect_conflicts() | 输出冲突列表和高亮标记 |
|
||||||
|
|
||||||
|
### 12.6 WordTemplateFillService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| fill_table_rows() | 根据行名定位表格单元格并写入 |
|
||||||
|
| fill_placeholders() | 替换段落占位符 |
|
||||||
|
| fill_checklist_items() | 写入安全和性能基本原则清单 |
|
||||||
|
| apply_conflict_highlight() | 黄底红字标记冲突字段 |
|
||||||
|
| save_docx() | 保存填好后的 Word |
|
||||||
|
|
||||||
|
### 12.7 TraceabilityExportService
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| build_excel() | 生成字段来源追溯清单 |
|
||||||
|
| build_json() | 生成结构化追溯 JSON |
|
||||||
|
| create_export_records() | 写入 `ExportedSummaryFile` |
|
||||||
|
|
||||||
|
### 12.8 FormFillNotifier
|
||||||
|
|
||||||
|
复用或包装 `FeishuNotifier`。
|
||||||
|
|
||||||
|
| 通知场景 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 填表成功 | 通知上传人文件已生成 |
|
||||||
|
| 部分成功 | 通知 Word 已生成,但 PDF/部分模板失败 |
|
||||||
|
| 冲突字段存在 | 通知中提示存在冲突字段,需下载核对 |
|
||||||
|
| 失败 | 可选通知失败原因,Demo 可只在对话框展示 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、接口设计
|
||||||
|
|
||||||
|
### 13.1 发起自动填表
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| URL | POST /api/review-agent/application-form-fill/start/ |
|
||||||
|
| 认证 | 登录用户 |
|
||||||
|
| 请求 | conversation_id、message_id、file_summary_batch_id 可选、template_codes 可选、output_types 可选 |
|
||||||
|
| 响应 | batch_id、workflow_type、status、selected_templates |
|
||||||
|
|
||||||
|
处理规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
校验 conversation 属于当前用户
|
||||||
|
-> 如本次消息带附件,先执行文件汇总
|
||||||
|
-> 否则查找当前对话最近成功 FileSummaryBatch
|
||||||
|
-> 创建 ApplicationFormFillBatch
|
||||||
|
-> 初始化 WorkflowNodeRun
|
||||||
|
-> 启动 FormFillWorkflowExecutor
|
||||||
|
-> 返回工作流卡片初始状态
|
||||||
|
```
|
||||||
|
|
||||||
|
### 13.2 查询自动填表状态
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| URL | GET /api/review-agent/application-form-fill/{batch_id}/ |
|
||||||
|
| 认证 | 登录用户 |
|
||||||
|
| 响应 | 批次状态、节点状态、选择模板、冲突摘要、导出文件 |
|
||||||
|
|
||||||
|
### 13.3 下载导出文件
|
||||||
|
|
||||||
|
继续复用:
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| URL | GET /api/review-agent/file-summary/exports/{export_id}/download/ |
|
||||||
|
| 认证 | 登录用户 |
|
||||||
|
| 响应 | 文件流 |
|
||||||
|
|
||||||
|
权限规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
export_id -> workflow_type/workflow_batch_id -> ApplicationFormFillBatch -> conversation -> user
|
||||||
|
链路末端解析出的 user 必须等于当前登录用户,才允许下载。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、前端设计
|
||||||
|
|
||||||
|
### 14.1 自动填表工作流卡片
|
||||||
|
|
||||||
|
前端新增独立卡片类型 `application_form_fill`,展示节点:
|
||||||
|
|
||||||
|
| 节点 | 展示文案 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 准备资料 |
|
||||||
|
| template_select | 选择模板 |
|
||||||
|
| template_copy | 复制模板 |
|
||||||
|
| field_extract | 抽取字段 |
|
||||||
|
| conflict_merge | 冲突归并 |
|
||||||
|
| word_fill | 填写 Word |
|
||||||
|
| pdf_convert | 转换 PDF |
|
||||||
|
| output_export | 输出下载 |
|
||||||
|
| notify | 飞书通知 |
|
||||||
|
| completed | 已完成 |
|
||||||
|
|
||||||
|
### 14.2 对话框结果展示
|
||||||
|
|
||||||
|
工作流完成后,AI 对话框展示 Markdown 摘要:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成申报模板自动填表文件。
|
||||||
|
|
||||||
|
| 文件 | Word | PDF |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册证格式 | 下载 | 待生成 |
|
||||||
|
| 安全和性能基本原则清单 | 下载 | 待生成 |
|
||||||
|
|
||||||
|
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
|
||||||
|
|
||||||
|
[下载字段来源追溯清单](download-url)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14.3 指定模板交互
|
||||||
|
|
||||||
|
用户可以通过自然语言指定模板。前端无需额外表单,后端意图识别后在卡片中展示本次选择模板。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、事件设计
|
||||||
|
|
||||||
|
### 15.1 SSE 事件结构
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"event": "workflow",
|
||||||
|
"workflow_type": "application_form_fill",
|
||||||
|
"batch_id": 3001,
|
||||||
|
"conversation_id": 1001,
|
||||||
|
"node_code": "field_extract",
|
||||||
|
"node_group": "form_fill",
|
||||||
|
"status": "running",
|
||||||
|
"progress": 55,
|
||||||
|
"message": "正在并行抽取模板字段",
|
||||||
|
"payload": {
|
||||||
|
"selected_templates": ["registration_certificate", "essential_principles"],
|
||||||
|
"processed_files": 8,
|
||||||
|
"total_files": 20
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 15.2 节点进度
|
||||||
|
|
||||||
|
| 节点 | 进度口径 |
|
||||||
|
| --- | --- |
|
||||||
|
| 准备资料 | 是否找到来源批次 |
|
||||||
|
| 选择模板 | 模板数量 |
|
||||||
|
| 复制模板 | 已复制模板数/总模板数 |
|
||||||
|
| 抽取字段 | 已处理文件数/总文件数 |
|
||||||
|
| 冲突归并 | 字段数量和冲突数量 |
|
||||||
|
| 填写 Word | 已生成 Word 数/目标 Word 数 |
|
||||||
|
| 转换 PDF | 已生成 PDF 数/目标 PDF 数 |
|
||||||
|
| 输出下载 | 已创建下载记录数 |
|
||||||
|
| 飞书通知 | 通知状态 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、异常与降级设计
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 无成功文件汇总批次 | 进入 waiting_user,提示上传资料 |
|
||||||
|
| 新附件汇总失败 | 填表工作流不启动或标记失败 |
|
||||||
|
| 用户指定不适用模板 | 允许生成,摘要提示需人工确认 |
|
||||||
|
| `.doc` 转换失败 | 该模板失败,其他模板继续 |
|
||||||
|
| 单字段缺失 | Word 留空,追溯清单记录未提取 |
|
||||||
|
| 规则和 LLM 冲突 | 按来源优先级合并,冲突高亮 |
|
||||||
|
| 所有 Word 生成失败 | 批次 failed |
|
||||||
|
| 部分 Word 生成失败 | 批次 partial_success |
|
||||||
|
| PDF 转换失败 | 批次 partial_success,保留 Word 下载 |
|
||||||
|
| 飞书通知失败 | 不影响文件下载,记录通知失败 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、安全设计
|
||||||
|
|
||||||
|
| 设计点 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 原始模板保护 | 只读原始模板,所有写入发生在批次工作目录副本 |
|
||||||
|
| 对话隔离 | 填表批次必须绑定当前 Conversation |
|
||||||
|
| 文件读取权限 | 只能读取关联 `FileSummaryBatch` 下的文件 |
|
||||||
|
| 下载权限 | 根据 workflow_type 和 workflow_batch_id 校验当前用户 |
|
||||||
|
| LLM 输入控制 | 只传必要文本片段和字段上下文,避免发送整包敏感资料 |
|
||||||
|
| 飞书脱敏 | 通知仅包含生成状态、模板名称、冲突数量和系统内下载提示 |
|
||||||
|
| 命令调用安全 | LibreOffice/飞书 CLI 使用结构化参数,不拼接用户输入 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、验收设计
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 验收标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 意图触发 | 用户说“帮我填注册证”等语句可触发 `application_form_fill` |
|
||||||
|
| 2 | 指定模板 | 用户指定模板时只生成指定模板 |
|
||||||
|
| 3 | 默认模板 | 未指定模板时按注册类型生成适用的全部模板 |
|
||||||
|
| 4 | 新附件串联 | 本次消息带附件时先自动汇总,再执行填表 |
|
||||||
|
| 5 | 最近批次复用 | 无附件时复用当前对话最近成功文件汇总批次 |
|
||||||
|
| 6 | 工作流卡片 | 前端展示准备资料、选择模板、复制模板、抽取字段、填写 Word 等节点 |
|
||||||
|
| 7 | 字段并行抽取 | 规则/正则和 LLM 抽取结果均进入过程产物 |
|
||||||
|
| 8 | 冲突归并 | 说明书优先,冲突字段在 Word 中黄底红字 |
|
||||||
|
| 9 | 缺失字段 | 未提取字段在 Word 中留空 |
|
||||||
|
| 10 | 基本原则清单 | 高置信度条目写入,低置信度候选留在追溯清单 |
|
||||||
|
| 11 | Word 下载 | 对话框提供填好后的 Word 下载链接 |
|
||||||
|
| 12 | PDF 待办 | Demo 阶段 PDF 可展示为待生成,不阻断 Word |
|
||||||
|
| 13 | 追溯清单 | 生成字段来源追溯清单,包含规则、LLM、合并和冲突信息 |
|
||||||
|
| 14 | 飞书通知 | 填表完成后可通知上传人,失败不影响下载 |
|
||||||
|
| 15 | 权限隔离 | A 对话生成的 Word/追溯清单不能被 B 对话访问 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、实施建议
|
||||||
|
|
||||||
|
1. 新增 `ApplicationFormFillBatch` 和 `ApplicationFormFillArtifact` 数据模型,扩展 `ExportedSummaryFile.ExportType` 支持 `word`、`pdf`。
|
||||||
|
2. 新增模板配置 `application_form_templates_v1.yaml`,先录入注册证格式 `.docx` 的已识别字段。
|
||||||
|
3. 将两个 `.doc` 模板转换为 `.docx` 工作模板,或在配置中标记为待转换模板。
|
||||||
|
4. 实现 `TemplateSelectionService`,支持用户指定模板、注册类型识别和默认模板选择。
|
||||||
|
5. 实现规则/正则与 LLM 并行字段抽取,并保存 `field_extract_result.json`。
|
||||||
|
6. 实现 `FieldMergeService`,按说明书优先规则处理冲突。
|
||||||
|
7. 实现 `WordTemplateFillService`,优先支持表格行填充和冲突高亮。
|
||||||
|
8. 实现追溯清单 Excel 导出和 Word 下载记录。
|
||||||
|
9. 改造前端工作流卡片,新增 `application_form_fill` 类型。
|
||||||
|
10. 接入飞书通知摘要。
|
||||||
|
11. 将 PDF 转换、逐页版式 QA 和更完整的 `.doc` 模板转换纳入后续待办。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、待办与待确认事项
|
||||||
|
|
||||||
|
| 序号 | 项目 | 当前建议 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | PDF 转换 | 放入待办,Demo 优先 Word 下载 |
|
||||||
|
| 2 | `.doc` 模板转换 | 优先 LibreOffice/soffice;不可用时预置 `.docx` 工作模板 |
|
||||||
|
| 3 | 安全和性能基本原则清单条目拆解 | 需转换模板后补齐 YAML 条目配置 |
|
||||||
|
| 4 | LLM 结构化抽取提示词 | 需约束输出 JSON schema 和置信度 |
|
||||||
|
| 5 | 飞书通知渠道 | Demo 可 mock 或 CLI,正式版接 Webhook/API |
|
||||||
|
| 6 | 低置信度阈值 | 建议功能实现阶段先配置为 0.75 |
|
||||||
|
| 7 | 版式验证 | P1 增加 PDF 页数检查和逐页截图 QA |
|
||||||
292
docs/2.功能设计/4.飞书通知与问答接入.md
Normal file
292
docs/2.功能设计/4.飞书通知与问答接入.md
Normal file
@@ -0,0 +1,292 @@
|
|||||||
|
# 飞书通知与问答接入功能设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/4.飞书通知与问答接入.md |
|
||||||
|
| 依赖功能设计 | docs/2.功能设计/1.自动汇总.md;docs/2.功能设计/2.NMPA注册资料法规核查与整改闭环.md;docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 飞书通知与问答接入 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计目标
|
||||||
|
|
||||||
|
本功能用于将系统内工作流结果通过飞书官方智能体/应用机器人同步到指定个人账号,并为后续飞书内问答能力预留数据模型和服务边界。首期实现重点是:自动汇总、NMPA 注册资料法规核查与整改闭环、产品关键信息提取与申报文件自动填表三个流程结束后,使用 App ID/App Secret 获取 `tenant_access_token`,调用飞书消息 API 向指定个人账号发送富文本私聊提醒。
|
||||||
|
|
||||||
|
首期不实现飞书事件订阅回调和私聊问答,但需要在设计上预留用户映射、查询服务、权限过滤和问答日志能力,保证后续可以平滑扩展到“用户在飞书私聊机器人中查询批次状态、风险摘要、缺失摘要和导出摘要”。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、设计范围
|
||||||
|
|
||||||
|
### 2.1 本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 指定个人通知 | 通过飞书官方智能体/应用机器人消息 API 向一个指定个人账号发送通知 |
|
||||||
|
| 2 | 发起人展示 | 消息正文展示批次发起人或上传人,不做群内 @ |
|
||||||
|
| 3 | 三流程接入 | 自动汇总、法规核查、自动填表均发送完成通知 |
|
||||||
|
| 4 | 富文本消息 | 使用飞书富文本格式展示标题、批次、状态、摘要、链接和发起人 |
|
||||||
|
| 5 | token 管理 | 使用 App ID/App Secret 获取并缓存 tenant_access_token |
|
||||||
|
| 6 | 通知判重 | 同一批次、同一工作流、同一状态只发送一次 |
|
||||||
|
| 7 | 通知记录 | 保存摘要、通道、目标、状态、失败原因、发送时间等信息 |
|
||||||
|
| 8 | 批次详情展示 | 在对应批次详情页展示通知状态和失败原因 |
|
||||||
|
| 9 | 用户映射管理 | 通过 Django Admin 手工维护系统用户与飞书用户标识的映射,用于后续按发起人私聊通知和问答身份识别 |
|
||||||
|
| 10 | 问答预留 | 预留飞书用户映射、查询服务、权限规则和问答日志模型 |
|
||||||
|
|
||||||
|
### 2.2 非本期范围
|
||||||
|
|
||||||
|
| 序号 | 范围项 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 飞书私聊问答回调 | 不实现事件订阅接口和问答回复处理 |
|
||||||
|
| 2 | 群聊 @ 机器人问答 | 不接收群消息,不处理群内权限问题 |
|
||||||
|
| 3 | 飞书事件订阅回调 | 首期不接收私聊或群聊消息事件 |
|
||||||
|
| 4 | 复杂消息卡片 | 不做交互式卡片按钮和回调 |
|
||||||
|
| 5 | 自动后台重试 | 飞书发送失败只记录,不自动重试 |
|
||||||
|
| 6 | 飞书通讯录同步 | 不自动拉取用户,首期手工维护映射 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、与既有功能的关系
|
||||||
|
|
||||||
|
| 既有能力 | 处理方式 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 自动汇总工作流 | 接入通知 | 文件汇总完成后生成摘要通知 |
|
||||||
|
| 法规核查工作流 | 替换/扩展 mock 通知 | 风险分级和报告生成后发送摘要通知 |
|
||||||
|
| 自动填表工作流 | 扩展现有 notifier | Word/追溯清单生成后发送摘要通知 |
|
||||||
|
| 通知记录模型 | 统一扩展 | 现有法规和填表通知记录已存在,本设计建议抽象统一通知服务 |
|
||||||
|
| 工作流事件 | 复用 | 通知发送结果可作为节点事件或批次附属信息展示 |
|
||||||
|
| Django Admin | 扩展 | 新增飞书用户映射管理入口 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、总体架构
|
||||||
|
|
||||||
|
### 4.1 逻辑架构
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["业务工作流完成"] --> B["NotificationDispatcher"]
|
||||||
|
B --> C["WorkflowNotificationBuilder"]
|
||||||
|
C --> D["ConfiguredPersonalRecipientResolver"]
|
||||||
|
D --> E["RichTextMessageBuilder"]
|
||||||
|
E --> F{"通知是否已发送"}
|
||||||
|
F -->|"已发送"| G["写入/返回重复跳过结果"]
|
||||||
|
F -->|"未发送"| H{"飞书通知是否启用"}
|
||||||
|
H -->|"否"| I["写入 mock/未启用记录"]
|
||||||
|
H -->|"是"| J["FeishuTokenProvider"]
|
||||||
|
J --> K["获取/复用 tenant_access_token"]
|
||||||
|
K --> L["FeishuMessageApiClient"]
|
||||||
|
L --> X["POST /im/v1/messages"]
|
||||||
|
X --> M["保存通知记录"]
|
||||||
|
M --> N["批次详情页展示"]
|
||||||
|
|
||||||
|
O["后续飞书私聊消息"] -.预留.-> P["FeishuQuestionService"]
|
||||||
|
P -.预留.-> Q["BatchSummaryQueryService"]
|
||||||
|
Q -.预留.-> R["权限过滤"]
|
||||||
|
P -.预留.-> S["FeishuQuestionLog"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 模块划分
|
||||||
|
|
||||||
|
| 模块 | 责任 |
|
||||||
|
| --- | --- |
|
||||||
|
| `notification_dispatcher` | 工作流完成后统一调度通知发送 |
|
||||||
|
| `workflow_notification_builder` | 将不同工作流批次转换为统一通知上下文 |
|
||||||
|
| `feishu_recipient_resolver` | 首期读取配置中的个人 open_id/user_id;后续支持按系统用户映射解析 |
|
||||||
|
| `feishu_message_builder` | 构造飞书富文本消息体 |
|
||||||
|
| `feishu_token_provider` | 使用 App ID/App Secret 获取并缓存 tenant_access_token |
|
||||||
|
| `feishu_message_api_client` | 调用飞书发送消息 API、处理超时和响应解析 |
|
||||||
|
| `notification_record_service` | 判重、保存成功/失败/未启用记录 |
|
||||||
|
| `batch_notification_presenter` | 为批次详情页输出通知状态 |
|
||||||
|
| `feishu_question_service` | 后续问答预留,解析问题并查询摘要 |
|
||||||
|
| `batch_summary_query_service` | 后续问答预留,按权限查询批次摘要 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、通知业务流程
|
||||||
|
|
||||||
|
### 5.1 主流程
|
||||||
|
|
||||||
|
```text
|
||||||
|
业务工作流进入 success、partial_success 或 failed
|
||||||
|
-> 工作流调用统一通知服务
|
||||||
|
-> 通知服务生成 workflow_type、batch_id、status 组成的判重键
|
||||||
|
-> 检查是否已有同一判重键的成功通知
|
||||||
|
-> 若已有成功通知,跳过发送并返回 skipped_duplicate
|
||||||
|
-> 读取批次、用户、摘要、结果链接
|
||||||
|
-> 读取配置中的个人 open_id/user_id 作为接收人
|
||||||
|
-> 构造富文本消息,正文展示批次发起人或上传人
|
||||||
|
-> 判断 FEISHU_NOTIFY_ENABLED
|
||||||
|
-> 未启用时写入 mock/disabled 记录
|
||||||
|
-> 已启用时获取或复用 tenant_access_token
|
||||||
|
-> 调用飞书消息 API 向指定个人 open_id/user_id 发送消息
|
||||||
|
-> 发送成功写入 sent/success 记录
|
||||||
|
-> 发送失败写入 failed 记录,记录错误信息
|
||||||
|
-> 业务工作流不因通知失败而失败
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 三类工作流通知摘要
|
||||||
|
|
||||||
|
| 工作流 | workflow_type | 摘要字段 | 下一步 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 自动汇总 | `file_summary` | 文件总数、成功解析数、失败/跳过数、导出文件数 | 查看汇总结果或下载 Excel |
|
||||||
|
| 法规核查 | `regulatory_review` | 风险总数、阻断项数、高风险数、中风险数、报告导出状态 | 查看风险报告和整改建议 |
|
||||||
|
| 自动填表 | `application_form_fill` | 选中模板数、导出文件数、冲突字段数、失败原因概述 | 下载 Word/追溯清单并人工确认 |
|
||||||
|
|
||||||
|
### 5.3 通知状态
|
||||||
|
|
||||||
|
| 状态 | 含义 | 是否阻断主流程 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| pending | 已创建记录但未发送 | 否 |
|
||||||
|
| sent/success | 已成功发送到飞书 | 否 |
|
||||||
|
| failed | 发送失败或配置异常 | 否 |
|
||||||
|
| skipped_duplicate | 已存在同一批次、同一流程、同一状态的成功通知,本次跳过发送 | 否 |
|
||||||
|
| disabled/mock | 真实通知未启用,记录为模拟或未启用 | 否 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、飞书富文本设计
|
||||||
|
|
||||||
|
### 6.1 消息结构
|
||||||
|
|
||||||
|
飞书富文本消息建议使用 `post` 类型。首期内容只放摘要,不展开完整风险项和缺失项。
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"msg_type": "post",
|
||||||
|
"content": {
|
||||||
|
"post": {
|
||||||
|
"zh_cn": {
|
||||||
|
"title": "自动填表流程已完成",
|
||||||
|
"content": [
|
||||||
|
[
|
||||||
|
{"tag": "text", "text": "状态:成功\n"},
|
||||||
|
{"tag": "text", "text": "批次:AFF-20260607-001\n"},
|
||||||
|
{"tag": "text", "text": "发起人:owner\n"}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{"tag": "text", "text": "摘要:生成 2 个文件,冲突字段 1 个。\n"},
|
||||||
|
{"tag": "a", "text": "查看系统结果", "href": "http://127.0.0.1:8000/..."}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 接收人标识优先级
|
||||||
|
|
||||||
|
首期接收人来自环境变量配置。若同时配置多个飞书标识,按以下优先级选取:
|
||||||
|
|
||||||
|
```text
|
||||||
|
FEISHU_DEFAULT_USER_OPEN_ID -> FEISHU_DEFAULT_USER_ID
|
||||||
|
```
|
||||||
|
|
||||||
|
若无可用接收人标识,系统不发送真实飞书消息,并记录配置缺失失败。
|
||||||
|
|
||||||
|
用户映射表仍保留,用于后续从“固定个人账号”升级为“按发起人私聊”。
|
||||||
|
|
||||||
|
### 6.3 系统链接
|
||||||
|
|
||||||
|
首期使用本地地址,例如:
|
||||||
|
|
||||||
|
```text
|
||||||
|
http://127.0.0.1:8000/
|
||||||
|
```
|
||||||
|
|
||||||
|
批次详情链接由各工作流已有页面路由或详情接口拼接。部署环境后续再升级为可信域名配置。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、配置设计
|
||||||
|
|
||||||
|
| 配置项 | 来源 | 是否敏感 | 说明 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| FEISHU_NOTIFY_ENABLED | 环境变量 | 否 | 是否启用真实飞书通知 |
|
||||||
|
| FEISHU_NOTIFY_CHANNEL | 环境变量 | 否 | 首期为 `feishu_api` |
|
||||||
|
| FEISHU_APP_ID | 环境变量 | 是 | 飞书智能体/企业自建应用 App ID |
|
||||||
|
| FEISHU_APP_SECRET | 环境变量 | 是 | 飞书智能体/企业自建应用 App Secret |
|
||||||
|
| FEISHU_DEFAULT_USER_OPEN_ID | 环境变量 | 否 | 首期指定接收人的飞书 open_id |
|
||||||
|
| FEISHU_DEFAULT_USER_ID | 环境变量 | 否 | 首期指定接收人的飞书 user_id |
|
||||||
|
| FEISHU_DEFAULT_TARGET_NAME | 环境变量 | 否 | 首期指定接收人(个人账号)的展示名称,用于通知记录和批次详情页展示 |
|
||||||
|
| FEISHU_TENANT_TOKEN_CACHE_SECONDS | 环境变量 | 否 | tenant_access_token 本地缓存秒数 |
|
||||||
|
| FEISHU_REQUEST_TIMEOUT_SECONDS | 环境变量 | 否 | 默认 5 秒 |
|
||||||
|
| 系统用户与飞书用户映射 | Django Admin | 部分敏感 | open_id、user_id、mobile |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、页面设计
|
||||||
|
|
||||||
|
### 8.1 Django Admin
|
||||||
|
|
||||||
|
新增飞书用户映射管理:
|
||||||
|
|
||||||
|
| 字段 | 列表展示 | 可搜索 | 可过滤 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| system_user | 是 | username | 是 |
|
||||||
|
| feishu_display_name | 是 | 是 | 否 |
|
||||||
|
| feishu_open_id | 否 | 是 | 否 |
|
||||||
|
| feishu_user_id | 否 | 是 | 否 |
|
||||||
|
| feishu_mobile | 否 | 是 | 否 |
|
||||||
|
| is_active | 是 | 否 | 是 |
|
||||||
|
|
||||||
|
### 8.2 批次详情页
|
||||||
|
|
||||||
|
三个流程对应的批次详情或结果区域展示通知状态:
|
||||||
|
|
||||||
|
| 展示项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 通知通道 | mock、feishu_api |
|
||||||
|
| 通知目标 | 指定个人账号名称或配置名称 |
|
||||||
|
| 接收人 | 指定个人账号;后续可展示发起人/上传人的飞书展示名 |
|
||||||
|
| 发送状态 | 成功、失败、未启用、重复跳过 |
|
||||||
|
| 发送时间 | 成功发送时间 |
|
||||||
|
| 失败原因 | 配置错误、超时、飞书返回错误等摘要 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、飞书问答预留设计
|
||||||
|
|
||||||
|
### 9.1 首期预留能力
|
||||||
|
|
||||||
|
| 能力 | 设计说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户映射复用 | 后续私聊事件中的飞书用户 ID 可通过映射表关联系统用户 |
|
||||||
|
| 批次查询服务 | 预留按批次号、工作流类型、最近批次查询摘要的服务 |
|
||||||
|
| 权限过滤 | 普通用户只查自己发起或上传的批次;管理员可查全部 |
|
||||||
|
| 问答日志 | 预留日志表或服务接口,记录问题、意图、查询对象和回答摘要 |
|
||||||
|
|
||||||
|
### 9.2 后续问答能力边界
|
||||||
|
|
||||||
|
| 问题类型 | 后续问答 MVP(第一版问答)是否支持 |
|
||||||
|
| --- | --- |
|
||||||
|
| 查最近批次状态 | 是 |
|
||||||
|
| 查指定批次状态 | 是 |
|
||||||
|
| 查风险摘要 | 是 |
|
||||||
|
| 查缺失摘要 | 是 |
|
||||||
|
| 查导出摘要 | 是 |
|
||||||
|
| 解释具体整改建议 | 否 |
|
||||||
|
| 重新发起工作流 | 否 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 三流程通知 | 自动汇总、法规核查、自动填表完成后均调用统一通知服务 |
|
||||||
|
| 2 | 个人账号发送 | 配置 App ID、App Secret 和指定个人 open_id/user_id 后,个人飞书账号能收到富文本通知 |
|
||||||
|
| 3 | 发起人展示 | 消息正文能展示流程发起人或上传人 |
|
||||||
|
| 4 | 接收人缺失 | 指定接收人缺失时不发送真实消息,并记录配置错误 |
|
||||||
|
| 5 | token 管理 | 系统能获取并缓存 tenant_access_token,token 失效后可重新获取 |
|
||||||
|
| 6 | 判重 | 同一批次、同一流程、同一状态不会重复发送成功通知 |
|
||||||
|
| 7 | 失败不阻断 | 飞书接口失败时主工作流仍完成 |
|
||||||
|
| 8 | 记录落库 | 成功、失败、未启用、重复跳过均可追溯 |
|
||||||
|
| 9 | 页面展示 | 批次详情页展示通知状态和失败原因 |
|
||||||
|
| 10 | 问答预留 | 用户映射、查询服务边界和日志设计可支持后续私聊问答 |
|
||||||
873
docs/2.功能设计/5.第1章监管信息材料包生成.md
Normal file
873
docs/2.功能设计/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,873 @@
|
|||||||
|
# 第1章监管信息材料包生成功能设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 参考功能设计 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-10 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计目标
|
||||||
|
|
||||||
|
新增独立工作流 `regulatory_info_package`,用于根据产品说明书生成第1章监管信息材料包。用户在对话中上传或选择一个产品说明书,发送“根据说明书生成第1章监管信息”等指令后,系统复制 `docs/0.原始材料/第1章 监管信息` 下的 7 个样例模板,抽取说明书中的产品关键信息,生成一套新的第1章监管信息文件,并打包为 `第1章 监管信息(预生成版).zip` 作为主下载入口。
|
||||||
|
|
||||||
|
本功能与 `application_form_fill` 平级,不复用其 workflow_type 和批次表;但复用其已形成的服务思想和部分可拆能力,包括字段抽取、LLM 调用、Word 写入、追溯清单、导出下载、通知、工作流事件和前端卡片。
|
||||||
|
|
||||||
|
本期重点实现:
|
||||||
|
|
||||||
|
| 目标 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 新增 `regulatory_info_package` 批次、节点和卡片 |
|
||||||
|
| 单说明书输入 | 直接从当前对话 active 附件中选择唯一说明书;兼容最近成功文件汇总批次 |
|
||||||
|
| 模板驱动 | 通过 YAML 配置维护 7 个模板、字段映射和生成策略 |
|
||||||
|
| 模板字段化 | 优先使用 Word 内容控件 Tag 或稳定占位符,让代码只写字段值,最大限度保留原格式 |
|
||||||
|
| 规则 + LLM 并行抽取 | 代码抽取与 LLM 抽取并行,合并后写入模板 |
|
||||||
|
| 待确认高亮 | 系统新填入的 `/`、LLM-only 字段、冲突字段均高亮 |
|
||||||
|
| `.doc` 等价处理 | 设计 `LegacyDocDocumentAdapter`,按能力驱动提供与 `.docx` 一致的文档操作接口;原生能力不可用时明确兜底 |
|
||||||
|
| zip 主输出 | 扩展 `ExportedSummaryFile.ExportType.ZIP`,统一下载权限 |
|
||||||
|
| LLM 意图路由 | 扩展路由 action,支持固定话术和 LLM 语义判断 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、规范依据与裁决
|
||||||
|
|
||||||
|
| 规范来源 | 命中内容 | 设计处理 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| GYRX 后端开发规范 | 服务层职责清晰、接口响应统一、记录必要日志 | Django 项目沿用现有 JsonResponse/SSE 模式;服务拆入独立模块,记录批次与节点日志 |
|
||||||
|
| GYRX 前端开发规范 | 前端样式复用、交互一致、下载图标语义 | 当前项目为 Django 模板 + 原生 JS,按现有工具 chip、工作流卡片和下载链接风格扩展 |
|
||||||
|
| 既有自动填表设计 | 独立工作流、YAML 配置、字段抽取、追溯清单、导出记录 | 复用模式,不复用批次表和 workflow_type |
|
||||||
|
| 需求分析确认 | `.doc` 不只依赖转换、zip 主入口、LLM-only 高亮 | 在服务抽象和验收标准中作为强约束 |
|
||||||
|
|
||||||
|
冲突裁决:GYRX 规范中部分 Java/Spring 约束不适用于当前 Django 项目,按当前项目既有 Django 架构落地;通用原则如服务拆分、日志、权限和前端交互一致性继续采用。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、与既有功能关系
|
||||||
|
|
||||||
|
### 3.1 复用边界
|
||||||
|
|
||||||
|
| 能力 | 处理方式 | 现有代码/模块 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 对话与消息 | 复用 | `Conversation`、`Message`、`stream_message` |
|
||||||
|
| 附件上传 | 复用 | `FileAttachment`、`file_summary.storage` |
|
||||||
|
| 文件汇总结果 | 兼容复用 | `FileSummaryBatch`、`FileSummaryItem` |
|
||||||
|
| 文本抽取 | 复用并扩展 | `regulatory_review/services/text_extract.py`、`rag_index.py` |
|
||||||
|
| LLM 调用 | 复用 | `review_agent/llm.py` |
|
||||||
|
| 知识库搜索 | 复用系统现有能力 | `knowledge_base.py`、法规 RAG 相关服务 |
|
||||||
|
| 导出下载 | 扩展复用 | `ExportedSummaryFile`、`file_summary.views.export_download` |
|
||||||
|
| 工作流事件 | 复用 | `WorkflowNodeRun`、`WorkflowEvent` |
|
||||||
|
| 通知 | 复用统一通知链路 | `review_agent.notifications` |
|
||||||
|
| 前端卡片 | 扩展复用 | `templates/home.html`、`static/js/app.js` |
|
||||||
|
|
||||||
|
### 3.2 新增边界
|
||||||
|
|
||||||
|
| 能力 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立批次 | 新增 `RegulatoryInfoPackageBatch`,批次号 `RIP-...` |
|
||||||
|
| 独立产物 | 新增 `RegulatoryInfoPackageArtifact` 记录模板副本、抽取结果、生成文件、zip 和追溯清单 |
|
||||||
|
| 独立通知记录 | 新增 `RegulatoryInfoPackageNotificationRecord`,结构与自动填表通知保持一致 |
|
||||||
|
| 模板配置 | 新增 `regulatory_info_package_templates_v1.yaml` |
|
||||||
|
| 说明书选择 | 新增输入选择服务,优先从 active 附件选择,兼容文件汇总批次 |
|
||||||
|
| 材料包生成 | 新增 7 个文件的生成策略和 zip 打包服务 |
|
||||||
|
| `.doc` 适配 | 新增旧版 Word 文档适配层 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、总体架构
|
||||||
|
|
||||||
|
### 4.1 目录结构
|
||||||
|
|
||||||
|
新增模块:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/
|
||||||
|
regulatory_info_package/
|
||||||
|
__init__.py
|
||||||
|
constants.py
|
||||||
|
schemas.py
|
||||||
|
storage.py
|
||||||
|
events.py
|
||||||
|
workflow.py
|
||||||
|
views.py
|
||||||
|
services/
|
||||||
|
__init__.py
|
||||||
|
input_select.py
|
||||||
|
template_config.py
|
||||||
|
template_repository.py
|
||||||
|
instruction_extract.py
|
||||||
|
field_extract.py
|
||||||
|
field_merge.py
|
||||||
|
standard_candidates.py
|
||||||
|
document_writer.py
|
||||||
|
docx_document.py
|
||||||
|
legacy_doc_document.py
|
||||||
|
package_generate.py
|
||||||
|
traceability_export.py
|
||||||
|
zip_export.py
|
||||||
|
summary.py
|
||||||
|
notifier.py
|
||||||
|
templates/
|
||||||
|
regulatory_info_package_templates_v1.yaml
|
||||||
|
prompts/
|
||||||
|
field_extract.md
|
||||||
|
router_intent.md
|
||||||
|
standard_candidate.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 逻辑架构
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TD
|
||||||
|
A["AI 对话页"] --> B["意图路由"]
|
||||||
|
B --> C{"action = regulatory_info_package"}
|
||||||
|
C --> D["RegulatoryInfoPackageBatch"]
|
||||||
|
D --> E["RegulatoryInfoPackageWorkflowExecutor"]
|
||||||
|
E --> F["输入说明书选择"]
|
||||||
|
E --> G["模板配置 YAML"]
|
||||||
|
F --> H["说明书文本与表格抽取"]
|
||||||
|
H --> I1["规则/代码抽取"]
|
||||||
|
H --> I2["LLM 结构化抽取"]
|
||||||
|
I1 --> J["字段合并与高亮决策"]
|
||||||
|
I2 --> J
|
||||||
|
J --> K["标准候选服务"]
|
||||||
|
J --> L["材料包生成服务"]
|
||||||
|
K --> L
|
||||||
|
L --> M1["DOCX 文档适配器"]
|
||||||
|
L --> M2["Legacy DOC 文档适配器"]
|
||||||
|
M1 --> N["7 个目标文件"]
|
||||||
|
M2 --> N
|
||||||
|
N --> O["追溯清单"]
|
||||||
|
N --> P["ZIP 打包"]
|
||||||
|
O --> Q["ExportedSummaryFile"]
|
||||||
|
P --> Q
|
||||||
|
E --> R["WorkflowEvent/SSE"]
|
||||||
|
E --> S["通知服务"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 技术选型
|
||||||
|
|
||||||
|
| 设计项 | 本期方案 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Web 框架 | Django | 沿用当前项目 |
|
||||||
|
| 工作流执行 | 轻量 Executor + 后台线程 | 与文件汇总、法规核查、自动填表一致 |
|
||||||
|
| 工作流状态 | `WorkflowNodeRun`、`WorkflowEvent` | 使用 `workflow_type=regulatory_info_package` |
|
||||||
|
| 模板配置 | YAML | 便于维护 7 个模板和字段映射 |
|
||||||
|
| `.docx` 操作 | `python-docx` | 表格、段落、run、底色和字体可控 |
|
||||||
|
| `.doc` 操作 | 适配器抽象 | Python 标准库不支持 `.doc` 二进制 Word 写入;设计为 COM/UNO/第三方库适配器,能力不可用时使用可追溯的 `.docx` 兜底 |
|
||||||
|
| zip 打包 | Python `zipfile` 标准库 | 标准库可满足打包需求 |
|
||||||
|
| Excel 追溯 | `openpyxl` | 复用现有依赖 |
|
||||||
|
| LLM | `review_agent.llm.generate_completion` | 统一模型调用 |
|
||||||
|
| 知识库 | 系统现有知识库/RAG | 不新增单独 RAG 模块 |
|
||||||
|
|
||||||
|
关于 `.doc`:Python 自带库不能实现类似 Apache POI HWPF 的 Word 97-2003 二进制文档完整读写。项目依赖中有 `olefile`,可读取 OLE 复合文档结构,但不足以可靠修改 Word 文本、表格和样式。因此设计上必须使用文档适配器屏蔽实现差异,底层可选 Word COM、LibreOffice UNO、专用第三方库或受控转换兜底。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、触发与路由设计
|
||||||
|
|
||||||
|
### 5.1 action 扩展
|
||||||
|
|
||||||
|
`skill_router.py` 扩展:
|
||||||
|
|
||||||
|
| 项 | 设计 |
|
||||||
|
| --- | --- |
|
||||||
|
| 新 action | `regulatory_info_package` |
|
||||||
|
| 新属性 | `starts_regulatory_info_package` |
|
||||||
|
| ROUTE_ACTIONS | 增加 `regulatory_info_package` |
|
||||||
|
| LLM prompt | 描述该 action 用于“根据说明书生成第1章监管信息、监管信息材料包、申请表/产品列表/声明材料包” |
|
||||||
|
|
||||||
|
### 5.2 固定规则
|
||||||
|
|
||||||
|
规则预判关键词:
|
||||||
|
|
||||||
|
```python
|
||||||
|
REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS = [
|
||||||
|
"根据说明书生成第1章监管信息",
|
||||||
|
"生成监管信息材料包",
|
||||||
|
"从说明书生成第1章材料",
|
||||||
|
"第1章监管信息",
|
||||||
|
"监管信息材料包",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
规则命中时直接进入本工作流。规则未命中时,继续走 LLM 路由判断,避免自然表达漏触发。
|
||||||
|
|
||||||
|
### 5.3 对话启动
|
||||||
|
|
||||||
|
`review_agent/services.py::stream_message` 增加分支:
|
||||||
|
|
||||||
|
```text
|
||||||
|
if route.starts_regulatory_info_package:
|
||||||
|
-> 选择说明书输入
|
||||||
|
-> 创建 RegulatoryInfoPackageBatch
|
||||||
|
-> start_regulatory_info_package_workflow
|
||||||
|
-> SSE workflow_started
|
||||||
|
-> 回复“已启动第1章监管信息材料包生成工作流,批次号:RIP-...”
|
||||||
|
```
|
||||||
|
|
||||||
|
如果没有 active 附件,也没有可复用的最近文件汇总批次,则回复“请先上传产品说明书”。
|
||||||
|
如果存在多个候选说明书且用户消息无法唯一命中文件名,则不展示选择弹窗,由对话反问用户确认具体文件名后再启动工作流。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、输入选择设计
|
||||||
|
|
||||||
|
### 6.1 选择优先级
|
||||||
|
|
||||||
|
| 优先级 | 来源 | 规则 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 用户消息指定文件名 | 按 active 附件名或可复用文件名模糊匹配,唯一命中则使用 |
|
||||||
|
| 2 | 当前对话 active 附件 | 文件名包含“说明书”且扩展名为 `.docx` |
|
||||||
|
| 3 | 当前对话 active 附件 | 唯一 `.docx` 文件 |
|
||||||
|
| 4 | 最近成功 `FileSummaryBatch.items` | 文件名包含“说明书”且扩展名为 `.docx` |
|
||||||
|
| 5 | 无法唯一选择 | 对话反问用户确认使用哪个说明书;必要时批次进入 `waiting_user` |
|
||||||
|
|
||||||
|
本期直接输入只支持 `.docx` 产品说明书。`.doc`、PDF、扫描件说明书作为后续扩展;但输出模板中的 `.doc` 必须支持。
|
||||||
|
|
||||||
|
### 6.2 输入绑定
|
||||||
|
|
||||||
|
批次记录:
|
||||||
|
|
||||||
|
| 字段 | 来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| source_attachment | 直接选择的 FileAttachment |
|
||||||
|
| source_summary_batch | 可选,来自最近成功文件汇总 |
|
||||||
|
| source_summary_item_id | 可选,来自汇总条目的 ID |
|
||||||
|
| source_file_name | 原始说明书文件名 |
|
||||||
|
| source_storage_path | 说明书存储路径 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、模板配置设计
|
||||||
|
|
||||||
|
配置路径:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/regulatory_info_package/templates/regulatory_info_package_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
配置结构:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: regulatory_info_package_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/第1章 监管信息
|
||||||
|
output_zip_name: 第1章 监管信息(预生成版).zip
|
||||||
|
templates:
|
||||||
|
- code: ch1_2_directory
|
||||||
|
output_name: CH1.2 监管信息目录.docx
|
||||||
|
source_file: CH1.2 监管信息目录.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: directory
|
||||||
|
include_in_zip: true
|
||||||
|
fields:
|
||||||
|
- key: product_name
|
||||||
|
targets:
|
||||||
|
- type: paragraph_contains_replace
|
||||||
|
match: 呼吸道合胞病毒、肺炎支原体核酸检测试剂盒(荧光PCR法)
|
||||||
|
- code: ch1_4_application_form
|
||||||
|
output_name: CH1.4 申请表.docx
|
||||||
|
source_file: CH1.4 申请表.docx
|
||||||
|
file_format: docx
|
||||||
|
strategy: application_form
|
||||||
|
include_in_zip: true
|
||||||
|
- code: ch1_9_pre_submission
|
||||||
|
output_name: CH1.9 产品申报前沟通的说明.doc
|
||||||
|
source_file: CH1.9 产品申报前沟通的说明.doc
|
||||||
|
file_format: doc
|
||||||
|
strategy: pre_submission
|
||||||
|
prefer_legacy_doc_native: true
|
||||||
|
allow_docx_fallback: true
|
||||||
|
include_in_zip: true
|
||||||
|
```
|
||||||
|
|
||||||
|
字段映射优先级:
|
||||||
|
|
||||||
|
| 目标类型 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| content_control_tag | 正式模板优先,代码按 Word 内容控件 Tag 写入 |
|
||||||
|
| placeholder | 过渡方案,替换稳定占位符并保留原 run/段落格式 |
|
||||||
|
| table_row_label | 未字段化模板的兜底方案,必须保留原单元格格式 |
|
||||||
|
|
||||||
|
### 7.1 配置项说明
|
||||||
|
|
||||||
|
| 配置项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| version | 配置版本,写入批次 |
|
||||||
|
| source_dir | 样例模板目录 |
|
||||||
|
| output_zip_name | zip 主输出文件名 |
|
||||||
|
| templates | 7 个目标模板 |
|
||||||
|
| code | 模板编码 |
|
||||||
|
| output_name | 生成文件名 |
|
||||||
|
| source_file | 样例文件 |
|
||||||
|
| file_format | docx/doc |
|
||||||
|
| strategy | 生成策略 |
|
||||||
|
| include_in_zip | 是否进入 zip |
|
||||||
|
| fields | 字段映射与替换目标 |
|
||||||
|
| prefer_legacy_doc_native | `.doc` 是否优先尝试原生处理能力 |
|
||||||
|
| allow_docx_fallback | 原生 `.doc` 能力不可用或失败时是否允许 `.docx` 兜底 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、字段抽取设计
|
||||||
|
|
||||||
|
### 8.1 说明书解析
|
||||||
|
|
||||||
|
`instruction_extract.py` 输出:
|
||||||
|
|
||||||
|
| 数据 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| paragraphs | 按顺序提取段落 |
|
||||||
|
| sections | 按 `【章节名】` 切分 |
|
||||||
|
| tables | 提取表格二维数据 |
|
||||||
|
| component_tables | 识别主要组成成分表 |
|
||||||
|
| front_text | 前 4000 字,供 LLM 使用 |
|
||||||
|
|
||||||
|
### 8.2 规则抽取
|
||||||
|
|
||||||
|
规则抽取覆盖:
|
||||||
|
|
||||||
|
| 字段 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| product_name | `【产品名称】` 下一段 |
|
||||||
|
| package_specification | `【包装规格】` 到下一章节 |
|
||||||
|
| intended_use | `【预期用途】` 到下一章节 |
|
||||||
|
| detection_principle | `【检测原理】` 到下一章节 |
|
||||||
|
| main_components | `【主要组成成分】` 表格摘要 |
|
||||||
|
| storage_condition_and_validity | `【储存条件及有效期】` 到下一章节 |
|
||||||
|
| sample_type | `样本要求` 中“适用样本类型” |
|
||||||
|
| detection_targets | 从预期用途/检测原理中抽取基因、病原体、靶标 |
|
||||||
|
| applicable_instruments | `【适用仪器】` 到下一章节 |
|
||||||
|
| test_method | `【检验方法】` 摘要 |
|
||||||
|
| standards | 正则抽取 `GB/T`、`YY/T`、`YY`、`GB` 等标准号 |
|
||||||
|
|
||||||
|
### 8.3 LLM 抽取
|
||||||
|
|
||||||
|
LLM prompt 要求只输出 JSON:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"key": "product_name",
|
||||||
|
"label": "产品名称",
|
||||||
|
"value": "...",
|
||||||
|
"evidence": "...",
|
||||||
|
"confidence": 0.9
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"product_list_rows": [
|
||||||
|
{
|
||||||
|
"package_specification": "...",
|
||||||
|
"composition": "...",
|
||||||
|
"component_name": "...",
|
||||||
|
"main_component": "...",
|
||||||
|
"quantity": "..."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"standards": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
LLM 不允许填企业信息、分类编码、管理类别、临床评价路径等说明书无法证明的内容。
|
||||||
|
|
||||||
|
### 8.4 字段合并
|
||||||
|
|
||||||
|
`field_merge.py` 输出 `MergedField`:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| key | 字段编码 |
|
||||||
|
| label | 中文名 |
|
||||||
|
| value | 最终写入值 |
|
||||||
|
| source | rule、llm、missing、conflict |
|
||||||
|
| evidence | 来源片段 |
|
||||||
|
| confidence | 置信度 |
|
||||||
|
| highlight_reason | none、missing、llm_only、conflict、rag_candidate |
|
||||||
|
| needs_review | 是否需人工复核 |
|
||||||
|
|
||||||
|
合并规则:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| rule 与 LLM 一致 | 采用值,不高亮 |
|
||||||
|
| rule 与 LLM 不一致 | 默认采用规则值;模板配置指定了优先来源时按配置执行;标记 conflict 并高亮 |
|
||||||
|
| rule 缺失、LLM 命中 | 采用 LLM 值,标记 llm_only |
|
||||||
|
| 全部缺失 | 写 `/`,标记 missing |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、文档生成设计
|
||||||
|
|
||||||
|
### 9.1 文档适配器接口
|
||||||
|
|
||||||
|
`document_writer.py` 定义统一接口:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DocumentAdapter:
|
||||||
|
def replace_text(self, old: str, new: str, *, highlight: bool = False) -> int: ...
|
||||||
|
def fill_table_cell(self, row_label: str, value: str, *, highlight: bool = False) -> bool: ...
|
||||||
|
def replace_table(self, marker: str, rows: list[dict], *, highlight_columns: list[str] | None = None) -> bool: ...
|
||||||
|
def highlight_value(self, value: str, reason: str) -> int: ...
|
||||||
|
def save(self, path: Path) -> Path: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
`.docx` 使用 `DocxDocumentAdapter`。`.doc` 使用 `LegacyDocDocumentAdapter`。
|
||||||
|
|
||||||
|
### 9.2 `.docx` 处理
|
||||||
|
|
||||||
|
能力:
|
||||||
|
|
||||||
|
| 能力 | 实现 |
|
||||||
|
| --- | --- |
|
||||||
|
| 段落替换 | 遍历 paragraph runs |
|
||||||
|
| 表格行填充 | 按首列 label 定位 |
|
||||||
|
| 单元格高亮 | `w:shd` 黄色底色 |
|
||||||
|
| 字体颜色 | 冲突项可红色字体 |
|
||||||
|
| 产品列表重建 | 清空目标表格数据行后追加 |
|
||||||
|
| 声明日期替换 | 按日期正则或段落末尾替换 |
|
||||||
|
|
||||||
|
### 9.3 `.doc` 处理
|
||||||
|
|
||||||
|
设计 `LegacyDocDocumentAdapter`,对外提供与 `.docx` 一致能力。底层按可用性选择适配器:
|
||||||
|
|
||||||
|
| 适配器 | 定位 |
|
||||||
|
| --- | --- |
|
||||||
|
| `WordComDocAdapter` | Windows + Microsoft Word 环境下优先,直接打开 `.doc`、查找替换、设置高亮并保存 `.doc` |
|
||||||
|
| `LibreOfficeUnoDocAdapter` | LibreOffice UNO/API 环境下使用,直接操作文档模型 |
|
||||||
|
| `OleDocReadOnlyAdapter` | 仅可读取时用于诊断,不满足写入验收 |
|
||||||
|
| `ConversionFallbackAdapter` | 兜底路径,可转换为 `.docx` 后处理,但不能作为唯一实现 |
|
||||||
|
|
||||||
|
功能设计约束:
|
||||||
|
|
||||||
|
| 约束 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 不静默降级 | `.doc` 原生写入失败时必须记录适配器失败原因,随后尝试 `.docx` 兜底;兜底仍失败时该文件失败并触发 partial_success |
|
||||||
|
| 不只靠转换 | 转换可作为兜底,但设计主路径必须是文档适配器 |
|
||||||
|
| 能力探测 | 启动时或节点执行时检测适配器可用性 |
|
||||||
|
| 追溯记录 | 写入 `.doc` 的适配器类型和失败信息写入 artifact metadata |
|
||||||
|
|
||||||
|
### 9.4 7 个文件生成策略
|
||||||
|
|
||||||
|
| 模板 | 策略服务 | 关键动作 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| CH1.2 监管信息目录 | `generate_directory_doc` | 替换产品名称;页码沿用样例 |
|
||||||
|
| CH1.4 申请表 | `generate_application_form_doc` | 填表格行;缺失字段 `/` 黄底 |
|
||||||
|
| CH1.5 产品列表 | `generate_product_list_doc` | 使用样例表头重建产品列表;货号 `/` 黄底 |
|
||||||
|
| CH1.9 申报前沟通说明 | `generate_pre_submission_doc` | `.doc` 原生替换产品名和公司名;原生失败则输出 `.docx` 兜底文件;两者均失败才不进入 zip |
|
||||||
|
| CH1.11.1 符合标准清单 | `generate_standard_list_doc` | 说明书标准号直接写;候选/缺失高亮 |
|
||||||
|
| CH1.11.5 真实性声明 | `generate_authenticity_statement_doc` | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
| CH1.11.6 符合性声明 | `generate_compliance_statement_doc` | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
|
||||||
|
`generate_docs` 节点内部允许多线程并发处理 7 个目标文件。每个文档使用独立模板副本,子线程只返回生成结果,数据库 artifact/export 记录由主线程统一写入,避免并发写库和共享文件冲突。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、标准清单设计
|
||||||
|
|
||||||
|
系统中已有知识库/RAG 能力,不新增单独 RAG 模块。本功能只新增 `standard_candidates.py` 作为业务服务,调用既有知识库搜索能力。
|
||||||
|
|
||||||
|
处理规则:
|
||||||
|
|
||||||
|
| 来源 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 说明书明确标准号 | 写入标准清单,记录 `source=instruction` |
|
||||||
|
| 知识库候选标准 | 可写入候选区或追溯清单,标记 `rag_candidate` 并高亮 |
|
||||||
|
| 无命中 | 写 `/` 并黄底 |
|
||||||
|
| 样例标准 | 不无条件沿用 |
|
||||||
|
|
||||||
|
查询建议:
|
||||||
|
|
||||||
|
```text
|
||||||
|
体外诊断试剂 核酸扩增 检测试剂 标准 清单
|
||||||
|
新型冠状病毒 2019-nCoV 核酸检测试剂盒 荧光PCR 标准
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、zip 与导出设计
|
||||||
|
|
||||||
|
### 11.1 ExportType 扩展
|
||||||
|
|
||||||
|
`ExportedSummaryFile.ExportType` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
ZIP = "zip", "ZIP"
|
||||||
|
```
|
||||||
|
|
||||||
|
下载 content type 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
"zip": "application/zip"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 11.2 导出记录
|
||||||
|
|
||||||
|
| 文件 | export_category | export_type |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 第1章 监管信息(预生成版).zip | regulatory_info_package | zip |
|
||||||
|
| 7 个生成文件 | generated_document | word 或 legacy_word |
|
||||||
|
| 追溯清单 Excel | traceability | excel |
|
||||||
|
|
||||||
|
追溯 JSON 和抽取过程 JSON 只保存到后台 `logs/` 目录和 artifact 记录,不作为用户下载入口。用户侧只提供追溯 Excel 下载。
|
||||||
|
|
||||||
|
如果不新增 `legacy_word` export_type,则 `.doc` 也可暂用 `word`,下载时通过文件扩展名确定 content type(MIME)。功能设计建议在新增 content type 映射时按扩展名兜底,避免 `.doc` 被当作 `.docx` 返回。
|
||||||
|
|
||||||
|
### 11.3 权限
|
||||||
|
|
||||||
|
`file_summary.views._export_for_user` 增加:
|
||||||
|
|
||||||
|
```text
|
||||||
|
if exported.workflow_type == "regulatory_info_package":
|
||||||
|
查询 RegulatoryInfoPackageBatch
|
||||||
|
校验 conversation__user == request.user 且 is_deleted=False
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、数据模型设计
|
||||||
|
|
||||||
|
### 12.1 RegulatoryInfoPackageBatch
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageBatch(models.Model):
|
||||||
|
    class Status(models.TextChoices):
|
||||||
|
        PENDING = "pending", "待执行"
|
||||||
|
        RUNNING = "running", "执行中"
|
||||||
|
        WAITING_USER = "waiting_user", "等待用户"
|
||||||
|
        SUCCESS = "success", "成功"
|
||||||
|
        PARTIAL_SUCCESS = "partial_success", "部分成功"
|
||||||
|
        FAILED = "failed", "失败"
|
||||||
|
        CANCELLED = "cancelled", "已取消"
|
||||||
|
```
|
||||||
|
|
||||||
|
字段建议:
|
||||||
|
|
||||||
|
| 字段 | 类型 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| conversation | FK Conversation | 所属对话 |
|
||||||
|
| user | FK User | 发起用户 |
|
||||||
|
| trigger_message | FK Message | 触发消息 |
|
||||||
|
| source_attachment | FK FileAttachment | 直接选中的说明书附件 |
|
||||||
|
| source_summary_batch | FK FileSummaryBatch | 可选文件汇总批次 |
|
||||||
|
| source_summary_item_id | PositiveBigIntegerField | 可选汇总条目 ID |
|
||||||
|
| batch_no | CharField unique | RIP 批次号 |
|
||||||
|
| status | CharField | 状态 |
|
||||||
|
| source_file_name | CharField | 说明书原文件名 |
|
||||||
|
| source_storage_path | CharField | 说明书路径 |
|
||||||
|
| product_name | CharField | 抽取产品名 |
|
||||||
|
| output_zip_name | CharField | zip 文件名 |
|
||||||
|
| generated_files | JSONField | 7 个文件状态 |
|
||||||
|
| missing_fields | JSONField | 缺失项 |
|
||||||
|
| llm_only_fields | JSONField | LLM-only 项 |
|
||||||
|
| conflict_fields | JSONField | 冲突项 |
|
||||||
|
| risk_notes | JSONField | 风险提示 |
|
||||||
|
| template_config_version | CharField | 配置版本 |
|
||||||
|
| template_config_hash | CharField | 配置 hash |
|
||||||
|
| adapter_summary | JSONField | `.doc`/`.docx` 适配器信息 |
|
||||||
|
| work_dir | CharField | 工作目录 |
|
||||||
|
| error_message | TextField | 错误信息 |
|
||||||
|
| created_at/started_at/finished_at | DateTimeField | 创建时间与执行起止时间(`created_at` 供 `idx_ra_rip_batch_user_created` 索引使用) |
|
||||||
|
| is_deleted | BooleanField | 软删除 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引 | 字段 |
|
||||||
|
| --- | --- |
|
||||||
|
| idx_ra_rip_batch_conv_status | conversation, status |
|
||||||
|
| idx_ra_rip_batch_user_created | user, created_at |
|
||||||
|
| idx_ra_rip_batch_attachment | source_attachment |
|
||||||
|
| idx_ra_rip_batch_summary | source_summary_batch |
|
||||||
|
|
||||||
|
### 12.2 RegulatoryInfoPackageArtifact
|
||||||
|
|
||||||
|
产物类型:
|
||||||
|
|
||||||
|
| 类型 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| template_copy | 模板副本 |
|
||||||
|
| instruction_extract | 说明书抽取结果 |
|
||||||
|
| field_extract_result | 字段抽取结果 |
|
||||||
|
| merged_fields | 合并字段 |
|
||||||
|
| generated_document | 生成文件 |
|
||||||
|
| traceability | 追溯清单 |
|
||||||
|
| zip_package | zip 包 |
|
||||||
|
| notification_record | 通知记录 |
|
||||||
|
|
||||||
|
字段与 `ApplicationFormFillArtifact` 保持一致:`batch`、`artifact_type`、`file_format`、`name`、`file_name`、`storage_path`、`file_size`、`content_hash`、`metadata`、`created_by_node`、`is_deleted`。
|
||||||
|
|
||||||
|
`file_format` 增加 `DOC`、`ZIP`。
|
||||||
|
|
||||||
|
### 12.3 RegulatoryInfoPackageNotificationRecord
|
||||||
|
|
||||||
|
结构对齐 `ApplicationFormFillNotificationRecord`:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 所属 RIP 批次 |
|
||||||
|
| recipient | 通知对象 |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| export_ids | 导出 ID |
|
||||||
|
| message_summary | 通知摘要 |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
| retry_count | 重试次数 |
|
||||||
|
| external_message_id | 外部消息 ID |
|
||||||
|
| error_message | 错误 |
|
||||||
|
| sent_at | 发送时间 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、工作流设计
|
||||||
|
|
||||||
|
### 13.1 节点定义
|
||||||
|
|
||||||
|
| 节点编码 | 节点名称 | 触发服务 | 成功条件 | 失败处理 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| prepare | 准备资料 | `RegulatoryInfoPackageWorkflowExecutor` | 找到唯一说明书 | 缺失或多候选进入 waiting_user |
|
||||||
|
| template_copy | 复制模板 | `TemplateRepository` | 7 个模板进入批次目录 | 缺关键模板则 failed |
|
||||||
|
| text_extract | 抽取说明书 | `InstructionExtractService` | 提取文本、章节和表格 | 失败则 failed |
|
||||||
|
| field_extract | 抽取字段 | `FieldExtractionService` | 规则/LLM 结果留底 | LLM 失败可继续 |
|
||||||
|
| field_merge | 合并字段 | `FieldMergeService` | 输出 merged_fields | 无产品名仍继续,产品名 `/` |
|
||||||
|
| generate_docs | 生成材料 | `PackageGenerateService` | 生成 7 个文件 | 单文件失败可 partial_success |
|
||||||
|
| highlight_review_items | 标记待确认 | 文档适配器 | 缺失/LLM-only/冲突完成高亮 | 失败则对应文件失败 |
|
||||||
|
| trace_export | 追溯清单 | `TraceabilityExportService` | 生成 Excel/JSON | 不阻断 zip |
|
||||||
|
| zip_export | 打包下载 | `ZipExportService` | 生成 zip 并创建导出记录 | zip 失败则保留单文件 |
|
||||||
|
| notify | 通知 | `Notifier` | 写通知记录 | 不阻断下载 |
|
||||||
|
| completed | 完成 | Executor | 状态落定、摘要写入对话 | - |
|
||||||
|
|
||||||
|
### 13.2 状态落定
|
||||||
|
|
||||||
|
| 结果 | 批次状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 7 个文件、zip、追溯清单均成功 | success |
|
||||||
|
| zip 成功但部分单文件/追溯/通知失败 | partial_success |
|
||||||
|
| 单文件成功但 zip 失败 | partial_success |
|
||||||
|
| 关键输入或模板缺失 | failed 或 waiting_user |
|
||||||
|
| 所有目标文件生成失败 | failed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、接口设计
|
||||||
|
|
||||||
|
### 14.1 URL
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/regulatory-info-package/health/
|
||||||
|
POST /api/review-agent/regulatory-info-package/start/
|
||||||
|
GET /api/review-agent/regulatory-info-package/<batch_id>/status/
|
||||||
|
POST /api/review-agent/regulatory-info-package/<batch_id>/select-input/
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14.2 start
|
||||||
|
|
||||||
|
请求:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"conversation_id": 1,
|
||||||
|
"attachment_id": 10,
|
||||||
|
"file_summary_batch_id": 20,
|
||||||
|
"source_summary_item_id": 30
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
响应:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"batch_id": 1,
|
||||||
|
"workflow_type": "regulatory_info_package",
|
||||||
|
"batch_no": "RIP-20260610153000-abcdef",
|
||||||
|
"status": "pending"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 14.3 status
|
||||||
|
|
||||||
|
响应包含:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 批次基础信息、产品名、缺失数、LLM-only 数、冲突数 |
|
||||||
|
| nodes | 工作流节点 |
|
||||||
|
| generated_files | 7 个文件状态 |
|
||||||
|
| exports | zip、单文件、追溯清单下载 |
|
||||||
|
| missing_fields | 缺失项摘要 |
|
||||||
|
| llm_only_fields | LLM-only 摘要 |
|
||||||
|
| conflict_fields | 冲突摘要 |
|
||||||
|
| risk_notes | 风险提示 |
|
||||||
|
| notifications | 通知记录 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、前端设计
|
||||||
|
|
||||||
|
### 15.1 对话框底部快捷提示
|
||||||
|
|
||||||
|
`templates/home.html` 增加 tool chip:
|
||||||
|
|
||||||
|
```text
|
||||||
|
根据说明书生成第1章监管信息
|
||||||
|
```
|
||||||
|
|
||||||
|
点击后填入 prompt,不自动发送,保持现有交互一致。
|
||||||
|
|
||||||
|
### 15.2 工作流卡片
|
||||||
|
|
||||||
|
`build_workflow_cards()` 增加 RIP 批次,前端复用现有卡片样式,展示:
|
||||||
|
|
||||||
|
| 信息 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次号 | RIP-... |
|
||||||
|
| 状态 | pending/running/waiting_user/success/partial_success/failed/cancelled |
|
||||||
|
| 风险摘要 | 缺失字段 N、LLM复核 N、提示 N |
|
||||||
|
| 节点 | RIP 节点 |
|
||||||
|
|
||||||
|
### 15.3 状态轮询
|
||||||
|
|
||||||
|
`summaryPanel` 增加:
|
||||||
|
|
||||||
|
```html
|
||||||
|
data-regulatory-info-package-status-url-template="/api/review-agent/regulatory-info-package/__batch_id__/status/"
|
||||||
|
```
|
||||||
|
|
||||||
|
`static/js/app.js` 在工作流类型判断中增加 `regulatory_info_package`。
|
||||||
|
|
||||||
|
### 15.4 结果展示
|
||||||
|
|
||||||
|
状态 payload 中 `exports` 按类别展示:
|
||||||
|
|
||||||
|
| 类别 | 展示 |
|
||||||
|
| --- | --- |
|
||||||
|
| regulatory_info_package(zip 包) | 主下载按钮 |
|
||||||
|
| generated_document | 单文件下载列表 |
|
||||||
|
| traceability | 追溯清单下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、通知设计
|
||||||
|
|
||||||
|
复用统一通知服务,新增 `build_regulatory_info_package_context(batch)`:
|
||||||
|
|
||||||
|
| 摘要项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 工作流 | 第1章监管信息材料包生成 |
|
||||||
|
| 批次号 | RIP-... |
|
||||||
|
| 产品名称 | 抽取产品名 |
|
||||||
|
| 导出文件 | zip + 单文件数量 |
|
||||||
|
| 待确认 | 缺失项、LLM-only、冲突项数量 |
|
||||||
|
| 下载提示 | 进入系统下载 zip |
|
||||||
|
|
||||||
|
通知失败不影响下载。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、异常与降级
|
||||||
|
|
||||||
|
| 异常 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 未找到说明书 | 返回提示;可不创建批次,或创建 waiting_user 状态的批次等待用户补充说明书 |
|
||||||
|
| 多说明书候选 | waiting_user,等待选择 |
|
||||||
|
| YAML 配置错误 | failed,提示配置错误 |
|
||||||
|
| 样例模板缺失 | failed,列出缺失模板 |
|
||||||
|
| LLM 失败 | 使用规则抽取继续,写 risk_notes |
|
||||||
|
| 规则抽取为空 | 使用 LLM-only 继续并高亮 |
|
||||||
|
| 知识库不可用 | 标准清单填 `/` 并高亮,写 risk_notes |
|
||||||
|
| `.doc` 适配器不可用 | CH1.9 失败,批次 partial_success 或 failed,明确原因 |
|
||||||
|
| zip 打包失败 | 保留单文件下载,状态 partial_success |
|
||||||
|
| 下载文件不存在 | 返回 404,记录日志 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、安全与权限
|
||||||
|
|
||||||
|
| 控制点 | 设计 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次访问 | `conversation__user == request.user` |
|
||||||
|
| 附件访问 | 附件必须属于当前对话和当前用户 |
|
||||||
|
| 汇总批次访问 | 批次必须属于当前对话和当前用户 |
|
||||||
|
| 导出下载 | `workflow_type=regulatory_info_package` 时反查 RIP 批次 |
|
||||||
|
| 工作目录 | `media/regulatory_info_package/{user_id}/{conversation_id}/{batch_no}` |
|
||||||
|
| 路径安全 | 所有复制/输出路径必须校验位于批次工作目录内 |
|
||||||
|
| 原始模板保护 | 只读复制,不允许覆盖 `docs/0.原始材料` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、测试设计
|
||||||
|
|
||||||
|
| 测试文件 | 覆盖 |
|
||||||
|
| --- | --- |
|
||||||
|
| `tests/test_regulatory_info_package_models.py` | 批次、产物、通知、zip 导出类型 |
|
||||||
|
| `tests/test_regulatory_info_package_trigger.py` | 固定规则与 LLM 路由 |
|
||||||
|
| `tests/test_regulatory_info_package_input_select.py` | 说明书选择、多候选 waiting_user |
|
||||||
|
| `tests/test_regulatory_info_package_template_config.py` | YAML 加载、模板存在性校验 |
|
||||||
|
| `tests/test_regulatory_info_package_field_extract.py` | 说明书字段、表格、标准号抽取 |
|
||||||
|
| `tests/test_regulatory_info_package_field_merge.py` | missing、llm_only、conflict 高亮决策 |
|
||||||
|
| `tests/test_regulatory_info_package_docx_writer.py` | docx 替换、表格填充、黄底 |
|
||||||
|
| `tests/test_regulatory_info_package_legacy_doc.py` | `.doc` 适配器能力探测和失败提示 |
|
||||||
|
| `tests/test_regulatory_info_package_zip.py` | zip 只包含 success/fallback_success 文件 |
|
||||||
|
| `tests/test_regulatory_info_package_workflow.py` | 工作流节点和状态落定 |
|
||||||
|
| `tests/test_regulatory_info_package_views.py` | start/status/权限 |
|
||||||
|
| `tests/test_regulatory_info_package_frontend.py` | 卡片、快捷提示、状态 URL |
|
||||||
|
|
||||||
|
回归测试:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_*.py tests/test_file_summary_views.py tests/test_regulatory_*.py
|
||||||
|
```
|
||||||
|
|
||||||
|
实际执行时按项目现有测试命名拆分运行。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、实施顺序建议
|
||||||
|
|
||||||
|
| 阶段 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| RIP-1 | 模型、迁移、ExportType.ZIP、下载权限 |
|
||||||
|
| RIP-2 | 模块骨架、YAML 配置、输入说明书选择 |
|
||||||
|
| RIP-3 | 路由 action、对话启动、工作流节点 |
|
||||||
|
| RIP-4 | 说明书文本/表格抽取、规则 + LLM 字段抽取 |
|
||||||
|
| RIP-5 | docx 文档生成、黄底高亮、产品列表重建 |
|
||||||
|
| RIP-6 | `.doc` 适配器、CH1.9 处理能力 |
|
||||||
|
| RIP-7 | 追溯清单、zip 导出、助手摘要 |
|
||||||
|
| RIP-8 | 前端卡片、快捷提示、状态轮询 |
|
||||||
|
| RIP-9 | 通知、权限、全量回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十一、待确认与风险
|
||||||
|
|
||||||
|
| 风险 | 说明 | 建议 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `.doc` 原生写入难度 | Python 标准库不支持 Word `.doc` 完整写入 | 优先调研 Word COM 或 LibreOffice UNO;无原生能力时允许可追溯 `.docx` 兜底 |
|
||||||
|
| 模板字段化工作量 | 需要先把样例模板整理为代码可识别字段 | 优先覆盖 CH1.4、CH1.5 和声明类关键字段;缺少 Tag 时通过模板审计提前暴露 |
|
||||||
|
| 样例模板文本碎片 | Word run 拆分可能导致简单字符串替换失败 | 文档写入服务需支持跨 run 替换 |
|
||||||
|
| 产品列表结构复杂 | 说明书表格可能存在合并单元格和多规格 | 先覆盖目标说明书结构,再扩展通用表格归一化 |
|
||||||
|
| 标准清单准确性 | 说明书未必包含标准号,知识库候选不能直接作为结论 | 候选全部高亮并进入追溯清单 |
|
||||||
|
| LLM-only 风险 | LLM 推断可能过度补全 | 写入但高亮,追溯清单标记需复核 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十二、设计结论
|
||||||
|
|
||||||
|
| 编号 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| D1 | 功能设计文档新增为 `docs/2.功能设计/5.第1章监管信息材料包生成.md` |
|
||||||
|
| D2 | 新增独立模块 `review_agent/regulatory_info_package/` |
|
||||||
|
| D3 | 新建独立批次、产物、通知三张表 |
|
||||||
|
| D4 | 输入选择以 active 附件为主,兼容最近成功文件汇总批次 |
|
||||||
|
| D5 | `ExportedSummaryFile.ExportType` 扩展 `zip` |
|
||||||
|
| D6 | 采用 YAML 配置驱动 7 个模板 |
|
||||||
|
| D7 | 模板字段优先使用内容控件 Tag 或稳定占位符,行标签定位仅作为兜底 |
|
||||||
|
| D8 | `.doc` 通过 `LegacyWordDocumentService` 适配器实现与 `.docx` 等价接口,原生能力不可用时允许可追溯兜底 |
|
||||||
|
| D9 | 标准候选复用系统已有知识库/RAG,不新增独立 RAG |
|
||||||
|
| D10 | 前端只扩展现有对话页、工作流卡片、快捷提示和状态轮询 |
|
||||||
|
| D11 | 本轮先产出功能设计;数据库设计先在本文档中给出,后续可拆成正式数据库设计文档 |
|
||||||
433
docs/3.数据库设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
433
docs/3.数据库设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,433 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表数据库设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 数据库类型 | SQLite / Django ORM |
|
||||||
|
| 表名前缀 | ra_ |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立填表批次 | 自动填表作为独立工作流,使用独立批次表,不强绑法规核查批次 |
|
||||||
|
| 复用文件来源 | 填表批次必须关联一个成功的 `FileSummaryBatch`,不重复保存文件清单 |
|
||||||
|
| 可选复用法规条件 | 如当前对话已有已确认法规核查批次,可通过可空外键复用注册类型等条件 |
|
||||||
|
| 导出记录复用 | Word、Excel、JSON、PDF 等下载文件继续进入 `ExportedSummaryFile` |
|
||||||
|
| 过程产物独立 | 自动填表过程产物单独建表,避免和法规核查 `RegulatoryArtifact` 混用 |
|
||||||
|
| 通知记录独立 | 自动填表飞书通知单独建表,字段风格与法规通知记录保持一致 |
|
||||||
|
| 大文本不入库 | 字段抽取 JSON、追溯清单和模板副本保存为文件,数据库仅保存路径、hash 和摘要 |
|
||||||
|
| 字段明细暂不入库 | 本期不新增字段级明细表;字段结果保存在 JSON/Excel 产物与批次摘要中 |
|
||||||
|
| SQLite 兼容 | 字段类型、索引和约束优先保证当前 SQLite + Django ORM 可运行 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、ER 图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
AUTH_USER ||--o{ CONVERSATION : owns
|
||||||
|
CONVERSATION ||--o{ RA_FILE_SUMMARY_BATCH : has
|
||||||
|
RA_FILE_SUMMARY_BATCH ||--o{ RA_FILE_SUMMARY_ITEM : produces
|
||||||
|
RA_FILE_SUMMARY_BATCH ||--o{ RA_APPLICATION_FORM_FILL_BATCH : feeds
|
||||||
|
RA_REGULATORY_REVIEW_BATCH ||--o{ RA_APPLICATION_FORM_FILL_BATCH : optionally_confirms
|
||||||
|
AUTH_USER ||--o{ RA_APPLICATION_FORM_FILL_BATCH : runs
|
||||||
|
CONVERSATION ||--o{ RA_APPLICATION_FORM_FILL_BATCH : has
|
||||||
|
MESSAGE ||--o{ RA_APPLICATION_FORM_FILL_BATCH : triggers
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_APPLICATION_FORM_FILL_ARTIFACT : keeps
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_APPLICATION_FORM_FILL_NOTIFICATION_RECORD : sends
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_EXPORTED_SUMMARY_FILE : exports
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_WORKFLOW_NODE_RUN : tracks
|
||||||
|
RA_APPLICATION_FORM_FILL_BATCH ||--o{ RA_WORKFLOW_EVENT : emits
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:`ra_workflow_node_run`、`ra_workflow_event`、`ra_exported_summary_file` 已在第二批中被通用化,通过 `workflow_type` 与 `workflow_batch_id` 支持多工作流。本功能使用 `workflow_type=application_form_fill`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、表结构设计
|
||||||
|
|
||||||
|
### 3.1 ra_application_form_fill_batch
|
||||||
|
|
||||||
|
一次自动填表工作流批次。该表记录本次触发来源、选择模板、输出类型、注册类型、产品名称、冲突摘要、工作目录和状态。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| conversation_id | ForeignKey | bigint | 是 | 绑定对话 |
|
||||||
|
| user_id | ForeignKey | bigint | 是 | 发起用户 |
|
||||||
|
| trigger_message_id | ForeignKey | bigint | 否 | 触发填表工作流的用户消息 |
|
||||||
|
| source_summary_batch_id | ForeignKey | bigint | 是 | 文件来源汇总批次 |
|
||||||
|
| source_regulatory_batch_id | ForeignKey | bigint | 否 | 可选,复用已确认法规核查批次条件 |
|
||||||
|
| batch_no | CharField(64) | varchar(64) | 是 | 填表批次编号,唯一 |
|
||||||
|
| status | CharField(30) | varchar(30) | 是 | pending、running、waiting_user、success、partial_success、failed、cancelled |
|
||||||
|
| requested_templates | JSONField | text/json | 是 | 用户指定模板编码列表;未指定为空数组 |
|
||||||
|
| selected_templates | JSONField | text/json | 是 | 系统实际选择模板编码列表 |
|
||||||
|
| output_types | JSONField | text/json | 是 | 请求输出类型,如 word、excel、json、pdf |
|
||||||
|
| registration_type | CharField(80) | varchar(80) | 否 | 识别出的注册类型 |
|
||||||
|
| registration_type_source | CharField(40) | varchar(40) | 否 | user_message、regulatory_batch、file_extract、unknown |
|
||||||
|
| product_name | CharField(200) | varchar(200) | 否 | 产品名称 |
|
||||||
|
| conflict_summary | JSONField | text/json | 是 | 冲突字段摘要 |
|
||||||
|
| risk_notes | JSONField | text/json | 是 | 不适用模板、低置信度、PDF 待生成等提示 |
|
||||||
|
| template_config_version | CharField(80) | varchar(80) | 否 | 模板配置版本 |
|
||||||
|
| template_config_hash | CharField(128) | varchar(128) | 否 | 模板配置文件 hash |
|
||||||
|
| work_dir | CharField(500) | varchar(500) | 否 | 批次工作目录 |
|
||||||
|
| error_message | TextField | text | 否 | 批次异常说明 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| started_at | DateTimeField | datetime | 否 | 开始时间 |
|
||||||
|
| finished_at | DateTimeField | datetime | 否 | 完成时间 |
|
||||||
|
| archived_at | DateTimeField | datetime | 否 | 归档时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
唯一约束:
|
||||||
|
|
||||||
|
| 约束名 | 字段 |
|
||||||
|
| --- | --- |
|
||||||
|
| uq_ra_aff_batch_no | batch_no |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_aff_batch_conv_status | conversation_id, status | 查询对话下填表批次状态 |
|
||||||
|
| idx_ra_aff_batch_summary | source_summary_batch_id | 根据文件汇总批次查询填表历史 |
|
||||||
|
| idx_ra_aff_batch_regulatory | source_regulatory_batch_id | 根据法规核查批次查询关联填表历史 |
|
||||||
|
| idx_ra_aff_batch_user_created | user_id, created_at | 查询用户发起记录 |
|
||||||
|
| idx_ra_aff_batch_created | created_at | 按创建时间查询 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.2 ra_application_form_fill_artifact
|
||||||
|
|
||||||
|
自动填表过程产物表。仅保存文件元数据,不保存字段抽取大 JSON 的全文。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属自动填表批次 |
|
||||||
|
| artifact_type | CharField(60) | varchar(60) | 是 | template_copy、field_extract_result、merged_fields、traceability、filled_template、notification_record |
|
||||||
|
| file_format | CharField(20) | varchar(20) | 是 | json、excel、docx、pdf、markdown |
|
||||||
|
| name | CharField(160) | varchar(160) | 是 | 产物名称 |
|
||||||
|
| file_name | CharField(255) | varchar(255) | 是 | 文件名 |
|
||||||
|
| storage_path | CharField(500) | varchar(500) | 是 | 存储路径 |
|
||||||
|
| file_size | BigIntegerField | bigint | 是 | 文件大小 |
|
||||||
|
| content_hash | CharField(128) | varchar(128) | 是 | 文件 SHA-256 hash |
|
||||||
|
| metadata | JSONField | text/json | 是 | 模板编码、输出类型、生成状态、错误摘要等 |
|
||||||
|
| created_by_node | CharField(60) | varchar(60) | 否 | 产生该产物的节点 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_aff_artifact_batch_type | batch_id, artifact_type | 查询批次过程产物 |
|
||||||
|
| idx_ra_aff_artifact_format | file_format | 按文件格式查询 |
|
||||||
|
| idx_ra_aff_artifact_created | created_at | 按时间追溯 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.3 ra_application_form_fill_notification_record
|
||||||
|
|
||||||
|
自动填表飞书通知记录表。通知失败不阻断文件下载,但需要留痕和支持后续重试。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属自动填表批次 |
|
||||||
|
| recipient_id | ForeignKey(User) | bigint | 是 | 通知对象,默认上传人/发起人 |
|
||||||
|
| channel | CharField(30) | varchar(30) | 是 | feishu_cli、feishu_api、mock |
|
||||||
|
| template_codes | JSONField | text/json | 是 | 本次通知涉及模板 |
|
||||||
|
| export_ids | JSONField | text/json | 是 | 本次通知关联导出文件 ID |
|
||||||
|
| message_summary | TextField | text | 是 | 通知摘要 |
|
||||||
|
| send_status | CharField(20) | varchar(20) | 是 | pending、success、failed |
|
||||||
|
| retry_count | PositiveIntegerField | integer | 是 | 已重试次数 |
|
||||||
|
| external_message_id | CharField(120) | varchar(120) | 否 | 飞书外部消息 ID |
|
||||||
|
| error_message | TextField | text | 否 | 失败原因 |
|
||||||
|
| sent_at | DateTimeField | datetime | 否 | 发送成功时间 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_aff_notify_batch | batch_id, created_at | 查询批次通知记录 |
|
||||||
|
| idx_ra_aff_notify_recipient | recipient_id, send_status | 查询用户通知状态 |
|
||||||
|
| idx_ra_aff_notify_status | send_status, retry_count | 查询待重试通知 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、既有表扩展
|
||||||
|
|
||||||
|
### 4.1 ra_exported_summary_file
|
||||||
|
|
||||||
|
继续复用导出文件表,需扩展导出类型。
|
||||||
|
|
||||||
|
| 字段/枚举 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| export_type | 增加 `word`、`pdf` |
|
||||||
|
| workflow_type | 使用 `application_form_fill` |
|
||||||
|
| workflow_batch_id | 记录 `ApplicationFormFillBatch.id` |
|
||||||
|
| export_category | 使用 `filled_template`、`traceability`、`extract_result` |
|
||||||
|
|
||||||
|
导出类型枚举:
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| markdown | Markdown | 既有报告 |
|
||||||
|
| excel | Excel | 追溯清单 |
|
||||||
|
| json | JSON | 字段抽取结果包 |
|
||||||
|
| word | Word | 填好的 Word 模板 |
|
||||||
|
| pdf | PDF | Word 转换后的 PDF,P1 预留 |
|
||||||
|
|
||||||
|
### 4.2 ra_workflow_node_run
|
||||||
|
|
||||||
|
本功能使用通用工作流字段:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | application_form_fill |
|
||||||
|
| workflow_batch_id | ApplicationFormFillBatch.id |
|
||||||
|
| node_group | form_fill |
|
||||||
|
| batch_id | 可为空或兼容性填充 source_summary_batch_id |
|
||||||
|
|
||||||
|
### 4.3 ra_workflow_event
|
||||||
|
|
||||||
|
本功能事件写入:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | application_form_fill |
|
||||||
|
| workflow_batch_id | ApplicationFormFillBatch.id |
|
||||||
|
| conversation_id | 当前对话 ID |
|
||||||
|
| payload | 节点状态、模板列表、冲突数量、导出文件等 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、枚举设计
|
||||||
|
|
||||||
|
### 5.1 ApplicationFormFillBatch.status
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| pending | 待执行 | 批次已创建,等待执行 |
|
||||||
|
| running | 执行中 | 工作流正在执行 |
|
||||||
|
| waiting_user | 等待用户 | 缺少文件汇总批次或关键条件 |
|
||||||
|
| success | 成功 | Word 和必要追溯产物生成成功 |
|
||||||
|
| partial_success | 部分成功 | 部分模板、PDF、追溯清单或通知失败 |
|
||||||
|
| failed | 失败 | 所有目标 Word 模板均生成失败 |
|
||||||
|
| cancelled | 已取消 | 用户或系统取消执行 |
|
||||||
|
|
||||||
|
### 5.2 artifact_type
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| template_copy | 模板副本 |
|
||||||
|
| field_extract_result | 规则/正则与 LLM 抽取原始结果 |
|
||||||
|
| merged_fields | 合并后的最终字段和冲突 |
|
||||||
|
| traceability | 字段来源追溯清单 |
|
||||||
|
| filled_template | 已填写模板 |
|
||||||
|
| notification_record | 通知记录产物 |
|
||||||
|
|
||||||
|
### 5.3 registration_type_source
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| user_message | 用户话语明确指定 |
|
||||||
|
| regulatory_batch | 复用已确认法规核查条件 |
|
||||||
|
| file_extract | 从文件内容抽取 |
|
||||||
|
| unknown | 未识别 |
|
||||||
|
|
||||||
|
### 5.4 通知枚举
|
||||||
|
|
||||||
|
| 字段 | value |
|
||||||
|
| --- | --- |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、JSON 字段结构建议
|
||||||
|
|
||||||
|
### 6.1 requested_templates / selected_templates
|
||||||
|
|
||||||
|
```json
|
||||||
|
["registration_certificate", "essential_principles"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 output_types
|
||||||
|
|
||||||
|
```json
|
||||||
|
["word", "excel", "json"]
|
||||||
|
```
|
||||||
|
|
||||||
|
PDF 作为 P1 预留,可在后续加入:
|
||||||
|
|
||||||
|
```json
|
||||||
|
["word", "pdf", "excel", "json"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 conflict_summary
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"field_key": "storage_condition",
|
||||||
|
"field_label": "产品储存条件及有效期",
|
||||||
|
"selected_value": "2-8℃保存,有效期12个月",
|
||||||
|
"selected_source": "说明书.docx",
|
||||||
|
"conflict_values": [
|
||||||
|
{
|
||||||
|
"value": "-20℃保存",
|
||||||
|
"source_file": "产品技术要求.docx",
|
||||||
|
"evidence": "储存条件:-20℃保存"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"handling": "说明书优先,模板内黄底红字高亮"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 risk_notes
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "template_registration_mismatch",
|
||||||
|
"message": "用户指定变更注册(备案)文件,但系统识别注册类型为首次注册,需人工确认。"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "pdf_pending",
|
||||||
|
"message": "PDF 转换为后续增强项,本次优先生成 Word。"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.5 artifact.metadata
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_code": "registration_certificate",
|
||||||
|
"output_type": "word",
|
||||||
|
"node_code": "word_fill",
|
||||||
|
"status": "success",
|
||||||
|
"conflict_count": 2
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、存储路径设计
|
||||||
|
|
||||||
|
自动填表工作目录按用户、对话和批次隔离:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/application_form_fill/{user_id}/{conversation_id}/{batch_no}/
|
||||||
|
```
|
||||||
|
|
||||||
|
目录结构:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/application_form_fill/12/1001/AFF-20260607153000-a1b2c3/
|
||||||
|
templates/
|
||||||
|
registration_certificate.source.docx
|
||||||
|
essential_principles.source.docx
|
||||||
|
filled/
|
||||||
|
AFF-20260607153000-a1b2c3-甲胎蛋白检测试剂盒-注册证格式.docx
|
||||||
|
exports/
|
||||||
|
AFF-20260607153000-a1b2c3-甲胎蛋白检测试剂盒-字段来源追溯清单.xlsx
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
notifications/
|
||||||
|
notification_record.json
|
||||||
|
```
|
||||||
|
|
||||||
|
所有产物写入 `ApplicationFormFillArtifact` 时必须记录 SHA-256 hash。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、权限与查询规则
|
||||||
|
|
||||||
|
### 8.1 批次访问权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
ApplicationFormFillBatch -> conversation -> user
|
||||||
|
必须等于当前 request.user
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 导出下载权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
ExportedSummaryFile.workflow_type == application_form_fill
|
||||||
|
-> workflow_batch_id
|
||||||
|
-> ApplicationFormFillBatch.conversation.user
|
||||||
|
```
|
||||||
|
|
||||||
|
若 `workflow_type=file_summary` 或 `regulatory_review`,仍按既有逻辑校验。
|
||||||
|
|
||||||
|
### 8.3 文件读取权限
|
||||||
|
|
||||||
|
自动填表只能读取 `source_summary_batch.items` 对应的文件,不允许从其他对话或其他批次随意读取文件。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、字段级数据库表暂缓说明
|
||||||
|
|
||||||
|
本期不新增 `ApplicationFormFillField` 字段级明细表。原因:
|
||||||
|
|
||||||
|
| 原因 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| Demo 主链路更轻 | 字段结果以 JSON 和 Excel 追溯清单即可满足下载复核 |
|
||||||
|
| 避免过早建模 | 字段结构依赖模板配置和后续人工修改交互,暂不固化表结构 |
|
||||||
|
| 查询需求有限 | 本期主要按批次下载文件,不做字段级统计和在线编辑 |
|
||||||
|
|
||||||
|
后续如需要在线确认、人工修改、字段级审计或批量统计,再新增字段级表。该事项写入 `docs/6.待办计划/第二阶段暂缓事项.md`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、Django Model 命名建议
|
||||||
|
|
||||||
|
| 表名 | Model 名称 |
|
||||||
|
| --- | --- |
|
||||||
|
| ra_application_form_fill_batch | ApplicationFormFillBatch |
|
||||||
|
| ra_application_form_fill_artifact | ApplicationFormFillArtifact |
|
||||||
|
| ra_application_form_fill_notification_record | ApplicationFormFillNotificationRecord |
|
||||||
|
|
||||||
|
建议模型仍集中放在 `review_agent/models.py`,与前两批现有模型保持一致;业务逻辑放在 `review_agent/application_form_fill/`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、验收检查点
|
||||||
|
|
||||||
|
| 序号 | 检查项 | 验收标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 独立批次 | 触发填表后生成 `ApplicationFormFillBatch` |
|
||||||
|
| 2 | 文件来源 | 每个填表批次都关联一个成功的 `FileSummaryBatch` |
|
||||||
|
| 3 | 可选法规条件 | 如有关联法规核查批次,可记录 `source_regulatory_batch` |
|
||||||
|
| 4 | 过程产物 | 字段抽取 JSON、合并结果、追溯清单、模板副本均可留底 |
|
||||||
|
| 5 | 导出复用 | 填好的 Word 和追溯清单进入 `ExportedSummaryFile` |
|
||||||
|
| 6 | 导出类型 | `ExportedSummaryFile.ExportType` 支持 `word`、`pdf` |
|
||||||
|
| 7 | 通知记录 | 飞书通知记录能保存状态、重试次数、失败原因 |
|
||||||
|
| 8 | 权限隔离 | A 用户对话下的填表批次和导出文件不能被 B 用户访问 |
|
||||||
|
| 9 | 字段表暂缓 | 字段级结果不入库,但能从 JSON/Excel 追溯产物复核 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、开发顺序建议
|
||||||
|
|
||||||
|
1. 扩展 `ExportedSummaryFile.ExportType`,增加 `word`、`pdf`。
|
||||||
|
2. 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord`。
|
||||||
|
3. 为新增状态字段定义 Django `TextChoices`。
|
||||||
|
4. 配置表名、索引和唯一约束。
|
||||||
|
5. 执行 `python manage.py makemigrations review_agent` 和 `python manage.py migrate`。
|
||||||
|
6. 编写模型测试,覆盖批次创建、产物 hash、通知重试字段、导出权限查询。
|
||||||
|
7. 将字段级数据库表和 PDF 转换能力写入待办计划。
|
||||||
302
docs/3.数据库设计/4.飞书通知与问答接入.md
Normal file
302
docs/3.数据库设计/4.飞书通知与问答接入.md
Normal file
@@ -0,0 +1,302 @@
|
|||||||
|
# 飞书通知与问答接入数据库设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/4.飞书通知与问答接入.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/4.飞书通知与问答接入.md |
|
||||||
|
| 数据库类型 | SQLite / Django ORM |
|
||||||
|
| 表名前缀 | ra_ |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 统一通知抽象 | 三个工作流共用统一通知服务和通用通知记录,减少重复实现 |
|
||||||
|
| 兼容现有表 | 现有法规通知、填表通知可保留;新增通用表作为后续统一入口 |
|
||||||
|
| 可判重 | 通知记录必须支持同一批次、同一流程、同一状态只发送一次 |
|
||||||
|
| 摘要入库 | 只保存发送摘要、状态、错误,不保存完整富文本 payload |
|
||||||
|
| 映射可维护 | 系统用户与飞书用户映射独立建表,通过 Django Admin 维护 |
|
||||||
|
| 问答可扩展 | 预留问答日志表,首期可不接事件回调 |
|
||||||
|
| SQLite 兼容 | 使用 Django ORM 常规字段,避免数据库特有能力 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、ER 图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
AUTH_USER ||--o{ RA_FEISHU_USER_MAPPING : maps
|
||||||
|
AUTH_USER ||--o{ RA_WORKFLOW_NOTIFICATION_RECORD : triggers
|
||||||
|
RA_FEISHU_USER_MAPPING ||--o{ RA_WORKFLOW_NOTIFICATION_RECORD : resolves
|
||||||
|
AUTH_USER ||--o{ RA_FEISHU_QUESTION_LOG : asks
|
||||||
|
|
||||||
|
RA_WORKFLOW_NOTIFICATION_RECORD {
|
||||||
|
bigint id
|
||||||
|
string workflow_type
|
||||||
|
bigint workflow_batch_id
|
||||||
|
string workflow_status
|
||||||
|
string dedupe_key
|
||||||
|
string channel
|
||||||
|
string target
|
||||||
|
string send_status
|
||||||
|
}
|
||||||
|
|
||||||
|
RA_FEISHU_USER_MAPPING {
|
||||||
|
bigint id
|
||||||
|
bigint system_user_id
|
||||||
|
string feishu_open_id
|
||||||
|
string feishu_user_id
|
||||||
|
string feishu_mobile
|
||||||
|
boolean is_active
|
||||||
|
}
|
||||||
|
|
||||||
|
RA_FEISHU_QUESTION_LOG {
|
||||||
|
bigint id
|
||||||
|
bigint system_user_id
|
||||||
|
string feishu_open_id
|
||||||
|
string intent
|
||||||
|
string query_object
|
||||||
|
string status
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、表结构设计
|
||||||
|
|
||||||
|
### 3.1 ra_feishu_user_mapping
|
||||||
|
|
||||||
|
系统用户与飞书用户标识映射表。首期通知发送给环境变量中配置的指定个人账号,本表通过 Django Admin 手工维护,用于后续按发起人私聊通知和飞书私聊问答身份识别。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| system_user_id | ForeignKey | bigint | 是 | 关联 Django 用户 |
|
||||||
|
| feishu_display_name | CharField(120) | varchar(120) | 否 | 飞书展示名,便于后台识别 |
|
||||||
|
| feishu_open_id | CharField(120) | varchar(120) | 否 | 飞书 open_id,优先用于 @ |
|
||||||
|
| feishu_user_id | CharField(120) | varchar(120) | 否 | 飞书 user_id,第二优先级 |
|
||||||
|
| feishu_mobile | CharField(40) | varchar(40) | 否 | 飞书手机号,兜底 |
|
||||||
|
| is_active | BooleanField | bool | 是 | 是否启用 |
|
||||||
|
| remark | CharField(255) | varchar(255) | 否 | 备注 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
|
||||||
|
|
||||||
|
约束:
|
||||||
|
|
||||||
|
| 约束名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| uq_ra_feishu_mapping_user | system_user_id | 一个系统用户首期只维护一条映射记录,是否启用由 `is_active` 控制 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_feishu_mapping_active | is_active | 后台筛选启用映射 |
|
||||||
|
| idx_ra_feishu_mapping_open | feishu_open_id | 后续私聊事件反查用户 |
|
||||||
|
| idx_ra_feishu_mapping_userid | feishu_user_id | 后续私聊事件反查用户 |
|
||||||
|
| idx_ra_feishu_mapping_mobile | feishu_mobile | 手机号兜底查询 |
|
||||||
|
|
||||||
|
校验规则:
|
||||||
|
|
||||||
|
| 规则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 至少一个飞书标识 | `feishu_open_id`、`feishu_user_id`、`feishu_mobile` 至少填写一个 |
|
||||||
|
| @ 优先级 | `feishu_open_id -> feishu_user_id -> feishu_mobile` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.2 ra_workflow_notification_record
|
||||||
|
|
||||||
|
通用工作流通知记录表。用于记录自动汇总、法规核查、自动填表的飞书通知发送结果。现有专项通知表可继续保留,后续逐步收敛到本表。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| workflow_type | CharField(40) | varchar(40) | 是 | file_summary、regulatory_review、application_form_fill |
|
||||||
|
| workflow_batch_id | PositiveBigIntegerField | bigint | 是 | 对应工作流批次 ID |
|
||||||
|
| workflow_batch_no | CharField(80) | varchar(80) | 是 | 批次编号冗余,便于展示 |
|
||||||
|
| workflow_status | CharField(40) | varchar(40) | 是 | success、partial_success、failed 等 |
|
||||||
|
| dedupe_key | CharField(160) | varchar(160) | 是 | 判重键 |
|
||||||
|
| trigger_user_id | ForeignKey | bigint | 是 | 发起人或上传人 |
|
||||||
|
| feishu_mapping_id | ForeignKey | bigint | 否 | 命中的飞书用户映射 |
|
||||||
|
| channel | CharField(40) | varchar(40) | 是 | mock、disabled、feishu_api、feishu_webhook |
|
||||||
|
| target | CharField(160) | varchar(160) | 否 | 指定个人账号名称、open_id、user_id 或目标标识 |
|
||||||
|
| at_display_name | CharField(120) | varchar(120) | 否 | 被 @ 人展示名 |
|
||||||
|
| at_identifier_type | CharField(30) | varchar(30) | 否 | open_id、user_id、mobile、missing |
|
||||||
|
| at_identifier_masked | CharField(120) | varchar(120) | 否 | 脱敏后的 @ 标识 |
|
||||||
|
| send_status | CharField(30) | varchar(30) | 是 | pending、success、failed、skipped_duplicate、disabled |
|
||||||
|
| message_title | CharField(200) | varchar(200) | 是 | 通知标题 |
|
||||||
|
| message_summary | TextField | text | 否 | 发送摘要,不保存完整 payload |
|
||||||
|
| result_url | CharField(500) | varchar(500) | 否 | 系统结果入口 |
|
||||||
|
| external_message_id | CharField(120) | varchar(120) | 否 | 飞书返回的消息 ID;Webhook 渠道一般为空,API 渠道发送时保存 |
|
||||||
|
| error_code | CharField(80) | varchar(80) | 否 | 飞书或客户端错误码 |
|
||||||
|
| error_message | TextField | text | 否 | 失败原因 |
|
||||||
|
| request_duration_ms | PositiveIntegerField | integer | 否 | HTTP 请求耗时 |
|
||||||
|
| sent_at | DateTimeField | datetime | 否 | 成功发送时间 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
|
||||||
|
|
||||||
|
唯一约束:
|
||||||
|
|
||||||
|
| 约束名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| uq_ra_notify_dedupe_key | dedupe_key | 同一批次、同一流程、同一状态只保留一条通知记录,避免重复发送 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_notify_workflow | workflow_type, workflow_batch_id | 批次详情页查询通知 |
|
||||||
|
| idx_ra_notify_user_created | trigger_user_id, created_at | 用户通知历史 |
|
||||||
|
| idx_ra_notify_status | send_status, created_at | 排查失败通知 |
|
||||||
|
| idx_ra_notify_batch_no | workflow_batch_no | 按批次编号检索 |
|
||||||
|
|
||||||
|
dedupe_key 生成规则:
|
||||||
|
|
||||||
|
```text
|
||||||
|
{workflow_type}:{workflow_batch_id}:{workflow_status}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.3 ra_feishu_question_log
|
||||||
|
|
||||||
|
飞书问答日志预留表。首期可创建表但不接入事件回调;后续私聊问答 MVP 使用该表记录问题、意图、查询对象、回答摘要和错误信息。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| system_user_id | ForeignKey | bigint | 否 | 识别出的系统用户 |
|
||||||
|
| feishu_mapping_id | ForeignKey | bigint | 否 | 命中的飞书映射 |
|
||||||
|
| feishu_open_id | CharField(120) | varchar(120) | 否 | 事件中的 open_id |
|
||||||
|
| feishu_user_id | CharField(120) | varchar(120) | 否 | 事件中的 user_id |
|
||||||
|
| source_type | CharField(30) | varchar(30) | 是 | private_chat、group_mention |
|
||||||
|
| message_id | CharField(120) | varchar(120) | 否 | 飞书消息 ID |
|
||||||
|
| question_text | TextField | text | 是 | 用户原始问题 |
|
||||||
|
| intent | CharField(60) | varchar(60) | 否 | batch_status、risk_summary、export_summary 等 |
|
||||||
|
| query_object | JSONField | text/json | 是 | 批次号、工作流类型、最近批次等查询对象 |
|
||||||
|
| answer_summary | TextField | text | 否 | 回答摘要,不保存完整回答正文 |
|
||||||
|
| permission_result | CharField(40) | varchar(40) | 否 | allowed、denied、unbound |
|
||||||
|
| status | CharField(30) | varchar(30) | 是 | success、failed、ignored |
|
||||||
|
| error_message | TextField | text | 否 | 异常说明 |
|
||||||
|
| processed_at | DateTimeField | datetime | 否 | 处理完成时间 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_feishu_q_user_created | system_user_id, created_at | 用户问答历史 |
|
||||||
|
| idx_ra_feishu_q_intent | intent, created_at | 按意图分析 |
|
||||||
|
| idx_ra_feishu_q_status | status, created_at | 排查失败问答 |
|
||||||
|
| idx_ra_feishu_q_message | message_id | 按飞书消息 ID 查重,支撑消息幂等处理 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、状态枚举
|
||||||
|
|
||||||
|
### 4.1 WorkflowNotificationRecord.channel
|
||||||
|
|
||||||
|
| 值 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| mock | 模拟通知 |
|
||||||
|
| disabled | 真实通知未启用 |
|
||||||
|
| feishu_api | 飞书官方智能体/企业自建应用消息 API |
|
||||||
|
| feishu_webhook | 备选自定义机器人 Webhook,非首期主方案 |
|
||||||
|
|
||||||
|
### 4.2 WorkflowNotificationRecord.send_status
|
||||||
|
|
||||||
|
| 值 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| pending | 待发送 |
|
||||||
|
| success | 发送成功 |
|
||||||
|
| failed | 发送失败 |
|
||||||
|
| skipped_duplicate | 重复通知跳过 |
|
||||||
|
| disabled | 未启用真实发送 |
|
||||||
|
|
||||||
|
### 4.3 FeishuQuestionLog.intent
|
||||||
|
|
||||||
|
| 值 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch_status | 查询批次状态 |
|
||||||
|
| risk_summary | 查询风险摘要 |
|
||||||
|
| missing_summary | 查询缺失摘要 |
|
||||||
|
| export_summary | 查询导出摘要 |
|
||||||
|
| unknown | 未识别 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、与现有表的兼容关系
|
||||||
|
|
||||||
|
| 现有表 | 处理建议 |
|
||||||
|
| --- | --- |
|
||||||
|
| `ra_regulatory_notification_record` | 保留现有数据;法规核查真实飞书通知可新增写入通用表,后续再决定是否迁移 |
|
||||||
|
| `ra_application_form_fill_notification_record` | 保留现有数据;自动填表通知状态展示可优先读通用表,兼容旧表 |
|
||||||
|
| `ra_exported_summary_file` | 通知摘要中的导出文件数量来自该表 |
|
||||||
|
| `ra_workflow_event` | 可记录通知节点事件,但不替代通知记录表 |
|
||||||
|
| `auth_user` | 飞书映射通过外键关联系统用户 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、数据脱敏与安全
|
||||||
|
|
||||||
|
| 数据 | 入库策略 |
|
||||||
|
| --- | --- |
|
||||||
|
| App ID | 不入库,只在环境变量中维护 |
|
||||||
|
| App Secret | 不入库,只在环境变量中维护 |
|
||||||
|
| tenant_access_token | 不持久化入库,仅允许进程内短期缓存 |
|
||||||
|
| 富文本完整 payload | 不入库 |
|
||||||
|
| 手机号 | 映射表保存原值;通知记录只保存脱敏值 |
|
||||||
|
| open_id/user_id | 映射表保存原值;通知记录保存脱敏值 |
|
||||||
|
| 用户问题 | 问答日志保存原始问题,用于审计;不保存完整回答正文 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、迁移计划
|
||||||
|
|
||||||
|
| 步骤 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 1 | 新增 `FeishuUserMapping` 模型和迁移 |
|
||||||
|
| 2 | 新增 `WorkflowNotificationRecord` 模型和迁移 |
|
||||||
|
| 3 | 新增 `FeishuQuestionLog` 预留模型和迁移 |
|
||||||
|
| 4 | 注册 Django Admin 管理入口 |
|
||||||
|
| 5 | 批次详情页查询通用通知记录展示 |
|
||||||
|
| 6 | 保留现有专项通知表,不做破坏性迁移 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、验收 SQL 示例
|
||||||
|
|
||||||
|
查询某个批次通知状态:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT workflow_type, workflow_batch_no, workflow_status, channel, send_status, sent_at, error_message
|
||||||
|
FROM ra_workflow_notification_record
|
||||||
|
WHERE workflow_type = 'application_form_fill'
|
||||||
|
AND workflow_batch_no = 'AFF-20260607-001'
|
||||||
|
ORDER BY created_at DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
查询未配置飞书映射的失败或降级通知:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT workflow_type, workflow_batch_no, trigger_user_id, send_status, message_summary
|
||||||
|
FROM ra_workflow_notification_record
|
||||||
|
WHERE at_identifier_type = 'missing'
|
||||||
|
ORDER BY created_at DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
查询飞书用户映射:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT u.username, m.feishu_display_name, m.feishu_open_id, m.feishu_user_id, m.feishu_mobile, m.is_active
|
||||||
|
FROM ra_feishu_user_mapping m
|
||||||
|
JOIN auth_user u ON u.id = m.system_user_id
|
||||||
|
ORDER BY u.username;
|
||||||
|
```
|
||||||
590
docs/3.数据库设计/5.第1章监管信息材料包生成.md
Normal file
590
docs/3.数据库设计/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,590 @@
|
|||||||
|
# 第1章监管信息材料包生成数据库设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 数据库类型 | SQLite / Django ORM |
|
||||||
|
| 表名前缀 | ra_ |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 设计日期 | 2026-06-10 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、设计原则
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流批次 | 第1章监管信息材料包生成使用独立批次表,不复用自动填表批次 |
|
||||||
|
| 附件优先 | 输入说明书优先绑定 `FileAttachment`,兼容最近成功 `FileSummaryBatch` 与 `FileSummaryItem` |
|
||||||
|
| 过程产物文件化 | 大 JSON、追溯清单、模板副本、生成文件和 zip 均保存为文件,数据库只保存路径、hash、摘要 |
|
||||||
|
| 导出记录复用 | zip、单文件、追溯清单继续写入 `ExportedSummaryFile`,统一下载权限 |
|
||||||
|
| 工作流通用表复用 | 节点状态和 SSE 事件复用 `WorkflowNodeRun`、`WorkflowEvent` |
|
||||||
|
| 通知独立留痕 | 新增专项通知记录表,结构与自动填表通知记录保持一致 |
|
||||||
|
| SQLite 兼容 | 使用 Django ORM 常规字段和 JSONField,避免数据库特定语法 |
|
||||||
|
| 原始模板保护 | 数据库只记录批次工作目录产物,不记录对原始模板的写操作 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、ER 图
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
erDiagram
|
||||||
|
AUTH_USER ||--o{ CONVERSATION : owns
|
||||||
|
CONVERSATION ||--o{ MESSAGE : contains
|
||||||
|
CONVERSATION ||--o{ RA_FILE_ATTACHMENT : has
|
||||||
|
CONVERSATION ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : has
|
||||||
|
AUTH_USER ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : runs
|
||||||
|
MESSAGE ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : triggers
|
||||||
|
RA_FILE_ATTACHMENT ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : provides_instruction
|
||||||
|
RA_FILE_SUMMARY_BATCH ||--o{ RA_REGULATORY_INFO_PACKAGE_BATCH : optionally_feeds
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_REGULATORY_INFO_PACKAGE_ARTIFACT : keeps
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_REGULATORY_INFO_PACKAGE_NOTIFICATION_RECORD : sends
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_EXPORTED_SUMMARY_FILE : exports
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_WORKFLOW_NODE_RUN : tracks
|
||||||
|
RA_REGULATORY_INFO_PACKAGE_BATCH ||--o{ RA_WORKFLOW_EVENT : emits
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:`ra_workflow_node_run`、`ra_workflow_event`、`ra_exported_summary_file` 通过 `workflow_type` 与 `workflow_batch_id` 支持多工作流。本功能统一使用 `workflow_type=regulatory_info_package`。
|
||||||
|
|
||||||
|
现状补充:当前通用节点表已有的 `batch + node_code` 唯一约束主要服务于文件汇总批次。RIP 批次不应强依赖 `FileSummaryBatch.batch`,因此实现时必须为 `workflow_type + workflow_batch_id + node_code` 增加数据库唯一约束,或在创建节点时使用同等的幂等逻辑,避免同一 RIP 批次重复初始化节点。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、表结构设计
|
||||||
|
|
||||||
|
### 3.1 ra_regulatory_info_package_batch
|
||||||
|
|
||||||
|
一次第1章监管信息材料包生成工作流批次。记录触发来源、输入说明书、产品名称、生成状态、待确认摘要、zip 名称、配置版本和工作目录。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| conversation_id | ForeignKey | bigint | 是 | 所属对话 |
|
||||||
|
| user_id | ForeignKey | bigint | 是 | 发起用户 |
|
||||||
|
| trigger_message_id | ForeignKey | bigint | 否 | 触发本工作流的用户消息 |
|
||||||
|
| source_attachment_id | ForeignKey | bigint | 否 | 直接选中的说明书附件 |
|
||||||
|
| source_summary_batch_id | ForeignKey | bigint | 否 | 可选,最近成功文件汇总批次 |
|
||||||
|
| source_summary_item_id | PositiveBigIntegerField | bigint | 否 | 可选,文件汇总条目 ID |
|
||||||
|
| batch_no | CharField(64) | varchar(64) | 是 | 批次编号,格式 `RIP-YYYYMMDDHHMMSS-abcdef`,唯一 |
|
||||||
|
| status | CharField(30) | varchar(30) | 是 | pending、running、waiting_user、success、partial_success、failed、cancelled |
|
||||||
|
| source_file_name | CharField(255) | varchar(255) | 否 | 说明书原文件名 |
|
||||||
|
| source_storage_path | CharField(500) | varchar(500) | 否 | 说明书存储路径 |
|
||||||
|
| product_name | CharField(200) | varchar(200) | 否 | 抽取到的产品名称 |
|
||||||
|
| output_zip_name | CharField(255) | varchar(255) | 否 | 主输出 zip 文件名,默认 `第1章 监管信息(预生成版).zip` |
|
||||||
|
| generated_files | JSONField | text/json | 是 | 7 个文件生成状态摘要 |
|
||||||
|
| missing_fields | JSONField | text/json | 是 | 缺失并填 `/` 的字段 |
|
||||||
|
| llm_only_fields | JSONField | text/json | 是 | 仅 LLM 命中的字段 |
|
||||||
|
| conflict_fields | JSONField | text/json | 是 | 规则和 LLM 冲突字段 |
|
||||||
|
| risk_notes | JSONField | text/json | 是 | `.doc` 适配器、知识库不可用、zip 失败等提示 |
|
||||||
|
| template_config_version | CharField(80) | varchar(80) | 否 | 模板配置版本 |
|
||||||
|
| template_config_hash | CharField(128) | varchar(128) | 否 | 模板配置 hash |
|
||||||
|
| adapter_summary | JSONField | text/json | 是 | docx/doc 适配器使用情况 |
|
||||||
|
| work_dir | CharField(500) | varchar(500) | 否 | 批次工作目录 |
|
||||||
|
| error_message | TextField | text | 否 | 批次异常说明 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| started_at | DateTimeField | datetime | 否 | 开始时间 |
|
||||||
|
| finished_at | DateTimeField | datetime | 否 | 完成时间 |
|
||||||
|
| archived_at | DateTimeField | datetime | 否 | 归档时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
唯一约束:
|
||||||
|
|
||||||
|
| 约束名 | 字段 |
|
||||||
|
| --- | --- |
|
||||||
|
| uq_ra_rip_batch_no | batch_no |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_rip_batch_conv_status | conversation_id, status | 查询对话下材料包批次状态 |
|
||||||
|
| idx_ra_rip_batch_user_created | user_id, created_at | 查询用户发起历史 |
|
||||||
|
| idx_ra_rip_batch_attachment | source_attachment_id | 查询某说明书附件生成历史 |
|
||||||
|
| idx_ra_rip_batch_summary | source_summary_batch_id | 查询文件汇总关联的材料包批次 |
|
||||||
|
| idx_ra_rip_batch_created | created_at | 后台按时间排查 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.2 ra_regulatory_info_package_artifact
|
||||||
|
|
||||||
|
第1章监管信息材料包生成过程产物表。仅保存文件元数据,不保存大文本正文。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属材料包批次 |
|
||||||
|
| artifact_type | CharField(60) | varchar(60) | 是 | template_copy、instruction_extract、field_extract_result、merged_fields、generated_document、traceability、zip_package、notification_record |
|
||||||
|
| file_format | CharField(20) | varchar(20) | 是 | json、excel、docx、doc、zip、markdown |
|
||||||
|
| name | CharField(160) | varchar(160) | 是 | 产物名称 |
|
||||||
|
| file_name | CharField(255) | varchar(255) | 是 | 文件名 |
|
||||||
|
| storage_path | CharField(500) | varchar(500) | 是 | 文件存储路径 |
|
||||||
|
| file_size | BigIntegerField | bigint | 是 | 文件大小 |
|
||||||
|
| content_hash | CharField(128) | varchar(128) | 否 | 文件 SHA-256 hash |
|
||||||
|
| metadata | JSONField | text/json | 是 | 模板编码、生成状态、高亮数量、适配器、错误摘要等 |
|
||||||
|
| created_by_node | CharField(60) | varchar(60) | 否 | 生成该产物的工作流节点 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_rip_artifact_batch_type | batch_id, artifact_type | 查询批次过程产物 |
|
||||||
|
| idx_ra_rip_artifact_format | file_format | 按文件格式查询 |
|
||||||
|
| idx_ra_rip_artifact_created | created_at | 按时间追溯 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 3.3 ra_regulatory_info_package_notification_record
|
||||||
|
|
||||||
|
第1章监管信息材料包生成通知记录表。通知失败不阻断下载,但需要留痕和支持后续重试。
|
||||||
|
|
||||||
|
| 字段名 | Django 类型 | SQLite 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| id | BigAutoField | integer | 是 | 主键 |
|
||||||
|
| batch_id | ForeignKey | bigint | 是 | 所属材料包批次 |
|
||||||
|
| recipient_id | ForeignKey(User) | bigint | 是 | 通知对象,默认发起人 |
|
||||||
|
| channel | CharField(30) | varchar(30) | 是 | feishu_cli、feishu_api、mock |
|
||||||
|
| export_ids | JSONField | text/json | 是 | 本次通知关联导出文件 ID |
|
||||||
|
| message_summary | TextField | text | 是 | 通知摘要 |
|
||||||
|
| send_status | CharField(20) | varchar(20) | 是 | pending、success、failed |
|
||||||
|
| retry_count | PositiveIntegerField | integer | 是 | 已重试次数 |
|
||||||
|
| external_message_id | CharField(120) | varchar(120) | 否 | 飞书外部消息 ID |
|
||||||
|
| error_message | TextField | text | 否 | 失败原因 |
|
||||||
|
| sent_at | DateTimeField | datetime | 否 | 发送成功时间 |
|
||||||
|
| created_at | DateTimeField | datetime | 是 | 创建时间 |
|
||||||
|
| updated_at | DateTimeField | datetime | 是 | 更新时间 |
|
||||||
|
| is_deleted | BooleanField | bool | 是 | 软删除标记 |
|
||||||
|
|
||||||
|
索引:
|
||||||
|
|
||||||
|
| 索引名 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| idx_ra_rip_notify_batch | batch_id, created_at | 查询批次通知 |
|
||||||
|
| idx_ra_rip_notify_recipient | recipient_id, send_status | 查询用户通知状态 |
|
||||||
|
| idx_ra_rip_notify_status | send_status, retry_count | 查询待重试通知 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、既有表扩展
|
||||||
|
|
||||||
|
### 4.1 ra_exported_summary_file
|
||||||
|
|
||||||
|
继续复用导出文件表,新增 zip 导出类型,并支持 `regulatory_info_package` 权限反查。
|
||||||
|
|
||||||
|
| 字段/枚举 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| export_type | 增加 `zip` |
|
||||||
|
| workflow_type | 使用 `regulatory_info_package` |
|
||||||
|
| workflow_batch_id | 记录 `RegulatoryInfoPackageBatch.id` |
|
||||||
|
| export_category | 使用 `regulatory_info_package`、`generated_document`、`traceability` |
|
||||||
|
|
||||||
|
导出类型枚举:
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| markdown | Markdown | 既有报告 |
|
||||||
|
| excel | Excel | 追溯清单 |
|
||||||
|
| json | JSON | 抽取结果、合并字段 |
|
||||||
|
| word | Word | 生成的 Word 文件,包含 `.docx` 和可下载 `.doc` |
|
||||||
|
| pdf | PDF | 既有预留 |
|
||||||
|
| zip | ZIP | 第1章监管信息材料包主下载 |
|
||||||
|
|
||||||
|
下载 MIME 规则:
|
||||||
|
|
||||||
|
| 条件 | content_type |
|
||||||
|
| --- | --- |
|
||||||
|
| export_type=zip | application/zip |
|
||||||
|
| export_type=word 且文件名后缀 `.doc` | application/msword |
|
||||||
|
| export_type=word 且文件名后缀 `.docx` | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
|
||||||
|
|
||||||
|
### 4.2 ra_workflow_node_run
|
||||||
|
|
||||||
|
本功能使用通用工作流节点表:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | regulatory_info_package |
|
||||||
|
| workflow_batch_id | RegulatoryInfoPackageBatch.id |
|
||||||
|
| node_group | regulatory_info_package |
|
||||||
|
| batch_id | 可为空;即使为了兼容旧查询,也不建议绑定文件汇总批次 |
|
||||||
|
|
||||||
|
幂等约束建议:
|
||||||
|
|
||||||
|
| 约束/策略 | 字段 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| uq_ra_node_workflow_batch_code | workflow_type, workflow_batch_id, node_code | 推荐新增数据库唯一约束,防止同一 RIP 批次重复节点 |
|
||||||
|
| get_or_create 幂等 | workflow_type, workflow_batch_id, node_code | 若暂不改通用表约束,节点初始化必须使用该组合做代码层幂等 |
|
||||||
|
|
||||||
|
建议新增节点:
|
||||||
|
|
||||||
|
```text
|
||||||
|
prepare, template_copy, text_extract, field_extract, field_merge,
|
||||||
|
generate_docs, highlight_review_items, trace_export, zip_export, notify, completed
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 ra_workflow_event
|
||||||
|
|
||||||
|
本功能事件写入:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | regulatory_info_package |
|
||||||
|
| workflow_batch_id | RegulatoryInfoPackageBatch.id |
|
||||||
|
| conversation_id | 当前对话 ID |
|
||||||
|
| payload | 节点状态、文件生成状态、导出 ID、待确认摘要等 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、枚举设计
|
||||||
|
|
||||||
|
### 5.1 RegulatoryInfoPackageBatch.status
|
||||||
|
|
||||||
|
| value | 中文展示 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| pending | 待执行 | 批次已创建,等待执行 |
|
||||||
|
| running | 执行中 | 工作流正在执行 |
|
||||||
|
| waiting_user | 等待用户 | 未找到唯一说明书,需要用户选择 |
|
||||||
|
| success | 成功 | 7 个文件、zip 和必要追溯产物生成成功 |
|
||||||
|
| partial_success | 部分成功 | zip 或主要文件已生成,但部分单文件、`.doc` 原生处理、`.docx` 兜底、追溯或通知存在失败 |
|
||||||
|
| failed | 失败 | 关键输入、模板或全部目标文件生成失败 |
|
||||||
|
| cancelled | 已取消 | 用户或系统取消执行 |
|
||||||
|
|
||||||
|
### 5.2 RegulatoryInfoPackageArtifact.artifact_type
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| template_copy | 模板副本 |
|
||||||
|
| instruction_extract | 说明书文本、章节、表格抽取结果 |
|
||||||
|
| field_extract_result | 规则与 LLM 抽取原始结果 |
|
||||||
|
| merged_fields | 合并字段、高亮决策、标准候选 |
|
||||||
|
| generated_document | 生成后的单个目标文件 |
|
||||||
|
| traceability | 追溯清单 |
|
||||||
|
| zip_package | 主下载 zip 包 |
|
||||||
|
| notification_record | 通知记录产物 |
|
||||||
|
|
||||||
|
### 5.3 RegulatoryInfoPackageArtifact.file_format
|
||||||
|
|
||||||
|
| value | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| json | JSON 产物 |
|
||||||
|
| excel | Excel 追溯清单 |
|
||||||
|
| docx | Word OpenXML 文件 |
|
||||||
|
| doc | Word 97-2003 文件 |
|
||||||
|
| zip | 压缩包 |
|
||||||
|
| markdown | Markdown 摘要或报告 |
|
||||||
|
|
||||||
|
### 5.4 通知枚举
|
||||||
|
|
||||||
|
| 字段 | value |
|
||||||
|
| --- | --- |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、JSON 字段结构
|
||||||
|
|
||||||
|
### 6.1 generated_files
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"template_code": "ch1_4_application_form",
|
||||||
|
"file_name": "CH1.4 申请表.docx",
|
||||||
|
"status": "success",
|
||||||
|
"artifact_id": 12,
|
||||||
|
"export_id": 34,
|
||||||
|
"highlight_count": 8,
|
||||||
|
"missing_count": 5,
|
||||||
|
"llm_only_count": 2,
|
||||||
|
"error_message": ""
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 missing_fields
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"target_file": "CH1.4 申请表.docx",
|
||||||
|
"field_key": "applicant_name",
|
||||||
|
"field_label": "申请人名称",
|
||||||
|
"final_value": "/",
|
||||||
|
"highlight_reason": "missing",
|
||||||
|
"needs_review": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 llm_only_fields
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"target_file": "CH1.4 申请表.docx",
|
||||||
|
"field_key": "detection_targets",
|
||||||
|
"field_label": "检测靶标",
|
||||||
|
"final_value": "ORF1ab、N基因",
|
||||||
|
"evidence": "预期用途和检测原理章节",
|
||||||
|
"highlight_reason": "llm_only",
|
||||||
|
"needs_review": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 conflict_fields
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"field_key": "package_specification",
|
||||||
|
"field_label": "包装规格",
|
||||||
|
"rule_value": "规格A:24人份/盒、48人份/盒、96人份/盒",
|
||||||
|
"llm_value": "规格A、规格B均为24/48/96人份",
|
||||||
|
"selected_value": "规格A:24人份/盒、48人份/盒、96人份/盒",
|
||||||
|
"handling": "规则优先,写入值高亮并进入追溯清单"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.5 risk_notes
|
||||||
|
|
||||||
|
```json
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"type": "legacy_doc_adapter_unavailable",
|
||||||
|
"message": "CH1.9 为 .doc 文件,当前环境未检测到可写入适配器。",
|
||||||
|
"template_code": "ch1_9_pre_submission"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "knowledge_base_unavailable",
|
||||||
|
"message": "标准清单知识库查询不可用,未自动写入候选标准。"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.6 adapter_summary
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"docx": {
|
||||||
|
"adapter": "DocxDocumentAdapter",
|
||||||
|
"status": "available"
|
||||||
|
},
|
||||||
|
"doc": {
|
||||||
|
"adapter": "WordComDocAdapter",
|
||||||
|
"status": "available",
|
||||||
|
"fallback_used": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.7 artifact.metadata
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"template_code": "ch1_5_product_list",
|
||||||
|
"strategy": "product_list",
|
||||||
|
"source_template": "CH1.5 产品列表.docx",
|
||||||
|
"generated_status": "success",
|
||||||
|
"highlight_count": 12,
|
||||||
|
"missing_count": 6,
|
||||||
|
"llm_only_count": 1,
|
||||||
|
"adapter": "DocxDocumentAdapter",
|
||||||
|
"created_by_node": "generate_docs"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、存储路径设计
|
||||||
|
|
||||||
|
批次目录:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/regulatory_info_package/{user_id}/{conversation_id}/{batch_no}/
|
||||||
|
```
|
||||||
|
|
||||||
|
目录结构:
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/regulatory_info_package/12/1001/RIP-20260610153000-abcdef/
|
||||||
|
templates/
|
||||||
|
ch1_2_directory.source.docx
|
||||||
|
ch1_9_pre_submission.source.doc
|
||||||
|
extracted/
|
||||||
|
instruction_extract.json
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
generated/
|
||||||
|
CH1.2 监管信息目录.docx
|
||||||
|
CH1.4 申请表.docx
|
||||||
|
CH1.5 产品列表.docx
|
||||||
|
CH1.9 产品申报前沟通的说明.doc
|
||||||
|
CH1.11.1 符合标准的清单.docx
|
||||||
|
CH1.11.5 真实性声明.docx
|
||||||
|
CH1.11.6 符合性声明.docx
|
||||||
|
exports/
|
||||||
|
traceability.xlsx
|
||||||
|
第1章 监管信息(预生成版).zip
|
||||||
|
logs/
|
||||||
|
instruction_extract.json
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
traceability.json
|
||||||
|
doc_adapter_result.json
|
||||||
|
```
|
||||||
|
|
||||||
|
路径安全要求:
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 输出目录校验 | 所有输出路径必须位于当前批次 `work_dir` 下 |
|
||||||
|
| 原始模板只读 | 不允许覆盖 `docs/0.原始材料` |
|
||||||
|
| 导出路径 | `ExportedSummaryFile.storage_path` 保存实际文件路径,下载时校验权限 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、权限关系
|
||||||
|
|
||||||
|
### 8.1 批次权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
RegulatoryInfoPackageBatch.conversation.user_id == request.user.id
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 输入附件权限
|
||||||
|
|
||||||
|
```text
|
||||||
|
FileAttachment.conversation_id == batch.conversation_id
|
||||||
|
FileAttachment.user_id == batch.user_id
|
||||||
|
FileAttachment.upload_status != deleted
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.3 导出下载权限
|
||||||
|
|
||||||
|
`ExportedSummaryFile` 下载时按 `workflow_type` 分支:
|
||||||
|
|
||||||
|
```text
|
||||||
|
workflow_type == "regulatory_info_package"
|
||||||
|
-> workflow_batch_id 反查 RegulatoryInfoPackageBatch
|
||||||
|
-> conversation__user == request.user
|
||||||
|
-> is_deleted == false
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、迁移设计
|
||||||
|
|
||||||
|
建议新增一个迁移文件,包含:
|
||||||
|
|
||||||
|
| 变更 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 新增 `RegulatoryInfoPackageBatch` | 批次表 |
|
||||||
|
| 新增 `RegulatoryInfoPackageArtifact` | 产物表 |
|
||||||
|
| 新增 `RegulatoryInfoPackageNotificationRecord` | 通知记录表 |
|
||||||
|
| 扩展 `ExportedSummaryFile.ExportType` | 增加 `zip` 枚举 |
|
||||||
|
|
||||||
|
Django 模型建议仍集中放在 `review_agent/models.py`,业务逻辑放入 `review_agent/regulatory_info_package/`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、DDL 参考
|
||||||
|
|
||||||
|
以下 DDL 为 SQLite / Django ORM 参考,实际以 migration 生成为准。
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE TABLE ra_regulatory_info_package_batch (
|
||||||
|
id integer NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||||
|
conversation_id bigint NOT NULL REFERENCES review_agent_conversation(id),
|
||||||
|
user_id bigint NOT NULL REFERENCES auth_user(id),
|
||||||
|
trigger_message_id bigint NULL REFERENCES review_agent_message(id),
|
||||||
|
source_attachment_id bigint NULL REFERENCES ra_file_attachment(id),
|
||||||
|
source_summary_batch_id bigint NULL REFERENCES ra_file_summary_batch(id),
|
||||||
|
source_summary_item_id bigint NULL,
|
||||||
|
batch_no varchar(64) NOT NULL UNIQUE,
|
||||||
|
status varchar(30) NOT NULL,
|
||||||
|
source_file_name varchar(255) NOT NULL DEFAULT '',
|
||||||
|
source_storage_path varchar(500) NOT NULL DEFAULT '',
|
||||||
|
product_name varchar(200) NOT NULL DEFAULT '',
|
||||||
|
output_zip_name varchar(255) NOT NULL DEFAULT '',
|
||||||
|
generated_files text NOT NULL DEFAULT '[]',
|
||||||
|
missing_fields text NOT NULL DEFAULT '[]',
|
||||||
|
llm_only_fields text NOT NULL DEFAULT '[]',
|
||||||
|
conflict_fields text NOT NULL DEFAULT '[]',
|
||||||
|
risk_notes text NOT NULL DEFAULT '[]',
|
||||||
|
template_config_version varchar(80) NOT NULL DEFAULT '',
|
||||||
|
template_config_hash varchar(128) NOT NULL DEFAULT '',
|
||||||
|
adapter_summary text NOT NULL DEFAULT '{}',
|
||||||
|
work_dir varchar(500) NOT NULL DEFAULT '',
|
||||||
|
error_message text NOT NULL DEFAULT '',
|
||||||
|
created_at datetime NOT NULL,
|
||||||
|
started_at datetime NULL,
|
||||||
|
finished_at datetime NULL,
|
||||||
|
archived_at datetime NULL,
|
||||||
|
is_deleted bool NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX idx_ra_rip_batch_conv_status
|
||||||
|
ON ra_regulatory_info_package_batch(conversation_id, status);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_user_created
|
||||||
|
ON ra_regulatory_info_package_batch(user_id, created_at);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_attachment
|
||||||
|
ON ra_regulatory_info_package_batch(source_attachment_id);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_summary
|
||||||
|
ON ra_regulatory_info_package_batch(source_summary_batch_id);
|
||||||
|
CREATE INDEX idx_ra_rip_batch_created
|
||||||
|
ON ra_regulatory_info_package_batch(created_at);
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、实现注意事项
|
||||||
|
|
||||||
|
| 注意事项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| JSONField 默认值 | 使用 `default=list` 或 `default=dict`,禁止使用可变对象字面量 |
|
||||||
|
| 外键删除策略 | conversation/user 使用 CASCADE;输入附件和文件汇总批次建议 PROTECT 或 SET_NULL,避免历史批次断链 |
|
||||||
|
| `source_summary_item_id` | 当前没有强制外键到 `FileSummaryItem`,可先保存 ID,后续需要强约束时再改 FK |
|
||||||
|
| 工作流节点幂等 | RIP 节点不得只依赖 `WorkflowNodeRun.batch + node_code` 唯一约束;必须使用 `workflow_type + workflow_batch_id + node_code` 保证幂等 |
|
||||||
|
| `.doc` 失败记录 | `.doc` 原生适配器不可用或执行失败时,必须写入 `risk_notes` 和 artifact metadata;若 `.docx` 兜底成功,则 `generated_files` 中对应文件的状态记为 `fallback_success` |
|
||||||
|
| zip 主入口 | zip 导出记录的 `export_category` 固定为 `regulatory_info_package` |
|
||||||
|
| 单文件下载 | 7 个生成文件也写入 `ExportedSummaryFile`,作为辅助下载 |
|
||||||
|
| 软删除 | 批次和产物使用 `is_deleted`,下载权限需过滤软删除批次 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、验收标准
|
||||||
|
|
||||||
|
| 序号 | 验收项 | 标准 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | 模型创建 | 三张 RIP 专项表可通过 migration 创建 |
|
||||||
|
| 2 | 批次编号 | `batch_no` 唯一,符合 `RIP-...` 格式 |
|
||||||
|
| 3 | 附件关联 | 批次可绑定直接说明书附件 |
|
||||||
|
| 4 | 汇总兼容 | 批次可选绑定 `FileSummaryBatch` 与 `source_summary_item_id` |
|
||||||
|
| 5 | 产物留痕 | 模板副本、抽取结果、生成文件、zip、追溯清单均可写 artifact |
|
||||||
|
| 6 | zip 导出 | `ExportedSummaryFile` 支持 `export_type=zip` |
|
||||||
|
| 7 | 下载权限 | 非批次所属用户不能下载 RIP 导出 |
|
||||||
|
| 8 | 节点事件 | `WorkflowNodeRun` 和 `WorkflowEvent` 可通过 `workflow_type=regulatory_info_package` 查询 |
|
||||||
|
| 9 | 节点幂等 | 同一 `workflow_type + workflow_batch_id + node_code` 不会重复创建节点 |
|
||||||
|
| 10 | 通知记录 | 通知成功、失败和重试次数可落库 |
|
||||||
|
| 11 | JSON 摘要 | 缺失项、LLM-only、冲突项、风险提示结构符合本文约定 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、规范依据与裁决
|
||||||
|
|
||||||
|
| 规范来源 | 命中规则 | 本设计裁决 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| GYRX 数据库设计流程 | 项目规范优先,未命中时回退基线规范 | 当前项目为 Django/SQLite,沿用既有数据库设计文档风格 |
|
||||||
|
| 既有自动填表数据库设计 | 独立批次、产物、通知三表;大 JSON 文件化;通用导出表复用 | 本功能按同样模式新增 RIP 三表 |
|
||||||
|
| 自动汇总数据库设计 | 对话隔离、多版本附件、工作流事件留痕 | 输入附件和批次权限沿用该关系 |
|
||||||
|
| 飞书通知数据库设计 | 通知摘要入库、失败不阻断主流程 | RIP 通知表结构与自动填表通知对齐 |
|
||||||
|
|
||||||
|
冲突裁决:技能规范中的低代码/Java 表达不适用于当前 Django 项目,数据库设计以当前项目 ORM、SQLite 兼容和既有 `ra_` 表风格为准。
|
||||||
790
docs/4.详细设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
790
docs/4.详细设计/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,790 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表详细设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 数据库设计文档 | docs/3.数据库设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 依赖详细设计 | docs/4.详细设计/1.自动汇总.md;docs/4.详细设计/2.NMPA注册资料法规核查与整改闭环.md |
|
||||||
|
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、详细设计目标
|
||||||
|
|
||||||
|
本详细设计用于指导“产品关键信息提取与申报文件自动填表”功能开发落地,覆盖代码结构、数据库模型、模板配置、独立工作流、字段抽取、字段合并、Word 模板填充、追溯清单导出、飞书通知、接口契约、前端卡片、异常降级和测试建议。
|
||||||
|
|
||||||
|
核心约束:
|
||||||
|
|
||||||
|
| 约束 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 使用 `workflow_type=application_form_fill`,拥有独立批次和卡片 |
|
||||||
|
| 对话触发 | 由用户自然语言触发,可指定模板;未指定时按注册类型选择适用模板 |
|
||||||
|
| 文件来源复用 | 默认使用当前对话最近成功的 `FileSummaryBatch`;本次带附件时先执行自动汇总 |
|
||||||
|
| 模板配置驱动 | 模板路径、字段映射、适用条件写入 `application_form_fill/templates/application_form_templates_v1.yaml` |
|
||||||
|
| Word 优先 | Demo 阶段主链路只要求生成 Word 和追溯清单 |
|
||||||
|
| PDF 待办 | PDF 转换节点保留,但本期可标记 skipped 并写入待办计划 |
|
||||||
|
| 抽取并行 | 规则/正则抽取与 LLM 结构化抽取并行执行,再统一合并 |
|
||||||
|
| 冲突可见 | 说明书优先;冲突字段写入 Word 时黄底红字,并在对话框展示摘要 |
|
||||||
|
| 过程留底 | 规则抽取、LLM 抽取、合并结果、冲突和追溯清单均保存产物 |
|
||||||
|
| 飞书通知 | 填表完成后通知上传人,通知失败不阻断下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、代码结构设计
|
||||||
|
|
||||||
|
### 2.1 目录结构
|
||||||
|
|
||||||
|
第三批独立为 `review_agent/application_form_fill/` 模块。Django 模型仍集中在 `review_agent/models.py`,业务服务放入独立模块。
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/
|
||||||
|
models.py
|
||||||
|
services.py
|
||||||
|
skill_router.py
|
||||||
|
application_form_fill/
|
||||||
|
__init__.py
|
||||||
|
constants.py
|
||||||
|
schemas.py
|
||||||
|
storage.py
|
||||||
|
workflow.py
|
||||||
|
views.py
|
||||||
|
services/
|
||||||
|
__init__.py
|
||||||
|
template_config.py
|
||||||
|
template_select.py
|
||||||
|
template_repository.py
|
||||||
|
field_extract.py
|
||||||
|
field_merge.py
|
||||||
|
word_fill.py
|
||||||
|
traceability_export.py
|
||||||
|
notifier.py
|
||||||
|
templates/
|
||||||
|
application_form_templates_v1.yaml
|
||||||
|
prompts/
|
||||||
|
field_extract.md
|
||||||
|
checklist_extract.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 文件职责
|
||||||
|
|
||||||
|
| 文件 | 职责 |
|
||||||
|
| --- | --- |
|
||||||
|
| application_form_fill/constants.py | 工作流节点、模板编码、状态、输出类型常量 |
|
||||||
|
| application_form_fill/schemas.py | FormFillContext、TemplateSpec、ExtractedField、MergedField 等 dataclass |
|
||||||
|
| application_form_fill/storage.py | 批次工作目录、模板副本、产物保存、hash 计算 |
|
||||||
|
| application_form_fill/workflow.py | FormFillWorkflowExecutor,串行执行独立填表工作流 |
|
||||||
|
| application_form_fill/views.py | 启动、状态查询、后续可选下载或重试接口 |
|
||||||
|
| services/template_config.py | 读取和校验 YAML 模板配置 |
|
||||||
|
| services/template_select.py | 解析用户指定模板、识别注册类型、选择模板 |
|
||||||
|
| services/template_repository.py | 定位原始模板、复制模板、`.doc` 转 `.docx` 预留 |
|
||||||
|
| services/field_extract.py | 规则/正则与 LLM 并行字段抽取 |
|
||||||
|
| services/field_merge.py | 字段归一化、来源排序、冲突识别、最终字段输出 |
|
||||||
|
| services/word_fill.py | 使用 `python-docx` 写入 Word 表格、段落和高亮 |
|
||||||
|
| services/traceability_export.py | 生成 Excel/JSON 追溯清单,创建导出记录 |
|
||||||
|
| services/notifier.py | 包装飞书通知,生成通知记录 |
|
||||||
|
| prompts/field_extract.md | LLM 字段抽取提示词 |
|
||||||
|
| prompts/checklist_extract.md | 安全和性能基本原则清单条目判断提示词 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、依赖设计
|
||||||
|
|
||||||
|
### 3.1 Python 依赖
|
||||||
|
|
||||||
|
| 依赖 | 用途 | 当前项目情况 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Django | Web、ORM、权限 | 已使用 |
|
||||||
|
| python-docx | Word 模板读取、表格填充、字体和底色设置 | 已在项目依赖链中使用 |
|
||||||
|
| openpyxl | 字段来源追溯清单 Excel 导出 | 已使用 |
|
||||||
|
| PyYAML | YAML 模板配置读取 | 已用于法规规则 |
|
||||||
|
| pypdf / python-pptx | 文本抽取链路复用 | 已使用 |
|
||||||
|
| LibreOffice/soffice | `.doc` 转 `.docx`、PDF 转换 | 本期非强依赖,后续待办 |
|
||||||
|
|
||||||
|
### 3.2 技术边界
|
||||||
|
|
||||||
|
| 能力 | 本期实现 | 后续增强 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `.docx` 模板填充 | 必须支持 | 支持内容控件、复杂 OOXML patch |
|
||||||
|
| `.doc` 模板处理 | 可通过预转换模板或标记失败 | 自动 LibreOffice 转换 |
|
||||||
|
| PDF 转换 | 可跳过并提示待生成 | LibreOffice 转 PDF + 视觉 QA |
|
||||||
|
| 字段级入库 | 不做 | 新增字段明细表和在线编辑 |
|
||||||
|
| LLM 抽取 | 输出 JSON 并留底 | 增加置信度校准和人工确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、数据模型详细设计
|
||||||
|
|
||||||
|
模型放在 `review_agent/models.py`。
|
||||||
|
|
||||||
|
### 4.1 ApplicationFormFillBatch
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ApplicationFormFillBatch(models.Model):
|
||||||
|
class Status(models.TextChoices):
|
||||||
|
PENDING = "pending", "待执行"
|
||||||
|
RUNNING = "running", "执行中"
|
||||||
|
WAITING_USER = "waiting_user", "等待用户"
|
||||||
|
SUCCESS = "success", "成功"
|
||||||
|
PARTIAL_SUCCESS = "partial_success", "部分成功"
|
||||||
|
FAILED = "failed", "失败"
|
||||||
|
CANCELLED = "cancelled", "已取消"
|
||||||
|
```
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| conversation | 绑定对话 |
|
||||||
|
| user | 发起用户 |
|
||||||
|
| trigger_message | 触发消息 |
|
||||||
|
| source_summary_batch | 文件来源批次 |
|
||||||
|
| source_regulatory_batch | 可选法规核查批次 |
|
||||||
|
| batch_no | 批次编号,格式为 `AFF-YYYYMMDDHHMMSS-xxxxxx`(末尾为 6 位后缀),例如 `AFF-20260607153000-a1b2c3` |
|
||||||
|
| requested_templates | 用户指定模板 |
|
||||||
|
| selected_templates | 实际生成模板 |
|
||||||
|
| output_types | 本次请求输出类型,Demo 默认 `["word", "excel", "json"]` |
|
||||||
|
| registration_type | 识别出的注册类型 |
|
||||||
|
| registration_type_source | 注册类型来源 |
|
||||||
|
| product_name | 产品名称 |
|
||||||
|
| conflict_summary | 冲突摘要 |
|
||||||
|
| risk_notes | 不适用模板、PDF 待生成等提示 |
|
||||||
|
| template_config_version | 模板配置版本 |
|
||||||
|
| template_config_hash | 模板配置 hash |
|
||||||
|
| work_dir | 批次工作目录 |
|
||||||
|
|
||||||
|
### 4.2 ApplicationFormFillArtifact
|
||||||
|
|
||||||
|
用于保存过程产物和模板副本元数据。
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ApplicationFormFillArtifact(models.Model):
|
||||||
|
class ArtifactType(models.TextChoices):
|
||||||
|
TEMPLATE_COPY = "template_copy", "模板副本"
|
||||||
|
FIELD_EXTRACT_RESULT = "field_extract_result", "字段抽取结果"
|
||||||
|
MERGED_FIELDS = "merged_fields", "字段合并结果"
|
||||||
|
TRACEABILITY = "traceability", "追溯清单"
|
||||||
|
FILLED_TEMPLATE = "filled_template", "已填模板"
|
||||||
|
NOTIFICATION_RECORD = "notification_record", "通知记录"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 ApplicationFormFillNotificationRecord
|
||||||
|
|
||||||
|
通知记录字段与第二批法规通知风格一致,支持重试:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 自动填表批次 |
|
||||||
|
| recipient | 通知对象 |
|
||||||
|
| channel | feishu_cli、feishu_api、mock |
|
||||||
|
| template_codes | 涉及模板 |
|
||||||
|
| export_ids | 关联下载文件 |
|
||||||
|
| message_summary | 通知摘要 |
|
||||||
|
| send_status | pending、success、failed |
|
||||||
|
| retry_count | 重试次数 |
|
||||||
|
| external_message_id | 飞书外部消息 ID |
|
||||||
|
| error_message | 失败原因 |
|
||||||
|
| sent_at | 发送成功时间 |
|
||||||
|
|
||||||
|
### 4.4 ExportedSummaryFile 扩展
|
||||||
|
|
||||||
|
`ExportedSummaryFile.ExportType` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
WORD = "word", "Word"
|
||||||
|
PDF = "pdf", "PDF"
|
||||||
|
```
|
||||||
|
|
||||||
|
填表导出记录使用:
|
||||||
|
|
||||||
|
| 字段 | 值 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | application_form_fill |
|
||||||
|
| workflow_batch_id | ApplicationFormFillBatch.id |
|
||||||
|
| export_category | filled_template、traceability、extract_result |
|
||||||
|
| export_type | word、excel、json、pdf |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、常量设计
|
||||||
|
|
||||||
|
### 5.1 工作流节点
|
||||||
|
|
||||||
|
```python
|
||||||
|
FORM_FILL_NODE_DEFINITIONS = [
|
||||||
|
("prepare", "准备资料", "form_fill"),
|
||||||
|
("template_select", "选择模板", "form_fill"),
|
||||||
|
("template_copy", "复制模板", "form_fill"),
|
||||||
|
("field_extract", "抽取字段", "form_fill"),
|
||||||
|
("conflict_merge", "冲突归并", "form_fill"),
|
||||||
|
("word_fill", "填写 Word", "form_fill"),
|
||||||
|
("pdf_convert", "转换 PDF", "form_fill"),
|
||||||
|
("trace_export", "追溯清单", "form_fill"),
|
||||||
|
("output_export", "输出下载", "form_fill"),
|
||||||
|
("notify", "飞书通知", "form_fill"),
|
||||||
|
("completed", "完成", "completed"),
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 模板编码
|
||||||
|
|
||||||
|
```python
|
||||||
|
TEMPLATE_REGISTRATION_CERTIFICATE = "registration_certificate"
|
||||||
|
TEMPLATE_CHANGE_REGISTRATION = "change_registration"
|
||||||
|
TEMPLATE_ESSENTIAL_PRINCIPLES = "essential_principles"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 触发关键词
|
||||||
|
|
||||||
|
```python
|
||||||
|
FORM_FILL_TRIGGER_KEYWORDS = [
|
||||||
|
"填注册证",
|
||||||
|
"对应的表格",
|
||||||
|
"生成申报模板",
|
||||||
|
"安全和性能基本原则清单",
|
||||||
|
"填到申报模板",
|
||||||
|
"自动填表",
|
||||||
|
"生成表格",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、核心数据结构
|
||||||
|
|
||||||
|
### 6.1 FormFillContext
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class FormFillContext:
|
||||||
|
batch: ApplicationFormFillBatch
|
||||||
|
source_summary_batch: FileSummaryBatch
|
||||||
|
source_regulatory_batch: RegulatoryReviewBatch | None
|
||||||
|
template_config: dict[str, Any]
|
||||||
|
selected_templates: list["TemplateSpec"]
|
||||||
|
document_texts: dict[str, str]
|
||||||
|
regex_results: dict[str, Any]
|
||||||
|
llm_results: dict[str, Any]
|
||||||
|
merged_fields: dict[str, "MergedField"]
|
||||||
|
checklist_items: dict[str, Any]
|
||||||
|
conflicts: list[dict[str, Any]]
|
||||||
|
exports: list[ExportedSummaryFile]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.2 TemplateSpec
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class TemplateSpec:
|
||||||
|
code: str
|
||||||
|
name: str
|
||||||
|
source_file: str
|
||||||
|
output_label: str
|
||||||
|
applies_when: dict[str, Any]
|
||||||
|
file_format: str
|
||||||
|
fields: list[dict[str, Any]]
|
||||||
|
checklist_items: list[dict[str, Any]]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.3 ExtractedField
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class ExtractedField:
|
||||||
|
key: str
|
||||||
|
label: str
|
||||||
|
value: str
|
||||||
|
source_file: str
|
||||||
|
source_role: str
|
||||||
|
evidence: str
|
||||||
|
extractor: str
|
||||||
|
confidence: float
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6.4 MergedField
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class MergedField:
|
||||||
|
key: str
|
||||||
|
label: str
|
||||||
|
value: str
|
||||||
|
source_file: str
|
||||||
|
evidence: str
|
||||||
|
confidence: float
|
||||||
|
has_conflict: bool = False
|
||||||
|
conflict_values: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、模板配置详细设计
|
||||||
|
|
||||||
|
### 7.1 配置路径
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/application_form_fill/templates/application_form_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 初始配置示例
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
version: application_form_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||||
|
templates:
|
||||||
|
- code: registration_certificate
|
||||||
|
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
|
||||||
|
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
|
||||||
|
output_label: 注册证格式
|
||||||
|
applies_when:
|
||||||
|
registration_type: ["首次注册"]
|
||||||
|
file_format: docx
|
||||||
|
fields:
|
||||||
|
- key: applicant_name
|
||||||
|
label: 注册人名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 注册人名称
|
||||||
|
source_roles: ["申请表", "说明书", "企业信息"]
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 产品名称
|
||||||
|
source_roles: ["说明书", "产品技术要求", "注册检验报告"]
|
||||||
|
- key: intended_use
|
||||||
|
label: 预期用途
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 预期用途
|
||||||
|
source_roles: ["说明书", "临床评价资料", "产品技术要求"]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.3 配置校验
|
||||||
|
|
||||||
|
`TemplateConfigService` 在工作流加载模板配置时执行以下校验:
|
||||||
|
|
||||||
|
| 校验项 | 失败处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| version 存在 | 批次 failed |
|
||||||
|
| source_dir 存在 | 批次 failed |
|
||||||
|
| templates 非空 | 批次 failed |
|
||||||
|
| code 唯一 | 批次 failed |
|
||||||
|
| source_file 存在 | 对应模板不可用 |
|
||||||
|
| target.type 支持 | 对应字段跳过并记录 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、服务详细设计
|
||||||
|
|
||||||
|
### 8.1 TemplateConfigService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def load_template_config() -> dict:
|
||||||
|
"""读取 YAML 模板配置。"""
|
||||||
|
|
||||||
|
def validate_template_config(config: dict) -> list[str]:
|
||||||
|
"""返回配置错误列表。"""
|
||||||
|
|
||||||
|
def compute_config_hash(path: Path) -> str:
|
||||||
|
"""计算模板配置 SHA-256。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.2 TemplateSelectionService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def parse_requested_templates(message: str) -> list[str]:
|
||||||
|
"""从用户话语中识别指定模板。"""
|
||||||
|
|
||||||
|
def detect_registration_type(batch: ApplicationFormFillBatch, message: str) -> tuple[str, str]:
|
||||||
|
"""按用户话语、法规核查批次、文件抽取结果识别注册类型及来源。"""
|
||||||
|
|
||||||
|
def select_templates(
|
||||||
|
config: dict,
|
||||||
|
requested_templates: list[str],
|
||||||
|
registration_type: str,
|
||||||
|
) -> tuple[list[TemplateSpec], list[dict]]:
|
||||||
|
"""输出模板列表和风险提示。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
注册类型优先级:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户话语明确指定
|
||||||
|
-> source_regulatory_batch.condition_json / confirmed_conditions
|
||||||
|
-> source_summary_batch 文件内容抽取候选
|
||||||
|
-> unknown
|
||||||
|
```
|
||||||
|
|
||||||
|
### 8.3 TemplateRepository
|
||||||
|
|
||||||
|
```python
|
||||||
|
def resolve_source_template(spec: TemplateSpec) -> Path:
|
||||||
|
"""返回原始模板路径或预转换工作模板路径。"""
|
||||||
|
|
||||||
|
def copy_template_to_batch(spec: TemplateSpec, batch: ApplicationFormFillBatch) -> Path:
|
||||||
|
"""复制模板到批次 work_dir/templates。"""
|
||||||
|
|
||||||
|
def convert_doc_to_docx(source: Path, target_dir: Path) -> Path:
|
||||||
|
"""P1 能力:使用 soffice 转 docx。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
`.doc` 模板本期处理:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 存在 working_template docx | 使用工作模板 |
|
||||||
|
| 仅有 `.doc` 且无 soffice | 对应模板失败,其他模板继续 |
|
||||||
|
| 具备 soffice | 转换为 `.docx` 后继续 |
|
||||||
|
|
||||||
|
### 8.4 FieldExtractionService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
|
||||||
|
"""复用 text_extract 读取文件文本。"""
|
||||||
|
|
||||||
|
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict:
|
||||||
|
"""规则/正则抽取字段。"""
|
||||||
|
|
||||||
|
def extract_by_llm(texts: dict[str, str], specs: list[TemplateSpec]) -> dict:
|
||||||
|
"""LLM 结构化抽取字段。"""
|
||||||
|
|
||||||
|
def run_parallel_extract(texts: dict[str, str], specs: list[TemplateSpec]) -> tuple[dict, dict]:
|
||||||
|
"""并行执行规则/正则与 LLM 抽取。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
并行实现可使用 `ThreadPoolExecutor(max_workers=2)`。LLM 超时或失败时,保留规则/正则结果继续。
|
||||||
|
|
||||||
|
### 8.5 FieldMergeService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def normalize_field_value(value: str) -> str:
|
||||||
|
"""字段值归一化。"""
|
||||||
|
|
||||||
|
def rank_source(source_role: str, source_file: str) -> int:
|
||||||
|
"""说明书优先,其次产品技术要求、检测报告、性能研究等。"""
|
||||||
|
|
||||||
|
def merge_fields(regex_results: dict, llm_results: dict) -> tuple[dict[str, MergedField], list[dict]]:
|
||||||
|
"""合并字段并输出冲突。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
来源优先级:
|
||||||
|
|
||||||
|
| 排名 | 来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| 1 | 说明书 |
|
||||||
|
| 2 | 产品技术要求 |
|
||||||
|
| 3 | 注册检验报告/检测报告 |
|
||||||
|
| 4 | 性能研究资料 |
|
||||||
|
| 5 | 其他注册资料 |
|
||||||
|
|
||||||
|
### 8.6 WordTemplateFillService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def fill_template(
|
||||||
|
template_path: Path,
|
||||||
|
output_path: Path,
|
||||||
|
spec: TemplateSpec,
|
||||||
|
fields: dict[str, MergedField],
|
||||||
|
checklist_items: dict[str, Any],
|
||||||
|
) -> Path:
|
||||||
|
"""填充 Word 模板并保存。"""
|
||||||
|
|
||||||
|
def fill_table_row(document: Document, row_label: str, value: str, conflict: bool) -> bool:
|
||||||
|
"""根据表格行首字段名定位并填入第二列。"""
|
||||||
|
|
||||||
|
def replace_placeholders(document: Document, fields: dict[str, MergedField]) -> None:
|
||||||
|
"""替换段落中的 {{field_key}}。"""
|
||||||
|
|
||||||
|
def apply_conflict_style(cell_or_run) -> None:
|
||||||
|
"""应用黄色底色和红色字体。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
冲突样式:
|
||||||
|
|
||||||
|
| 样式 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 字体颜色 | 红色 `FF0000` |
|
||||||
|
| 底色 | 黄色 `FFFF00` |
|
||||||
|
| 适用范围 | 单元格或字段值 run |
|
||||||
|
|
||||||
|
### 8.7 TraceabilityExportService
|
||||||
|
|
||||||
|
```python
|
||||||
|
def build_traceability_workbook(batch, merged_fields, conflicts, specs) -> Workbook:
|
||||||
|
"""生成追溯清单 Excel。"""
|
||||||
|
|
||||||
|
def save_traceability_excel(batch, workbook) -> ExportedSummaryFile:
|
||||||
|
"""保存 Excel 并写导出记录。"""
|
||||||
|
|
||||||
|
def save_extract_json(batch, payload: dict) -> ApplicationFormFillArtifact:
|
||||||
|
"""保存字段抽取 JSON 过程产物。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
Excel Sheet:
|
||||||
|
|
||||||
|
| Sheet | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 字段追溯 | 模板、字段、填入值、来源文件、证据、冲突状态 |
|
||||||
|
| 冲突字段 | 字段、采用值、冲突值、处理方式 |
|
||||||
|
| 低置信度条目 | 安全和性能基本原则清单候选判断 |
|
||||||
|
| 生成结果 | 模板文件、Word 状态、PDF 状态、错误说明 |
|
||||||
|
|
||||||
|
### 8.8 FormFillNotifier
|
||||||
|
|
||||||
|
```python
|
||||||
|
def notify_completion(batch: ApplicationFormFillBatch, exports: list[ExportedSummaryFile]) -> ApplicationFormFillNotificationRecord:
|
||||||
|
"""发送填表完成通知。"""
|
||||||
|
```
|
||||||
|
|
||||||
|
通知摘要包含:
|
||||||
|
|
||||||
|
| 内容 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 批次号 | 填表批次 |
|
||||||
|
| 产品名称 | 如已识别 |
|
||||||
|
| 生成模板 | 模板名称列表 |
|
||||||
|
| 冲突数量 | 提示需下载核对 |
|
||||||
|
| 下载提示 | 提示回到系统对话下载,不直接暴露敏感全文 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、工作流执行器详细设计
|
||||||
|
|
||||||
|
### 9.1 启动入口
|
||||||
|
|
||||||
|
```python
|
||||||
|
def start_application_form_fill_workflow(batch: ApplicationFormFillBatch, *, async_run: bool = True) -> None:
|
||||||
|
executor = FormFillWorkflowExecutor(batch)
|
||||||
|
if async_run:
|
||||||
|
Thread(target=executor.run, daemon=True).start()
|
||||||
|
else:
|
||||||
|
executor.run()
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9.2 执行伪代码
|
||||||
|
|
||||||
|
```python
|
||||||
|
class FormFillWorkflowExecutor:
|
||||||
|
def run(self) -> None:
|
||||||
|
self.mark_batch_running()
|
||||||
|
try:
|
||||||
|
for node in self.nodes():
|
||||||
|
if node.status == "success":
|
||||||
|
continue
|
||||||
|
self.run_node(node)
|
||||||
|
self.complete_or_partial()
|
||||||
|
except WorkflowPausedForUser:
|
||||||
|
self.mark_waiting_user()
|
||||||
|
except Exception as exc:
|
||||||
|
self.mark_failed(exc)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 9.3 节点处理要点
|
||||||
|
|
||||||
|
| 节点 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 校验 `source_summary_batch` 状态为成功且属于当前对话 |
|
||||||
|
| template_select | 读取 YAML、识别注册类型、选择模板 |
|
||||||
|
| template_copy | 复制模板到 `work_dir/templates` |
|
||||||
|
| field_extract | 抽取文本,规则/正则与 LLM 并行,保存 JSON |
|
||||||
|
| conflict_merge | 合并字段,写 `conflict_summary` |
|
||||||
|
| word_fill | 逐模板生成 Word,写 `ExportedSummaryFile(word)` |
|
||||||
|
| pdf_convert | 本期 skipped,写 `risk_notes` |
|
||||||
|
| trace_export | 生成追溯 Excel 和 JSON |
|
||||||
|
| output_export | 生成 AI 对话 Markdown 摘要 |
|
||||||
|
| notify | 写飞书通知记录,失败不阻断 |
|
||||||
|
| completed | 标记 success 或 partial_success |
|
||||||
|
|
||||||
|
### 9.4 批次状态决策
|
||||||
|
|
||||||
|
| 条件 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 所有目标 Word 均成功,追溯清单成功,通知成功或跳过 | success |
|
||||||
|
| 至少一个 Word 成功,但部分模板、追溯清单、PDF 或通知失败 | partial_success |
|
||||||
|
| 所有目标 Word 均失败 | failed |
|
||||||
|
| 无来源文件汇总批次 | waiting_user |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、接口详细设计
|
||||||
|
|
||||||
|
### 10.1 发起自动填表
|
||||||
|
|
||||||
|
```text
|
||||||
|
POST /api/review-agent/application-form-fill/start/
|
||||||
|
```
|
||||||
|
|
||||||
|
请求:
|
||||||
|
|
||||||
|
| 参数 | 类型 | 必填 | 说明 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| conversation_id | integer | 是 | 当前对话 |
|
||||||
|
| message_id | integer | 否 | 触发消息 |
|
||||||
|
| file_summary_batch_id | integer | 否 | 指定文件来源批次 |
|
||||||
|
| template_codes | array | 否 | 指定模板 |
|
||||||
|
| output_types | array | 否 | 输出类型,默认 word、excel、json |
|
||||||
|
|
||||||
|
响应:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"batch_id": 3001,
|
||||||
|
"workflow_type": "application_form_fill",
|
||||||
|
"status": "pending",
|
||||||
|
"selected_templates": ["registration_certificate", "essential_principles"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.2 查询状态
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/application-form-fill/{batch_id}/
|
||||||
|
```
|
||||||
|
|
||||||
|
响应:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"batch": {
|
||||||
|
"id": 3001,
|
||||||
|
"batch_no": "AFF-20260607153000-a1b2c3",
|
||||||
|
"status": "success",
|
||||||
|
"product_name": "甲胎蛋白检测试剂盒",
|
||||||
|
"selected_templates": ["registration_certificate"]
|
||||||
|
},
|
||||||
|
"nodes": [],
|
||||||
|
"conflicts": [],
|
||||||
|
"exports": []
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 10.3 下载文件
|
||||||
|
|
||||||
|
继续复用既有导出下载接口:
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/file-summary/exports/{export_id}/download/
|
||||||
|
```
|
||||||
|
|
||||||
|
下载权限通过 `workflow_type=application_form_fill` 和 `workflow_batch_id` 反查填表批次。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、前端详细设计
|
||||||
|
|
||||||
|
### 11.1 工作流卡片
|
||||||
|
|
||||||
|
新增卡片类型 `application_form_fill`。
|
||||||
|
|
||||||
|
| 节点 | 展示 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 准备资料 |
|
||||||
|
| template_select | 选择模板 |
|
||||||
|
| template_copy | 复制模板 |
|
||||||
|
| field_extract | 抽取字段 |
|
||||||
|
| conflict_merge | 冲突归并 |
|
||||||
|
| word_fill | 填写 Word |
|
||||||
|
| pdf_convert | 转换 PDF |
|
||||||
|
| trace_export | 追溯清单 |
|
||||||
|
| output_export | 输出下载 |
|
||||||
|
| notify | 飞书通知 |
|
||||||
|
| completed | 已完成 |
|
||||||
|
|
||||||
|
PDF 本期显示为“已跳过/待增强”,不显示为失败。
|
||||||
|
|
||||||
|
### 11.2 AI 回复摘要
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成申报模板自动填表文件。
|
||||||
|
|
||||||
|
| 文件 | Word | PDF |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 注册证格式 | 下载 | 待增强 |
|
||||||
|
| 安全和性能基本原则清单 | 下载 | 待增强 |
|
||||||
|
|
||||||
|
| 冲突字段 | 采用值 | 冲突来源 | 处理 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 储存条件 | 2-8℃保存 | 产品技术要求:-20℃保存 | 已按说明书填入,并在模板中高亮 |
|
||||||
|
|
||||||
|
[下载字段来源追溯清单](download-url)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、异常与降级
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 无成功汇总批次 | 批次 waiting_user,对话提示上传资料 |
|
||||||
|
| 模板配置不存在 | 批次 failed |
|
||||||
|
| 指定模板不存在 | 忽略无效模板并提示;若无有效模板则 failed |
|
||||||
|
| `.doc` 模板无可用工作模板 | 该模板失败,其他模板继续 |
|
||||||
|
| 文本抽取失败 | 对应文件跳过,记录在追溯清单 |
|
||||||
|
| LLM 抽取失败 | 使用规则/正则结果继续 |
|
||||||
|
| 字段缺失 | Word 留空 |
|
||||||
|
| 字段冲突 | 说明书优先并高亮 |
|
||||||
|
| 追溯清单失败 | Word 成功时批次 partial_success |
|
||||||
|
| 飞书通知失败 | 批次 partial_success 或 success,取决于核心产物是否成功 |
|
||||||
|
| PDF 未实现 | 节点 skipped,写入待增强提示 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、测试设计
|
||||||
|
|
||||||
|
### 13.1 单元测试
|
||||||
|
|
||||||
|
| 用例 | 目标 |
|
||||||
|
| --- | --- |
|
||||||
|
| test_form_fill_trigger_keywords | 触发语句识别为自动填表 |
|
||||||
|
| test_template_config_loads | YAML 配置可加载并校验 |
|
||||||
|
| test_select_default_templates_initial_registration | 首次注册默认选择注册证和基本原则清单 |
|
||||||
|
| test_select_user_requested_mismatch | 用户指定不适用模板仍允许生成并提示 |
|
||||||
|
| test_field_merge_prefers_instructions | 说明书字段优先 |
|
||||||
|
| test_field_merge_marks_conflict | 冲突字段进入 conflict_summary |
|
||||||
|
| test_word_fill_table_row | 能按表格行名写入 Word |
|
||||||
|
| test_word_fill_conflict_highlight | 冲突字段黄底红字 |
|
||||||
|
| test_traceability_excel | 追溯清单包含字段、来源和冲突 |
|
||||||
|
| test_notify_records_failure | 飞书失败写通知记录但不阻断 |
|
||||||
|
|
||||||
|
### 13.2 集成测试
|
||||||
|
|
||||||
|
| 场景 | 验证 |
|
||||||
|
| --- | --- |
|
||||||
|
| 最近汇总批次触发填表 | 无附件时复用最近 success `FileSummaryBatch` |
|
||||||
|
| 新附件触发填表 | 先自动汇总再启动填表 |
|
||||||
|
| 注册证模板填充 | 生成 Word 导出文件 |
|
||||||
|
| LLM 失败降级 | LLM 超时后规则抽取仍可生成 Word |
|
||||||
|
| 部分模板失败 | 至少一个 Word 成功时批次 partial_success |
|
||||||
|
| 权限隔离 | 不能查询或下载他人填表批次产物 |
|
||||||
|
|
||||||
|
### 13.3 前端验证
|
||||||
|
|
||||||
|
| 场景 | 验证 |
|
||||||
|
| --- | --- |
|
||||||
|
| 自动填表卡片 | 节点状态随 SSE 更新 |
|
||||||
|
| 指定模板展示 | 卡片展示本次选择模板 |
|
||||||
|
| PDF 跳过显示 | PDF 节点显示待增强而非失败 |
|
||||||
|
| 下载链接 | Word 和追溯清单链接可点击下载 |
|
||||||
|
| 冲突摘要 | 冲突字段表格正常渲染 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、实施顺序建议
|
||||||
|
|
||||||
|
1. 修改功能设计中的模板配置路径为 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml`。
|
||||||
|
2. 新增数据库模型和 `ExportedSummaryFile.ExportType` 扩展。
|
||||||
|
3. 新增 `application_form_fill` 模块目录和常量、schemas、storage。
|
||||||
|
4. 新增模板配置 YAML,先录入注册证 `.docx` 的已识别字段。
|
||||||
|
5. 实现模板选择、模板复制和 Word 表格行填充。
|
||||||
|
6. 实现规则/正则字段抽取和 LLM 抽取降级。
|
||||||
|
7. 实现字段合并、冲突高亮和追溯清单。
|
||||||
|
8. 实现工作流执行器、节点事件和状态接口。
|
||||||
|
9. 改造路由和前端工作流卡片。
|
||||||
|
10. 接入飞书通知记录。
|
||||||
|
11. 将字段级数据库表和 PDF 转换写入待办计划。
|
||||||
604
docs/4.详细设计/4.飞书通知与问答接入.md
Normal file
604
docs/4.详细设计/4.飞书通知与问答接入.md
Normal file
@@ -0,0 +1,604 @@
|
|||||||
|
# 飞书通知与问答接入详细设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/4.飞书通知与问答接入.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/4.飞书通知与问答接入.md |
|
||||||
|
| 数据库设计文档 | docs/3.数据库设计/4.飞书通知与问答接入.md |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-07 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、实现目标
|
||||||
|
|
||||||
|
首期实现一个统一飞书通知能力,使自动汇总、法规核查、自动填表三个工作流在完成、部分成功或失败时,通过飞书官方智能体/应用机器人消息 API 向指定个人账号发送富文本私聊通知。通知失败不阻断主流程,发送结果落库并在批次详情页展示。
|
||||||
|
|
||||||
|
同时预留飞书私聊问答所需的用户映射、查询服务、权限过滤和问答日志模型,但不实现飞书事件订阅回调。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、推荐文件结构
|
||||||
|
|
||||||
|
| 文件 | 类型 | 责任 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `review_agent/models.py` | 修改 | 新增 `FeishuUserMapping`、`WorkflowNotificationRecord`、`FeishuQuestionLog` |
|
||||||
|
| `review_agent/admin.py` | 修改/新增 | 注册飞书用户映射和通知记录后台 |
|
||||||
|
| `review_agent/notifications/__init__.py` | 新增 | 通知模块包 |
|
||||||
|
| `review_agent/notifications/context.py` | 新增 | 定义统一通知上下文 dataclass |
|
||||||
|
| `review_agent/notifications/recipient.py` | 新增 | 解析首期指定个人接收人;后续扩展为按系统用户映射解析 |
|
||||||
|
| `review_agent/notifications/message_builder.py` | 新增 | 构造飞书富文本 payload 和摘要 |
|
||||||
|
| `review_agent/notifications/feishu_token.py` | 新增 | 使用 App ID/App Secret 获取并缓存 tenant_access_token |
|
||||||
|
| `review_agent/notifications/feishu_message_api.py` | 新增 | 调用飞书发送消息 API、处理响应解析 |
|
||||||
|
| `review_agent/notifications/records.py` | 新增 | 判重和通知记录落库 |
|
||||||
|
| `review_agent/notifications/dispatcher.py` | 新增 | 对外统一发送入口 |
|
||||||
|
| `review_agent/notifications/workflow_adapters.py` | 新增 | 三个工作流批次到通知上下文的适配 |
|
||||||
|
| `review_agent/feishu_questions/query.py` | 新增 | 后续问答预留:批次摘要查询 |
|
||||||
|
| `review_agent/feishu_questions/permissions.py` | 新增 | 后续问答预留:权限过滤 |
|
||||||
|
| `tests/test_feishu_notification.py` | 新增 | 飞书通知单元测试 |
|
||||||
|
| `tests/test_feishu_question_reserved.py` | 新增 | 问答预留服务测试 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、数据结构设计
|
||||||
|
|
||||||
|
### 3.1 NotificationContext
|
||||||
|
|
||||||
|
```python
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class NotificationContext:
|
||||||
|
workflow_type: str
|
||||||
|
workflow_batch_id: int
|
||||||
|
workflow_batch_no: str
|
||||||
|
workflow_status: str
|
||||||
|
title: str
|
||||||
|
trigger_user_id: int
|
||||||
|
trigger_username: str
|
||||||
|
result_url: str
|
||||||
|
summary_lines: list[str] = field(default_factory=list)
|
||||||
|
next_action: str = ""
|
||||||
|
metadata: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def dedupe_key(self) -> str:
|
||||||
|
return f"{self.workflow_type}:{self.workflow_batch_id}:{self.workflow_status}"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3.2 ResolvedFeishuTarget
|
||||||
|
|
||||||
|
```python
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class ResolvedFeishuTarget:
|
||||||
|
mapping_id: int | None
|
||||||
|
display_name: str
|
||||||
|
identifier_type: str
|
||||||
|
identifier_value: str
|
||||||
|
masked_identifier: str
|
||||||
|
missing: bool = False
|
||||||
|
```
|
||||||
|
|
||||||
|
identifier_type 取值:
|
||||||
|
|
||||||
|
| 值 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| open_id | 使用飞书 open_id |
|
||||||
|
| user_id | 使用飞书 user_id |
|
||||||
|
| mobile | 使用手机号,后续按发起人私聊时使用;飞书发送消息 API 的 receive_id_type 不支持手机号,发送前需先通过通讯录接口将手机号换取为 open_id 或 user_id |
|
||||||
|
| missing | 未配置映射 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、模型详细设计
|
||||||
|
|
||||||
|
### 4.1 FeishuUserMapping
|
||||||
|
|
||||||
|
字段见数据库设计。模型需提供方法:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def preferred_identifier(self) -> tuple[str, str]:
|
||||||
|
if self.feishu_open_id:
|
||||||
|
return "open_id", self.feishu_open_id
|
||||||
|
if self.feishu_user_id:
|
||||||
|
return "user_id", self.feishu_user_id
|
||||||
|
if self.feishu_mobile:
|
||||||
|
return "mobile", self.feishu_mobile
|
||||||
|
return "missing", ""
|
||||||
|
```
|
||||||
|
|
||||||
|
`clean()` 校验:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def clean(self):
|
||||||
|
if not (self.feishu_open_id or self.feishu_user_id or self.feishu_mobile):
|
||||||
|
raise ValidationError("feishu_open_id、feishu_user_id、feishu_mobile 至少填写一个")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 WorkflowNotificationRecord
|
||||||
|
|
||||||
|
字段见数据库设计。建议方法:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@classmethod
|
||||||
|
def already_sent(cls, dedupe_key: str) -> bool:
|
||||||
|
return cls.objects.filter(dedupe_key=dedupe_key, send_status=cls.SendStatus.SUCCESS).exists()
|
||||||
|
```
|
||||||
|
|
||||||
|
注意:若对 `dedupe_key` 加唯一约束,重复触发时可以直接返回已有记录;若希望保留 skipped_duplicate 记录,则不能对 `dedupe_key` 做全局唯一,只能通过查询判重。本项目需求是“只发一次”,因此更推荐保证每个 `dedupe_key` 至多对应一条成功记录;重复触发时是返回已有记录还是创建 skipped 记录,需在实现计划中二选一。为简化 SQLite 下的实现,首期建议不创建 skipped 记录,直接返回已有成功记录。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、核心服务详细设计
|
||||||
|
|
||||||
|
### 5.1 workflow_adapters.py
|
||||||
|
|
||||||
|
职责:把不同批次对象转换为 `NotificationContext`。
|
||||||
|
|
||||||
|
函数:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def build_file_summary_context(batch: FileSummaryBatch) -> NotificationContext: ...
|
||||||
|
def build_regulatory_review_context(batch: RegulatoryReviewBatch) -> NotificationContext: ...
|
||||||
|
def build_application_form_fill_context(batch: ApplicationFormFillBatch) -> NotificationContext: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
自动汇总摘要:
|
||||||
|
|
||||||
|
| 字段 | 计算方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件总数 | `batch.items.count()` |
|
||||||
|
| 成功解析数 | 解析状态为 success 的 item 数 |
|
||||||
|
| 异常数 | failed、skipped、unsupported 等状态数量 |
|
||||||
|
| 导出文件数 | `ExportedSummaryFile` 中 workflow_type=file_summary 或 batch 关联文件数 |
|
||||||
|
|
||||||
|
法规核查摘要:
|
||||||
|
|
||||||
|
| 字段 | 计算方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 风险总数 | `batch.issues.count()` |
|
||||||
|
| 阻断项 | severity=blocking |
|
||||||
|
| 高风险 | severity=high |
|
||||||
|
| 中风险 | severity=medium |
|
||||||
|
|
||||||
|
自动填表摘要:
|
||||||
|
|
||||||
|
| 字段 | 计算方式 |
|
||||||
|
| --- | --- |
|
||||||
|
| 模板数 | `len(batch.selected_templates)` |
|
||||||
|
| 导出文件数 | 对应 `ExportedSummaryFile` 数量 |
|
||||||
|
| 冲突字段数 | `len(batch.conflict_summary or [])` |
|
||||||
|
| 失败原因 | `batch.error_message` 或节点错误摘要 |
|
||||||
|
|
||||||
|
### 5.2 recipient.py
|
||||||
|
|
||||||
|
职责:首期根据环境变量解析指定个人接收人;后续可扩展为根据系统用户解析飞书目标。
|
||||||
|
|
||||||
|
伪代码:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def resolve_feishu_target(user: User) -> ResolvedFeishuTarget:
|
||||||
|
if settings.FEISHU_DEFAULT_USER_OPEN_ID:
|
||||||
|
return ResolvedFeishuTarget(
|
||||||
|
mapping_id=None,
|
||||||
|
display_name=getattr(settings, "FEISHU_DEFAULT_TARGET_NAME", "指定个人账号"),
|
||||||
|
identifier_type="open_id",
|
||||||
|
identifier_value=settings.FEISHU_DEFAULT_USER_OPEN_ID,
|
||||||
|
masked_identifier=mask_identifier(settings.FEISHU_DEFAULT_USER_OPEN_ID),
|
||||||
|
missing=False,
|
||||||
|
)
|
||||||
|
if settings.FEISHU_DEFAULT_USER_ID:
|
||||||
|
return ResolvedFeishuTarget(
|
||||||
|
mapping_id=None,
|
||||||
|
display_name=getattr(settings, "FEISHU_DEFAULT_TARGET_NAME", "指定个人账号"),
|
||||||
|
identifier_type="user_id",
|
||||||
|
identifier_value=settings.FEISHU_DEFAULT_USER_ID,
|
||||||
|
masked_identifier=mask_identifier(settings.FEISHU_DEFAULT_USER_ID),
|
||||||
|
missing=False,
|
||||||
|
)
|
||||||
|
return ResolvedFeishuTarget(
|
||||||
|
mapping_id=None,
|
||||||
|
display_name=user.get_username(),
|
||||||
|
identifier_type="missing",
|
||||||
|
identifier_value="",
|
||||||
|
masked_identifier="",
|
||||||
|
missing=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_feishu_target_by_user_mapping(user: User) -> ResolvedFeishuTarget:
|
||||||
|
mapping = (
|
||||||
|
FeishuUserMapping.objects
|
||||||
|
.filter(system_user=user, is_active=True)
|
||||||
|
.first()
|
||||||
|
)
|
||||||
|
if mapping is None:
|
||||||
|
return ResolvedFeishuTarget(
|
||||||
|
mapping_id=None,
|
||||||
|
display_name=user.get_username(),
|
||||||
|
identifier_type="missing",
|
||||||
|
identifier_value="",
|
||||||
|
masked_identifier="",
|
||||||
|
missing=True,
|
||||||
|
)
|
||||||
|
identifier_type, identifier_value = mapping.preferred_identifier()
|
||||||
|
return ResolvedFeishuTarget(
|
||||||
|
mapping_id=mapping.pk,
|
||||||
|
display_name=mapping.feishu_display_name or user.get_username(),
|
||||||
|
identifier_type=identifier_type,
|
||||||
|
identifier_value=identifier_value,
|
||||||
|
masked_identifier=mask_identifier(identifier_value),
|
||||||
|
missing=identifier_type == "missing",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
脱敏规则:
|
||||||
|
|
||||||
|
| 类型 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| mobile | 保留前三位和后四位,如 `138****1234` |
|
||||||
|
| open_id/user_id | 保留前 6 位和后 4 位 |
|
||||||
|
| missing | 空字符串 |
|
||||||
|
|
||||||
|
首期调度器使用 `resolve_feishu_target()`。`resolve_feishu_target_by_user_mapping()` 作为后续“按发起人私聊”能力预留。
|
||||||
|
|
||||||
|
### 5.3 message_builder.py
|
||||||
|
|
||||||
|
职责:构造富文本 payload 和入库摘要。
|
||||||
|
|
||||||
|
函数:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def build_feishu_post_message(
|
||||||
|
context: NotificationContext,
|
||||||
|
target: ResolvedFeishuTarget,
|
||||||
|
) -> dict: ...
|
||||||
|
|
||||||
|
def build_message_summary(
|
||||||
|
context: NotificationContext,
|
||||||
|
target: ResolvedFeishuTarget,
|
||||||
|
) -> str: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
富文本规则:
|
||||||
|
|
||||||
|
| 场景 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| 有映射 | 加入 `at` 标签 |
|
||||||
|
| 无映射 | 不加入 `at` 标签,增加映射缺失提示 |
|
||||||
|
| 失败状态 | 标题和下一步动作突出失败原因摘要 |
|
||||||
|
| 摘要过长 | 每条摘要最多 120 字,总摘要最多 800 字 |
|
||||||
|
| 链接 | 使用本地地址拼接,后续再切换域名配置 |
|
||||||
|
|
||||||
|
### 5.4 feishu_token.py
|
||||||
|
|
||||||
|
职责:使用 App ID/App Secret 获取并缓存 `tenant_access_token`。
|
||||||
|
|
||||||
|
函数:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_tenant_access_token() -> FeishuTokenResult: ...
|
||||||
|
def refresh_tenant_access_token() -> FeishuTokenResult: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
结果结构:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class FeishuTokenResult:
|
||||||
|
ok: bool
|
||||||
|
tenant_access_token: str
|
||||||
|
expire_seconds: int
|
||||||
|
code: str
|
||||||
|
message: str
|
||||||
|
```
|
||||||
|
|
||||||
|
处理规则:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| App ID/App Secret 缺失 | 返回 failed,错误码 config_missing |
|
||||||
|
| 缓存 token 未过期 | 直接返回缓存 token |
|
||||||
|
| token 过期或不存在 | 调用飞书 token API 重新获取 |
|
||||||
|
| token API 返回失败 | 返回 failed,记录 code/message |
|
||||||
|
| HTTP 超时 | 返回 failed,错误码 timeout |
|
||||||
|
|
||||||
|
### 5.5 feishu_message_api.py
|
||||||
|
|
||||||
|
职责:调用飞书发送消息 API。
|
||||||
|
|
||||||
|
函数:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def send_personal_message(
|
||||||
|
*,
|
||||||
|
tenant_access_token: str,
|
||||||
|
receive_id_type: str,
|
||||||
|
receive_id: str,
|
||||||
|
payload: dict,
|
||||||
|
) -> FeishuMessageApiResult: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
结果结构:
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class FeishuMessageApiResult:
|
||||||
|
ok: bool
|
||||||
|
status_code: int | None
|
||||||
|
code: str
|
||||||
|
message: str
|
||||||
|
duration_ms: int
|
||||||
|
message_id: str = ""
|
||||||
|
```
|
||||||
|
|
||||||
|
异常处理:
|
||||||
|
|
||||||
|
| 异常 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 指定接收人缺失 | 返回 failed,错误码 recipient_missing |
|
||||||
|
| tenant_access_token 缺失 | 返回 failed,错误码 token_missing |
|
||||||
|
| HTTP 超时 | 返回 failed,错误码 timeout |
|
||||||
|
| 非 2xx | 返回 failed,记录 status_code |
|
||||||
|
| 飞书返回 code 非 0 | 返回 failed,记录 code/message |
|
||||||
|
| token 失效 | 刷新 token 后允许同步重试一次消息 API |
|
||||||
|
|
||||||
|
### 5.6 records.py
|
||||||
|
|
||||||
|
职责:判重和落库。
|
||||||
|
|
||||||
|
流程:
|
||||||
|
|
||||||
|
```text
|
||||||
|
输入 NotificationContext
|
||||||
|
-> 查询 dedupe_key 是否已有 success
|
||||||
|
-> 若有,返回已有记录,不发送
|
||||||
|
-> 若未启用真实飞书,创建 disabled/mock 记录
|
||||||
|
-> 若发送成功,创建 success 记录
|
||||||
|
-> 若发送失败,创建 failed 记录
|
||||||
|
```
|
||||||
|
|
||||||
|
字段写入规则:
|
||||||
|
|
||||||
|
| 字段 | 来源 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow_type | context.workflow_type |
|
||||||
|
| workflow_batch_id | context.workflow_batch_id |
|
||||||
|
| workflow_batch_no | context.workflow_batch_no |
|
||||||
|
| workflow_status | context.workflow_status |
|
||||||
|
| dedupe_key | context.dedupe_key |
|
||||||
|
| trigger_user_id | context.trigger_user_id |
|
||||||
|
| feishu_mapping_id | target.mapping_id |
|
||||||
|
| at_identifier_type | target.identifier_type |
|
||||||
|
| at_identifier_masked | target.masked_identifier |
|
||||||
|
| message_summary | `build_message_summary()` |
|
||||||
|
|
||||||
|
### 5.7 dispatcher.py
|
||||||
|
|
||||||
|
对外入口:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def dispatch_workflow_notification(context: NotificationContext) -> WorkflowNotificationRecord:
|
||||||
|
if WorkflowNotificationRecord.already_sent(context.dedupe_key):
|
||||||
|
return WorkflowNotificationRecord.objects.get(
|
||||||
|
dedupe_key=context.dedupe_key,
|
||||||
|
send_status=WorkflowNotificationRecord.SendStatus.SUCCESS,
|
||||||
|
)
|
||||||
|
|
||||||
|
user = User.objects.get(pk=context.trigger_user_id)
|
||||||
|
target = resolve_feishu_target(user)
|
||||||
|
message = build_feishu_post_message(context, target)
|
||||||
|
summary = build_message_summary(context, target)
|
||||||
|
|
||||||
|
if not settings.FEISHU_NOTIFY_ENABLED:
|
||||||
|
return create_disabled_record(context, target, summary)
|
||||||
|
|
||||||
|
token_result = get_tenant_access_token()
|
||||||
|
if not token_result.ok:
|
||||||
|
return create_failed_record(context, target, summary, token_result)
|
||||||
|
|
||||||
|
result = send_personal_message(
|
||||||
|
tenant_access_token=token_result.tenant_access_token,
|
||||||
|
receive_id_type=target.identifier_type,
|
||||||
|
receive_id=target.identifier_value,
|
||||||
|
payload=message,
|
||||||
|
)
|
||||||
|
if result.ok:
|
||||||
|
return create_success_record(context, target, summary, result)
|
||||||
|
return create_failed_record(context, target, summary, result)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、工作流接入点
|
||||||
|
|
||||||
|
| 工作流 | 推荐接入位置 |
|
||||||
|
| --- | --- |
|
||||||
|
| 自动汇总 | 文件汇总批次状态写为 success/partial_success/failed 后 |
|
||||||
|
| 法规核查 | 报告导出和风险项保存后;替换或并行现有 `create_mock_notifications` |
|
||||||
|
| 自动填表 | `notify` 节点中替换或扩展现有 `notify_completion` |
|
||||||
|
|
||||||
|
接入原则:
|
||||||
|
|
||||||
|
| 原则 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 通知异常捕获 | 工作流调用通知服务时捕获异常并记录 non_blocking_errors |
|
||||||
|
| 不回滚业务结果 | 通知失败不修改业务批次成功状态 |
|
||||||
|
| 单点适配 | 工作流只负责生成或传入批次,摘要由 adapter 负责 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、批次详情展示设计
|
||||||
|
|
||||||
|
### 7.1 后端上下文
|
||||||
|
|
||||||
|
为批次详情页提供:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def get_notification_records(workflow_type: str, batch_id: int) -> QuerySet:
|
||||||
|
return WorkflowNotificationRecord.objects.filter(
|
||||||
|
workflow_type=workflow_type,
|
||||||
|
workflow_batch_id=batch_id,
|
||||||
|
).order_by("-created_at")
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7.2 页面展示规则
|
||||||
|
|
||||||
|
| 状态 | 展示 |
|
||||||
|
| --- | --- |
|
||||||
|
| success | “飞书通知已发送”,展示 sent_at |
|
||||||
|
| failed | “飞书通知失败”,展示 error_message |
|
||||||
|
| disabled | “飞书通知未启用” |
|
||||||
|
| 无记录 | “暂无通知记录” |
|
||||||
|
|
||||||
|
三个工作流结果页可复用同一 partial 模板或上下文字段。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、问答预留详细设计
|
||||||
|
|
||||||
|
### 8.1 批次摘要查询服务
|
||||||
|
|
||||||
|
预留函数:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def query_batch_summary(
|
||||||
|
user: User,
|
||||||
|
*,
|
||||||
|
workflow_type: str | None = None,
|
||||||
|
batch_no: str | None = None,
|
||||||
|
latest: bool = False,
|
||||||
|
) -> dict:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
权限规则:
|
||||||
|
|
||||||
|
| 用户 | 可查范围 |
|
||||||
|
| --- | --- |
|
||||||
|
| 管理员 | 全部批次 |
|
||||||
|
| 普通用户 | `batch.user == user` 的批次 |
|
||||||
|
| 未绑定用户 | 不可查 |
|
||||||
|
|
||||||
|
查询对象:
|
||||||
|
|
||||||
|
| 类型 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 明确批次号 | 精确匹配 batch_no |
|
||||||
|
| 最近/最新 | 在有权限范围内按 created_at/finished_at 倒序取第一条 |
|
||||||
|
| 工作流类型 | file_summary、regulatory_review、application_form_fill |
|
||||||
|
|
||||||
|
### 8.2 问答日志服务
|
||||||
|
|
||||||
|
预留函数:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def record_feishu_question_log(
|
||||||
|
*,
|
||||||
|
user: User | None,
|
||||||
|
mapping: FeishuUserMapping | None,
|
||||||
|
source_type: str,
|
||||||
|
question_text: str,
|
||||||
|
intent: str,
|
||||||
|
query_object: dict,
|
||||||
|
answer_summary: str,
|
||||||
|
permission_result: str,
|
||||||
|
status: str,
|
||||||
|
error_message: str = "",
|
||||||
|
) -> FeishuQuestionLog:
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
首期不需要接飞书事件,但测试可直接调用该服务,确认日志字段与权限规则可用。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、测试设计
|
||||||
|
|
||||||
|
### 9.1 单元测试
|
||||||
|
|
||||||
|
| 测试文件 | 用例 |
|
||||||
|
| --- | --- |
|
||||||
|
| `tests/test_feishu_notification.py` | tenant_access_token 获取和缓存 |
|
||||||
|
| `tests/test_feishu_notification.py` | 指定个人接收人优先级 open_id > user_id |
|
||||||
|
| `tests/test_feishu_notification.py` | 指定接收人缺失时写 failed 记录 |
|
||||||
|
| `tests/test_feishu_notification.py` | 真实通知关闭时写 disabled/mock 记录 |
|
||||||
|
| `tests/test_feishu_notification.py` | 消息 API 成功写 success 记录 |
|
||||||
|
| `tests/test_feishu_notification.py` | token 获取失败写 failed 记录 |
|
||||||
|
| `tests/test_feishu_notification.py` | 消息 API 超时写 failed 记录 |
|
||||||
|
| `tests/test_feishu_notification.py` | 同一 dedupe_key 不重复发送 |
|
||||||
|
| `tests/test_feishu_question_reserved.py` | 管理员可查询全部批次摘要 |
|
||||||
|
| `tests/test_feishu_question_reserved.py` | 普通用户只能查询自己的批次 |
|
||||||
|
| `tests/test_feishu_question_reserved.py` | 问答日志不保存完整回答正文 |
|
||||||
|
|
||||||
|
### 9.2 集成测试
|
||||||
|
|
||||||
|
| 场景 | 验证 |
|
||||||
|
| --- | --- |
|
||||||
|
| 自动汇总完成 | 生成通知上下文并写记录 |
|
||||||
|
| 法规核查完成 | 风险摘要正确 |
|
||||||
|
| 自动填表完成 | 导出和冲突摘要正确 |
|
||||||
|
| 批次详情页 | 展示通知状态和失败原因 |
|
||||||
|
|
||||||
|
### 9.3 外部飞书测试
|
||||||
|
|
||||||
|
真实飞书 API 测试不进入默认 CI。建议提供手动命令或 Django management command:
|
||||||
|
|
||||||
|
```text
|
||||||
|
python manage.py send_test_feishu_notification --username owner
|
||||||
|
```
|
||||||
|
|
||||||
|
该命令仅在本地已配置 `FEISHU_NOTIFY_ENABLED=true`、`FEISHU_APP_ID`、`FEISHU_APP_SECRET`,并且已配置 `FEISHU_DEFAULT_USER_OPEN_ID` 或 `FEISHU_DEFAULT_USER_ID` 二者之一后使用。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、异常处理
|
||||||
|
|
||||||
|
| 异常 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 指定接收人缺失 | 不发送真实消息,记录 recipient_missing |
|
||||||
|
| App ID/App Secret 未配置 | 通知已启用时写 failed 记录(错误码 config_missing);通知未启用时写 disabled 记录;两种情况均不发送 |
|
||||||
|
| tenant_access_token 获取失败 | 写 failed,记录 token API 错误 |
|
||||||
|
| 指定接收人 open_id/user_id 未配置 | 写 failed,错误码 recipient_missing |
|
||||||
|
| HTTP 超时 | 写 failed,错误码 timeout |
|
||||||
|
| 飞书返回错误 | 写 failed,记录 code/message |
|
||||||
|
| 通知记录唯一冲突 | 查询已有记录并返回,不重复发送 |
|
||||||
|
| 批次链接生成失败 | 发送无链接摘要,记录 warning 到 message_summary |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、日志与安全
|
||||||
|
|
||||||
|
| 项 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 日志脱敏 | 不打印 App Secret、tenant_access_token、完整手机号 |
|
||||||
|
| 入库脱敏 | 通知记录只保存脱敏接收人标识 |
|
||||||
|
| payload | 不保存完整富文本 payload |
|
||||||
|
| 错误信息 | 保存飞书错误摘要,避免保存敏感请求头 |
|
||||||
|
| 问答日志 | 保存问题、意图、对象和回答摘要,不保存完整回答 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、实施顺序建议
|
||||||
|
|
||||||
|
| 顺序 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 1 | 新增模型、迁移和 Admin |
|
||||||
|
| 2 | 实现用户映射解析和脱敏 |
|
||||||
|
| 3 | 实现飞书富文本构造 |
|
||||||
|
| 4 | 实现 tenant_access_token 获取与缓存 |
|
||||||
|
| 5 | 实现飞书消息 API 发送客户端 |
|
||||||
|
| 6 | 实现通知记录判重和落库 |
|
||||||
|
| 7 | 实现三个工作流 adapter |
|
||||||
|
| 8 | 接入三个工作流完成节点 |
|
||||||
|
| 9 | 批次详情页展示通知状态 |
|
||||||
|
| 10 | 实现问答预留查询服务和日志服务 |
|
||||||
|
| 11 | 补齐单元测试和集成测试 |
|
||||||
963
docs/4.详细设计/5.第1章监管信息材料包生成.md
Normal file
963
docs/4.详细设计/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,963 @@
|
|||||||
|
# 第1章监管信息材料包生成详细设计
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 数据库设计文档 | docs/3.数据库设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 参考详细设计 | docs/4.详细设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 设计日期 | 2026-06-10 |
|
||||||
|
| 设计版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、详细设计目标
|
||||||
|
|
||||||
|
本详细设计用于指导 `regulatory_info_package` 独立工作流开发落地。系统根据用户上传或指定的产品说明书,抽取产品关键信息,基于 `docs/0.原始材料/第1章 监管信息` 下的样例模板生成第1章监管信息材料包,并以 `第1章 监管信息(预生成版).zip` 作为对话摘要首位下载入口。
|
||||||
|
|
||||||
|
核心约束:
|
||||||
|
|
||||||
|
| 约束 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 独立工作流 | 使用 `workflow_type=regulatory_info_package`,拥有独立批次、产物、通知和卡片 |
|
||||||
|
| 独立模块 | 新增 `review_agent/regulatory_info_package/`,与 `application_form_fill` 平级 |
|
||||||
|
| 模型集中 | Django 模型仍集中放在 `review_agent/models.py` |
|
||||||
|
| 节点幂等 | `WorkflowNodeRun` 必须按 `workflow_type + workflow_batch_id + node_code` 幂等创建或加唯一约束 |
|
||||||
|
| 输入优先级 | 用户消息指定文件名优先;其次 active 附件;再兼容最近成功文件汇总 |
|
||||||
|
| 模板固定 | 固定处理第1章监管信息 7 个模板 |
|
||||||
|
| 模板字段化 | 生成逻辑优先写 Word 内容控件 Tag 或稳定占位符,不以手工调整表格格式为前提 |
|
||||||
|
| 规则优先可演示 | 规则抽取可独立跑通;LLM 抽取最多尝试 3 次(含首次调用),全部失败后仍以规则结果继续 |
|
||||||
|
| 文档并发生成 | 工作流整体串行,`generate_docs` 节点内部每个文档可独立线程并发处理 |
|
||||||
|
| `.doc` 兜底 | 能力驱动:有 Word COM/UNO 时优先原生 `.doc`;无原生能力或原生失败时允许生成 `.docx` 兜底文件 |
|
||||||
|
| zip 只含成功文件 | zip 只打包成功或兜底成功的文件;失败文件不进入 zip |
|
||||||
|
| 高亮规则 | 缺失和 LLM-only 黄底;冲突黄底红字 |
|
||||||
|
| 追溯输出 | 用户下载 Excel;JSON 仅保存到后台 logs 目录 |
|
||||||
|
| 前端最小接入 | 不做多说明书选择 UI;不确定时通过对话反问 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、代码结构设计
|
||||||
|
|
||||||
|
### 2.1 目录结构
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/
|
||||||
|
models.py
|
||||||
|
services.py
|
||||||
|
skill_router.py
|
||||||
|
regulatory_info_package/
|
||||||
|
__init__.py
|
||||||
|
constants.py
|
||||||
|
schemas.py
|
||||||
|
storage.py
|
||||||
|
events.py
|
||||||
|
workflow.py
|
||||||
|
views.py
|
||||||
|
services/
|
||||||
|
__init__.py
|
||||||
|
input_select.py
|
||||||
|
template_config.py
|
||||||
|
template_repository.py
|
||||||
|
instruction_extract.py
|
||||||
|
field_extract.py
|
||||||
|
field_merge.py
|
||||||
|
standard_candidates.py
|
||||||
|
document_writer.py
|
||||||
|
docx_document.py
|
||||||
|
legacy_doc_document.py
|
||||||
|
package_generate.py
|
||||||
|
traceability_export.py
|
||||||
|
zip_export.py
|
||||||
|
summary.py
|
||||||
|
notifier.py
|
||||||
|
templates/
|
||||||
|
regulatory_info_package_templates_v1.yaml
|
||||||
|
prompts/
|
||||||
|
field_extract.md
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2.2 文件职责
|
||||||
|
|
||||||
|
| 文件 | 职责 |
|
||||||
|
| --- | --- |
|
||||||
|
| constants.py | 工作流编码、节点定义、触发关键词、模板编码、状态常量 |
|
||||||
|
| schemas.py | dataclass 数据结构,如 `TemplateSpec`、`InstructionExtractResult`、`MergedField`、`GeneratedFileResult` |
|
||||||
|
| storage.py | 批次目录、子目录、hash、产物创建、路径安全校验 |
|
||||||
|
| events.py | 记录与序列化 `WorkflowEvent` |
|
||||||
|
| workflow.py | `RegulatoryInfoPackageWorkflowExecutor`、批次创建、工作流启动 |
|
||||||
|
| views.py | health、start、status、select-input 接口 |
|
||||||
|
| input_select.py | 根据用户消息、active 附件、文件汇总选择说明书 |
|
||||||
|
| template_config.py | YAML 加载、校验、hash |
|
||||||
|
| template_repository.py | 定位样例模板、复制到批次目录、审计字段 Tag/占位符 |
|
||||||
|
| instruction_extract.py | 说明书段落、章节、表格和组成成分表解析 |
|
||||||
|
| field_extract.py | 规则抽取与 LLM 抽取并行执行,LLM 最多尝试 3 次(含首次调用) |
|
||||||
|
| field_merge.py | 合并字段,输出缺失、LLM-only、冲突和高亮决策 |
|
||||||
|
| standard_candidates.py | 从说明书抽标准号,调用现有知识库搜索候选 |
|
||||||
|
| document_writer.py | 文档适配器接口与通用高亮策略 |
|
||||||
|
| docx_document.py | `DocxDocumentAdapter`,处理 `.docx` |
|
||||||
|
| legacy_doc_document.py | `LegacyDocDocumentAdapter`,处理 `.doc` 原生写入与 `.docx` 兜底 |
|
||||||
|
| package_generate.py | 7 个文档生成策略,多线程生成文件 |
|
||||||
|
| traceability_export.py | 生成 `exports/traceability.xlsx` 和 `logs/traceability.json` |
|
||||||
|
| zip_export.py | 生成主下载 zip,只包含成功文件 |
|
||||||
|
| summary.py | 构造助手回显,zip 链接排首位 |
|
||||||
|
| notifier.py | 写专项通知记录,并调用统一通知服务 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、数据模型详细设计
|
||||||
|
|
||||||
|
模型放在 `review_agent/models.py`。
|
||||||
|
|
||||||
|
### 3.1 RegulatoryInfoPackageBatch
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageBatch(models.Model):
|
||||||
|
class Status(models.TextChoices):
|
||||||
|
PENDING = "pending", "待执行"
|
||||||
|
RUNNING = "running", "执行中"
|
||||||
|
WAITING_USER = "waiting_user", "等待用户"
|
||||||
|
SUCCESS = "success", "成功"
|
||||||
|
PARTIAL_SUCCESS = "partial_success", "部分成功"
|
||||||
|
FAILED = "failed", "失败"
|
||||||
|
CANCELLED = "cancelled", "已取消"
|
||||||
|
```
|
||||||
|
|
||||||
|
关键字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| conversation | 所属对话 |
|
||||||
|
| user | 发起用户 |
|
||||||
|
| trigger_message | 触发消息 |
|
||||||
|
| source_attachment | 直接选中的说明书附件,可空 |
|
||||||
|
| source_summary_batch | 兼容文件汇总批次,可空 |
|
||||||
|
| source_summary_item_id | 文件汇总条目 ID,可空 |
|
||||||
|
| batch_no | `RIP-YYYYMMDDHHMMSS-abcdef` |
|
||||||
|
| source_file_name | 说明书原文件名 |
|
||||||
|
| source_storage_path | 说明书存储路径 |
|
||||||
|
| product_name | 抽取产品名称 |
|
||||||
|
| output_zip_name | `第1章 监管信息(预生成版).zip` |
|
||||||
|
| generated_files | 7 个文件状态 |
|
||||||
|
| missing_fields | 缺失字段 |
|
||||||
|
| llm_only_fields | LLM-only 字段 |
|
||||||
|
| conflict_fields | 冲突字段 |
|
||||||
|
| risk_notes | 风险和降级提示 |
|
||||||
|
| adapter_summary | doc/docx 适配器实际执行摘要 |
|
||||||
|
| template_config_version/hash | 模板配置版本和 hash |
|
||||||
|
| work_dir | 批次工作目录 |
|
||||||
|
| is_deleted | 软删除 |
|
||||||
|
|
||||||
|
### 3.2 RegulatoryInfoPackageArtifact
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageArtifact(models.Model):
|
||||||
|
class ArtifactType(models.TextChoices):
|
||||||
|
TEMPLATE_COPY = "template_copy", "模板副本"
|
||||||
|
INSTRUCTION_EXTRACT = "instruction_extract", "说明书抽取结果"
|
||||||
|
FIELD_EXTRACT_RESULT = "field_extract_result", "字段抽取结果"
|
||||||
|
MERGED_FIELDS = "merged_fields", "合并字段"
|
||||||
|
GENERATED_DOCUMENT = "generated_document", "生成文件"
|
||||||
|
TRACEABILITY = "traceability", "追溯清单"
|
||||||
|
ZIP_PACKAGE = "zip_package", "ZIP包"
|
||||||
|
NOTIFICATION_RECORD = "notification_record", "通知记录"
|
||||||
|
```
|
||||||
|
|
||||||
|
`file_format` 包含:`json`、`excel`、`docx`、`doc`、`zip`、`markdown`。
|
||||||
|
|
||||||
|
### 3.3 RegulatoryInfoPackageNotificationRecord
|
||||||
|
|
||||||
|
字段对齐自动填表通知记录:`batch`、`recipient`、`channel`、`export_ids`、`message_summary`、`send_status`、`retry_count`、`external_message_id`、`error_message`、`sent_at`、`is_deleted`。
|
||||||
|
|
||||||
|
### 3.4 ExportedSummaryFile 扩展
|
||||||
|
|
||||||
|
`ExportedSummaryFile.ExportType` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
ZIP = "zip", "ZIP"
|
||||||
|
```
|
||||||
|
|
||||||
|
下载 MIME 按扩展名兜底:
|
||||||
|
|
||||||
|
| 条件 | MIME |
|
||||||
|
| --- | --- |
|
||||||
|
| zip | application/zip |
|
||||||
|
| .doc | application/msword |
|
||||||
|
| .docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、常量设计
|
||||||
|
|
||||||
|
### 4.1 工作流常量
|
||||||
|
|
||||||
|
```python
|
||||||
|
WORKFLOW_TYPE = "regulatory_info_package"
|
||||||
|
DEFAULT_ZIP_NAME = "第1章 监管信息(预生成版).zip"
|
||||||
|
|
||||||
|
REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS = [
|
||||||
|
("prepare", "准备资料", "regulatory_info_package"),
|
||||||
|
("template_copy", "复制模板", "regulatory_info_package"),
|
||||||
|
("text_extract", "抽取说明书", "regulatory_info_package"),
|
||||||
|
("field_extract", "抽取字段", "regulatory_info_package"),
|
||||||
|
("field_merge", "合并字段", "regulatory_info_package"),
|
||||||
|
("generate_docs", "生成材料", "regulatory_info_package"),
|
||||||
|
("highlight_review_items", "标记待确认", "regulatory_info_package"),
|
||||||
|
("trace_export", "追溯清单", "regulatory_info_package"),
|
||||||
|
("zip_export", "打包下载", "regulatory_info_package"),
|
||||||
|
("notify", "通知", "regulatory_info_package"),
|
||||||
|
("completed", "完成", "completed"),
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 触发关键词
|
||||||
|
|
||||||
|
```python
|
||||||
|
REGULATORY_INFO_PACKAGE_TRIGGER_KEYWORDS = [
|
||||||
|
"根据说明书生成第1章监管信息",
|
||||||
|
"生成监管信息材料包",
|
||||||
|
"从说明书生成第1章材料",
|
||||||
|
"第1章监管信息",
|
||||||
|
"监管信息材料包",
|
||||||
|
]
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.3 文件状态
|
||||||
|
|
||||||
|
```python
|
||||||
|
GENERATED_FILE_SUCCESS = "success"
|
||||||
|
GENERATED_FILE_FALLBACK_SUCCESS = "fallback_success"
|
||||||
|
GENERATED_FILE_FAILED = "failed"
|
||||||
|
GENERATED_FILE_SKIPPED = "skipped"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、核心数据结构
|
||||||
|
|
||||||
|
### 5.1 TemplateSpec
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class TemplateSpec:
|
||||||
|
code: str
|
||||||
|
output_name: str
|
||||||
|
source_file: str
|
||||||
|
file_format: str
|
||||||
|
strategy: str
|
||||||
|
include_in_zip: bool
|
||||||
|
prefer_legacy_doc_native: bool = False
|
||||||
|
allow_docx_fallback: bool = True
|
||||||
|
fields: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.2 InstructionExtractResult
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class InstructionExtractResult:
|
||||||
|
source_file_name: str
|
||||||
|
paragraphs: list[str]
|
||||||
|
sections: dict[str, str]
|
||||||
|
tables: list[list[list[str]]]
|
||||||
|
component_tables: list["ComponentTable"]
|
||||||
|
front_text: str
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.3 ProductListRow
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class ProductListRow:
|
||||||
|
package_specification: str
|
||||||
|
item_no: str
|
||||||
|
composition: str
|
||||||
|
component_name: str
|
||||||
|
main_component: str
|
||||||
|
quantity: str
|
||||||
|
source_table_title: str
|
||||||
|
needs_review_fields: list[str] = field(default_factory=list)
|
||||||
|
```
|
||||||
|
|
||||||
|
其中 `item_no` 对应货号,本期固定 `/` 并黄底。
|
||||||
|
|
||||||
|
### 5.4 MergedField
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class MergedField:
|
||||||
|
key: str
|
||||||
|
label: str
|
||||||
|
value: str
|
||||||
|
source: str
|
||||||
|
evidence: str
|
||||||
|
confidence: float
|
||||||
|
highlight_reason: str = "none"
|
||||||
|
needs_review: bool = False
|
||||||
|
rule_value: str = ""
|
||||||
|
llm_value: str = ""
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5.5 GeneratedFileResult
|
||||||
|
|
||||||
|
```python
|
||||||
|
@dataclass
|
||||||
|
class GeneratedFileResult:
|
||||||
|
template_code: str
|
||||||
|
file_name: str
|
||||||
|
requested_format: str
|
||||||
|
actual_format: str
|
||||||
|
status: str
|
||||||
|
path: str = ""
|
||||||
|
artifact_id: int | None = None
|
||||||
|
export_id: int | None = None
|
||||||
|
highlight_count: int = 0
|
||||||
|
missing_count: int = 0
|
||||||
|
llm_only_count: int = 0
|
||||||
|
error_message: str = ""
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、存储目录设计
|
||||||
|
|
||||||
|
```text
|
||||||
|
media/regulatory_info_package/{user_id}/{conversation_id}/{batch_no}/
|
||||||
|
templates/
|
||||||
|
logs/
|
||||||
|
instruction_extract.json
|
||||||
|
field_extract_result.json
|
||||||
|
merged_fields.json
|
||||||
|
doc_adapter_result.json
|
||||||
|
traceability.json
|
||||||
|
generated/
|
||||||
|
CH1.2 监管信息目录.docx
|
||||||
|
CH1.4 申请表.docx
|
||||||
|
CH1.5 产品列表.docx
|
||||||
|
CH1.9 产品申报前沟通的说明.docx
|
||||||
|
CH1.11.1 符合标准的清单.docx
|
||||||
|
CH1.11.5 真实性声明.docx
|
||||||
|
CH1.11.6 符合性声明.docx
|
||||||
|
exports/
|
||||||
|
traceability.xlsx
|
||||||
|
第1章 监管信息(预生成版).zip
|
||||||
|
```
|
||||||
|
|
||||||
|
说明:
|
||||||
|
|
||||||
|
| 目录 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| templates | 模板副本 |
|
||||||
|
| logs | 后台 JSON 产物,不作为用户主下载 |
|
||||||
|
| generated | 生成成功或兜底成功的单文件(CH1.9 原生写入成功时为 `.doc`,兜底成功时为 `.docx`) |
|
||||||
|
| exports | 用户可下载的追溯 Excel 和 zip |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、输入选择详细设计
|
||||||
|
|
||||||
|
### 7.1 选择优先级
|
||||||
|
|
||||||
|
`input_select.py` 的选择顺序:
|
||||||
|
|
||||||
|
1. 用户消息显式指定文件名时,按 active 附件名模糊匹配。
|
||||||
|
2. 当前对话 active 附件中文件名包含“说明书”的 `.docx`。
|
||||||
|
3. 当前对话 active 附件中唯一 `.docx`。
|
||||||
|
4. 最近成功 `FileSummaryBatch.items` 中包含“说明书”的 `.docx`。
|
||||||
|
5. 多候选或无候选时返回 `InputSelectionResult(status="waiting_user")`。
|
||||||
|
|
||||||
|
### 7.2 多候选处理
|
||||||
|
|
||||||
|
本期不新增在线选择弹窗。多候选时:
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 用户消息可模糊匹配唯一附件 | 直接选择 |
|
||||||
|
| 多个候选且无法确定 | 对话反问用户确认哪个说明书 |
|
||||||
|
| 无说明书 | 提示上传产品说明书 |
|
||||||
|
|
||||||
|
反问示例:
|
||||||
|
|
||||||
|
```text
|
||||||
|
我找到多个说明书候选,请回复要使用的文件名:A.docx、B.docx。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、模板配置详细设计
|
||||||
|
|
||||||
|
配置路径:
|
||||||
|
|
||||||
|
```text
|
||||||
|
review_agent/regulatory_info_package/templates/regulatory_info_package_templates_v1.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
必须包含 7 个模板:
|
||||||
|
|
||||||
|
| code | source_file | strategy |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| ch1_2_directory | CH1.2 监管信息目录.docx | directory |
|
||||||
|
| ch1_4_application_form | CH1.4 申请表.docx | application_form |
|
||||||
|
| ch1_5_product_list | CH1.5 产品列表.docx | product_list |
|
||||||
|
| ch1_9_pre_submission | CH1.9 产品申报前沟通的说明.doc | pre_submission |
|
||||||
|
| ch1_11_1_standard_list | CH1.11.1 符合标准的清单.docx | standard_list |
|
||||||
|
| ch1_11_5_authenticity | CH1.11.5 真实性声明.docx | authenticity_statement |
|
||||||
|
| ch1_11_6_compliance | CH1.11.6 符合性声明.docx | compliance_statement |
|
||||||
|
|
||||||
|
校验规则:
|
||||||
|
|
||||||
|
| 校验 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| version 必填 | 写入批次 |
|
||||||
|
| source_dir 存在 | 指向样例目录 |
|
||||||
|
| code 唯一 | 防止覆盖产物 |
|
||||||
|
| source_file 存在 | 缺失则配置错误 |
|
||||||
|
| strategy 合法 | 必须命中生成策略 |
|
||||||
|
| doc 模板标记 | `.doc` 模板需声明 `prefer_legacy_doc_native`,并配置允许 `.docx` 兜底 |
|
||||||
|
|
||||||
|
### 8.1 模板字段化约定
|
||||||
|
|
||||||
|
为避免生成时破坏 Word 表格、复选框、字号、缩进和合并单元格,本工作流优先使用字段化模板:
|
||||||
|
|
||||||
|
| 方式 | 使用场景 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Word 内容控件 Tag | 正式模板优先 | 在 Word 中为产品名、申请人、复选框、日期、说明文字等填写区设置稳定 Tag,代码按 Tag 写入 |
|
||||||
|
| 稳定占位符 | 过渡方案 | 使用 `{{ product_name }}` 等不会影响版式的占位符,代码替换占位符所在 run |
|
||||||
|
| 行标签定位 | 兜底方案 | 仅用于未字段化的旧模板,必须保留原单元格、段落和 run 格式 |
|
||||||
|
|
||||||
|
模板配置中的字段目标优先级:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
targets:
|
||||||
|
- type: content_control_tag
|
||||||
|
tag: product_name
|
||||||
|
- type: placeholder
|
||||||
|
marker: "{{ product_name }}"
|
||||||
|
- type: table_row_label
|
||||||
|
label: 产品名称
|
||||||
|
```
|
||||||
|
|
||||||
|
模板加载时必须执行字段审计:关键字段缺少 Tag/占位符时给出清晰错误或降级说明;不得静默使用会破坏格式的整格重建策略。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、字段抽取详细设计
|
||||||
|
|
||||||
|
### 9.1 规则抽取
|
||||||
|
|
||||||
|
规则抽取必须独立可用,覆盖:
|
||||||
|
|
||||||
|
| 字段 | 规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| product_name | `【产品名称】` 下一段 |
|
||||||
|
| package_specification | `【包装规格】` 至下一章节 |
|
||||||
|
| intended_use | `【预期用途】` 至下一章节 |
|
||||||
|
| detection_principle | `【检测原理】` 至下一章节 |
|
||||||
|
| main_components | `【主要组成成分】` 下方表格摘要 |
|
||||||
|
| storage_condition_and_validity | `【储存条件及有效期】` 至下一章节 |
|
||||||
|
| sample_type | 样本要求章节中的“适用样本类型” |
|
||||||
|
| detection_targets | 预期用途/检测原理中的基因、病原体、靶标 |
|
||||||
|
| applicable_instruments | `【适用仪器】` 至下一章节 |
|
||||||
|
| test_method | `【检验方法】` 摘要 |
|
||||||
|
| standards | 正则抽取标准号 |
|
||||||
|
|
||||||
|
### 9.2 LLM 抽取与重试
|
||||||
|
|
||||||
|
`field_extract.py` 并行执行规则抽取和 LLM 抽取:
|
||||||
|
|
||||||
|
```text
|
||||||
|
ThreadPoolExecutor(max_workers=2)
|
||||||
|
-> rule_extract()
|
||||||
|
-> llm_extract_with_retry(max_attempts=3)
|
||||||
|
```
|
||||||
|
|
||||||
|
LLM 重试策略:
|
||||||
|
|
||||||
|
| 次数 | 间隔 |
|
||||||
|
| --- | --- |
|
||||||
|
| 第 1 次 | 立即 |
|
||||||
|
| 第 2 次 | 等待 1 秒 |
|
||||||
|
| 第 3 次 | 等待 2 秒 |
|
||||||
|
|
||||||
|
三次失败后:
|
||||||
|
|
||||||
|
| 产物 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| risk_notes | 增加 `llm_extract_failed` |
|
||||||
|
| logs/field_extract_result.json | 记录每次错误摘要 |
|
||||||
|
| 工作流 | 继续使用规则结果 |
|
||||||
|
|
||||||
|
LLM 不允许填企业信息、分类编码、管理类别、临床评价路径等说明书无法证明的内容。
|
||||||
|
|
||||||
|
### 9.3 字段合并
|
||||||
|
|
||||||
|
| 场景 | 写入值 | 高亮 | needs_review |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| rule 与 LLM 一致 | rule/LLM 值 | 否 | 否 |
|
||||||
|
| rule 与 LLM 冲突 | 规则优先或配置优先 | 黄底红字 | 是 |
|
||||||
|
| rule 缺失、LLM 命中 | LLM 值 | 黄底 | 是 |
|
||||||
|
| 全部缺失 | `/` | 黄底 | 是 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、文档适配器详细设计
|
||||||
|
|
||||||
|
### 10.1 统一接口
|
||||||
|
|
||||||
|
```python
|
||||||
|
class DocumentAdapter(Protocol):
|
||||||
|
def replace_text(self, old: str, new: str, *, highlight: bool = False, conflict: bool = False) -> int: ...
|
||||||
|
def fill_table_cell(self, row_label: str, value: str, *, highlight: bool = False, conflict: bool = False) -> bool: ...
|
||||||
|
def replace_table(self, marker: str, rows: list[ProductListRow], *, highlight_columns: list[str] | None = None) -> bool: ...
|
||||||
|
def save(self, path: Path) -> Path: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
高亮规则:
|
||||||
|
|
||||||
|
| 类型 | 视觉 |
|
||||||
|
| --- | --- |
|
||||||
|
| missing | 黄色底色 |
|
||||||
|
| llm_only | 黄色底色 |
|
||||||
|
| conflict | 黄色底色 + 红色字体 |
|
||||||
|
|
||||||
|
### 10.2 DocxDocumentAdapter
|
||||||
|
|
||||||
|
实现能力:
|
||||||
|
|
||||||
|
| 方法 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| replace_text | 支持段落与表格中的文本替换,需处理 run 拆分 |
|
||||||
|
| fill_content_control | 按内容控件 Tag 填写文本、日期或复选框 |
|
||||||
|
| replace_placeholder | 按稳定占位符替换文本,保留占位符所在 run/段落格式 |
|
||||||
|
| fill_table_cell | 按行标签定位目标单元格,仅作为未字段化模板的兜底 |
|
||||||
|
| replace_table | 重建 CH1.5 产品列表表格 |
|
||||||
|
| apply_highlight | 使用 `w:shd` 设置黄色底色 |
|
||||||
|
| apply_conflict_style | 黄色底色 + 红字 |
|
||||||
|
|
||||||
|
### 10.3 LegacyDocDocumentAdapter
|
||||||
|
|
||||||
|
接口:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class AdapterCapability:
|
||||||
|
adapter_name: str
|
||||||
|
supports_native_doc_write: bool
|
||||||
|
supports_docx_fallback: bool
|
||||||
|
status: str
|
||||||
|
error_message: str = ""
|
||||||
|
|
||||||
|
class LegacyDocDocumentAdapter:
|
||||||
|
@staticmethod
|
||||||
|
def detect_available_adapter() -> AdapterCapability: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
执行顺序:
|
||||||
|
|
||||||
|
1. 执行能力探测:Word COM、LibreOffice UNO 或其他可写 `.doc` 能力。
|
||||||
|
2. 有原生能力时优先尝试原生打开 `.doc` 并保存 `.doc`。
|
||||||
|
3. 无原生能力或原生失败时,尝试生成同语义 `.docx` 兜底文件,再交给 `DocxDocumentAdapter`。
|
||||||
|
4. 兜底成功时,输出 `CH1.9 产品申报前沟通的说明.docx`,状态为 `fallback_success`。
|
||||||
|
5. 原生和兜底均失败时,该文件状态为 `failed`,不进入 zip。
|
||||||
|
|
||||||
|
兜底成功时的 `adapter_summary.doc` 示例:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"requested_format": "doc",
|
||||||
|
"actual_format": "docx",
|
||||||
|
"adapter": "ConversionFallbackAdapter",
|
||||||
|
"status": "fallback_success"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、材料生成详细设计
|
||||||
|
|
||||||
|
### 11.1 generate_docs 节点并发
|
||||||
|
|
||||||
|
工作流节点仍串行执行,但 `generate_docs` 内部并发生成单文件:
|
||||||
|
|
||||||
|
```python
|
||||||
|
with ThreadPoolExecutor(max_workers=min(7, len(specs))) as executor:
|
||||||
|
futures = [executor.submit(generate_one_document, spec, context) for spec in specs]
|
||||||
|
```
|
||||||
|
|
||||||
|
并发注意事项:
|
||||||
|
|
||||||
|
| 注意事项 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 每个文档使用独立模板副本 | 避免并发写同一文件 |
|
||||||
|
| 共享字段只读 | `merged_fields`、`product_list_rows` 不在子线程修改 |
|
||||||
|
| 数据库写入集中处理 | 子线程返回 `GeneratedFileResult`,主线程统一写 artifact/export |
|
||||||
|
| 异常隔离 | 单文件失败不影响其他文件 |
|
||||||
|
|
||||||
|
### 11.2 7 个生成策略
|
||||||
|
|
||||||
|
| 模板 | 输出规则 |
|
||||||
|
| --- | --- |
|
||||||
|
| CH1.2 | 替换产品名;页码沿用样例 |
|
||||||
|
| CH1.4 | 填产品名、包装规格、预期用途、组成、储存有效期、方法原理;企业/分类等缺失项 `/` 黄底 |
|
||||||
|
| CH1.5 | 按样例表头重建,货号 `/` 黄底 |
|
||||||
|
| CH1.9 | 优先 `.doc` 原生写入;失败则 `.docx` 兜底;兜底失败则不输出 |
|
||||||
|
| CH1.11.1 | 说明书标准号直接写;知识库候选只作为待确认高亮/追溯 |
|
||||||
|
| CH1.11.5 | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
| CH1.11.6 | 保留正文,替换产品名,公司名 `/` 黄底,日期当天 |
|
||||||
|
|
||||||
|
### 11.3 产品名缺失
|
||||||
|
|
||||||
|
规则和 LLM 都抽不到产品名称时:
|
||||||
|
|
||||||
|
| 项 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件内容 | 产品名位置写 `/` 并黄底 |
|
||||||
|
| 批次状态 | 至少 `partial_success` |
|
||||||
|
| zip | 仍生成,包含成功文件 |
|
||||||
|
| 摘要 | 明确提示产品名称待确认 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、追溯与 zip 设计
|
||||||
|
|
||||||
|
### 12.1 追溯 Excel
|
||||||
|
|
||||||
|
用户可下载:
|
||||||
|
|
||||||
|
```text
|
||||||
|
exports/traceability.xlsx
|
||||||
|
```
|
||||||
|
|
||||||
|
创建导出记录:
|
||||||
|
|
||||||
|
```text
|
||||||
|
export_category = traceability
|
||||||
|
export_type = excel
|
||||||
|
```
|
||||||
|
|
||||||
|
字段:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| target_file | 目标文件 |
|
||||||
|
| target_field | 目标字段 |
|
||||||
|
| final_value | 写入值 |
|
||||||
|
| extraction_source | rule、llm、missing、knowledge_candidate |
|
||||||
|
| evidence | 来源片段 |
|
||||||
|
| highlight_reason | missing、llm_only、conflict、rag_candidate |
|
||||||
|
| needs_review | 是否需复核 |
|
||||||
|
|
||||||
|
### 12.2 后台 JSON
|
||||||
|
|
||||||
|
JSON 产物仅写入 `logs/`,按需从后台查看:
|
||||||
|
|
||||||
|
```text
|
||||||
|
logs/instruction_extract.json
|
||||||
|
logs/field_extract_result.json
|
||||||
|
logs/merged_fields.json
|
||||||
|
logs/traceability.json
|
||||||
|
logs/doc_adapter_result.json
|
||||||
|
```
|
||||||
|
|
||||||
|
这些 JSON 产物文件存放在 `logs/` 下,并登记为 `RegulatoryInfoPackageArtifact` 记录,但不作为用户主下载。
|
||||||
|
|
||||||
|
### 12.3 zip 打包
|
||||||
|
|
||||||
|
zip 文件名:
|
||||||
|
|
||||||
|
```text
|
||||||
|
第1章 监管信息(预生成版).zip
|
||||||
|
```
|
||||||
|
|
||||||
|
规则:
|
||||||
|
|
||||||
|
| 场景 | 是否进入 zip |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件状态 `success` | 是 |
|
||||||
|
| 文件状态 `fallback_success` | 是 |
|
||||||
|
| 文件状态 `failed` | 否 |
|
||||||
|
| 文件状态 `skipped` | 否 |
|
||||||
|
|
||||||
|
若 CH1.9 的 `.doc` 原生写入失败、但 `.docx` 兜底成功,zip 中放入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
CH1.9 产品申报前沟通的说明.docx
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、工作流详细设计
|
||||||
|
|
||||||
|
### 13.1 批次创建
|
||||||
|
|
||||||
|
```python
|
||||||
|
def create_regulatory_info_package_batch(
|
||||||
|
*,
|
||||||
|
conversation: Conversation,
|
||||||
|
user,
|
||||||
|
trigger_message: Message | None = None,
|
||||||
|
source_attachment: FileAttachment | None = None,
|
||||||
|
source_summary_batch: FileSummaryBatch | None = None,
|
||||||
|
source_summary_item_id: int | None = None,
|
||||||
|
) -> RegulatoryInfoPackageBatch:
|
||||||
|
```
|
||||||
|
|
||||||
|
创建后初始化 `REGULATORY_INFO_PACKAGE_NODE_DEFINITIONS`。
|
||||||
|
|
||||||
|
### 13.2 执行器
|
||||||
|
|
||||||
|
```python
|
||||||
|
class RegulatoryInfoPackageWorkflowExecutor:
|
||||||
|
def run(self) -> None: ...
|
||||||
|
def _nodes(self): ...
|
||||||
|
def _run_node(self, node: WorkflowNodeRun) -> None: ...
|
||||||
|
def _execute_node(self, node: WorkflowNodeRun) -> None: ...
|
||||||
|
```
|
||||||
|
|
||||||
|
节点执行:
|
||||||
|
|
||||||
|
| 节点 | 关键动作 |
|
||||||
|
| --- | --- |
|
||||||
|
| prepare | 确认说明书,或 waiting_user |
|
||||||
|
| template_copy | 复制 7 个模板 |
|
||||||
|
| template_audit | 审计模板字段 Tag/占位符,记录缺失和降级策略 |
|
||||||
|
| text_extract | 抽取说明书章节和表格 |
|
||||||
|
| field_extract | 规则 + LLM 并行抽取 |
|
||||||
|
| field_merge | 合并字段、高亮决策 |
|
||||||
|
| generate_docs | 多线程生成单文件 |
|
||||||
|
| highlight_review_items | 若生成策略已完成高亮,该节点记录确认结果即可 |
|
||||||
|
| trace_export | 写 Excel 和 logs JSON |
|
||||||
|
| zip_export | 打包成功/兜底成功文件 |
|
||||||
|
| notify | 写专项通知并调用统一通知 |
|
||||||
|
| completed | 写助手摘要 |
|
||||||
|
|
||||||
|
### 13.3 状态落定
|
||||||
|
|
||||||
|
| 条件 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| zip 成功且 7 个文件均 success/fallback_success,且未命中异常处理矩阵中的 partial_success 条件(如产品名缺失、traceability.xlsx 失败) | success |
|
||||||
|
| zip 成功但有 failed/skipped | partial_success |
|
||||||
|
| zip 失败但至少一个单文件成功 | partial_success |
|
||||||
|
| 全部文件失败或关键输入缺失 | failed |
|
||||||
|
| 多说明书候选等待确认 | waiting_user |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、路由与接口详细设计
|
||||||
|
|
||||||
|
### 14.1 skill_router.py
|
||||||
|
|
||||||
|
增加:
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| ROUTE_ACTIONS | 加入 `regulatory_info_package` |
|
||||||
|
| SkillRoute 属性 | `starts_regulatory_info_package` |
|
||||||
|
| deterministic route | 命中触发关键词直接返回 |
|
||||||
|
| LLM prompt | action 列表加入 `regulatory_info_package` |
|
||||||
|
|
||||||
|
### 14.2 services.py
|
||||||
|
|
||||||
|
`stream_message` 增加分支:
|
||||||
|
|
||||||
|
1. 调用 `select_instruction_input(conversation, content)`。
|
||||||
|
2. 若多候选,回复反问,不启动工作流。
|
||||||
|
3. 若无候选,回复请上传说明书。
|
||||||
|
4. 若唯一候选,创建批次并启动工作流。
|
||||||
|
5. SSE 发送 `workflow_started`。
|
||||||
|
|
||||||
|
### 14.3 views.py
|
||||||
|
|
||||||
|
接口:
|
||||||
|
|
||||||
|
```text
|
||||||
|
GET /api/review-agent/regulatory-info-package/health/
|
||||||
|
POST /api/review-agent/regulatory-info-package/start/
|
||||||
|
GET /api/review-agent/regulatory-info-package/<batch_id>/status/
|
||||||
|
POST /api/review-agent/regulatory-info-package/<batch_id>/select-input/
|
||||||
|
```
|
||||||
|
|
||||||
|
`status` 返回:
|
||||||
|
|
||||||
|
| 字段 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| batch | 状态、产品名、缺失/LLM-only/冲突数量 |
|
||||||
|
| nodes | 节点状态 |
|
||||||
|
| generated_files | 7 个文件成功/失败/兜底状态 |
|
||||||
|
| exports | zip、单文件、Excel 下载 |
|
||||||
|
| risk_notes | 风险提示 |
|
||||||
|
| notifications | 通知 |
|
||||||
|
|
||||||
|
zip 不需要 `is_primary` 字段:后端返回 `exports` 时把 zip 排在首位,前端和助手摘要直接按返回顺序展示。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、助手摘要设计
|
||||||
|
|
||||||
|
完成消息结构:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已生成第1章监管信息材料包。
|
||||||
|
|
||||||
|
批次号:RIP-...
|
||||||
|
产品名称:...
|
||||||
|
状态:success / partial_success
|
||||||
|
|
||||||
|
主下载:[第1章 监管信息(预生成版).zip](...)
|
||||||
|
|
||||||
|
| 文件 | 状态 | 下载/原因 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| CH1.2 监管信息目录.docx | 成功 | 下载 |
|
||||||
|
| CH1.9 产品申报前沟通的说明.docx | 兜底成功 | 下载 |
|
||||||
|
| CH1.11.1 符合标准的清单.docx | 失败 | 失败原因 |
|
||||||
|
|
||||||
|
待确认:缺失项 X 个,LLM复核项 Y 个,冲突项 Z 个。
|
||||||
|
```
|
||||||
|
|
||||||
|
要求:
|
||||||
|
|
||||||
|
| 要求 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| zip 首位 | zip 链接必须在单文件列表之前 |
|
||||||
|
| 失败可见 | 失败文件展示状态和原因,无下载链接 |
|
||||||
|
| 兜底提示 | `.doc -> .docx` 时显示“兜底成功” |
|
||||||
|
| 待确认摘要 | 展示 missing、llm_only、conflict 数量 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、前端详细设计
|
||||||
|
|
||||||
|
### 16.1 模板
|
||||||
|
|
||||||
|
`templates/home.html` 增加工具 chip:
|
||||||
|
|
||||||
|
```html
|
||||||
|
<button
|
||||||
|
class="tool-chip"
|
||||||
|
type="button"
|
||||||
|
data-prompt-template="根据说明书生成第1章监管信息"
|
||||||
|
>第1章监管信息</button>
|
||||||
|
```
|
||||||
|
|
||||||
|
`summaryPanel` 增加:
|
||||||
|
|
||||||
|
```html
|
||||||
|
data-regulatory-info-package-status-url-template="/api/review-agent/regulatory-info-package/__batch_id__/status/"
|
||||||
|
```
|
||||||
|
|
||||||
|
### 16.2 app.js
|
||||||
|
|
||||||
|
增加:
|
||||||
|
|
||||||
|
| 位置 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| workflow type 判断 | 支持 `regulatory_info_package` |
|
||||||
|
| 状态 URL 选择 | 使用 `data-regulatory-info-package-status-url-template` |
|
||||||
|
| 终态判断 | success、partial_success、failed、waiting_user |
|
||||||
|
| 导出展示 | 直接按 exports 返回顺序展示,zip 在后端排首位 |
|
||||||
|
|
||||||
|
### 16.3 不做选择 UI
|
||||||
|
|
||||||
|
多说明书候选时,本期不做弹窗。通过对话反问用户确认文件名。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、导出下载权限
|
||||||
|
|
||||||
|
`file_summary.views._export_for_user` 增加:
|
||||||
|
|
||||||
|
```python
|
||||||
|
if exported.workflow_type == "regulatory_info_package":
|
||||||
|
allowed = RegulatoryInfoPackageBatch.objects.filter(
|
||||||
|
pk=exported.workflow_batch_id,
|
||||||
|
conversation__user=user,
|
||||||
|
is_deleted=False,
|
||||||
|
).exists()
|
||||||
|
return exported if allowed else None
|
||||||
|
```
|
||||||
|
|
||||||
|
下载时的 content type 判断需增加对 zip 和 `.doc` 后缀的支持。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、通知详细设计
|
||||||
|
|
||||||
|
`notifier.py`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def notify_completion(batch: RegulatoryInfoPackageBatch, exports: list[ExportedSummaryFile]) -> RegulatoryInfoPackageNotificationRecord:
|
||||||
|
```
|
||||||
|
|
||||||
|
处理:
|
||||||
|
|
||||||
|
| 步骤 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 创建专项通知记录 | 写 `RegulatoryInfoPackageNotificationRecord` |
|
||||||
|
| 调用统一通知 | `dispatch_workflow_notification(build_regulatory_info_package_context(batch))` |
|
||||||
|
| 捕获异常 | 通知失败写记录和 risk_notes,不影响批次下载 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、测试详细设计
|
||||||
|
|
||||||
|
| 测试文件 | 覆盖 |
|
||||||
|
| --- | --- |
|
||||||
|
| test_regulatory_info_package_models.py | 三张表、zip export type、基础关联 |
|
||||||
|
| test_regulatory_info_package_trigger.py | 固定关键词与 LLM action |
|
||||||
|
| test_regulatory_info_package_input_select.py | 文件名模糊匹配、active 附件、多候选反问 |
|
||||||
|
| test_regulatory_info_package_template_config.py | YAML 加载、模板缺失、code 唯一 |
|
||||||
|
| test_regulatory_info_package_instruction_extract.py | 说明书章节和组成表抽取 |
|
||||||
|
| test_regulatory_info_package_field_extract.py | 规则抽取、LLM 三次重试、失败降级 |
|
||||||
|
| test_regulatory_info_package_field_merge.py | missing、llm_only、conflict |
|
||||||
|
| test_regulatory_info_package_docx_writer.py | 替换、表格填充、黄底、红字 |
|
||||||
|
| test_regulatory_info_package_legacy_doc.py | adapter 探测、docx 兜底、失败状态 |
|
||||||
|
| test_regulatory_info_package_package_generate.py | 7 文件生成结果、多线程异常隔离 |
|
||||||
|
| test_regulatory_info_package_traceability.py | Excel 追溯和 logs JSON |
|
||||||
|
| test_regulatory_info_package_zip.py | zip 只包含 success/fallback_success |
|
||||||
|
| test_regulatory_info_package_workflow.py | 节点流转、partial_success、waiting_user |
|
||||||
|
| test_regulatory_info_package_views.py | start/status/download 权限 |
|
||||||
|
| test_regulatory_info_package_frontend.py | chip、卡片、状态 URL |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、异常处理矩阵
|
||||||
|
|
||||||
|
| 异常 | 批次状态 | 处理 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 无说明书 | waiting_user 或不创建批次 | 提示上传说明书 |
|
||||||
|
| 多候选无法匹配 | waiting_user 或不创建批次 | 反问确认文件名 |
|
||||||
|
| 模板缺失 | failed | 列出缺失模板 |
|
||||||
|
| 规则抽取失败 | partial_success/continue | 使用 LLM 结果 |
|
||||||
|
| LLM 三次失败 | continue | 使用规则结果,写 risk_notes |
|
||||||
|
| 产品名缺失 | partial_success | 写 `/` 黄底,继续生成 zip |
|
||||||
|
| 单个 docx 文件生成失败 | partial_success | 不进入 zip,摘要展示失败 |
|
||||||
|
| CH1.9 doc 原生失败但 docx 兜底成功 | success/partial_success | 状态 fallback_success,进入 zip |
|
||||||
|
| CH1.9 doc 和 docx 兜底均失败 | partial_success | 不进入 zip,摘要展示失败 |
|
||||||
|
| traceability.xlsx 失败 | partial_success | 不阻断 zip |
|
||||||
|
| zip 失败 | partial_success | 保留单文件下载 |
|
||||||
|
| 通知失败 | 不影响主状态 | 写通知失败和 risk_notes |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十一、设计结论
|
||||||
|
|
||||||
|
| 编号 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| D1 | 详细设计文档路径为 `docs/4.详细设计/5.第1章监管信息材料包生成.md` |
|
||||||
|
| D2 | 模型集中在 `review_agent/models.py`,业务模块为 `review_agent/regulatory_info_package/` |
|
||||||
|
| D3 | `.doc` 采用能力驱动策略:探测 Word COM/UNO 等原生能力,有能力时优先原生处理 |
|
||||||
|
| D4 | `.doc` 无原生能力或原生失败时允许 `.docx` 兜底;兜底文件名为 `CH1.9 产品申报前沟通的说明.docx` |
|
||||||
|
| D5 | zip 只包含成功或兜底成功文件,失败文件不进入 zip |
|
||||||
|
| D6 | LLM 最多重试 3 次,失败后使用规则结果继续 |
|
||||||
|
| D7 | 缺失和 LLM-only 黄底,冲突黄底红字 |
|
||||||
|
| D8 | 产品列表使用 `ProductListRow`,货号固定 `/` 黄底 |
|
||||||
|
| D9 | 标准清单只复用现有知识库能力,不新增独立 RAG 流程 |
|
||||||
|
| D10 | 前端最小接入,不做说明书选择弹窗 |
|
||||||
|
| D11 | 追溯 Excel 可下载,JSON 只放后台 logs |
|
||||||
|
| D12 | 本期不新增字段级数据库表 |
|
||||||
|
| D13 | 工作流串行,文档生成节点内部可多线程 |
|
||||||
|
| D14 | 模板优先字段化,正式填充路径使用内容控件 Tag 或稳定占位符,行标签定位仅作为兜底 |
|
||||||
|
| D15 | 本轮只产出详细设计,不写代码、不生成迁移 |
|
||||||
74
docs/5.开发计划/1.自动汇总-前端线框图.md
Normal file
74
docs/5.开发计划/1.自动汇总-前端线框图.md
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
# 自动汇总前端线框图
|
||||||
|
|
||||||
|
## 评审目标
|
||||||
|
|
||||||
|
在实现三栏页面前,先确认审核智能体工作台的信息架构、右侧文件汇总面板、工作流状态展示和移动端降级方式。
|
||||||
|
|
||||||
|
## 桌面端布局
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart LR
|
||||||
|
A["左栏:会话列表<br/>新对话 / 搜索 / 历史会话"] --> B["中栏:聊天区<br/>顶部导航 / 消息流 / 输入框"]
|
||||||
|
B --> C["右栏:文件汇总面板"]
|
||||||
|
C --> C1["上半区:上传区<br/>拖拽上传 / 选择文件 / 上传状态"]
|
||||||
|
C --> C2["中段:当前对话附件<br/>文件名 / 版本 / 大小 / 状态 / 删除"]
|
||||||
|
C --> C3["下半区:工作流卡片<br/>批次号 / 节点进度 / 下载入口"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## 右侧面板结构
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TB
|
||||||
|
P["文件汇总面板"] --> U["上传拖拽区"]
|
||||||
|
U --> U0["无附件:提示上传文件或压缩包"]
|
||||||
|
U --> U1["上传中:显示文件名和处理中状态"]
|
||||||
|
U --> U2["上传失败:展示错误并允许重试"]
|
||||||
|
P --> L["附件列表"]
|
||||||
|
L --> L1["active 版本优先展示"]
|
||||||
|
L --> L2["历史版本保留展示"]
|
||||||
|
L --> L3["逻辑删除后从默认候选移除"]
|
||||||
|
P --> W["工作流卡片列表"]
|
||||||
|
W --> W1["运行中:节点逐项更新"]
|
||||||
|
W --> W2["成功:展示 Markdown/Excel 下载"]
|
||||||
|
W --> W3["失败:展示失败节点和错误说明"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## 工作流状态流转
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
stateDiagram-v2
|
||||||
|
[*] --> Pending: 用户上传附件
|
||||||
|
Pending --> Running: 发送自动汇总提示词
|
||||||
|
Running --> Extracting: 固化附件
|
||||||
|
Extracting --> Scanning: 解压完成或跳过
|
||||||
|
Scanning --> Counting: 生成文件清单
|
||||||
|
Counting --> Detecting: 页数统计完成
|
||||||
|
Detecting --> Reporting: 产品名识别完成
|
||||||
|
Reporting --> Success: 生成报告与下载
|
||||||
|
Running --> Failed: 批次级异常
|
||||||
|
Extracting --> Failed: 解压安全检查失败
|
||||||
|
Reporting --> Failed: 报告生成失败
|
||||||
|
Success --> Restored: 刷新页面后状态恢复
|
||||||
|
Failed --> Restored: 刷新页面后状态恢复
|
||||||
|
```
|
||||||
|
|
||||||
|
## 移动端布局
|
||||||
|
|
||||||
|
```mermaid
|
||||||
|
flowchart TB
|
||||||
|
M["移动端工作台"] --> T["顶部:侧栏按钮 / 当前页面 / 用户菜单"]
|
||||||
|
T --> Chat["聊天区优先展示"]
|
||||||
|
Chat --> Composer["底部输入框"]
|
||||||
|
T --> Drawer["会话侧栏抽屉"]
|
||||||
|
Chat --> Panel["文件汇总面板下移或折叠"]
|
||||||
|
Panel --> Upload["上传区"]
|
||||||
|
Panel --> Workflow["工作流卡片"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## 关键评审点
|
||||||
|
|
||||||
|
- 桌面端保持左侧会话、中间聊天、右侧文件汇总三栏,不改变现有聊天主路径。
|
||||||
|
- 右侧面板上半部分用于上传和附件列表,下半部分用于批次工作流卡片。
|
||||||
|
- 工作流卡片节点顺序固定为:附件固化、压缩包解压、文件扫描、页数统计、产品识别、报告输出、完成。
|
||||||
|
- 助手消息中的文件汇总结果使用安全 Markdown 渲染,用户消息仍按纯文本转义。
|
||||||
|
- 移动端优先保证聊天可用,文件汇总面板折叠或下移,不能遮挡输入框。
|
||||||
415
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md
Normal file
415
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md
Normal file
@@ -0,0 +1,415 @@
|
|||||||
|
# NMPA 注册资料法规核查与整改闭环开发计划(第一批:主链路)
|
||||||
|
|
||||||
|
## 一、已确认口径
|
||||||
|
|
||||||
|
| 问题 | 结论 |
|
||||||
|
| --- | --- |
|
||||||
|
| 第二阶段覆盖范围 | 覆盖原始需求 2、4、5:法规完整性核查、章节/一致性核查、风险预警与整改建议 |
|
||||||
|
| 原始需求 3 | 本阶段只做核查所需的信息抽取,不做自动填写目标文件 |
|
||||||
|
| 执行策略 | 第二阶段拆成两次 Codex 目标执行;第一批先打通 Demo 主链路 |
|
||||||
|
| 启动方式 | 用户对话提示词触发法规核查工作流,不做上传后自动核查 |
|
||||||
|
| 汇总批次 | 默认复用当前对话最近一次成功 `FileSummaryBatch`,不自动串联文件汇总 |
|
||||||
|
| 规则来源 | Demo 先用本地 YAML;数据库记录规则版本、路径、hash、RAG 索引信息 |
|
||||||
|
| 规则差异 | 自动检测 YAML 与数据库记录差异,提示人工确认更新;第一批不做规则管理前端 |
|
||||||
|
| RAG | 必须使用向量库;默认 ChromaDB |
|
||||||
|
| Embedding | Provider 可配置;Demo 默认 SiliconFlow `Qwen/Qwen3-Embedding-4B` |
|
||||||
|
| 法规材料 | 先索引 `docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告` |
|
||||||
|
| 法规文档抽取 | 允许使用 LibreOffice headless 转换本地法规 `.doc` 材料;该依赖只服务 RAG 建库,不改变第一阶段页数统计口径 |
|
||||||
|
| ChromaDB 运行方式 | 第一批采用本地持久化模式,不单独启动 Chroma Server |
|
||||||
|
| 飞书 | 第一批不接真实飞书;暂缓项写入待办计划 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、第一批目标
|
||||||
|
|
||||||
|
第一批只追求“可运行、可演示、可追溯”的法规核查主链路:
|
||||||
|
|
||||||
|
```text
|
||||||
|
已有文件汇总批次
|
||||||
|
-> 用户提示词触发法规核查
|
||||||
|
-> 读取本地 YAML 规则
|
||||||
|
-> 检查规则版本和 RAG 索引状态
|
||||||
|
-> 使用 ChromaDB 检索法规依据
|
||||||
|
-> 完整性核查
|
||||||
|
-> 基础章节核查
|
||||||
|
-> 基础一致性核查
|
||||||
|
-> 风险分级和整改建议
|
||||||
|
-> 生成对话摘要、Markdown 报告、Excel 清单、JSON 结果包
|
||||||
|
-> 前端展示法规核查工作流卡片
|
||||||
|
```
|
||||||
|
|
||||||
|
第一批完成后,Demo 应能展示:
|
||||||
|
|
||||||
|
| 展示项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 法规依据 | RAG 返回本地法规材料来源和片段 |
|
||||||
|
| 完整性问题 | 如缺少注册检验报告、临床评价资料等 |
|
||||||
|
| 章节问题 | 如说明书缺少储存条件、有效期、样本要求等章节 |
|
||||||
|
| 一致性问题 | 如产品名称、型号规格、预期用途在不同文件中不一致 |
|
||||||
|
| 风险清单 | blocking/high/medium/low/info 五级 |
|
||||||
|
| 报告下载 | Markdown、Excel、JSON |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、阶段拆分
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| RR1-0 | 准备与回归 | 确认第一阶段稳定,创建开发分支 | `pytest` 通过 |
|
||||||
|
| RR1-1 | 模型与兼容改造 | 新增法规核查模型,兼容工作流/导出通用字段 | migration 和模型测试通过 |
|
||||||
|
| RR1-2 | YAML 规则与版本记录 | 建立 Demo 规则文件、规则版本表、hash 差异检测 | 能识别 YAML 与 DB 差异 |
|
||||||
|
| RR1-3 | RAG 索引与检索 | 用 ChromaDB + SiliconFlow embedding 构建本地法规索引 | 能检索法规依据 |
|
||||||
|
| RR1-4 | 触发与工作流骨架 | 对话提示词触发法规核查,复用最近成功汇总批次 | 能创建并运行法规核查批次 |
|
||||||
|
| RR1-5 | 核查服务 | 完整性、基础章节、基础一致性核查 | 生成 findings |
|
||||||
|
| RR1-6 | 风险与导出 | 风险归并、Issue 落库、报告导出 | 生成助手摘要和下载文件 |
|
||||||
|
| RR1-7 | 前端与验收 | 法规核查卡片、状态恢复、Markdown 结果展示 | 全量测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、RR1-0 准备与回归
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| RR1-0-001 | 从当前稳定分支创建 `codex/YYYYMMDD-NMPA法规核查主链路` |
|
||||||
|
| RR1-0-002 | 运行 `python manage.py check`、`pytest` |
|
||||||
|
| RR1-0-003 | 记录第一阶段边界:文件夹上传不作为强验收、RAR 依赖 7z、Office 页数统计口径允许不精确 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest
|
||||||
|
git status --short
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请创建第二阶段第一批开发分支,先确认第一阶段文件汇总功能全量测试通过。本阶段不要修改业务代码,只做环境和边界确认。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、RR1-1 模型与兼容改造
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-1-001 | 新增法规核查模型和枚举 | `review_agent/models.py` |
|
||||||
|
| RR1-1-002 | 给 `WorkflowNodeRun` 增加 `workflow_type`、`workflow_batch_id`、`node_group` | `review_agent/models.py` |
|
||||||
|
| RR1-1-003 | 给 `WorkflowEvent` 增加 `workflow_type`、`workflow_batch_id`、`conversation_id` | `review_agent/models.py` |
|
||||||
|
| RR1-1-004 | 给 `ExportedSummaryFile` 增加 `workflow_type`、`workflow_batch_id`、`export_category` | `review_agent/models.py` |
|
||||||
|
| RR1-1-005 | 保持第一阶段文件汇总写入兼容 | `review_agent/file_summary/*` |
|
||||||
|
| RR1-1-006 | 生成 migration 并补模型测试 | `review_agent/migrations/`、`tests/test_regulatory_models.py` |
|
||||||
|
|
||||||
|
### 新增模型
|
||||||
|
|
||||||
|
| 模型 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| `RegulatoryRuleVersion` | 规则版本、YAML 路径、文件 hash、RAG 索引版本 |
|
||||||
|
| `RegulatoryReviewBatch` | 法规核查批次 |
|
||||||
|
| `RegulatoryIssue` | 风险问题和整改状态 |
|
||||||
|
| `RegulatoryArtifact` | 过程产物 |
|
||||||
|
| `RegulatoryNotificationRecord` | mock 通知预留记录,第一批可只建表不接真实通知 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py makemigrations review_agent
|
||||||
|
python manage.py migrate
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_models.py tests/test_file_summary_workflow.py tests/test_file_summary_views.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请新增法规核查相关模型,并轻量通用化现有工作流节点、事件和导出文件表。必须保持第一阶段文件汇总测试通过,不要重写第一阶段工作流。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、RR1-2 YAML 规则与版本记录
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-2-001 | 新建法规核查模块目录 | `review_agent/regulatory_review/` |
|
||||||
|
| RR1-2-002 | 编写 Demo YAML 规则 | `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml` |
|
||||||
|
| RR1-2-003 | 实现规则 hash 计算和版本记录 | `services/rule_loader.py` |
|
||||||
|
| RR1-2-004 | 实现 YAML 与 DB 差异检测 | `services/rule_loader.py` |
|
||||||
|
| RR1-2-005 | 增加规则版本初始化/检查管理命令 | `management/commands/regulatory_rules_check.py` |
|
||||||
|
| RR1-2-006 | 增加测试 | `tests/test_regulatory_rule_loader.py` |
|
||||||
|
|
||||||
|
### Demo 规则至少覆盖
|
||||||
|
|
||||||
|
| 文件项 | 类型 | 风险 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 产品技术要求 | required | blocking |
|
||||||
|
| 说明书 | required | high |
|
||||||
|
| 注册检验报告 | required | blocking |
|
||||||
|
| 临床评价资料 | conditional | high |
|
||||||
|
| 安全和性能基本原则清单 | recommended | medium |
|
||||||
|
|
||||||
|
YAML 规则内容需参考本地法规资料目录:
|
||||||
|
|
||||||
|
```text
|
||||||
|
docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||||
|
```
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_rule_loader.py
|
||||||
|
python manage.py regulatory_rules_check
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请建立 Demo 版 NMPA IVD 注册资料 YAML 规则库,并实现规则版本、文件 hash 和数据库记录差异检测。发现 YAML 与 DB hash 不一致时只提示需要更新,不自动覆盖。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、RR1-3 RAG 索引与检索
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-3-001 | 增加依赖 `chromadb` 和必要 HTTP 客户端 | `requirements.txt` |
|
||||||
|
| RR1-3-002 | 实现 embedding provider 抽象 | `services/rag_embedding.py` |
|
||||||
|
| RR1-3-003 | 实现 SiliconFlow embedding provider | `services/rag_embedding.py` |
|
||||||
|
| RR1-3-004 | 实现法规文档文本抽取和切块 | `services/rag_index.py` |
|
||||||
|
| RR1-3-005 | 实现 ChromaDB 持久化索引构建命令 | `management/commands/regulatory_rag_build.py` |
|
||||||
|
| RR1-3-006 | 实现 RAG 引用检索服务 | `services/rag_citation.py` |
|
||||||
|
| RR1-3-007 | 增加测试 | `tests/test_regulatory_rag.py` |
|
||||||
|
|
||||||
|
### 配置
|
||||||
|
|
||||||
|
| 配置项 | 默认 |
|
||||||
|
| --- | --- |
|
||||||
|
| `REGULATORY_RAG_PROVIDER` | `siliconflow` |
|
||||||
|
| `REGULATORY_RAG_CHROMA_PATH` | `media/regulatory_review/rag/chroma/` |
|
||||||
|
| `SILICONFLOW_BASE_URL` | `https://api.siliconflow.cn/v1` |
|
||||||
|
| `SILICONFLOW_API_KEY` | 从环境变量读取 |
|
||||||
|
| `SILICONFLOW_EMBEDDING_MODEL` | `Qwen/Qwen3-Embedding-4B` |
|
||||||
|
| `SILICONFLOW_EMBEDDING_DIMENSIONS` | `1024` |
|
||||||
|
| `REGULATORY_RAG_COLLECTION` | `nmpa_ivd_registration_v1` |
|
||||||
|
|
||||||
|
SiliconFlow Embedding API 参考:
|
||||||
|
|
||||||
|
```text
|
||||||
|
https://docs.siliconflow.com/en/api-reference/embeddings/create-embeddings
|
||||||
|
```
|
||||||
|
|
||||||
|
### 规则
|
||||||
|
|
||||||
|
| 场景 | 处理 |
|
||||||
|
| --- | --- |
|
||||||
|
| RAG 索引不存在 | 核查时提示先构建索引,不在核查中临时构建 |
|
||||||
|
| Embedding API 不可用 | 构建命令失败,核查不启动 |
|
||||||
|
| RAG 无命中 | 规则问题仍输出,法规依据标记“原文依据待补充” |
|
||||||
|
| 本地法规 `.doc` 无法直接抽取 | 允许通过 LibreOffice headless 转换后抽取;Docker 部署说明需写明可选安装方式 |
|
||||||
|
| ChromaDB 存储 | 使用本地持久化目录,Docker 部署时通过 volume 挂载保留索引 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py regulatory_rag_build
|
||||||
|
pytest tests/test_regulatory_rag.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现基于 ChromaDB 的本地法规 RAG。Embedding Provider 要可配置,Demo 默认使用 SiliconFlow Qwen/Qwen3-Embedding-4B。ChromaDB 使用本地持久化目录,不单独启动服务。法规 `.doc` 材料允许用 LibreOffice headless 转换后抽取。核查流程只检查索引可用性,不临时构建索引。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、RR1-4 触发与工作流骨架
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-4-001 | 实现法规核查提示词路由 | `review_agent/skill_router.py` |
|
||||||
|
| RR1-4-002 | 实现法规核查批次创建 | `regulatory_review/workflow.py` |
|
||||||
|
| RR1-4-003 | 默认查找当前对话最近成功 `FileSummaryBatch` | `workflow.py` |
|
||||||
|
| RR1-4-004 | 无成功汇总批次时提示用户先执行自动汇总 | `services.py` |
|
||||||
|
| RR1-4-005 | 实现启动、状态、事件接口 | `regulatory_review/views.py`、`urls.py` |
|
||||||
|
| RR1-4-006 | 接入项目 URL | `config/urls.py` 或 `review_agent/urls.py` |
|
||||||
|
| RR1-4-007 | 增加测试 | `tests/test_regulatory_workflow.py`、`tests/test_regulatory_views.py` |
|
||||||
|
|
||||||
|
### 第一批节点
|
||||||
|
|
||||||
|
```text
|
||||||
|
prepare
|
||||||
|
-> rule_scope
|
||||||
|
-> completeness_check
|
||||||
|
-> text_extract
|
||||||
|
-> structure_check
|
||||||
|
-> consistency_check
|
||||||
|
-> risk_assess
|
||||||
|
-> report_export
|
||||||
|
-> completed
|
||||||
|
```
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_workflow.py tests/test_regulatory_views.py
|
||||||
|
pytest tests/test_file_summary_trigger.py tests/test_llm_streaming.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现法规核查提示词触发和工作流骨架。用户说“法规核查、NMPA核查、完整性核查、风险预警”等意图时启动 regulatory_review;默认复用当前对话最近成功 FileSummaryBatch;没有成功汇总批次时提示先自动汇总。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、RR1-5 核查服务
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-5-001 | 实现统一 Finding dataclass | `regulatory_review/schemas.py` |
|
||||||
|
| RR1-5-002 | 完整性核查:文件名、目录名、首页文本匹配 | `services/completeness_check.py` |
|
||||||
|
| RR1-5-003 | 文本抽取:docx/pdf/xlsx/pptx/txt/md 基础文本 | `services/text_extract.py` |
|
||||||
|
| RR1-5-004 | 基础章节核查:按规则关键词判断章节是否存在 | `services/structure_check.py` |
|
||||||
|
| RR1-5-005 | 基础一致性核查:产品名称、型号规格、预期用途 | `services/consistency_check.py` |
|
||||||
|
| RR1-5-006 | 过程产物保存和 hash | `storage.py` |
|
||||||
|
| RR1-5-007 | 增加测试 | `tests/test_regulatory_completeness.py`、`tests/test_regulatory_text_extract.py`、`tests/test_regulatory_structure.py`、`tests/test_regulatory_consistency.py` |
|
||||||
|
|
||||||
|
### Demo 验收样例
|
||||||
|
|
||||||
|
测试或演示资料中至少构造:
|
||||||
|
|
||||||
|
| 条件 | 预期 |
|
||||||
|
| --- | --- |
|
||||||
|
| 有说明书 | 可匹配说明书规则 |
|
||||||
|
| 有产品技术要求 | 可匹配产品技术要求规则 |
|
||||||
|
| 缺少注册检验报告 | 生成 blocking 问题 |
|
||||||
|
| 说明书缺少储存条件章节 | 生成 high 或 medium 问题 |
|
||||||
|
| 产品名称在两个文件中不一致 | 生成 consistency 问题 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_completeness.py tests/test_regulatory_text_extract.py tests/test_regulatory_structure.py tests/test_regulatory_consistency.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现完整性核查、文本抽取、基础章节核查和基础一致性核查。所有核查服务只返回 Finding,不直接创建 RegulatoryIssue。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、RR1-6 风险与导出
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-6-001 | Findings 去重和风险归并 | `services/risk_assess.py` |
|
||||||
|
| RR1-6-002 | RAG 引用挂载到问题证据 | `services/risk_assess.py`、`services/rag_citation.py` |
|
||||||
|
| RR1-6-003 | 创建 `RegulatoryIssue` | `services/risk_assess.py` |
|
||||||
|
| RR1-6-004 | 生成 Markdown 核查报告 | `services/export.py` |
|
||||||
|
| RR1-6-005 | 生成 Excel 缺失清单 | `services/export.py` |
|
||||||
|
| RR1-6-006 | 生成 JSON 结果包 | `services/export.py` |
|
||||||
|
| RR1-6-007 | 工作流完成后写入助手消息 | `workflow.py` |
|
||||||
|
| RR1-6-008 | 增加测试 | `tests/test_regulatory_risk_assess.py`、`tests/test_regulatory_export.py` |
|
||||||
|
|
||||||
|
### 对话摘要
|
||||||
|
|
||||||
|
助手消息至少包含:
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
已完成 NMPA 注册资料法规核查。
|
||||||
|
|
||||||
|
| 风险等级 | 数量 |
|
||||||
|
| --- | --- |
|
||||||
|
| 阻断项 | 1 |
|
||||||
|
| 高风险 | 1 |
|
||||||
|
|
||||||
|
| 等级 | 问题 | 状态 | 建议 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| 阻断项 | 缺少注册检验报告 | 待处理 | 请补充注册检验报告并复核 |
|
||||||
|
|
||||||
|
[下载 Markdown 核查报告](...)
|
||||||
|
[下载 Excel 缺失清单](...)
|
||||||
|
[下载 JSON 结果包](...)
|
||||||
|
```
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_risk_assess.py tests/test_regulatory_export.py tests/test_regulatory_workflow.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现风险归并、RAG 法规依据挂载、Issue 落库和最终报告导出。工作流完成后必须向当前对话写入 Markdown 摘要和下载链接。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、RR1-7 前端与总体验收
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR1-7-001 | 工作流卡片支持 `regulatory_review` 类型 | `templates/home.html`、`static/js/app.js` |
|
||||||
|
| RR1-7-002 | 卡片使用 `workflow_type + workflow_batch_id` 区分 | `static/js/app.js` |
|
||||||
|
| RR1-7-003 | 显示法规核查节点和风险摘要 | `templates/home.html`、`static/js/app.js` |
|
||||||
|
| RR1-7-004 | 页面刷新恢复法规核查卡片 | `views.py`、`static/js/app.js` |
|
||||||
|
| RR1-7-005 | 补前端测试 | `tests/test_regulatory_frontend.py` |
|
||||||
|
| RR1-7-006 | 全量回归 | 全项目 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
如浏览器可用,再运行 Playwright 端到端验证。
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请在现有工作流卡片轮播基础上支持 regulatory_review 类型,展示法规核查节点、风险摘要和完成状态。最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、第一批 Codex 目标模式提示词
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md 执行第二阶段第一批开发。
|
||||||
|
|
||||||
|
目标:
|
||||||
|
完成 NMPA 法规核查主链路,复用当前对话最近成功 FileSummaryBatch,通过用户提示词触发 regulatory_review 工作流,实现 YAML 规则、ChromaDB + SiliconFlow Embedding RAG、完整性核查、基础章节核查、基础一致性核查、风险分级、Markdown/Excel/JSON 报告和前端法规核查卡片。
|
||||||
|
|
||||||
|
执行规则:
|
||||||
|
1. 创建 codex/YYYYMMDD-NMPA法规核查主链路 分支。
|
||||||
|
2. 按 RR1-0 到 RR1-7 顺序执行,不跳阶段。
|
||||||
|
3. 每阶段完成后运行对应验证命令。
|
||||||
|
4. 第一阶段文件汇总测试不得回归。
|
||||||
|
5. 不自动串联文件汇总;没有成功汇总批次时提示用户先自动汇总。
|
||||||
|
6. 不接真实飞书,不做规则管理前端,不做自动填写目标文件。
|
||||||
|
7. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
|
```
|
||||||
304
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第二批完整闭环.md
Normal file
304
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第二批完整闭环.md
Normal file
@@ -0,0 +1,304 @@
|
|||||||
|
# NMPA 注册资料法规核查与整改闭环开发计划(第二批:完整闭环补齐)
|
||||||
|
|
||||||
|
## 一、第二批目标
|
||||||
|
|
||||||
|
第二批在第一批主链路通过后执行,补齐完整整改闭环和交互能力:
|
||||||
|
|
||||||
|
```text
|
||||||
|
适用条件对话选择框
|
||||||
|
-> waiting_user 暂停恢复
|
||||||
|
-> 附件 4 申报资料目录规则对齐
|
||||||
|
-> 整包复核
|
||||||
|
-> 缺失项复核
|
||||||
|
-> mock 通知留痕
|
||||||
|
-> 更完整的过程产物
|
||||||
|
-> 更强的前端交互和验收测试
|
||||||
|
```
|
||||||
|
|
||||||
|
飞书真实 CLI/API、规则管理前端、自动填写目标文件不在第二批落地,进入 `docs/6.待办计划/第二阶段暂缓事项.md`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、阶段总览
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| RR2-1 | 适用条件确认 | 对话选择框确认产品类别、注册类型、临床评价路径等 | waiting_user 可暂停恢复 |
|
||||||
|
| RR2-2 | 附件 4 规则对齐与核查能力增强 | 按《体外诊断试剂注册申报资料要求及说明》扩展完整目录规则、章节、一致性、RAG 引用和文本抽取范围 | 能识别附件 4 一级/二级目录缺失和关键字段问题 |
|
||||||
|
| RR2-3 | 整包复核 | 基于新的汇总批次创建新的法规核查批次 | 可追溯来源批次 |
|
||||||
|
| RR2-4 | 缺失项复核 | 针对原 Issue 执行复核并更新状态 | 生成 review_record |
|
||||||
|
| RR2-5 | mock 通知留痕 | 对 blocking/high/medium 写 mock 通知记录 | 报告展示通知记录 |
|
||||||
|
| RR2-6 | 前端和总体验收 | 条件选择框、复核入口、通知/复核记录展示 | 全量测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、RR2-1 适用条件确认
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR2-1-001 | 实现适用条件候选识别 | `services/info_extract.py` |
|
||||||
|
| RR2-1-002 | 工作流支持 `waiting_user` 暂停 | `regulatory_review/workflow.py` |
|
||||||
|
| RR2-1-003 | 实现条件确认接口 | `regulatory_review/views.py` |
|
||||||
|
| RR2-1-004 | 实现对话选择框 UI | `templates/home.html`、`static/js/app.js` |
|
||||||
|
| RR2-1-005 | 确认后从 `rule_scope` 或下一节点恢复 | `workflow.py` |
|
||||||
|
| RR2-1-006 | 增加测试 | `tests/test_regulatory_condition.py`、`tests/test_regulatory_frontend.py` |
|
||||||
|
|
||||||
|
### 确认字段
|
||||||
|
|
||||||
|
以下选项来自既有第二阶段功能/详细设计:`RegulatoryInfoExtract` 输出产品类别、注册类型、临床评价路径,功能设计中明确注册类型包括“首次注册、变更注册、延续注册等”,临床评价路径包括“临床试验、免临床、同品种比对等”。因此 Demo 版按下表实现。
|
||||||
|
|
||||||
|
| 字段 | 交互 |
|
||||||
|
| --- | --- |
|
||||||
|
| 产品类别 | 体外诊断试剂 / 医疗器械 / 其他 |
|
||||||
|
| 注册类型 | 首次注册 / 变更注册 / 延续注册 |
|
||||||
|
| 临床评价路径 | 临床试验 / 免临床 / 同品种比对 / 待确认 |
|
||||||
|
| 产品名称 | 文本输入 |
|
||||||
|
| 型号规格 | 文本输入 |
|
||||||
|
| 预期用途 | 文本输入 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_condition.py tests/test_regulatory_frontend.py tests/test_regulatory_workflow.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现法规适用条件候选识别、waiting_user 暂停恢复和对话选择框确认。用户确认前工作流不得继续执行规则裁剪。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、RR2-2 附件 4 规则对齐与核查能力增强
|
||||||
|
|
||||||
|
### 新增口径:附件 4 必须结构化入规则库
|
||||||
|
|
||||||
|
第一批主链路已经可以演示,并经人工确认可通过;但现有 Demo YAML 只覆盖 5 类规则:产品技术要求、说明书、注册检验报告、临床评价资料、安全和性能基本原则清单,与附件 4《体外诊断试剂注册申报资料要求及说明》相比,规则覆盖仍不完整。第二批 RR2-2 必须将附件 4 的申报资料目录结构补入规则库,并作为完整性和章节核查的主要依据。
|
||||||
|
|
||||||
|
附件来源:
|
||||||
|
|
||||||
|
```text
|
||||||
|
docs/0.原始材料/附件 4 体外诊断试剂注册申报资料要求及说明.doc
|
||||||
|
```
|
||||||
|
|
||||||
|
如附件仍为旧版 `.doc`,允许在开发阶段通过 LibreOffice headless、Word COM 或受控脚本转换为 `.docx`/`.txt` 中间产物(Pandoc 不能直接读取旧版 `.doc`,只能用于转换为 `.docx` 之后的进一步处理);中间产物只用于规则抽取和测试夹具,不改变第一阶段文件页数统计口径。
|
||||||
|
|
||||||
|
### 附件 4 目录覆盖范围
|
||||||
|
|
||||||
|
第二批 Demo 规则至少覆盖以下一级和二级标题。规则应支持“章节目录”类目录项、资料文件项、条件适用项和推荐项的区分。
|
||||||
|
|
||||||
|
| 一级目录 | 二级目录/资料项 |
|
||||||
|
| --- | --- |
|
||||||
|
| 1. 监管信息 | 1.1 章节目录、1.2 申请表、1.3 术语/缩写词列表、1.4 产品列表、1.5 关联文件、1.6 申报前与监管机构的联系情况和沟通记录、1.7 符合性声明 |
|
||||||
|
| 2. 综述资料 | 2.1 章节目录、2.2 概述、2.3 产品描述、2.4 预期用途、2.5 申报产品上市历史、2.6 其他需说明的内容 |
|
||||||
|
| 3. 非临床资料 | 3.1 章节目录、3.2 产品风险管理资料、3.3 体外诊断试剂安全和性能基本原则清单、3.4 产品技术要求及检验报告、3.5 分析性能研究、3.6 稳定性研究、3.7 阳性判断值或参考区间研究、3.8 其他资料 |
|
||||||
|
| 4. 临床评价资料 | 4.1 章节目录、4.2 临床评价资料 |
|
||||||
|
| 5. 产品说明书和标签样稿 | 5.1 章节目录、5.2 产品说明书、5.3 标签样稿、5.4 其他资料 |
|
||||||
|
| 6. 质量管理体系文件 | 6.1 综述、6.2 章节目录、6.3 生产制造信息、6.4 质量管理体系程序、6.5 管理职责程序、6.6 资源管理程序、6.7 产品实现程序、6.8 质量管理体系的测量/分析和改进程序、6.9 其他质量体系程序信息、6.10 质量管理体系核查文件 |
|
||||||
|
|
||||||
|
### 规则分级默认值
|
||||||
|
|
||||||
|
| 规则类型 | 默认风险 | 说明 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 一级目录整体缺失 | high | 如缺少“监管信息”“综述资料”“非临床资料”等完整章节 |
|
||||||
|
| 关键法定资料缺失 | blocking | 申请表、符合性声明、产品技术要求及检验报告等 |
|
||||||
|
| 关键技术/评价资料缺失 | high | 产品风险管理资料、分析性能研究、稳定性研究、临床评价资料、产品说明书、标签样稿等 |
|
||||||
|
| 条件适用资料缺失 | medium/high | 如上市历史、申报前沟通记录、其他资料;需结合 RR2-1 适用条件判断 |
|
||||||
|
| 章节目录缺失 | medium | 各一级目录下的章节目录缺失,影响资料可追溯性 |
|
||||||
|
|
||||||
|
### 与现有第一批链路的差异修正
|
||||||
|
|
||||||
|
| 当前能力 | 第二批修正 |
|
||||||
|
| --- | --- |
|
||||||
|
| 完整性核查只按文件名和相对路径匹配 | 增加目录名、首页文本/前若干页文本、章节标题候选匹配 |
|
||||||
|
| YAML 只覆盖 5 个 Demo 条目 | 扩展为附件 4 一级/二级目录规则,保留第一批 5 条并映射到附件 4 对应章节 |
|
||||||
|
| 章节核查只检查说明书储存条件/有效期/样本要求 | 改为同时检查申报资料目录结构和说明书内部关键章节 |
|
||||||
|
| RAG 可能跳过 `.doc` 材料 | 附件 4 必须可被转换或抽取,构建 RAG 前输出可读文本抽取状态 |
|
||||||
|
| 一致性只检查产品名称、型号规格、预期用途 | 保留这三项,并增加管理类别、分类编码、注册类型、临床评价路径等候选字段 |
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR2-2-001 | 将附件 4 `.doc` 抽取为可测试的结构化目录夹具 | `tests/fixtures/regulatory/attachment4_outline.json` 或同等 fixture |
|
||||||
|
| RR2-2-002 | 扩展 YAML 规则,覆盖附件 4 一级/二级目录、别名、适用条件、风险等级和整改建议 | `rules/nmpa_ivd_registration_v1.yaml` |
|
||||||
|
| RR2-2-003 | 增强规则加载校验,确保附件 4 必填目录项都有规则 ID、关键词、风险等级和 citation_query | `services/rule_loader.py` |
|
||||||
|
| RR2-2-004 | 增强完整性核查,支持文件名、目录名、首页文本/前若干页文本、章节标题候选匹配 | `services/completeness_check.py`、`services/text_extract.py` |
|
||||||
|
| RR2-2-005 | 增强文本抽取,缓存章节候选、字段候选、首页文本和抽取状态 | `services/text_extract.py`、`storage.py` |
|
||||||
|
| RR2-2-006 | 增强章节核查,支持附件 4 目录层级、别名、近似标题和证据片段 | `services/structure_check.py` |
|
||||||
|
| RR2-2-007 | 增强一致性核查,支持产品名称、型号规格、预期用途、管理类别、分类编码、注册类型、临床评价路径等来源值 | `services/consistency_check.py` |
|
||||||
|
| RR2-2-008 | RAG 引用写入 `rag_result_json` 过程产物,并记录附件 4 文本抽取/索引状态 | `services/rag_citation.py`、`storage.py` |
|
||||||
|
| RR2-2-009 | 增加附件 4 对齐测试 | `tests/test_regulatory_rule_loader.py`、`tests/test_regulatory_completeness.py`、`tests/test_regulatory_structure.py`、`tests/test_regulatory_consistency.py`、`tests/test_regulatory_rag.py` |
|
||||||
|
|
||||||
|
### 验收样例
|
||||||
|
|
||||||
|
| 样例条件 | 预期 |
|
||||||
|
| --- | --- |
|
||||||
|
| 文件包缺少“监管信息/申请表” | 生成 blocking 或 high 问题,并引用附件 4 监管信息要求 |
|
||||||
|
| 文件包缺少“产品风险管理资料” | 生成 high 问题,category 为 completeness |
|
||||||
|
| 文件包缺少“分析性能研究”或“稳定性研究” | 生成 high 问题,给出补充研究资料建议 |
|
||||||
|
| 文件包有产品技术要求但无检验报告 | 生成 blocking 问题,规则映射到 3.4 |
|
||||||
|
| 文件包有产品说明书但无标签样稿 | 生成 high 问题,规则映射到 5.3 |
|
||||||
|
| 文件包缺少质量管理体系文件 | 生成 high 问题,规则映射到第 6 章 |
|
||||||
|
| 附件 4 `.doc` 未能抽取 | RAG 构建命令失败或明确报告附件 4 抽取失败,不允许静默跳过该核心材料 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_rule_loader.py tests/test_regulatory_completeness.py tests/test_regulatory_structure.py tests/test_regulatory_consistency.py tests/test_regulatory_rag.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请先将附件 4《体外诊断试剂注册申报资料要求及说明》结构化为规则覆盖清单,再增强 YAML、完整性核查、章节核查、一致性核查和 RAG 过程产物。第二批必须覆盖附件 4 的 1-6 章一级目录和主要二级目录;证据必须包含文件路径、命中片段、字段名或规则 ID,便于人工复核。附件 4 作为核心法规材料,不允许在 RAG 构建中静默跳过。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、RR2-3 整包复核
|
||||||
|
|
||||||
|
### 口径
|
||||||
|
|
||||||
|
整包复核不是修改原法规核查批次,而是基于新的成功 `FileSummaryBatch` 创建新的 `RegulatoryReviewBatch`。新批次记录来源批次信息,用于报告中展示“复核来源”。
|
||||||
|
|
||||||
|
复核入口不新增独立页面。前端通过法规核查工作流卡片展示复核入口,用户点击后由 AI 在对话区发起确认与引导。
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR2-3-001 | 新增整包复核启动接口 | `regulatory_review/views.py` |
|
||||||
|
| RR2-3-002 | 支持指定新的 `file_summary_batch_id` | `workflow.py` |
|
||||||
|
| RR2-3-003 | 记录 source/regenerated_from 信息 | `RegulatoryReviewBatch.condition_json` 或独立字段 |
|
||||||
|
| RR2-3-004 | 报告展示整包复核来源 | `services/export.py` |
|
||||||
|
| RR2-3-005 | 增加测试 | `tests/test_regulatory_rectification.py` |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_rectification.py tests/test_regulatory_workflow.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现整包复核:用户完成新的文件汇总后,可基于新 FileSummaryBatch 创建新的 RegulatoryReviewBatch,并在报告中追溯原核查批次。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、RR2-4 缺失项复核
|
||||||
|
|
||||||
|
### 口径
|
||||||
|
|
||||||
|
缺失项复核针对原 `RegulatoryIssue` 更新状态,不新建完整法规核查批次。系统可读取补充文件对应的新 `FileSummaryBatch`,只对指定问题重新匹配相关规则。
|
||||||
|
|
||||||
|
缺失项复核同样不新增独立页面。卡片只展示入口和状态,具体确认动作通过 AI 对话完成,例如确认复核哪些问题、使用哪个补充文件汇总批次。
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR2-4-001 | 实现缺失项复核服务 | `services/rectification_review.py` |
|
||||||
|
| RR2-4-002 | 支持 issue_ids + file_summary_batch_id 输入 | `views.py` |
|
||||||
|
| RR2-4-003 | 复核通过时将问题状态更新为 `review_passed`,不通过时更新为 `review_failed` | `services/rectification_review.py` |
|
||||||
|
| RR2-4-004 | 生成 `review_record` 过程产物 | `storage.py` |
|
||||||
|
| RR2-4-005 | 报告展示复核记录 | `services/export.py` |
|
||||||
|
| RR2-4-006 | 增加测试 | `tests/test_regulatory_rectification.py` |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_rectification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现缺失项复核。复核不重新跑完整法规核查工作流,只针对指定 RegulatoryIssue 和补充文件汇总批次更新问题状态,并生成 review_record 产物。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、RR2-5 mock 通知留痕
|
||||||
|
|
||||||
|
### 口径
|
||||||
|
|
||||||
|
真实飞书暂缓。第二批只在 blocking/high/medium 风险项出现时创建 `RegulatoryNotificationRecord(channel=mock)`,用于报告留痕和第三阶段接入。
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR2-5-001 | 实现 mock notifier | `services/feishu_notifier.py` |
|
||||||
|
| RR2-5-002 | 风险等级 blocking/high/medium 写通知记录 | `workflow.py` |
|
||||||
|
| RR2-5-003 | 通知记录进入 Markdown/Excel/JSON 报告 | `services/export.py` |
|
||||||
|
| RR2-5-004 | 增加测试 | `tests/test_regulatory_notification.py` |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_notification.py tests/test_regulatory_export.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请实现 mock 通知留痕。不要接真实飞书 CLI/API;只为阻断项、高风险、中风险写 RegulatoryNotificationRecord,并在报告中展示。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、RR2-6 前端和总体验收
|
||||||
|
|
||||||
|
### 任务
|
||||||
|
|
||||||
|
| 编号 | 内容 | 文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| RR2-6-001 | 前端显示条件确认卡片 | `templates/home.html`、`static/js/app.js` |
|
||||||
|
| RR2-6-002 | 前端通过工作流卡片展示整包复核入口,并由 AI 对话确认 | `static/js/app.js` |
|
||||||
|
| RR2-6-003 | 前端通过工作流卡片展示缺失项复核入口,并由 AI 对话确认 | `static/js/app.js` |
|
||||||
|
| RR2-6-004 | 卡片展示通知和复核摘要 | `templates/home.html`、`static/js/app.js` |
|
||||||
|
| RR2-6-005 | 补 Playwright 或前端测试 | `tests/test_regulatory_frontend.py` |
|
||||||
|
| RR2-6-006 | 全量回归 | 全项目 |
|
||||||
|
|
||||||
|
### 验证命令
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
### Codex 执行提示
|
||||||
|
|
||||||
|
```text
|
||||||
|
请完善法规核查前端交互,包含条件选择框、卡片式整包复核入口、卡片式缺失项复核入口、AI 对话确认、mock 通知和复核记录展示。不要新增独立复核页面。最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、第二批 Codex 目标模式提示词
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第二批完整闭环.md 执行第二阶段第二批开发。
|
||||||
|
|
||||||
|
前提:
|
||||||
|
第一批主链路已经完成并通过全量测试。
|
||||||
|
|
||||||
|
目标:
|
||||||
|
补齐法规核查完整整改闭环,包括适用条件对话选择框、waiting_user 暂停恢复、附件 4 申报资料目录规则对齐、整包复核、缺失项复核、mock 通知留痕、增强章节/一致性核查和前端交互。
|
||||||
|
|
||||||
|
执行规则:
|
||||||
|
1. 从第一批完成后的稳定分支创建 codex/YYYYMMDD-NMPA法规核查完整闭环 分支。
|
||||||
|
2. 按 RR2-1 到 RR2-6 顺序执行。
|
||||||
|
3. 每阶段完成后运行对应验证命令。
|
||||||
|
4. RR2-2 必须覆盖附件 4 的 1-6 章一级目录和主要二级目录,不能只保留第一批 5 条 Demo 规则。
|
||||||
|
5. 不接真实飞书 CLI/API。
|
||||||
|
6. 不做规则管理前端。
|
||||||
|
7. 不做自动填写目标文件。
|
||||||
|
8. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
|
```
|
||||||
632
docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md
Normal file
632
docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md
Normal file
@@ -0,0 +1,632 @@
|
|||||||
|
# 产品关键信息提取与申报文件自动填表开发计划
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 详细设计文档 | docs/3.详细设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 数据库设计文档 | docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 产品关键信息提取与申报文件自动填表 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 执行方式 | 单人开发 + Codex 目标模式自动化执行 |
|
||||||
|
| 计划日期 | 2026-06-07 |
|
||||||
|
| 计划版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、开发计划目标
|
||||||
|
|
||||||
|
本开发计划用于指导 Codex 目标模式按阶段完成“产品关键信息提取与申报文件自动填表”功能开发。该功能作为独立工作流 `application_form_fill` 实现,由用户对话触发,默认复用当前对话最近成功的文件汇总批次;如本次消息带新附件,则先串联文件汇总,再执行自动填表。
|
||||||
|
|
||||||
|
本期必须完成:独立填表批次、过程产物、飞书通知记录、模板配置、注册证 `.docx` 模板填充、字段抽取与合并、冲突高亮、追溯清单、Word 下载、自动填表工作流卡片和权限校验。
|
||||||
|
|
||||||
|
本期明确不强制完成:PDF 转换、字段级数据库表、`.doc` 模板自动转换、完整安全和性能基本原则清单条目拆解。这些事项已进入 `docs/6.待办计划/第二阶段暂缓事项.md`。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、已确认开发规则
|
||||||
|
|
||||||
|
| 规则项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 工作流类型 | 新增独立 `application_form_fill`,不塞入 `regulatory_review` 工作流 |
|
||||||
|
| 触发方式 | 用户对话触发,如“帮我填注册证”“给我这个内容对应的表格”“为我的方案生成申报模板” |
|
||||||
|
| 模板指定 | 用户可指定模板;未指定时按注册类型生成适用模板 |
|
||||||
|
| 文件来源 | 无新附件时复用当前对话最近成功 `FileSummaryBatch`;有新附件时先自动汇总 |
|
||||||
|
| 模板配置 | 放在 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` |
|
||||||
|
| 字段抽取 | 规则/正则与 LLM 结构化抽取并行,合并处理 |
|
||||||
|
| 冲突处理 | 说明书优先;冲突字段在 Word 中黄色底色、红色字体 |
|
||||||
|
| 输出范围 | Demo 主链路优先 Word + Excel/JSON 追溯清单 |
|
||||||
|
| PDF | 数据结构预留,工作流节点可 skipped,不作为本期强验收 |
|
||||||
|
| 飞书 | 新增自动填表通知记录表,通知失败不阻断下载 |
|
||||||
|
| 数据库 | 新增三张表;字段级明细表暂缓 |
|
||||||
|
| Git 提交 | 每个阶段完成并验证通过后提交一次 |
|
||||||
|
| 测试要求 | 每阶段至少运行对应 pytest;前端阶段补卡片和渲染测试 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、总体验收标准
|
||||||
|
|
||||||
|
| 类别 | 完成标准 |
|
||||||
|
| --- | --- |
|
||||||
|
| 数据库 | `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord` 可通过 migration 落库 |
|
||||||
|
| 导出类型 | `ExportedSummaryFile.ExportType` 支持 `word`、`pdf`,并兼容既有 markdown/excel/json |
|
||||||
|
| 模块结构 | 新增 `review_agent/application_form_fill/` 独立模块 |
|
||||||
|
| 触发 | 用户说“帮我填注册证”等语句可触发 `application_form_fill` |
|
||||||
|
| 文件来源 | 无新附件时复用最近成功汇总批次;无汇总批次时提示上传资料 |
|
||||||
|
| 模板配置 | YAML 可加载、校验,并至少配置注册证格式 `.docx` 已识别字段 |
|
||||||
|
| 字段抽取 | 规则/正则与 LLM 抽取结果均可留底;LLM 失败时规则结果可继续 |
|
||||||
|
| 字段合并 | 说明书优先,冲突字段进入 `conflict_summary` 和追溯清单 |
|
||||||
|
| Word 填充 | 能按表格行名填入注册证模板字段,缺失字段留空 |
|
||||||
|
| 冲突高亮 | 冲突字段在 Word 内黄底红字 |
|
||||||
|
| 追溯清单 | 生成 Excel/JSON,记录规则结果、LLM 结果、合并字段、冲突和来源证据 |
|
||||||
|
| 下载 | 对话框提供填好 Word 和追溯清单下载链接 |
|
||||||
|
| 工作流卡片 | 前端支持 `application_form_fill` 卡片,展示准备资料、选择模板、复制模板、抽取字段、填写 Word 等节点 |
|
||||||
|
| 飞书通知 | 填表完成后写通知记录,可 mock;失败不阻断文件下载 |
|
||||||
|
| 权限 | A 对话不能查询或下载 B 对话的填表批次和导出文件 |
|
||||||
|
| 回归 | 第一批文件汇总、第二批法规核查既有测试不回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、阶段总览
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 阶段验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| AFF-0 | 准备与回归 | 创建开发分支,确认现有测试基线 | `python manage.py check` 和关键回归测试通过 |
|
||||||
|
| AFF-1 | 数据模型与通用导出扩展 | 新增三张表,扩展 word/pdf 导出类型 | migration、模型测试通过 |
|
||||||
|
| AFF-2 | 模块骨架与模板配置 | 新建独立模块、YAML 配置和配置校验 | 模板配置测试通过 |
|
||||||
|
| AFF-3 | 触发与工作流骨架 | 对话触发、批次创建、节点事件和状态查询 | 可创建并运行空工作流 |
|
||||||
|
| AFF-4 | 模板选择与文件来源 | 复用最近汇总批次,支持指定/默认模板选择 | 模板选择和来源批次测试通过 |
|
||||||
|
| AFF-5 | 字段抽取与合并 | 规则/正则 + LLM 并行抽取、冲突归并和产物留底 | 字段抽取、冲突测试通过 |
|
||||||
|
| AFF-6 | Word 填充与追溯导出 | 注册证 Word 填充、冲突高亮、Excel/JSON 追溯 | 可下载 Word 和追溯清单 |
|
||||||
|
| AFF-7 | 飞书通知与对话摘要 | 生成助手摘要、下载链接和通知记录 | 通知、摘要、下载权限测试通过 |
|
||||||
|
| AFF-8 | 前端卡片与总体验收 | 自动填表工作流卡片、状态恢复、全量回归 | 全量测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、AFF-0 准备与回归
|
||||||
|
|
||||||
|
### AFF-0-001 创建开发分支并确认现状
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | Git / 准备 |
|
||||||
|
| 前置任务 | 无 |
|
||||||
|
| 涉及文件 | 无固定文件 |
|
||||||
|
| 目标 | 从当前稳定分支创建 `codex/YYYYMMDD-申报文件自动填表` 开发分支,并确认工作区状态 |
|
||||||
|
| 开发步骤 | 1. 检查当前分支和 `git status`;2. 确认第三批设计文档存在;3. 创建开发分支;4. 记录已有未提交变更,不得回滚用户变更 |
|
||||||
|
| 验收标准 | 分支创建成功,工作区变更来源清楚 |
|
||||||
|
| 验证命令 | `git branch --show-current`; `git status --short` |
|
||||||
|
| Codex 执行提示 | 请创建第三批自动填表开发分支,检查当前工作区状态和设计文档,不要回滚用户已有变更。 |
|
||||||
|
|
||||||
|
### AFF-0-002 运行基线回归
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 回归 |
|
||||||
|
| 前置任务 | AFF-0-001 |
|
||||||
|
| 涉及文件 | 无固定文件 |
|
||||||
|
| 目标 | 确认现有文件汇总和法规核查主流程在开发前可用 |
|
||||||
|
| 开发步骤 | 1. 运行 Django check;2. 运行文件汇总测试;3. 运行法规核查测试;4. 记录失败项并先判断是否为既有问题 |
|
||||||
|
| 验收标准 | 关键回归测试通过,或记录清楚既有失败和本阶段处理策略 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_*.py tests/test_regulatory_*.py` |
|
||||||
|
| Codex 执行提示 | 请在开发前运行 Django check 和文件汇总/法规核查关键测试,确认基线稳定。若存在既有失败,请记录,不要直接改无关代码。 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、AFF-1 数据模型与通用导出扩展
|
||||||
|
|
||||||
|
### AFF-1-001 新增自动填表 ORM 模型
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 后端 |
|
||||||
|
| 前置任务 | AFF-0 |
|
||||||
|
| 涉及文件 | `review_agent/models.py` |
|
||||||
|
| 目标 | 新增 `ApplicationFormFillBatch`、`ApplicationFormFillArtifact`、`ApplicationFormFillNotificationRecord` |
|
||||||
|
| 开发步骤 | 1. 定义批次状态枚举;2. 定义产物类型枚举;3. 定义通知状态和渠道枚举;4. 添加外键到 Conversation、User、Message、FileSummaryBatch、RegulatoryReviewBatch;5. 添加 JSONField、hash、路径、时间字段;6. 添加 `db_table`、索引和唯一约束 |
|
||||||
|
| 验收标准 | 模型字段、表名、索引与数据库设计一致 |
|
||||||
|
| 验证命令 | `python manage.py check` |
|
||||||
|
| Codex 执行提示 | 请按 `docs/4.数据库设计/3.产品关键信息提取与申报文件自动填表.md` 新增自动填表三张表模型,模型集中放在 `review_agent/models.py`。 |
|
||||||
|
|
||||||
|
### AFF-1-002 扩展导出类型和权限查询能力
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 下载 |
|
||||||
|
| 前置任务 | AFF-1-001 |
|
||||||
|
| 涉及文件 | `review_agent/models.py`、导出下载权限相关视图 |
|
||||||
|
| 目标 | 为 `ExportedSummaryFile.ExportType` 增加 `word`、`pdf`,并确保下载权限支持 `application_form_fill` |
|
||||||
|
| 开发步骤 | 1. 扩展 `ExportType.WORD`;2. 扩展 `ExportType.PDF`;3. 检查下载接口按 workflow_type 分派权限;4. 增加 application_form_fill 反查批次的权限路径 |
|
||||||
|
| 验收标准 | Word/PDF 导出记录可创建;填表导出下载权限可追溯到当前用户 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_views.py -k download` |
|
||||||
|
| Codex 执行提示 | 请扩展 ExportedSummaryFile 支持 word/pdf,并让现有下载接口能通过 workflow_type=application_form_fill 校验填表批次权限。 |
|
||||||
|
|
||||||
|
### AFF-1-003 生成迁移并补模型测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 测试 |
|
||||||
|
| 前置任务 | AFF-1-002 |
|
||||||
|
| 涉及文件 | `review_agent/migrations/`、`tests/test_application_form_fill_models.py` |
|
||||||
|
| 目标 | 生成迁移并覆盖新增表的基础约束和权限关系 |
|
||||||
|
| 开发步骤 | 1. 运行 makemigrations;2. 检查 migration 只包含第三批相关变更;3. 运行 migrate;4. 测试批次创建;5. 测试产物 hash 字段;6. 测试通知重试字段;7. 测试 ExportedSummaryFile word 类型 |
|
||||||
|
| 验收标准 | migration 可执行,模型测试通过 |
|
||||||
|
| 验证命令 | `python manage.py makemigrations review_agent`; `python manage.py migrate`; `pytest tests/test_application_form_fill_models.py` |
|
||||||
|
| Codex 执行提示 | 请为第三批模型生成迁移并新增模型测试,覆盖批次、产物、通知记录和 word/pdf 导出类型。 |
|
||||||
|
|
||||||
|
### AFF-1 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_models.py && pytest tests/test_file_summary_views.py -k download
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、AFF-2 模块骨架与模板配置
|
||||||
|
|
||||||
|
### AFF-2-001 创建 application_form_fill 模块骨架
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模块 |
|
||||||
|
| 前置任务 | AFF-1 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/` |
|
||||||
|
| 目标 | 建立独立模块目录、常量、schemas、storage、workflow、views 和 services 包 |
|
||||||
|
| 开发步骤 | 1. 创建模块目录;2. 创建 `constants.py`;3. 创建 `schemas.py`;4. 创建 `storage.py`;5. 创建 `workflow.py`;6. 创建 `views.py`;7. 创建 services 子模块;8. 创建 templates 和 prompts 目录 |
|
||||||
|
| 验收标准 | 模块可 import,不影响既有应用启动 |
|
||||||
|
| 验证命令 | `python manage.py check` |
|
||||||
|
| Codex 执行提示 | 请新增 `review_agent/application_form_fill/` 独立模块骨架,先只放常量、schema、空服务和基础 import,不要改动法规核查模块。 |
|
||||||
|
|
||||||
|
### AFF-2-002 编写模板配置 YAML
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 配置 / 模板 |
|
||||||
|
| 前置任务 | AFF-2-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/templates/application_form_templates_v1.yaml` |
|
||||||
|
| 目标 | 建立模板配置,至少覆盖注册证 `.docx` 已识别字段 |
|
||||||
|
| 开发步骤 | 1. 定义 version;2. 定义 source_dir;3. 配置 `registration_certificate`;4. 配置 `change_registration` 为 `.doc` 待转换模板;5. 配置 `essential_principles` 为 `.doc` 待转换模板;6. 为注册证配置注册人名称、注册人住所、生产地址、产品名称、包装规格、主要组成成分、预期用途、储存条件及有效期、附件等字段 |
|
||||||
|
| 验收标准 | YAML 可解析,注册证字段映射到 table_row |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_config.py` |
|
||||||
|
| Codex 执行提示 | 请新增自动填表模板配置 YAML,配置路径必须是 `review_agent/application_form_fill/templates/application_form_templates_v1.yaml`,先完整录入注册证表格字段。 |
|
||||||
|
|
||||||
|
### AFF-2-003 实现模板配置加载与校验
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 配置 |
|
||||||
|
| 前置任务 | AFF-2-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_config.py`、`tests/test_application_form_fill_template_config.py` |
|
||||||
|
| 目标 | 读取、校验模板配置并计算 hash |
|
||||||
|
| 开发步骤 | 1. 实现 `load_template_config()`;2. 实现 `validate_template_config()`;3. 实现 `compute_config_hash()`;4. 校验 version、source_dir、templates、code 唯一、source_file 存在、target.type 支持;5. 对 `.doc` 待转换模板允许配置存在但标记运行时处理 |
|
||||||
|
| 验收标准 | 有效配置通过,缺失 source_dir 或重复 code 能被测试捕获 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_config.py` |
|
||||||
|
| Codex 执行提示 | 请实现模板配置加载和校验服务,配置错误必须返回清晰错误列表,不要在 import 时直接崩溃。 |
|
||||||
|
|
||||||
|
### AFF-2 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_template_config.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、AFF-3 触发与工作流骨架
|
||||||
|
|
||||||
|
### AFF-3-001 扩展意图路由
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 意图识别 |
|
||||||
|
| 前置任务 | AFF-2 |
|
||||||
|
| 涉及文件 | `review_agent/skill_router.py`、`review_agent/application_form_fill/constants.py`、`tests/test_application_form_fill_trigger.py` |
|
||||||
|
| 目标 | 用户话语命中自动填表意图时返回 `application_form_fill` |
|
||||||
|
| 开发步骤 | 1. 增加触发关键词;2. 支持“帮我填注册证”“对应的表格”“生成申报模板”等;3. 支持指定模板识别入口;4. 保持文件汇总和法规核查路由不回归 |
|
||||||
|
| 验收标准 | 自动填表语句触发正确,普通对话不误触发 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_trigger.py`; `pytest tests/test_regulatory_workflow.py -k router` |
|
||||||
|
| Codex 执行提示 | 请扩展现有意图路由,新增 application_form_fill 动作。不要破坏 file_summary 和 regulatory_review 的现有触发。 |
|
||||||
|
|
||||||
|
### AFF-3-002 实现批次创建和节点初始化
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | AFF-3-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/workflow.py`、`review_agent/application_form_fill/storage.py`、`tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 目标 | 创建填表批次、生成工作目录、初始化节点 |
|
||||||
|
| 开发步骤 | 1. 实现 `build_batch_no()`;2. 实现 `build_batch_work_dir()`;3. 实现 `create_application_form_fill_batch()`;4. 绑定 conversation、user、trigger_message、source_summary_batch;5. 初始化 `FORM_FILL_NODE_DEFINITIONS` 节点;6. 写 workflow_created 事件 |
|
||||||
|
| 验收标准 | 批次编号唯一,节点数量正确,工作目录在受控路径 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k create` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表批次创建和节点初始化,workflow_type 必须写 application_form_fill。 |
|
||||||
|
|
||||||
|
### AFF-3-003 实现工作流执行器骨架
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | AFF-3-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/workflow.py`、`tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 目标 | 实现节点串行执行、状态更新、事件推送和 skipped PDF 节点 |
|
||||||
|
| 开发步骤 | 1. 实现 `FormFillWorkflowExecutor.run()`;2. 实现 `_nodes()`;3. 实现 `_run_node()`;4. 每个节点写 running/success/skipped;5. `pdf_convert` 本期标记 skipped;6. 失败时写 batch.failed |
|
||||||
|
| 验收标准 | 空实现节点可完整跑到 success;PDF 节点 skipped |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k executor` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表工作流执行器骨架,先让节点状态可完整流转,PDF 转换节点本期标记 skipped。 |
|
||||||
|
|
||||||
|
### AFF-3-004 接入流式对话启动逻辑
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 对话 |
|
||||||
|
| 前置任务 | AFF-3-003 |
|
||||||
|
| 涉及文件 | `review_agent/services.py`、`review_agent/application_form_fill/views.py` |
|
||||||
|
| 目标 | 用户触发自动填表时启动工作流;有附件时先自动汇总,无附件时使用最近成功汇总批次 |
|
||||||
|
| 开发步骤 | 1. 在 stream_message 中处理 application_form_fill 路由;2. 如本次存在新附件,复用文件汇总启动逻辑;3. 无新附件时查找最近成功 `FileSummaryBatch`;4. 无来源批次时回复请上传资料;5. 返回 workflow meta |
|
||||||
|
| 验收标准 | 对话触发能创建填表批次;无汇总批次时不崩溃 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k stream` |
|
||||||
|
| Codex 执行提示 | 请把 application_form_fill 接入现有 stream_message。无附件时复用最近成功汇总批次;有新附件时先自动汇总。 |
|
||||||
|
|
||||||
|
### AFF-3 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_application_form_fill_trigger.py tests/test_application_form_fill_workflow.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、AFF-4 模板选择与文件来源
|
||||||
|
|
||||||
|
### AFF-4-001 实现模板指定解析
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模板选择 |
|
||||||
|
| 前置任务 | AFF-3 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_select.py`、`tests/test_application_form_fill_template_select.py` |
|
||||||
|
| 目标 | 从用户话语中识别指定模板 |
|
||||||
|
| 开发步骤 | 1. 识别注册证;2. 识别变更注册备案文件;3. 识别安全和性能基本原则清单;4. 识别全部模板;5. 未指定返回空数组 |
|
||||||
|
| 验收标准 | 指定模板语句可返回正确 template_codes |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_select.py -k requested` |
|
||||||
|
| Codex 执行提示 | 请实现用户指定模板解析,支持注册证、变更注册备案文件、安全和性能基本原则清单、全部模板。 |
|
||||||
|
|
||||||
|
### AFF-4-002 实现注册类型识别和模板选择
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模板选择 |
|
||||||
|
| 前置任务 | AFF-4-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_select.py`、`tests/test_application_form_fill_template_select.py` |
|
||||||
|
| 目标 | 按用户话语、法规确认条件、文件抽取识别注册类型,并选择模板 |
|
||||||
|
| 开发步骤 | 1. 用户话语识别首次注册、变更注册、备案;2. 从 `source_regulatory_batch.condition_json` 读取 confirmed_conditions;3. 从文件抽取候选读取 registration_type;4. 未指定模板时首次注册生成注册证 + 基本原则清单;5. 变更/备案生成变更文件 + 基本原则清单;6. 指定不适用模板允许生成但写 risk_notes |
|
||||||
|
| 验收标准 | 模板选择规则与功能设计一致 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_select.py` |
|
||||||
|
| Codex 执行提示 | 请实现注册类型识别和默认模板选择,优先级是用户话语、已确认法规核查条件、文件抽取、unknown。 |
|
||||||
|
|
||||||
|
### AFF-4-003 实现模板复制服务
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 模板 |
|
||||||
|
| 前置任务 | AFF-4-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/template_repository.py`、`review_agent/application_form_fill/storage.py`、`tests/test_application_form_fill_template_repository.py` |
|
||||||
|
| 目标 | 将原始模板复制到批次目录,原始模板只读 |
|
||||||
|
| 开发步骤 | 1. 根据 TemplateSpec 定位 source_file;2. 复制到 `work_dir/templates`;3. 记录 ApplicationFormFillArtifact(template_copy);4. `.doc` 且无工作模板时返回模板失败,不影响其他模板;5. 路径必须在受控工作目录内 |
|
||||||
|
| 验收标准 | 注册证 `.docx` 可复制;原始文件不被修改;产物 hash 写入 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_template_repository.py` |
|
||||||
|
| Codex 执行提示 | 请实现模板复制服务,只允许复制到批次工作目录,不能直接写原始法规材料目录。 |
|
||||||
|
|
||||||
|
### AFF-4 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_template_select.py tests/test_application_form_fill_template_repository.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、AFF-5 字段抽取与合并
|
||||||
|
|
||||||
|
### AFF-5-001 实现规则/正则字段抽取
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 字段抽取 |
|
||||||
|
| 前置任务 | AFF-4 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py`、`tests/test_application_form_fill_field_extract.py` |
|
||||||
|
| 目标 | 从说明书、产品技术要求等文本中按标签和章节抽取字段 |
|
||||||
|
| 开发步骤 | 1. 复用 `regulatory_review.services.text_extract.extract_text`;2. 识别文件角色;3. 匹配 `字段名:值` 标签行;4. 支持多行值拼接;5. 保存 source_file、source_role、evidence、confidence、extractor=rule |
|
||||||
|
| 验收标准 | 能从测试说明书文本抽取产品名称、预期用途、储存条件、有效期、包装规格 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k rules` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表规则/正则字段抽取,优先覆盖注册证模板字段,抽取结果必须包含来源文件、来源角色和证据片段。 |
|
||||||
|
|
||||||
|
### AFF-5-002 实现 LLM 结构化抽取封装
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / LLM |
|
||||||
|
| 前置任务 | AFF-5-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py`、`review_agent/application_form_fill/prompts/field_extract.md`、`tests/test_application_form_fill_field_extract.py` |
|
||||||
|
| 目标 | 调用现有 LLM 能力输出字段 JSON,失败时降级 |
|
||||||
|
| 开发步骤 | 1. 编写字段抽取 prompt;2. 输入模板字段、文件上下文和候选文本;3. 要求输出 JSON fields/checklist_items;4. 解析 JSON;5. 捕获超时和解析失败;6. 失败返回空 LLM 结果,不阻断规则抽取 |
|
||||||
|
| 验收标准 | monkeypatch LLM 后可解析结构化字段;LLM 异常时工作流继续 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k llm` |
|
||||||
|
| Codex 执行提示 | 请实现 LLM 结构化抽取封装,必须可测试、可降级。LLM 输出解析失败不能导致整个填表批次失败。 |
|
||||||
|
|
||||||
|
### AFF-5-003 实现并行抽取和产物留底
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 字段抽取 |
|
||||||
|
| 前置任务 | AFF-5-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_extract.py`、`review_agent/application_form_fill/storage.py` |
|
||||||
|
| 目标 | 并行执行规则/正则和 LLM 抽取,并保存 `field_extract_result.json` |
|
||||||
|
| 开发步骤 | 1. 使用 ThreadPoolExecutor;2. 规则和 LLM 两路并行;3. 组装 regex_results、llm_results、selected_templates、source_evidence;4. 保存 JSON;5. 写 ApplicationFormFillArtifact(field_extract_result) |
|
||||||
|
| 验收标准 | JSON 产物包含两路结果和模板列表 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_extract.py -k parallel` |
|
||||||
|
| Codex 执行提示 | 请实现字段并行抽取和 field_extract_result.json 产物留底,LLM 失败时也必须保存规则结果。 |
|
||||||
|
|
||||||
|
### AFF-5-004 实现字段合并与冲突检测
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 字段合并 |
|
||||||
|
| 前置任务 | AFF-5-003 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/field_merge.py`、`tests/test_application_form_fill_field_merge.py` |
|
||||||
|
| 目标 | 合并规则和 LLM 字段,说明书优先,并生成冲突摘要 |
|
||||||
|
| 开发步骤 | 1. 实现字段值归一化;2. 实现来源优先级排序;3. 同字段多值一致时合并;4. 不一致时选择最高优先级来源;5. 说明书与其他文件冲突时标记 conflict;6. 输出 merged_fields 和 conflicts |
|
||||||
|
| 验收标准 | 说明书优先;冲突字段包含 selected_value、selected_source、conflict_values、handling |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_field_merge.py` |
|
||||||
|
| Codex 执行提示 | 请实现字段合并服务,严格按说明书优先处理冲突,并把冲突列表写成可用于对话摘要和追溯清单的结构。 |
|
||||||
|
|
||||||
|
### AFF-5 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_field_extract.py tests/test_application_form_fill_field_merge.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、AFF-6 Word 填充与追溯导出
|
||||||
|
|
||||||
|
### AFF-6-001 实现 Word 表格行填充
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / Word |
|
||||||
|
| 前置任务 | AFF-5 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py`、`tests/test_application_form_fill_word_fill.py` |
|
||||||
|
| 目标 | 使用 `python-docx` 按表格行名写入注册证模板 |
|
||||||
|
| 开发步骤 | 1. 打开 docx 模板副本;2. 遍历 tables/rows/cells;3. 匹配第一列 row_label;4. 写入第二列;5. 缺失字段保持空白;6. 保存 output_path |
|
||||||
|
| 验收标准 | 产品名称、包装规格、预期用途等能写入注册证模板对应行 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k table` |
|
||||||
|
| Codex 执行提示 | 请实现 Word 表格行填充服务,先支持注册证模板的两列表格行名匹配。 |
|
||||||
|
|
||||||
|
### AFF-6-002 实现冲突高亮
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / Word |
|
||||||
|
| 前置任务 | AFF-6-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py`、`tests/test_application_form_fill_word_fill.py` |
|
||||||
|
| 目标 | 冲突字段在 Word 中黄底红字 |
|
||||||
|
| 开发步骤 | 1. 对冲突字段写入 run;2. 设置字体颜色 `FF0000`;3. 设置单元格 shading `FFFF00`;4. 非冲突字段保持原样式;5. 测试读取 docx XML 验证颜色和底色 |
|
||||||
|
| 验收标准 | 冲突字段样式可在 docx XML 中验证 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k highlight` |
|
||||||
|
| Codex 执行提示 | 请实现 Word 冲突高亮,冲突字段必须红色字体和黄色底色,测试需检查 docx XML。 |
|
||||||
|
|
||||||
|
### AFF-6-003 创建 Word 导出记录
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 导出 |
|
||||||
|
| 前置任务 | AFF-6-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/word_fill.py`、`review_agent/application_form_fill/workflow.py` |
|
||||||
|
| 目标 | Word 生成后写入 `ExportedSummaryFile(export_type=word)` 和产物记录 |
|
||||||
|
| 开发步骤 | 1. 按批次号、产品名、模板标签生成文件名;2. 保存到 `work_dir/filled`;3. 创建 `ApplicationFormFillArtifact(filled_template)`;4. 创建 `ExportedSummaryFile`;5. 模板填充失败时记录错误信息 |
|
||||||
|
| 验收标准 | 可查询到 word 导出记录和 filled_template 产物 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_word_fill.py -k export` |
|
||||||
|
| Codex 执行提示 | 请把 Word 填充结果保存为导出文件,export_type 使用 word,workflow_type 使用 application_form_fill。 |
|
||||||
|
|
||||||
|
### AFF-6-004 实现追溯清单 Excel/JSON
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 导出 |
|
||||||
|
| 前置任务 | AFF-6-003 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/traceability_export.py`、`tests/test_application_form_fill_traceability.py` |
|
||||||
|
| 目标 | 输出字段来源追溯清单和合并结果 JSON |
|
||||||
|
| 开发步骤 | 1. 生成“字段追溯”Sheet;2. 生成“冲突字段”Sheet;3. 生成“低置信度条目”Sheet;4. 生成“生成结果”Sheet;5. 保存 Excel;6. 保存 merged_fields.json;7. 创建导出和产物记录 |
|
||||||
|
| 验收标准 | Excel 可打开,包含字段、来源、证据、冲突、处理方式 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_traceability.py` |
|
||||||
|
| Codex 执行提示 | 请实现字段来源追溯清单导出,必须包含规则/LLM 合并结果、冲突字段和生成结果。 |
|
||||||
|
|
||||||
|
### AFF-6 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_word_fill.py tests/test_application_form_fill_traceability.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、AFF-7 飞书通知与对话摘要
|
||||||
|
|
||||||
|
### AFF-7-001 生成助手 Markdown 摘要
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 对话 |
|
||||||
|
| 前置任务 | AFF-6 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/traceability_export.py`、`review_agent/application_form_fill/workflow.py` |
|
||||||
|
| 目标 | 工作流完成后向当前对话写入下载链接和冲突摘要 |
|
||||||
|
| 开发步骤 | 1. 汇总 Word 导出;2. 汇总 PDF 状态为待增强;3. 汇总冲突字段;4. 添加追溯清单下载链接;5. 创建 assistant Message |
|
||||||
|
| 验收标准 | 对话中出现 Markdown 表格、Word 下载、追溯清单下载和冲突摘要 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k summary` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表完成后的助手 Markdown 摘要,PDF 本期显示为待增强,不作为失败。 |
|
||||||
|
|
||||||
|
### AFF-7-002 实现飞书通知记录和 mock 通知
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 通知 |
|
||||||
|
| 前置任务 | AFF-7-001 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/services/notifier.py`、`tests/test_application_form_fill_notification.py` |
|
||||||
|
| 目标 | 填表完成后记录通知,可 mock 发送,失败不阻断下载 |
|
||||||
|
| 开发步骤 | 1. 实现 `notify_completion()`;2. 默认 channel=mock;3. 写 template_codes、export_ids、message_summary;4. 支持 send_status success/failed;5. 失败时记录 error_message 和 retry_count |
|
||||||
|
| 验收标准 | 通知记录可查;通知失败不影响批次核心产物 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_notification.py` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表通知服务,先用 mock 通知记录即可。通知失败不得阻断 Word 下载。 |
|
||||||
|
|
||||||
|
### AFF-7-003 完成工作流状态归并
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | AFF-7-002 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/workflow.py`、`tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 目标 | 根据 Word、追溯清单、通知结果标记 success/partial_success/failed |
|
||||||
|
| 开发步骤 | 1. 所有目标 Word 成功且非关键产物无失败时 success;2. 至少一个 Word 成功,但部分 Word 或非关键产物(如通知)失败时 partial_success;3. 所有 Word 失败时 failed;4. PDF skipped 不导致失败;5. 发送 workflow_completed 事件 |
|
||||||
|
| 验收标准 | 批次状态符合详细设计 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_workflow.py -k status` |
|
||||||
|
| Codex 执行提示 | 请完成自动填表工作流状态归并,PDF skipped 不影响 success,通知失败最多导致 partial_success。 |
|
||||||
|
|
||||||
|
### AFF-7 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_application_form_fill_workflow.py tests/test_application_form_fill_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、AFF-8 前端卡片与总体验收
|
||||||
|
|
||||||
|
### AFF-8-001 后端状态接口
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 接口 |
|
||||||
|
| 前置任务 | AFF-7 |
|
||||||
|
| 涉及文件 | `review_agent/application_form_fill/views.py`、`review_agent/urls.py` 或相关 URL 文件 |
|
||||||
|
| 目标 | 提供自动填表启动和状态查询接口 |
|
||||||
|
| 开发步骤 | 1. 新增 start 接口;2. 新增 detail/status 接口;3. 返回 batch、nodes、conflicts、exports;4. 校验 conversation/user 权限;5. 接入 URL |
|
||||||
|
| 验收标准 | 当前用户可查自己的填表批次,不能查他人批次 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_views.py` |
|
||||||
|
| Codex 执行提示 | 请实现自动填表启动和状态查询接口,所有查询必须校验当前用户权限。 |
|
||||||
|
|
||||||
|
### AFF-8-002 前端支持 application_form_fill 卡片
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 前端 / 工作流卡片 |
|
||||||
|
| 前置任务 | AFF-8-001 |
|
||||||
|
| 涉及文件 | `static/js/app.js`、`templates/home.html`、静态 CSS 文件 |
|
||||||
|
| 目标 | 前端展示自动填表工作流卡片,并根据 SSE 更新节点 |
|
||||||
|
| 开发步骤 | 1. 解析 workflow_type=application_form_fill;2. 定义节点文案;3. 创建卡片;4. 更新节点状态;5. PDF 节点显示待增强/跳过;6. 页面刷新后恢复 |
|
||||||
|
| 验收标准 | 自动填表卡片可显示准备资料、选择模板、复制模板、抽取字段、填写 Word、追溯清单、飞书通知 |
|
||||||
|
| 验证命令 | `pytest tests/test_application_form_fill_frontend.py` 或现有前端测试命令 |
|
||||||
|
| Codex 执行提示 | 请在现有工作流卡片逻辑中新增 application_form_fill 类型,展示自动填表节点并支持状态恢复。 |
|
||||||
|
|
||||||
|
### AFF-8-003 前端展示结果和下载链接
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 前端 / Markdown |
|
||||||
|
| 前置任务 | AFF-8-002 |
|
||||||
|
| 涉及文件 | `static/js/app.js`、模板和 CSS |
|
||||||
|
| 目标 | 对话框正常展示 Word 下载、追溯清单、冲突摘要 |
|
||||||
|
| 开发步骤 | 1. 确认助手 Markdown 渲染支持表格;2. 验证 Word 下载链接点击;3. 验证冲突摘要表格;4. PDF 列显示待增强 |
|
||||||
|
| 验收标准 | 对话结果可读、链接可用、PDF 待增强不被误判为失败 |
|
||||||
|
| 验证命令 | 前端/Playwright 对应测试 |
|
||||||
|
| Codex 执行提示 | 请验证并完善自动填表结果展示,确保 Markdown 表格、Word 下载链接、追溯清单链接和冲突摘要正常显示。 |
|
||||||
|
|
||||||
|
### AFF-8-004 总体验收与回归
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 验收 / 回归 |
|
||||||
|
| 前置任务 | AFF-8-003 |
|
||||||
|
| 涉及文件 | 全项目 |
|
||||||
|
| 目标 | 运行全量测试,确认前三批能力均不回归 |
|
||||||
|
| 开发步骤 | 1. 运行 Django check;2. 运行自动填表测试;3. 运行文件汇总测试;4. 运行法规核查测试;5. 如可用,运行前端/Playwright 测试;6. 检查 git status |
|
||||||
|
| 验收标准 | 全量测试通过;失败项均有解释;无意外文件变更 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest` |
|
||||||
|
| Codex 执行提示 | 请执行第三批自动填表总体验收,运行 Django check 和 pytest 全量回归,确认文件汇总与法规核查不回归。 |
|
||||||
|
|
||||||
|
### AFF-8 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、测试分层要求
|
||||||
|
|
||||||
|
| 层级 | 验证内容 | 建议文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 模型测试 | 三张新表、word/pdf 导出类型、权限关系 | `tests/test_application_form_fill_models.py` |
|
||||||
|
| 配置测试 | YAML 加载、模板配置校验、hash | `tests/test_application_form_fill_template_config.py` |
|
||||||
|
| 选择测试 | 触发语句、指定模板、注册类型优先级、默认模板 | `tests/test_application_form_fill_template_select.py` |
|
||||||
|
| 抽取测试 | 规则/正则、LLM 降级、并行抽取、字段合并 | `tests/test_application_form_fill_field_extract.py`、`tests/test_application_form_fill_field_merge.py` |
|
||||||
|
| Word 测试 | 表格行填充、冲突高亮、导出记录 | `tests/test_application_form_fill_word_fill.py` |
|
||||||
|
| 导出测试 | 追溯清单 Excel、JSON 产物、下载权限 | `tests/test_application_form_fill_traceability.py`、`tests/test_application_form_fill_views.py` |
|
||||||
|
| 工作流测试 | 批次创建、节点流转、状态归并、助手摘要 | `tests/test_application_form_fill_workflow.py` |
|
||||||
|
| 通知测试 | mock 通知、失败记录、重试字段 | `tests/test_application_form_fill_notification.py` |
|
||||||
|
| 前端测试 | 卡片节点、PDF 待增强、下载链接、冲突摘要 | `tests/test_application_form_fill_frontend.py` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、Codex 自动化执行规则
|
||||||
|
|
||||||
|
| 规则 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 顺序执行 | 必须从 AFF-0 到 AFF-8 顺序执行,不得跳阶段 |
|
||||||
|
| TDD | 新行为先写失败测试,再实现 |
|
||||||
|
| 当前阶段优先 | 某阶段失败时先修复当前阶段,不继续后续阶段 |
|
||||||
|
| 回归保护 | 文件汇总和法规核查已有测试不得回归 |
|
||||||
|
| PDF 边界 | PDF 节点本期可 skipped,不为 PDF 引入强依赖 |
|
||||||
|
| 字段表边界 | 不新增字段级数据库表,后续增强已列入待办计划 |
|
||||||
|
| 每阶段验证 | 每阶段完成后运行对应验证命令 |
|
||||||
|
| 每阶段提交 | 每阶段验证通过后生成提交摘要并本地提交 |
|
||||||
|
| 不覆盖变更 | 不得回滚或覆盖用户已有未提交变更 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、推荐目标模式提示词
|
||||||
|
|
||||||
|
后续可直接对 Codex 输入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md 执行第三批开发。
|
||||||
|
|
||||||
|
目标:
|
||||||
|
完成独立 application_form_fill 工作流,通过用户对话触发自动填表,复用当前对话最近成功 FileSummaryBatch,支持模板配置、注册证 Word 自动填写、规则/正则与 LLM 并行字段抽取、说明书优先冲突归并、冲突高亮、字段来源追溯清单、Word 下载、自动填表工作流卡片和飞书 mock 通知记录。
|
||||||
|
|
||||||
|
执行规则:
|
||||||
|
1. 创建 codex/YYYYMMDD-申报文件自动填表 分支。
|
||||||
|
2. 按 AFF-0 到 AFF-8 顺序执行,不跳阶段。
|
||||||
|
3. 每阶段先写测试,再实现,完成后运行对应验证命令。
|
||||||
|
4. 不实现字段级数据库表。
|
||||||
|
5. PDF 转换本期作为 skipped/待增强,不引入强制 LibreOffice 依赖。
|
||||||
|
6. 模板配置路径必须为 review_agent/application_form_fill/templates/application_form_templates_v1.yaml。
|
||||||
|
7. Word 模板优先支持注册证格式 docx,两个 doc 模板可标记待转换或部分成功。
|
||||||
|
8. 每阶段验证通过后调用 git-commit-summary 生成提交摘要并本地提交。
|
||||||
|
9. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、待执行前检查清单
|
||||||
|
|
||||||
|
| 检查项 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 第三批需求分析、功能设计、详细设计、数据库设计均已存在 | 待执行时确认 |
|
||||||
|
| 当前分支是否适合创建开发分支 | 待执行时确认 |
|
||||||
|
| 是否存在用户未提交变更 | 待执行时确认 |
|
||||||
|
| `python-docx`、`openpyxl`、`PyYAML` 是否可用 | 待执行时确认 |
|
||||||
|
| 现有文件汇总和法规核查测试是否通过 | 待执行时确认 |
|
||||||
|
| 执行机器是否提供 `git-commit-summary` skill | 待执行时确认 |
|
||||||
|
| `.doc` 模板和 PDF 转换是否保持在待办边界内 | 待执行时确认 |
|
||||||
583
docs/5.开发计划/4.飞书通知与问答接入.md
Normal file
583
docs/5.开发计划/4.飞书通知与问答接入.md
Normal file
@@ -0,0 +1,583 @@
|
|||||||
|
# 飞书通知与问答接入开发计划
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/4.飞书通知与问答接入.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/4.飞书通知与问答接入.md |
|
||||||
|
| 详细设计文档 | docs/3.详细设计/4.飞书通知与问答接入.md |
|
||||||
|
| 数据库设计文档 | docs/4.数据库设计/4.飞书通知与问答接入.md |
|
||||||
|
| 功能名称 | 飞书通知与问答接入 |
|
||||||
|
| 所属模块 | 审核智能体 review_agent |
|
||||||
|
| 执行方式 | 单人开发 + Codex 自动化执行 |
|
||||||
|
| 计划日期 | 2026-06-07 |
|
||||||
|
| 计划版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Codex 自动执行说明
|
||||||
|
|
||||||
|
本文件用于 Codex 自动执行开发任务。执行时必须按阶段顺序推进,不得跳过测试、不得直接请求真实飞书接口作为自动化测试、不得把真实 App Secret 或 token 写入代码库。
|
||||||
|
|
||||||
|
执行规则:
|
||||||
|
|
||||||
|
| 规则 | 要求 |
|
||||||
|
| --- | --- |
|
||||||
|
| 执行顺序 | 必须从 FS-0 到 FS-8 顺序执行,前一阶段验证未通过不得进入下一阶段 |
|
||||||
|
| TDD | 每个服务、模型、命令和页面展示任务必须先写失败测试,再实现代码,再运行测试确认通过 |
|
||||||
|
| 外部 API | 自动化测试必须 mock 飞书 token API 和消息 API;真实飞书只通过 `send_test_feishu_notification` 手动命令验证 |
|
||||||
|
| 凭证安全 | 不得提交真实 `FEISHU_APP_ID`、`FEISHU_APP_SECRET`、`tenant_access_token`、用户 open_id/user_id |
|
||||||
|
| 失败处理 | 如测试失败,先定位是否由本阶段改动引起;不得修改无关功能绕过测试 |
|
||||||
|
| 工作区安全 | 不得回滚用户已有变更;如遇到同文件用户改动,先阅读并兼容 |
|
||||||
|
| 提交节奏 | 每个阶段完成并通过阶段验证后再提交,提交信息参考“建议提交切分” |
|
||||||
|
| 实现边界 | 首期只做指定个人账号私聊通知和问答预留;不得扩展外部群聊、事件订阅、LLM 问答解析 |
|
||||||
|
|
||||||
|
自动执行入口建议:
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/4.飞书通知与问答接入.md 从 FS-0 开始逐阶段执行。
|
||||||
|
每个阶段必须先写测试、运行失败、实现、运行通过,再进入下一阶段。
|
||||||
|
真实飞书 API 只能通过手动 management command 验证,pytest 中必须 mock。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、开发计划目标
|
||||||
|
|
||||||
|
本开发计划用于指导“飞书通知与问答接入”首期开发。首期目标是通过飞书官方智能体/应用机器人接口,把系统中三个工作流的结束结果发送到指定个人飞书账号,并为后续飞书内问答建立可测试的最小服务边界。
|
||||||
|
|
||||||
|
本期必须完成:
|
||||||
|
|
||||||
|
| 类别 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 真实飞书通知 | 使用 App ID/App Secret 获取 `tenant_access_token`,调用飞书消息 API 发送私聊通知 |
|
||||||
|
| 指定个人账号 | 通过 `.env` 配置 `FEISHU_DEFAULT_USER_OPEN_ID` 或 `FEISHU_DEFAULT_USER_ID`,首期优先发给该账号 |
|
||||||
|
| 三流程接入 | 自动汇总、法规核查、自动填表三个流程完成后均触发通知 |
|
||||||
|
| 数据库记录 | 新增统一通知记录表、飞书用户映射表、token 缓存表、问答日志表 |
|
||||||
|
| 页面展示 | 三个流程结果页或详情区域展示飞书通知状态 |
|
||||||
|
| 问答预留 | 建表、实现批次摘要查询、简单规则意图解析、本地模拟问答命令 |
|
||||||
|
| 测试策略 | 关键服务严格 TDD;自动化测试 mock 飞书 API;真实飞书发送通过 management command 手动验证 |
|
||||||
|
|
||||||
|
本期明确不做:
|
||||||
|
|
||||||
|
| 类别 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 外部群聊接入 | 暂不向群聊发送通知,不做群内 @ |
|
||||||
|
| 飞书事件订阅 | 暂不接收飞书回调,不实现真实私聊问答事件入口 |
|
||||||
|
| 手动重发 | 页面和 Admin 暂不提供重发按钮 |
|
||||||
|
| 自动后台重试 | 通知失败只记录;成功才判重,失败允许后续再次发送 |
|
||||||
|
| LLM 问答解析 | 问答预留只做简单规则解析,不接 LLM |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、已确认开发规则
|
||||||
|
|
||||||
|
| 规则项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 主接入方式 | 飞书官方智能体/应用机器人消息 API |
|
||||||
|
| 凭证配置 | `.env` 提供 `FEISHU_APP_ID`、`FEISHU_APP_SECRET` |
|
||||||
|
| 接收人配置 | `.env` + Django Admin 都做;首期发送优先使用 `.env` 指定个人账号 |
|
||||||
|
| 接收人优先级 | `FEISHU_DEFAULT_USER_OPEN_ID` > `FEISHU_DEFAULT_USER_ID` |
|
||||||
|
| token 缓存 | 数据库缓存 `tenant_access_token` 和过期时间 |
|
||||||
|
| 通知记录 | 新增统一 `WorkflowNotificationRecord`,三个流程都写入 |
|
||||||
|
| 判重策略 | 同一批次、同一流程、同一状态,只有成功记录才判重;失败后允许再次发送 |
|
||||||
|
| 系统链接 | 新增 `PUBLIC_BASE_URL`,默认 `http://127.0.0.1:8000` |
|
||||||
|
| 页面展示 | 三个流程结果页或详情区域展示通知状态 |
|
||||||
|
| 真实 API 测试 | 自动化测试全部 mock;新增 management command 手动发送真实测试消息 |
|
||||||
|
| TDD | 每个核心模块先写测试再实现 |
|
||||||
|
| 环境变量说明 | 写变量名和用途,不写真实值 |
|
||||||
|
| 阶段提交 | 模型、服务、工作流、页面、命令、测试分阶段提交 |
|
||||||
|
| 问答预留 | 建 `FeishuQuestionLog`,实现摘要查询、规则解析和本地模拟命令 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、总体验收标准
|
||||||
|
|
||||||
|
| 类别 | 完成标准 |
|
||||||
|
| --- | --- |
|
||||||
|
| 配置 | `.env` 支持 `FEISHU_APP_ID`、`FEISHU_APP_SECRET`、`FEISHU_DEFAULT_USER_OPEN_ID` / `FEISHU_DEFAULT_USER_ID`、`PUBLIC_BASE_URL` |
|
||||||
|
| token | 系统可获取、缓存、过期刷新 `tenant_access_token`;token API 失败会记录失败通知 |
|
||||||
|
| 发送 | 手动命令可向指定个人账号发送真实测试消息 |
|
||||||
|
| 通知 | 三个流程完成后均创建通知记录,并在启用配置时调用飞书消息 API |
|
||||||
|
| 判重 | 成功记录存在时,同一批次/流程/状态不重复发送;失败记录不阻止再次发送 |
|
||||||
|
| 失败隔离 | 飞书发送失败不影响业务工作流完成 |
|
||||||
|
| 页面 | 三个流程结果页或详情区域能看到通知通道、接收人、状态、时间、失败原因 |
|
||||||
|
| 问答预留 | 本地模拟命令可解析“最新/最近/批次号/工作流关键词”,返回批次摘要并记录日志 |
|
||||||
|
| 权限 | 普通用户只能查询自己的批次摘要;管理员可查全部 |
|
||||||
|
| 回归 | 文件汇总、法规核查、自动填表既有测试不回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、阶段总览
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 阶段验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| FS-0 | 准备与基线 | 确认文档和测试基线 | `python manage.py check` 与关键现有测试通过 |
|
||||||
|
| FS-1 | 数据模型与配置 | 新增通知、映射、token、问答日志模型和环境配置 | migration、模型测试通过 |
|
||||||
|
| FS-2 | 飞书 API 基础服务 | token 获取缓存、接收人解析、消息构造、消息 API client | 服务单测通过,全部 mock 外部 HTTP |
|
||||||
|
| FS-3 | 通知调度与记录 | 统一通知上下文、判重、成功/失败/disabled 落库 | 通知服务测试通过 |
|
||||||
|
| FS-4 | 三流程接入 | 自动汇总、法规核查、自动填表完成后触发通知 | 三流程通知集成测试通过 |
|
||||||
|
| FS-5 | 页面展示 | 批次详情或结果区域展示通知状态 | 页面/视图测试通过 |
|
||||||
|
| FS-6 | 手动真实测试命令 | management command 发送真实飞书测试消息 | 本地配置后可向个人账号发消息 |
|
||||||
|
| FS-7 | 问答预留能力 | 批次摘要查询、规则意图解析、模拟问答命令、问答日志 | 问答预留测试通过 |
|
||||||
|
| FS-8 | 文档与全量回归 | 更新环境变量说明,运行全量相关测试 | 回归通过,计划完成 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、FS-0 准备与基线
|
||||||
|
|
||||||
|
### FS-0-001 确认开发文档和当前工作区
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 准备 / Git |
|
||||||
|
| 前置任务 | 无 |
|
||||||
|
| 涉及文件 | 文档文件,不改代码 |
|
||||||
|
| 目标 | 确认需求、功能、数据库、详细设计和开发计划均存在,并记录当前工作区状态 |
|
||||||
|
| 开发步骤 | 1. 检查 `git status --short`;2. 确认四份设计文档与本开发计划存在;3. 确认当前未提交变更均为文档或用户已有变更;4. 不回滚任何用户变更 |
|
||||||
|
| 验收标准 | 工作区状态清楚,可进入开发 |
|
||||||
|
| 验证命令 | `git status --short` |
|
||||||
|
| Codex 执行提示 | 请先确认飞书接入四份设计文档和开发计划存在,检查工作区状态,不要回滚用户已有变更。 |
|
||||||
|
|
||||||
|
### FS-0-002 运行基线测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 回归 |
|
||||||
|
| 前置任务 | FS-0-001 |
|
||||||
|
| 涉及文件 | 无固定文件 |
|
||||||
|
| 目标 | 确认开发前现有主流程可运行 |
|
||||||
|
| 开发步骤 | 1. 运行 Django check;2. 运行通知相关旧测试;3. 运行三个工作流关键测试;4. 若失败,判断是否既有问题并记录 |
|
||||||
|
| 验收标准 | 基线通过,或既有失败已记录且不与本功能冲突 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest tests/test_file_summary_workflow.py tests/test_regulatory_notification.py tests/test_application_form_fill_notification.py` |
|
||||||
|
| Codex 执行提示 | 请运行 Django check 和现有通知/工作流关键测试,确认开发前基线。 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、FS-1 数据模型与配置
|
||||||
|
|
||||||
|
### FS-1-001 新增飞书接入 ORM 模型测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 数据库 |
|
||||||
|
| 前置任务 | FS-0 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_models.py` |
|
||||||
|
| 目标 | 先写失败测试,覆盖飞书用户映射、token 缓存、统一通知记录、问答日志 |
|
||||||
|
| 开发步骤 | 1. 新增 `test_feishu_user_mapping_preferred_identifier`;2. 新增 `test_feishu_access_token_cache_expiry`;3. 新增 `test_workflow_notification_success_dedupe_only_success`;4. 新增 `test_feishu_question_log_records_summary_without_full_answer` |
|
||||||
|
| 验收标准 | 新测试因模型不存在而失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_models.py -q` |
|
||||||
|
| Codex 执行提示 | 请先为飞书相关模型写失败测试,覆盖接收人标识优先级、数据库 token 缓存、成功判重和问答日志摘要。 |
|
||||||
|
|
||||||
|
### FS-1-002 新增模型
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 后端 |
|
||||||
|
| 前置任务 | FS-1-001 |
|
||||||
|
| 涉及文件 | `review_agent/models.py` |
|
||||||
|
| 目标 | 新增 `FeishuUserMapping`、`FeishuAccessTokenCache`、`WorkflowNotificationRecord`、`FeishuQuestionLog` |
|
||||||
|
| 开发步骤 | 1. `FeishuUserMapping` 关联系统用户,支持 open_id、user_id、mobile、is_active;2. `FeishuAccessTokenCache` 保存 token、expires_at、app_id_hash、error_message;3. `WorkflowNotificationRecord` 保存 workflow_type、batch_id、batch_no、status、channel、target、send_status、summary、error、sent_at;4. `FeishuQuestionLog` 保存问题、意图、查询对象、回答摘要、权限结果和状态;5. 添加索引和模型方法 |
|
||||||
|
| 验收标准 | `python manage.py check` 通过 |
|
||||||
|
| 验证命令 | `python manage.py check` |
|
||||||
|
| Codex 执行提示 | 请按数据库设计新增四个模型。注意 token 需要数据库缓存,通知判重只对 success 生效。 |
|
||||||
|
|
||||||
|
### FS-1-003 生成迁移并通过模型测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 数据库 / 测试 |
|
||||||
|
| 前置任务 | FS-1-002 |
|
||||||
|
| 涉及文件 | `review_agent/migrations/`、`tests/test_feishu_models.py` |
|
||||||
|
| 目标 | 生成 migration,模型测试全部通过 |
|
||||||
|
| 开发步骤 | 1. 运行 makemigrations;2. 检查 migration 只包含飞书相关表;3. 运行 migrate;4. 运行模型测试 |
|
||||||
|
| 验收标准 | migration 可执行,模型测试通过 |
|
||||||
|
| 验证命令 | `python manage.py makemigrations review_agent`; `python manage.py migrate`; `pytest tests/test_feishu_models.py -q` |
|
||||||
|
| Codex 执行提示 | 请生成飞书相关模型迁移并运行模型测试。 |
|
||||||
|
|
||||||
|
### FS-1-004 注册 Admin 和配置项
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后台 / 配置 |
|
||||||
|
| 前置任务 | FS-1-003 |
|
||||||
|
| 涉及文件 | `review_agent/admin.py`、`config/settings.py`、`.env.example` 或 README |
|
||||||
|
| 目标 | Admin 可维护用户映射;settings 暴露飞书配置;文档只写变量名不写真实值 |
|
||||||
|
| 开发步骤 | 1. 注册 `FeishuUserMapping`、`WorkflowNotificationRecord`、`FeishuAccessTokenCache`、`FeishuQuestionLog`;2. settings 读取 `FEISHU_NOTIFY_ENABLED`、`FEISHU_APP_ID`、`FEISHU_APP_SECRET`、`FEISHU_DEFAULT_USER_OPEN_ID`、`FEISHU_DEFAULT_USER_ID`、`FEISHU_DEFAULT_TARGET_NAME`、`PUBLIC_BASE_URL`;3. 默认 `PUBLIC_BASE_URL=http://127.0.0.1:8000`;4. 在说明文件中加入变量名和用途 |
|
||||||
|
| 验收标准 | Django check 通过;Admin 列表可展示字段 |
|
||||||
|
| 验证命令 | `python manage.py check` |
|
||||||
|
| Codex 执行提示 | 请注册飞书相关模型到 Admin,并新增环境变量配置说明,不要写入真实凭证。 |
|
||||||
|
|
||||||
|
### FS-1 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_feishu_models.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、FS-2 飞书 API 基础服务
|
||||||
|
|
||||||
|
### FS-2-001 token 服务 TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 服务 |
|
||||||
|
| 前置任务 | FS-1 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_api_services.py` |
|
||||||
|
| 目标 | 先写 token 获取、缓存、过期刷新、失败记录测试 |
|
||||||
|
| 开发步骤 | 1. mock 飞书 token HTTP 成功;2. 测试首次获取后写数据库缓存;3. 测试未过期时不再请求 HTTP;4. 测试过期后重新请求;5. 测试 token API 失败返回错误对象 |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_api_services.py -k token -q` |
|
||||||
|
| Codex 执行提示 | 请先写飞书 tenant_access_token 服务测试,外部 HTTP 必须 mock。 |
|
||||||
|
|
||||||
|
### FS-2-002 实现 token 服务
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 服务 |
|
||||||
|
| 前置任务 | FS-2-001 |
|
||||||
|
| 涉及文件 | `review_agent/notifications/feishu_token.py` |
|
||||||
|
| 目标 | 使用 App ID/App Secret 获取并数据库缓存 `tenant_access_token` |
|
||||||
|
| 开发步骤 | 1. 定义 `FeishuTokenResult`;2. 检查配置缺失;3. 查询未过期数据库缓存;4. 调用 token API;5. 保存 token 和 expires_at;6. token 失败时返回错误,不抛出到业务流程 |
|
||||||
|
| 验收标准 | token 服务测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_api_services.py -k token -q` |
|
||||||
|
| Codex 执行提示 | 请实现 token 服务,缓存放数据库,不打印 App Secret 和 token。 |
|
||||||
|
|
||||||
|
### FS-2-003 接收人解析和消息构造 TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 服务 |
|
||||||
|
| 前置任务 | FS-2-002 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_api_services.py` |
|
||||||
|
| 目标 | 测试指定个人接收人优先级、配置缺失、富文本消息摘要 |
|
||||||
|
| 开发步骤 | 1. 测试 `FEISHU_DEFAULT_USER_OPEN_ID` 优先;2. 测试无 open_id 时使用 `FEISHU_DEFAULT_USER_ID`;3. 测试均缺失返回 `recipient_missing`;4. 测试消息包含流程、批次、状态、摘要、链接和发起人 |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_api_services.py -k 'recipient or message' -q` |
|
||||||
|
| Codex 执行提示 | 请先写接收人解析和富文本消息构造测试。 |
|
||||||
|
|
||||||
|
### FS-2-004 实现接收人解析和消息构造
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 服务 |
|
||||||
|
| 前置任务 | FS-2-003 |
|
||||||
|
| 涉及文件 | `review_agent/notifications/recipient.py`、`review_agent/notifications/message_builder.py`、`review_agent/notifications/context.py` |
|
||||||
|
| 目标 | 生成统一通知上下文、指定个人接收人和飞书富文本 payload |
|
||||||
|
| 开发步骤 | 1. 定义 `NotificationContext`;2. 定义 `ResolvedFeishuTarget`;3. 实现 `resolve_configured_personal_recipient()`;4. 实现 `build_feishu_post_message()`;5. 实现 `build_message_summary()`;6. 链接使用 `PUBLIC_BASE_URL` |
|
||||||
|
| 验收标准 | 接收人和消息构造测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_api_services.py -k 'recipient or message' -q` |
|
||||||
|
| Codex 执行提示 | 请实现通知上下文、接收人解析和飞书富文本消息构造。首期不需要群 @。 |
|
||||||
|
|
||||||
|
### FS-2-005 消息 API client TDD 与实现
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 测试 |
|
||||||
|
| 前置任务 | FS-2-004 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_api_services.py`、`review_agent/notifications/feishu_message_api.py` |
|
||||||
|
| 目标 | mock 飞书消息 API,覆盖发送成功、超时、非 0 错误码、token 失效后刷新并重试一次四种场景 |
|
||||||
|
| 开发步骤 | 1. 写成功测试,断言请求携带 Authorization;2. 写非 0 code 测试;3. 写超时测试;4. 写 token 失效后刷新 token 并同步重试一次测试;5. 实现 `send_personal_message()` |
|
||||||
|
| 验收标准 | 消息 API client 测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_api_services.py -q` |
|
||||||
|
| Codex 执行提示 | 请用 mock HTTP 实现飞书消息 API client。自动化测试不得请求真实飞书。 |
|
||||||
|
|
||||||
|
### FS-2 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_feishu_api_services.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、FS-3 通知调度与记录
|
||||||
|
|
||||||
|
### FS-3-001 通知记录服务 TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 服务 |
|
||||||
|
| 前置任务 | FS-2 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_notification_dispatcher.py` |
|
||||||
|
| 目标 | 先写通知调度、成功判重、失败允许再次发送、disabled 记录测试 |
|
||||||
|
| 开发步骤 | 1. 测试通知关闭写 disabled;2. 测试发送成功写 success;3. 测试已有 success 时不再调用 API;4. 测试已有 failed 时允许再次调用 API;5. 测试 token 失败写 failed |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_notification_dispatcher.py -q` |
|
||||||
|
| Codex 执行提示 | 请先写统一通知调度测试,重点覆盖成功判重和失败可重试。 |
|
||||||
|
|
||||||
|
### FS-3-002 实现通知记录和 dispatcher
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 服务 |
|
||||||
|
| 前置任务 | FS-3-001 |
|
||||||
|
| 涉及文件 | `review_agent/notifications/records.py`、`review_agent/notifications/dispatcher.py` |
|
||||||
|
| 目标 | 实现统一通知调度入口 |
|
||||||
|
| 开发步骤 | 1. 实现 `already_successfully_sent(dedupe_key)`;2. 实现 disabled、success、failed 记录创建;3. 实现 `dispatch_workflow_notification(context)`;4. 捕获服务异常并写 failed;5. 不让异常冒泡阻断工作流 |
|
||||||
|
| 验收标准 | dispatcher 测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_notification_dispatcher.py -q` |
|
||||||
|
| Codex 执行提示 | 请实现统一通知调度和记录落库。注意 success 才判重,failed 不判重。 |
|
||||||
|
|
||||||
|
### FS-3 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_feishu_notification_dispatcher.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、FS-4 三流程接入
|
||||||
|
|
||||||
|
### FS-4-001 工作流 adapter TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 集成 |
|
||||||
|
| 前置任务 | FS-3 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_workflow_adapters.py` |
|
||||||
|
| 目标 | 测试自动汇总、法规核查、自动填表三类批次能生成正确通知上下文 |
|
||||||
|
| 开发步骤 | 1. 构造 `FileSummaryBatch` 和 items,断言文件摘要;2. 构造 `RegulatoryReviewBatch` 和 issues,断言风险摘要;3. 构造 `ApplicationFormFillBatch` 和 exports,断言导出/冲突摘要;4. 断言 result_url 使用 `PUBLIC_BASE_URL` |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_workflow_adapters.py -q` |
|
||||||
|
| Codex 执行提示 | 请先写三个工作流 adapter 的测试。 |
|
||||||
|
|
||||||
|
### FS-4-002 实现工作流 adapters
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 服务 |
|
||||||
|
| 前置任务 | FS-4-001 |
|
||||||
|
| 涉及文件 | `review_agent/notifications/workflow_adapters.py` |
|
||||||
|
| 目标 | 三个工作流批次转换为 `NotificationContext` |
|
||||||
|
| 开发步骤 | 1. 实现 `build_file_summary_context()`;2. 实现 `build_regulatory_review_context()`;3. 实现 `build_application_form_fill_context()`;4. 控制摘要长度;5. 处理 partial_success 和 failed |
|
||||||
|
| 验收标准 | adapter 测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_workflow_adapters.py -q` |
|
||||||
|
| Codex 执行提示 | 请实现三个工作流通知上下文 adapter。 |
|
||||||
|
|
||||||
|
### FS-4-003 接入三个工作流完成节点
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 工作流 |
|
||||||
|
| 前置任务 | FS-4-002 |
|
||||||
|
| 涉及文件 | `review_agent/file_summary/workflow.py`、`review_agent/regulatory_review/workflow.py`、`review_agent/application_form_fill/workflow.py` |
|
||||||
|
| 目标 | 三个工作流完成后调用通知 dispatcher |
|
||||||
|
| 开发步骤 | 1. 自动汇总成功/失败状态落定后触发通知;2. 法规核查报告和风险落库后触发通知;3. 自动填表 notify 节点改为统一通知服务;4. 捕获通知异常并记录非阻断错误;5. 保留现有 mock 测试兼容 |
|
||||||
|
| 验收标准 | 三流程通知集成测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_file_summary_workflow.py tests/test_regulatory_notification.py tests/test_application_form_fill_notification.py` |
|
||||||
|
| Codex 执行提示 | 请把统一通知服务接入三个工作流完成节点,通知失败不得影响业务状态。 |
|
||||||
|
|
||||||
|
### FS-4 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_feishu_workflow_adapters.py tests/test_file_summary_workflow.py tests/test_regulatory_notification.py tests/test_application_form_fill_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、FS-5 页面展示
|
||||||
|
|
||||||
|
### FS-5-001 通知状态展示测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 前端 |
|
||||||
|
| 前置任务 | FS-4 |
|
||||||
|
| 涉及文件 | `tests/test_file_summary_frontend.py`、`tests/test_regulatory_frontend.py`、`tests/test_application_form_fill_frontend.py` |
|
||||||
|
| 目标 | 测试三个流程页面或结果区域展示飞书通知状态 |
|
||||||
|
| 开发步骤 | 1. 准备 success 通知记录,断言页面出现“飞书通知已发送”;2. 准备 failed 记录,断言出现失败原因;3. 无记录时展示“暂无飞书通知记录”或不破坏页面 |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_file_summary_frontend.py tests/test_regulatory_frontend.py tests/test_application_form_fill_frontend.py -k feishu` |
|
||||||
|
| Codex 执行提示 | 请先写三个流程通知状态展示测试。 |
|
||||||
|
|
||||||
|
### FS-5-002 实现通知状态 presenter 和页面展示
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 前端 |
|
||||||
|
| 前置任务 | FS-5-001 |
|
||||||
|
| 涉及文件 | `review_agent/notifications/presenter.py`、`review_agent/*/views.py`、`templates/home.html` 或相关模板 |
|
||||||
|
| 目标 | 页面展示通知状态、接收人、发送时间、失败原因 |
|
||||||
|
| 开发步骤 | 1. 实现 `get_notification_records(workflow_type, batch_id)`;2. 在三个流程视图上下文中加入通知记录;3. 模板展示最近一条通知状态;4. 保持页面无记录时兼容 |
|
||||||
|
| 验收标准 | 页面展示测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_file_summary_frontend.py tests/test_regulatory_frontend.py tests/test_application_form_fill_frontend.py -k feishu` |
|
||||||
|
| Codex 执行提示 | 请实现通知状态 presenter,并在三个流程结果页展示最近飞书通知状态。 |
|
||||||
|
|
||||||
|
### FS-5 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_file_summary_frontend.py tests/test_regulatory_frontend.py tests/test_application_form_fill_frontend.py -k feishu
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、FS-6 手动真实测试命令
|
||||||
|
|
||||||
|
### FS-6-001 测试命令 TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 命令 |
|
||||||
|
| 前置任务 | FS-5 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_management_commands.py` |
|
||||||
|
| 目标 | 测试 management command 构造测试通知并调用 dispatcher |
|
||||||
|
| 开发步骤 | 1. mock dispatcher;2. 执行 `send_test_feishu_notification --username owner`;3. 断言构造测试上下文;4. 测试缺少用户时报错 |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_management_commands.py -q` |
|
||||||
|
| Codex 执行提示 | 请先写真实飞书测试命令的自动化测试,dispatcher 要 mock。 |
|
||||||
|
|
||||||
|
### FS-6-002 实现发送测试消息命令
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 运维 / 命令 |
|
||||||
|
| 前置任务 | FS-6-001 |
|
||||||
|
| 涉及文件 | `review_agent/management/commands/send_test_feishu_notification.py` |
|
||||||
|
| 目标 | 本地可手动向指定个人飞书账号发送真实测试消息 |
|
||||||
|
| 开发步骤 | 1. 支持 `--username`;2. 构造 workflow_type=`manual_test` 的 `NotificationContext`;3. 调用 dispatcher;4. 输出 send_status、target、error_message;5. 不打印 token 和 App Secret |
|
||||||
|
| 验收标准 | 命令测试通过;本地配置真实凭证后可手动验证 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_management_commands.py -q`; `python manage.py send_test_feishu_notification --username owner` |
|
||||||
|
| Codex 执行提示 | 请实现发送测试飞书通知的 management command。自动测试 mock dispatcher,真实发送只手动运行。 |
|
||||||
|
|
||||||
|
### FS-6 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_feishu_management_commands.py -q
|
||||||
|
```
|
||||||
|
|
||||||
|
手动验证命令:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py send_test_feishu_notification --username owner
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、FS-7 问答预留能力
|
||||||
|
|
||||||
|
### FS-7-001 批次摘要查询服务 TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 服务 |
|
||||||
|
| 前置任务 | FS-6 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_question_reserved.py` |
|
||||||
|
| 目标 | 测试按批次号、latest、工作流类型查询三个流程摘要 |
|
||||||
|
| 开发步骤 | 1. 普通用户查询自己的最新法规核查批次;2. 普通用户不能查询他人批次;3. 管理员可查全部;4. 按批次号精确查询;5. 返回状态、摘要、链接 |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_question_reserved.py -k query -q` |
|
||||||
|
| Codex 执行提示 | 请先写飞书问答预留的批次摘要查询测试。 |
|
||||||
|
|
||||||
|
### FS-7-002 实现批次摘要查询服务
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 服务 |
|
||||||
|
| 前置任务 | FS-7-001 |
|
||||||
|
| 涉及文件 | `review_agent/feishu_questions/query.py`、`review_agent/feishu_questions/permissions.py` |
|
||||||
|
| 目标 | 支持三个工作流的摘要查询和权限过滤 |
|
||||||
|
| 开发步骤 | 1. 实现管理员/普通用户权限过滤;2. 实现 batch_no 查询;3. 实现 latest 查询;4. 实现 workflow_type 关键词映射;5. 返回统一摘要 dict |
|
||||||
|
| 验收标准 | 查询服务测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_question_reserved.py -k query -q` |
|
||||||
|
| Codex 执行提示 | 请实现问答预留查询服务,普通用户只能查自己的批次,管理员可查全部。 |
|
||||||
|
|
||||||
|
### FS-7-003 简单意图解析和日志 TDD
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 服务 |
|
||||||
|
| 前置任务 | FS-7-002 |
|
||||||
|
| 涉及文件 | `tests/test_feishu_question_reserved.py` |
|
||||||
|
| 目标 | 测试规则解析“最新/最近/批次号/工作流关键词”,并记录问答日志 |
|
||||||
|
| 开发步骤 | 1. 识别 `RR-`、`AFF-`、`FS-` 批次号;2. 识别“最新/最近”;3. 识别“法规核查/自动填表/自动汇总”;4. 记录 `FeishuQuestionLog`,不保存完整回答正文 |
|
||||||
|
| 验收标准 | 测试先失败 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_question_reserved.py -k 'intent or log' -q` |
|
||||||
|
| Codex 执行提示 | 请先写简单规则意图解析和问答日志测试,不接 LLM。 |
|
||||||
|
|
||||||
|
### FS-7-004 实现意图解析、问答服务和模拟命令
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 后端 / 命令 |
|
||||||
|
| 前置任务 | FS-7-003 |
|
||||||
|
| 涉及文件 | `review_agent/feishu_questions/intent.py`、`review_agent/feishu_questions/service.py`、`review_agent/management/commands/feishu_question_simulate.py` |
|
||||||
|
| 目标 | 本地模拟飞书问答输入,返回批次摘要并记录日志 |
|
||||||
|
| 开发步骤 | 1. 实现 `parse_question_intent(text)`;2. 实现 `answer_question(user, text)`;3. 写入 `FeishuQuestionLog`;4. 实现命令 `python manage.py feishu_question_simulate --username owner "查最新法规核查"`;5. 输出回答摘要 |
|
||||||
|
| 验收标准 | 问答预留测试和命令测试通过 |
|
||||||
|
| 验证命令 | `pytest tests/test_feishu_question_reserved.py -q`; `python manage.py feishu_question_simulate --username owner "查最新法规核查"` |
|
||||||
|
| Codex 执行提示 | 请实现飞书问答预留的规则解析、服务和本地模拟命令。 |
|
||||||
|
|
||||||
|
### FS-7 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_feishu_question_reserved.py -q
|
||||||
|
python manage.py feishu_question_simulate --username owner "查最新法规核查"
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、FS-8 文档与全量回归
|
||||||
|
|
||||||
|
### FS-8-001 更新配置说明
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 文档 / 配置 |
|
||||||
|
| 前置任务 | FS-7 |
|
||||||
|
| 涉及文件 | `README.md`、`.env.example` 或项目配置说明文档 |
|
||||||
|
| 目标 | 说明飞书相关环境变量和手动测试命令 |
|
||||||
|
| 开发步骤 | 1. 写明变量名和用途;2. 标注不要提交真实 App Secret;3. 写明 `send_test_feishu_notification` 用法;4. 写明自动化测试不请求真实飞书 |
|
||||||
|
| 验收标准 | 配置说明清楚,无真实密钥 |
|
||||||
|
| 验证命令 | 手动检查文档 |
|
||||||
|
| Codex 执行提示 | 请补充飞书配置说明,只写变量名和用途,不写真实值。 |
|
||||||
|
|
||||||
|
### FS-8-002 全量相关测试
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 任务类型 | 测试 / 回归 |
|
||||||
|
| 前置任务 | FS-8-001 |
|
||||||
|
| 涉及文件 | 无固定文件 |
|
||||||
|
| 目标 | 运行飞书新增测试和三个工作流关键回归 |
|
||||||
|
| 开发步骤 | 1. 运行 Django check;2. 运行飞书新增测试;3. 运行三个工作流关键测试;4. 修复与本功能相关失败;5. 记录无法处理的既有失败 |
|
||||||
|
| 验收标准 | 新增测试通过,关键回归通过 |
|
||||||
|
| 验证命令 | `python manage.py check`; `pytest tests/test_feishu_*.py tests/test_file_summary_workflow.py tests/test_regulatory_notification.py tests/test_application_form_fill_notification.py` |
|
||||||
|
| Codex 执行提示 | 请运行飞书新增测试和三个工作流关键回归,确保首期飞书接入不破坏既有功能。 |
|
||||||
|
|
||||||
|
### FS-8 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_feishu_*.py tests/test_file_summary_workflow.py tests/test_regulatory_notification.py tests/test_application_form_fill_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、建议提交切分
|
||||||
|
|
||||||
|
| 提交 | 建议提交信息 | 包含内容 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 1 | `feat: add feishu notification data models` | 模型、迁移、Admin、配置项 |
|
||||||
|
| 2 | `feat: add feishu api notification services` | token、接收人、消息构造、消息 API client |
|
||||||
|
| 3 | `feat: add workflow notification dispatcher` | dispatcher、记录判重、三流程 adapter |
|
||||||
|
| 4 | `feat: wire feishu notifications into workflows` | 三个工作流接入 |
|
||||||
|
| 5 | `feat: show feishu notification status` | 页面展示 |
|
||||||
|
| 6 | `feat: add feishu notification test command` | 真实发送测试命令 |
|
||||||
|
| 7 | `feat: add feishu question preview services` | 问答预留查询、解析、日志、模拟命令 |
|
||||||
|
| 8 | `docs: document feishu configuration` | 配置说明和回归修正 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、风险与处理策略
|
||||||
|
|
||||||
|
| 风险 | 影响 | 策略 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 飞书应用权限不足 | 消息 API 返回无权限 | 手动测试命令先验证;错误码入库展示 |
|
||||||
|
| open_id/user_id 不正确 | 个人账号收不到消息 | 接收人配置缺失或错误时记录 failed,命令输出错误 |
|
||||||
|
| token 缓存过期处理不当 | 偶发发送失败 | token 失效时刷新并允许消息 API 同步重试一次 |
|
||||||
|
| 三流程状态差异 | 通知触发点不一致 | 用 adapter 隔离各流程摘要生成 |
|
||||||
|
| 页面展示影响既有模板 | 前端回归失败 | 使用小型通知状态区块,无记录时不改变主流程展示 |
|
||||||
|
| 问答预留过度设计 | 影响首期交付 | 只做规则解析和摘要查询,不接事件订阅、不接 LLM |
|
||||||
622
docs/5.开发计划/5.第1章监管信息材料包生成.md
Normal file
622
docs/5.开发计划/5.第1章监管信息材料包生成.md
Normal file
@@ -0,0 +1,622 @@
|
|||||||
|
# 第1章监管信息材料包生成开发计划
|
||||||
|
|
||||||
|
## 文档信息
|
||||||
|
|
||||||
|
| 项目 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析文档 | docs/1.需求分析/5.第1章监管信息材料包生成.md |
|
||||||
|
| 功能设计文档 | docs/2.功能设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 数据库设计文档 | docs/3.数据库设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 详细设计文档 | docs/4.详细设计/5.第1章监管信息材料包生成.md |
|
||||||
|
| 参考开发计划 | docs/5.开发计划/3.产品关键信息提取与申报文件自动填表.md |
|
||||||
|
| 功能名称 | 第1章监管信息材料包生成 |
|
||||||
|
| 工作流编码 | regulatory_info_package |
|
||||||
|
| 批次号规则 | RIP-YYYYMMDDHHMMSS-abcdef |
|
||||||
|
| 计划日期 | 2026-06-10 |
|
||||||
|
| 计划版本 | V1.0 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、开发计划目标
|
||||||
|
|
||||||
|
本开发计划面向 Codex 执行,目标是把 `regulatory_info_package` 独立工作流按可验证、可回滚、可阶段验收的方式落地。计划以现有自动填表工作流 `application_form_fill` 为主要参考,但保持独立模块、独立批次、独立产物、独立通知和独立前端卡片。
|
||||||
|
|
||||||
|
现状裁决:当前最新代码中尚未存在 `regulatory_info_package` 正式工作流,本计划按“新建正式材料包工作流”执行;不得把该功能并入或改造 `application_form_fill`。
|
||||||
|
|
||||||
|
开发完成后,用户可在对话中上传或指定产品说明书,并通过“根据说明书生成第1章监管信息”触发工作流。系统基于 `docs/0.原始材料/第1章 监管信息` 样例模板生成 7 个监管信息文件,以 `第1章 监管信息(预生成版).zip` 作为首位下载入口,同时提供单文件和追溯 Excel 辅助下载。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、已确认开发规则
|
||||||
|
|
||||||
|
| 规则 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 工作流独立 | 新增 `workflow_type=regulatory_info_package`,不并入 `application_form_fill` |
|
||||||
|
| 模块独立 | 新增 `review_agent/regulatory_info_package/`,服务与自动填表平级 |
|
||||||
|
| 模型集中 | Django 模型继续放在 `review_agent/models.py` |
|
||||||
|
| 节点幂等 | RIP 节点必须基于 `workflow_type + workflow_batch_id + node_code` 做幂等创建或数据库唯一约束 |
|
||||||
|
| 单说明书输入 | 用户消息指定的文件名优先,其次是 active 附件,最后兼容最近一次成功的文件汇总批次 |
|
||||||
|
| 多候选处理 | 不做选择弹窗,通过对话反问用户确认说明书文件名 |
|
||||||
|
| 模板固定 | 固定处理第1章监管信息 7 个模板 |
|
||||||
|
| 模板字段化 | 优先把模板整理为 Agent/代码可识别的字段模板,使用内容控件 Tag 或稳定占位符;代码只填字段,不依赖手工改格式 |
|
||||||
|
| 抽取策略 | 规则抽取和 LLM 抽取并行,LLM 最多重试 3 次,失败后规则结果继续 |
|
||||||
|
| 文档生成 | 工作流节点串行,`generate_docs` 节点内部每个文档独立线程处理 |
|
||||||
|
| `.doc` 策略 | CH1.9 能力驱动:探测到 Word COM/UNO 时优先原生 `.doc`,无原生能力时明确记录并允许 `.docx` 兜底 |
|
||||||
|
| zip 策略 | zip 只包含成功或兜底成功文件,失败文件不进入 zip |
|
||||||
|
| 高亮策略 | 缺失项 `/` 黄底;LLM-only 黄底;冲突黄底红字 |
|
||||||
|
| 追溯策略 | 用户下载 Excel;JSON 只写后台 logs 目录 |
|
||||||
|
| 前端策略 | 只做最小接入,不单独建设新页面或独立样式体系 |
|
||||||
|
| TDD | 新行为先写失败测试,再实现 |
|
||||||
|
| Git 提交 | 每阶段验证通过后生成提交摘要;是否本地提交由用户确认 |
|
||||||
|
| 用户变更保护 | 不回滚、不覆盖用户已有未提交变更 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、规范依据与裁决
|
||||||
|
|
||||||
|
| 规范来源 | 命中内容 | 本计划裁决 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| GYRX 后端开发规范 | 接口响应、日志、增量规范 | 状态接口、下载权限、异常降级和日志留痕按现有 Django 模式实现 |
|
||||||
|
| GYRX 前端开发规范 | 样式复用、组件接入、下载图标建议 | 复用现有对话页和工作流卡片样式,必要时只补少量语义化样式 |
|
||||||
|
| 既有自动填表开发计划 | 阶段拆分、测试先行、每阶段验证 | 本计划沿用阶段结构和 Codex 执行提示粒度 |
|
||||||
|
| 第1章监管信息详细设计 | 独立模块、7 模板、doc 兜底、zip 首位 | 作为本计划最高优先级依据 |
|
||||||
|
|
||||||
|
未发现规范冲突。项目专项设计优先于通用规范。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、总体验收标准
|
||||||
|
|
||||||
|
| 类别 | 完成标准 |
|
||||||
|
| --- | --- |
|
||||||
|
| 触发 | 固定提示词和 LLM 路由均可触发 `regulatory_info_package` |
|
||||||
|
| 输入选择 | 能按用户指定文件名、active 附件、最近文件汇总选择说明书;多候选可反问 |
|
||||||
|
| 批次 | 能创建 `RegulatoryInfoPackageBatch`,节点和事件可查询 |
|
||||||
|
| 模板 | 能加载并校验 7 个模板配置,模板复制只写批次目录 |
|
||||||
|
| 抽取 | 规则抽取可独立跑通,LLM 失败不阻断主链路 |
|
||||||
|
| 合并 | missing、llm_only、conflict 均有可追溯结构和高亮决策 |
|
||||||
|
| docx 生成 | 6 个 `.docx` 文件能按模板生成并保留基本版式 |
|
||||||
|
| doc 处理 | CH1.9 优先 `.doc` 原生处理,失败时 `.docx` 兜底,状态可见 |
|
||||||
|
| ZIP | `第1章 监管信息(预生成版).zip` 排在助手回显首位,只包含成功/兜底成功文件 |
|
||||||
|
| 单文件 | 成功文件有辅助下载,失败文件显示原因且无下载链接 |
|
||||||
|
| 追溯 | 用户可下载 `traceability.xlsx`,JSON 写入 `logs/` |
|
||||||
|
| 前端 | 对话快捷入口、工作流卡片、状态轮询和下载列表正常 |
|
||||||
|
| 权限 | 非批次所属用户不能下载 RIP 产物 |
|
||||||
|
| 回归 | `python manage.py check` 和相关 pytest 通过,既有文件汇总/自动填表/法规核查不回归 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、阶段总览
|
||||||
|
|
||||||
|
| 阶段 | 名称 | 目标 | 阶段验收 |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| RIP-0 | 准备与基线回归 | 创建开发分支,确认依赖和既有测试状态 | 基线命令结果已记录 |
|
||||||
|
| RIP-1 | 数据模型与导出扩展 | 新增三张模型,扩展 zip 下载能力 | migration、模型和下载权限测试通过 |
|
||||||
|
| RIP-2 | 模块骨架与模板配置 | 新建模块、schema、YAML 配置和存储服务 | 配置加载和路径安全测试通过 |
|
||||||
|
| RIP-3 | 触发与工作流骨架 | 接入路由、批次创建、节点流转和状态接口 | 可创建并运行空工作流 |
|
||||||
|
| RIP-4 | 输入选择与说明书解析 | 选择说明书,解析 docx 段落、章节和表格 | 输入选择和说明书解析测试通过 |
|
||||||
|
| RIP-5 | 字段抽取与合并 | 规则 + LLM 并行抽取、重试、合并和高亮决策 | 抽取、重试、冲突合并测试通过 |
|
||||||
|
| RIP-6 | DOCX 文档生成 | 实现 6 个 docx 模板生成、产品列表重建和高亮 | docx 生成和 XML 高亮测试通过 |
|
||||||
|
| RIP-7 | CH1.9 DOC 适配 | 实现 `.doc` 原生适配探测和 `.docx` 兜底 | doc 兜底、失败隔离测试通过 |
|
||||||
|
| RIP-8 | 追溯、ZIP 与下载权限 | 生成 Excel、logs JSON、ZIP 和导出记录 | ZIP 内容、追溯、权限测试通过 |
|
||||||
|
| RIP-9 | 摘要、通知与状态归并 | 生成助手摘要,写通知记录,落定批次状态 | partial_success 等状态测试通过 |
|
||||||
|
| RIP-10 | 前端接入与总体验收 | 接入快捷入口、卡片、状态轮询和下载展示 | 前端回归和全量后端测试通过 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 六、RIP-0 准备与基线回归
|
||||||
|
|
||||||
|
### RIP-0-001 创建开发分支并确认工作区
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 创建本功能开发分支,确认当前工作区已有变更 |
|
||||||
|
| 修改范围 | Git 分支,不修改业务代码 |
|
||||||
|
| 验收标准 | 分支名以 `codex/` 为前缀;记录已有未提交变更,不回滚用户变更 |
|
||||||
|
| Codex 执行提示 | 请创建 `codex/regulatory-info-package` 开发分支,运行 `git status --short`,确认设计文档和目录重排状态,不要回滚无关变更。 |
|
||||||
|
|
||||||
|
### RIP-0-002 确认依赖与基线测试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 确认 Django、python-docx、openpyxl、PyYAML、可选 Word COM 环境状态 |
|
||||||
|
| 修改范围 | 不修改业务代码 |
|
||||||
|
| 验收标准 | `python manage.py check` 可执行;关键依赖可 import;既有失败需记录 |
|
||||||
|
| Codex 执行提示 | 请运行 Django check 和关键回归测试,确认依赖可用。若发现既有失败,只记录并继续按计划隔离,不改无关代码。 |
|
||||||
|
|
||||||
|
### RIP-0 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_file_summary_views.py -k download
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 七、RIP-1 数据模型与导出扩展
|
||||||
|
|
||||||
|
### RIP-1-001 新增监管信息材料包 ORM 模型
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 新增 `RegulatoryInfoPackageBatch`、`RegulatoryInfoPackageArtifact`、`RegulatoryInfoPackageNotificationRecord` |
|
||||||
|
| 修改范围 | `review_agent/models.py` |
|
||||||
|
| 验收标准 | 字段、枚举、索引、软删除、关联关系符合数据库设计 |
|
||||||
|
| Codex 执行提示 | 请按 `docs/3.数据库设计/5.第1章监管信息材料包生成.md` 新增三张模型,模型集中放在 `review_agent/models.py`,不要新增字段级数据库表。 |
|
||||||
|
|
||||||
|
### RIP-1-002 扩展导出类型和下载 MIME
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | `ExportedSummaryFile.ExportType` 增加 `zip`,下载 MIME 支持 `.zip`、`.doc`、`.docx` |
|
||||||
|
| 修改范围 | `review_agent/models.py`、`review_agent/file_summary/views.py` |
|
||||||
|
| 验收标准 | zip 可下载;doc/docx MIME 正确;原有导出不回归 |
|
||||||
|
| Codex 执行提示 | 请扩展 `ExportedSummaryFile` 导出类型,并在下载接口按 workflow_type 和文件后缀处理权限与 content type。 |
|
||||||
|
|
||||||
|
### RIP-1-003 生成迁移并补模型测试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成数据库迁移并覆盖基础模型行为 |
|
||||||
|
| 修改范围 | `review_agent/migrations/`、`tests/` |
|
||||||
|
| 验收标准 | migration 可应用;模型测试覆盖批次号、状态、artifact、通知、zip export type |
|
||||||
|
| Codex 执行提示 | 请生成迁移并新增 `tests/test_regulatory_info_package_models.py`,优先覆盖模型字段默认值、导出类型,以及 `WorkflowNodeRun` 在 RIP 批次下的幂等/唯一节点创建。 |
|
||||||
|
|
||||||
|
### RIP-1 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_models.py && pytest tests/test_file_summary_views.py -k download
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 八、RIP-2 模块骨架与模板配置
|
||||||
|
|
||||||
|
### RIP-2-001 创建 regulatory_info_package 模块骨架
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 新增独立模块目录和基础文件 |
|
||||||
|
| 修改范围 | `review_agent/regulatory_info_package/` |
|
||||||
|
| 验收标准 | 模块可 import;不影响现有 `application_form_fill` |
|
||||||
|
| Codex 执行提示 | 请创建详细设计中的模块骨架,先放常量、schema、storage、events、workflow 空实现和 service 包,不提前写复杂业务。 |
|
||||||
|
|
||||||
|
### RIP-2-002 编写模板配置 YAML
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 配置 7 个样例模板、输出文件名、策略、字段 Tag/占位符映射和 `.doc` 标记 |
|
||||||
|
| 修改范围 | `review_agent/regulatory_info_package/templates/regulatory_info_package_templates_v1.yaml` |
|
||||||
|
| 验收标准 | 7 个模板完整;zip 名称为 `第1章 监管信息(预生成版).zip`;字段映射优先使用内容控件 Tag 或稳定占位符 |
|
||||||
|
| Codex 执行提示 | 请按详细设计录入模板配置,source_dir 指向样例目录,字段 targets 优先写 content_control_tag 或 placeholder;CH1.9 声明 `prefer_legacy_doc_native: true` 且允许 docx fallback。 |
|
||||||
|
|
||||||
|
### RIP-2-003 实现配置加载、模板仓库和存储目录
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 实现 YAML 加载校验、模板复制、批次目录创建、路径安全检查 |
|
||||||
|
| 修改范围 | `template_config.py`、`template_repository.py`、`storage.py` |
|
||||||
|
| 验收标准 | 配置错误可返回清晰错误;模板只复制到批次目录;不写原始材料目录;能审计模板是否包含所需 Tag/占位符 |
|
||||||
|
| Codex 执行提示 | 请实现配置加载、模板复制和模板字段审计服务,所有路径必须校验位于批次工作目录内,原始模板目录只读。 |
|
||||||
|
|
||||||
|
### RIP-2-004 模板字段化整理与审计
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 将样例模板升级为代码友好的字段模板,不手工改生成文件格式 |
|
||||||
|
| 修改范围 | `docs/0.原始材料/第1章 监管信息` 的模板副本或 `review_agent/regulatory_info_package/templates/field_manifest.yaml` |
|
||||||
|
| 验收标准 | CH1.4 关键字段、复选框、声明类产品名/申请人位置有稳定 Tag 或占位符;审计缺失字段时测试失败 |
|
||||||
|
| Codex 执行提示 | 请优先使用 Word 内容控件 Tag;若暂不具备内容控件编辑能力,则使用不会影响版式的稳定占位符,并在配置中记录字段与目标位置。 |
|
||||||
|
|
||||||
|
### RIP-2 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_template_config.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 九、RIP-3 触发与工作流骨架
|
||||||
|
|
||||||
|
### RIP-3-001 扩展意图路由
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 新增 `regulatory_info_package` action,支持固定关键词和 LLM 路由 |
|
||||||
|
| 修改范围 | `review_agent/skill_router.py` |
|
||||||
|
| 验收标准 | 固定提示词直接命中;LLM action 列表包含本工作流;原路由不回归 |
|
||||||
|
| Codex 执行提示 | 请扩展意图路由,新增 `starts_regulatory_info_package` 标记,避免破坏 file_summary、regulatory_review 和 application_form_fill。 |
|
||||||
|
|
||||||
|
### RIP-3-002 实现批次创建和节点初始化
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 创建批次、生成节点、记录事件 |
|
||||||
|
| 修改范围 | `workflow.py`、`events.py`、`constants.py` |
|
||||||
|
| 验收标准 | 可创建 `RIP-...` 批次;节点按定义初始化;事件可查询 |
|
||||||
|
| Codex 执行提示 | 请实现批次创建和节点初始化,workflow_type 必须写 `regulatory_info_package`。 |
|
||||||
|
|
||||||
|
### RIP-3-003 实现执行器骨架和状态接口
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 工作流节点可完整流转,status 接口可返回批次、节点、导出和风险信息 |
|
||||||
|
| 修改范围 | `workflow.py`、`views.py`、`urls.py` 或现有 URL 注册文件 |
|
||||||
|
| 验收标准 | 空工作流可从 pending 到 completed;状态接口校验用户权限 |
|
||||||
|
| Codex 执行提示 | 请先实现可运行的空工作流骨架,业务节点可以临时 no-op,但状态流转和权限必须真实。 |
|
||||||
|
|
||||||
|
### RIP-3-004 接入对话启动逻辑
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | `stream_message` 能启动本工作流或返回说明书反问 |
|
||||||
|
| 修改范围 | `review_agent/services.py` |
|
||||||
|
| 验收标准 | 触发后发送 `workflow_started`;无输入或多候选时不误启动 |
|
||||||
|
| Codex 执行提示 | 请在 `stream_message` 增加 regulatory_info_package 分支,先调用输入选择服务,再决定启动、提示上传或反问。 |
|
||||||
|
|
||||||
|
### RIP-3 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_trigger.py tests/test_regulatory_info_package_workflow.py tests/test_regulatory_info_package_views.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十、RIP-4 输入选择与说明书解析
|
||||||
|
|
||||||
|
### RIP-4-001 实现说明书输入选择
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 按用户消息、active 附件、最近汇总批次选择说明书 |
|
||||||
|
| 修改范围 | `services/input_select.py` |
|
||||||
|
| 验收标准 | 文件名模糊匹配、唯一 docx、多个说明书、无说明书均有明确结果 |
|
||||||
|
| Codex 执行提示 | 请实现 `select_instruction_input`,多候选返回 waiting_user 语义,由对话反问用户确认具体文件名。 |
|
||||||
|
|
||||||
|
### RIP-4-002 实现说明书 docx 解析
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 读取说明书段落、章节、表格、组成成分表和 front_text |
|
||||||
|
| 修改范围 | `services/instruction_extract.py` |
|
||||||
|
| 验收标准 | 能解析 `目标产品说明书.docx` 的产品名称、章节和主要表格结构 |
|
||||||
|
| Codex 执行提示 | 请使用结构化 Word 解析能力,不用脆弱的纯字符串拼接;解析结果写入可序列化 schema。 |
|
||||||
|
|
||||||
|
### RIP-4-003 写入说明书抽取日志产物
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 保存 `logs/instruction_extract.json` 并创建 artifact |
|
||||||
|
| 修改范围 | `workflow.py`、`storage.py`、`instruction_extract.py` |
|
||||||
|
| 验收标准 | JSON 只在后台 logs 目录,不进入用户下载列表 |
|
||||||
|
| Codex 执行提示 | 请在 text_extract 节点保存说明书抽取 JSON,artifact 可记录,但不要创建 ExportedSummaryFile。 |
|
||||||
|
|
||||||
|
### RIP-4 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_input_select.py tests/test_regulatory_info_package_instruction_extract.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十一、RIP-5 字段抽取与合并
|
||||||
|
|
||||||
|
### RIP-5-001 实现规则字段抽取
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 从说明书章节和表格中抽取产品名称、包装规格、预期用途、组成、储存条件、样本类型、适用仪器、标准号等 |
|
||||||
|
| 修改范围 | `services/field_extract.py` |
|
||||||
|
| 验收标准 | 不依赖 LLM 时可抽取关键字段并支撑 demo |
|
||||||
|
| Codex 执行提示 | 请优先实现规则抽取,抽取结果包含 value、evidence、confidence 和 source。 |
|
||||||
|
|
||||||
|
### RIP-5-002 实现 LLM 抽取封装和三次重试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | LLM 结构化抽取,失败最多重试 3 次,失败后不阻断 |
|
||||||
|
| 修改范围 | `services/field_extract.py`、`prompts/field_extract.md` |
|
||||||
|
| 验收标准 | 按 0s/1s/2s 间隔重试;解析失败可记录错误;LLM 失败后规则结果继续 |
|
||||||
|
| Codex 执行提示 | 请封装 LLM 调用为可 mock 的函数,测试中不要真实调用外部模型。 |
|
||||||
|
|
||||||
|
### RIP-5-003 实现规则与 LLM 并行抽取
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 使用线程并行执行规则抽取和 LLM 抽取 |
|
||||||
|
| 修改范围 | `services/field_extract.py` |
|
||||||
|
| 验收标准 | 任一分支失败不影响另一分支结果;输出 `field_extract_result.json` |
|
||||||
|
| Codex 执行提示 | 请使用 `ThreadPoolExecutor(max_workers=2)`,不要在子线程直接写数据库。 |
|
||||||
|
|
||||||
|
### RIP-5-004 实现字段合并和高亮决策
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 输出 missing、llm_only、conflict 和最终写入值 |
|
||||||
|
| 修改范围 | `services/field_merge.py` |
|
||||||
|
| 验收标准 | 全缺失写 `/` 黄底;LLM-only 黄底;冲突黄底红字;合并结果可追溯 |
|
||||||
|
| Codex 执行提示 | 请实现 `MergedField` 结构,合并结果写 `logs/merged_fields.json`,并同步批次摘要字段。 |
|
||||||
|
|
||||||
|
### RIP-5 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_field_extract.py tests/test_regulatory_info_package_field_merge.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十二、RIP-6 DOCX 文档生成
|
||||||
|
|
||||||
|
### RIP-6-001 实现 DocxDocumentAdapter
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 支持段落/表格替换、表格单元格填充、黄色底色、红字 |
|
||||||
|
| 修改范围 | `services/document_writer.py`、`services/docx_document.py` |
|
||||||
|
| 验收标准 | 可处理 run 拆分;测试可检查 docx XML 高亮和红字 |
|
||||||
|
| Codex 执行提示 | 请优先支持本模板需要的替换和表格填充场景,复杂通用 Word 引擎不要过度设计。 |
|
||||||
|
|
||||||
|
### RIP-6-002 实现 6 个 DOCX 文件生成策略
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成 CH1.2、CH1.4、CH1.5、CH1.11.1、CH1.11.5、CH1.11.6 |
|
||||||
|
| 修改范围 | `services/package_generate.py`、`services/standard_candidates.py` |
|
||||||
|
| 验收标准 | 6 个 docx 文件可生成;缺失/LLM-only/冲突样式正确 |
|
||||||
|
| Codex 执行提示 | 请先完成 docx 主链路。CH1.5 产品列表必须转成样例表头:包装规格、货号、组成、组分、主要组成成分、规格/数量,其中货号填 `/` 并标黄底。 |
|
||||||
|
|
||||||
|
### RIP-6-003 实现 generate_docs 内部并发
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 每个文档独立线程生成,主线程统一写 artifact/export |
|
||||||
|
| 修改范围 | `services/package_generate.py`、`workflow.py` |
|
||||||
|
| 验收标准 | 单个文件失败不影响其他文件;返回 `GeneratedFileResult` 列表 |
|
||||||
|
| Codex 执行提示 | 请使用独立模板副本,子线程不要写数据库;所有异常转成文件级 failed 状态。 |
|
||||||
|
|
||||||
|
### RIP-6 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_docx_writer.py tests/test_regulatory_info_package_package_generate.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十三、RIP-7 CH1.9 DOC 适配
|
||||||
|
|
||||||
|
### RIP-7-001 实现 LegacyDocDocumentAdapter 能力探测
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 探测 Word COM、LibreOffice UNO 或可用兜底能力 |
|
||||||
|
| 修改范围 | `services/legacy_doc_document.py` |
|
||||||
|
| 验收标准 | 当前环境无原生能力时返回清晰 capability,不崩溃;测试不要求本机必须安装 Word 或 LibreOffice |
|
||||||
|
| Codex 执行提示 | 请先实现能力探测和接口骨架,Windows Word COM/LibreOffice UNO 可作为原生能力;不可用时明确进入 docx 兜底。 |
|
||||||
|
|
||||||
|
### RIP-7-002 实现 CH1.9 原生写入与 docx 兜底
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | CH1.9 优先 `.doc` 输出,失败时生成同语义 `.docx` |
|
||||||
|
| 修改范围 | `legacy_doc_document.py`、`package_generate.py` |
|
||||||
|
| 验收标准 | 有原生能力且原生写入成功时状态为 success;无原生能力或原生失败但兜底成功时状态为 fallback_success;原生与兜底均失败时文件不进入 zip |
|
||||||
|
| Codex 执行提示 | 请把能力探测、原生失败和兜底失败都写入 `adapter_summary` 和 `risk_notes`,不要静默转换。 |
|
||||||
|
|
||||||
|
### RIP-7-003 补充 doc 适配器测试
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 覆盖 capability、兜底成功、失败隔离 |
|
||||||
|
| 修改范围 | `tests/test_regulatory_info_package_legacy_doc.py` |
|
||||||
|
| 验收标准 | 测试不依赖本机必须安装 Word;用 mock 覆盖原生成功/失败 |
|
||||||
|
| Codex 执行提示 | 请用 mock 模拟 Word COM 可用和不可用场景,保证 CI 或本地无 Word 时测试仍稳定。 |
|
||||||
|
|
||||||
|
### RIP-7 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_legacy_doc.py tests/test_regulatory_info_package_package_generate.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十四、RIP-8 追溯、ZIP 与下载权限
|
||||||
|
|
||||||
|
### RIP-8-001 实现追溯 Excel 和后台 JSON
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成 `exports/traceability.xlsx` 和 `logs/traceability.json` |
|
||||||
|
| 修改范围 | `services/traceability_export.py` |
|
||||||
|
| 验收标准 | Excel 可下载;JSON 不进入用户下载列表 |
|
||||||
|
| Codex 执行提示 | 请用 openpyxl 生成 Excel,字段包含 target_file、target_field、final_value、extraction_source、evidence、highlight_reason、needs_review。 |
|
||||||
|
|
||||||
|
### RIP-8-002 实现 zip 打包
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 生成 `第1章 监管信息(预生成版).zip` |
|
||||||
|
| 修改范围 | `services/zip_export.py` |
|
||||||
|
| 验收标准 | zip 只包含 success/fallback_success 文件;失败文件不入包 |
|
||||||
|
| Codex 执行提示 | 请用 Python 标准库 `zipfile` 打包,zip 中保留最终输出文件名。CH1.9 兜底成功时放入 `.docx` 文件。 |
|
||||||
|
|
||||||
|
### RIP-8-003 创建导出记录和下载权限
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | zip、单文件、Excel 均写 `ExportedSummaryFile`;下载接口校验用户权限 |
|
||||||
|
| 修改范围 | `file_summary/views.py`、`storage.py`、`zip_export.py` |
|
||||||
|
| 验收标准 | 非批次用户不能下载;zip 在 exports 返回顺序中排首位 |
|
||||||
|
| Codex 执行提示 | 请按 `workflow_type=regulatory_info_package` 反查批次所属 conversation/user,软删除批次不可下载。 |
|
||||||
|
|
||||||
|
### RIP-8 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_traceability.py tests/test_regulatory_info_package_zip.py tests/test_regulatory_info_package_views.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十五、RIP-9 摘要、通知与状态归并
|
||||||
|
|
||||||
|
### RIP-9-001 实现助手 Markdown 摘要
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 完成后返回 zip 首位、单文件列表、失败原因、待确认摘要 |
|
||||||
|
| 修改范围 | `services/summary.py`、`workflow.py` |
|
||||||
|
| 验收标准 | zip 链接在回复首位;失败文件显示原因且无下载;待确认数量准确 |
|
||||||
|
| Codex 执行提示 | 请严格按详细设计生成助手摘要,partial_success 时也要展示可下载 zip 和失败文件原因。 |
|
||||||
|
|
||||||
|
### RIP-9-002 实现通知记录和统一通知接入
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 写 `RegulatoryInfoPackageNotificationRecord`,调用统一通知服务 |
|
||||||
|
| 修改范围 | `services/notifier.py`、`workflow.py` |
|
||||||
|
| 验收标准 | 通知失败不阻断下载;失败写 `risk_notes` |
|
||||||
|
| Codex 执行提示 | 请复用已有通知模式,先保证本地测试可 mock;不要让外部通知失败影响批次主状态。 |
|
||||||
|
|
||||||
|
### RIP-9-003 完成状态归并
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 根据生成结果、zip、追溯、通知落定 success/partial_success/failed/waiting_user |
|
||||||
|
| 修改范围 | `workflow.py` |
|
||||||
|
| 验收标准 | 7 文件成功为 success;部分文件失败但有 zip 为 partial_success;全部失败为 failed |
|
||||||
|
| Codex 执行提示 | 请把状态归并集中在一个函数,测试覆盖 docx 兜底、zip 失败、通知失败、产品名缺失。 |
|
||||||
|
|
||||||
|
### RIP-9 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pytest tests/test_regulatory_info_package_workflow.py tests/test_regulatory_info_package_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十六、RIP-10 前端接入与总体验收
|
||||||
|
|
||||||
|
### RIP-10-001 增加对话快捷入口
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 对话框底部增加“第1章监管信息”快捷提示 |
|
||||||
|
| 修改范围 | `templates/home.html` |
|
||||||
|
| 验收标准 | 点击后填入或发送 `根据说明书生成第1章监管信息` |
|
||||||
|
| Codex 执行提示 | 请复用现有 tool-chip 样式,不单独创建新前端样式文件,除非现有结构无法展示。 |
|
||||||
|
|
||||||
|
### RIP-10-002 工作流卡片和状态轮询支持
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 前端识别 `regulatory_info_package`,使用新 status URL 轮询 |
|
||||||
|
| 修改范围 | `static/js/app.js`、`templates/home.html` |
|
||||||
|
| 验收标准 | 卡片能展示节点、状态、风险和导出列表;终态识别 success/partial_success/failed/waiting_user |
|
||||||
|
| Codex 执行提示 | 请在现有工作流卡片逻辑中增量接入,不复制一套新卡片实现。 |
|
||||||
|
|
||||||
|
### RIP-10-003 下载展示和失败文件展示
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | zip 首位展示,单文件辅助下载,失败文件展示原因 |
|
||||||
|
| 修改范围 | `static/js/app.js` |
|
||||||
|
| 验收标准 | exports 返回顺序被保留;失败文件无下载按钮;traceability.xlsx 可下载 |
|
||||||
|
| Codex 执行提示 | 请以后端 exports 顺序为准,不新增 `is_primary` 字段;zip 已由后端排首位。 |
|
||||||
|
|
||||||
|
### RIP-10-004 总体验收与回归
|
||||||
|
|
||||||
|
| 项 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 目标 | 全链路验证和回归保护 |
|
||||||
|
| 修改范围 | 测试、必要的 bug fix |
|
||||||
|
| 验收标准 | Django check、RIP 测试、关键既有测试通过;能用样例说明书生成材料包 |
|
||||||
|
| Codex 执行提示 | 请用 `docs/0.原始材料/目标产品说明书.docx` 做端到端验证,确认 zip、单文件、Excel、logs 和摘要均符合设计。 |
|
||||||
|
|
||||||
|
### RIP-10 阶段验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python manage.py check
|
||||||
|
pytest tests/test_regulatory_info_package_frontend.py
|
||||||
|
pytest tests/test_regulatory_info_package_models.py tests/test_regulatory_info_package_trigger.py tests/test_regulatory_info_package_input_select.py tests/test_regulatory_info_package_template_config.py tests/test_regulatory_info_package_instruction_extract.py tests/test_regulatory_info_package_field_extract.py tests/test_regulatory_info_package_field_merge.py tests/test_regulatory_info_package_docx_writer.py tests/test_regulatory_info_package_legacy_doc.py tests/test_regulatory_info_package_package_generate.py tests/test_regulatory_info_package_traceability.py tests/test_regulatory_info_package_zip.py tests/test_regulatory_info_package_workflow.py tests/test_regulatory_info_package_views.py tests/test_regulatory_info_package_notification.py
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十七、测试分层要求
|
||||||
|
|
||||||
|
| 测试层 | 覆盖内容 | 建议文件 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 模型测试 | 批次、产物、通知、zip 导出类型 | `tests/test_regulatory_info_package_models.py` |
|
||||||
|
| 路由测试 | 固定关键词、LLM action、对话启动分支 | `tests/test_regulatory_info_package_trigger.py` |
|
||||||
|
| 输入测试 | 文件名匹配、active 附件、多候选反问 | `tests/test_regulatory_info_package_input_select.py` |
|
||||||
|
| 配置测试 | YAML 加载、模板缺失、code 唯一 | `tests/test_regulatory_info_package_template_config.py` |
|
||||||
|
| 解析测试 | 说明书章节、表格、组成成分表 | `tests/test_regulatory_info_package_instruction_extract.py` |
|
||||||
|
| 抽取测试 | 规则抽取、LLM 重试、失败降级 | `tests/test_regulatory_info_package_field_extract.py` |
|
||||||
|
| 合并测试 | missing、llm_only、conflict | `tests/test_regulatory_info_package_field_merge.py` |
|
||||||
|
| 文档测试 | docx 替换、表格、高亮、红字 | `tests/test_regulatory_info_package_docx_writer.py` |
|
||||||
|
| doc 测试 | adapter 探测、docx 兜底、失败状态 | `tests/test_regulatory_info_package_legacy_doc.py` |
|
||||||
|
| 生成测试 | 7 文件并发生成、异常隔离 | `tests/test_regulatory_info_package_package_generate.py` |
|
||||||
|
| 追溯测试 | Excel 下载、logs JSON | `tests/test_regulatory_info_package_traceability.py` |
|
||||||
|
| zip 测试 | 只打包 success/fallback_success | `tests/test_regulatory_info_package_zip.py` |
|
||||||
|
| 工作流测试 | 节点流转、状态归并、partial_success | `tests/test_regulatory_info_package_workflow.py` |
|
||||||
|
| 接口测试 | start/status/download 权限 | `tests/test_regulatory_info_package_views.py` |
|
||||||
|
| 通知测试 | 通知记录、通知失败降级 | `tests/test_regulatory_info_package_notification.py` |
|
||||||
|
| 前端测试 | chip、卡片、状态 URL、下载展示 | `tests/test_regulatory_info_package_frontend.py` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十八、Codex 自动化执行规则
|
||||||
|
|
||||||
|
| 规则 | 内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 顺序执行 | 必须从 RIP-0 到 RIP-10 顺序执行,不得跳阶段 |
|
||||||
|
| 阶段聚焦 | 当前阶段失败时先修复当前阶段,不继续后续阶段 |
|
||||||
|
| TDD | 新行为先写失败测试,再实现 |
|
||||||
|
| 小步修改 | 每次只修改当前阶段相关文件,避免顺手重构 |
|
||||||
|
| 用户变更保护 | 不得回滚或覆盖用户已有未提交变更 |
|
||||||
|
| 过程日志 | 每阶段记录关键命令结果和既有失败 |
|
||||||
|
| 阶段验证 | 每阶段完成后运行对应验证命令 |
|
||||||
|
| 阶段提交 | 每阶段验证通过后生成提交摘要;是否执行 `git commit` 由用户确认 |
|
||||||
|
| 回归保护 | 文件汇总、法规核查、自动填表现有测试不得回归 |
|
||||||
|
| doc 风险隔离 | `.doc` 原生能力不可用或原生处理失败不得阻断其他 6 个 docx 文件生成 |
|
||||||
|
| 外部依赖隔离 | LLM、通知、Word COM 均需可 mock,测试不依赖真实外部服务 |
|
||||||
|
| 下载安全 | 所有导出下载必须通过所属用户权限校验 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 十九、推荐目标模式提示词
|
||||||
|
|
||||||
|
后续可直接对 Codex 输入:
|
||||||
|
|
||||||
|
```text
|
||||||
|
请按 docs/5.开发计划/5.第1章监管信息材料包生成.md 执行开发。
|
||||||
|
|
||||||
|
执行要求:
|
||||||
|
1. 严格按 RIP-0 到 RIP-10 顺序推进,不跳阶段。
|
||||||
|
2. 每阶段先读对应需求、功能、数据库、详细设计文档。
|
||||||
|
3. 每阶段先写或补充测试,再实现代码。
|
||||||
|
4. 每阶段只修改当前阶段相关文件,不做无关重构。
|
||||||
|
5. 不回滚、不覆盖用户已有未提交变更。
|
||||||
|
6. LLM、通知、Word COM 等外部能力必须可 mock。
|
||||||
|
7. 每阶段完成后运行该阶段验证命令。
|
||||||
|
8. 验证通过后生成提交摘要,是否本地提交等待用户确认。
|
||||||
|
9. 最后使用 docs/0.原始材料/目标产品说明书.docx 做端到端验收。
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二十、待执行前检查清单
|
||||||
|
|
||||||
|
| 检查项 | 状态 |
|
||||||
|
| --- | --- |
|
||||||
|
| 需求分析、功能设计、数据库设计、详细设计均已存在 | 待执行时确认 |
|
||||||
|
| 当前分支是否适合创建开发分支 | 待执行时确认 |
|
||||||
|
| 是否存在用户未提交变更 | 待执行时确认 |
|
||||||
|
| `python-docx`、`openpyxl`、`PyYAML` 是否可用 | 待执行时确认 |
|
||||||
|
| Word COM 或 LibreOffice UNO 是否可用 | 待执行时确认,非阻塞 |
|
||||||
|
| 目标说明书 `docs/0.原始材料/目标产品说明书.docx` 是否存在 | 待执行时确认 |
|
||||||
|
| 样例模板目录 `docs/0.原始材料/第1章 监管信息` 是否完整 | 待执行时确认 |
|
||||||
|
| 现有文件汇总、法规核查、自动填表测试是否通过 | 待执行时确认 |
|
||||||
|
|
||||||
53
docs/6.待办计划/第二阶段暂缓事项.md
Normal file
53
docs/6.待办计划/第二阶段暂缓事项.md
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# 第二阶段暂缓事项待办表
|
||||||
|
|
||||||
|
## 一、待办原则
|
||||||
|
|
||||||
|
以下事项不进入第二阶段第一批或第二批落地范围。完成 Demo 主任务后,再根据展示效果和剩余时间决定是否进入第三阶段。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、第三阶段第一批建议事项
|
||||||
|
|
||||||
|
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| TODO-3-001 | 真实飞书 CLI/API 接入 | 第二阶段通知能力 | P0 | 替换第二阶段 mock 通知,支持真实发送 |
|
||||||
|
| TODO-3-002 | 用户与飞书账号映射 | 第二阶段通知能力 | P0 | 维护 Django User 到飞书 open_id、手机号或邮箱的映射 |
|
||||||
|
| TODO-3-003 | 飞书通知模板和失败重试完善 | 第二阶段通知能力 | P0 | 支持风险摘要、报告链接、重试、失败告警 |
|
||||||
|
| TODO-3-004 | 飞书通知权限和脱敏策略 | 第二阶段通知能力 | P1 | 通知中不暴露完整敏感文件内容 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、规则管理后续事项
|
||||||
|
|
||||||
|
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| TODO-RULE-001 | 规则管理前端 | YAML + DB 规则版本 | P1 | 展示 YAML 与数据库 hash 差异,支持人工确认导入 |
|
||||||
|
| TODO-RULE-002 | 规则导入审批流 | 合规追溯 | P1 | 规则版本变更需要审批和留痕 |
|
||||||
|
| TODO-RULE-003 | 规则/RAG 状态管理页 | RAG 运维 | P1 | 展示规则版本、YAML hash、Chroma 索引版本、索引状态和重建提示 |
|
||||||
|
| TODO-RULE-004 | RAG 索引重建前端入口 | RAG 运维 | P1 | 前端触发或提示重建法规 RAG 索引 |
|
||||||
|
| TODO-RULE-005 | 官网法规定期更新 | 原始需求法规来源 | P2 | 后续从 NMPA/CMDE 官网定期抓取或人工导入 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、原始需求 3 后续事项
|
||||||
|
|
||||||
|
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| TODO-FILL-001 | 字段级数据库表 | 第三批自动填表数据库设计 | P1 | 后续新增 `ApplicationFormFillField`,支持字段级查询、人工修改、审计和统计 |
|
||||||
|
| TODO-FILL-002 | PDF 转换与版式 QA | 第三批自动填表详细设计 | P1 | 使用 LibreOffice/soffice 将填好的 Word 转 PDF,并增加页数非 0、逐页截图或版式差异检查 |
|
||||||
|
| TODO-FILL-003 | `.doc` 模板预转换管理 | 第三批自动填表模板处理 | P1 | 将“变更注册(备案)文件”和“安全和性能基本原则清单”预转换为 `.docx` 工作模板,并人工确认版式 |
|
||||||
|
| TODO-FILL-004 | 安全和性能基本原则清单完整条目拆解 | 第三批自动填表模板配置 | P1 | 拆解清单条目编号、原则内容、适用性栏、证据栏和证明文件位置栏,写入 YAML 配置 |
|
||||||
|
| TODO-FILL-005 | 填写前后差异报告 | 自动填写风控 | P1 | 输出写入前后 diff,供人工复核 |
|
||||||
|
| TODO-FILL-006 | 自动填写审批确认 | 自动填写风控 | P1 | 文件写操作前支持人工确认或二次审批 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、其他增强事项
|
||||||
|
|
||||||
|
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| TODO-EXT-001 | 无汇总批次时自动串联文件汇总 | 第二阶段启动方式 | P2 | 当前口径为提示用户先自动汇总,暂不自动串联 |
|
||||||
|
| TODO-EXT-002 | 文件夹上传增强 | 第一阶段边界 | P2 | 浏览器 `webkitdirectory` 或目录上传能力 |
|
||||||
|
| TODO-EXT-003 | Office 精确分页 | 第一阶段边界 | P2 | 引入 LibreOffice headless 转 PDF 后统计页数 |
|
||||||
|
| TODO-EXT-004 | OCR 文本抽取 | 章节/一致性核查增强 | P2 | 支持扫描件和图片型 PDF |
|
||||||
|
| TODO-EXT-005 | 独立 Chroma Server 部署 | RAG 运维增强 | P2 | 当前第二阶段使用本地持久化 ChromaDB,后续可演进为独立服务 |
|
||||||
311
docs/7.汇报材料/架构搭建思路汇报稿.md
Normal file
311
docs/7.汇报材料/架构搭建思路汇报稿.md
Normal file
@@ -0,0 +1,311 @@
|
|||||||
|
# 架构搭建思路汇报稿(基于 Demo 版)
|
||||||
|
|
||||||
|
## 一、设计路径:先锁规格,再实现代码
|
||||||
|
|
||||||
|
各位老师好,我本次 Demo 搭建的是一个面向体外诊断试剂注册资料准备与审核的智能体原型。
|
||||||
|
|
||||||
|
这次开发没有直接从代码开始,而是采用“文档先行、规格锁定、再实现代码”的路径。原因是注册资料审核不是一个简单问答场景,它涉及文件解析、法规规则、RAG 依据、工作流状态、导出文件、人工确认和整改闭环。如果一开始就写代码,很容易出现功能能跑但边界不清、结果不可追溯、后续难维护的问题。
|
||||||
|
|
||||||
|
所以整体设计路径分为四步:
|
||||||
|
|
||||||
|
```text
|
||||||
|
需求拆解
|
||||||
|
-> 生成需求分析、功能设计、详细设计、数据库设计和开发计划
|
||||||
|
-> 用文档锁定实现规格
|
||||||
|
-> 按规格实现 Django 代码、工作流、前端页面和测试
|
||||||
|
```
|
||||||
|
|
||||||
|
当前仓库中可以看到完整的规格文档链路:
|
||||||
|
|
||||||
|
| 阶段 | 产物 | 作用 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 需求分析 | `docs/1.需求分析` | 明确业务目标、用户动作、输入输出和异常场景 |
|
||||||
|
| 功能设计 | `docs/2.功能设计` | 把需求拆成文件汇总、法规核查、自动填表、飞书通知等模块 |
|
||||||
|
| 详细设计 | `docs/4.详细设计` | 锁定工作流节点、字段结构、状态流转和服务边界 |
|
||||||
|
| 数据库设计 | `docs/3.数据库设计` | 锁定批次、附件、节点、风险项、导出文件等模型 |
|
||||||
|
| 开发计划 | `docs/5.开发计划` | 将实现拆成可验证的开发任务和前端线框图 |
|
||||||
|
|
||||||
|
因此,这个 Demo 的核心不是“让大模型临时回答一个问题”,而是先用文档定义清楚系统应该如何工作,再把这些规格落实到代码、数据库、前端和测试中。最终形成的是一个可追溯、可复核、可继续扩展的审核工作台。
|
||||||
|
|
||||||
|
## 二、系统定位和 Demo 目标
|
||||||
|
|
||||||
|
这个 Demo 的目标不是简单做文件上传、文件解析或法规问答,而是把注册资料审核中几个高频、耗时、容易出错的环节串成一个智能工作流,包括:
|
||||||
|
|
||||||
|
```text
|
||||||
|
资料上传
|
||||||
|
-> 文件目录和页数汇总
|
||||||
|
-> NMPA 法规完整性核查
|
||||||
|
-> 法规依据 RAG 检索
|
||||||
|
-> 产品关键信息抽取
|
||||||
|
-> 一致性核查和风险预警
|
||||||
|
-> 申报文件自动填表
|
||||||
|
-> 报告导出和整改复核
|
||||||
|
```
|
||||||
|
|
||||||
|
从产品形态上看,它更像是一个“注册资料审核工作台”。用户上传一批申报资料后,系统先把资料包结构化,再按法规规则做核查,然后输出风险清单、整改建议、证据来源和导出文件。后续还可以继续复用抽取到的产品信息,自动填入申报模板。
|
||||||
|
|
||||||
|
## 三、技术栈和总体架构
|
||||||
|
|
||||||
|
本 Demo 采用轻量、可本地运行、便于测试和可解释的技术栈。
|
||||||
|
|
||||||
|
| 层级 | 技术/工具 | 作用 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| Web 框架 | Django | 路由、视图、模板、认证、ORM 和后台能力 |
|
||||||
|
| 数据库 | SQLite / Django ORM | Demo 阶段保存会话、附件、批次、节点、风险项和导出文件 |
|
||||||
|
| 前端 | Django Template + 原生 JS + CSS | 实现首页工作台、审核智能体、知识库管理、附件管理和流式对话 |
|
||||||
|
| 文件解析 | `pypdf`、`python-docx`、`python-pptx`、`openpyxl`、`xlrd`、`py7zr`、`zipfile` | 解析 PDF、Word、PPT、Excel、压缩包和旧 Office 文件 |
|
||||||
|
| 规则配置 | YAML | 维护 NMPA 体外诊断试剂注册资料核查规则 |
|
||||||
|
| RAG | ChromaDB + embedding provider | 构建法规材料向量索引,检索法规依据片段 |
|
||||||
|
| LLM | SiliconFlow / 可配置大模型接口 | 做意图路由、低置信度抽取、自然语言总结和辅助复核 |
|
||||||
|
| 流式交互 | SSE | 将工作流启动、节点进度和模型回复实时推给前端 |
|
||||||
|
| 自动化验证 | pytest + Django test client | 验证路由、页面、模型、工作流和导出结果 |
|
||||||
|
|
||||||
|
整体架构可以概括为:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户界面
|
||||||
|
-> Django 视图层
|
||||||
|
-> 对话服务和 Skill 路由器
|
||||||
|
-> 文件汇总 / 法规核查 / 自动填表工作流
|
||||||
|
-> ORM 状态记录和导出文件
|
||||||
|
-> RAG/LLM/规则服务
|
||||||
|
-> 前端工作流卡片和报告下载
|
||||||
|
```
|
||||||
|
|
||||||
|
这里的关键设计原则是:规则判断要稳定,RAG 负责补证据,LLM 做辅助,不把高风险合规结论完全交给大模型自由发挥。
|
||||||
|
|
||||||
|
## 四、对话流程:先识别意图,再决定 RAG 或工作流
|
||||||
|
|
||||||
|
审核智能体页面不是单纯把用户输入直接发给大模型,而是有一层对话编排流程。
|
||||||
|
|
||||||
|
一次用户消息进入系统后,大致会经历以下步骤:
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户输入
|
||||||
|
-> 保存用户消息
|
||||||
|
-> Skill Router 判断意图
|
||||||
|
-> 根据意图选择普通问答、附件读取或工作流
|
||||||
|
-> 必要时先检查附件和前置批次
|
||||||
|
-> 启动对应工作流或执行 RAG 问答
|
||||||
|
-> 保存助手回复和工作流事件
|
||||||
|
-> 前端通过 SSE 展示增量内容和节点状态
|
||||||
|
```
|
||||||
|
|
||||||
|
当前路由动作包括:
|
||||||
|
|
||||||
|
| action | 场景 | 后续动作 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| `normal_chat` | 普通法规问答或项目问答 | 先检索知识库,再把 RAG 片段放入大模型上下文 |
|
||||||
|
| `attachment_reader` | 用户要求阅读、提取、总结上传附件 | 调用附件读取 Skill,返回文件内容摘要 |
|
||||||
|
| `file_summary` | 用户要求汇总文件目录、页数、清单 | 启动文件汇总工作流 |
|
||||||
|
| `regulatory_review` | 用户要求法规核查、完整性核查、风险预警、整改建议 | 必要时先生成文件汇总批次,再启动法规核查工作流 |
|
||||||
|
| `application_form_fill` | 用户要求申报文件填表、模板填充、安全和性能清单 | 必要时先生成文件汇总批次,再启动自动填表工作流 |
|
||||||
|
|
||||||
|
也就是说,普通问题是“先 RAG,再回答”;工作流问题是“先路由,再检查前置条件,再启动工作流”。例如用户问“注册检验报告要求是什么”,系统会走 RAG 问答;用户说“请对当前资料做法规核查”,系统会进入法规核查工作流。
|
||||||
|
|
||||||
|
## 五、Skill 调用方式:路由器统一调度工具能力
|
||||||
|
|
||||||
|
Demo 中的 Skill 不是一个单独页面,而是对话服务后面的工具调用机制。用户不需要手动选择复杂功能,系统会根据用户话语和当前附件状态判断是否调用某个 Skill 或工作流。
|
||||||
|
|
||||||
|
当前实现中,`review_agent/skill_router.py` 负责意图路由。它采用三层判断:
|
||||||
|
|
||||||
|
```text
|
||||||
|
确定性规则预判
|
||||||
|
-> LLM 路由判断
|
||||||
|
-> 规则兜底
|
||||||
|
```
|
||||||
|
|
||||||
|
第一层是确定性规则。例如用户输入中包含“法规核查”“NMPA 核查”“风险预警”“自动填表”“申报模板”等明确关键词,系统可以直接判断要启动对应工作流。这样可以避免每次都依赖大模型判断。
|
||||||
|
|
||||||
|
第二层是 LLM 路由。系统会把用户消息和当前 active 附件列表发给路由模型,让模型只输出结构化 JSON:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"action": "regulatory_review",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"reason": "用户要求对当前注册资料进行法规完整性核查"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
第三层是规则兜底。如果 LLM 不可用、配置缺失或返回异常,系统会退回关键词和附件状态判断,保证 Demo 在本地环境也能稳定运行。
|
||||||
|
|
||||||
|
这个设计的好处是:用户体验上像是在和一个智能体对话,技术实现上则是由路由器把对话分发到不同工具、不同工作流和不同数据服务。
|
||||||
|
|
||||||
|
## 六、RAG 方式:法规依据和用户知识库共同参与
|
||||||
|
|
||||||
|
RAG 在 Demo 中有两类来源:
|
||||||
|
|
||||||
|
| 来源 | 说明 |
|
||||||
|
| --- | --- |
|
||||||
|
| 内置法规材料 | 来自 `docs/0.原始材料` 和 NMPA 相关法规文件,用于法规依据检索 |
|
||||||
|
| 用户管理知识库 | 由用户在“知识库管理”页面上传,可作为当前账号所有对话的补充知识 |
|
||||||
|
|
||||||
|
法规材料会被切分为文本块,写入 ChromaDB 向量库。每个 chunk 保留来源文件、chunk 编号、文本片段和元数据。embedding 支持真实语义 embedding,也支持 deterministic/local embedding,后者主要用于测试和 dry run。
|
||||||
|
|
||||||
|
RAG 在系统中的定位有两种:
|
||||||
|
|
||||||
|
### 1. 普通问答中的 RAG
|
||||||
|
|
||||||
|
如果用户提出普通问题,系统会先检索知识库,把命中的法规片段或用户知识库片段拼入上下文,再调用大模型回答。这样回答不会只依赖模型记忆,而是带有本地法规材料和用户资料依据。
|
||||||
|
|
||||||
|
```text
|
||||||
|
用户问题
|
||||||
|
-> 知识库检索
|
||||||
|
-> 过滤和排序相关片段
|
||||||
|
-> 组装为知识上下文
|
||||||
|
-> 调用 LLM 生成回答
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. 工作流中的 RAG
|
||||||
|
|
||||||
|
在法规核查工作流里,RAG 不直接决定是否合规,而是为规则判断补充法规依据。例如结构化规则已经判断“缺少注册检验报告”,RAG 再检索相关法规要求,给出来源文件和依据片段。
|
||||||
|
|
||||||
|
这种方式避免了“让大模型自由判断合规”的不稳定性,同时让报告具备可解释依据。
|
||||||
|
|
||||||
|
## 七、三条核心工作流
|
||||||
|
|
||||||
|
当前 Demo 拆成三条主链路:文件汇总、法规核查、自动填表。
|
||||||
|
|
||||||
|
### 1. 文件汇总链路
|
||||||
|
|
||||||
|
对应模块:`review_agent/file_summary`
|
||||||
|
|
||||||
|
```text
|
||||||
|
文件上传
|
||||||
|
-> 附件固化
|
||||||
|
-> 压缩包解压
|
||||||
|
-> 文件扫描
|
||||||
|
-> 页数统计
|
||||||
|
-> 产品名识别
|
||||||
|
-> Markdown/Excel 报告输出
|
||||||
|
```
|
||||||
|
|
||||||
|
这个链路负责把原始资料包转换成结构化文件清单。系统会生成 `FileSummaryBatch` 和 `FileSummaryItem`,后续法规核查和自动填表都复用这套文件清单,不再重复扫描资料。
|
||||||
|
|
||||||
|
输出字段包括序号、目录层级、文件名、文件类型、页数、相对路径、统计状态、重试次数和异常说明。
|
||||||
|
|
||||||
|
### 2. 法规核查链路
|
||||||
|
|
||||||
|
对应模块:`review_agent/regulatory_review`
|
||||||
|
|
||||||
|
```text
|
||||||
|
准备资料
|
||||||
|
-> 适用条件确认
|
||||||
|
-> 规则范围裁剪
|
||||||
|
-> 完整性核查
|
||||||
|
-> 文本抽取
|
||||||
|
-> 章节核查
|
||||||
|
-> 一致性核查
|
||||||
|
-> RAG 法规依据补充
|
||||||
|
-> 风险评估
|
||||||
|
-> 报告输出
|
||||||
|
-> 整改复核
|
||||||
|
```
|
||||||
|
|
||||||
|
这条链路使用 `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml` 作为结构化规则文件。规则中配置了附件 4 的资料要求,包括监管信息、综述资料、非临床资料、临床评价资料、说明书和标签样稿、质量管理体系文件等。
|
||||||
|
|
||||||
|
系统会检查是否缺少关键资料,例如注册申请表、符合性声明、产品技术要求、注册检验报告、说明书、标签样稿、临床评价资料和质量管理体系文件。缺失项会转成 `RegulatoryIssue`,并按阻断项、高风险、中风险、低风险和提示项分级。
|
||||||
|
|
||||||
|
### 3. 自动填表链路
|
||||||
|
|
||||||
|
对应模块:`review_agent/application_form_fill`
|
||||||
|
|
||||||
|
```text
|
||||||
|
准备资料
|
||||||
|
-> 模板选择
|
||||||
|
-> 模板复制
|
||||||
|
-> 字段抽取
|
||||||
|
-> 冲突归并
|
||||||
|
-> Word 填写
|
||||||
|
-> 追溯清单导出
|
||||||
|
-> 结果通知
|
||||||
|
```
|
||||||
|
|
||||||
|
这条链路会复用前面抽取到的产品信息,自动选择申报模板,并将字段填入 Word 模板。对于冲突字段,Demo 中采用明确的归并策略,同时在结果中保留冲突摘要和来源追溯。
|
||||||
|
|
||||||
|
## 八、页面和数据工作台
|
||||||
|
|
||||||
|
前端目前包括四个主要页面:
|
||||||
|
|
||||||
|
| 页面 | URL | 作用 |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| 首页工作台 | `/` | 展示对话、附件、知识库、批次状态和最近处理记录 |
|
||||||
|
| 审核智能体 | `/chat/` | 对话、上传附件、启动工作流、查看节点进度 |
|
||||||
|
| 知识库管理 | `/knowledge-base/` | 管理用户上传知识库、查看内置法规材料和索引状态 |
|
||||||
|
| 附件管理 | `/attachments/` | 管理不同对话下的上传附件、版本、启用状态和下载 |
|
||||||
|
|
||||||
|
首页工作台重点不是营销展示,而是运行态数据,包括:
|
||||||
|
|
||||||
|
```text
|
||||||
|
对话总数
|
||||||
|
附件总数
|
||||||
|
知识库材料数
|
||||||
|
执行中批次
|
||||||
|
已处理批次
|
||||||
|
成功批次
|
||||||
|
等待确认批次
|
||||||
|
失败批次
|
||||||
|
最近处理记录
|
||||||
|
```
|
||||||
|
|
||||||
|
知识库材料中同时统计用户管理文档和内置法规材料,避免把“知识库”误解成只包含用户上传文件。
|
||||||
|
|
||||||
|
## 九、过程留痕和可追溯设计
|
||||||
|
|
||||||
|
审核类系统不能只输出一个结论,还必须说明结论从哪里来。因此 Demo 对关键过程都做了结构化留痕。
|
||||||
|
|
||||||
|
| 过程 | 留痕内容 |
|
||||||
|
| --- | --- |
|
||||||
|
| 对话 | 用户消息、助手消息、会话标题、更新时间 |
|
||||||
|
| 附件 | 原始文件名、版本号、启用状态、存储路径、文件大小 |
|
||||||
|
| 文件汇总 | 批次号、文件明细、页数、统计状态、异常说明 |
|
||||||
|
| 工作流节点 | 节点编码、节点名称、进度、状态、错误信息 |
|
||||||
|
| 法规核查 | 规则编码、缺失项、风险等级、证据、整改建议 |
|
||||||
|
| RAG 检索 | 来源文件、片段文本、相似度、chunk 元数据 |
|
||||||
|
| 自动填表 | 字段来源、冲突摘要、模板选择、追溯清单 |
|
||||||
|
| 导出文件 | Markdown、Excel、JSON、Word 等结果文件 |
|
||||||
|
|
||||||
|
这保证了 Demo 输出的结果不是一次性回答,而是可以复核、下载、整改和继续追踪的过程资产。
|
||||||
|
|
||||||
|
## 十、Demo 可展示结果
|
||||||
|
|
||||||
|
本次 Demo 可以展示以下核心结果:
|
||||||
|
|
||||||
|
### 1. 文件目录汇总表
|
||||||
|
|
||||||
|
用户上传注册资料文件夹、散装文件或压缩包后,系统自动完成附件固化、解压、扫描和页数统计,最终生成 Markdown 汇总报告和 Excel 明细表。
|
||||||
|
|
||||||
|
### 2. 法规完整性报告
|
||||||
|
|
||||||
|
系统基于文件汇总结果和 NMPA 规则库做完整性核查,输出 Markdown 法规核查报告、Excel 问题清单和 JSON 结构化结果包。
|
||||||
|
|
||||||
|
### 3. 产品关键信息提取对照表
|
||||||
|
|
||||||
|
系统从说明书、产品技术要求、注册检验报告、申请表等文件中抽取产品名称、型号规格、预期用途、管理类别、分类编码、注册类型和临床评价路径,并保留来源文件和证据片段。
|
||||||
|
|
||||||
|
### 4. 风险预警列表
|
||||||
|
|
||||||
|
系统把完整性缺失、章节异常、字段冲突、文本抽取失败、页数不可确定、通知失败等问题统一沉淀为风险项,并按阻断项、高风险、中风险、低风险和提示项分级。
|
||||||
|
|
||||||
|
### 5. 申报文件自动填表结果
|
||||||
|
|
||||||
|
系统根据资料内容和适用条件选择模板,自动填充 Word 文件,并导出字段追溯清单,说明每个字段来自哪个文件、哪个证据片段。
|
||||||
|
|
||||||
|
## 十一、总结
|
||||||
|
|
||||||
|
整体来看,本 Demo 的架构搭建思路可以概括为:
|
||||||
|
|
||||||
|
```text
|
||||||
|
先用文档锁定规格
|
||||||
|
再用规则结构化审核逻辑
|
||||||
|
再用 RAG 补充法规依据
|
||||||
|
再用 Skill Router 调度工具和工作流
|
||||||
|
再用 ORM 和导出文件沉淀过程资产
|
||||||
|
最后通过工作台页面呈现状态和结果
|
||||||
|
```
|
||||||
|
|
||||||
|
它体现的是一个“资料输入、规则判断、证据追溯、风险输出、整改闭环”的智能体原型。
|
||||||
|
|
||||||
|
当前 Demo 已经完成了首页工作台、审核智能体对话、附件管理、知识库管理、文件汇总、法规核查、RAG 依据检索、风险预警、报告导出和自动填表主链路。后续如果继续增强,可以重点补充 OCR、扫描件识别、复杂 PDF 版式解析、规则后台维护、人工确认界面、飞书真实消息闭环,以及更完整的多智能体编排能力。
|
||||||
|
|
||||||
|
最终希望这个智能体能够从一个 Demo 原型,逐步演进为注册资料准备和审核过程中的智能协作平台。
|
||||||
34
docs/README.md
Normal file
34
docs/README.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Documentation Index
|
||||||
|
|
||||||
|
This directory keeps the working documents for DEMO-AGENT V2. The docs are organized by project phase rather than by code module.
|
||||||
|
|
||||||
|
## Main Sections
|
||||||
|
|
||||||
|
| Directory | Purpose |
|
||||||
|
| --- | --- |
|
||||||
|
| `0.原始材料/` | Source materials, templates, sample instructions, regulatory references |
|
||||||
|
| `1.需求分析/` | Requirement analysis for each workflow |
|
||||||
|
| `2.功能设计/` | Functional design and user-facing behavior |
|
||||||
|
| `3.数据库设计/` | Data model and persistence design |
|
||||||
|
| `4.详细设计/` | Module-level design, services, workflow details |
|
||||||
|
| `5.开发计划/` | Implementation plans and staged delivery notes |
|
||||||
|
| `6.待办计划/` | Deferred items |
|
||||||
|
| `7.汇报材料/` | Presentation and reporting material |
|
||||||
|
|
||||||
|
## Workflow Documents
|
||||||
|
|
||||||
|
| Workflow | Requirement | Functional Design | Detailed Design | Plan |
|
||||||
|
| --- | --- | --- | --- | --- |
|
||||||
|
| 自动汇总 | `1.需求分析/1.自动汇总.md` | `2.功能设计/1.自动汇总.md` | `4.详细设计/1.自动汇总.md` | `5.开发计划/1.自动汇总.md` |
|
||||||
|
| NMPA 注册资料法规核查 | `1.需求分析/2.NMPA注册资料法规核查与整改闭环.md` | `2.功能设计/2.NMPA注册资料法规核查与整改闭环.md` | `4.详细设计/2.NMPA注册资料法规核查与整改闭环.md` | `5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md` |
|
||||||
|
| 申报文件自动填表 | `1.需求分析/3.产品关键信息提取与申报文件自动填表.md` | `2.功能设计/3.产品关键信息提取与申报文件自动填表.md` | `4.详细设计/3.产品关键信息提取与申报文件自动填表.md` | `5.开发计划/3.产品关键信息提取与申报文件自动填表.md` |
|
||||||
|
| 飞书通知与问答 | `1.需求分析/4.飞书通知与问答接入.md` | `2.功能设计/4.飞书通知与问答接入.md` | `4.详细设计/4.飞书通知与问答接入.md` | `5.开发计划/4.飞书通知与问答接入.md` |
|
||||||
|
| 第 1 章监管信息材料包 | `1.需求分析/5.第1章监管信息材料包生成.md` | `2.功能设计/5.第1章监管信息材料包生成.md` | `4.详细设计/5.第1章监管信息材料包生成.md` | `5.开发计划/5.第1章监管信息材料包生成.md` |
|
||||||
|
|
||||||
|
## Maintenance Notes
|
||||||
|
|
||||||
|
- Keep README-level docs aligned with current `master`.
|
||||||
|
- When a workflow changes behavior, update the requirement/design/plan document closest to that behavior.
|
||||||
|
- Do not paste secrets from `.env` into docs.
|
||||||
|
- Prefer concrete file paths, command examples, and verification notes over broad prose.
|
||||||
|
|
||||||
3
pytest.ini
Normal file
3
pytest.ini
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[pytest]
|
||||||
|
DJANGO_SETTINGS_MODULE = config.settings
|
||||||
|
python_files = tests.py test_*.py *_tests.py
|
||||||
@@ -1 +1,12 @@
|
|||||||
Django>=5.0,<6.0
|
Django>=5.0,<6.0
|
||||||
|
pypdf>=5.0
|
||||||
|
python-docx>=1.1
|
||||||
|
python-pptx>=1.0
|
||||||
|
openpyxl>=3.1
|
||||||
|
xlrd>=2.0
|
||||||
|
olefile>=0.47
|
||||||
|
py7zr>=0.21
|
||||||
|
playwright>=1.60
|
||||||
|
PyYAML>=6.0
|
||||||
|
chromadb>=0.5
|
||||||
|
httpx>=0.27
|
||||||
|
|||||||
74
review_agent/admin.py
Normal file
74
review_agent/admin.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
from django.contrib import admin
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
FeishuAccessTokenCache,
|
||||||
|
FeishuQuestionLog,
|
||||||
|
FeishuUserMapping,
|
||||||
|
WorkflowNotificationRecord,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(FeishuUserMapping)
class FeishuUserMappingAdmin(admin.ModelAdmin):
    """Admin configuration for ``FeishuUserMapping`` records."""

    # Changelist columns, in display order.
    list_display = (
        "system_user",
        "feishu_display_name",
        "feishu_open_id",
        "feishu_user_id",
        "feishu_mobile",
        "is_active",
        "updated_at",
    )
    # Sidebar filter on the activation flag.
    list_filter = ("is_active",)
    # Search spans the related user's username and the Feishu identity columns.
    search_fields = (
        "system_user__username",
        "feishu_display_name",
        "feishu_open_id",
        "feishu_user_id",
        "feishu_mobile",
    )
    # Timestamps are shown on the form but cannot be edited.
    readonly_fields = ("created_at", "updated_at")
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(FeishuAccessTokenCache)
class FeishuAccessTokenCacheAdmin(admin.ModelAdmin):
    """Admin configuration for ``FeishuAccessTokenCache`` records."""

    # ``has_error`` is the computed boolean column defined below.
    list_display = ("app_id_hash", "expires_at", "updated_at", "has_error")
    search_fields = ("app_id_hash", "error_message")
    readonly_fields = ("created_at", "updated_at")

    @admin.display(boolean=True, description="有错误")
    def has_error(self, obj: FeishuAccessTokenCache) -> bool:
        """Return True when the row has a non-empty ``error_message``."""
        return True if obj.error_message else False
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(WorkflowNotificationRecord)
class WorkflowNotificationRecordAdmin(admin.ModelAdmin):
    """Admin configuration for ``WorkflowNotificationRecord`` records."""

    # Changelist columns, in display order.
    list_display = (
        "workflow_type",
        "workflow_batch_no",
        "workflow_status",
        "channel",
        "send_status",
        "target",
        "sent_at",
        "created_at",
    )
    # Sidebar filters.
    list_filter = (
        "workflow_type",
        "channel",
        "send_status",
        "workflow_status",
    )
    # Free-text search columns.
    search_fields = (
        "workflow_batch_no",
        "dedupe_key",
        "target",
        "error_message",
    )
    # Timestamps are shown on the form but cannot be edited.
    readonly_fields = ("created_at", "updated_at")
|
||||||
|
|
||||||
|
|
||||||
|
@admin.register(FeishuQuestionLog)
class FeishuQuestionLogAdmin(admin.ModelAdmin):
    """Admin configuration for ``FeishuQuestionLog`` records."""

    # Changelist columns, in display order.
    list_display = (
        "system_user",
        "source_type",
        "intent",
        "permission_result",
        "status",
        "processed_at",
        "created_at",
    )
    # Sidebar filters.
    list_filter = (
        "source_type",
        "intent",
        "permission_result",
        "status",
    )
    # Search spans the related user's username and the question/answer text columns.
    search_fields = (
        "system_user__username",
        "question_text",
        "answer_summary",
        "message_id",
    )
    # Only the creation timestamp is read-only here.
    readonly_fields = ("created_at",)
|
||||||
1
review_agent/application_form_fill/__init__.py
Normal file
1
review_agent/application_form_fill/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Application form auto-fill workflow package."""
|
||||||
36
review_agent/application_form_fill/constants.py
Normal file
36
review_agent/application_form_fill/constants.py
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Identifier stored on workflow records produced by the auto-fill workflow.
WORKFLOW_TYPE = "application_form_fill"

# Codes of the templates the workflow knows about.
TEMPLATE_REGISTRATION_CERTIFICATE = "registration_certificate"
TEMPLATE_CHANGE_REGISTRATION = "change_registration"
TEMPLATE_ESSENTIAL_PRINCIPLES = "essential_principles"

# Output formats requested when the caller does not specify any.
DEFAULT_OUTPUT_TYPES = ["word", "excel", "json"]

# Phrases in a user message that indicate an auto-fill request.
FORM_FILL_TRIGGER_KEYWORDS = [
    "填注册证",
    "对应的表格",
    "生成申报模板",
    "安全和性能基本原则清单",
    "填到申报模板",
    "自动填表",
    "生成表格",
    "申报文件模板",
    "申报文件填表",
    "产品关键信息",
    "字段来源追溯清单",
    "注册证 word",
]

# Ordered workflow nodes as (code, display name, group) tuples.  Every node
# belongs to the "form_fill" group except the terminal "completed" node.
FORM_FILL_NODE_DEFINITIONS = [
    *(
        (node_code, node_title, "form_fill")
        for node_code, node_title in (
            ("prepare", "准备资料"),
            ("template_select", "选择模板"),
            ("template_copy", "复制模板"),
            ("field_extract", "抽取字段"),
            ("conflict_merge", "冲突归并"),
            ("word_fill", "填写 Word"),
            ("pdf_convert", "转换 PDF"),
            ("trace_export", "追溯清单"),
            ("output_export", "输出下载"),
            ("notify", "飞书通知"),
        )
    ),
    ("completed", "完成", "completed"),
]
|
||||||
27
review_agent/application_form_fill/events.py
Normal file
27
review_agent/application_form_fill/events.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.constants import WORKFLOW_TYPE
|
||||||
|
from review_agent.models import ApplicationFormFillBatch, WorkflowEvent
|
||||||
|
|
||||||
|
|
||||||
|
def record_event(
    batch: ApplicationFormFillBatch,
    event_type: str,
    payload: dict | None = None,
) -> WorkflowEvent:
    """Persist one ``WorkflowEvent`` row for an auto-fill batch.

    Args:
        batch: Batch the event belongs to; its ``pk`` and ``conversation``
            are copied onto the event.
        event_type: Event type label stored on the row.
        payload: Optional event data; a falsy value is stored as ``{}``.

    Returns:
        The newly created ``WorkflowEvent``.
    """
    event_fields = {
        "workflow_type": WORKFLOW_TYPE,
        "workflow_batch_id": batch.pk,
        "conversation": batch.conversation,
        "event_type": event_type,
        "payload": payload if payload else {},
    }
    return WorkflowEvent.objects.create(**event_fields)
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert a workflow event into a plain dictionary.

    Returns:
        A dict with the event's primary key (``id``), ``event_type``,
        ``payload`` and the ISO-8601 string of ``created_at``.
    """
    serialized: dict[str, object] = {}
    serialized["id"] = event.pk
    serialized["event_type"] = event.event_type
    serialized["payload"] = event.payload
    serialized["created_at"] = event.created_at.isoformat()
    return serialized
|
||||||
23
review_agent/application_form_fill/prompts/field_extract.md
Normal file
23
review_agent/application_form_fill/prompts/field_extract.md
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
你是医疗器械体外诊断试剂申报资料字段抽取助手。
|
||||||
|
|
||||||
|
请只输出 JSON 对象,不要输出 Markdown。结构如下:
|
||||||
|
|
||||||
|
{
|
||||||
|
"fields": [
|
||||||
|
{
|
||||||
|
"key": "product_name",
|
||||||
|
"label": "产品名称",
|
||||||
|
"value": "字段值",
|
||||||
|
"source_file": "来源文件名",
|
||||||
|
"source_role": "说明书",
|
||||||
|
"evidence": "原文证据",
|
||||||
|
"confidence": 0.8
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"checklist_items": []
|
||||||
|
}
|
||||||
|
|
||||||
|
要求:
|
||||||
|
- 只抽取输入模板字段中出现的信息。
|
||||||
|
- 字段值必须来自资料原文,不要编造。
|
||||||
|
- 找不到时不要输出该字段。
|
||||||
58
review_agent/application_form_fill/schemas.py
Normal file
58
review_agent/application_form_fill/schemas.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from review_agent.models import ApplicationFormFillBatch, ExportedSummaryFile, FileSummaryBatch, RegulatoryReviewBatch
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class TemplateSpec:
    """Immutable description of one fillable template."""

    # Template code and display name.
    code: str
    name: str
    # Template source file and the label used for the produced output.
    source_file: str
    output_label: str
    # Conditions under which the template applies (schema not visible here).
    applies_when: dict[str, Any]
    file_format: str
    # Field definitions; other modules read the "key" and "label" entries.
    fields: list[dict[str, Any]]
    # Optional checklist entries; defaults to a fresh empty list.
    checklist_items: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ExtractedField:
    """Immutable record of one field value extracted from one source file."""

    # Field identity.
    key: str
    label: str
    # Extracted value.
    value: str
    # Provenance: originating file, its detected role, and the supporting text.
    source_file: str
    source_role: str
    evidence: str
    # Name of the extractor that produced the value and its confidence score.
    extractor: str
    confidence: float
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class MergedField:
    """Immutable final value of a field after merging extraction candidates."""

    # Field identity.
    key: str
    label: str
    # Chosen value with its provenance and confidence.
    value: str
    source_file: str
    evidence: str
    confidence: float
    # Conflict bookkeeping; both default to "no conflict".
    has_conflict: bool = False
    conflict_values: list[dict[str, Any]] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FormFillContext:
    """Mutable state carried through one auto-fill workflow run."""

    # Required inputs: the batch being processed and its source batches.
    batch: ApplicationFormFillBatch
    source_summary_batch: FileSummaryBatch
    source_regulatory_batch: RegulatoryReviewBatch | None

    # Template configuration and the templates selected for this run.
    template_config: dict[str, Any] = field(default_factory=dict)
    selected_templates: list[TemplateSpec] = field(default_factory=list)

    # Extracted document texts (keyed by str) and the raw extractor results.
    document_texts: dict[str, str] = field(default_factory=dict)
    regex_results: dict[str, Any] = field(default_factory=dict)
    llm_results: dict[str, Any] = field(default_factory=dict)

    # Merge results.
    merged_fields: dict[str, MergedField] = field(default_factory=dict)
    checklist_items: dict[str, Any] = field(default_factory=dict)
    conflicts: list[dict[str, Any]] = field(default_factory=list)

    # Exported files produced by the run.
    exports: list[ExportedSummaryFile] = field(default_factory=list)
|
||||||
1
review_agent/application_form_fill/services/__init__.py
Normal file
1
review_agent/application_form_fill/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Application form auto-fill services."""
|
||||||
278
review_agent/application_form_fill/services/field_extract.py
Normal file
278
review_agent/application_form_fill/services/field_extract.py
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.schemas import ExtractedField, TemplateSpec
|
||||||
|
from review_agent.application_form_fill.storage import create_artifact_for_file, ensure_batch_subdir
|
||||||
|
from review_agent.llm import generate_completion
|
||||||
|
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch, FileSummaryBatch
|
||||||
|
from review_agent.regulatory_review.services.text_extract import extract_text
|
||||||
|
|
||||||
|
|
||||||
|
# Extra labels tried for each field key, in priority order, in addition to the
# label declared by the template itself.
FIELD_ALIASES = {
    "product_name": ["产品名称"],
    "applicant_name": ["注册人名称", "申请人名称", "生产企业名称"],
    "applicant_address": ["注册人住所", "申请人住所", "生产企业住所"],
    "manufacturer_address": ["生产地址", "生产企业地址", "生产场所"],
    # NOTE(review): the agent fields list manufacturer/applicant labels after
    # the agent label itself, presumably as fallbacks; confirm intent.
    "agent_name": ["代理人名称", "生产企业名称", "注册人名称", "申请人名称"],
    "agent_address": ["代理人住所", "生产企业住所", "注册人住所", "申请人住所"],
    "package_specification": ["包装规格", "规格"],
    "main_components": ["主要组成成分", "主要组成", "组成成分"],
    "intended_use": ["预期用途"],
    "storage_condition_and_validity": [
        "产品储存条件及有效期",
        "储存条件及有效期",
        "储存条件",
        "有效期",
    ],
}

# Labels that always end a captured "label: value" span, regardless of which
# template fields are being extracted.
STATIC_STOP_LABELS = [
    "申请人",
    "国家药品监督管理局",
    "填表说明",
    "注",
    "保证书",
    "应附资料",
    "优先通道申请",
    "分类编码",
    "医疗器械唯一标识",
    "注册产品目前是否",
    "临床评价路径",
    "临床试验",
    "其他需要说明的问题",
    "国家药监局器审中心医疗器械",
]
|
||||||
|
|
||||||
|
|
||||||
|
def collect_document_texts(summary_batch: FileSummaryBatch) -> dict[str, str]:
    """Extract text from every readable file of a summary batch.

    Items are visited in ``file_index`` order.  Relative storage paths are
    resolved against ``settings.MEDIA_ROOT``.  Files that do not exist, or
    whose extraction is not successful or yields no text, are skipped.

    Returns:
        Mapping of ``file_name`` to extracted text.  Because the mapping is
        keyed by file name, a later item with the same name replaces an
        earlier one.
    """
    collected: dict[str, str] = {}
    for summary_item in summary_batch.items.order_by("file_index"):
        file_path = Path(summary_item.storage_path)
        if not file_path.is_absolute():
            file_path = Path(settings.MEDIA_ROOT) / summary_item.storage_path
        if file_path.exists():
            extraction = extract_text(file_path)
            if extraction.status == "success" and extraction.text:
                collected[summary_item.file_name] = extraction.text
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def extract_by_rules(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
    """Extract template fields from document texts with label-based rules.

    Every field definition of the given templates is looked up in every
    document.  Hits are returned as plain dicts (the attribute dict of an
    ``ExtractedField``) with ``extractor`` set to ``"rule"``.

    Returns:
        ``{"fields": [...], "checklist_items": []}``.
    """
    field_defs = _field_defs(specs)
    stop_labels = _all_field_labels(field_defs)
    extracted: list[dict[str, Any]] = []
    for file_name, text in texts.items():
        source_role = detect_source_role(file_name, text)
        # Values found in a document detected as "说明书" score higher.
        confidence = 0.75 if source_role == "说明书" else 0.65
        for field_def in field_defs:
            value, evidence = _extract_field_value(text, field_def, stop_labels)
            if value:
                record = ExtractedField(
                    key=field_def["key"],
                    label=field_def["label"],
                    value=value,
                    source_file=file_name,
                    source_role=source_role,
                    evidence=evidence,
                    extractor="rule",
                    confidence=confidence,
                )
                extracted.append(vars(record))
    return {"fields": extracted, "checklist_items": []}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_by_llm(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
    """Ask the LLM to extract template fields and normalise its JSON reply.

    The call is best-effort: any failure while calling the model or parsing
    its reply yields an empty result with an ``error_message`` entry.
    Malformed entries in an otherwise parseable reply are skipped rather
    than raising: a non-list ``fields`` value, non-dict items, keys that are
    not strings or not declared by the templates, and blank values.

    Args:
        texts: Extracted document text keyed by source file name.
        specs: Templates whose field definitions bound what may be returned.

    Returns:
        ``{"fields": [...], "checklist_items": ...}`` where every field dict
        has ``extractor`` set to ``"llm"``; on failure additionally
        ``"error_message"``.
    """
    try:
        raw = generate_completion(
            [
                {"role": "system", "content": _prompt_text()},
                {"role": "user", "content": _build_llm_user_prompt(texts, specs)},
            ],
            temperature=0.0,
        )
        payload = _parse_json_object(raw)
    except Exception as exc:
        # Deliberate best-effort boundary: LLM problems must not fail the caller.
        return {"fields": [], "checklist_items": [], "error_message": str(exc)}

    raw_fields = payload.get("fields")
    if not isinstance(raw_fields, list):
        # e.g. {"fields": 1} would otherwise raise TypeError when iterated.
        raw_fields = []

    allowed_keys = {field["key"] for field in _field_defs(specs)}
    fields = []
    for item in raw_fields:
        if not isinstance(item, dict):
            continue
        key = item.get("key")
        # Type check first: an unhashable key (list/dict) would raise on the
        # set membership test.
        if not isinstance(key, str) or key not in allowed_keys:
            continue
        value = str(item.get("value") or "").strip()
        if not value:
            # Skip missing and whitespace-only values.
            continue
        source_file = str(item.get("source_file") or "")
        fields.append(
            {
                "key": key,
                "label": str(item.get("label") or key),
                "value": value,
                "source_file": source_file,
                "source_role": str(item.get("source_role") or detect_source_role(source_file, "")),
                "evidence": str(item.get("evidence") or "").strip(),
                "extractor": "llm",
                "confidence": _float_confidence(item.get("confidence"), default=0.7),
            }
        )
    return {"fields": fields, "checklist_items": payload.get("checklist_items") or []}
|
||||||
|
|
||||||
|
|
||||||
|
def run_parallel_extract(texts: dict[str, str], specs: list[TemplateSpec]) -> dict[str, Any]:
    """Run rule-based and LLM extraction concurrently and bundle the results.

    Returns:
        A dict with ``regex_results``, ``llm_results``, the codes of the
        templates used (``selected_templates``) and, per source document, its
        name and character count (``source_evidence``).
    """
    with ThreadPoolExecutor(max_workers=2) as pool:
        pending = {
            "regex_results": pool.submit(extract_by_rules, texts, specs),
            "llm_results": pool.submit(extract_by_llm, texts, specs),
        }
        # Wait for both; an exception raised by a worker re-raises here.
        bundle: dict[str, Any] = {name: future.result() for name, future in pending.items()}
    bundle["selected_templates"] = [spec.code for spec in specs]
    bundle["source_evidence"] = [
        {"source_file": name, "char_count": len(text)} for name, text in texts.items()
    ]
    return bundle
|
||||||
|
|
||||||
|
|
||||||
|
def save_field_extract_result(batch: ApplicationFormFillBatch, payload: dict[str, Any]) -> ApplicationFormFillArtifact:
    """Write the extraction payload to JSON and register it as a batch artifact.

    The file is written as UTF-8, pretty-printed JSON named
    ``field_extract_result.json`` inside the batch's ``exports`` subdirectory.

    Returns:
        The artifact returned by ``create_artifact_for_file``.
    """
    output_path = ensure_batch_subdir(batch, "exports") / "field_extract_result.json"
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    output_path.write_text(serialized, encoding="utf-8")
    return create_artifact_for_file(
        batch,
        path=output_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.FIELD_EXTRACT_RESULT,
        file_format=ApplicationFormFillArtifact.FileFormat.JSON,
        name="field_extract_result",
        metadata={"artifact": "field_extract_result"},
        created_by_node="field_extract",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def detect_source_role(file_name: str, text: str = "") -> str:
    """Classify a document by keywords in its name and opening text.

    Only the file name and the first 200 characters of ``text`` are
    inspected.  Roles are tested in a fixed priority order and the first
    match wins; ``"其他注册资料"`` is returned when nothing matches.
    """
    haystack = f"{file_name}\n{text[:200]}"
    # (role, keywords that identify it), in priority order.
    role_markers = (
        ("说明书", ("说明书",)),
        ("产品技术要求", ("产品技术要求",)),
        ("注册检验报告", ("注册检验", "检测报告")),
        ("性能研究资料", ("性能研究",)),
        ("申请表", ("申请表",)),
    )
    for role, markers in role_markers:
        if any(marker in haystack for marker in markers):
            return role
    return "其他注册资料"
|
||||||
|
|
||||||
|
|
||||||
|
def _field_defs(specs: list[TemplateSpec]) -> list[dict[str, str]]:
|
||||||
|
fields: list[dict[str, str]] = []
|
||||||
|
for spec in specs:
|
||||||
|
for field in spec.fields:
|
||||||
|
key = str(field.get("key") or "")
|
||||||
|
label = str(field.get("label") or "")
|
||||||
|
if key and label:
|
||||||
|
fields.append({"key": key, "label": label})
|
||||||
|
return fields
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_field_value(text: str, field: dict[str, str], labels: list[str]) -> tuple[str, str]:
    """Find a field's value by trying each of its aliases in order.

    For every alias the "label: value" form is tried first, then the
    bracketed-heading section form.  The first non-empty hit wins.

    Returns:
        ``(value, evidence)``, or ``("", "")`` when no alias matches.
    """
    aliases = _field_aliases(field)
    stop_labels = labels + aliases
    for alias in aliases:
        found = _extract_colon_label_value(text, alias, stop_labels)
        if not found[0]:
            found = _extract_bracket_section_value(text, alias)
        if found[0]:
            return found
    return "", ""
|
||||||
|
|
||||||
|
|
||||||
|
def _field_aliases(field: dict[str, str]) -> list[str]:
    """Return the labels to search for a field: its own label, then aliases.

    Entries are stripped; blanks and repeats are removed while keeping the
    order of first appearance.
    """
    candidates = [field["label"], *FIELD_ALIASES.get(field["key"], [])]
    cleaned = (str(candidate or "").strip() for candidate in candidates)
    # dict.fromkeys removes repeats while preserving insertion order.
    return list(dict.fromkeys(alias for alias in cleaned if alias))
|
||||||
|
|
||||||
|
|
||||||
|
def _all_field_labels(fields: list[dict[str, str]]) -> list[str]:
    """Build the stop-label list: static labels plus every field alias.

    Static labels come first; aliases are appended in field order, skipping
    any label already present.
    """
    labels: list[str] = list(STATIC_STOP_LABELS)
    seen = set(labels)
    for field_def in fields:
        for alias in _field_aliases(field_def):
            if alias in seen:
                continue
            seen.add(alias)
            labels.append(alias)
    return labels
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
    """Alias of ``_extract_colon_label_value`` with the same arguments and result."""
    value_and_evidence = _extract_colon_label_value(text, label, labels)
    return value_and_evidence
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_colon_label_value(text: str, label: str, labels: list[str]) -> tuple[str, str]:
|
||||||
|
escaped_labels = "|".join(re.escape(item) for item in labels if item != label)
|
||||||
|
stop_pattern = rf"(?=\n\s*(?:{escaped_labels})(?:\s*[::]|\s*$))" if escaped_labels else r"(?=\Z)"
|
||||||
|
pattern = re.compile(rf"{re.escape(label)}\s*[::]\s*(.+?)(?:{stop_pattern}|\Z)", re.S)
|
||||||
|
match = pattern.search(text or "")
|
||||||
|
if not match:
|
||||||
|
return "", ""
|
||||||
|
raw = match.group(1).strip()
|
||||||
|
value = re.sub(r"\n{2,}.*\Z", "", raw, flags=re.S).strip()
|
||||||
|
value = "\n".join(line.strip() for line in value.splitlines() if line.strip())
|
||||||
|
evidence = f"{label}:{value}"[:300]
|
||||||
|
return value, evidence
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_bracket_section_value(text: str, label: str) -> tuple[str, str]:
    """Extract the body of a section headed by ``label`` in brackets.

    A heading is a line consisting only of the label wrapped in ``【】`` or
    ``[]``.  The value is every following non-blank line, stripped, up to
    the next bracketed heading.  If a heading has an empty body, later
    headings with the same label are tried.

    Returns:
        ``(value, evidence)`` where evidence is ``"【label】\\nvalue"``
        truncated to 300 characters, or ``("", "")`` when nothing is found.
    """
    heading_re = re.compile(rf"^\s*[【\[]\s*{re.escape(label)}\s*[】\]]\s*$")
    lines = (text or "").splitlines()
    for body_start, line in enumerate(lines, start=1):
        if heading_re.match(line.strip()) is None:
            continue
        body: list[str] = []
        for candidate in lines[body_start:]:
            stripped = candidate.strip()
            if not stripped:
                continue
            if _looks_like_bracket_heading(stripped):
                break
            body.append(stripped)
        value = "\n".join(body).strip()
        if value:
            return value, f"【{label}】\n{value}"[:300]
    return "", ""
|
||||||
|
|
||||||
|
|
||||||
|
def _looks_like_bracket_heading(line: str) -> bool:
|
||||||
|
return bool(re.match(r"^\s*[【\[].{1,40}[】\]]\s*$", line))
|
||||||
|
|
||||||
|
|
||||||
|
def _prompt_text() -> str:
    """Read the field-extraction prompt.

    The prompt is ``prompts/field_extract.md`` under the directory two levels
    above this module file, read as UTF-8.
    """
    package_root = Path(__file__).resolve().parents[1]
    prompt_file = package_root / "prompts" / "field_extract.md"
    return prompt_file.read_text(encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
def _build_llm_user_prompt(texts: dict[str, str], specs: list[TemplateSpec]) -> str:
    """Serialise the wanted fields and source documents as the LLM user message.

    Args:
        texts: Mapping of source file name to its extracted text; each text is
            truncated to its first 4000 characters.
        specs: Template specs whose field definitions (via ``_field_defs``)
            supply the ``key``/``label`` pairs.

    Returns:
        A JSON string with ``fields`` and ``documents`` lists (non-ASCII kept).
    """
    field_entries = []
    for definition in _field_defs(specs):
        field_entries.append({"key": definition["key"], "label": definition["label"]})
    document_entries = []
    for source_name, content in texts.items():
        document_entries.append({"source_file": source_name, "text": content[:4000]})
    payload = {"fields": field_entries, "documents": document_entries}
    return json.dumps(payload, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_json_object(raw: str) -> dict[str, Any]:
|
||||||
|
text = (raw or "").strip()
|
||||||
|
if text.startswith("```"):
|
||||||
|
text = text.strip("`").strip()
|
||||||
|
if text.lower().startswith("json"):
|
||||||
|
text = text[4:].strip()
|
||||||
|
start = text.find("{")
|
||||||
|
end = text.rfind("}")
|
||||||
|
if start == -1 or end == -1 or end < start:
|
||||||
|
raise json.JSONDecodeError("未找到 JSON 对象", text, 0)
|
||||||
|
return json.loads(text[start : end + 1])
|
||||||
|
|
||||||
|
|
||||||
|
def _float_confidence(value, *, default: float) -> float:
|
||||||
|
try:
|
||||||
|
return float(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return default
|
||||||
110
review_agent/application_form_fill/services/field_merge.py
Normal file
110
review_agent/application_form_fill/services/field_merge.py
Normal file
@@ -0,0 +1,110 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.schemas import MergedField
|
||||||
|
|
||||||
|
|
||||||
|
# Keyword -> priority rank for a candidate's source document.
# rank_source() returns the rank of the first keyword (in insertion order) that
# occurs in the source role / file name, and merge_fields() picks the candidate
# with the smallest rank, so 1 is the most trusted source.
SOURCE_PRIORITY = {
    "说明书": 1,
    "产品技术要求": 2,
    "注册检验报告": 3,
    "检测报告": 3,
    "性能研究资料": 4,
    "其他注册资料": 5,
}
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_field_value(value: str) -> str:
    """Return ``value`` with all whitespace removed and lower-cased.

    Falsy input (``None``, ``""``, ``0``) normalises to ``""``. Used to compare
    candidate values regardless of spacing and letter case.
    """
    raw_text = str(value or "")
    without_whitespace = re.sub(r"\s+", "", raw_text)
    return without_whitespace.strip().lower()
|
||||||
|
|
||||||
|
|
||||||
|
def rank_source(source_role: str, source_file: str = "") -> int:
    """Return the priority rank of a source; lower is more trusted.

    The role and file name are searched together for each ``SOURCE_PRIORITY``
    keyword in insertion order; the first keyword found decides the rank.

    Returns:
        The matching rank, or ``9`` when no keyword occurs.
    """
    haystack = f"{source_role}\n{source_file}"
    matching_ranks = (rank for keyword, rank in SOURCE_PRIORITY.items() if keyword in haystack)
    return next(matching_ranks, 9)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_fields(regex_results: dict[str, Any], llm_results: dict[str, Any]) -> tuple[dict[str, MergedField], list[dict]]:
    """Merge regex- and LLM-extracted candidates into one value per field key.

    Candidates from the ``"fields"`` lists of both payloads are grouped by
    ``key``; entries with no key or a blank value are ignored. For each key the
    candidate with the lowest ``rank_source`` rank wins, ties broken by the
    highest ``confidence`` and then by input order. A field is in conflict when
    its candidates have more than one distinct normalised value.

    Args:
        regex_results: Payload whose ``"fields"`` list holds candidate dicts.
        llm_results: Payload of the same shape; its candidates come second.

    Returns:
        ``(merged, conflicts)``: the selected ``MergedField`` per key (after
        agent/applicant fallbacks are applied), and one conflict record per
        conflicting key.
    """
    grouped: dict[str, list[dict[str, Any]]] = {}
    for item in list(regex_results.get("fields") or []) + list(llm_results.get("fields") or []):
        key = str(item.get("key") or "")
        value = str(item.get("value") or "").strip()
        if not key or not value:
            continue
        grouped.setdefault(key, []).append(item)

    def _candidate_rank(item: dict[str, Any]) -> int:
        # Rank a candidate by both its declared role and its file name.
        return rank_source(str(item.get("source_role") or ""), str(item.get("source_file") or ""))

    merged: dict[str, MergedField] = {}
    conflicts: list[dict] = []
    for key, candidates in grouped.items():
        # min() returns the first minimal item, exactly like sorted(...)[0].
        selected = min(
            candidates,
            key=lambda item: (_candidate_rank(item), -float(item.get("confidence") or 0)),
        )
        selected_rank = _candidate_rank(selected)
        selected_normalized = normalize_field_value(str(selected.get("value") or ""))
        has_conflict = len(_distinct_values(candidates)) > 1
        conflict_values = [
            {
                "value": item.get("value"),
                "source_file": item.get("source_file", ""),
                "source_role": item.get("source_role", ""),
                "evidence": item.get("evidence", ""),
            }
            for item in candidates
            if normalize_field_value(str(item.get("value") or "")) != selected_normalized
        ]
        merged_field = MergedField(
            key=key,
            label=str(selected.get("label") or key),
            value=str(selected.get("value") or ""),
            source_file=str(selected.get("source_file") or ""),
            evidence=str(selected.get("evidence") or ""),
            confidence=float(selected.get("confidence") or 0),
            has_conflict=has_conflict,
            conflict_values=conflict_values,
        )
        merged[key] = merged_field
        if has_conflict:
            conflicts.append(
                {
                    "field_key": key,
                    "field_label": merged_field.label,
                    "selected_value": merged_field.value,
                    "selected_source": merged_field.source_file,
                    "conflict_values": conflict_values,
                    # Use the rank that actually selected the winner. Re-ranking
                    # from the file name alone loses the source_role, so a
                    # winner identified by role would get the wrong handling.
                    "handling": "说明书优先,模板内黄底红字高亮" if selected_rank == 1 else "按来源优先级采用最高优先级字段",
                }
            )
    _apply_agent_company_fallbacks(merged)
    return merged, conflicts
|
||||||
|
|
||||||
|
|
||||||
|
def _distinct_values(candidates: list[dict[str, Any]]) -> set[str]:
    """Return the set of normalised values among ``candidates``.

    Candidates whose ``"value"`` is missing or falsy are skipped.
    """
    normalized: set[str] = set()
    for candidate in candidates:
        if candidate.get("value"):
            normalized.add(normalize_field_value(str(candidate.get("value") or "")))
    return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_agent_company_fallbacks(merged: dict[str, MergedField]) -> None:
    """Fill missing agent fields from the applicant fields, in place.

    When ``agent_name`` / ``agent_address`` is absent from ``merged`` but the
    matching ``applicant_*`` field is present, a copy of the applicant field is
    stored under the agent key with the agent label.
    """
    fallbacks = (
        ("agent_name", "applicant_name", "代理人名称"),
        ("agent_address", "applicant_address", "代理人住所"),
    )
    for agent_key, applicant_key, agent_label in fallbacks:
        if agent_key in merged:
            continue
        if applicant_key not in merged:
            continue
        applicant = merged[applicant_key]
        merged[agent_key] = MergedField(
            key=agent_key,
            label=agent_label,
            value=applicant.value,
            source_file=applicant.source_file,
            evidence=applicant.evidence,
            confidence=applicant.confidence,
            has_conflict=applicant.has_conflict,
            conflict_values=applicant.conflict_values,
        )
|
||||||
55
review_agent/application_form_fill/services/notifier.py
Normal file
55
review_agent/application_form_fill/services/notifier.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
ApplicationFormFillBatch,
|
||||||
|
ApplicationFormFillNotificationRecord,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
)
|
||||||
|
from review_agent.notifications.dispatcher import dispatch_workflow_notification
|
||||||
|
from review_agent.notifications.workflow_adapters import build_application_form_fill_context
|
||||||
|
|
||||||
|
|
||||||
|
def notify_completion(
    batch: ApplicationFormFillBatch,
    exports: list[ExportedSummaryFile],
    *,
    fail: bool = False,
) -> ApplicationFormFillNotificationRecord:
    """Record the completion notification for a form-fill batch.

    Args:
        batch: The finished batch.
        exports: Export files whose primary keys are stored on the record.
        fail: When True, store a FAILED mock record and skip dispatching.

    Returns:
        The created notification record. On the normal path the unified
        workflow notification is dispatched first; a failed dispatch (status
        FAILED or an exception) is stored in ``error_message`` while the record
        itself is still stored with status SUCCESS.
    """
    record_model = ApplicationFormFillNotificationRecord
    export_ids = [export.pk for export in exports]
    template_list = ", ".join(batch.selected_templates or []) or "未识别"
    conflict_total = len(batch.conflict_summary or [])
    message_summary = f"自动填表批次 {batch.batch_no} 已完成,模板 {template_list},冲突字段 {conflict_total} 个。"

    def _create_record(**status_fields):
        # Shared fields are read at creation time, after any dispatching.
        return record_model.objects.create(
            batch=batch,
            recipient=batch.user,
            channel=record_model.Channel.MOCK,
            template_codes=batch.selected_templates,
            export_ids=export_ids,
            message_summary=message_summary,
            **status_fields,
        )

    if fail:
        return _create_record(
            send_status=record_model.SendStatus.FAILED,
            retry_count=1,
            error_message="mock notification failed",
        )

    dispatch_error = ""
    try:
        unified_record = dispatch_workflow_notification(build_application_form_fill_context(batch))
        if unified_record.send_status == unified_record.SendStatus.FAILED:
            dispatch_error = unified_record.error_message
    except Exception as exc:
        # Best effort: a dispatch failure is recorded, not raised.
        dispatch_error = str(exc)
    return _create_record(
        send_status=record_model.SendStatus.SUCCESS,
        error_message=dispatch_error,
        sent_at=timezone.now(),
    )
|
||||||
43
review_agent/application_form_fill/services/summary.py
Normal file
43
review_agent/application_form_fill/services/summary.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from review_agent.models import ApplicationFormFillBatch, ExportedSummaryFile
|
||||||
|
|
||||||
|
|
||||||
|
def build_assistant_summary(batch: ApplicationFormFillBatch, exports: list[ExportedSummaryFile]) -> str:
    """Build the Markdown summary shown to the user after a form-fill run.

    The summary holds a table of generated Word files with download links, a
    conflict table when ``batch.conflict_summary`` is non-empty, and download
    links for the Excel/JSON traceability exports.
    """
    export_types = ExportedSummaryFile.ExportType
    trace_types = {export_types.EXCEL, export_types.JSON}
    word_exports = []
    trace_exports = []
    for export in exports:
        if export.export_type == export_types.WORD:
            word_exports.append(export)
        if export.export_type in trace_types:
            trace_exports.append(export)

    lines = ["已生成申报模板自动填表文件。", "", "| 文件 | Word | PDF |", "| --- | --- | --- |"]
    if not word_exports:
        lines.append("| 自动填表结果 | 未生成 | 待增强 |")
    for export in word_exports:
        lines.append(f"| {export.file_name} | [下载](/api/review-agent/file-summary/exports/{export.pk}/download/) | 待增强 |")

    conflicts = batch.conflict_summary or []
    if conflicts:
        lines.extend(["", "| 冲突字段 | 采用值 | 冲突来源 | 处理 |", "| --- | --- | --- | --- |"])
    for item in conflicts:
        source_parts = []
        for value in item.get("conflict_values", []):
            source_parts.append(
                f"{_compact_table_text(value.get('source_file', ''))}:{_compact_table_text(value.get('value', ''))}"
            )
        conflict_sources = ";".join(source_parts)
        field_cell = _compact_table_text(item.get("field_label", item.get("field_key", "")))
        selected_cell = _compact_table_text(item.get("selected_value", ""))
        sources_cell = _compact_table_text(conflict_sources or "-")
        handling_cell = _compact_table_text(item.get("handling", ""))
        lines.append(f"| {field_cell} | {selected_cell} | {sources_cell} | {handling_cell} |")

    if trace_exports:
        lines.append("")
    for export in trace_exports:
        lines.append(f"[下载{export.file_name}](/api/review-agent/file-summary/exports/{export.pk}/download/)")
    return "\n".join(lines).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _compact_table_text(value: object, *, limit: int = 80) -> str:
|
||||||
|
text = " ".join(str(value or "").replace("|", " ").split())
|
||||||
|
if len(text) <= limit:
|
||||||
|
return text
|
||||||
|
return f"{text[:limit]}..."
|
||||||
@@ -0,0 +1,96 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
# Template configuration file used when callers pass no explicit path.
DEFAULT_CONFIG_PATH = (
    Path(settings.BASE_DIR)
    / "review_agent"
    / "application_form_fill"
    / "templates"
    / "application_form_templates_v1.yaml"
)

# Values of a field's target.type that validate_template_config() accepts.
SUPPORTED_TARGET_TYPES = {"table_row", "placeholder"}
# Values of a template's file_format that validate_template_config() accepts.
SUPPORTED_FILE_FORMATS = {"doc", "docx"}
|
||||||
|
|
||||||
|
|
||||||
|
def load_template_config(path: str | Path | None = None) -> dict[str, Any]:
    """Load the template configuration YAML.

    Args:
        path: Config file to read; ``DEFAULT_CONFIG_PATH`` when omitted/empty.

    Returns:
        The document parsed with ``yaml.safe_load``, or ``{}`` when the parsed
        result is falsy (for example an empty file).
    """
    config_path = DEFAULT_CONFIG_PATH if not path else Path(path)
    with config_path.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed or {}
|
||||||
|
|
||||||
|
|
||||||
|
def compute_config_hash(path: str | Path | None = None) -> str:
    """Return the SHA-256 hex digest of the template configuration file.

    Args:
        path: File to hash; ``DEFAULT_CONFIG_PATH`` when omitted/empty.

    The file is read in 1 MiB chunks.
    """
    target = Path(path) if path else DEFAULT_CONFIG_PATH
    hasher = hashlib.sha256()
    chunk_size = 1024 * 1024
    with target.open("rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                break
            hasher.update(block)
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def validate_template_config(config: dict[str, Any], *, base_dir: str | Path | None = None) -> list[str]:
    """Validate a loaded template configuration and collect every problem.

    Checks ``version`` and ``source_dir`` (which must exist under the base
    directory), then for every template its ``code`` (present and unique),
    ``file_format``, ``source_file`` (present, and existing when the source
    directory exists), and every field's ``target.type``.

    Args:
        config: Parsed configuration mapping.
        base_dir: Directory that ``source_dir`` is relative to;
            ``settings.BASE_DIR`` when omitted.

    Returns:
        Human-readable error messages; an empty list means the config is valid.
    """
    errors: list[str] = []
    root = Path(base_dir) if base_dir else Path(settings.BASE_DIR)

    version = config.get("version")
    if not version:
        errors.append("模板配置缺少 version。")

    source_dir_value = config.get("source_dir")
    source_dir = root / source_dir_value if source_dir_value else None
    if not source_dir_value:
        errors.append("模板配置缺少 source_dir。")
    elif not source_dir.exists():
        errors.append(f"模板 source_dir 不存在:{source_dir_value}")

    templates = config.get("templates")
    if not isinstance(templates, list) or not templates:
        errors.append("模板配置必须包含非空 templates 列表。")
        return errors

    seen_codes: set[str] = set()
    for index, template in enumerate(templates, start=1):
        if not isinstance(template, dict):
            errors.append(f"第 {index} 个模板配置必须是对象。")
            continue
        code = str(template.get("code") or "").strip()
        if not code:
            errors.append(f"第 {index} 个模板缺少 code。")
        elif code in seen_codes:
            errors.append(f"模板 code 重复:{code}")
        seen_codes.add(code)

        file_format = str(template.get("file_format") or "").strip().lower()
        if file_format not in SUPPORTED_FILE_FORMATS:
            errors.append(f"模板 {code or index} 的 file_format 不支持:{file_format or '空'}")

        source_file = str(template.get("source_file") or "").strip()
        if not source_file:
            errors.append(f"模板 {code or index} 缺少 source_file。")
        elif source_dir and source_dir.exists() and not (source_dir / source_file).exists():
            errors.append(f"模板 {code or index} 的 source_file 不存在:{source_file}")

        fields = template.get("fields") or []
        if not isinstance(fields, list):
            errors.append(f"模板 {code or index} 的 fields 必须是列表。")
            continue
        for field_index, field in enumerate(fields, start=1):
            target = field.get("target") if isinstance(field, dict) else None
            # A malformed target (e.g. a bare string) must be reported as a
            # validation error, not crash the validator with AttributeError.
            if isinstance(target, dict):
                target_type = str(target.get("type") or "").strip()
            else:
                target_type = ""
            if target_type not in SUPPORTED_TARGET_TYPES:
                errors.append(
                    f"模板 {code or index} 第 {field_index} 个字段 target.type 不支持:{target_type or '空'}"
                )
    return errors
|
||||||
|
|
||||||
|
|
||||||
|
def template_specs(config: dict[str, Any]) -> list[dict[str, Any]]:
    """Return a new list of the raw template entries in ``config``.

    Yields an empty list when ``"templates"`` is missing or falsy.
    """
    raw_templates = config.get("templates")
    if not raw_templates:
        return []
    return list(raw_templates)
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.schemas import TemplateSpec
|
||||||
|
from review_agent.application_form_fill.storage import create_artifact_for_file, ensure_batch_subdir
|
||||||
|
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch
|
||||||
|
|
||||||
|
|
||||||
|
class TemplateUnavailableError(Exception):
    """Raised when a template's source file is missing or cannot be used as-is."""
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_source_template(spec: TemplateSpec, config: dict[str, Any]) -> Path:
    """Locate the template file to copy for ``spec``.

    For ``doc`` templates with a non-empty ``working_template`` attribute that
    file is used; otherwise ``spec.source_file`` is used. Both are resolved
    under ``settings.BASE_DIR / config["source_dir"]``.

    Raises:
        TemplateUnavailableError: When the file does not exist, or when a
            ``doc`` template still resolves to a ``.doc`` file.
    """
    # NOTE(review): working_template is read via getattr with a default;
    # whether TemplateSpec ever carries it is not visible here - confirm.
    templates_root = Path(settings.BASE_DIR) / str(config.get("source_dir") or "")
    converted_name = getattr(spec, "working_template", "") or ""
    use_converted = spec.file_format == "doc" and bool(converted_name)
    relative_name = converted_name if use_converted else spec.source_file
    candidate = templates_root / relative_name
    if not candidate.exists():
        raise TemplateUnavailableError(f"模板文件不存在:{spec.source_file}")
    still_legacy_doc = spec.file_format == "doc" and candidate.suffix.lower() == ".doc"
    if still_legacy_doc:
        raise TemplateUnavailableError(f"模板 {spec.code} 为 .doc,当前阶段需预转换为 .docx 后使用。")
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def copy_template_to_batch(
    spec: TemplateSpec,
    batch: ApplicationFormFillBatch,
    config: dict[str, Any],
) -> ApplicationFormFillArtifact:
    """Copy the source template for ``spec`` into the batch work directory.

    The copy is stored as ``<code>.source<suffix>`` in the batch's
    ``templates`` sub-directory and registered as a TEMPLATE_COPY artifact.

    Raises:
        TemplateUnavailableError: Propagated from ``resolve_source_template``.
        ValueError: When the computed target lies outside ``batch.work_dir``;
            nothing is written in that case.
    """
    source = resolve_source_template(spec, config)
    target_dir = ensure_batch_subdir(batch, "templates")
    target = target_dir / f"{spec.code}.source{source.suffix.lower()}"
    # Check containment BEFORE writing. spec.code comes from configuration, and
    # checking after the copy would already have created a file outside the
    # batch directory by the time the error is raised.
    _ensure_under(target, Path(batch.work_dir))
    shutil.copy2(source, target)
    return create_artifact_for_file(
        batch,
        path=target,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.TEMPLATE_COPY,
        file_format=source.suffix.lower().lstrip(".") or spec.file_format,
        name=spec.name,
        metadata={"template_code": spec.code, "source_file": spec.source_file},
        created_by_node="template_copy",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_under(path: Path, root: Path) -> None:
|
||||||
|
resolved_path = path.resolve()
|
||||||
|
resolved_root = root.resolve()
|
||||||
|
if resolved_path != resolved_root and resolved_root not in resolved_path.parents:
|
||||||
|
raise ValueError(f"模板复制目标不在批次工作目录内:{path}")
|
||||||
158
review_agent/application_form_fill/services/template_select.py
Normal file
158
review_agent/application_form_fill/services/template_select.py
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.constants import (
|
||||||
|
TEMPLATE_CHANGE_REGISTRATION,
|
||||||
|
TEMPLATE_ESSENTIAL_PRINCIPLES,
|
||||||
|
TEMPLATE_REGISTRATION_CERTIFICATE,
|
||||||
|
)
|
||||||
|
from review_agent.application_form_fill.schemas import TemplateSpec
|
||||||
|
from review_agent.models import ApplicationFormFillBatch
|
||||||
|
|
||||||
|
|
||||||
|
# Every template code; parse_requested_templates() returns a copy of this list
# when the user asks for all templates.
ALL_TEMPLATE_CODES = [
    TEMPLATE_REGISTRATION_CERTIFICATE,
    TEMPLATE_CHANGE_REGISTRATION,
    TEMPLATE_ESSENTIAL_PRINCIPLES,
]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_requested_templates(message: str) -> list[str]:
    """Detect which templates the user explicitly asked for.

    Args:
        message: Free-text user message; ``None``/empty is treated as ``""``.

    Returns:
        A copy of ``ALL_TEMPLATE_CODES`` when an "all templates" phrase occurs,
        otherwise the de-duplicated codes whose keywords occur in the message
        (possibly empty).
    """
    lowered = (message or "").lower()

    def mentions(*keywords: str) -> bool:
        return any(keyword in lowered for keyword in keywords)

    if mentions("全部模板", "所有模板", "全套模板", "全部表格", "所有表格"):
        return ALL_TEMPLATE_CODES.copy()

    requested: list[str] = []
    # The certificate template is skipped when change registration is mentioned.
    if "注册证" in lowered and not mentions("变更注册", "变更 注册"):
        requested.append(TEMPLATE_REGISTRATION_CERTIFICATE)
    if mentions("变更注册", "变更 注册", "变更备案", "备案文件"):
        requested.append(TEMPLATE_CHANGE_REGISTRATION)
    if mentions("安全和性能基本原则", "基本原则清单", "原则清单"):
        requested.append(TEMPLATE_ESSENTIAL_PRINCIPLES)
    return _dedupe(requested)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_registration_type(
    *,
    batch: ApplicationFormFillBatch | None = None,
    message: str = "",
    file_candidates: dict[str, Any] | None = None,
) -> tuple[str, str]:
    """Determine the registration type and where it came from.

    Sources are tried in order: the user message, the linked regulatory batch,
    then candidates extracted from files. Later sources are consulted only when
    the earlier ones yield nothing.

    Returns:
        ``(registration_type, source)``; ``("unknown", UNKNOWN)`` when no
        source yields a value.
    """
    source_enum = ApplicationFormFillBatch.RegistrationTypeSource
    probes = (
        (lambda: _registration_type_from_text(message), source_enum.USER_MESSAGE),
        (lambda: _registration_type_from_regulatory_batch(batch), source_enum.REGULATORY_BATCH),
        (lambda: _registration_type_from_candidates(file_candidates or {}), source_enum.FILE_EXTRACT),
    )
    for probe, origin in probes:
        detected = probe()
        if detected:
            return detected, origin
    return "unknown", source_enum.UNKNOWN
|
||||||
|
|
||||||
|
|
||||||
|
def select_templates(
    config: dict[str, Any],
    requested_templates: list[str],
    registration_type: str,
) -> tuple[list[TemplateSpec], list[dict[str, str]]]:
    """Choose the templates to generate and collect risk notes.

    Explicitly requested templates win; otherwise the default set depends on
    ``registration_type`` (change registration / filing versus everything
    else).

    Returns:
        ``(specs, risk_notes)``. Unknown codes produce an ``unknown_template``
        note and are skipped. A requested template that does not apply to the
        registration type produces a ``template_registration_mismatch`` note
        and is still included.
    """
    catalog = {entry.get("code"): entry for entry in config.get("templates") or []}
    if requested_templates:
        chosen_codes = _dedupe(requested_templates)
    elif registration_type in {"变更注册", "备案"}:
        chosen_codes = [TEMPLATE_CHANGE_REGISTRATION, TEMPLATE_ESSENTIAL_PRINCIPLES]
    else:
        chosen_codes = [TEMPLATE_REGISTRATION_CERTIFICATE, TEMPLATE_ESSENTIAL_PRINCIPLES]

    risk_notes: list[dict[str, str]] = []
    specs: list[TemplateSpec] = []
    for code in chosen_codes:
        raw_entry = catalog.get(code)
        if not raw_entry:
            risk_notes.append({"type": "unknown_template", "message": f"模板不存在:{code}"})
            continue
        spec = _to_template_spec(raw_entry)
        mismatched = bool(requested_templates) and not _template_applies(spec, registration_type)
        if mismatched:
            note = {
                "type": "template_registration_mismatch",
                "message": f"用户指定模板 {spec.name} 与注册类型 {registration_type or 'unknown'} 可能不匹配,仍按指定生成。",
            }
            risk_notes.append(note)
        specs.append(spec)
    return specs, risk_notes
|
||||||
|
|
||||||
|
|
||||||
|
def _to_template_spec(raw: dict[str, Any]) -> TemplateSpec:
    """Convert one raw template config entry into a ``TemplateSpec``.

    Missing or falsy values become empty strings / containers;
    ``output_label`` falls back to ``name``.
    """

    def text(field_name: str) -> str:
        return str(raw.get(field_name) or "")

    return TemplateSpec(
        code=text("code"),
        name=text("name"),
        source_file=text("source_file"),
        output_label=str(raw.get("output_label") or raw.get("name") or ""),
        applies_when=dict(raw.get("applies_when") or {}),
        file_format=text("file_format"),
        fields=list(raw.get("fields") or []),
        checklist_items=list(raw.get("checklist_items") or []),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _template_applies(spec: TemplateSpec, registration_type: str) -> bool:
|
||||||
|
allowed = spec.applies_when.get("registration_type") or []
|
||||||
|
if not allowed:
|
||||||
|
return True
|
||||||
|
return registration_type in allowed or (registration_type == "unknown" and "unknown" in allowed)
|
||||||
|
|
||||||
|
|
||||||
|
def _registration_type_from_text(message: str) -> str:
|
||||||
|
normalized = (message or "").lower()
|
||||||
|
if any(keyword in normalized for keyword in ["首次注册", "初次注册", "新注册"]):
|
||||||
|
return "首次注册"
|
||||||
|
if "变更注册" in normalized:
|
||||||
|
return "变更注册"
|
||||||
|
if "备案" in normalized:
|
||||||
|
return "备案"
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _registration_type_from_regulatory_batch(batch: ApplicationFormFillBatch | None) -> str:
    """Read the registration type from the linked regulatory batch.

    ``condition_json`` is searched in order: its ``confirmed_conditions``, the
    top level, then ``candidates["registration_type"]``. In each mapping the
    keys ``registration_type``, ``suggested`` and ``value`` are tried.

    Returns:
        The canonical registration type, or ``""`` when there is no linked
        batch or no recognisable value.
    """
    if not batch:
        return ""
    if not batch.source_regulatory_batch_id:
        return ""
    conditions = batch.source_regulatory_batch.condition_json or {}
    confirmed = conditions.get("confirmed_conditions") or {}
    candidates = conditions.get("candidates") or {}
    payloads = (confirmed, conditions, candidates.get("registration_type") or {})
    for payload in payloads:
        if not isinstance(payload, dict):
            continue
        raw_value = payload.get("registration_type") or payload.get("suggested") or payload.get("value")
        resolved = _normalize_registration_type(raw_value)
        if resolved:
            return resolved
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _registration_type_from_candidates(candidates: dict[str, Any]) -> str:
    """Read the registration type from file-extracted candidates.

    Uses ``candidates["registration_type"]`` or ``candidates["suggested"]``.
    When that entry is a mapping, its ``value`` / ``suggested`` key is used.

    Returns:
        The canonical registration type, or ``""`` when not recognisable.
    """
    raw_value = candidates.get("registration_type") or candidates.get("suggested")
    if isinstance(raw_value, dict):
        raw_value = raw_value.get("value") or raw_value.get("suggested")
    resolved = _normalize_registration_type(raw_value)
    return resolved
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_registration_type(value: Any) -> str:
|
||||||
|
text = str(value or "")
|
||||||
|
if "首次" in text or "初次" in text:
|
||||||
|
return "首次注册"
|
||||||
|
if "变更" in text:
|
||||||
|
return "变更注册"
|
||||||
|
if "备案" in text:
|
||||||
|
return "备案"
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe(values: list[str]) -> list[str]:
|
||||||
|
result: list[str] = []
|
||||||
|
for value in values:
|
||||||
|
if value and value not in result:
|
||||||
|
result.append(value)
|
||||||
|
return result
|
||||||
@@ -0,0 +1,145 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import asdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.constants import WORKFLOW_TYPE
|
||||||
|
from review_agent.application_form_fill.schemas import MergedField, TemplateSpec
|
||||||
|
from review_agent.application_form_fill.storage import create_artifact_for_file, ensure_batch_subdir
|
||||||
|
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch, ExportedSummaryFile
|
||||||
|
|
||||||
|
|
||||||
|
def build_traceability_workbook(
    batch: ApplicationFormFillBatch,
    merged_fields: dict[str, MergedField],
    conflicts: list[dict[str, Any]],
    specs: list[TemplateSpec],
    generation_results: list[dict[str, Any]] | None = None,
) -> Workbook:
    """Build the field-traceability workbook for a form-fill run.

    The workbook has four sheets: field traceability, conflicting fields (one
    row per conflicting value), low-confidence entries (confidence below 0.6),
    and generation results. When ``generation_results`` is empty, one
    placeholder row per template spec is written instead.

    ``batch`` is accepted but not read by this function.
    """
    workbook = Workbook()

    # Sheet 1: every merged field with its source and conflict status.
    field_sheet = workbook.active
    field_sheet.title = "字段追溯"
    field_sheet.append(["模板", "字段", "填入值", "来源文件", "证据", "冲突状态"])
    label_by_key = {}
    for spec in specs:
        for field_config in spec.fields:
            # Later specs overwrite earlier ones that share a field key.
            label_by_key[field_config.get("key")] = spec.output_label
    for key, merged in merged_fields.items():
        status = "冲突" if merged.has_conflict else "一致"
        field_sheet.append(
            [label_by_key.get(key, ""), merged.label, merged.value, merged.source_file, merged.evidence, status]
        )

    # Sheet 2: one row per competing value of each conflict.
    conflict_sheet = workbook.create_sheet("冲突字段")
    conflict_sheet.append(["字段", "采用值", "冲突值", "冲突来源", "处理方式"])
    for conflict in conflicts:
        competing = conflict.get("conflict_values") or []
        if not competing:
            conflict_sheet.append(
                [
                    conflict.get("field_label", ""),
                    conflict.get("selected_value", ""),
                    "",
                    "",
                    conflict.get("handling", ""),
                ]
            )
            continue
        for entry in competing:
            conflict_sheet.append(
                [
                    conflict.get("field_label", ""),
                    conflict.get("selected_value", ""),
                    entry.get("value", ""),
                    entry.get("source_file", ""),
                    conflict.get("handling", ""),
                ]
            )

    # Sheet 3: fields whose confidence is below the 0.6 threshold.
    low_confidence_sheet = workbook.create_sheet("低置信度条目")
    low_confidence_sheet.append(["字段", "填入值", "置信度", "来源文件"])
    for merged in merged_fields.values():
        if merged.confidence < 0.6:
            low_confidence_sheet.append([merged.label, merged.value, merged.confidence, merged.source_file])

    # Sheet 4: per-template generation status.
    result_sheet = workbook.create_sheet("生成结果")
    result_sheet.append(["模板", "Word状态", "PDF状态", "错误说明"])
    if generation_results:
        for result in generation_results:
            result_sheet.append(
                [
                    result.get("template_label", ""),
                    result.get("word_status", ""),
                    result.get("pdf_status", "待增强"),
                    result.get("error_message", ""),
                ]
            )
    else:
        for spec in specs:
            result_sheet.append([spec.output_label, "待生成", "待增强", ""])
    return workbook
|
||||||
|
|
||||||
|
|
||||||
|
def save_traceability_exports(
    batch: ApplicationFormFillBatch,
    merged_fields: dict[str, MergedField],
    conflicts: list[dict[str, Any]],
    specs: list[TemplateSpec],
    generation_results: list[dict[str, Any]] | None = None,
) -> list[ExportedSummaryFile]:
    """Write the traceability workbook and merged-fields JSON for a batch.

    Both files go into the batch's ``exports`` sub-directory. Each is
    registered as a batch artifact and as an ``ExportedSummaryFile`` in the
    ``traceability`` category.

    Returns:
        ``[excel_export, json_export]``.
    """
    export_dir = ensure_batch_subdir(batch, "exports")

    def _register_export(path: Path, export_type) -> ExportedSummaryFile:
        # One download record per written file.
        return ExportedSummaryFile.objects.create(
            batch=batch.source_summary_batch,
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=batch.pk,
            export_category="traceability",
            export_type=export_type,
            file_name=path.name,
            storage_path=str(path),
        )

    # Excel traceability workbook.
    workbook = build_traceability_workbook(batch, merged_fields, conflicts, specs, generation_results)
    excel_path = export_dir / f"{batch.batch_no}-字段来源追溯清单.xlsx"
    workbook.save(excel_path)
    create_artifact_for_file(
        batch,
        path=excel_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.TRACEABILITY,
        file_format=ApplicationFormFillArtifact.FileFormat.EXCEL,
        name="字段来源追溯清单",
        metadata={"conflict_count": len(conflicts)},
        created_by_node="trace_export",
    )
    excel_export = _register_export(excel_path, ExportedSummaryFile.ExportType.EXCEL)

    # Machine-readable dump of the merged fields.
    json_path = export_dir / "merged_fields.json"
    serialized_fields = {key: asdict(value) for key, value in merged_fields.items()}
    payload = {
        "batch_no": batch.batch_no,
        "merged_fields": serialized_fields,
        "conflicts": conflicts,
        "generation_results": generation_results or [],
    }
    json_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    create_artifact_for_file(
        batch,
        path=json_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.MERGED_FIELDS,
        file_format=ApplicationFormFillArtifact.FileFormat.JSON,
        name="merged_fields",
        metadata={"conflict_count": len(conflicts)},
        created_by_node="trace_export",
    )
    json_export = _register_export(json_path, ExportedSummaryFile.ExportType.JSON)
    return [excel_export, json_export]
|
||||||
141
review_agent/application_form_fill/services/word_fill.py
Normal file
141
review_agent/application_form_fill/services/word_fill.py
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from docx import Document
|
||||||
|
from docx.oxml import OxmlElement
|
||||||
|
from docx.oxml.ns import qn
|
||||||
|
from docx.shared import RGBColor
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.constants import WORKFLOW_TYPE
|
||||||
|
from review_agent.application_form_fill.schemas import MergedField, TemplateSpec
|
||||||
|
from review_agent.application_form_fill.storage import create_artifact_for_file, ensure_batch_subdir
|
||||||
|
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch, ExportedSummaryFile
|
||||||
|
|
||||||
|
|
||||||
|
def fill_template(
    template_path: str | Path,
    output_path: str | Path,
    spec: TemplateSpec,
    fields: dict[str, MergedField],
    conflicts: list[dict] | None = None,
) -> Path:
    """Fill a Word template with merged field values and save the result.

    Fill instructions are removed first. Then every ``table_row`` field of
    ``spec`` that has a merged value is written into its table row. Fields with
    other target types, or without a merged value, are skipped. A field counts
    as conflicting when it is listed in ``conflicts`` or flagged on the field.

    Returns:
        The output path; parent directories are created as needed.
    """
    document = Document(str(template_path))
    remove_fill_instructions(document)
    conflicting_keys = {entry.get("field_key") for entry in conflicts or []}
    for field_config in spec.fields:
        target = field_config.get("target") or {}
        if target.get("type") != "table_row":
            continue
        field_key = field_config.get("key")
        merged = fields.get(field_key)
        if not merged:
            continue
        row_label = str(target.get("row_label") or field_config.get("label") or "")
        is_conflicting = field_key in conflicting_keys or merged.has_conflict
        fill_table_row(document, row_label, merged.value, conflict=is_conflicting)
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    document.save(str(destination))
    return destination
|
||||||
|
|
||||||
|
|
||||||
|
def remove_fill_instructions(document: Document) -> None:
    """Strip the template's fill-in instructions from the document in place.

    Body paragraphs: from the first paragraph whose normalized text equals
    "填表说明" onward, every paragraph is removed. Before that point, a
    paragraph is removed only if its normalized text starts with "注填表前"
    and also contains "填表说明".

    Table rows: a row is removed when the normalized concatenation of its cell
    texts equals "填表说明" or starts with "注填表前".
    """
    in_instruction_section = False
    # Iterate over a snapshot because paragraphs are detached while looping.
    for paragraph in tuple(document.paragraphs):
        text = _normalize_label(paragraph.text)
        in_instruction_section = in_instruction_section or text == "填表说明"
        is_pointer_note = text.startswith("注填表前") and "填表说明" in text
        if in_instruction_section or is_pointer_note:
            _remove_paragraph(paragraph)

    for table in document.tables:
        # Snapshot the rows for the same reason as above.
        for row in tuple(table.rows):
            row_text = _normalize_label("".join(cell.text for cell in row.cells))
            if row_text == "填表说明" or row_text.startswith("注填表前"):
                _remove_row(row)
|
||||||
|
|
||||||
|
|
||||||
|
def fill_table_row(document: Document, row_label: str, value: str, *, conflict: bool = False) -> bool:
    """Write ``value`` into the cell next to the first row labelled ``row_label``.

    Rows are searched across all tables of the document; labels are compared
    after ``_normalize_label``. Only the first matching row is filled.

    Args:
        document: The python-docx document to modify in place.
        row_label: Text expected in the first cell of the target row.
        value: Text to place in the value cell; ``None`` is written as an
            empty string.
        conflict: When true, the text is coloured red and the cell is shaded
            yellow so reviewers can spot conflicting values.

    Returns:
        ``True`` if a row was filled, ``False`` if no usable row matched.
    """
    normalized_label = _normalize_label(row_label)
    if not normalized_label:
        # An empty label would otherwise match any row whose first cell is blank.
        return False
    text = "" if value is None else str(value)

    for table in document.tables:
        for row in table.rows:
            # python-docx rebuilds ``row.cells`` on every access; read it once.
            cells = row.cells
            if len(cells) < 2:
                continue
            label_cell = cells[0]
            if _normalize_label(label_cell.text) != normalized_label:
                continue
            # A horizontally merged cell appears once per grid column it spans,
            # so ``cells[1]`` can be the label cell itself. Pick the first cell
            # backed by a different ``<w:tc>`` element to avoid overwriting it.
            target = next((cell for cell in cells[1:] if cell._tc is not label_cell._tc), None)
            if target is None:
                continue
            target.text = ""
            run = target.paragraphs[0].add_run(text)
            if conflict:
                run.font.color.rgb = RGBColor(0xFF, 0x00, 0x00)
                apply_cell_shading(target, "FFFF00")
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def apply_cell_shading(cell, fill: str) -> None:
    """Set the background fill colour of a table cell.

    An existing ``<w:shd>`` element is reused and only its ``w:fill`` is
    changed. Otherwise a new one is created with the schema-required
    ``w:val`` attribute and inserted at a schema-valid position in ``<w:tcPr>``.

    Args:
        cell: A python-docx table cell (anything exposing ``_tc``).
        fill: Hex RGB colour without ``#``, e.g. ``"FFFF00"``.
    """
    tc_pr = cell._tc.get_or_add_tcPr()
    shading = tc_pr.find(qn("w:shd"))
    if shading is None:
        shading = OxmlElement("w:shd")
        # ``w:val`` is a required attribute of CT_Shd; "clear" means a solid
        # fill with no pattern overlay.
        shading.set(qn("w:val"), "clear")
        shading.set(qn("w:color"), "auto")
        # Children of ``<w:tcPr>`` are an ordered sequence; ``<w:shd>`` must
        # come before any of these elements.
        successor_tags = {
            qn(tag)
            for tag in (
                "w:noWrap",
                "w:tcMar",
                "w:textDirection",
                "w:tcFitText",
                "w:vAlign",
                "w:hideMark",
                "w:headers",
                "w:cellIns",
                "w:cellDel",
                "w:cellMerge",
                "w:tcPrChange",
            )
        }
        for child in tc_pr:
            if child.tag in successor_tags:
                child.addprevious(shading)
                break
        else:
            tc_pr.append(shading)
    shading.set(qn("w:fill"), fill)
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_paragraph(paragraph) -> None:
|
||||||
|
element = paragraph._element
|
||||||
|
element.getparent().remove(element)
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_row(row) -> None:
|
||||||
|
row._tr.getparent().remove(row._tr)
|
||||||
|
|
||||||
|
|
||||||
|
def create_word_export(
    batch: ApplicationFormFillBatch,
    spec: TemplateSpec,
    template_path: str | Path,
    fields: dict[str, MergedField],
    conflicts: list[dict] | None = None,
) -> ExportedSummaryFile:
    """Generate the filled Word file for one template and register it.

    The document is written to the batch's ``filled`` sub-directory as
    ``{batch_no}-{product}-{output_label}.docx``. The product part comes from
    ``batch.product_name``, else the merged ``product_name`` field, else
    "未识别产品". An artifact row and an ``ExportedSummaryFile`` row are then
    created for the file.

    Returns:
        The newly created ``ExportedSummaryFile``.
    """
    target_dir = ensure_batch_subdir(batch, "filled")

    raw_product = batch.product_name
    if not raw_product:
        # Placeholder used only so ``.value`` can be read when the field is absent.
        placeholder = MergedField("product_name", "产品名称", "", "", "", 0)
        raw_product = fields.get("product_name", placeholder).value or "未识别产品"
    product_name = _safe_filename(raw_product)

    file_name = f"{batch.batch_no}-{product_name}-{_safe_filename(spec.output_label)}.docx"
    output_path = target_dir / file_name
    fill_template(template_path, output_path, spec, fields, conflicts)

    artifact_metadata = {"template_code": spec.code, "conflict_count": len(conflicts or [])}
    create_artifact_for_file(
        batch,
        path=output_path,
        artifact_type=ApplicationFormFillArtifact.ArtifactType.FILLED_TEMPLATE,
        file_format=ApplicationFormFillArtifact.FileFormat.DOCX,
        name=spec.output_label,
        metadata=artifact_metadata,
        created_by_node="word_fill",
    )

    export_record = ExportedSummaryFile.objects.create(
        batch=batch.source_summary_batch,
        workflow_type=WORKFLOW_TYPE,
        workflow_batch_id=batch.pk,
        export_category="filled_template",
        export_type=ExportedSummaryFile.ExportType.WORD,
        file_name=output_path.name,
        storage_path=str(output_path),
    )
    return export_record
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_label(value: str) -> str:
|
||||||
|
return re.sub(r"\s+", "", value or "").replace(":", "").replace(":", "")
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_filename(value: str) -> str:
|
||||||
|
text = re.sub(r"[\x00-\x1f\x7f]+", "", value or "")
|
||||||
|
text = re.sub(r'[\\/:*?"<>|]+', "_", text)
|
||||||
|
return text.strip()[:80] or "output"
|
||||||
55
review_agent/application_form_fill/storage.py
Normal file
55
review_agent/application_form_fill/storage.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import ApplicationFormFillArtifact, ApplicationFormFillBatch
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_work_dir(batch: ApplicationFormFillBatch | None = None, *, batch_no: str = "") -> Path:
    """Compute the working directory for a form-fill batch under ``MEDIA_ROOT``.

    With a batch, the path is
    ``application_form_fill/<user_id>/<conversation_id>/<batch_no>``; without
    one, it is ``application_form_fill/<batch_no>`` using the keyword argument.
    The directory is not created here.
    """
    base = Path(settings.MEDIA_ROOT) / "application_form_fill"
    if not batch:
        return base / batch_no
    return base / str(batch.user_id) / str(batch.conversation_id) / batch.batch_no
|
||||||
|
|
||||||
|
|
||||||
|
def compute_file_sha256(path: str | Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB blocks so large files are not loaded whole.
    """
    block_size = 1024 * 1024
    hasher = hashlib.sha256()
    with Path(path).open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if block == b"":
                break
            hasher.update(block)
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_batch_subdir(batch: ApplicationFormFillBatch, name: str) -> Path:
    """Create (if needed) and return a named sub-directory of the batch work dir.

    The root is ``batch.work_dir`` when set, otherwise the path computed by
    ``build_batch_work_dir``. Only the final component of ``name`` is used, so
    values such as ``"../x"`` cannot escape the batch directory.
    """
    if batch.work_dir:
        root = Path(batch.work_dir)
    else:
        root = build_batch_work_dir(batch)
    leaf = Path(name).name
    target = root / leaf
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def create_artifact_for_file(
    batch: ApplicationFormFillBatch,
    *,
    path: str | Path,
    artifact_type: str,
    file_format: str,
    name: str = "",
    metadata: dict | None = None,
    created_by_node: str = "",
) -> ApplicationFormFillArtifact:
    """Create an ``ApplicationFormFillArtifact`` row describing a file on disk.

    Size and SHA-256 hash are recorded when the file exists; otherwise they
    are stored as ``0`` and ``""``. ``name`` defaults to the file stem and
    ``metadata`` to an empty dict.

    Returns:
        The created artifact instance.
    """
    file_path = Path(path)
    file_size = file_path.stat().st_size if file_path.exists() else 0
    content_hash = compute_file_sha256(file_path) if file_path.exists() else ""

    attributes = {
        "batch": batch,
        "artifact_type": artifact_type,
        "file_format": file_format,
        "name": name or file_path.stem,
        "file_name": file_path.name,
        "storage_path": str(file_path),
        "file_size": file_size,
        "content_hash": content_hash,
        "metadata": metadata or {},
        "created_by_node": created_by_node,
    }
    return ApplicationFormFillArtifact.objects.create(**attributes)
|
||||||
@@ -0,0 +1,130 @@
|
|||||||
|
version: application_form_templates_v1
|
||||||
|
source_dir: docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||||
|
templates:
|
||||||
|
- code: registration_certificate
|
||||||
|
name: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式)
|
||||||
|
source_file: 中华人民共和国医疗器械注册证(体外诊断试剂)(格式).docx
|
||||||
|
output_label: 注册证格式
|
||||||
|
applies_when:
|
||||||
|
registration_type:
|
||||||
|
- 首次注册
|
||||||
|
- unknown
|
||||||
|
file_format: docx
|
||||||
|
fields:
|
||||||
|
- key: applicant_name
|
||||||
|
label: 注册人名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 注册人名称
|
||||||
|
source_roles:
|
||||||
|
- 申请表
|
||||||
|
- 说明书
|
||||||
|
- 企业信息
|
||||||
|
- key: applicant_address
|
||||||
|
label: 注册人住所
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 注册人住所
|
||||||
|
source_roles:
|
||||||
|
- 申请表
|
||||||
|
- 企业信息
|
||||||
|
- key: manufacturer_address
|
||||||
|
label: 生产地址
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 生产地址
|
||||||
|
source_roles:
|
||||||
|
- 申请表
|
||||||
|
- 质量管理体系文件
|
||||||
|
- key: agent_name
|
||||||
|
label: 代理人名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 代理人名称
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 企业信息
|
||||||
|
- 申请表
|
||||||
|
- key: agent_address
|
||||||
|
label: 代理人住所
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 代理人住所
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 企业信息
|
||||||
|
- 申请表
|
||||||
|
- key: product_name
|
||||||
|
label: 产品名称
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 产品名称
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 产品技术要求
|
||||||
|
- 注册检验报告
|
||||||
|
- key: package_specification
|
||||||
|
label: 包装规格
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 包装规格
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 产品技术要求
|
||||||
|
- key: main_components
|
||||||
|
label: 主要组成成分
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 主要组成成分
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 产品技术要求
|
||||||
|
- key: intended_use
|
||||||
|
label: 预期用途
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 预期用途
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 临床评价资料
|
||||||
|
- 产品技术要求
|
||||||
|
- key: storage_condition_and_validity
|
||||||
|
label: 产品储存条件及有效期
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 产品储存条件及有效期
|
||||||
|
source_roles:
|
||||||
|
- 说明书
|
||||||
|
- 产品技术要求
|
||||||
|
- 稳定性研究资料
|
||||||
|
- key: attachments
|
||||||
|
label: 附件
|
||||||
|
target:
|
||||||
|
type: table_row
|
||||||
|
row_label: 附件
|
||||||
|
source_roles:
|
||||||
|
- 注册申报资料
|
||||||
|
- 说明书
|
||||||
|
- code: change_registration
|
||||||
|
name: 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式)
|
||||||
|
source_file: 中华人民共和国医疗器械变更注册(备案)文件(体外诊断试剂)(格式).doc
|
||||||
|
output_label: 变更注册备案文件
|
||||||
|
applies_when:
|
||||||
|
registration_type:
|
||||||
|
- 变更注册
|
||||||
|
- 备案
|
||||||
|
file_format: doc
|
||||||
|
fields: []
|
||||||
|
- code: essential_principles
|
||||||
|
name: 体外诊断试剂安全和性能基本原则清单
|
||||||
|
source_file: 体外诊断试剂安全和性能基本原则清单.doc
|
||||||
|
output_label: 安全和性能基本原则清单
|
||||||
|
applies_when:
|
||||||
|
registration_type:
|
||||||
|
- 首次注册
|
||||||
|
- 变更注册
|
||||||
|
- 备案
|
||||||
|
- unknown
|
||||||
|
file_format: doc
|
||||||
|
fields: []
|
||||||
|
checklist_items: []
|
||||||
131
review_agent/application_form_fill/views.py
Normal file
131
review_agent/application_form_fill/views.py
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
from django.contrib.auth.decorators import login_required
|
||||||
|
from django.conf import settings
|
||||||
|
from django.http import Http404, JsonResponse
|
||||||
|
from django.views.decorators.http import require_http_methods
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.workflow import (
|
||||||
|
create_application_form_fill_batch,
|
||||||
|
find_latest_successful_summary_batch,
|
||||||
|
start_application_form_fill_workflow,
|
||||||
|
)
|
||||||
|
from review_agent.models import ApplicationFormFillBatch, Conversation, ExportedSummaryFile, FileSummaryBatch, WorkflowNodeRun
|
||||||
|
from review_agent.notifications.presenter import serialize_notification_records
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
def health(request):
    """Return a static JSON payload stating that this workflow is available."""
    body = {"workflow_type": "application_form_fill", "status": "available"}
    return JsonResponse(body)
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["POST"])
def start(request):
    """Create a form-fill batch for one of the user's conversations and start it.

    The JSON body may contain ``conversation_id``, ``file_summary_batch_id``,
    ``template_codes`` and ``output_types``. If no usable summary batch id is
    given, the conversation's latest successful summary batch is used.

    Returns:
        ``JsonResponse`` with the batch id, status and selected templates.
        HTTP 400 for a body that is not a UTF-8 JSON object or when no
        successful summary batch exists.

    Raises:
        Http404: The conversation does not exist or belongs to another user.
    """
    invalid_json = {"error": "JSON 格式错误。"}
    try:
        payload = json.loads(request.body.decode("utf-8") or "{}")
    except (UnicodeDecodeError, json.JSONDecodeError):
        # A non-UTF-8 body is as much a client error as malformed JSON.
        return JsonResponse(invalid_json, status=400)
    if not isinstance(payload, dict):
        # Valid JSON that is not an object (e.g. a list) has no ``.get``.
        return JsonResponse(invalid_json, status=400)

    conversation = Conversation.objects.filter(pk=payload.get("conversation_id"), user=request.user).first()
    if not conversation:
        raise Http404("对话不存在。")

    summary_batch = None
    if payload.get("file_summary_batch_id"):
        summary_batch = FileSummaryBatch.objects.filter(
            pk=payload.get("file_summary_batch_id"),
            conversation=conversation,
            user=request.user,
            status=FileSummaryBatch.Status.SUCCESS,
        ).first()
    if summary_batch is None:
        summary_batch = find_latest_successful_summary_batch(conversation)
    if summary_batch is None:
        return JsonResponse({"error": "请先上传资料并完成文件汇总。"}, status=400)

    batch = create_application_form_fill_batch(
        conversation=conversation,
        user=request.user,
        source_summary_batch=summary_batch,
        requested_templates=payload.get("template_codes") or [],
        output_types=payload.get("output_types") or None,
    )
    start_application_form_fill_workflow(batch, async_run=getattr(settings, "APPLICATION_FORM_FILL_ASYNC", True))
    return JsonResponse(
        {
            "batch_id": batch.pk,
            "workflow_type": "application_form_fill",
            "status": batch.status,
            "selected_templates": batch.selected_templates,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
@login_required
@require_http_methods(["GET"])
def batch_status(request, batch_id: int):
    """Return the status of one form-fill batch owned by the current user.

    The response bundles the batch summary, its workflow node runs, recorded
    conflicts, export files with download URLs, and notification records.

    Raises:
        Http404: No non-deleted batch with this id belongs to the user.
    """
    workflow_type = "application_form_fill"

    batch = ApplicationFormFillBatch.objects.filter(
        pk=batch_id,
        conversation__user=request.user,
        is_deleted=False,
    ).first()
    if not batch:
        raise Http404("填表批次不存在。")

    exports = ExportedSummaryFile.objects.filter(
        workflow_type=workflow_type,
        workflow_batch_id=batch.pk,
    ).order_by("id")
    notifications = serialize_notification_records(workflow_type, batch.pk)
    conflicts = batch.conflict_summary or []

    batch_payload = {
        "id": batch.pk,
        "workflow_type": workflow_type,
        "batch_no": batch.batch_no,
        "status": batch.status,
        "product_name": batch.product_name,
        "selected_templates": batch.selected_templates,
        "conflict_count": len(conflicts),
        "risk_summary_text": _risk_summary_text(batch),
        "error_message": batch.error_message,
    }

    node_runs = WorkflowNodeRun.objects.filter(
        workflow_type=workflow_type,
        workflow_batch_id=batch.pk,
    ).order_by("id")
    node_payloads = []
    for node in node_runs:
        node_payloads.append(
            {
                "node_code": node.node_code,
                "node_name": node.node_name,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            }
        )

    export_payloads = []
    for export in exports:
        export_payloads.append(
            {
                "id": export.pk,
                "export_type": export.export_type,
                "export_category": export.export_category,
                "file_name": export.file_name,
                "download_url": f"/api/review-agent/file-summary/exports/{export.pk}/download/",
            }
        )

    latest_notification = notifications[0] if notifications else None
    return JsonResponse(
        {
            "batch": batch_payload,
            "nodes": node_payloads,
            "conflicts": conflicts,
            "exports": export_payloads,
            "notifications": notifications,
            "latest_notification": latest_notification,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _risk_summary_text(batch: ApplicationFormFillBatch) -> str:
|
||||||
|
parts = []
|
||||||
|
if batch.selected_templates:
|
||||||
|
parts.append("模板 " + "、".join(batch.selected_templates))
|
||||||
|
if batch.conflict_summary:
|
||||||
|
parts.append(f"冲突字段 {len(batch.conflict_summary)}")
|
||||||
|
if batch.risk_notes:
|
||||||
|
parts.append(f"提示 {len(batch.risk_notes)}")
|
||||||
|
return " · ".join(parts)
|
||||||
328
review_agent/application_form_fill/workflow.py
Normal file
328
review_agent/application_form_fill/workflow.py
Normal file
@@ -0,0 +1,328 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from threading import Thread
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import transaction
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.application_form_fill.constants import DEFAULT_OUTPUT_TYPES, FORM_FILL_NODE_DEFINITIONS, WORKFLOW_TYPE
|
||||||
|
from review_agent.application_form_fill.events import record_event
|
||||||
|
from review_agent.application_form_fill.services.field_extract import (
|
||||||
|
collect_document_texts,
|
||||||
|
run_parallel_extract,
|
||||||
|
save_field_extract_result,
|
||||||
|
)
|
||||||
|
from review_agent.application_form_fill.services.field_merge import merge_fields
|
||||||
|
from review_agent.application_form_fill.services.notifier import notify_completion
|
||||||
|
from review_agent.application_form_fill.services.summary import build_assistant_summary
|
||||||
|
from review_agent.application_form_fill.services.template_config import (
|
||||||
|
compute_config_hash,
|
||||||
|
load_template_config,
|
||||||
|
validate_template_config,
|
||||||
|
)
|
||||||
|
from review_agent.application_form_fill.services.template_repository import (
|
||||||
|
TemplateUnavailableError,
|
||||||
|
copy_template_to_batch,
|
||||||
|
)
|
||||||
|
from review_agent.application_form_fill.services.template_select import (
|
||||||
|
detect_registration_type,
|
||||||
|
parse_requested_templates,
|
||||||
|
select_templates,
|
||||||
|
)
|
||||||
|
from review_agent.application_form_fill.services.traceability_export import save_traceability_exports
|
||||||
|
from review_agent.application_form_fill.services.word_fill import create_word_export
|
||||||
|
from review_agent.application_form_fill.schemas import MergedField, TemplateSpec
|
||||||
|
from review_agent.application_form_fill.storage import build_batch_work_dir
|
||||||
|
from review_agent.models import ApplicationFormFillBatch, Conversation, FileSummaryBatch, Message, WorkflowNodeRun
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.application_form_fill.workflow")
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_no() -> str:
    """Return a new batch number: ``AFF-<local timestamp>-<6 hex chars>``."""
    timestamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    random_suffix = uuid4().hex[:6]
    return f"AFF-{timestamp}-{random_suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def find_latest_successful_summary_batch(conversation: Conversation) -> FileSummaryBatch | None:
    """Return the conversation's most recent successful summary batch, if any.

    Batches are ordered by ``finished_at``, then ``created_at``, then ``id``,
    all descending.
    """
    successful = FileSummaryBatch.objects.filter(
        conversation=conversation,
        status=FileSummaryBatch.Status.SUCCESS,
    )
    newest_first = successful.order_by("-finished_at", "-created_at", "-id")
    return newest_first.first()
|
||||||
|
|
||||||
|
|
||||||
|
@transaction.atomic
def create_application_form_fill_batch(
    *,
    conversation: Conversation,
    user,
    source_summary_batch: FileSummaryBatch,
    trigger_message: Message | None = None,
    requested_templates: list[str] | None = None,
    output_types: list[str] | None = None,
) -> ApplicationFormFillBatch:
    """Create a form-fill batch together with its workflow node rows.

    A fresh batch number is generated and its working directory is created on
    disk. One ``WorkflowNodeRun`` is inserted per entry of
    ``FORM_FILL_NODE_DEFINITIONS`` and a ``workflow_created`` event is
    recorded. The database writes run inside a single transaction.

    Returns:
        The newly created batch.
    """
    batch_no = build_batch_no()
    work_dir = build_batch_work_dir(batch_no=batch_no)
    work_dir.mkdir(parents=True, exist_ok=True)

    batch_attributes = {
        "conversation": conversation,
        "user": user,
        "trigger_message": trigger_message,
        "source_summary_batch": source_summary_batch,
        "batch_no": batch_no,
        "requested_templates": requested_templates or [],
        "output_types": output_types or DEFAULT_OUTPUT_TYPES,
        "work_dir": str(work_dir),
    }
    batch = ApplicationFormFillBatch.objects.create(**batch_attributes)

    for node_code, node_name, node_group in FORM_FILL_NODE_DEFINITIONS:
        WorkflowNodeRun.objects.create(
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=batch.pk,
            node_group=node_group,
            node_code=node_code,
            node_name=node_name,
        )

    event_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", event_payload)
    return batch
|
||||||
|
|
||||||
|
|
||||||
|
class FormFillWorkflowExecutor:
    """Runs the auto-fill workflow skeleton; later stages fill node bodies.

    One executor drives a single ``ApplicationFormFillBatch`` through its
    ``WorkflowNodeRun`` rows in id order. Results produced by one node
    (selected templates, copied template paths, extracted and merged fields,
    exports) are kept on the instance so later nodes can use them.
    """

    def __init__(self, batch: ApplicationFormFillBatch):
        self.batch = batch
        # State shared between nodes during a single run.
        self.template_config: dict = {}
        self.selected_templates: list[TemplateSpec] = []
        self.template_paths: dict[str, str] = {}
        self.document_texts: dict[str, str] = {}
        self.extract_payload: dict = {}
        self.merged_fields: dict[str, MergedField] = {}
        self.conflicts: list[dict] = []
        self.exports = []
        self.generation_results: list[dict] = []
        # Problems that downgrade the batch to partial success without aborting.
        self.non_blocking_errors: list[str] = []

    def run(self) -> None:
        """Execute all pending nodes and record the final batch status.

        Any exception raised by a node marks the batch as failed, stores the
        message on the batch and stops the run; the exception is logged, not
        re-raised.
        """
        logger.info("自动填表工作流开始 batch_no=%s batch_id=%s", self.batch.batch_no, self.batch.pk)
        self.batch.status = ApplicationFormFillBatch.Status.RUNNING
        self.batch.started_at = timezone.now()
        self.batch.save(update_fields=["status", "started_at"])
        record_event(self.batch, "workflow_started", {"batch_id": self.batch.pk})

        try:
            for node in self._nodes():
                if node.status in {WorkflowNodeRun.Status.SUCCESS, WorkflowNodeRun.Status.SKIPPED}:
                    continue
                self._run_node(node)
        except Exception as exc:
            # NOTE(review): the node that raised keeps its RUNNING status; no
            # failure status for WorkflowNodeRun is visible here - confirm
            # whether one exists and should be set.
            logger.exception("Application form fill workflow failed", extra={"batch_id": self.batch.pk})
            self.batch.status = ApplicationFormFillBatch.Status.FAILED
            self.batch.error_message = str(exc)
            self.batch.finished_at = timezone.now()
            self.batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(self.batch, "workflow_failed", {"message": str(exc)})
            return

        self.batch.refresh_from_db()
        # Keep a partial-success verdict set by the "completed" node.
        if self.batch.status != ApplicationFormFillBatch.Status.PARTIAL_SUCCESS:
            self.batch.status = ApplicationFormFillBatch.Status.SUCCESS
        self.batch.finished_at = timezone.now()
        self.batch.save(update_fields=["status", "finished_at"])
        record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})
        logger.info("自动填表工作流完成 batch_no=%s", self.batch.batch_no)

    def _nodes(self):
        """Return this batch's workflow node runs ordered by id."""
        return WorkflowNodeRun.objects.filter(
            workflow_type=WORKFLOW_TYPE,
            workflow_batch_id=self.batch.pk,
        ).order_by("id")

    def _record_node_progress(self, node: WorkflowNodeRun) -> None:
        """Emit a ``node_progress`` event carrying the node's current state."""
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
        )

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Run one node, persisting and broadcasting its state transitions."""
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        self._record_node_progress(node)

        if node.node_code == "pdf_convert":
            # PDF conversion is not implemented: note it and skip the node.
            self._append_risk_note(
                {
                    "type": "pdf_pending",
                    "message": "PDF 转换为后续增强项,本次优先生成 Word。",
                }
            )
            node.status = WorkflowNodeRun.Status.SKIPPED
            node.progress = 100
            node.finished_at = timezone.now()
            node.message = "PDF 转换为后续增强项,本次跳过"
            node.save(update_fields=["status", "progress", "finished_at", "message"])
            self._record_node_progress(node)
            return

        self._execute_node(node)

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        self._record_node_progress(node)

    def _execute_node(self, node: WorkflowNodeRun) -> None:
        """Perform the work for ``node`` according to its ``node_code``.

        Raises:
            ValueError: A blocking precondition failed (source batch not
                successful, invalid template config, no template selected or
                copied, or no Word file generated).
        """
        if node.node_code == "prepare":
            if self.batch.source_summary_batch.status != FileSummaryBatch.Status.SUCCESS:
                raise ValueError("自动填表需要成功的文件汇总批次。")
            return
        if node.node_code == "template_select":
            self.template_config = load_template_config()
            errors = validate_template_config(self.template_config)
            if errors:
                raise ValueError(";".join(errors))
            message_text = self.batch.trigger_message.content if self.batch.trigger_message else ""
            # Templates named in the trigger message take priority. Otherwise
            # keep the ones stored on the batch at creation (e.g. from the
            # HTTP API), which used to be overwritten here.
            # NOTE(review): assumes stored entries use the same identifiers
            # that parse_requested_templates returns - confirm.
            requested = parse_requested_templates(message_text) or list(self.batch.requested_templates or [])
            registration_type, source = detect_registration_type(batch=self.batch, message=message_text)
            specs, risk_notes = select_templates(self.template_config, requested, registration_type)
            if not specs:
                raise ValueError("未选择到可用申报模板。")
            self.selected_templates = specs
            self.batch.requested_templates = requested
            self.batch.selected_templates = [spec.code for spec in specs]
            self.batch.registration_type = registration_type
            self.batch.registration_type_source = source
            self.batch.template_config_version = str(self.template_config.get("version") or "")
            self.batch.template_config_hash = compute_config_hash()
            self.batch.risk_notes = list(self.batch.risk_notes or []) + risk_notes
            self.batch.save(
                update_fields=[
                    "requested_templates",
                    "selected_templates",
                    "registration_type",
                    "registration_type_source",
                    "template_config_version",
                    "template_config_hash",
                    "risk_notes",
                ]
            )
            return
        if node.node_code == "template_copy":
            for spec in self.selected_templates:
                try:
                    artifact = copy_template_to_batch(spec, self.batch, self.template_config)
                    self.template_paths[spec.code] = artifact.storage_path
                except TemplateUnavailableError as exc:
                    # One unavailable template is tolerated; the run only
                    # fails below if none could be copied.
                    self.non_blocking_errors.append(str(exc))
                    self._append_risk_note({"type": "template_unavailable", "message": str(exc), "template_code": spec.code})
            if not self.template_paths:
                raise ValueError("没有可用的 Word 模板副本。")
            return
        if node.node_code == "field_extract":
            self.document_texts = collect_document_texts(self.batch.source_summary_batch)
            self.extract_payload = run_parallel_extract(self.document_texts, self.selected_templates)
            save_field_extract_result(self.batch, self.extract_payload)
            return
        if node.node_code == "conflict_merge":
            self.merged_fields, self.conflicts = merge_fields(
                self.extract_payload.get("regex_results") or {},
                self.extract_payload.get("llm_results") or {},
            )
            product = self.merged_fields.get("product_name")
            if product and product.value:
                self.batch.product_name = product.value
            self.batch.conflict_summary = self.conflicts
            self.batch.save(update_fields=["product_name", "conflict_summary"])
            return
        if node.node_code == "word_fill":
            for spec in self.selected_templates:
                template_path = self.template_paths.get(spec.code)
                if not template_path:
                    self.generation_results.append(
                        {
                            "template_code": spec.code,
                            "template_label": spec.output_label,
                            "word_status": "failed",
                            "pdf_status": "待增强",
                            "error_message": "模板不可用",
                        }
                    )
                    continue
                export = create_word_export(self.batch, spec, template_path, self.merged_fields, self.conflicts)
                self.exports.append(export)
                self.generation_results.append(
                    {
                        "template_code": spec.code,
                        "template_label": spec.output_label,
                        "word_status": "success",
                        "pdf_status": "待增强",
                        "error_message": "",
                    }
                )
            if not any(item["word_status"] == "success" for item in self.generation_results):
                raise ValueError("所有目标 Word 模板均生成失败。")
            return
        if node.node_code == "trace_export":
            self.exports.extend(
                save_traceability_exports(
                    self.batch,
                    self.merged_fields,
                    self.conflicts,
                    self.selected_templates,
                    self.generation_results,
                )
            )
            return
        if node.node_code == "output_export":
            Message.objects.create(
                conversation=self.batch.conversation,
                role=Message.Role.ASSISTANT,
                content=build_assistant_summary(self.batch, self.exports),
            )
            return
        if node.node_code == "notify":
            notification = notify_completion(
                self.batch,
                self.exports,
                fail=getattr(settings, "APPLICATION_FORM_FILL_MOCK_NOTIFY_FAIL", False),
            )
            if notification.send_status == notification.SendStatus.FAILED:
                # A failed notification downgrades the batch but does not abort it.
                self.non_blocking_errors.append(notification.error_message or "通知失败")
            return
        if node.node_code == "completed":
            self._mark_final_status()

    def _mark_final_status(self) -> None:
        """Persist SUCCESS or PARTIAL_SUCCESS based on the problems collected."""
        failed_word = any(item.get("word_status") == "failed" for item in self.generation_results)
        if self.non_blocking_errors or failed_word:
            self.batch.status = ApplicationFormFillBatch.Status.PARTIAL_SUCCESS
        else:
            self.batch.status = ApplicationFormFillBatch.Status.SUCCESS
        self.batch.save(update_fields=["status"])

    def _append_risk_note(self, note: dict) -> None:
        """Append ``note`` to the batch's risk notes and save that field."""
        self.batch.risk_notes = list(self.batch.risk_notes or []) + [note]
        self.batch.save(update_fields=["risk_notes"])
|
||||||
|
|
||||||
|
|
||||||
|
def start_application_form_fill_workflow(batch: ApplicationFormFillBatch, *, async_run: bool = True) -> None:
    """Run the form-fill workflow for ``batch``, inline or on a daemon thread.

    Args:
        batch: The batch to process.
        async_run: When false, the workflow runs synchronously in the caller;
            otherwise it runs on a background daemon thread and this function
            returns immediately.
    """
    executor = FormFillWorkflowExecutor(batch)
    if not async_run:
        executor.run()
        return

    def _run_and_release_connections() -> None:
        # Django opens database connections per thread and only closes them
        # automatically around the request cycle, so a worker thread must
        # close its own connections or they stay open.
        from django.db import connections

        try:
            executor.run()
        finally:
            connections.close_all()

    Thread(target=_run_and_release_connections, daemon=True).start()
|
||||||
1
review_agent/feishu_questions/__init__.py
Normal file
1
review_agent/feishu_questions/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Reserved Feishu question services."""
|
||||||
43
review_agent/feishu_questions/intent.py
Normal file
43
review_agent/feishu_questions/intent.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
# Substrings that hint at each workflow type, including the batch-number
# prefix of that workflow. _detect_workflow_type returns the first entry, in
# insertion order, with any keyword present, so the order sets priority.
WORKFLOW_KEYWORDS = {
    "regulatory_review": ("法规核查", "风险", "整改", "RR-"),
    "application_form_fill": ("自动填表", "填表", "申报文件", "AFF-"),
    "file_summary": ("自动汇总", "文件汇总", "目录", "页数", "FS-"),
}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_question_intent(text: str) -> dict[str, object]:
    """Derive a coarse intent description from a free-text question.

    Returns a dict with:
        intent: "batch_status" when a batch number or a "latest" word is
            present, otherwise "unknown"; replaced by a workflow-specific
            summary intent when the detected workflow's trigger words occur.
        workflow_type: detected workflow key, or "" when none matched.
        batch_no: upper-cased batch number, or "".
        latest: true when a "latest" word is present or no batch number
            was given.
    """
    # workflow type -> (specialised intent, trigger substrings)
    specialised = {
        "regulatory_review": ("risk_summary", ["风险", "阻断", "整改"]),
        "application_form_fill": ("export_summary", ["导出", "文件", "word", "Word"]),
        "file_summary": ("missing_summary", ["缺失", "目录", "页数"]),
    }

    question = (text or "").strip()
    batch_no = _extract_batch_no(question)
    workflow_type = _detect_workflow_type(question, batch_no)
    wants_latest = bool(re.search(r"(最新|最近|上一个|最后一个)", question))

    if batch_no or wants_latest:
        intent = "batch_status"
    else:
        intent = "unknown"

    override = specialised.get(workflow_type)
    if override is not None:
        specialised_intent, triggers = override
        if any(trigger in question for trigger in triggers):
            intent = specialised_intent

    return {
        "intent": intent,
        "workflow_type": workflow_type,
        "batch_no": batch_no,
        "latest": wants_latest or not batch_no,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_batch_no(text: str) -> str:
|
||||||
|
match = re.search(r"\b(?:RR|AFF|FS)-[A-Za-z0-9-]+", text, flags=re.IGNORECASE)
|
||||||
|
return match.group(0).upper() if match else ""
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_workflow_type(text: str, batch_no: str = "") -> str:
    """Return the first workflow whose keywords occur in the text or batch number.

    Workflows are tried in ``WORKFLOW_KEYWORDS`` order; "" means no match.
    """
    haystack = f"{text} {batch_no}"
    return next(
        (
            workflow_type
            for workflow_type, keywords in WORKFLOW_KEYWORDS.items()
            if any(keyword in haystack for keyword in keywords)
        ),
        "",
    )
|
||||||
9
review_agent/feishu_questions/permissions.py
Normal file
9
review_agent/feishu_questions/permissions.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
||||||
|
def can_access_batch(user, batch) -> bool:
    """Tell whether ``user`` may read ``batch``.

    Unauthenticated or missing users are rejected, staff and superusers are
    always allowed, and everyone else must own the batch (``batch.user_id``
    equal to ``user.pk``).
    """
    if not user or not getattr(user, "is_authenticated", False):
        return False
    if getattr(user, "is_staff", False) or getattr(user, "is_superuser", False):
        return True
    owner_id = getattr(batch, "user_id", None)
    # Fail closed: a batch without an owner must never match a user whose
    # pk is also None (``None == None`` would otherwise grant access).
    return owner_id is not None and owner_id == user.pk
|
||||||
85
review_agent/feishu_questions/query.py
Normal file
85
review_agent/feishu_questions/query.py
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from review_agent.models import ApplicationFormFillBatch, ExportedSummaryFile, FileSummaryBatch, RegulatoryReviewBatch
|
||||||
|
|
||||||
|
from .permissions import can_access_batch
|
||||||
|
|
||||||
|
|
||||||
|
# Workflow key -> batch model queried for that workflow. Insertion order is
# the order in which models are searched when no workflow type is given.
WORKFLOW_MODELS = {
    "file_summary": FileSummaryBatch,
    "regulatory_review": RegulatoryReviewBatch,
    "application_form_fill": ApplicationFormFillBatch,
}
|
||||||
|
|
||||||
|
|
||||||
|
def query_batch_summary(user, *, workflow_type: str | None = None, batch_no: str | None = None, latest: bool = False) -> dict:
    """Look up one batch for ``user`` and return a serialisable summary.

    Args:
        user: Requesting user; access is checked with ``can_access_batch``.
        workflow_type: Restrict the search to one workflow; any other value
            searches every model in ``WORKFLOW_MODELS``.
        batch_no: Exact batch number. Takes precedence over ``latest``.
        latest: When true (and no ``batch_no``), return the most recent
            batch the user may access.

    Returns:
        The summary dict on success, or a dict with ``ok`` False, a
        ``permission_result`` and an ``answer_summary`` message.
    """
    candidates = _candidate_batches(workflow_type)

    if batch_no:
        for current_workflow_type, model in candidates:
            batch = model.objects.filter(batch_no=batch_no).first()
            if batch:
                return _serialize_allowed_batch(user, current_workflow_type, batch)
        return {"ok": False, "permission_result": "not_found", "answer_summary": "未找到对应批次。"}

    if latest:
        # (timestamp, workflow_type, batch) of the newest accessible batch.
        # Compare across every candidate model instead of returning the
        # first model's hit, so "latest" really is the latest overall.
        newest = None
        for current_workflow_type, model in candidates:
            queryset = model.objects.all().order_by("-finished_at", "-created_at", "-id")
            # iterator() streams rows, so scanning stops at the first
            # accessible batch without caching the whole table.
            batch = next(
                (item for item in queryset.iterator() if can_access_batch(user, item)),
                None,
            )
            if batch is None:
                continue
            stamp = batch.finished_at or batch.created_at
            # On ties or missing timestamps the earlier candidate wins,
            # which keeps the previous model priority.
            if newest is None or (
                stamp is not None and (newest[0] is None or stamp > newest[0])
            ):
                newest = (stamp, current_workflow_type, batch)
        if newest is not None:
            _, newest_workflow_type, newest_batch = newest
            return _serialize_batch(newest_workflow_type, newest_batch, permission_result="allowed")
        return {"ok": False, "permission_result": "not_found", "answer_summary": "未找到可访问的批次。"}

    return {"ok": False, "permission_result": "not_found", "answer_summary": "请提供批次号,或询问最新/最近批次。"}
|
||||||
|
|
||||||
|
|
||||||
|
def _candidate_batches(workflow_type: str | None):
    """Return ``(workflow_type, model)`` pairs to search.

    A known workflow type yields just its model; anything else (None, "",
    or an unknown key) yields every entry of ``WORKFLOW_MODELS``.
    """
    model = WORKFLOW_MODELS.get(workflow_type) if workflow_type else None
    if model is None:
        return list(WORKFLOW_MODELS.items())
    return [(workflow_type, model)]
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_allowed_batch(user, workflow_type: str, batch) -> dict:
    """Serialise ``batch`` if ``user`` may access it, else return a "denied" result."""
    if can_access_batch(user, batch):
        return _serialize_batch(workflow_type, batch, permission_result="allowed")
    return {"ok": False, "permission_result": "denied", "answer_summary": "无权限访问该批次。"}
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_batch(workflow_type: str, batch, *, permission_result: str) -> dict:
    """Build the success payload for ``batch``.

    The payload carries identifiers, the status, a workflow-specific summary
    sentence, the status URL and a combined human-readable answer.
    """
    summary = _summary_for_batch(workflow_type, batch)
    result_url = _result_url(workflow_type, batch.pk)
    payload = {"ok": True, "permission_result": permission_result}
    payload["workflow_type"] = workflow_type
    payload["batch_id"] = batch.pk
    payload["batch_no"] = batch.batch_no
    payload["status"] = batch.status
    payload["summary"] = summary
    payload["result_url"] = result_url
    payload["answer_summary"] = f"{batch.batch_no} 状态 {batch.status}。{summary}"
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def _summary_for_batch(workflow_type: str, batch) -> str:
|
||||||
|
if workflow_type == "file_summary":
|
||||||
|
return f"文件 {batch.total_files} 个,成功 {batch.success_files} 个,失败 {batch.failed_files} 个。"
|
||||||
|
if workflow_type == "regulatory_review":
|
||||||
|
risk = batch.risk_summary or {}
|
||||||
|
return f"阻断项 {int(risk.get('blocking') or 0)} 个,高风险 {int(risk.get('high') or 0)} 个。"
|
||||||
|
if workflow_type == "application_form_fill":
|
||||||
|
export_count = ExportedSummaryFile.objects.filter(
|
||||||
|
workflow_type="application_form_fill",
|
||||||
|
workflow_batch_id=batch.pk,
|
||||||
|
).count()
|
||||||
|
return f"导出文件 {export_count} 个,冲突字段 {len(batch.conflict_summary or [])} 个。"
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _result_url(workflow_type: str, batch_id: int) -> str:
|
||||||
|
paths = {
|
||||||
|
"file_summary": f"/api/review-agent/file-summary/{batch_id}/status/",
|
||||||
|
"regulatory_review": f"/api/review-agent/regulatory-review/{batch_id}/status/",
|
||||||
|
"application_form_fill": f"/api/review-agent/application-form-fill/{batch_id}/status/",
|
||||||
|
}
|
||||||
|
return paths.get(workflow_type, "/")
|
||||||
37
review_agent/feishu_questions/service.py
Normal file
37
review_agent/feishu_questions/service.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.models import FeishuQuestionLog
|
||||||
|
|
||||||
|
from .intent import parse_question_intent
|
||||||
|
from .query import query_batch_summary
|
||||||
|
|
||||||
|
|
||||||
|
def answer_question(user, text: str, *, source_type: str = FeishuQuestionLog.SourceType.SIMULATE) -> dict:
    """Answer a question about a batch and record the exchange.

    The text is parsed into an intent, the matching batch summary is
    queried for ``user``, and a ``FeishuQuestionLog`` row is always created
    (for failures too).

    Returns:
        The query result dict extended with ``intent`` and ``log_id``.
    """
    parsed = parse_question_intent(text)
    workflow_type = parsed.get("workflow_type") or ""
    batch_no = parsed.get("batch_no") or ""
    wants_latest = bool(parsed.get("latest"))

    result = query_batch_summary(
        user,
        workflow_type=workflow_type or None,
        batch_no=batch_no or None,
        latest=wants_latest,
    )

    succeeded = result.get("ok")
    answer_summary = str(result.get("answer_summary") or "")
    if succeeded:
        status = FeishuQuestionLog.Status.SUCCESS
        error_message = ""
    else:
        status = FeishuQuestionLog.Status.FAILED
        error_message = answer_summary

    # Only authenticated users are attached to the log row.
    log_user = user if getattr(user, "is_authenticated", False) else None
    log = FeishuQuestionLog.objects.create(
        system_user=log_user,
        source_type=source_type,
        question_text=text,
        intent=str(parsed.get("intent") or "unknown"),
        query_object={
            "workflow_type": workflow_type,
            "batch_no": batch_no,
            "latest": wants_latest,
        },
        answer_summary=answer_summary[:500],
        permission_result=str(result.get("permission_result") or ""),
        status=status,
        error_message=error_message,
        processed_at=timezone.now(),
    )
    return {**result, "intent": parsed.get("intent"), "log_id": log.pk}
|
||||||
1
review_agent/file_summary/__init__.py
Normal file
1
review_agent/file_summary/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
4
review_agent/file_summary/constants.py
Normal file
4
review_agent/file_summary/constants.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
ATTACHMENT_ROOT = Path("file_summary") / "users"
|
||||||
23
review_agent/file_summary/events.py
Normal file
23
review_agent/file_summary/events.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch, WorkflowEvent
|
||||||
|
|
||||||
|
|
||||||
|
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Create and return a ``file_summary`` workflow event for ``batch``.

    A missing or empty ``payload`` is stored as ``{}``.
    """
    event_payload = payload if payload else {}
    return WorkflowEvent.objects.create(
        batch=batch,
        workflow_type="file_summary",
        workflow_batch_id=batch.pk,
        conversation=batch.conversation,
        event_type=event_type,
        payload=event_payload,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert an event to a plain dict; ``created_at`` becomes an ISO-8601 string."""
    created_at_iso = event.created_at.isoformat()
    return dict(
        id=event.pk,
        event_type=event.event_type,
        payload=event.payload,
        created_at=created_at_iso,
    )
|
||||||
12
review_agent/file_summary/paths.py
Normal file
12
review_agent/file_summary/paths.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_storage_path(storage_path: str) -> Path:
    """Return ``storage_path`` as a path, anchoring relative paths at ``MEDIA_ROOT``."""
    candidate = Path(storage_path)
    return candidate if candidate.is_absolute() else Path(settings.MEDIA_ROOT) / candidate
|
||||||
1
review_agent/file_summary/services/__init__.py
Normal file
1
review_agent/file_summary/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
125
review_agent/file_summary/services/archive.py
Normal file
125
review_agent/file_summary/services/archive.py
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from zipfile import ZipFile
|
||||||
|
|
||||||
|
import py7zr
|
||||||
|
|
||||||
|
|
||||||
|
ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"}
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.services.archive")
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_inside_target(path: Path, target_dir: Path) -> None:
|
||||||
|
target = target_dir.resolve()
|
||||||
|
resolved = path.resolve()
|
||||||
|
if target != resolved and target not in resolved.parents:
|
||||||
|
raise ValueError("解压路径必须位于批次工作目录内。")
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_member_path(target_dir: Path, member_name: str) -> Path:
    """Return where an archive member would be written inside ``target_dir``.

    Raises ``ValueError`` (via ``_ensure_inside_target``) when the member
    name would escape the target directory.
    """
    candidate = target_dir.joinpath(member_name)
    _ensure_inside_target(candidate, target_dir)
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def extract_archive(archive_path: str | Path, target_dir: str | Path) -> list[Path]:
    """Extract a zip/7z/rar archive into ``target_dir``.

    The target directory is created first. The format is chosen from the
    file suffix alone; an unsupported suffix returns ``[]`` without
    extracting anything.

    Returns:
        The list produced by the format-specific extractor.
    """
    source = Path(archive_path)
    destination_root = Path(target_dir)
    destination_root.mkdir(parents=True, exist_ok=True)

    suffix = source.suffix.lower().lstrip(".")
    if suffix not in ARCHIVE_EXTENSIONS:
        return []

    # Anything in ARCHIVE_EXTENSIONS that is not zip or 7z goes to the RAR path.
    handlers = {"zip": _extract_zip, "7z": _extract_7z}
    extractor = handlers.get(suffix, _extract_rar)
    return extractor(source, destination_root)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_zip(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a zip archive member by member and return the written files.

    Every member path is validated with ``_safe_member_path`` before
    anything is written, so a name that escapes ``target_dir`` raises
    ``ValueError``. Directory entries are created but not returned.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    extracted: list[Path] = []
    with ZipFile(archive_path) as archive:
        for member in archive.infolist():
            destination = _safe_member_path(target_dir, member.filename)
            if member.is_dir():
                destination.mkdir(parents=True, exist_ok=True)
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            with archive.open(member) as source, destination.open("wb") as target:
                # Stream in chunks: reading the whole member at once would
                # hold its entire uncompressed size in memory.
                shutil.copyfileobj(source, target)
            extracted.append(destination)
    return extracted
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a 7z archive and return the member paths that are regular files.

    All member names are validated with ``_safe_member_path`` before
    ``extractall`` runs, so an escaping name aborts with ``ValueError``
    before anything is written.
    """
    with py7zr.SevenZipFile(archive_path, mode="r") as archive:
        member_names = archive.getnames()
        for member_name in member_names:
            _safe_member_path(target_dir, member_name)
        archive.extractall(path=target_dir)

    extracted = []
    for member_name in member_names:
        candidate = target_dir / member_name
        if candidate.is_file():
            extracted.append(candidate)
    return extracted
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a RAR archive, preferring libarchive and falling back to ``7z``.

    The fallback runs both when libarchive raises and when it succeeds but
    yields no files.
    """
    log_context = {"archive_path": str(archive_path), "target_dir": str(target_dir)}
    try:
        files = _extract_rar_with_libarchive(archive_path, target_dir)
    except Exception as exc:
        logger.warning(
            "RAR libarchive extract failed, falling back to 7z",
            extra={**log_context, "error": str(exc)},
        )
        return _extract_rar_with_7z(archive_path, target_dir)

    if files:
        return files
    logger.info(
        "RAR libarchive extract produced no files, falling back to 7z",
        extra=log_context,
    )
    return _extract_rar_with_7z(archive_path, target_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_rar_with_libarchive(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a RAR archive with the optional ``libarchive`` binding.

    Directories are created, regular files are written block by block, and
    any other entry kind is logged and skipped.

    Raises:
        RuntimeError: when ``libarchive`` cannot be imported.
        ValueError: when an entry path escapes ``target_dir``.
    """
    try:
        import libarchive
    except ImportError as exc:
        raise RuntimeError("未安装 libarchive,跳过 Python RAR 解压。") from exc

    written: list[Path] = []
    with libarchive.file_reader(str(archive_path)) as entries:
        for entry in entries:
            destination = _safe_member_path(target_dir, entry.pathname)
            if entry.isdir:
                destination.mkdir(parents=True, exist_ok=True)
            elif entry.isfile:
                destination.parent.mkdir(parents=True, exist_ok=True)
                with destination.open("wb") as sink:
                    for block in entry.get_blocks():
                        sink.write(block)
                written.append(destination)
            else:
                logger.info(
                    "RAR libarchive skipped non-regular entry",
                    extra={"archive_path": str(archive_path), "entry": entry.pathname},
                )
    return written
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_rar_with_7z(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a RAR archive by running the external ``7z`` command.

    Returns every regular file found under ``target_dir`` afterwards (not
    only files from this archive). Each is checked to resolve inside
    ``target_dir``.

    Raises:
        RuntimeError: when ``7z`` exits with a non-zero status.
        ValueError: when a resulting file resolves outside ``target_dir``.
    """
    command = ["7z", "x", f"-o{target_dir}", str(archive_path), "-y"]
    completed = subprocess.run(command, check=False, capture_output=True, text=True)
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr or completed.stdout or "rar 解压失败")

    extracted = []
    for candidate in target_dir.rglob("*"):
        if candidate.is_file():
            extracted.append(candidate)
    # Validation happens after extraction because 7z wrote the files itself.
    for candidate in extracted:
        _ensure_inside_target(candidate, target_dir)
    return extracted
|
||||||
261
review_agent/file_summary/services/attachment_reader.py
Normal file
261
review_agent/file_summary/services/attachment_reader.py
Normal file
@@ -0,0 +1,261 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import logging
|
||||||
|
from tempfile import TemporaryDirectory
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import FileAttachment
|
||||||
|
from review_agent.file_summary.services.archive import ARCHIVE_EXTENSIONS, extract_archive
|
||||||
|
|
||||||
|
|
||||||
|
TEXT_EXTENSIONS = {"txt", "md", "csv", "json", "log"}
|
||||||
|
SUPPORTED_EXTENSIONS = TEXT_EXTENSIONS | {"pdf", "docx", "xlsx", "pptx"} | ARCHIVE_EXTENSIONS
|
||||||
|
MAX_PREVIEW_CHARS = 3000
|
||||||
|
MAX_ROWS_PER_SHEET = 20
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.attachment_reader")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class AttachmentReadResult:
    """Immutable outcome of reading one attachment."""

    # "success", "failed" or "unsupported" as produced within this module.
    status: str
    # Original upload name of the attachment.
    filename: str
    # Lower-cased extension without the leading dot.
    file_type: str
    file_size: int
    # Flattened text preview built from ``sections``.
    preview_text: str = ""
    # One dict per extracted page/sheet/table/slide/text chunk.
    sections: list[dict[str, object]] = field(default_factory=list)
    # Failure reason; empty on success.
    error_message: str = ""

    def to_dict(self) -> dict[str, object]:
        """Return the result as a plain dict (recursively, via ``asdict``)."""
        return asdict(self)
|
||||||
|
|
||||||
|
|
||||||
|
def read_attachment_details(attachment: FileAttachment) -> AttachmentReadResult:
    """Read an attachment from disk and return its sections plus a text preview.

    The file type comes from the extension of ``original_name``. Missing
    files and reader exceptions produce a "failed" result, unknown
    extensions an "unsupported" result; nothing is raised to the caller.
    """
    file_path = _attachment_absolute_path(attachment)
    file_type = Path(attachment.original_name).suffix.lower().lstrip(".")
    logger.info(
        "Attachment read started",
        extra={
            "attachment_id": attachment.pk,
            "conversation_id": attachment.conversation_id,
            "original_name": attachment.original_name,
            "file_type": file_type,
            "storage_path": attachment.storage_path,
            "resolved_path": str(file_path),
        },
    )

    if not file_path.exists():
        logger.warning(
            "Attachment read missing file",
            extra={"attachment_id": attachment.pk, "resolved_path": str(file_path)},
        )
        return _failed(attachment, file_type, "附件文件不存在。")

    if file_type not in SUPPORTED_EXTENSIONS:
        logger.warning(
            "Attachment read unsupported type",
            extra={"attachment_id": attachment.pk, "file_type": file_type},
        )
        return _failed(attachment, file_type, f"暂不支持解析 .{file_type or 'unknown'} 文件。", "unsupported")

    try:
        if file_type in ARCHIVE_EXTENSIONS:
            sections = _read_archive(file_path)
        else:
            # Same per-type dispatch that is used for files inside archives.
            sections = _read_supported_file(file_path, file_type)
    except Exception as exc:
        logger.exception(
            "Attachment read failed",
            extra={"attachment_id": attachment.pk, "file_type": file_type, "error": str(exc)},
        )
        return _failed(attachment, file_type, str(exc))

    preview = _build_preview(sections)
    logger.info(
        "Attachment read finished",
        extra={
            "attachment_id": attachment.pk,
            "section_count": len(sections),
            "preview_length": len(preview),
        },
    )
    return AttachmentReadResult(
        status="success",
        filename=attachment.original_name,
        file_type=file_type,
        file_size=attachment.file_size,
        preview_text=preview[:MAX_PREVIEW_CHARS],
        sections=sections,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _attachment_absolute_path(attachment: FileAttachment) -> Path:
|
||||||
|
path = Path(attachment.storage_path)
|
||||||
|
if path.is_absolute():
|
||||||
|
return path
|
||||||
|
return Path(settings.MEDIA_ROOT) / path
|
||||||
|
|
||||||
|
|
||||||
|
def _failed(
    attachment: FileAttachment,
    file_type: str,
    message: str,
    status: str = "failed",
) -> AttachmentReadResult:
    """Build a non-success result carrying ``message`` as the error text.

    ``status`` defaults to "failed"; callers pass "unsupported" for unknown
    file types.
    """
    details = {
        "status": status,
        "filename": attachment.original_name,
        "file_type": file_type,
        "file_size": attachment.file_size,
        "error_message": message,
    }
    return AttachmentReadResult(**details)
|
||||||
|
|
||||||
|
|
||||||
|
def _read_text(path: Path) -> list[dict[str, object]]:
    """Return a single text section holding the start of a UTF-8 text file.

    Undecodable bytes are replaced rather than raising. Only the first
    ``MAX_PREVIEW_CHARS`` characters are read, so a very large file is
    never loaded into memory just to be truncated.
    """
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # In text mode read(n) returns at most n characters, which matches
        # slicing the full text to MAX_PREVIEW_CHARS.
        text = handle.read(MAX_PREVIEW_CHARS)
    return [{"type": "text", "name": path.name, "text": text}]
|
||||||
|
|
||||||
|
|
||||||
|
def _read_csv(path: Path) -> list[dict[str, object]]:
    """Return one table section for a CSV file.

    ``row_count`` is the total number of rows; ``rows`` holds only the first
    ``MAX_ROWS_PER_SHEET`` rows. The file is streamed, so memory use is
    bounded by the preview size rather than the file size.
    """
    preview_rows: list[list[str]] = []
    row_count = 0
    with path.open("r", encoding="utf-8-sig", errors="replace", newline="") as handle:
        for row_count, row in enumerate(csv.reader(handle), start=1):
            if len(preview_rows) < MAX_ROWS_PER_SHEET:
                preview_rows.append([str(cell) for cell in row])
    return [
        {
            "type": "table",
            "name": path.name,
            "row_count": row_count,
            "rows": preview_rows,
        }
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _read_pdf(path: Path) -> list[dict[str, object]]:
    """Return one "page" section per PDF page with its extracted text.

    Pages without extractable text get an empty string. ``pypdf`` is
    imported at call time.
    """
    from pypdf import PdfReader

    reader = PdfReader(str(path))
    return [
        {"type": "page", "name": f"第 {page_no} 页", "text": page.extract_text() or ""}
        for page_no, page in enumerate(reader.pages, start=1)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _read_docx(path: Path) -> list[dict[str, object]]:
    """Return the body text and tables of a .docx file as sections.

    The first section is always the body text (non-empty paragraphs joined
    by newlines). Each table follows as its own section with at most
    ``MAX_ROWS_PER_SHEET`` rows; ``row_count`` is the full row count.
    """
    from docx import Document

    document = Document(str(path))

    body_lines = []
    for paragraph in document.paragraphs:
        stripped = paragraph.text.strip()
        if stripped:
            body_lines.append(stripped)

    sections: list[dict[str, object]] = [
        {"type": "text", "name": "正文", "text": "\n".join(body_lines)}
    ]
    for table_no, table in enumerate(document.tables, start=1):
        table_rows = []
        for row in table.rows:
            table_rows.append([cell.text.strip() for cell in row.cells])
        sections.append(
            {
                "type": "table",
                "name": f"表格 {table_no}",
                "row_count": len(table_rows),
                "rows": table_rows[:MAX_ROWS_PER_SHEET],
            }
        )
    return sections
|
||||||
|
|
||||||
|
|
||||||
|
def _read_xlsx(path: Path) -> list[dict[str, object]]:
    """Return one "sheet" section per worksheet of an .xlsx file.

    Each section holds the sheet's reported dimensions and up to
    ``MAX_ROWS_PER_SHEET`` rows, with empty cells rendered as "" and all
    other values as strings. The workbook is opened read-only with cached
    formula values (``data_only=True``).
    """
    from openpyxl import load_workbook

    workbook = load_workbook(str(path), read_only=True, data_only=True)
    sections = []
    try:
        for sheet in workbook.worksheets:
            rows = []
            for row in sheet.iter_rows(max_row=MAX_ROWS_PER_SHEET, values_only=True):
                rows.append(["" if cell is None else str(cell) for cell in row])
            sections.append(
                {
                    "type": "sheet",
                    "name": sheet.title,
                    "row_count": sheet.max_row,
                    "column_count": sheet.max_column,
                    "rows": rows,
                }
            )
    finally:
        # Close even when a sheet fails to parse, so the workbook is not
        # left open on the error path.
        workbook.close()
    return sections
|
||||||
|
|
||||||
|
|
||||||
|
def _read_pptx(path: Path) -> list[dict[str, object]]:
    """Return one "slide" section per slide with the text of its shapes.

    Shapes without a ``text`` attribute or with blank text are ignored; the
    remaining texts are stripped and joined by newlines.
    """
    from pptx import Presentation

    presentation = Presentation(str(path))
    sections = []
    for slide_no, slide in enumerate(presentation.slides, start=1):
        shape_texts = [
            shape.text.strip()
            for shape in slide.shapes
            if hasattr(shape, "text") and shape.text.strip()
        ]
        sections.append(
            {"type": "slide", "name": f"幻灯片 {slide_no}", "text": "\n".join(shape_texts)}
        )
    return sections
|
||||||
|
|
||||||
|
|
||||||
|
def _read_archive(path: Path) -> list[dict[str, object]]:
    """Extract an archive into a temporary directory and read each file in it.

    Files with unsupported extensions, and nested archives, get a
    placeholder section. Sections of readable files are renamed to
    "<file name> / <section name>". The temporary directory is removed
    before returning.
    """
    sections: list[dict[str, object]] = []
    with TemporaryDirectory(prefix="attachment-reader-") as temp_dir:
        members = extract_archive(path, Path(temp_dir))
        if not members:
            return [{"type": "archive", "name": path.name, "text": "压缩包未解出任何可读取文件。"}]

        for member in members:
            file_type = member.suffix.lower().lstrip(".")
            readable = file_type in SUPPORTED_EXTENSIONS and file_type not in ARCHIVE_EXTENSIONS
            if readable:
                for section in _read_supported_file(member, file_type):
                    # Copy before renaming so the reader's dict is untouched.
                    sections.append(
                        {**section, "name": f"{member.name} / {section.get('name', member.name)}"}
                    )
            else:
                sections.append(
                    {
                        "type": "file",
                        "name": member.name,
                        "text": f"暂不支持预览压缩包内的 .{file_type or 'unknown'} 文件。",
                    }
                )
    return sections
|
||||||
|
|
||||||
|
|
||||||
|
def _read_supported_file(path: Path, file_type: str) -> list[dict[str, object]]:
    """Read ``path`` with the reader for ``file_type``.

    Any type without a dedicated reader is read as plain text.
    """
    readers = {
        "pdf": _read_pdf,
        "docx": _read_docx,
        "xlsx": _read_xlsx,
        "pptx": _read_pptx,
        "csv": _read_csv,
    }
    reader = readers.get(file_type, _read_text)
    return reader(path)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_preview(sections: list[dict[str, object]]) -> str:
|
||||||
|
parts: list[str] = []
|
||||||
|
for section in sections:
|
||||||
|
if "text" in section and section["text"]:
|
||||||
|
parts.append(str(section["text"]))
|
||||||
|
rows = section.get("rows")
|
||||||
|
if rows:
|
||||||
|
parts.extend(" | ".join(str(cell) for cell in row) for row in rows[:5])
|
||||||
|
return "\n".join(part for part in parts if part).strip()
|
||||||
68
review_agent/file_summary/services/export_excel.py
Normal file
68
review_agent/file_summary/services/export_excel.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
|
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.export_excel")
|
||||||
|
|
||||||
|
|
||||||
|
def _exports_dir(batch: FileSummaryBatch) -> Path:
|
||||||
|
root = Path(batch.work_dir) if batch.work_dir else Path(settings.MEDIA_ROOT) / "file_summary" / batch.batch_no
|
||||||
|
export_dir = root / "exports"
|
||||||
|
export_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
return export_dir
|
||||||
|
|
||||||
|
|
||||||
|
def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile:
    """Write the batch summary workbook and register it as an export record.

    The workbook has an overview sheet with the batch counters and a detail
    sheet with one row per batch item ordered by ``file_index``. It is
    saved as ``<batch_no>-summary.xlsx`` in the batch's exports directory.

    Returns:
        The created ``ExportedSummaryFile`` row pointing at the saved file.
    """
    logger.info("Excel export generation started", extra={"batch_id": batch.pk})

    workbook = Workbook()
    overview = workbook.active
    overview.title = "汇总信息"
    overview_rows = [
        ["批次号", batch.batch_no],
        ["产品名称", batch.product_name or "-"],
        ["文件总数", batch.total_files],
        ["统计成功", batch.success_files],
        ["统计失败", batch.failed_files],
        ["不支持", batch.unsupported_files],
        ["不确定", batch.uncertain_files],
        ["总页数", batch.total_pages],
    ]
    for overview_row in overview_rows:
        overview.append(overview_row)

    detail = workbook.create_sheet("文件明细")
    detail.append(["序号", "目录层级", "文件名", "类型", "页数", "路径", "状态", "重试次数", "异常说明"])
    for item in batch.items.order_by("file_index"):
        detail_row = [
            item.file_index,
            item.directory_level,
            item.file_name,
            item.file_type,
            item.page_count,
            item.relative_path,
            item.statistics_status,
            item.retry_count,
            item.error_message,
        ]
        detail.append(detail_row)

    path = _exports_dir(batch) / f"{batch.batch_no}-summary.xlsx"
    workbook.save(path)

    exported = ExportedSummaryFile.objects.create(
        batch=batch,
        workflow_type="file_summary",
        workflow_batch_id=batch.pk,
        export_category="summary",
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=path.name,
        storage_path=str(path),
    )
    logger.info(
        "Excel export generation finished",
        extra={"batch_id": batch.pk, "export_id": exported.pk, "path": str(path)},
    )
    return exported
|
||||||
49
review_agent/file_summary/services/inventory.py
Normal file
49
review_agent/file_summary/services/inventory.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch, FileSummaryItem
|
||||||
|
|
||||||
|
|
||||||
|
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
|
||||||
|
|
||||||
|
|
||||||
|
def _directory_level(relative_path: Path) -> str:
|
||||||
|
if len(relative_path.parts) <= 1:
|
||||||
|
return ""
|
||||||
|
return "/".join(relative_path.parts[:-1])
|
||||||
|
|
||||||
|
|
||||||
|
def scan_files_to_items(*, batch: FileSummaryBatch, roots: list[Path]) -> list[FileSummaryItem]:
    """Create one ``FileSummaryItem`` per file found under ``roots``.

    A root that is itself a file is taken as-is. Directory roots are walked
    recursively in sorted order, skipping dot-files and empty files. Items
    are numbered from 1 in discovery order and start with the SKIPPED
    statistics status. The batch's total/supported/unsupported counters are
    then updated and saved.

    Returns:
        The created items, in discovery order.
    """
    discovered: list[tuple[Path, Path]] = []
    for root in map(Path, roots):
        if root.is_file():
            discovered.append((root.parent, root))
            continue
        candidates = sorted(entry for entry in root.rglob("*") if entry.is_file())
        discovered.extend(
            (root, entry)
            for entry in candidates
            if not (entry.name.startswith(".") or entry.stat().st_size == 0)
        )

    created: list[FileSummaryItem] = []
    for position, (root, file_path) in enumerate(discovered, start=1):
        relative = file_path.relative_to(root).as_posix()
        created.append(
            FileSummaryItem.objects.create(
                batch=batch,
                file_index=position,
                directory_level=_directory_level(Path(relative)),
                file_name=file_path.name,
                file_type=file_path.suffix.lower().lstrip("."),
                relative_path=relative,
                storage_path=str(file_path),
                statistics_status=FileSummaryItem.StatisticsStatus.SKIPPED,
            )
        )

    total = len(created)
    supported = sum(1 for item in created if item.file_type in SUPPORTED_EXTENSIONS)
    batch.total_files = total
    batch.supported_files = supported
    batch.unsupported_files = total - supported
    batch.save(update_fields=["total_files", "supported_files", "unsupported_files"])
    return created
|
||||||
282
review_agent/file_summary/services/page_count.py
Normal file
282
review_agent/file_summary/services/page_count.py
Normal file
@@ -0,0 +1,282 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from xml.etree import ElementTree
|
||||||
|
from zipfile import ZipFile, is_zipfile
|
||||||
|
|
||||||
|
|
||||||
|
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.page_count")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class PageCountResult:
    """Immutable outcome of a page-count attempt for one document."""

    # Outcome label, e.g. "success", "uncertain" or "unsupported".
    status: str
    # Number of pages/sheets/slides when known, otherwise None.
    page_count: int | None = None
    # Failure detail; empty when there is nothing to report.
    error_message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def count_document_pages(path: str | Path) -> PageCountResult:
    """Count the pages, sheets or slides of the document stored at *path*.

    The lower-cased file extension selects the strategy. PDFs are read with
    ``pypdf``. ``docx``, ``xlsx``, ``xls`` and ``pptx`` first use a file-based
    counter and only then Office COM automation; ``doc`` and ``ppt`` have no
    file-based counter and go straight to COM. COM is attempted only when
    ``_can_try_com_fallback`` accepts the file.

    Args:
        path: Location of the document on disk.

    Returns:
        A ``PageCountResult`` whose status is ``"unsupported"`` when the
        extension is not in ``SUPPORTED_EXTENSIONS``, ``"success"`` when a
        count was obtained, ``"failed"`` (carrying the exception text) when
        counting raised, and otherwise ``"uncertain"`` -- except that ``doc``
        and ``ppt`` files are passed to ``_ole_uncertain_or_failed`` to choose
        between ``"uncertain"`` and ``"failed"``.
    """
    document = Path(path)
    extension = document.suffix.lower().lstrip(".")
    if extension not in SUPPORTED_EXTENSIONS:
        return PageCountResult(status="unsupported")

    try:
        if extension == "pdf":
            from pypdf import PdfReader

            reader = PdfReader(str(document))
            return PageCountResult(status="success", page_count=len(reader.pages))

        # File-based counters that need no Office installation.
        file_counters = {
            "docx": _count_docx_pages_from_extended_properties,
            "xlsx": _count_xlsx_sheets,
            "xls": _count_xls_sheets,
            "pptx": _count_pptx_slides,
        }
        # COM automation counters, used only as a fallback.
        com_counters = {
            "docx": _count_word_pages_with_com,
            "doc": _count_word_pages_with_com,
            "xlsx": _count_excel_sheets_with_com,
            "xls": _count_excel_sheets_with_com,
            "pptx": _count_powerpoint_slides_with_com,
            "ppt": _count_powerpoint_slides_with_com,
        }

        pages = None
        file_counter = file_counters.get(extension)
        if file_counter is not None:
            pages = file_counter(document)
        # The signature check is only performed when the file-based count
        # produced nothing, exactly like the short-circuiting original.
        if not pages and _can_try_com_fallback(document, extension):
            pages = com_counters[extension](document)

        if pages:
            return PageCountResult(status="success", page_count=pages)
        if extension in {"doc", "ppt"}:
            return _ole_uncertain_or_failed(document)
        return PageCountResult(status="uncertain")
    except Exception as exc:
        return PageCountResult(status="failed", error_message=str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
def _count_docx_pages_from_extended_properties(path: Path) -> int | None:
    """Return the page count stored in a DOCX's ``docProps/app.xml``.

    Args:
        path: DOCX file to inspect.

    Returns:
        The positive integer found in the extended-properties ``Pages``
        element, or None when the archive cannot be read, the entry is
        missing, the XML does not parse, or the value is absent or not a
        positive integer. Read and parse failures are logged as warnings.
    """
    pages_tag = "{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}Pages"

    try:
        with ZipFile(path) as archive:
            candidates = [info for info in archive.infolist() if info.filename == "docProps/app.xml"]
            if not candidates:
                return None
            # If the archive holds duplicate entries, the last one is used.
            xml_text = archive.read(candidates[-1]).decode("utf-8", errors="replace")
    except Exception as exc:
        logger.warning("DOCX extended properties read failed", extra={"path": str(path), "error": str(exc)})
        return None

    try:
        properties = ElementTree.fromstring(xml_text)
    except ElementTree.ParseError as exc:
        logger.warning("DOCX extended properties parse failed", extra={"path": str(path), "error": str(exc)})
        return None

    node = properties.find(pages_tag)
    raw_value = None if node is None else node.text
    return _positive_int(raw_value) if raw_value else None
|
||||||
|
|
||||||
|
|
||||||
|
def _count_xlsx_sheets(path: Path) -> int | None:
    """Return the number of sheets in an XLSX workbook, read with openpyxl.

    Returns None (after logging a warning) when openpyxl is unavailable or the
    workbook cannot be opened, and also when the workbook reports no sheets.
    """
    try:
        from openpyxl import load_workbook

        book = load_workbook(str(path), read_only=True, data_only=True)
        try:
            sheet_total = len(book.sheetnames)
        finally:
            # Always release the workbook, even if reading the names failed.
            book.close()
        return _positive_int(sheet_total)
    except Exception as exc:
        logger.warning("XLSX sheet count failed", extra={"path": str(path), "error": str(exc)})
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _count_xls_sheets(path: Path) -> int | None:
    """Return the number of sheets in a legacy XLS workbook, read with xlrd.

    Returns None (after logging a warning) when xlrd is unavailable or the
    workbook cannot be opened, and also when the workbook reports no sheets.
    """
    try:
        import xlrd

        book = xlrd.open_workbook(str(path), on_demand=True)
        try:
            sheet_total = book.nsheets
        finally:
            # Always hand the workbook's resources back to xlrd.
            book.release_resources()
        return _positive_int(sheet_total)
    except Exception as exc:
        logger.warning("XLS sheet count failed", extra={"path": str(path), "error": str(exc)})
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _count_pptx_slides(path: Path) -> int | None:
    """Return the number of slides in a PPTX deck, read with python-pptx.

    Returns None (after logging a warning) when the library is unavailable or
    the deck cannot be opened, and also when the deck has no slides.
    """
    try:
        from pptx import Presentation

        deck = Presentation(str(path))
        slide_total = len(deck.slides)
        return _positive_int(slide_total)
    except Exception as exc:
        logger.warning("PPTX slide count failed", extra={"path": str(path), "error": str(exc)})
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def _ole_uncertain_or_failed(path: Path) -> PageCountResult:
    """Classify a legacy Office file whose pages could not be counted.

    Returns:
        ``"uncertain"`` when ``olefile`` recognises the file as an OLE
        container, or when the check itself cannot be performed (logged as a
        warning); ``"failed"`` with an explanatory message when the file is
        not an OLE container.
    """
    try:
        import olefile

        looks_like_ole = olefile.isOleFile(str(path))
    except Exception as exc:
        logger.warning("OLE validation failed", extra={"path": str(path), "error": str(exc)})
        return PageCountResult(status="uncertain")

    if not looks_like_ole:
        return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。")
    return PageCountResult(status="uncertain")
|
||||||
|
|
||||||
|
|
||||||
|
def _can_try_com_fallback(path: Path, ext: str) -> bool:
|
||||||
|
if ext in {"docx", "xlsx", "pptx"}:
|
||||||
|
return is_zipfile(path)
|
||||||
|
if ext in {"doc", "xls", "ppt"}:
|
||||||
|
try:
|
||||||
|
import olefile
|
||||||
|
|
||||||
|
return olefile.isOleFile(str(path))
|
||||||
|
except Exception as exc:
|
||||||
|
logger.warning("OLE signature check failed", extra={"path": str(path), "error": str(exc)})
|
||||||
|
return False
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _count_word_pages_with_com(path: Path) -> int | None:
    """Count the pages of a Word document through COM automation.

    Returns:
        The positive page count, or None when pywin32 cannot be imported
        (logged at info level), when automation fails (logged as a warning),
        or when Word reports a non-positive count.
    """
    try:
        import pythoncom
        import win32com.client
    except Exception as exc:
        logger.info("Word COM page count unavailable", extra={"path": str(path), "error": str(exc)})
        return None

    def _best_effort(action, failure_message: str) -> None:
        # Cleanup must never mask the result; failures are only logged.
        try:
            action()
        except Exception as exc:
            logger.debug(failure_message, extra={"path": str(path), "error": str(exc)})

    app = None
    doc = None
    pythoncom.CoInitialize()
    try:
        app = win32com.client.DispatchEx("Word.Application")
        app.Visible = False
        app.DisplayAlerts = 0
        doc = app.Documents.Open(
            str(path.resolve()),
            ReadOnly=True,
            AddToRecentFiles=False,
            ConfirmConversions=False,
        )
        doc.Repaginate()
        # 2 is WdStatistic.wdStatisticPages in the Word object model.
        page_total = doc.ComputeStatistics(2)
        return _positive_int(page_total)
    except Exception as exc:
        logger.warning("Word COM page count failed", extra={"path": str(path), "error": str(exc)})
        return None
    finally:
        # Close the document first, then the application, then leave COM.
        if doc is not None:
            _best_effort(lambda: doc.Close(False), "Word document close failed")
        if app is not None:
            _best_effort(lambda: app.Quit(), "Word application quit failed")
        pythoncom.CoUninitialize()
|
||||||
|
|
||||||
|
|
||||||
|
def _count_powerpoint_slides_with_com(path: Path) -> int | None:
    """Count the slides of a presentation through PowerPoint COM automation.

    Returns:
        The positive slide count, or None when pywin32 cannot be imported
        (logged at info level), when automation fails (logged as a warning),
        or when PowerPoint reports a non-positive count.
    """
    try:
        import pythoncom
        import win32com.client
    except Exception as exc:
        logger.info("PowerPoint COM slide count unavailable", extra={"path": str(path), "error": str(exc)})
        return None

    def _best_effort(action, failure_message: str) -> None:
        # Cleanup must never mask the result; failures are only logged.
        try:
            action()
        except Exception as exc:
            logger.debug(failure_message, extra={"path": str(path), "error": str(exc)})

    app = None
    deck = None
    pythoncom.CoInitialize()
    try:
        app = win32com.client.DispatchEx("PowerPoint.Application")
        deck = app.Presentations.Open(
            str(path.resolve()),
            ReadOnly=True,
            Untitled=False,
            WithWindow=False,
        )
        slide_total = deck.Slides.Count
        return _positive_int(slide_total)
    except Exception as exc:
        logger.warning("PowerPoint COM slide count failed", extra={"path": str(path), "error": str(exc)})
        return None
    finally:
        # Close the presentation first, then the application, then leave COM.
        if deck is not None:
            _best_effort(lambda: deck.Close(), "PowerPoint presentation close failed")
        if app is not None:
            _best_effort(lambda: app.Quit(), "PowerPoint application quit failed")
        pythoncom.CoUninitialize()
|
||||||
|
|
||||||
|
|
||||||
|
def _count_excel_sheets_with_com(path: Path) -> int | None:
    """Count the worksheets of a workbook through Excel COM automation.

    Returns:
        The positive worksheet count, or None when pywin32 cannot be imported
        (logged at info level), when automation fails (logged as a warning),
        or when Excel reports a non-positive count.
    """
    try:
        import pythoncom
        import win32com.client
    except Exception as exc:
        logger.info("Excel COM sheet count unavailable", extra={"path": str(path), "error": str(exc)})
        return None

    def _best_effort(action, failure_message: str) -> None:
        # Cleanup must never mask the result; failures are only logged.
        try:
            action()
        except Exception as exc:
            logger.debug(failure_message, extra={"path": str(path), "error": str(exc)})

    app = None
    book = None
    pythoncom.CoInitialize()
    try:
        app = win32com.client.DispatchEx("Excel.Application")
        app.Visible = False
        app.DisplayAlerts = False
        book = app.Workbooks.Open(str(path.resolve()), ReadOnly=True)
        sheet_total = book.Worksheets.Count
        return _positive_int(sheet_total)
    except Exception as exc:
        logger.warning("Excel COM sheet count failed", extra={"path": str(path), "error": str(exc)})
        return None
    finally:
        # Close the workbook first, then the application, then leave COM.
        if book is not None:
            _best_effort(lambda: book.Close(False), "Excel workbook close failed")
        if app is not None:
            _best_effort(lambda: app.Quit(), "Excel application quit failed")
        pythoncom.CoUninitialize()
|
||||||
|
|
||||||
|
|
||||||
|
def _positive_int(value) -> int | None:
|
||||||
|
try:
|
||||||
|
number = int(value)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
return None
|
||||||
|
return number if number > 0 else None
|
||||||
31
review_agent/file_summary/services/product_detect.py
Normal file
31
review_agent/file_summary/services/product_detect.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch
|
||||||
|
|
||||||
|
|
||||||
|
def detect_product_name(batch: FileSummaryBatch) -> str:
    """Derive a product name for *batch* from its inventoried files.

    Items are visited in ``file_index`` order. The first item either located
    below a directory (its top-level directory becomes the name) or whose
    file stem contains one of the product keywords (the stem becomes the
    name) decides the result.

    When a name is found it is saved on the batch, and a conversation whose
    title still starts with "新对话" is retitled after the product.

    Returns:
        The detected name, or an empty string when nothing matched (in which
        case nothing is saved).
    """
    keywords = ("产品", "试剂盒", "说明书")
    detected = ""
    for entry in batch.items.order_by("file_index"):
        path_parts = Path(entry.relative_path).parts
        if len(path_parts) > 1:
            detected = path_parts[0]
            break
        stem = Path(entry.file_name).stem
        if any(keyword in stem for keyword in keywords):
            detected = stem
            break

    if not detected:
        return ""

    batch.product_name = detected
    batch.save(update_fields=["product_name"])

    conversation = batch.conversation
    if conversation.title.startswith("新对话"):
        conversation.title = f"{detected}-文件汇总"
        conversation.save(update_fields=["title", "updated_at"])
    return detected
|
||||||
79
review_agent/file_summary/services/report.py
Normal file
79
review_agent/file_summary/services/report.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
|
||||||
|
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.report")
|
||||||
|
|
||||||
|
|
||||||
|
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return the ``exports`` directory of *batch*, creating it if needed.

    The directory lives under the batch's work directory when one is set,
    otherwise under ``MEDIA_ROOT/file_summary/<batch_no>``.
    """
    if batch.work_dir:
        base_dir = Path(batch.work_dir)
    else:
        base_dir = Path(settings.MEDIA_ROOT) / "file_summary" / batch.batch_no

    target = base_dir / "exports"
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def build_summary_table(batch: FileSummaryBatch) -> str:
    """Render the batch's file items as a Markdown table.

    One row is emitted per item, in ``file_index`` order, below a fixed
    header. Missing directory levels, page counts and error messages are
    shown as ``-``.

    Cell values are sanitised so that user-controlled text (file names,
    exception messages) cannot break the table: pipes are backslash-escaped
    and line breaks are collapsed to spaces. Values without such characters
    are rendered unchanged.

    Returns:
        The table as a single string with rows separated by newlines.
    """

    def _cell(value) -> str:
        # A raw "|" would start a new column and a newline would end the
        # row, so neutralise both before the value enters the table.
        text = " ".join(str(value).splitlines())
        return text.replace("|", "\\|")

    lines = [
        "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    for item in batch.items.order_by("file_index"):
        cells = [
            item.file_index,
            item.directory_level or "-",
            item.file_name,
            item.file_type,
            item.page_count if item.page_count is not None else "-",
            item.statistics_status,
            item.error_message or "-",
        ]
        lines.append("| " + " | ".join(_cell(cell) for cell in cells) + " |")
    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def build_markdown_report(batch: FileSummaryBatch) -> str:
    """Compose the full Markdown report for *batch*.

    The report consists of four sections separated by blank lines: a title
    with the batch number, the batch-level counters, the per-file table from
    ``build_summary_table`` and a fixed processing note.
    """
    title_section = f"# 文件目录与页数汇总报告\n\n批次号:{batch.batch_no}"
    totals_section = (
        "## 汇总信息\n\n"
        f"- 产品名称:{batch.product_name or '-'}\n"
        f"- 文件总数:{batch.total_files}\n"
        f"- 统计成功:{batch.success_files}\n"
        f"- 统计失败:{batch.failed_files}\n"
        f"- 不支持:{batch.unsupported_files}\n"
        f"- 不确定:{batch.uncertain_files}\n"
        f"- 总页数:{batch.total_pages}"
    )
    details_section = "## 文件明细\n\n" + build_summary_table(batch)
    notes_section = "## 处理说明\n\n单文件失败不会阻断批次,失败与不确定文件已在明细中标注。"

    sections = (title_section, totals_section, details_section, notes_section)
    return "\n\n".join(sections)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFile, str]:
    """Write the Markdown report for *batch* to disk and register the export.

    The report is written as UTF-8 to ``<batch_no>-summary.md`` inside the
    batch's exports directory and recorded as an ``ExportedSummaryFile``.

    Returns:
        A pair of the created export record and the per-file summary table
        (the table only, not the whole report).
    """
    logger.info("Markdown report generation started", extra={"batch_id": batch.pk})

    report_text = build_markdown_report(batch)
    report_path = _exports_dir(batch) / f"{batch.batch_no}-summary.md"
    report_path.write_text(report_text, encoding="utf-8")

    record = ExportedSummaryFile.objects.create(
        batch=batch,
        workflow_type="file_summary",
        workflow_batch_id=batch.pk,
        export_category="summary",
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name=report_path.name,
        storage_path=str(report_path),
    )

    log_fields = {"batch_id": batch.pk, "export_id": record.pk, "path": str(report_path)}
    logger.info("Markdown report generation finished", extra=log_fields)
    return record, build_summary_table(batch)
|
||||||
1
review_agent/file_summary/skills/__init__.py
Normal file
1
review_agent/file_summary/skills/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
69
review_agent/file_summary/skills/archive_extract.py
Normal file
69
review_agent/file_summary/skills/archive_extract.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatchAttachment
|
||||||
|
|
||||||
|
from ..paths import resolve_storage_path
|
||||||
|
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills.archive_extract")
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
|
||||||
|
stem = Path(binding.attachment.original_name).stem or "archive"
|
||||||
|
safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive"
|
||||||
|
return f"{binding.attachment_id}_{safe_stem}"
|
||||||
|
|
||||||
|
|
||||||
|
class ArchiveExtractSkill(BaseSkill):
    """Workflow skill that unpacks every archive attached to the batch."""

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Extract each archive attachment into the batch work directory.

        Every attachment whose extension is in ``ARCHIVE_EXTENSIONS`` is
        extracted to ``<work_dir>/extracted/<safe name>``.

        Returns:
            A failed result when the batch has no work directory, or when
            archives were present but yielded no files; otherwise a
            successful result. ``data["extracted_count"]`` carries the number
            of extracted files.
        """
        batch = context.batch
        if not batch.work_dir:
            logger.error(
                "Archive extract failed without work dir",
                extra={"batch_id": batch.pk, "batch_no": batch.batch_no},
            )
            return SkillResult(
                success=False,
                message="批次工作目录为空,无法解压压缩包。",
                data={"extracted_count": 0},
            )

        extract_base = Path(batch.work_dir) / "extracted"
        archives_seen = 0
        files_extracted = 0
        for binding in FileSummaryBatchAttachment.objects.filter(batch=batch):
            source = resolve_storage_path(binding.attachment.storage_path)
            extension = source.suffix.lower().lstrip(".")
            if extension not in ARCHIVE_EXTENSIONS:
                continue

            archives_seen += 1
            destination = extract_base / _safe_archive_dir_name(binding)
            logger.info(
                "Archive extract started",
                extra={
                    "batch_id": batch.pk,
                    "attachment_id": binding.attachment_id,
                    "path": str(source),
                    "target_dir": str(destination),
                },
            )
            files_extracted += len(extract_archive(source, destination))

        # Archives that produced nothing leave the later steps without input.
        if archives_seen and files_extracted == 0:
            logger.warning(
                "Archive extract produced no files",
                extra={"batch_id": batch.pk, "archive_count": archives_seen},
            )
            return SkillResult(
                success=False,
                message="压缩包未解出任何可扫描文件,请检查压缩包内容或格式。",
                data={"extracted_count": 0},
            )

        logger.info(
            "Archive extract finished",
            extra={
                "batch_id": batch.pk,
                "archive_count": archives_seen,
                "extracted_count": files_extracted,
            },
        )
        return SkillResult(success=True, data={"extracted_count": files_extracted})
|
||||||
52
review_agent/file_summary/skills/attachment_reader.py
Normal file
52
review_agent/file_summary/skills/attachment_reader.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from collections.abc import Iterable
|
||||||
|
|
||||||
|
from review_agent.models import FileAttachment
|
||||||
|
|
||||||
|
from ..services.attachment_reader import read_attachment_details
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills.attachment_reader")
|
||||||
|
|
||||||
|
|
||||||
|
class AttachmentReaderSkill(BaseSkill):
    """Workflow skill that reads the details of a conversation's attachments."""

    name = "attachment_reader"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Read every active, non-deleted attachment of the batch's conversation."""
        candidates = FileAttachment.objects.filter(
            conversation=context.batch.conversation,
            is_active=True,
        ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        return self.run_for_attachments(candidates)

    def run_for_attachments(self, attachments: Iterable[FileAttachment]) -> SkillResult:
        """Read the given attachments and summarise the outcome.

        Returns:
            A failed result when there are no attachments or none was read
            successfully; otherwise a successful one. When attachments exist,
            ``data["attachments"]`` holds one detail dict per attachment.
        """
        pending = list(attachments)
        logger.info(
            "Attachment reader skill started",
            extra={
                "attachment_count": len(pending),
                "attachment_ids": [entry.pk for entry in pending],
            },
        )

        details = [read_attachment_details(entry).to_dict() for entry in pending]
        if not details:
            logger.warning("Attachment reader skill found no attachments")
            return SkillResult(success=False, message="当前对话没有可读取的附件。")

        success_count = sum(1 for detail in details if detail["status"] == "success")
        has_success = success_count > 0
        logger.info(
            "Attachment reader skill finished",
            extra={
                "success": has_success,
                "success_count": success_count,
                "failed_count": len(details) - success_count,
            },
        )

        if has_success:
            outcome_message = "附件解析完成。"
        else:
            outcome_message = "附件解析失败。"
        return SkillResult(
            success=has_success,
            data={"attachments": details},
            message=outcome_message,
        )
|
||||||
24
review_agent/file_summary/skills/base.py
Normal file
24
review_agent/file_summary/skills/base.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatch
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class WorkflowContext:
    """Immutable bundle of state handed to a skill when it runs."""

    # The file-summary batch the skill operates on.
    batch: FileSummaryBatch
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SkillResult:
    """Outcome reported by a skill after it has run."""

    # Whether the skill completed its work.
    success: bool
    # Skill-specific payload; every instance gets its own fresh dict.
    data: dict = field(default_factory=dict)
    # Optional human-readable message describing the outcome.
    message: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
class BaseSkill:
    """Base class for workflow skills.

    Subclasses set ``name`` to a non-empty identifier and override ``run``.
    """

    name = ""

    def run(self, context: WorkflowContext) -> SkillResult:
        """Execute the skill for *context*; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
|
||||||
108
review_agent/file_summary/skills/document_page_count.py
Normal file
108
review_agent/file_summary/skills/document_page_count.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryItem
|
||||||
|
|
||||||
|
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills.document_page_count")
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentPageCountSkill(BaseSkill):
    """Workflow skill that counts pages for every inventoried file of a batch."""

    name = "document_page_count"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Count pages per item and store the totals on the batch.

        Items are processed in ``file_index`` order. Items with an
        unsupported file type are marked as such without counting. Every
        other item is counted up to three times, stopping at the first
        attempt that does not fail, and the outcome is saved on the item.
        A failing file never aborts the batch.

        Returns:
            Always a successful ``SkillResult``; per-file problems are
            recorded on the items and in the batch counters.
        """
        batch = context.batch
        succeeded = failed = unsupported = uncertain = page_total = 0
        logger.info("Document page count started", extra={"batch_id": batch.pk})

        for entry in batch.items.order_by("file_index"):
            if entry.file_type not in SUPPORTED_EXTENSIONS:
                entry.statistics_status = FileSummaryItem.StatisticsStatus.UNSUPPORTED
                unsupported += 1
                entry.save(update_fields=["statistics_status", "updated_at"])
                logger.info(
                    "Document page count unsupported",
                    extra={
                        "batch_id": batch.pk,
                        "item_id": entry.pk,
                        "file_type": entry.file_type,
                        "file_name": entry.file_name,
                    },
                )
                continue

            outcome = None
            for retries_used in range(3):
                logger.info(
                    "Document page count attempt",
                    extra={
                        "batch_id": batch.pk,
                        "item_id": entry.pk,
                        "attempt": retries_used + 1,
                        "storage_path": entry.storage_path,
                    },
                )
                outcome = count_document_pages(entry.storage_path)
                # retry_count records how many attempts preceded this one.
                entry.retry_count = retries_used
                if outcome.status != "failed":
                    break

            entry.statistics_status = outcome.status
            entry.page_count = outcome.page_count
            entry.error_message = outcome.error_message
            entry.save(
                update_fields=[
                    "statistics_status",
                    "page_count",
                    "retry_count",
                    "error_message",
                    "updated_at",
                ]
            )

            if outcome.status == FileSummaryItem.StatisticsStatus.SUCCESS:
                succeeded += 1
                page_total += outcome.page_count or 0
            elif outcome.status == FileSummaryItem.StatisticsStatus.UNCERTAIN:
                uncertain += 1
            elif outcome.status == FileSummaryItem.StatisticsStatus.UNSUPPORTED:
                unsupported += 1
            else:
                failed += 1
                logger.warning(
                    "Document page count failed",
                    extra={
                        "batch_id": batch.pk,
                        "item_id": entry.pk,
                        "file_name": entry.file_name,
                        "error": outcome.error_message,
                    },
                )

        batch.success_files = succeeded
        batch.failed_files = failed
        batch.unsupported_files = unsupported
        batch.uncertain_files = uncertain
        batch.total_pages = page_total
        batch.save(
            update_fields=[
                "success_files",
                "failed_files",
                "unsupported_files",
                "uncertain_files",
                "total_pages",
            ]
        )
        logger.info(
            "Document page count finished",
            extra={
                "batch_id": batch.pk,
                "success_files": succeeded,
                "failed_files": failed,
                "unsupported_files": unsupported,
                "uncertain_files": uncertain,
                "total_pages": page_total,
            },
        )
        return SkillResult(success=True)
|
||||||
69
review_agent/file_summary/skills/file_inventory.py
Normal file
69
review_agent/file_summary/skills/file_inventory.py
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
from review_agent.models import FileSummaryBatchAttachment
|
||||||
|
|
||||||
|
from ..paths import resolve_storage_path
|
||||||
|
from ..services.archive import ARCHIVE_EXTENSIONS
|
||||||
|
from ..services.inventory import scan_files_to_items
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills.file_inventory")
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
|
||||||
|
stem = Path(binding.attachment.original_name).stem or "archive"
|
||||||
|
safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive"
|
||||||
|
return f"{binding.attachment_id}_{safe_stem}"
|
||||||
|
|
||||||
|
|
||||||
|
class FileInventorySkill(BaseSkill):
    """Workflow skill that inventories the files belonging to a batch."""

    name = "file_inventory"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Collect the scan roots of the batch and inventory their files.

        Plain attachments are scanned where they are stored. Archive
        attachments are scanned from their extraction directory under
        ``<work_dir>/extracted``.

        Returns:
            A failed result when any archive's extraction directory is
            missing; otherwise a successful result whose
            ``data["total_files"]`` is the number of inventoried items.
        """
        batch = context.batch
        scan_roots: list[Path] = []
        missing_dirs: list[str] = []

        for binding in FileSummaryBatchAttachment.objects.filter(batch=batch):
            source = resolve_storage_path(binding.attachment.storage_path)
            extension = source.suffix.lower().lstrip(".")
            if extension not in ARCHIVE_EXTENSIONS:
                scan_roots.append(source)
                continue

            extracted_dir = Path(batch.work_dir) / "extracted" / _safe_archive_dir_name(binding)
            if extracted_dir.exists():
                scan_roots.append(extracted_dir)
            else:
                missing_dirs.append(str(extracted_dir))

        if missing_dirs:
            logger.warning(
                "File inventory missing extracted roots",
                extra={
                    "batch_id": batch.pk,
                    "missing_extract_roots": missing_dirs,
                },
            )
            return SkillResult(success=False, message="压缩包解压目录不存在,无法扫描解压后的文件。")

        logger.info(
            "File inventory started",
            extra={
                "batch_id": batch.pk,
                "root_count": len(scan_roots),
                "roots": [str(root) for root in scan_roots],
            },
        )
        inventoried = scan_files_to_items(batch=batch, roots=scan_roots)
        file_total = len(inventoried)
        logger.info(
            "File inventory finished",
            extra={"batch_id": batch.pk, "total_files": file_total},
        )
        return SkillResult(success=True, data={"total_files": file_total})
|
||||||
22
review_agent/file_summary/skills/product_detect.py
Normal file
22
review_agent/file_summary/skills/product_detect.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from ..services.product_detect import detect_product_name
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills.product_detect")
|
||||||
|
|
||||||
|
|
||||||
|
class ProductDetectSkill(BaseSkill):
    """Workflow skill that detects the product name of a batch."""

    name = "product_detect"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Run product-name detection and report the detected name.

        Returns:
            Always a successful result; ``data["product_name"]`` holds
            whatever ``detect_product_name`` returned.
        """
        batch = context.batch
        logger.info("Product detect started", extra={"batch_id": batch.pk})

        detected = detect_product_name(batch)

        log_fields = {"batch_id": batch.pk, "product_name": detected}
        logger.info("Product detect finished", extra=log_fields)
        return SkillResult(success=True, data={"product_name": detected})
|
||||||
44
review_agent/file_summary/skills/registry.py
Normal file
44
review_agent/file_summary/skills/registry.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills")
|
||||||
|
|
||||||
|
|
||||||
|
class SkillRegistry:
    """Name-indexed collection of skills with logged execution."""

    def __init__(self):
        """Start with an empty registry."""
        self._skills: dict[str, BaseSkill] = {}

    def register(self, skill: BaseSkill) -> None:
        """Add *skill* under its ``name``, replacing any earlier registration.

        Raises:
            ValueError: If the skill declares no name.
        """
        skill_name = skill.name
        if not skill_name:
            raise ValueError("Skill 必须声明 name。")
        self._skills[skill_name] = skill
        logger.info("Skill registered: %s", skill_name, extra={"skill_name": skill_name})

    def get(self, name: str) -> BaseSkill:
        """Return the skill registered as *name*.

        Raises:
            KeyError: If no such skill is registered; the message names the
                missing skill and the original lookup error is chained.
        """
        try:
            skill = self._skills[name]
        except KeyError as exc:
            raise KeyError(f"Skill 未注册:{name}") from exc
        return skill

    def execute(self, name: str, context: WorkflowContext) -> SkillResult:
        """Run the skill registered as *name* and return its result.

        Start and finish are logged. Any exception, including an unknown
        skill name, is logged with its traceback and re-raised unchanged.
        """
        batch_id = context.batch.pk
        logger.info("Skill started: %s", name, extra={"skill_name": name, "batch_id": batch_id})
        try:
            outcome = self.get(name).run(context)
        except Exception:
            logger.exception("Skill crashed: %s", name, extra={"skill_name": name, "batch_id": context.batch.pk})
            raise

        logger.info(
            "Skill finished: %s",
            name,
            extra={
                "skill_name": name,
                "batch_id": context.batch.pk,
                "success": outcome.success,
                "result_message": outcome.message,
            },
        )
        return outcome
|
||||||
47
review_agent/file_summary/skills/summary_report.py
Normal file
47
review_agent/file_summary/skills/summary_report.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from django.urls import reverse
|
||||||
|
|
||||||
|
from review_agent.models import Message
|
||||||
|
|
||||||
|
from ..services.export_excel import generate_excel_export
|
||||||
|
from ..services.report import generate_markdown_report
|
||||||
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.skills.summary_report")
|
||||||
|
|
||||||
|
|
||||||
|
class SummaryReportSkill(BaseSkill):
    """Skill that generates the Markdown and Excel exports and posts them to the conversation."""

    name = "summary_report"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Generate both exports, post an assistant message with download links, and report success.

        Returns:
            A successful ``SkillResult`` whose ``data`` holds the primary keys
            of the Markdown and Excel export records.
        """
        batch = context.batch
        logger.info("Summary report started", extra={"batch_id": batch.pk})

        markdown_export, summary_table = generate_markdown_report(batch)
        excel_export = generate_excel_export(batch)
        export_ids = {
            "markdown_export_id": markdown_export.pk,
            "excel_export_id": excel_export.pk,
        }

        # Both exports are served by the same download view, keyed by export id.
        markdown_url = reverse("file_summary_export_download", args=[markdown_export.pk])
        excel_url = reverse("file_summary_export_download", args=[excel_export.pk])
        sections = [
            "文件目录与页数汇总已完成。",
            f"{summary_table}",
            f"[下载 Markdown 报告]({markdown_url}) | [下载 Excel 明细]({excel_url})",
        ]
        Message.objects.create(
            conversation=batch.conversation,
            role=Message.Role.ASSISTANT,
            content="\n\n".join(sections),
        )

        logger.info("Summary report finished", extra={"batch_id": batch.pk, **export_ids})
        return SkillResult(success=True, data=export_ids)
|
||||||
112
review_agent/file_summary/storage.py
Normal file
112
review_agent/file_summary/storage.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import transaction
|
||||||
|
from django.utils.text import get_valid_filename
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileAttachment
|
||||||
|
|
||||||
|
from .constants import ATTACHMENT_ROOT
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.storage")
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_original_name(name: str) -> str:
    """Return a sanitized base filename for an upload, or a generated one if nothing remains.

    Directory components are dropped first, then the remainder is passed
    through Django's ``get_valid_filename``.
    """
    base_name = Path(name).name
    sanitized = get_valid_filename(base_name)
    if sanitized:
        return sanitized
    return f"upload-{uuid4().hex}"
|
||||||
|
|
||||||
|
|
||||||
|
def _relative_attachment_path(conversation: Conversation, filename: str, version_no: int) -> Path:
    """Build the storage path for one version of an attachment.

    The layout is ``ATTACHMENT_ROOT/<user id>/<conversation id>/attachments/``
    followed by ``<stem>_v<version>_<8 hex chars><suffix>``; the random part
    keeps stored names unique.
    """
    name_parts = Path(filename)
    unique_tag = uuid4().hex[:8]
    stored_name = f"{name_parts.stem}_v{version_no}_{unique_tag}{name_parts.suffix}"
    conversation_dir = ATTACHMENT_ROOT / str(conversation.user_id) / str(conversation.pk)
    return conversation_dir / "attachments" / stored_name
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_inside_media_root(path: Path) -> None:
    """Reject any path that does not resolve to ``MEDIA_ROOT`` or a location beneath it.

    Raises:
        ValueError: If the resolved path is outside the resolved ``MEDIA_ROOT``.
    """
    media_root = Path(settings.MEDIA_ROOT).resolve()
    resolved = path.resolve()
    is_inside = resolved == media_root or media_root in resolved.parents
    if not is_inside:
        raise ValueError("上传路径必须位于 MEDIA_ROOT 内。")
|
||||||
|
|
||||||
|
|
||||||
|
@transaction.atomic
def save_uploaded_attachment(*, conversation: Conversation, user, uploaded_file) -> FileAttachment:
    """Store an uploaded file and create a versioned attachment record.

    The file is written below ``MEDIA_ROOT``; earlier active versions with the
    same sanitized name in this conversation are deactivated and a new active
    ``FileAttachment`` row is created with the next version number.

    Args:
        conversation: Conversation the attachment belongs to.
        user: Owner recorded on the attachment.
        uploaded_file: Upload object providing ``name``, ``size``, ``chunks()``
            and optionally ``content_type``.

    Returns:
        The newly created, active ``FileAttachment``.

    Raises:
        ValueError: If the computed target path falls outside ``MEDIA_ROOT``.
    """
    original_name = _safe_original_name(uploaded_file.name)
    content_type = getattr(uploaded_file, "content_type", "") or ""
    logger.info(
        "Attachment upload save started",
        extra={
            "conversation_id": conversation.pk,
            "user_id": user.pk,
            "original_name": original_name,
            "file_size": uploaded_file.size,
            "content_type": content_type,
        },
    )
    # NOTE(review): the version number is derived without locking; concurrent
    # uploads of the same name may compute the same value — confirm whether a
    # uniqueness constraint covers this.
    latest = (
        FileAttachment.objects.filter(conversation=conversation, original_name=original_name)
        .order_by("-version_no")
        .first()
    )
    version_no = (latest.version_no if latest else 0) + 1
    relative_path = _relative_attachment_path(conversation, original_name, version_no)
    absolute_path = Path(settings.MEDIA_ROOT) / relative_path
    _ensure_inside_media_root(absolute_path)
    absolute_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with absolute_path.open("wb") as target:
            for chunk in uploaded_file.chunks():
                target.write(chunk)

        FileAttachment.objects.filter(
            conversation=conversation,
            original_name=original_name,
            is_active=True,
        ).update(is_active=False)

        attachment = FileAttachment.objects.create(
            conversation=conversation,
            user=user,
            original_name=original_name,
            version_no=version_no,
            is_active=True,
            storage_path=relative_path.as_posix(),
            file_size=uploaded_file.size,
            content_type=content_type,
        )
    except Exception:
        # The database changes roll back with the transaction, but the file on
        # disk would stay behind as an orphan. The stored name contains a fresh
        # random component, so removing it cannot touch an older upload.
        try:
            absolute_path.unlink(missing_ok=True)
        except OSError:
            logger.warning(
                "Attachment upload cleanup failed",
                extra={"conversation_id": conversation.pk, "storage_path": relative_path.as_posix()},
            )
        raise

    logger.info(
        "Attachment upload save finished",
        extra={
            "conversation_id": conversation.pk,
            "attachment_id": attachment.pk,
            "version_no": attachment.version_no,
            "storage_path": attachment.storage_path,
        },
    )
    return attachment
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_attachment(attachment: FileAttachment) -> dict[str, object]:
    """Convert an attachment record into a JSON-ready dictionary.

    ``created_at`` is rendered with ``isoformat()``; the primary key is
    exposed as ``id``; all other fields are copied as-is.
    """
    copied_fields = (
        "original_name",
        "version_no",
        "is_active",
        "file_size",
        "content_type",
        "upload_status",
    )
    payload: dict[str, object] = {"id": attachment.pk}
    payload.update((field, getattr(attachment, field)) for field in copied_fields)
    payload["created_at"] = attachment.created_at.isoformat()
    return payload
|
||||||
365
review_agent/file_summary/views.py
Normal file
365
review_agent/file_summary/views.py
Normal file
@@ -0,0 +1,365 @@
|
|||||||
|
from django.contrib.auth.decorators import login_required
|
||||||
|
from django.db import transaction
|
||||||
|
from django.db.models import Count, Q
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from django.http import FileResponse, Http404, JsonResponse
|
||||||
|
from django.views.decorators.http import require_http_methods
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
ApplicationFormFillBatch,
|
||||||
|
Conversation,
|
||||||
|
ExportedSummaryFile,
|
||||||
|
FileAttachment,
|
||||||
|
Message,
|
||||||
|
RegulatoryInfoPackageBatch,
|
||||||
|
RegulatoryReviewBatch,
|
||||||
|
)
|
||||||
|
from review_agent.models import FileSummaryBatch, WorkflowEvent
|
||||||
|
from review_agent.notifications.presenter import serialize_notification_records
|
||||||
|
from .events import serialize_event
|
||||||
|
from .paths import resolve_storage_path
|
||||||
|
|
||||||
|
from .storage import save_uploaded_attachment, serialize_attachment
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.views")
|
||||||
|
|
||||||
|
|
||||||
|
def _conversation_for_user(user, conversation_id: int) -> Conversation:
    """Return the conversation with this id owned by ``user``.

    Raises:
        Http404: If no conversation matches both the id and the owner.
    """
    owned = Conversation.objects.filter(pk=conversation_id, user=user).first()
    if owned:
        return owned
    raise Http404("对话不存在。")
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["POST", "GET"])
@login_required
def attachments(request, conversation_id: int):
    """Upload attachments to a conversation (POST) or list them (GET).

    POST expects one or more files under the ``files`` form field and returns
    the serialized attachments that were stored; a request without files
    yields HTTP 400. GET returns every attachment of the conversation ordered
    by name and then by descending version.

    Raises:
        Http404: If the conversation does not belong to the requesting user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)

    if request.method == "POST":
        files = request.FILES.getlist("files")
        if not files:
            return JsonResponse({"error": "请选择至少一个文件。"}, status=400)
        logger.info(
            "Attachment upload request received",
            extra={
                "conversation_id": conversation.pk,
                "user_id": request.user.pk,
                "file_count": len(files),
                "filenames": [uploaded_file.name for uploaded_file in files],
            },
        )
        saved = [
            save_uploaded_attachment(
                conversation=conversation,
                user=request.user,
                uploaded_file=uploaded_file,
            )
            for uploaded_file in files
        ]
        logger.info(
            "Attachment upload request finished",
            extra={
                "conversation_id": conversation.pk,
                "attachment_ids": [attachment.pk for attachment in saved],
            },
        )
        return JsonResponse({"attachments": [serialize_attachment(item) for item in saved]})

    # Evaluate the queryset once: a separate COUNT query followed by iteration
    # costs two round trips and the two results could disagree.
    listed = list(
        FileAttachment.objects.filter(conversation=conversation).order_by(
            "original_name",
            "-version_no",
        )
    )
    logger.info(
        "Attachment list requested",
        extra={"conversation_id": conversation.pk, "attachment_count": len(listed)},
    )
    return JsonResponse({"attachments": [serialize_attachment(item) for item in listed]})
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["DELETE", "PATCH"])
@login_required
def attachment_detail(request, conversation_id: int, attachment_id: int):
    """Rename or toggle an attachment (PATCH), or soft-delete it (DELETE).

    PATCH accepts a JSON object with optional ``original_name`` (only its base
    name is kept) and ``is_active``. DELETE marks the attachment as deleted and
    inactive; the stored file is not removed here.

    Returns:
        A JSON response with ``ok`` and the serialized attachment, or an HTTP
        400 JSON error when the PATCH body is not a valid JSON object.

    Raises:
        Http404: If the conversation or the attachment is not owned by the
            requesting user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    attachment = FileAttachment.objects.filter(
        pk=attachment_id,
        conversation=conversation,
        user=request.user,
    ).first()
    if not attachment:
        raise Http404("附件不存在。")

    if request.method == "PATCH":
        try:
            payload = json.loads(request.body.decode("utf-8") or "{}")
        except (UnicodeDecodeError, json.JSONDecodeError):
            # A body that is not UTF-8 is a client error just like broken JSON.
            return JsonResponse({"error": "JSON 格式错误。"}, status=400)
        if not isinstance(payload, dict):
            # Valid JSON that is not an object (list, number, ...) has no fields to read.
            return JsonResponse({"error": "JSON 格式错误。"}, status=400)

        raw_name = payload.get("original_name") or ""
        if not isinstance(raw_name, str):
            return JsonResponse({"error": "original_name 必须是字符串。"}, status=400)

        update_fields = []
        # Keep only the final path component so a client cannot smuggle directories.
        new_name = Path(raw_name.strip()).name
        if new_name:
            attachment.original_name = new_name
            update_fields.append("original_name")
        if "is_active" in payload:
            attachment.is_active = bool(payload["is_active"])
            update_fields.append("is_active")
        if update_fields:
            attachment.save(update_fields=update_fields)
        logger.info(
            "Attachment updated",
            extra={
                "conversation_id": conversation.pk,
                "attachment_id": attachment.pk,
                "update_fields": update_fields,
            },
        )
        return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})

    attachment.upload_status = FileAttachment.UploadStatus.DELETED
    attachment.is_active = False
    attachment.save(update_fields=["upload_status", "is_active"])
    logger.info(
        "Attachment deleted",
        extra={"conversation_id": conversation.pk, "attachment_id": attachment.pk},
    )
    return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def conversation_list(request):
    """List the requesting user's conversations, most recently updated first.

    Each entry carries the number of attachments whose upload status is not
    "deleted"; conversations without a title are shown as "新对话".
    """
    not_deleted = ~Q(file_attachments__upload_status=FileAttachment.UploadStatus.DELETED)
    rows = (
        Conversation.objects.filter(user=request.user)
        .annotate(attachment_count=Count("file_attachments", filter=not_deleted))
        .order_by("-updated_at", "-id")
    )
    serialized = []
    for row in rows:
        serialized.append(
            {
                "id": row.pk,
                "title": row.title or "新对话",
                "updated_at": row.updated_at.isoformat(),
                "attachment_count": row.attachment_count,
            }
        )
    return JsonResponse({"conversations": serialized})
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["DELETE"])
@login_required
def conversation_detail(request, conversation_id: int):
    """Delete a conversation owned by the requesting user.

    The application-form-fill and regulatory-review batches of the
    conversation are deleted explicitly first, then the conversation itself,
    all inside one transaction.

    Raises:
        Http404: If the conversation does not belong to the requesting user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    with transaction.atomic():
        for batch_model in (ApplicationFormFillBatch, RegulatoryReviewBatch):
            batch_model.objects.filter(conversation=conversation).delete()
        conversation.delete()
    return JsonResponse({"ok": True, "conversation_id": conversation_id})
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def attachment_download(request, conversation_id: int, attachment_id: int):
    """Stream a non-deleted attachment of the user's conversation as a file download.

    Returns:
        A ``FileResponse`` with the attachment's original name, or an HTTP 404
        JSON error when the record exists but the stored file is missing.

    Raises:
        Http404: If the conversation or attachment is not found for this user,
            or the attachment is marked deleted.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    owned = FileAttachment.objects.filter(
        pk=attachment_id,
        conversation=conversation,
        user=request.user,
    )
    attachment = owned.exclude(upload_status=FileAttachment.UploadStatus.DELETED).first()
    if not attachment:
        raise Http404("附件不存在。")

    path = resolve_storage_path(attachment.storage_path)
    if path.exists():
        logger.info(
            "Attachment download started",
            extra={"conversation_id": conversation.pk, "attachment_id": attachment.pk},
        )
        return FileResponse(
            path.open("rb"),
            as_attachment=True,
            filename=attachment.original_name,
            content_type=attachment.content_type or "application/octet-stream",
        )

    logger.warning(
        "Attachment download missing file",
        extra={"attachment_id": attachment.pk, "storage_path": attachment.storage_path},
    )
    return JsonResponse({"error": "文件不存在。"}, status=404)
|
||||||
|
|
||||||
|
|
||||||
|
def _serialize_message(message: Message) -> dict[str, object]:
    """Convert a conversation message into a JSON-ready dictionary.

    The primary key is exposed as ``id`` and ``created_at`` is rendered with
    ``isoformat()``.
    """
    payload: dict[str, object] = {"id": message.pk}
    for field in ("role", "content"):
        payload[field] = getattr(message, field)
    payload["created_at"] = message.created_at.isoformat()
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def conversation_messages(request, conversation_id: int):
    """Return the messages of a conversation whose id is greater than ``?after=``.

    A missing, empty or non-integer ``after`` value is treated as 0. The
    response also reports the id of the newest message in the conversation
    (0 when there is none).

    Raises:
        Http404: If the conversation does not belong to the requesting user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)

    raw_after = request.GET.get("after") or "0"
    try:
        after_id = int(raw_after)
    except ValueError:
        after_id = 0

    new_messages = list(conversation.messages.filter(pk__gt=after_id).order_by("id"))
    newest_id = conversation.messages.order_by("-id").values_list("id", flat=True).first()
    latest_message_id = newest_id or 0

    logger.info(
        "Conversation incremental messages requested",
        extra={
            "conversation_id": conversation.pk,
            "after_id": after_id,
            "message_count": len(new_messages),
            "latest_message_id": latest_message_id,
        },
    )
    body = {
        "conversation_id": conversation.pk,
        "latest_message_id": latest_message_id,
        "messages": [_serialize_message(item) for item in new_messages],
    }
    return JsonResponse(body)
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Report the state of a file-summary batch, its node runs and its notifications.

    Raises:
        Http404: If no batch with this id belongs to the requesting user.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")

    notifications = serialize_notification_records("file_summary", batch.pk)

    batch_fields = (
        "batch_no",
        "status",
        "product_name",
        "total_files",
        "success_files",
        "failed_files",
        "total_pages",
        "error_message",
    )
    batch_payload = {"id": batch.pk, "workflow_type": "file_summary"}
    for field in batch_fields:
        batch_payload[field] = getattr(batch, field)

    node_fields = ("node_code", "node_name", "status", "progress", "message")
    node_payloads = [
        {field: getattr(node, field) for field in node_fields}
        for node in batch.node_runs.order_by("id")
    ]

    return JsonResponse(
        {
            "batch": batch_payload,
            "nodes": node_payloads,
            "notifications": notifications,
            "latest_notification": notifications[0] if notifications else None,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def batch_events(request, batch_id: int):
    """Return the workflow events of a batch whose id is greater than ``?after=``.

    A missing, empty or non-integer ``after`` value is treated as 0.

    Raises:
        Http404: If no batch with this id belongs to the requesting user.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")

    raw_after = request.GET.get("after") or "0"
    try:
        after_id = int(raw_after)
    except ValueError:
        after_id = 0

    newer_events = WorkflowEvent.objects.filter(batch=batch, pk__gt=after_id).order_by("id")
    serialized = [serialize_event(item) for item in newer_events]
    return JsonResponse({"events": serialized})
|
||||||
|
|
||||||
|
|
||||||
|
@require_http_methods(["GET"])
@login_required
def export_download(request, export_id: int):
    """Stream an exported summary file that the requesting user may access.

    The content type is chosen from the export type; Word exports are further
    distinguished by the ``.doc`` / ``.docx`` suffix of the file name.

    Returns:
        A ``FileResponse`` download, or an HTTP 404 JSON error when the export
        record exists but its file is missing on disk.

    Raises:
        Http404: If the export does not exist or is not accessible to the user.
    """
    exported = _export_for_user(request.user, export_id)
    if not exported:
        raise Http404("导出文件不存在。")

    path = Path(exported.storage_path)
    if not path.exists():
        logger.warning(
            "Export download missing file",
            extra={"export_id": exported.pk, "storage_path": exported.storage_path},
        )
        return JsonResponse({"error": "文件不存在。"}, status=404)

    suffix = Path(exported.file_name).suffix.lower()
    export_types = ExportedSummaryFile.ExportType
    docx_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    by_export_type = {
        export_types.MARKDOWN: "text/markdown; charset=utf-8",
        export_types.EXCEL: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        export_types.JSON: "application/json; charset=utf-8",
        export_types.WORD: docx_type,
        export_types.PDF: "application/pdf",
        export_types.ZIP: "application/zip",
    }
    content_type = by_export_type.get(exported.export_type, "application/octet-stream")
    if exported.export_type == export_types.WORD:
        # Legacy .doc files need the older MIME type; any other suffix keeps
        # the type already selected above.
        by_word_suffix = {".doc": "application/msword", ".docx": docx_type}
        content_type = by_word_suffix.get(suffix, content_type)

    logger.info(
        "Export download started",
        extra={
            "export_id": exported.pk,
            "batch_id": exported.batch_id,
            "file_name": exported.file_name,
            "content_type": content_type,
        },
    )
    return FileResponse(
        path.open("rb"),
        as_attachment=True,
        filename=exported.file_name,
        content_type=content_type,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _export_for_user(user, export_id: int) -> ExportedSummaryFile | None:
    """Return the export record if ``user`` is allowed to download it, else ``None``.

    Exports of the "application_form_fill" and "regulatory_info_package"
    workflows are authorized through their workflow batch, which must be
    non-deleted and belong to one of the user's conversations. Every other
    export is authorized through the owner of its linked batch.
    """
    exported = ExportedSummaryFile.objects.filter(pk=export_id).first()
    if not exported:
        return None

    conversation_scoped_batches = {
        "application_form_fill": ApplicationFormFillBatch,
        "regulatory_info_package": RegulatoryInfoPackageBatch,
    }
    batch_model = conversation_scoped_batches.get(exported.workflow_type)
    if batch_model is not None:
        if not exported.workflow_batch_id:
            return None
        allowed = batch_model.objects.filter(
            pk=exported.workflow_batch_id,
            conversation__user=user,
            is_deleted=False,
        ).exists()
        return exported if allowed else None

    if exported.batch_id is None or exported.batch.user_id != user.pk:
        return None
    return exported
|
||||||
257
review_agent/file_summary/workflow.py
Normal file
257
review_agent/file_summary/workflow.py
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from threading import Thread
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.db import transaction
|
||||||
|
from django.utils import timezone
|
||||||
|
|
||||||
|
from review_agent.models import (
|
||||||
|
Conversation,
|
||||||
|
FileAttachment,
|
||||||
|
FileSummaryBatch,
|
||||||
|
FileSummaryBatchAttachment,
|
||||||
|
Message,
|
||||||
|
WorkflowNodeRun,
|
||||||
|
)
|
||||||
|
from review_agent.notifications.dispatcher import dispatch_workflow_notification
|
||||||
|
from review_agent.notifications.workflow_adapters import build_file_summary_context
|
||||||
|
|
||||||
|
from .events import record_event
|
||||||
|
from .services.archive import ARCHIVE_EXTENSIONS
|
||||||
|
from .skills.archive_extract import ArchiveExtractSkill
|
||||||
|
from .skills.base import WorkflowContext
|
||||||
|
from .skills.document_page_count import DocumentPageCountSkill
|
||||||
|
from .skills.file_inventory import FileInventorySkill
|
||||||
|
from .skills.product_detect import ProductDetectSkill
|
||||||
|
from .skills.registry import SkillRegistry
|
||||||
|
from .skills.summary_report import SummaryReportSkill
|
||||||
|
|
||||||
|
|
||||||
|
# Ordered workflow nodes as (node_code, node_name, skill_name) tuples.
# One WorkflowNodeRun is created per entry when a batch is created; when a
# node runs, a non-empty skill_name is executed through the skill registry,
# while an empty skill_name means the node runs no skill.
NODE_DEFINITIONS = [
    ("upload", "附件固化", ""),
    ("extract", "压缩包解压", "archive_extract"),
    ("inventory", "文件扫描", "file_inventory"),
    ("page_count", "页数统计", "document_page_count"),
    ("product_detect", "产品识别", "product_detect"),
    ("report", "报告输出", "summary_report"),
    ("complete", "完成", ""),
]
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger("review_agent.file_summary.workflow")
|
||||||
|
|
||||||
|
|
||||||
|
def default_skill_registry() -> SkillRegistry:
    """Build a registry holding one fresh instance of every file-summary skill."""
    skill_classes = (
        ArchiveExtractSkill,
        FileInventorySkill,
        DocumentPageCountSkill,
        ProductDetectSkill,
        SummaryReportSkill,
    )
    registry = SkillRegistry()
    for skill_class in skill_classes:
        registry.register(skill_class())
    return registry
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_no() -> str:
    """Return a batch number of the form ``FS-<local timestamp>-<6 hex chars>``."""
    timestamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    random_suffix = uuid4().hex[:6]
    return f"FS-{timestamp}-{random_suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def build_batch_work_dir(batch_no: str) -> Path:
    """Return the working directory of a batch: ``MEDIA_ROOT/file_summary/work/<batch_no>``."""
    media_root = Path(settings.MEDIA_ROOT)
    return media_root.joinpath("file_summary", "work", batch_no)
|
||||||
|
|
||||||
|
|
||||||
|
@transaction.atomic
def create_file_summary_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
) -> FileSummaryBatch:
    """Create a file-summary batch from the conversation's active attachments.

    The active, non-deleted attachments are locked with ``select_for_update``,
    linked to a new batch (archives and plain files get different source
    roles), marked as bound, and one node run is created per entry of
    ``NODE_DEFINITIONS``. The batch working directory is created on disk.

    Returns:
        The newly created ``FileSummaryBatch``.

    Raises:
        ValueError: If the conversation has no usable attachment.
    """
    candidates = (
        FileAttachment.objects.select_for_update()
        .filter(conversation=conversation, is_active=True)
        .exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        .order_by("original_name", "-created_at")
    )
    active_attachments = list(candidates)
    if not active_attachments:
        raise ValueError("当前对话没有可用附件。")

    attachment_ids = [item.pk for item in active_attachments]
    logger.info(
        "File summary batch creation started",
        extra={
            "conversation_id": conversation.pk,
            "user_id": user.pk,
            "attachment_ids": attachment_ids,
        },
    )

    batch_no = build_batch_no()
    work_dir = build_batch_work_dir(batch_no)
    work_dir.mkdir(parents=True, exist_ok=True)

    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        batch_no=batch_no,
        work_dir=str(work_dir),
    )

    roles = FileSummaryBatchAttachment.SourceRole
    for attachment in active_attachments:
        # The role is decided purely by the file extension (without the dot).
        extension = Path(attachment.original_name).suffix.lower().lstrip(".")
        if extension in ARCHIVE_EXTENSIONS:
            source_role = roles.ARCHIVE
        else:
            source_role = roles.MULTI_FILE
        FileSummaryBatchAttachment.objects.create(
            batch=batch,
            attachment=attachment,
            source_role=source_role,
        )
        attachment.upload_status = FileAttachment.UploadStatus.BOUND
        attachment.save(update_fields=["upload_status"])

    for node_code, node_name, _unused_skill in NODE_DEFINITIONS:
        WorkflowNodeRun.objects.create(
            batch=batch,
            workflow_type="file_summary",
            workflow_batch_id=batch.pk,
            node_group="file_summary",
            node_code=node_code,
            node_name=node_name,
        )

    record_event(batch, "workflow_created", {"batch_id": batch.pk, "batch_no": batch.batch_no})
    logger.info(
        "File summary batch created",
        extra={"batch_id": batch.pk, "batch_no": batch.batch_no},
    )
    return batch
|
||||||
|
|
||||||
|
|
||||||
|
class WorkflowExecutor:
    """Runs the node runs of a file-summary batch in order, recording status and events."""

    def __init__(self, batch: FileSummaryBatch, registry: SkillRegistry | None = None):
        """Bind the executor to ``batch``; a default skill registry is built when none is given."""
        self.batch = batch
        self.registry = registry if registry else default_skill_registry()

    def run(self) -> None:
        """Execute all nodes and persist the final batch status.

        The batch is marked running, every node run is executed in id order,
        and the batch ends as success or, if any node raises, as failed with
        the error text stored. A completion notification is attempted in both
        cases. Node failures are not propagated to the caller.
        """
        batch = self.batch
        logger.info("Workflow run started", extra={"batch_id": batch.pk})
        batch.status = FileSummaryBatch.Status.RUNNING
        batch.started_at = timezone.now()
        batch.save(update_fields=["status", "started_at"])
        record_event(batch, "workflow_started", {"batch_id": batch.pk})

        try:
            for node in batch.node_runs.order_by("id"):
                self._run_node(node)
        except Exception as exc:
            failure_text = str(exc)
            logger.exception(
                "Workflow run failed",
                extra={"batch_id": batch.pk, "error": failure_text},
            )
            batch.status = FileSummaryBatch.Status.FAILED
            batch.error_message = failure_text
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(batch, "workflow_failed", {"message": failure_text})
            self._dispatch_completion_notification()
        else:
            batch.status = FileSummaryBatch.Status.SUCCESS
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "finished_at"])
            record_event(batch, "workflow_completed", {"batch_id": batch.pk})
            self._dispatch_completion_notification()
            logger.info("Workflow run completed", extra={"batch_id": batch.pk})

    def _dispatch_completion_notification(self) -> None:
        """Send the completion notification; failures are logged and never block the workflow."""
        try:
            notification_context = build_file_summary_context(self.batch)
            dispatch_workflow_notification(notification_context)
        except Exception as exc:
            logger.warning(
                "File summary notification failed without blocking workflow",
                extra={"batch_id": self.batch.pk, "error": str(exc)},
            )

    def _record_node_progress(self, node: WorkflowNodeRun) -> None:
        """Record a ``node_progress`` event reflecting the node's current fields."""
        record_event(
            self.batch,
            "node_progress",
            {
                "node_code": node.node_code,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            },
        )

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Run one node: mark it running, execute its skill if any, then mark it done.

        Raises:
            Exception: Whatever the skill raised, or ``RuntimeError`` when the
                skill reports an unsuccessful result; the node is marked
                failed before the exception propagates.
        """
        logger.info(
            "Workflow node started",
            extra={
                "batch_id": self.batch.pk,
                "node_code": node.node_code,
                "node_name": node.node_name,
            },
        )
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        self._record_node_progress(node)

        # First definition whose code matches wins; nodes without a skill map to "".
        skill_name = ""
        for code, _label, candidate in NODE_DEFINITIONS:
            if code == node.node_code:
                skill_name = candidate
                break

        if skill_name:
            try:
                result = self.registry.execute(skill_name, WorkflowContext(batch=self.batch))
                if not result.success:
                    logger.warning(
                        "Workflow node skill failed",
                        extra={
                            "batch_id": self.batch.pk,
                            "node_code": node.node_code,
                            "skill_name": skill_name,
                            "result_message": result.message,
                        },
                    )
                    # Raised inside the try so the handler below marks the node failed.
                    raise RuntimeError(result.message or f"{node.node_name}执行失败")
            except Exception as exc:
                node.status = WorkflowNodeRun.Status.FAILED
                node.finished_at = timezone.now()
                node.message = str(exc)
                node.save(update_fields=["status", "finished_at", "message"])
                self._record_node_progress(node)
                raise

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        self._record_node_progress(node)
        logger.info(
            "Workflow node finished",
            extra={"batch_id": self.batch.pk, "node_code": node.node_code},
        )
|
||||||
|
|
||||||
|
|
||||||
|
def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None:
    """Run the file-summary workflow for ``batch``, inline or on a background thread.

    Args:
        batch: The batch whose node runs should be executed.
        async_run: When true (default) the workflow runs on a daemon thread
            and this function returns immediately; otherwise it runs in the
            calling thread and returns once the workflow has finished.
    """
    executor = WorkflowExecutor(batch)
    if not async_run:
        logger.info("Workflow starting synchronously", extra={"batch_id": batch.pk})
        executor.run()
        return

    def _run_and_release_connections() -> None:
        # Imported locally so this block does not depend on a file-level import.
        from django.db import connections

        try:
            executor.run()
        finally:
            # Database connections are per thread and are only closed
            # automatically around requests; close the ones this worker thread
            # opened so they do not leak when the thread ends.
            connections.close_all()

    logger.info("Workflow starting asynchronously", extra={"batch_id": batch.pk})
    Thread(target=_run_and_release_connections, daemon=True).start()
|
||||||
89
review_agent/file_summary/workflow_trigger.py
Normal file
89
review_agent/file_summary/workflow_trigger.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from review_agent.models import Conversation, FileAttachment
|
||||||
|
|
||||||
|
|
||||||
|
# Substrings that make a user message a candidate for the file-summary workflow.
TRIGGER_KEYWORDS = ("自动汇总", "文件目录", "页数", "目录与页数", "文件清单")
# Phrases that on their own mark a message as a request to read an attachment.
ATTACHMENT_READER_KEYWORDS = (
    "阅读附件",
    "读取附件",
    "解析附件",
    "分析附件",
    "查看附件",
    "附件详情",
    "文件详情",
    "文件内容",
    "附件内容",
    "简历文件",
    "提供的文件",
    "提供的简历",
    "上传的文件",
    "上传文件",
    "这个文件",
    "该文件",
    "总结附件",
    "总结文件",
    "分析这个文件",
    "阅读这个文件",
)
# Words that refer to an attachment; they only count as a match when combined
# with one of the read-intent words below.
ATTACHMENT_REFERENCE_KEYWORDS = ("附件", "文件", "简历", "上传")
# Words expressing an intent to read or extract something; paired with the
# reference words above.
ATTACHMENT_READ_INTENT_KEYWORDS = (
    "阅读",
    "读取",
    "读",
    "解析",
    "分析",
    "查看",
    "提取",
    "整理",
    "总结",
    "介绍",
    "项目经历",
    "工作经历",
    "经历",
    "信息",
    "内容",
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class TriggerResult:
    """Immutable outcome of evaluating a workflow trigger against a message."""

    # True when the workflow should be started.
    should_start: bool
    # Identifier of the workflow to start; left empty when nothing starts.
    workflow_type: str = ""
    # Short code explaining why nothing starts; left empty otherwise.
    reason: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def _contains_any(text: str, keywords: tuple[str, ...]) -> bool:
    """Return True when at least one keyword occurs as a substring of ``text``."""
    return any(keyword in text for keyword in keywords)


def _conversation_has_attachment(conversation: Conversation) -> bool:
    """Return True when the conversation has an active attachment not marked deleted."""
    return (
        FileAttachment.objects.filter(conversation=conversation, is_active=True)
        .exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        .exists()
    )


def _build_trigger_result(conversation: Conversation, matched: bool, workflow_type: str) -> TriggerResult:
    """Turn a keyword match into a TriggerResult, checking attachments only on a match."""
    if not matched:
        return TriggerResult(should_start=False, reason="not_matched")
    # The database is only queried once the cheap keyword test has passed.
    if not _conversation_has_attachment(conversation):
        return TriggerResult(should_start=False, reason="missing_attachment")
    return TriggerResult(should_start=True, workflow_type=workflow_type)


def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the file-summary workflow.

    Args:
        conversation: Conversation whose attachments are checked.
        content: Raw user message; ``None`` or empty is treated as no match.

    Returns:
        A result with ``workflow_type="file_summary"`` when a trigger keyword is
        present and the conversation has a usable attachment; otherwise a
        non-starting result with reason ``"not_matched"`` or ``"missing_attachment"``.
    """
    text = (content or "").strip()
    return _build_trigger_result(conversation, _contains_any(text, TRIGGER_KEYWORDS), "file_summary")


def evaluate_attachment_reader_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the attachment-reader workflow.

    The message matches when it contains one of the complete reader phrases, or
    when it contains both an attachment-reference word and a read-intent word.

    Args:
        conversation: Conversation whose attachments are checked.
        content: Raw user message; ``None`` or empty is treated as no match.

    Returns:
        A result with ``workflow_type="attachment_reader"`` on success; otherwise a
        non-starting result with reason ``"not_matched"`` or ``"missing_attachment"``.
    """
    text = (content or "").strip()
    matched = _contains_any(text, ATTACHMENT_READER_KEYWORDS) or (
        _contains_any(text, ATTACHMENT_REFERENCE_KEYWORDS)
        and _contains_any(text, ATTACHMENT_READ_INTENT_KEYWORDS)
    )
    return _build_trigger_result(conversation, matched, "attachment_reader")
|
||||||
415
review_agent/knowledge_base.py
Normal file
415
review_agent/knowledge_base.py
Normal file
@@ -0,0 +1,415 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from django.core.files.uploadedfile import UploadedFile
|
||||||
|
|
||||||
|
from review_agent.models import KnowledgeBaseDocument
|
||||||
|
from review_agent.regulatory_review.services.rag_citation import RagIndexUnavailable, retrieve_citations
|
||||||
|
from review_agent.regulatory_review.services.rag_embedding import get_embedding_provider
|
||||||
|
from review_agent.regulatory_review.services.rag_index import chunk_text, extract_text_from_path, is_excluded_source_path
|
||||||
|
from review_agent.regulatory_review.services.rule_loader import DEFAULT_RULE_PATH, compute_file_sha256, load_rule_file
|
||||||
|
|
||||||
|
|
||||||
|
# Lower-case file suffixes (with leading dot) reported as "supported" source material.
SUPPORTED_SOURCE_SUFFIXES = {
    ".doc",
    ".docx",
    ".pdf",
    ".txt",
    ".md",
    ".pptx",
    ".xlsx",
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class ChromaCollectionState:
    """Read-only snapshot describing the Chroma collection."""

    # Whether the collection could be opened.
    exists: bool
    # Number of entries reported by the collection.
    count: int = 0
    # Human-readable explanation when the collection is unavailable.
    error_message: str = ""
    # A few metadata records for display; None when not loaded.
    sample_metadatas: list[dict[str, Any]] | None = None
    # Number of indexed chunks per "source" metadata value; None when not loaded.
    source_chunk_counts: dict[str, int] | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def build_knowledge_base_context() -> dict[str, Any]:
    """Assemble the template context describing the regulatory knowledge base.

    Returns:
        A dict with rule-file information, the source-material listing, the
        Chroma collection snapshot, a status label and the build commands.
        ``managed_documents`` is an empty list here; see
        ``build_knowledge_base_context_for_user`` for the per-user variant.
    """
    rule_info = _rule_info()
    source_dir = Path(settings.BASE_DIR) / str(rule_info.get("source_material_dir") or "docs/0.原始材料")
    # Read the collection once and share the snapshot with the source listing,
    # instead of paging through every metadata record twice per call.
    collection = get_chroma_collection_state()
    sources = list_source_documents(source_dir, collection=collection)
    return {
        "name": "NMPA IVD 注册资料法规库",
        "description": "用于体外诊断试剂注册资料法规核查的结构化规则和 RAG 依据检索。",
        "provider": settings.REGULATORY_RAG_PROVIDER,
        "collection_name": settings.REGULATORY_RAG_COLLECTION,
        "chroma_path": settings.REGULATORY_RAG_CHROMA_PATH,
        "rule": rule_info,
        "source_dir": str(source_dir),
        "sources": sources,
        "source_count": len(sources),
        "supported_source_count": sum(1 for item in sources if item["supported"]),
        "collection": {
            "exists": collection.exists,
            "count": collection.count,
            "error_message": collection.error_message,
            "sample_metadatas": collection.sample_metadatas or [],
        },
        "status": _status_label(collection),
        "build_commands": [
            "python manage.py regulatory_rag_build --provider deterministic",
            "python manage.py regulatory_rag_build --provider siliconflow",
        ],
        "managed_documents": [],
    }


def build_knowledge_base_context_for_user(user) -> dict[str, Any]:
    """Return the knowledge-base context extended with ``user``'s managed documents.

    Adds ``managed_documents``, ``managed_document_count`` and
    ``active_managed_document_count`` to the base context.
    """
    context = build_knowledge_base_context()
    documents = list_documents_for_user(user)
    context["managed_documents"] = documents
    context["managed_document_count"] = len(documents)
    context["active_managed_document_count"] = sum(1 for item in documents if item["is_active"])
    return context


def list_source_documents(
    source_dir: Path,
    *,
    collection: ChromaCollectionState | None = None,
) -> list[dict[str, Any]]:
    """List the files under ``source_dir`` together with their indexing status.

    Args:
        source_dir: Root directory of the source material; a missing directory
            yields an empty list.
        collection: Optional pre-fetched collection snapshot. When omitted the
            snapshot is loaded via ``get_chroma_collection_state()``.

    Returns:
        One dict per file (sorted by path, excluded paths skipped) with name,
        relative path, suffix, size, support flag and indexed chunk count.
    """
    if not source_dir.exists():
        return []
    if collection is None:
        collection = get_chroma_collection_state()
    source_chunk_counts = collection.source_chunk_counts or {}
    documents: list[dict[str, Any]] = []
    for path in sorted(source_dir.rglob("*")):
        if not path.is_file():
            continue
        suffix = path.suffix.lower()
        relative_path = str(path.relative_to(source_dir))
        if is_excluded_source_path(relative_path):
            continue
        indexed_chunk_count = source_chunk_counts.get(relative_path, 0)
        documents.append(
            {
                "name": path.name,
                "relative_path": relative_path,
                "suffix": suffix.lstrip(".") or "unknown",
                "size": path.stat().st_size,
                "supported": suffix in SUPPORTED_SOURCE_SUFFIXES,
                "indexed": indexed_chunk_count > 0,
                "indexed_chunk_count": indexed_chunk_count,
                "indexed_label": f"已入库 {indexed_chunk_count} 片" if indexed_chunk_count else "未入库",
            }
        )
    return documents
|
||||||
|
|
||||||
|
|
||||||
|
def search_knowledge_base(query: str, *, n_results: int = 3) -> dict[str, Any]:
    """Run a citation search and wrap the outcome for display.

    Args:
        query: User question; surrounding whitespace is removed.
        n_results: Number of results requested from the retriever.

    Returns:
        A dict with ``query`` (normalized), ``results`` and ``error_message``.
        Retrieval failures are reported through ``error_message`` with an empty
        result list rather than raised.
    """
    question = (query or "").strip()

    def _reply(results: list, error_message: str) -> dict[str, Any]:
        return {"query": question, "results": results, "error_message": error_message}

    if question == "":
        return _reply([], "请输入检索问题。")

    try:
        hits = retrieve_citations(
            question,
            embedding_provider=get_embedding_provider(),
            n_results=n_results,
        )
    except RagIndexUnavailable as exc:
        return _reply([], str(exc))
    except Exception as exc:
        return _reply([], f"检索失败：{exc}")

    return _reply(filter_active_knowledge_results(hits), "")
|
||||||
|
|
||||||
|
|
||||||
|
def list_documents_for_user(user) -> list[dict[str, Any]]:
    """Return the serialized knowledge-base documents of ``user``, skipping deleted ones."""
    visible_documents = KnowledgeBaseDocument.objects.filter(user=user).exclude(
        status=KnowledgeBaseDocument.Status.DELETED
    )
    serialized: list[dict[str, Any]] = []
    for document in visible_documents:
        serialized.append(serialize_document(document))
    return serialized
|
||||||
|
|
||||||
|
|
||||||
|
def create_document_from_upload(
    *,
    user,
    uploaded_file: UploadedFile,
    display_name: str = "",
    description: str = "",
    is_active: bool = True,
) -> KnowledgeBaseDocument:
    """Store an uploaded file under the user's knowledge-base folder and register it.

    Args:
        user: Owner of the document; ``user.pk`` names the storage sub-folder.
        uploaded_file: Upload to persist; written chunk by chunk.
        display_name: Optional label. Blank or whitespace-only values fall back
            to the uploaded file name, matching ``update_document``.
        description: Optional free text; ``None`` is treated as empty.
        is_active: When true the document is created active and indexed right away.

    Returns:
        The created ``KnowledgeBaseDocument``.

    Raises:
        Exception: Any error from writing the file or creating the row is
            re-raised after the partially stored file has been removed.
    """
    root = Path(settings.MEDIA_ROOT) / "knowledge_base" / "users" / str(user.pk)
    root.mkdir(parents=True, exist_ok=True)
    target = _unique_target_path(root, uploaded_file.name)

    # A whitespace-only label must not end up as an empty display name.
    resolved_name = (display_name or "").strip() or uploaded_file.name.strip()
    status = KnowledgeBaseDocument.Status.ACTIVE if is_active else KnowledgeBaseDocument.Status.DISABLED

    try:
        with target.open("wb") as handle:
            for chunk in uploaded_file.chunks():
                handle.write(chunk)
        document = KnowledgeBaseDocument.objects.create(
            user=user,
            display_name=resolved_name,
            original_name=uploaded_file.name,
            storage_path=str(target),
            file_size=target.stat().st_size,
            content_type=getattr(uploaded_file, "content_type", "") or "",
            description=(description or "").strip(),
            status=status,
            is_active=is_active,
        )
    except Exception:
        # Do not leave an orphaned file behind when nothing references it.
        target.unlink(missing_ok=True)
        raise

    if is_active:
        index_managed_document(document)
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def update_document(document: KnowledgeBaseDocument, payload: dict[str, Any]) -> KnowledgeBaseDocument:
    """Apply the editable fields present in ``payload`` to ``document``.

    Recognized keys are ``display_name``, ``description`` and ``is_active``;
    other keys are ignored. A blank display name falls back to the original
    file name. Deactivating removes the document from the index; switching from
    inactive to active re-indexes it after saving.

    Returns:
        The same ``document`` instance.
    """
    changed: list[str] = []
    activation_flipped = False

    if "display_name" in payload:
        cleaned_name = str(payload.get("display_name") or "").strip()
        document.display_name = cleaned_name if cleaned_name else document.original_name
        changed.append("display_name")

    if "description" in payload:
        document.description = str(payload.get("description") or "").strip()
        changed.append("description")

    if "is_active" in payload:
        wants_active = bool(payload.get("is_active"))
        activation_flipped = document.is_active != wants_active
        document.is_active = wants_active
        if wants_active:
            document.status = KnowledgeBaseDocument.Status.ACTIVE
        else:
            document.status = KnowledgeBaseDocument.Status.DISABLED
        changed.extend(["is_active", "status"])
        if not wants_active:
            remove_managed_document_from_index(document)
            document.indexed_chunk_count = 0
            document.metadata = {**(document.metadata or {}), "index_status": "disabled", "index_error": ""}
            changed.extend(["indexed_chunk_count", "metadata"])

    if changed:
        changed.append("updated_at")
        document.save(update_fields=changed)

    if activation_flipped and document.is_active:
        index_managed_document(document)
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def delete_document(document: KnowledgeBaseDocument) -> KnowledgeBaseDocument:
    """Soft-delete ``document``: drop it from the index and mark the row deleted.

    Returns:
        The same ``document`` instance after saving.
    """
    remove_managed_document_from_index(document)

    previous_metadata = document.metadata or {}
    document.status = KnowledgeBaseDocument.Status.DELETED
    document.is_active = False
    document.indexed_chunk_count = 0
    document.metadata = {**previous_metadata, "index_status": "deleted", "index_error": ""}

    saved_fields = ["status", "is_active", "indexed_chunk_count", "metadata", "updated_at"]
    document.save(update_fields=saved_fields)
    return document
|
||||||
|
|
||||||
|
|
||||||
|
def serialize_document(document: KnowledgeBaseDocument) -> dict[str, Any]:
    """Convert ``document`` into a plain dict of display fields.

    Timestamps are rendered with ``isoformat()`` and become ``""`` when unset.
    """

    def _iso(moment) -> str:
        return moment.isoformat() if moment else ""

    chunk_count = document.indexed_chunk_count
    if chunk_count:
        indexed_label = f"已入库 {chunk_count} 片"
    else:
        indexed_label = "未入库"

    return {
        "id": document.pk,
        "display_name": document.display_name,
        "original_name": document.original_name,
        "description": document.description,
        "file_size": document.file_size,
        "content_type": document.content_type,
        "status": document.status,
        "is_active": document.is_active,
        "indexed_chunk_count": chunk_count,
        "indexed_label": indexed_label,
        "created_at": _iso(document.created_at),
        "updated_at": _iso(document.updated_at),
    }
|
||||||
|
|
||||||
|
|
||||||
|
def index_managed_document(document: KnowledgeBaseDocument) -> int:
    """(Re)index a managed document into the Chroma collection.

    Inactive or non-ACTIVE documents are removed from the index instead. For
    active documents the stored file is extracted, chunked and embedded; chunks
    left from an earlier indexing of the same document are deleted before the
    new ones are written, so a shorter re-index leaves no stale entries.

    The outcome is recorded on the document (``indexed_chunk_count`` and
    ``metadata["index_status"]`` / ``metadata["index_error"]``) and saved.

    Returns:
        The number of chunks now indexed; 0 when disabled, empty or failed.
        Indexing errors are recorded in the metadata rather than raised.
    """

    def _record(chunk_count: int, index_status: str, index_error: str = "") -> int:
        # Single place that persists the indexing outcome on the row.
        document.indexed_chunk_count = chunk_count
        document.metadata = {
            **(document.metadata or {}),
            "index_status": index_status,
            "index_error": index_error,
        }
        document.save(update_fields=["indexed_chunk_count", "metadata", "updated_at"])
        return chunk_count

    if document.status != KnowledgeBaseDocument.Status.ACTIVE or not document.is_active:
        remove_managed_document_from_index(document)
        return _record(0, "disabled")

    path = Path(document.storage_path)
    if not path.is_absolute():
        path = Path(settings.MEDIA_ROOT) / document.storage_path

    try:
        text = extract_text_from_path(path)
        source = f"用户知识库/{document.user_id}/{document.pk}/{document.original_name}"
        chunks = chunk_text(text, source=source)
        if not chunks:
            # Nothing to index: make sure earlier chunks do not linger either.
            remove_managed_document_from_index(document)
            return _record(0, "empty")

        texts = [chunk.text for chunk in chunks]
        # Embed before touching the collection so a provider failure leaves
        # the existing index entries untouched.
        embeddings = get_embedding_provider()(texts)
        ids = [
            hashlib.sha256(f"managed:{document.pk}:{chunk.metadata['chunk_index']}".encode("utf-8")).hexdigest()
            for chunk in chunks
        ]
        metadatas = [
            {
                **chunk.metadata,
                "source_type": "managed_document",
                "document_id": document.pk,
                "user_id": document.user_id,
                "original_name": document.original_name,
            }
            for chunk in chunks
        ]

        collection = _load_chroma_collection()
        # Upsert only overwrites matching ids; drop the previous chunk set so
        # chunk indexes beyond the new length disappear.
        collection.delete(where={"document_id": document.pk})
        collection.upsert(ids=ids, documents=texts, metadatas=metadatas, embeddings=embeddings)
        return _record(len(chunks), "indexed")
    except Exception as exc:
        # Best effort by design: the failure is surfaced through the metadata.
        return _record(0, "failed", str(exc))
|
||||||
|
|
||||||
|
|
||||||
|
def remove_managed_document_from_index(document: KnowledgeBaseDocument) -> None:
    """Delete every index entry whose ``document_id`` metadata equals ``document.pk``.

    Failures are not raised: the error text is stored under
    ``metadata["index_delete_error"]`` on the in-memory document. This function
    does not save the document.
    """
    try:
        _load_chroma_collection().delete(where={"document_id": document.pk})
    except Exception as exc:
        existing_metadata = document.metadata or {}
        document.metadata = {**existing_metadata, "index_delete_error": str(exc)}
|
||||||
|
|
||||||
|
|
||||||
|
def filter_active_knowledge_results(results: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Drop search hits that belong to managed documents which are not active.

    Hits whose metadata ``source_type`` is not ``"managed_document"`` always
    pass. Managed hits pass only when their ``document_id`` parses as an
    integer and refers to a document that is ACTIVE and ``is_active``. Managed
    hits with a missing or malformed ``document_id`` are dropped, because their
    state cannot be verified.

    Args:
        results: Retrieved hits, each optionally carrying a ``metadata`` dict.

    Returns:
        ``results`` itself when it holds no managed hits, otherwise a new list
        in the original order.
    """

    def _managed_document_id(item: dict[str, Any]) -> tuple[bool, int | None]:
        # Returns (is_managed, parsed id or None when unusable).
        metadata = item.get("metadata") or {}
        if metadata.get("source_type") != "managed_document":
            return False, None
        try:
            return True, int(metadata.get("document_id"))
        except (TypeError, ValueError):
            return True, None

    # Parse every hit exactly once so the same guard protects both the id
    # collection and the filtering pass.
    classified = [(item, *_managed_document_id(item)) for item in results]
    if not any(is_managed for _, is_managed, _ in classified):
        return results

    candidate_ids = {doc_id for _, is_managed, doc_id in classified if is_managed and doc_id is not None}
    active_ids: set[int] = set()
    if candidate_ids:
        active_ids = set(
            KnowledgeBaseDocument.objects.filter(
                pk__in=candidate_ids,
                status=KnowledgeBaseDocument.Status.ACTIVE,
                is_active=True,
            ).values_list("pk", flat=True)
        )

    return [
        item
        for item, is_managed, doc_id in classified
        if not is_managed or doc_id in active_ids
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def _load_chroma_collection():
    """Open the configured Chroma collection, creating directory and collection if absent.

    Raises:
        RuntimeError: If the ``chromadb`` package cannot be imported.
    """
    try:
        import chromadb
    except ImportError as exc:
        raise RuntimeError("chromadb 未安装。") from exc

    storage_dir = Path(settings.REGULATORY_RAG_CHROMA_PATH)
    storage_dir.mkdir(parents=True, exist_ok=True)
    client = chromadb.PersistentClient(path=str(storage_dir))
    return client.get_or_create_collection(settings.REGULATORY_RAG_COLLECTION)
|
||||||
|
|
||||||
|
|
||||||
|
def get_chroma_collection_state() -> ChromaCollectionState:
    """Inspect the configured Chroma collection without creating anything.

    Returns:
        A snapshot with the entry count, up to ten sample metadata records and
        per-source chunk counts. When the index directory is missing,
        ``chromadb`` is not installed, or the collection cannot be read, the
        snapshot has ``exists=False`` and an explanatory ``error_message``.
    """

    def _unavailable(message: str) -> ChromaCollectionState:
        return ChromaCollectionState(exists=False, error_message=message)

    index_dir = Path(settings.REGULATORY_RAG_CHROMA_PATH)
    if not index_dir.exists():
        return _unavailable("法规 RAG 索引目录不存在。")

    try:
        import chromadb
    except ImportError:
        return _unavailable("chromadb 未安装。")

    try:
        client = chromadb.PersistentClient(path=str(index_dir))
        collection = client.get_collection(settings.REGULATORY_RAG_COLLECTION)
        total = collection.count()
        all_metadatas = _load_collection_metadatas(collection, total)
        return ChromaCollectionState(
            exists=True,
            count=total,
            sample_metadatas=all_metadatas[:10],
            source_chunk_counts=_count_chunks_by_source(all_metadatas),
        )
    except Exception as exc:
        return _unavailable(f"法规 RAG collection 不可用：{exc}")
|
||||||
|
|
||||||
|
|
||||||
|
def _load_collection_metadatas(collection, count: int) -> list[dict[str, Any]]:
|
||||||
|
metadatas: list[dict[str, Any]] = []
|
||||||
|
if count <= 0:
|
||||||
|
return metadatas
|
||||||
|
page_size = 500
|
||||||
|
for offset in range(0, count, page_size):
|
||||||
|
payload = collection.get(
|
||||||
|
include=["metadatas"],
|
||||||
|
limit=min(page_size, count - offset),
|
||||||
|
offset=offset,
|
||||||
|
)
|
||||||
|
metadatas.extend(payload.get("metadatas") or [])
|
||||||
|
return metadatas
|
||||||
|
|
||||||
|
|
||||||
|
def _count_chunks_by_source(metadatas: list[dict[str, Any]]) -> dict[str, int]:
|
||||||
|
counts: dict[str, int] = {}
|
||||||
|
for metadata in metadatas:
|
||||||
|
source = str((metadata or {}).get("source") or "")
|
||||||
|
if source:
|
||||||
|
counts[source] = counts.get(source, 0) + 1
|
||||||
|
return counts
|
||||||
|
|
||||||
|
|
||||||
|
def _rule_info() -> dict[str, Any]:
    """Summarize the rule file for display.

    Returns:
        On success a dict with ``status="ok"``, identifying fields, the file
        hash, the requirement count, the number of distinct chapter codes (the
        part of ``attachment4_code`` before the first dot) and a per-severity
        count. On any error a dict with ``status="failed"``, empty values and
        ``error_message``.
    """
    try:
        payload = load_rule_file()
        requirements = payload.get("requirements") or []

        by_severity: dict[str, int] = {}
        chapters = set()
        for entry in requirements:
            level = str(entry.get("severity") or "unknown")
            by_severity[level] = by_severity.get(level, 0) + 1
            code = str(entry.get("attachment4_code") or "")
            if code:
                # Text before the first dot identifies the chapter.
                chapters.add(code.partition(".")[0])

        return {
            "status": "ok",
            "code": payload.get("code", ""),
            "name": payload.get("name", ""),
            "path": str(DEFAULT_RULE_PATH),
            "hash": compute_file_sha256(DEFAULT_RULE_PATH),
            "rag_collection": payload.get("rag_collection", ""),
            "source_material_dir": payload.get("source_material_dir", "docs/0.原始材料"),
            "requirement_count": len(requirements),
            "chapter_count": len(chapters),
            "severity_counts": by_severity,
        }
    except Exception as exc:
        return {
            "status": "failed",
            "code": "",
            "name": "",
            "path": str(DEFAULT_RULE_PATH),
            "hash": "",
            "rag_collection": "",
            "source_material_dir": "docs/0.原始材料",
            "requirement_count": 0,
            "chapter_count": 0,
            "severity_counts": {},
            "error_message": str(exc),
        }
|
||||||
|
|
||||||
|
|
||||||
|
def _status_label(collection: ChromaCollectionState) -> dict[str, str]:
|
||||||
|
if not collection.exists:
|
||||||
|
return {"code": "missing", "label": "未构建", "message": collection.error_message}
|
||||||
|
if collection.count < 20:
|
||||||
|
return {"code": "thin", "label": "索引过少", "message": "RAG 能力已打通,但当前索引内容较少,建议补齐材料后重建。"}
|
||||||
|
return {"code": "ready", "label": "可用", "message": "RAG 索引已构建,可用于法规依据辅助检索。"}
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_target_path(root: Path, original_name: str) -> Path:
|
||||||
|
safe_name = Path(original_name).name or "document"
|
||||||
|
target = root / safe_name
|
||||||
|
if not target.exists():
|
||||||
|
return target
|
||||||
|
stem = target.stem
|
||||||
|
suffix = target.suffix
|
||||||
|
index = 2
|
||||||
|
while True:
|
||||||
|
candidate = root / f"{stem}-{index}{suffix}"
|
||||||
|
if not candidate.exists():
|
||||||
|
return candidate
|
||||||
|
index += 1
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
from urllib import error, request
|
from urllib import error, request
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -12,7 +13,10 @@ class LLMRequestError(RuntimeError):
|
|||||||
"""Raised when the remote LLM provider call fails."""
|
"""Raised when the remote LLM provider call fails."""
|
||||||
|
|
||||||
|
|
||||||
def generate_reply(conversation, user_message: str) -> str:
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_reply(conversation, user_message: str, knowledge_context: str = "") -> str:
|
||||||
"""Calls the SiliconFlow OpenAI-compatible chat endpoint and returns assistant text."""
|
"""Calls the SiliconFlow OpenAI-compatible chat endpoint and returns assistant text."""
|
||||||
|
|
||||||
if not settings.LLM_API_KEY:
|
if not settings.LLM_API_KEY:
|
||||||
@@ -22,7 +26,7 @@ def generate_reply(conversation, user_message: str) -> str:
|
|||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": settings.LLM_MODEL,
|
"model": settings.LLM_MODEL,
|
||||||
"messages": build_messages(conversation, user_message),
|
"messages": build_messages(conversation, user_message, knowledge_context=knowledge_context),
|
||||||
"temperature": 0.3,
|
"temperature": 0.3,
|
||||||
}
|
}
|
||||||
body = json.dumps(payload).encode("utf-8")
|
body = json.dumps(payload).encode("utf-8")
|
||||||
@@ -53,7 +57,48 @@ def generate_reply(conversation, user_message: str) -> str:
|
|||||||
raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
||||||
|
|
||||||
|
|
||||||
def stream_reply(conversation, user_message: str):
|
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0, timeout: float = 60) -> str:
    """Calls the configured chat endpoint with explicit messages and returns assistant text.

    Args:
        messages: Chat messages sent as the ``messages`` field of the request.
        temperature: Sampling temperature forwarded to the endpoint.
        timeout: Timeout in seconds passed to ``urlopen``.

    Returns:
        The stripped ``choices[0].message.content`` of the response.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the request fails (HTTP error, network error,
            timeout) or the response is not the expected JSON shape.
    """

    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    payload = {
        "model": settings.LLM_MODEL,
        "messages": messages,
        "temperature": temperature,
    }
    body = json.dumps(payload).encode("utf-8")
    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"

    http_request = request.Request(
        endpoint,
        data=body,
        headers={
            "Authorization": f"Bearer {settings.LLM_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with request.urlopen(http_request, timeout=timeout) as response:
            raw = response.read()
    except error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise LLMRequestError(f"模型接口调用失败：HTTP {exc.code} {details}") from exc
    except error.URLError as exc:
        raise LLMRequestError(f"模型接口调用失败：{exc.reason}") from exc
    except OSError as exc:
        # Timeouts and resets while reading the body are raised as plain
        # socket errors, not wrapped in URLError.
        raise LLMRequestError(f"模型接口调用失败：{exc}") from exc

    try:
        data = json.loads(raw.decode("utf-8"))
        return data["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        # ValueError covers invalid JSON / undecodable bytes; AttributeError
        # covers a null ``content``.
        raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def stream_reply(conversation, user_message: str, knowledge_context: str = ""):
|
||||||
"""Streams incremental assistant text from the SiliconFlow chat endpoint."""
|
"""Streams incremental assistant text from the SiliconFlow chat endpoint."""
|
||||||
|
|
||||||
if not settings.LLM_API_KEY:
|
if not settings.LLM_API_KEY:
|
||||||
@@ -63,7 +108,7 @@ def stream_reply(conversation, user_message: str):
|
|||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": settings.LLM_MODEL,
|
"model": settings.LLM_MODEL,
|
||||||
"messages": build_messages(conversation, user_message),
|
"messages": build_messages(conversation, user_message, knowledge_context=knowledge_context),
|
||||||
"temperature": 0.3,
|
"temperature": 0.3,
|
||||||
"stream": True,
|
"stream": True,
|
||||||
}
|
}
|
||||||
@@ -89,7 +134,11 @@ def stream_reply(conversation, user_message: str):
|
|||||||
data = line[5:].strip()
|
data = line[5:].strip()
|
||||||
if data == "[DONE]":
|
if data == "[DONE]":
|
||||||
break
|
break
|
||||||
payload = json.loads(data)
|
try:
|
||||||
|
payload = json.loads(data)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("Skipping malformed LLM stream data", extra={"data": data[:200]})
|
||||||
|
continue
|
||||||
delta = (
|
delta = (
|
||||||
payload.get("choices", [{}])[0]
|
payload.get("choices", [{}])[0]
|
||||||
.get("delta", {})
|
.get("delta", {})
|
||||||
@@ -104,10 +153,21 @@ def stream_reply(conversation, user_message: str):
|
|||||||
raise LLMRequestError(f"模型接口调用失败:{exc.reason}") from exc
|
raise LLMRequestError(f"模型接口调用失败:{exc.reason}") from exc
|
||||||
|
|
||||||
|
|
||||||
def build_messages(conversation, latest_user_message: str) -> list[dict[str, str]]:
|
def build_messages(conversation, latest_user_message: str, knowledge_context: str = "") -> list[dict[str, str]]:
|
||||||
"""Builds system and conversation history messages for the provider call."""
|
"""Builds system and conversation history messages for the provider call."""
|
||||||
|
|
||||||
messages = [{"role": "system", "content": system_prompt()}]
|
messages = [{"role": "system", "content": system_prompt()}]
|
||||||
|
if knowledge_context.strip():
|
||||||
|
messages.append(
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": (
|
||||||
|
"以下是全局知识库检索到的材料片段。回答用户时优先依据这些片段;"
|
||||||
|
"如果片段不足以支持结论,请明确说明信息不足,不要编造。\n\n"
|
||||||
|
f"{knowledge_context.strip()}"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
for message in conversation.messages.all():
|
for message in conversation.messages.all():
|
||||||
messages.append({"role": message.role, "content": message.content})
|
messages.append({"role": message.role, "content": message.content})
|
||||||
|
|||||||
15
review_agent/logging_filters.py
Normal file
15
review_agent/logging_filters.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class SuppressWorkflowStatusPollFilter(logging.Filter):
    """Hides noisy workflow status polling access logs from runserver output."""

    # Matches successful (200) GET requests to the file-summary and
    # regulatory-review status endpoints inside an access-log message.
    STATUS_POLL_PATTERN = re.compile(
        r'"GET /api/review-agent/(?:file-summary|regulatory-review)/\d+/status/ HTTP/[0-9.]+" 200 '
    )

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False (drop the record) when its message is a status poll, else True."""
        rendered = record.getMessage()
        is_status_poll = self.STATUS_POLL_PATTERN.search(rendered) is not None
        return not is_status_poll
|
||||||
1
review_agent/management/__init__.py
Normal file
1
review_agent/management/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Management command package for review_agent."""
|
||||||
1
review_agent/management/commands/__init__.py
Normal file
1
review_agent/management/commands/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Management commands for review_agent."""
|
||||||
21
review_agent/management/commands/feishu_question_simulate.py
Normal file
21
review_agent/management/commands/feishu_question_simulate.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from django.contrib.auth import get_user_model
|
||||||
|
from django.core.management.base import BaseCommand, CommandError
|
||||||
|
|
||||||
|
from review_agent.feishu_questions.service import answer_question
|
||||||
|
|
||||||
|
|
||||||
|
class Command(BaseCommand):
    """Management command that answers a question on behalf of an existing user."""

    help = "Simulate a reserved Feishu question against local workflow data."

    def add_arguments(self, parser):
        """Register the required ``--username`` option and the positional question."""
        parser.add_argument("--username", required=True, help="System username used as asker.")
        parser.add_argument("question", help="Question text, for example: 查最新法规核查")

    def handle(self, *args, **options):
        """Look up the user, run ``answer_question`` and print the answer summary.

        Raises:
            CommandError: If no user with the given username exists.
        """
        asker = get_user_model().objects.filter(username=options["username"]).first()
        if not asker:
            raise CommandError(f"用户不存在：{options['username']}")
        outcome = answer_question(asker, options["question"])
        summary = outcome.get("answer_summary") or "无可返回摘要。"
        self.stdout.write(summary)
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user