Compare commits
23 Commits
b96ab1303a
...
f179749cfb
| Author | SHA1 | Date | |
|---|---|---|---|
| f179749cfb | |||
| e58da66853 | |||
| df3f393dd2 | |||
| 0fca20756b | |||
| 3c6ec67371 | |||
| 7e561ea213 | |||
| daa0642142 | |||
| c78ff3a1fd | |||
| 460d418921 | |||
| 54c37edf19 | |||
| fa77c68d77 | |||
| 47b5ad1054 | |||
| fd88ff4652 | |||
| b1a336d019 | |||
| 311eb1b129 | |||
| 77db0d978a | |||
| 684682f86d | |||
| a917a18ca1 | |||
| 61bd31790b | |||
| 18d045d487 | |||
| 51e7c0c007 | |||
| eb87d9040d | |||
| 855afcdee3 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,4 +6,5 @@ db.sqlite3
|
||||
staticfiles/
|
||||
media/
|
||||
.pytest_cache/
|
||||
.tmp/
|
||||
.idea/
|
||||
|
||||
17
README.md
17
README.md
@@ -18,3 +18,20 @@ python manage.py runserver
|
||||
- 登录页:http://127.0.0.1:8000/login/
|
||||
- 首页:http://127.0.0.1:8000/
|
||||
- 管理后台:http://127.0.0.1:8000/admin/
|
||||
|
||||
## 文件汇总依赖
|
||||
|
||||
自动汇总文件目录与页数功能使用轻量 Python 库读取 PDF、Word、Excel、PowerPoint 文件。
|
||||
Docker 或生产环境如需处理 `.7z` 与 `.rar` 压缩包,还需要安装系统 `7z`/`p7zip`
|
||||
命令,并确认以下命令可用:
|
||||
|
||||
```bash
|
||||
7z
|
||||
7z i
|
||||
```
|
||||
|
||||
LibreOffice 不是必需依赖,仅作为未来增强老格式文档解析的可选能力。
|
||||
|
||||
上传原始文件、批次工作目录和导出文件默认存储在 Django `MEDIA_ROOT` 下的
|
||||
`file_summary/users/<user_id>/<conversation_id>/` 或批次 `work_dir` 目录中。生产环境
|
||||
需要把 `MEDIA_ROOT` 挂载到持久化卷,并纳入备份或归档策略。
|
||||
|
||||
@@ -92,6 +92,8 @@ USE_TZ = True
|
||||
|
||||
STATIC_URL = "static/"
|
||||
STATICFILES_DIRS = [BASE_DIR / "static"]
|
||||
MEDIA_ROOT = BASE_DIR / "media"
|
||||
MEDIA_URL = "media/"
|
||||
|
||||
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
|
||||
|
||||
@@ -102,3 +104,26 @@ LOGOUT_REDIRECT_URL = "login"
|
||||
LLM_API_KEY = os.environ.get("LLM_API_KEY", "")
|
||||
LLM_BASE_URL = os.environ.get("LLM_BASE_URL", "https://api.siliconflow.cn/v1")
|
||||
LLM_MODEL = os.environ.get("LLM_MODEL", "")
|
||||
|
||||
# Logging configuration: messages from the "review_agent" logger go to a
# console StreamHandler using a "time level logger-name message" layout.
LOGGING = {
    "version": 1,
    "disable_existing_loggers": False,
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "verbose",
        },
    },
    "formatters": {
        "verbose": {
            "format": "%(asctime)s %(levelname)s %(name)s %(message)s",
        },
    },
    "loggers": {
        "review_agent": {
            "handlers": ["console"],
            # The logging module only recognises upper-case level names, so a
            # value such as "debug" (or an empty variable) would abort logging
            # setup. Normalise the environment value and fall back to INFO.
            "level": (os.environ.get("REVIEW_AGENT_LOG_LEVEL", "").strip() or "INFO").upper(),
            "propagate": True,
        },
    },
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
from django.contrib import admin
|
||||
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
|
||||
from django.urls import path
|
||||
from django.urls import include, path
|
||||
|
||||
from review_agent.views import stream_chat, workspace
|
||||
from review_agent.views import attachment_manager, stream_chat, workspace
|
||||
|
||||
urlpatterns = [
|
||||
path("", workspace, name="home"),
|
||||
path("attachments/", attachment_manager, name="attachment_manager"),
|
||||
path("", include("review_agent.urls")),
|
||||
path("chat/stream/", stream_chat, name="chat_stream"),
|
||||
path(
|
||||
"login/",
|
||||
|
||||
74
docs/5.开发计划/1.自动汇总-前端线框图.md
Normal file
74
docs/5.开发计划/1.自动汇总-前端线框图.md
Normal file
@@ -0,0 +1,74 @@
|
||||
# 自动汇总前端线框图
|
||||
|
||||
## 评审目标
|
||||
|
||||
在实现三栏页面前,先确认审核智能体工作台的信息架构、右侧文件汇总面板、工作流状态展示和移动端降级方式。
|
||||
|
||||
## 桌面端布局
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A["左栏:会话列表<br/>新对话 / 搜索 / 历史会话"] --> B["中栏:聊天区<br/>顶部导航 / 消息流 / 输入框"]
|
||||
B --> C["右栏:文件汇总面板"]
|
||||
C --> C1["上半区:上传区<br/>拖拽上传 / 选择文件 / 上传状态"]
|
||||
C --> C2["中段:当前对话附件<br/>文件名 / 版本 / 大小 / 状态 / 删除"]
|
||||
C --> C3["下半区:工作流卡片<br/>批次号 / 节点进度 / 下载入口"]
|
||||
```
|
||||
|
||||
## 右侧面板结构
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
P["文件汇总面板"] --> U["上传拖拽区"]
|
||||
U --> U0["无附件:提示上传文件或压缩包"]
|
||||
U --> U1["上传中:显示文件名和处理中状态"]
|
||||
U --> U2["上传失败:展示错误并允许重试"]
|
||||
P --> L["附件列表"]
|
||||
L --> L1["active 版本优先展示"]
|
||||
L --> L2["历史版本保留展示"]
|
||||
L --> L3["逻辑删除后从默认候选移除"]
|
||||
P --> W["工作流卡片列表"]
|
||||
W --> W1["运行中:节点逐项更新"]
|
||||
W --> W2["成功:展示 Markdown/Excel 下载"]
|
||||
W --> W3["失败:展示失败节点和错误说明"]
|
||||
```
|
||||
|
||||
## 工作流状态流转
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> Pending: 用户上传附件
|
||||
Pending --> Running: 发送自动汇总提示词
|
||||
Running --> Extracting: 固化附件
|
||||
Extracting --> Scanning: 解压完成或跳过
|
||||
Scanning --> Counting: 生成文件清单
|
||||
Counting --> Detecting: 页数统计完成
|
||||
Detecting --> Reporting: 产品名识别完成
|
||||
Reporting --> Success: 生成报告与下载
|
||||
Running --> Failed: 批次级异常
|
||||
Extracting --> Failed: 解压安全检查失败
|
||||
Reporting --> Failed: 报告生成失败
|
||||
Success --> Restored: 刷新页面后状态恢复
|
||||
Failed --> Restored: 刷新页面后状态恢复
|
||||
```
|
||||
|
||||
## 移动端布局
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
M["移动端工作台"] --> T["顶部:侧栏按钮 / 当前页面 / 用户菜单"]
|
||||
T --> Chat["聊天区优先展示"]
|
||||
Chat --> Composer["底部输入框"]
|
||||
T --> Drawer["会话侧栏抽屉"]
|
||||
Chat --> Panel["文件汇总面板下移或折叠"]
|
||||
Panel --> Upload["上传区"]
|
||||
Panel --> Workflow["工作流卡片"]
|
||||
```
|
||||
|
||||
## 关键评审点
|
||||
|
||||
- 桌面端保持左侧会话、中间聊天、右侧文件汇总三栏,不改变现有聊天主路径。
|
||||
- 右侧面板上半部分用于上传和附件列表,下半部分用于批次工作流卡片。
|
||||
- 工作流卡片节点顺序固定为:附件固化、压缩包解压、文件扫描、页数统计、产品识别、报告输出、完成。
|
||||
- 助手消息中的文件汇总结果使用安全 Markdown 渲染,用户消息仍按纯文本转义。
|
||||
- 移动端优先保证聊天可用,文件汇总面板折叠或下移,不能遮挡输入框。
|
||||
415
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md
Normal file
415
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md
Normal file
@@ -0,0 +1,415 @@
|
||||
# NMPA 注册资料法规核查与整改闭环开发计划(第一批:主链路)
|
||||
|
||||
## 一、已确认口径
|
||||
|
||||
| 问题 | 结论 |
|
||||
| --- | --- |
|
||||
| 第二阶段覆盖范围 | 覆盖原始需求 2、4、5:法规完整性核查、章节/一致性核查、风险预警与整改建议 |
|
||||
| 原始需求 3 | 本阶段只做核查所需的信息抽取,不做自动填写目标文件 |
|
||||
| 执行策略 | 第二阶段拆成两次 Codex 目标执行;第一批先打通 Demo 主链路 |
|
||||
| 启动方式 | 用户对话提示词触发法规核查工作流,不做上传后自动核查 |
|
||||
| 汇总批次 | 默认复用当前对话最近一次成功 `FileSummaryBatch`,不自动串联文件汇总 |
|
||||
| 规则来源 | Demo 先用本地 YAML;数据库记录规则版本、路径、hash、RAG 索引信息 |
|
||||
| 规则差异 | 自动检测 YAML 与数据库记录差异,提示人工确认更新;第一批不做规则管理前端 |
|
||||
| RAG | 必须使用向量库;默认 ChromaDB |
|
||||
| Embedding | Provider 可配置;Demo 默认 SiliconFlow `Qwen/Qwen3-Embedding-4B` |
|
||||
| 法规材料 | 先索引 `docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告` |
|
||||
| 法规文档抽取 | 允许使用 LibreOffice headless 转换本地法规 `.doc` 材料;该依赖只服务 RAG 建库,不改变第一阶段页数统计口径 |
|
||||
| ChromaDB 运行方式 | 第一批采用本地持久化模式,不单独启动 Chroma Server |
|
||||
| 飞书 | 第一批不接真实飞书;暂缓项写入待办计划 |
|
||||
|
||||
---
|
||||
|
||||
## 二、第一批目标
|
||||
|
||||
第一批只追求“可运行、可演示、可追溯”的法规核查主链路:
|
||||
|
||||
```text
|
||||
已有文件汇总批次
|
||||
-> 用户提示词触发法规核查
|
||||
-> 读取本地 YAML 规则
|
||||
-> 检查规则版本和 RAG 索引状态
|
||||
-> 使用 ChromaDB 检索法规依据
|
||||
-> 完整性核查
|
||||
-> 基础章节核查
|
||||
-> 基础一致性核查
|
||||
-> 风险分级和整改建议
|
||||
-> 生成对话摘要、Markdown 报告、Excel 清单、JSON 结果包
|
||||
-> 前端展示法规核查工作流卡片
|
||||
```
|
||||
|
||||
第一批完成后,Demo 应能展示:
|
||||
|
||||
| 展示项 | 内容 |
|
||||
| --- | --- |
|
||||
| 法规依据 | RAG 返回本地法规材料来源和片段 |
|
||||
| 完整性问题 | 如缺少注册检验报告、临床评价资料等 |
|
||||
| 章节问题 | 如说明书缺少储存条件、有效期、样本要求等章节 |
|
||||
| 一致性问题 | 如产品名称、型号规格、预期用途在不同文件中不一致 |
|
||||
| 风险清单 | blocking/high/medium/low/info 五级 |
|
||||
| 报告下载 | Markdown、Excel、JSON |
|
||||
|
||||
---
|
||||
|
||||
## 三、阶段拆分
|
||||
|
||||
| 阶段 | 名称 | 目标 | 验收 |
|
||||
| --- | --- | --- | --- |
|
||||
| RR1-0 | 准备与回归 | 确认第一阶段稳定,创建开发分支 | `pytest` 通过 |
|
||||
| RR1-1 | 模型与兼容改造 | 新增法规核查模型,兼容工作流/导出通用字段 | migration 和模型测试通过 |
|
||||
| RR1-2 | YAML 规则与版本记录 | 建立 Demo 规则文件、规则版本表、hash 差异检测 | 能识别 YAML 与 DB 差异 |
|
||||
| RR1-3 | RAG 索引与检索 | 用 ChromaDB + SiliconFlow embedding 构建本地法规索引 | 能检索法规依据 |
|
||||
| RR1-4 | 触发与工作流骨架 | 对话提示词触发法规核查,复用最近成功汇总批次 | 能创建并运行法规核查批次 |
|
||||
| RR1-5 | 核查服务 | 完整性、基础章节、基础一致性核查 | 生成 findings |
|
||||
| RR1-6 | 风险与导出 | 风险归并、Issue 落库、报告导出 | 生成助手摘要和下载文件 |
|
||||
| RR1-7 | 前端与验收 | 法规核查卡片、状态恢复、Markdown 结果展示 | 全量测试通过 |
|
||||
|
||||
---
|
||||
|
||||
## 四、RR1-0 准备与回归
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 |
|
||||
| --- | --- |
|
||||
| RR1-0-001 | 从当前稳定分支创建 `codex/YYYYMMDD-NMPA法规核查主链路` |
|
||||
| RR1-0-002 | 运行 `python manage.py check`、`pytest` |
|
||||
| RR1-0-003 | 记录第一阶段边界:文件夹上传不作为强验收、RAR 依赖 7z、Office 页数口径可不精确 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
python manage.py check
|
||||
pytest
|
||||
git status --short
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请创建第二阶段第一批开发分支,先确认第一阶段文件汇总功能全量测试通过。本阶段不要修改业务代码,只做环境和边界确认。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、RR1-1 模型与兼容改造
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-1-001 | 新增法规核查模型和枚举 | `review_agent/models.py` |
|
||||
| RR1-1-002 | 给 `WorkflowNodeRun` 增加 `workflow_type`、`workflow_batch_id`、`node_group` | `review_agent/models.py` |
|
||||
| RR1-1-003 | 给 `WorkflowEvent` 增加 `workflow_type`、`workflow_batch_id`、`conversation_id` | `review_agent/models.py` |
|
||||
| RR1-1-004 | 给 `ExportedSummaryFile` 增加 `workflow_type`、`workflow_batch_id`、`export_category` | `review_agent/models.py` |
|
||||
| RR1-1-005 | 保持第一阶段文件汇总写入兼容 | `review_agent/file_summary/*` |
|
||||
| RR1-1-006 | 生成 migration 并补模型测试 | `review_agent/migrations/`、`tests/test_regulatory_models.py` |
|
||||
|
||||
### 新增模型
|
||||
|
||||
| 模型 | 说明 |
|
||||
| --- | --- |
|
||||
| `RegulatoryRuleVersion` | 规则版本、YAML 路径、文件 hash、RAG 索引版本 |
|
||||
| `RegulatoryReviewBatch` | 法规核查批次 |
|
||||
| `RegulatoryIssue` | 风险问题和整改状态 |
|
||||
| `RegulatoryArtifact` | 过程产物 |
|
||||
| `RegulatoryNotificationRecord` | mock 通知预留记录,第一批可只建表不接真实通知 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
python manage.py makemigrations review_agent
|
||||
python manage.py migrate
|
||||
python manage.py check
|
||||
pytest tests/test_regulatory_models.py tests/test_file_summary_workflow.py tests/test_file_summary_views.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请新增法规核查相关模型,并轻量通用化现有工作流节点、事件和导出文件表。必须保持第一阶段文件汇总测试通过,不要重写第一阶段工作流。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 六、RR1-2 YAML 规则与版本记录
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-2-001 | 新建法规核查模块目录 | `review_agent/regulatory_review/` |
|
||||
| RR1-2-002 | 编写 Demo YAML 规则 | `review_agent/regulatory_review/rules/nmpa_ivd_registration_v1.yaml` |
|
||||
| RR1-2-003 | 实现规则 hash 计算和版本记录 | `services/rule_loader.py` |
|
||||
| RR1-2-004 | 实现 YAML 与 DB 差异检测 | `services/rule_loader.py` |
|
||||
| RR1-2-005 | 增加规则版本初始化/检查管理命令 | `management/commands/regulatory_rules_check.py` |
|
||||
| RR1-2-006 | 增加测试 | `tests/test_regulatory_rule_loader.py` |
|
||||
|
||||
### Demo 规则至少覆盖
|
||||
|
||||
| 文件项 | 类型 | 风险 |
|
||||
| --- | --- | --- |
|
||||
| 产品技术要求 | required | blocking |
|
||||
| 说明书 | required | high |
|
||||
| 注册检验报告 | required | blocking |
|
||||
| 临床评价资料 | conditional | high |
|
||||
| 安全和性能基本原则清单 | recommended | medium |
|
||||
|
||||
YAML 规则内容需参考本地法规资料目录:
|
||||
|
||||
```text
|
||||
docs/0.原始材料/关于公布体外诊断试剂注册申报资料要求和批准证明文件格式的公告
|
||||
```
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_rule_loader.py
|
||||
python manage.py regulatory_rules_check
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请建立 Demo 版 NMPA IVD 注册资料 YAML 规则库,并实现规则版本、文件 hash 和数据库记录差异检测。发现 YAML 与 DB hash 不一致时只提示需要更新,不自动覆盖。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 七、RR1-3 RAG 索引与检索
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-3-001 | 增加依赖 `chromadb` 和必要 HTTP 客户端 | `requirements.txt` |
|
||||
| RR1-3-002 | 实现 embedding provider 抽象 | `services/rag_embedding.py` |
|
||||
| RR1-3-003 | 实现 SiliconFlow embedding provider | `services/rag_embedding.py` |
|
||||
| RR1-3-004 | 实现法规文档文本抽取和切块 | `services/rag_index.py` |
|
||||
| RR1-3-005 | 实现 ChromaDB 持久化索引构建命令 | `management/commands/regulatory_rag_build.py` |
|
||||
| RR1-3-006 | 实现 RAG 引用检索服务 | `services/rag_citation.py` |
|
||||
| RR1-3-007 | 增加测试 | `tests/test_regulatory_rag.py` |
|
||||
|
||||
### 配置
|
||||
|
||||
| 配置项 | 默认 |
|
||||
| --- | --- |
|
||||
| `REGULATORY_RAG_PROVIDER` | `siliconflow` |
|
||||
| `REGULATORY_RAG_CHROMA_PATH` | `media/regulatory_review/rag/chroma/` |
|
||||
| `SILICONFLOW_BASE_URL` | `https://api.siliconflow.cn/v1` |
|
||||
| `SILICONFLOW_API_KEY` | 从环境变量读取 |
|
||||
| `SILICONFLOW_EMBEDDING_MODEL` | `Qwen/Qwen3-Embedding-4B` |
|
||||
| `SILICONFLOW_EMBEDDING_DIMENSIONS` | `1024` |
|
||||
| `REGULATORY_RAG_COLLECTION` | `nmpa_ivd_registration_v1` |
|
||||
|
||||
SiliconFlow Embedding API 参考:
|
||||
|
||||
```text
|
||||
https://docs.siliconflow.com/en/api-reference/embeddings/create-embeddings
|
||||
```
|
||||
|
||||
### 规则
|
||||
|
||||
| 场景 | 处理 |
|
||||
| --- | --- |
|
||||
| RAG 索引不存在 | 核查时提示先构建索引,不在核查中临时构建 |
|
||||
| Embedding API 不可用 | 构建命令失败,核查不启动 |
|
||||
| RAG 无命中 | 规则问题仍输出,法规依据标记“原文依据待补充” |
|
||||
| 本地法规 `.doc` 无法直接抽取 | 允许通过 LibreOffice headless 转换后抽取;Docker 部署说明需写明可选安装方式 |
|
||||
| ChromaDB 存储 | 使用本地持久化目录,Docker 部署时通过 volume 挂载保留索引 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
python manage.py regulatory_rag_build
|
||||
pytest tests/test_regulatory_rag.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现基于 ChromaDB 的本地法规 RAG。Embedding Provider 要可配置,Demo 默认使用 SiliconFlow Qwen/Qwen3-Embedding-4B。ChromaDB 使用本地持久化目录,不单独启动服务。法规 `.doc` 材料允许用 LibreOffice headless 转换后抽取。核查流程只检查索引可用性,不临时构建索引。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 八、RR1-4 触发与工作流骨架
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-4-001 | 实现法规核查提示词路由 | `review_agent/skill_router.py` |
|
||||
| RR1-4-002 | 实现法规核查批次创建 | `regulatory_review/workflow.py` |
|
||||
| RR1-4-003 | 默认查找当前对话最近成功 `FileSummaryBatch` | `workflow.py` |
|
||||
| RR1-4-004 | 无成功汇总批次时提示用户先执行自动汇总 | `services.py` |
|
||||
| RR1-4-005 | 实现启动、状态、事件接口 | `regulatory_review/views.py`、`urls.py` |
|
||||
| RR1-4-006 | 接入项目 URL | `config/urls.py` 或 `review_agent/urls.py` |
|
||||
| RR1-4-007 | 增加测试 | `tests/test_regulatory_workflow.py`、`tests/test_regulatory_views.py` |
|
||||
|
||||
### 第一批节点
|
||||
|
||||
```text
|
||||
prepare
|
||||
-> rule_scope
|
||||
-> completeness_check
|
||||
-> text_extract
|
||||
-> structure_check
|
||||
-> consistency_check
|
||||
-> risk_assess
|
||||
-> report_export
|
||||
-> completed
|
||||
```
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_workflow.py tests/test_regulatory_views.py
|
||||
pytest tests/test_file_summary_trigger.py tests/test_llm_streaming.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现法规核查提示词触发和工作流骨架。用户说“法规核查、NMPA核查、完整性核查、风险预警”等意图时启动 regulatory_review;默认复用当前对话最近成功 FileSummaryBatch;没有成功汇总批次时提示先自动汇总。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 九、RR1-5 核查服务
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-5-001 | 实现统一 Finding dataclass | `regulatory_review/schemas.py` |
|
||||
| RR1-5-002 | 完整性核查:文件名、目录名、首页文本匹配 | `services/completeness_check.py` |
|
||||
| RR1-5-003 | 文本抽取:docx/pdf/xlsx/pptx/txt/md 基础文本 | `services/text_extract.py` |
|
||||
| RR1-5-004 | 基础章节核查:按规则关键词判断章节是否存在 | `services/structure_check.py` |
|
||||
| RR1-5-005 | 基础一致性核查:产品名称、型号规格、预期用途 | `services/consistency_check.py` |
|
||||
| RR1-5-006 | 过程产物保存和 hash | `storage.py` |
|
||||
| RR1-5-007 | 增加测试 | `tests/test_regulatory_completeness.py`、`tests/test_regulatory_text_extract.py`、`tests/test_regulatory_structure.py`、`tests/test_regulatory_consistency.py` |
|
||||
|
||||
### Demo 验收样例
|
||||
|
||||
测试或演示资料中至少构造:
|
||||
|
||||
| 条件 | 预期 |
|
||||
| --- | --- |
|
||||
| 有说明书 | 可匹配说明书规则 |
|
||||
| 有产品技术要求 | 可匹配产品技术要求规则 |
|
||||
| 缺少注册检验报告 | 生成 blocking 问题 |
|
||||
| 说明书缺少储存条件章节 | 生成 high 或 medium 问题 |
|
||||
| 产品名称在两个文件中不一致 | 生成 consistency 问题 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_completeness.py tests/test_regulatory_text_extract.py tests/test_regulatory_structure.py tests/test_regulatory_consistency.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现完整性核查、文本抽取、基础章节核查和基础一致性核查。所有核查服务只返回 Finding,不直接创建 RegulatoryIssue。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 十、RR1-6 风险与导出
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-6-001 | Findings 去重和风险归并 | `services/risk_assess.py` |
|
||||
| RR1-6-002 | RAG 引用挂载到问题证据 | `services/risk_assess.py`、`services/rag_citation.py` |
|
||||
| RR1-6-003 | 创建 `RegulatoryIssue` | `services/risk_assess.py` |
|
||||
| RR1-6-004 | 生成 Markdown 核查报告 | `services/export.py` |
|
||||
| RR1-6-005 | 生成 Excel 缺失清单 | `services/export.py` |
|
||||
| RR1-6-006 | 生成 JSON 结果包 | `services/export.py` |
|
||||
| RR1-6-007 | 工作流完成后写入助手消息 | `workflow.py` |
|
||||
| RR1-6-008 | 增加测试 | `tests/test_regulatory_risk_assess.py`、`tests/test_regulatory_export.py` |
|
||||
|
||||
### 对话摘要
|
||||
|
||||
助手消息至少包含:
|
||||
|
||||
```markdown
|
||||
已完成 NMPA 注册资料法规核查。
|
||||
|
||||
| 风险等级 | 数量 |
|
||||
| --- | --- |
|
||||
| 阻断项 | 1 |
|
||||
| 高风险 | 1 |
|
||||
|
||||
| 等级 | 问题 | 状态 | 建议 |
|
||||
| --- | --- | --- | --- |
|
||||
| 阻断项 | 缺少注册检验报告 | 待处理 | 请补充注册检验报告并复核 |
|
||||
|
||||
[下载 Markdown 核查报告](...)
|
||||
[下载 Excel 缺失清单](...)
|
||||
[下载 JSON 结果包](...)
|
||||
```
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_risk_assess.py tests/test_regulatory_export.py tests/test_regulatory_workflow.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现风险归并、RAG 法规依据挂载、Issue 落库和最终报告导出。工作流完成后必须向当前对话写入 Markdown 摘要和下载链接。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 十一、RR1-7 前端与总体验收
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR1-7-001 | 工作流卡片支持 `regulatory_review` 类型 | `templates/home.html`、`static/js/app.js` |
|
||||
| RR1-7-002 | 卡片使用 `workflow_type + workflow_batch_id` 区分 | `static/js/app.js` |
|
||||
| RR1-7-003 | 显示法规核查节点和风险摘要 | `templates/home.html`、`static/js/app.js` |
|
||||
| RR1-7-004 | 页面刷新恢复法规核查卡片 | `views.py`、`static/js/app.js` |
|
||||
| RR1-7-005 | 补前端测试 | `tests/test_regulatory_frontend.py` |
|
||||
| RR1-7-006 | 全量回归 | 全项目 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
python manage.py check
|
||||
pytest
|
||||
```
|
||||
|
||||
如浏览器可用,再运行 Playwright 端到端验证。
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请在现有工作流卡片轮播基础上支持 regulatory_review 类型,展示法规核查节点、风险摘要和完成状态。最后运行 python manage.py check 和 pytest 全量验收。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 十二、第一批 Codex 目标模式提示词
|
||||
|
||||
```text
|
||||
请按 docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第一批主链路.md 执行第二阶段第一批开发。
|
||||
|
||||
目标:
|
||||
完成 NMPA 法规核查主链路,复用当前对话最近成功 FileSummaryBatch,通过用户提示词触发 regulatory_review 工作流,实现 YAML 规则、ChromaDB + SiliconFlow Embedding RAG、完整性核查、基础章节核查、基础一致性核查、风险分级、Markdown/Excel/JSON 报告和前端法规核查卡片。
|
||||
|
||||
执行规则:
|
||||
1. 创建 codex/YYYYMMDD-NMPA法规核查主链路 分支。
|
||||
2. 按 RR1-0 到 RR1-7 顺序执行,不跳阶段。
|
||||
3. 每阶段完成后运行对应验证命令。
|
||||
4. 第一阶段文件汇总测试不得回归。
|
||||
5. 不自动串联文件汇总;没有成功汇总批次时提示用户先自动汇总。
|
||||
6. 不接真实飞书,不做规则管理前端,不做自动填写目标文件。
|
||||
7. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||
```
|
||||
242
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第二批完整闭环.md
Normal file
242
docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第二批完整闭环.md
Normal file
@@ -0,0 +1,242 @@
|
||||
# NMPA 注册资料法规核查与整改闭环开发计划(第二批:完整闭环补齐)
|
||||
|
||||
## 一、第二批目标
|
||||
|
||||
第二批在第一批主链路通过后执行,补齐完整整改闭环和交互能力:
|
||||
|
||||
```text
|
||||
适用条件对话选择框
|
||||
-> waiting_user 暂停恢复
|
||||
-> 整包复核
|
||||
-> 缺失项复核
|
||||
-> mock 通知留痕
|
||||
-> 更完整的过程产物
|
||||
-> 更强的前端交互和验收测试
|
||||
```
|
||||
|
||||
飞书真实 CLI/API、规则管理前端、自动填写目标文件不在第二批落地,进入 `docs/6.待办计划/第二阶段暂缓事项.md`。
|
||||
|
||||
---
|
||||
|
||||
## 二、阶段总览
|
||||
|
||||
| 阶段 | 名称 | 目标 | 验收 |
|
||||
| --- | --- | --- | --- |
|
||||
| RR2-1 | 适用条件确认 | 对话选择框确认产品类别、注册类型、临床评价路径等 | waiting_user 可暂停恢复 |
|
||||
| RR2-2 | 核查能力增强 | 扩展章节、一致性、RAG 引用和文本抽取范围 | 复杂样例可识别更多问题 |
|
||||
| RR2-3 | 整包复核 | 基于新的汇总批次创建新的法规核查批次 | 可追溯来源批次 |
|
||||
| RR2-4 | 缺失项复核 | 针对原 Issue 执行复核并更新状态 | 生成 review_record |
|
||||
| RR2-5 | mock 通知留痕 | 对 blocking/high/medium 写 mock 通知记录 | 报告展示通知记录 |
|
||||
| RR2-6 | 前端和总体验收 | 条件选择框、复核入口、通知/复核记录展示 | 全量测试通过 |
|
||||
|
||||
---
|
||||
|
||||
## 三、RR2-1 适用条件确认
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR2-1-001 | 实现适用条件候选识别 | `services/info_extract.py` |
|
||||
| RR2-1-002 | 工作流支持 `waiting_user` 暂停 | `regulatory_review/workflow.py` |
|
||||
| RR2-1-003 | 实现条件确认接口 | `regulatory_review/views.py` |
|
||||
| RR2-1-004 | 实现对话选择框 UI | `templates/home.html`、`static/js/app.js` |
|
||||
| RR2-1-005 | 确认后从 `rule_scope` 或下一节点恢复 | `workflow.py` |
|
||||
| RR2-1-006 | 增加测试 | `tests/test_regulatory_condition.py`、`tests/test_regulatory_frontend.py` |
|
||||
|
||||
### 确认字段
|
||||
|
||||
以下选项来自既有第二阶段功能/详细设计:`RegulatoryInfoExtract` 输出产品类别、注册类型、临床评价路径,功能设计中明确注册类型包括“首次注册、变更注册、延续注册等”,临床评价路径包括“临床试验、免临床、同品种比对等”。因此 Demo 版按下表实现。
|
||||
|
||||
| 字段 | 交互 |
|
||||
| --- | --- |
|
||||
| 产品类别 | 体外诊断试剂 / 医疗器械 / 其他 |
|
||||
| 注册类型 | 首次注册 / 变更注册 / 延续注册 |
|
||||
| 临床评价路径 | 临床试验 / 免临床 / 同品种比对 / 待确认 |
|
||||
| 产品名称 | 文本输入 |
|
||||
| 型号规格 | 文本输入 |
|
||||
| 预期用途 | 文本输入 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_condition.py tests/test_regulatory_frontend.py tests/test_regulatory_workflow.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现法规适用条件候选识别、waiting_user 暂停恢复和对话选择框确认。用户确认前工作流不得继续执行规则裁剪。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 四、RR2-2 核查能力增强
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR2-2-001 | 扩展 YAML 规则中的必需章节和一致性字段 | `rules/nmpa_ivd_registration_v1.yaml` |
|
||||
| RR2-2-002 | 增强文本抽取,缓存章节候选和字段候选 | `services/text_extract.py` |
|
||||
| RR2-2-003 | 增强章节核查,支持别名、近似标题和证据片段 | `services/structure_check.py` |
|
||||
| RR2-2-004 | 增强一致性核查,支持多个来源值和低置信度提示项 | `services/consistency_check.py` |
|
||||
| RR2-2-005 | RAG 引用写入 `rag_result_json` 过程产物 | `services/rag_citation.py`、`storage.py` |
|
||||
| RR2-2-006 | 增加测试 | `tests/test_regulatory_structure.py`、`tests/test_regulatory_consistency.py`、`tests/test_regulatory_rag.py` |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_structure.py tests/test_regulatory_consistency.py tests/test_regulatory_rag.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请增强章节核查、一致性核查和 RAG 过程产物。证据必须包含文件路径、命中片段、字段名或规则 ID,便于人工复核。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 五、RR2-3 整包复核
|
||||
|
||||
### 口径
|
||||
|
||||
整包复核不是修改原法规核查批次,而是基于新的成功 `FileSummaryBatch` 创建新的 `RegulatoryReviewBatch`。新批次记录来源批次信息,用于报告中展示“复核来源”。
|
||||
|
||||
复核入口不新增独立页面。前端通过法规核查工作流卡片展示复核入口,用户点击后由 AI 在对话区发起确认与引导。
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR2-3-001 | 新增整包复核启动接口 | `regulatory_review/views.py` |
|
||||
| RR2-3-002 | 支持指定新的 `file_summary_batch_id` | `workflow.py` |
|
||||
| RR2-3-003 | 记录 source/regenerated_from 信息 | `RegulatoryReviewBatch.condition_json` 或独立字段 |
|
||||
| RR2-3-004 | 报告展示整包复核来源 | `services/export.py` |
|
||||
| RR2-3-005 | 增加测试 | `tests/test_regulatory_rectification.py` |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_rectification.py tests/test_regulatory_workflow.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现整包复核:用户完成新的文件汇总后,可基于新 FileSummaryBatch 创建新的 RegulatoryReviewBatch,并在报告中追溯原核查批次。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 六、RR2-4 缺失项复核
|
||||
|
||||
### 口径
|
||||
|
||||
缺失项复核针对原 `RegulatoryIssue` 更新状态,不新建完整法规核查批次。系统可读取补充文件对应的新 `FileSummaryBatch`,只对指定问题重新匹配相关规则。
|
||||
|
||||
缺失项复核同样不新增独立页面。卡片只展示入口和状态,具体确认动作通过 AI 对话完成,例如确认复核哪些问题、使用哪个补充文件汇总批次。
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR2-4-001 | 实现缺失项复核服务 | `services/rectification_review.py` |
|
||||
| RR2-4-002 | 支持 issue_ids + file_summary_batch_id 输入 | `views.py` |
|
||||
| RR2-4-003 | 复核通过更新 `review_passed`,不通过更新 `review_failed` | `services/rectification_review.py` |
|
||||
| RR2-4-004 | 生成 `review_record` 过程产物 | `storage.py` |
|
||||
| RR2-4-005 | 报告展示复核记录 | `services/export.py` |
|
||||
| RR2-4-006 | 增加测试 | `tests/test_regulatory_rectification.py` |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_rectification.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现缺失项复核。复核不重新跑完整法规核查工作流,只针对指定 RegulatoryIssue 和补充文件汇总批次更新问题状态,并生成 review_record 产物。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 七、RR2-5 mock 通知留痕
|
||||
|
||||
### 口径
|
||||
|
||||
真实飞书暂缓。第二批只在 blocking/high/medium 风险项出现时创建 `RegulatoryNotificationRecord(channel=mock)`,用于报告留痕和第三阶段接入。
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR2-5-001 | 实现 mock notifier | `services/feishu_notifier.py` |
|
||||
| RR2-5-002 | 风险等级 blocking/high/medium 写通知记录 | `workflow.py` |
|
||||
| RR2-5-003 | 通知记录进入 Markdown/Excel/JSON 报告 | `services/export.py` |
|
||||
| RR2-5-004 | 增加测试 | `tests/test_regulatory_notification.py` |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
pytest tests/test_regulatory_notification.py tests/test_regulatory_export.py
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请实现 mock 通知留痕。不要接真实飞书 CLI/API;只为阻断项、高风险、中风险写 RegulatoryNotificationRecord,并在报告中展示。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 八、RR2-6 前端和总体验收
|
||||
|
||||
### 任务
|
||||
|
||||
| 编号 | 内容 | 文件 |
|
||||
| --- | --- | --- |
|
||||
| RR2-6-001 | 前端显示条件确认卡片 | `templates/home.html`、`static/js/app.js` |
|
||||
| RR2-6-002 | 前端通过工作流卡片展示整包复核入口,并由 AI 对话确认 | `static/js/app.js` |
|
||||
| RR2-6-003 | 前端通过工作流卡片展示缺失项复核入口,并由 AI 对话确认 | `static/js/app.js` |
|
||||
| RR2-6-004 | 卡片展示通知和复核摘要 | `templates/home.html`、`static/js/app.js` |
|
||||
| RR2-6-005 | 补 Playwright 或前端测试 | `tests/test_regulatory_frontend.py` |
|
||||
| RR2-6-006 | 全量回归 | 全项目 |
|
||||
|
||||
### 验证命令
|
||||
|
||||
```bash
|
||||
python manage.py check
|
||||
pytest
|
||||
```
|
||||
|
||||
### Codex 执行提示
|
||||
|
||||
```text
|
||||
请完善法规核查前端交互,包含条件选择框、卡片式整包复核入口、卡片式缺失项复核入口、AI 对话确认、mock 通知和复核记录展示。不要新增独立复核页面。最后运行 python manage.py check 和 pytest 全量验收。
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 九、第二批 Codex 目标模式提示词
|
||||
|
||||
```text
|
||||
请按 docs/5.开发计划/2.NMPA注册资料法规核查与整改闭环-第二批完整闭环.md 执行第二阶段第二批开发。
|
||||
|
||||
前提:
|
||||
第一批主链路已经完成并通过全量测试。
|
||||
|
||||
目标:
|
||||
补齐法规核查完整整改闭环,包括适用条件对话选择框、waiting_user 暂停恢复、整包复核、缺失项复核、mock 通知留痕、增强章节/一致性核查和前端交互。
|
||||
|
||||
执行规则:
|
||||
1. 从第一批完成后的稳定分支创建 codex/YYYYMMDD-NMPA法规核查完整闭环 分支。
|
||||
2. 按 RR2-1 到 RR2-6 顺序执行。
|
||||
3. 每阶段完成后运行对应验证命令。
|
||||
4. 不接真实飞书 CLI/API。
|
||||
5. 不做规则管理前端。
|
||||
6. 不做自动填写目标文件。
|
||||
7. 最后运行 python manage.py check 和 pytest 全量验收。
|
||||
```
|
||||
51
docs/6.待办计划/第二阶段暂缓事项.md
Normal file
51
docs/6.待办计划/第二阶段暂缓事项.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# 第二阶段暂缓事项待办表
|
||||
|
||||
## 一、待办原则
|
||||
|
||||
以下事项不进入第二阶段第一批或第二批落地范围。完成 Demo 主任务后,再根据展示效果和剩余时间决定是否进入第三阶段。
|
||||
|
||||
---
|
||||
|
||||
## 二、第三阶段第一批建议事项
|
||||
|
||||
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| TODO-3-001 | 真实飞书 CLI/API 接入 | 第二阶段通知能力 | P0 | 替换第二阶段 mock 通知,支持真实发送 |
|
||||
| TODO-3-002 | 用户与飞书账号映射 | 第二阶段通知能力 | P0 | 维护 Django User 到飞书 open_id、手机号或邮箱的映射 |
|
||||
| TODO-3-003 | 飞书通知模板和失败重试完善 | 第二阶段通知能力 | P0 | 支持风险摘要、报告链接、重试、失败告警 |
|
||||
| TODO-3-004 | 飞书通知权限和脱敏策略 | 第二阶段通知能力 | P1 | 通知中不暴露完整敏感文件内容 |
|
||||
|
||||
---
|
||||
|
||||
## 三、规则管理后续事项
|
||||
|
||||
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| TODO-RULE-001 | 规则管理前端 | YAML + DB 规则版本 | P1 | 展示 YAML 与数据库 hash 差异,支持人工确认导入 |
|
||||
| TODO-RULE-002 | 规则导入审批流 | 合规追溯 | P1 | 规则版本变更需要审批和留痕 |
|
||||
| TODO-RULE-003 | 规则/RAG 状态管理页 | RAG 运维 | P1 | 展示规则版本、YAML hash、Chroma 索引版本、索引状态和重建提示 |
|
||||
| TODO-RULE-004 | RAG 索引重建前端入口 | RAG 运维 | P1 | 前端触发或提示重建法规 RAG 索引 |
|
||||
| TODO-RULE-005 | 官网法规定期更新 | 原始需求法规来源 | P2 | 后续从 NMPA/CMDE 官网定期抓取或人工导入 |
|
||||
|
||||
---
|
||||
|
||||
## 四、原始需求 3 后续事项
|
||||
|
||||
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| TODO-FILL-001 | 产品关键信息抽取结果确认 | 原始需求 3 | P1 | 将第二阶段抽取字段转成可人工确认的信息表 |
|
||||
| TODO-FILL-002 | 自动填写目标文件 | 原始需求 3 | P1 | 将确认后的字段写入注册申报表格或对照清单 |
|
||||
| TODO-FILL-003 | 填写前后差异报告 | 自动填写风控 | P1 | 输出写入前后 diff,供人工复核 |
|
||||
| TODO-FILL-004 | 自动填写审批确认 | 自动填写风控 | P1 | 文件写操作前必须人工确认 |
|
||||
|
||||
---
|
||||
|
||||
## 五、其他增强事项
|
||||
|
||||
| 编号 | 待办项 | 来源 | 建议优先级 | 说明 |
|
||||
| --- | --- | --- | --- | --- |
|
||||
| TODO-EXT-001 | 无汇总批次时自动串联文件汇总 | 第二阶段启动方式 | P2 | 当前口径为提示用户先自动汇总,暂不自动串联 |
|
||||
| TODO-EXT-002 | 文件夹上传增强 | 第一阶段边界 | P2 | 浏览器 `webkitdirectory` 或目录上传能力 |
|
||||
| TODO-EXT-003 | Office 精确分页 | 第一阶段边界 | P2 | 引入 LibreOffice headless 转 PDF 后统计页数 |
|
||||
| TODO-EXT-004 | OCR 文本抽取 | 章节/一致性核查增强 | P2 | 支持扫描件和图片型 PDF |
|
||||
| TODO-EXT-005 | 独立 Chroma Server 部署 | RAG 运维增强 | P2 | 当前第二阶段使用本地持久化 ChromaDB,后续可演进为独立服务 |
|
||||
3
pytest.ini
Normal file
3
pytest.ini
Normal file
@@ -0,0 +1,3 @@
|
||||
[pytest]
|
||||
DJANGO_SETTINGS_MODULE = config.settings
|
||||
python_files = tests.py test_*.py *_tests.py
|
||||
@@ -1 +1,9 @@
|
||||
Django>=5.0,<6.0
|
||||
pypdf>=5.0
|
||||
python-docx>=1.1
|
||||
python-pptx>=1.0
|
||||
openpyxl>=3.1
|
||||
xlrd>=2.0
|
||||
olefile>=0.47
|
||||
py7zr>=0.21
|
||||
playwright>=1.60
|
||||
|
||||
1
review_agent/file_summary/__init__.py
Normal file
1
review_agent/file_summary/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
4
review_agent/file_summary/constants.py
Normal file
4
review_agent/file_summary/constants.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Relative directory ("file_summary/users") used as the root for attachments.
ATTACHMENT_ROOT = Path("file_summary", "users")
|
||||
16
review_agent/file_summary/events.py
Normal file
16
review_agent/file_summary/events.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from review_agent.models import FileSummaryBatch, WorkflowEvent
|
||||
|
||||
|
||||
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Create a ``WorkflowEvent`` for ``batch`` and return the new object.

    Args:
        batch: The file-summary batch the event is attached to.
        event_type: Identifier stored in the event's ``event_type`` field.
        payload: Optional event data; any falsy value (``None`` or an empty
            dict) is replaced by a new empty dict.
    """
    event_payload = payload if payload else {}
    return WorkflowEvent.objects.create(
        batch=batch,
        event_type=event_type,
        payload=event_payload,
    )
|
||||
|
||||
|
||||
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Build a plain dictionary describing ``event``.

    The result holds the primary key (as ``"id"``), the event type, the
    payload object as stored on the event, and ``created_at`` rendered through
    ``isoformat()``.
    """
    serialized: dict[str, object] = {}
    serialized["id"] = event.pk
    serialized["event_type"] = event.event_type
    serialized["payload"] = event.payload
    serialized["created_at"] = event.created_at.isoformat()
    return serialized
|
||||
12
review_agent/file_summary/paths.py
Normal file
12
review_agent/file_summary/paths.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
|
||||
def resolve_storage_path(storage_path: str) -> Path:
    """Turn a stored path string into a ``Path``.

    An absolute path is returned as given; any other path is joined onto
    ``settings.MEDIA_ROOT``.
    """
    candidate = Path(storage_path)
    return candidate if candidate.is_absolute() else Path(settings.MEDIA_ROOT) / candidate
|
||||
1
review_agent/file_summary/services/__init__.py
Normal file
1
review_agent/file_summary/services/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
125
review_agent/file_summary/services/archive.py
Normal file
125
review_agent/file_summary/services/archive.py
Normal file
@@ -0,0 +1,125 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
|
||||
import py7zr
|
||||
|
||||
|
||||
# Archive suffixes (lower-case, without the leading dot) accepted by extract_archive.
ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"}
|
||||
|
||||
# Module-level logger; its dotted name places it beneath the "review_agent" logger.
logger = logging.getLogger("review_agent.file_summary.services.archive")
|
||||
|
||||
|
||||
def _ensure_inside_target(path: Path, target_dir: Path) -> None:
|
||||
target = target_dir.resolve()
|
||||
resolved = path.resolve()
|
||||
if target != resolved and target not in resolved.parents:
|
||||
raise ValueError("解压路径必须位于批次工作目录内。")
|
||||
|
||||
|
||||
def _safe_member_path(target_dir: Path, member_name: str) -> Path:
    """Return ``target_dir / member_name`` after confirming it stays inside ``target_dir``.

    The containment check is delegated to ``_ensure_inside_target``, so a
    member name that leads outside the directory raises ``ValueError``.
    """
    candidate = target_dir.joinpath(member_name)
    _ensure_inside_target(candidate, target_dir)
    return candidate
|
||||
|
||||
|
||||
def extract_archive(archive_path: str | Path, target_dir: str | Path) -> list[Path]:
    """Unpack ``archive_path`` into ``target_dir`` and return the extractor's result.

    The target directory is created (including parents) before the suffix is
    inspected. The suffix is compared case-insensitively against
    ``ARCHIVE_EXTENSIONS``; an unsupported suffix yields an empty list.
    "zip" and "7z" have dedicated extractors, and every other supported
    suffix is handled by the RAR extractor.
    """
    source = Path(archive_path)
    destination = Path(target_dir)
    destination.mkdir(parents=True, exist_ok=True)

    extension = source.suffix.lower().lstrip(".")
    if extension not in ARCHIVE_EXTENSIONS:
        return []

    # Anything supported that is neither zip nor 7z falls through to RAR.
    extractors = {"zip": _extract_zip, "7z": _extract_7z}
    extractor = extractors.get(extension, _extract_rar)
    return extractor(source, destination)
|
||||
|
||||
|
||||
def _extract_zip(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a ZIP archive member by member and return the written files.

    Each member path is validated with ``_safe_member_path`` before anything
    is written, so a traversal entry raises ``ValueError``. Directory entries
    are created but not included in the returned list.
    """
    # Local import keeps this block self-contained without touching the
    # module-level import list.
    import shutil

    extracted: list[Path] = []
    with ZipFile(archive_path) as archive:
        for member in archive.infolist():
            destination = _safe_member_path(target_dir, member.filename)
            if member.is_dir():
                destination.mkdir(parents=True, exist_ok=True)
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            with archive.open(member) as source, destination.open("wb") as target:
                # Stream in chunks instead of source.read(): a large member
                # must not be held in memory as one bytes object.
                shutil.copyfileobj(source, target)
            extracted.append(destination)
    return extracted
|
||||
|
||||
|
||||
def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a 7z archive with py7zr and return the extracted regular files.

    All member names are validated with ``_safe_member_path`` before
    ``extractall`` runs, so an unsafe name raises ``ValueError`` before any
    file is written.
    """
    with py7zr.SevenZipFile(archive_path, mode="r") as archive:
        member_names = archive.getnames()
        for member_name in member_names:
            _safe_member_path(target_dir, member_name)
        archive.extractall(path=target_dir)

    # Keep only names that ended up as regular files on disk.
    extracted: list[Path] = []
    for member_name in member_names:
        candidate = target_dir / member_name
        if candidate.is_file():
            extracted.append(candidate)
    return extracted
|
||||
|
||||
|
||||
def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a RAR archive, preferring libarchive and falling back to 7z.

    The ``7z`` fallback is used when the libarchive attempt raises any
    exception or returns no files.
    """
    files: list[Path] = []
    try:
        files = _extract_rar_with_libarchive(archive_path, target_dir)
    except Exception as exc:
        logger.warning(
            "RAR libarchive extract failed, falling back to 7z",
            extra={"archive_path": str(archive_path), "target_dir": str(target_dir), "error": str(exc)},
        )
    else:
        if not files:
            logger.info(
                "RAR libarchive extract produced no files, falling back to 7z",
                extra={"archive_path": str(archive_path), "target_dir": str(target_dir)},
            )

    if files:
        return files
    return _extract_rar_with_7z(archive_path, target_dir)
|
||||
|
||||
|
||||
def _extract_rar_with_libarchive(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a RAR archive through the optional ``libarchive`` binding.

    Directory entries are created, regular files are written block by block,
    and any other entry type is logged and skipped.

    Raises:
        RuntimeError: if ``libarchive`` cannot be imported.
        ValueError: if an entry would be written outside ``target_dir``.
    """
    try:
        import libarchive
    except ImportError as exc:
        raise RuntimeError("未安装 libarchive,跳过 Python RAR 解压。") from exc

    written: list[Path] = []
    with libarchive.file_reader(str(archive_path)) as entries:
        for entry in entries:
            destination = _safe_member_path(target_dir, entry.pathname)
            if entry.isdir:
                destination.mkdir(parents=True, exist_ok=True)
            elif entry.isfile:
                destination.parent.mkdir(parents=True, exist_ok=True)
                with destination.open("wb") as target:
                    for block in entry.get_blocks():
                        target.write(block)
                written.append(destination)
            else:
                logger.info(
                    "RAR libarchive skipped non-regular entry",
                    extra={"archive_path": str(archive_path), "entry": entry.pathname},
                )
    return written
|
||||
|
||||
|
||||
def _extract_rar_with_7z(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract an archive by invoking the external ``7z`` command.

    Returns every regular file found under ``target_dir`` afterwards; each is
    checked with ``_ensure_inside_target``.

    Raises:
        RuntimeError: if ``7z`` exits with a non-zero status.
    """
    command = ["7z", "x", f"-o{target_dir}", str(archive_path), "-y"]
    completed = subprocess.run(command, check=False, capture_output=True, text=True)
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr or completed.stdout or "rar 解压失败")

    files: list[Path] = []
    for candidate in target_dir.rglob("*"):
        if candidate.is_file():
            files.append(candidate)
    # Validation happens after extraction: the external tool has already
    # written its output by the time these checks run.
    for candidate in files:
        _ensure_inside_target(candidate, target_dir)
    return files
|
||||
219
review_agent/file_summary/services/attachment_reader.py
Normal file
219
review_agent/file_summary/services/attachment_reader.py
Normal file
@@ -0,0 +1,219 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from review_agent.models import FileAttachment
|
||||
|
||||
|
||||
TEXT_EXTENSIONS = {"txt", "md", "csv", "json", "log"}
|
||||
SUPPORTED_EXTENSIONS = TEXT_EXTENSIONS | {"pdf", "docx", "xlsx", "pptx"}
|
||||
MAX_PREVIEW_CHARS = 3000
|
||||
MAX_ROWS_PER_SHEET = 20
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.attachment_reader")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AttachmentReadResult:
    """Immutable outcome of parsing one attachment."""

    # "success", "failed" or "unsupported" are the values produced in this module.
    status: str
    # Original file name of the attachment.
    filename: str
    # Lower-cased extension without the leading dot.
    file_type: str
    # Size recorded on the attachment.
    file_size: int
    # Truncated text preview; empty on failure.
    preview_text: str = ""
    # Per-page / per-sheet / per-slide dictionaries produced by the readers.
    sections: list[dict[str, object]] = field(default_factory=list)
    # Failure description; empty on success.
    error_message: str = ""

    def to_dict(self) -> dict[str, object]:
        """Return all fields as a plain dictionary."""
        payload = asdict(self)
        return payload
|
||||
|
||||
|
||||
def read_attachment_details(attachment: FileAttachment) -> AttachmentReadResult:
    """Parse one attachment and return its sections plus a text preview.

    The file type comes from the extension of ``attachment.original_name``.
    A missing file gives a ``"failed"`` result, an extension outside
    ``SUPPORTED_EXTENSIONS`` gives ``"unsupported"``, and any exception raised
    by a reader is logged and reported as ``"failed"`` with the exception text.
    """
    resolved = _attachment_absolute_path(attachment)
    extension = Path(attachment.original_name).suffix.lower().lstrip(".")
    logger.info(
        "Attachment read started",
        extra={
            "attachment_id": attachment.pk,
            "conversation_id": attachment.conversation_id,
            "original_name": attachment.original_name,
            "file_type": extension,
            "storage_path": attachment.storage_path,
            "resolved_path": str(resolved),
        },
    )

    if not resolved.exists():
        logger.warning(
            "Attachment read missing file",
            extra={"attachment_id": attachment.pk, "resolved_path": str(resolved)},
        )
        return _failed(attachment, extension, "附件文件不存在。")
    if extension not in SUPPORTED_EXTENSIONS:
        logger.warning(
            "Attachment read unsupported type",
            extra={"attachment_id": attachment.pk, "file_type": extension},
        )
        return _failed(attachment, extension, f"暂不支持解析 .{extension or 'unknown'} 文件。", "unsupported")

    # Formats with a dedicated reader; everything else is read as plain text.
    readers = {
        "pdf": _read_pdf,
        "docx": _read_docx,
        "xlsx": _read_xlsx,
        "pptx": _read_pptx,
        "csv": _read_csv,
    }
    reader = readers.get(extension, _read_text)
    try:
        sections = reader(resolved)
    except Exception as exc:
        logger.exception(
            "Attachment read failed",
            extra={"attachment_id": attachment.pk, "file_type": extension, "error": str(exc)},
        )
        return _failed(attachment, extension, str(exc))

    preview = _build_preview(sections)
    logger.info(
        "Attachment read finished",
        extra={
            "attachment_id": attachment.pk,
            "section_count": len(sections),
            "preview_length": len(preview),
        },
    )
    return AttachmentReadResult(
        status="success",
        filename=attachment.original_name,
        file_type=extension,
        file_size=attachment.file_size,
        preview_text=preview[:MAX_PREVIEW_CHARS],
        sections=sections,
    )
|
||||
|
||||
|
||||
def _attachment_absolute_path(attachment: FileAttachment) -> Path:
    """Return the on-disk path of an attachment.

    An absolute ``storage_path`` is used as is; a relative one is joined onto
    ``settings.MEDIA_ROOT``.
    """
    stored = Path(attachment.storage_path)
    return stored if stored.is_absolute() else Path(settings.MEDIA_ROOT) / stored
|
||||
|
||||
|
||||
def _failed(
    attachment: FileAttachment,
    file_type: str,
    message: str,
    status: str = "failed",
) -> AttachmentReadResult:
    """Build a non-success result carrying ``message`` as the error text.

    ``status`` defaults to ``"failed"``; the preview and sections keep their
    empty defaults.
    """
    outcome = AttachmentReadResult(
        status=status,
        filename=attachment.original_name,
        file_type=file_type,
        file_size=attachment.file_size,
        error_message=message,
    )
    return outcome
|
||||
|
||||
|
||||
def _read_text(path: Path) -> list[dict[str, object]]:
    """Return a single text section holding the start of a UTF-8 text file.

    Undecodable bytes are replaced rather than raising. Only the first
    ``MAX_PREVIEW_CHARS`` characters are read, so a very large file is not
    loaded into memory just to be truncated.
    """
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        text = handle.read(MAX_PREVIEW_CHARS)
    return [{"type": "text", "name": path.name, "text": text}]
|
||||
|
||||
|
||||
def _read_csv(path: Path) -> list[dict[str, object]]:
    """Return one table section for a CSV file.

    ``row_count`` is the total number of rows; ``rows`` holds only the first
    ``MAX_ROWS_PER_SHEET`` of them. The file is streamed, so rows beyond the
    preview limit are counted but never kept in memory.
    """
    preview_rows: list[list[str]] = []
    row_count = 0
    # utf-8-sig strips a leading BOM; newline="" is what the csv module expects.
    with path.open("r", encoding="utf-8-sig", errors="replace", newline="") as handle:
        for row in csv.reader(handle):
            row_count += 1
            if len(preview_rows) < MAX_ROWS_PER_SHEET:
                preview_rows.append([str(cell) for cell in row])
    return [
        {
            "type": "table",
            "name": path.name,
            "row_count": row_count,
            "rows": preview_rows,
        }
    ]
|
||||
|
||||
|
||||
def _read_pdf(path: Path) -> list[dict[str, object]]:
    """Return one section per PDF page with that page's extracted text.

    Pages are numbered from 1; a page with no extractable text yields an
    empty string.
    """
    from pypdf import PdfReader

    document = PdfReader(str(path))
    return [
        {"type": "page", "name": f"第 {number} 页", "text": page.extract_text() or ""}
        for number, page in enumerate(document.pages, start=1)
    ]
|
||||
|
||||
|
||||
def _read_docx(path: Path) -> list[dict[str, object]]:
    """Return a body-text section followed by one section per table.

    The body section joins all non-blank paragraphs with newlines and is
    always present, even when empty. Each table section reports its total row
    count and keeps at most ``MAX_ROWS_PER_SHEET`` rows.
    """
    from docx import Document

    document = Document(str(path))

    body_lines = []
    for paragraph in document.paragraphs:
        stripped = paragraph.text.strip()
        if stripped:
            body_lines.append(stripped)
    sections: list[dict[str, object]] = [
        {"type": "text", "name": "正文", "text": "\n".join(body_lines)}
    ]

    for number, table in enumerate(document.tables, start=1):
        table_rows = [[cell.text.strip() for cell in row.cells] for row in table.rows]
        sections.append(
            {
                "type": "table",
                "name": f"表格 {number}",
                "row_count": len(table_rows),
                "rows": table_rows[:MAX_ROWS_PER_SHEET],
            }
        )
    return sections
|
||||
|
||||
|
||||
def _read_xlsx(path: Path) -> list[dict[str, object]]:
    """Return one section per worksheet of an XLSX workbook.

    Each section carries the sheet title, the row and column counts reported
    by openpyxl, and at most ``MAX_ROWS_PER_SHEET`` rows with ``None`` cells
    rendered as empty strings. The workbook is opened read-only with cached
    values (``data_only=True``).
    """
    from openpyxl import load_workbook

    workbook = load_workbook(str(path), read_only=True, data_only=True)
    try:
        sections = []
        for sheet in workbook.worksheets:
            rows = [
                ["" if cell is None else str(cell) for cell in row]
                for row in sheet.iter_rows(max_row=MAX_ROWS_PER_SHEET, values_only=True)
            ]
            sections.append(
                {
                    "type": "sheet",
                    "name": sheet.title,
                    "row_count": sheet.max_row,
                    "column_count": sheet.max_column,
                    "rows": rows,
                }
            )
        return sections
    finally:
        # Close even if a sheet fails to parse, so the file is not left open.
        workbook.close()
|
||||
|
||||
|
||||
def _read_pptx(path: Path) -> list[dict[str, object]]:
    """Return one section per slide with the text of its shapes.

    Only shapes that expose a non-blank ``text`` attribute contribute; their
    stripped texts are joined with newlines. Slides are numbered from 1.
    """
    from pptx import Presentation

    deck = Presentation(str(path))
    sections = []
    for number, slide in enumerate(deck.slides, start=1):
        slide_texts = [
            shape.text.strip()
            for shape in slide.shapes
            if hasattr(shape, "text") and shape.text.strip()
        ]
        sections.append({"type": "slide", "name": f"幻灯片 {number}", "text": "\n".join(slide_texts)})
    return sections
|
||||
|
||||
|
||||
def _build_preview(sections: list[dict[str, object]]) -> str:
    """Flatten parsed sections into a newline-separated preview string.

    A section contributes its non-empty ``text`` and the first five entries
    of ``rows``, each rendered with cells joined by `` | ``. Empty fragments
    are dropped and the result is stripped.
    """
    fragments: list[str] = []
    for section in sections:
        text = section.get("text")
        if text:
            fragments.append(str(text))
        table_rows = section.get("rows")
        if table_rows:
            for row in table_rows[:5]:
                fragments.append(" | ".join(str(cell) for cell in row))
    return "\n".join(filter(None, fragments)).strip()
|
||||
65
review_agent/file_summary/services/export_excel.py
Normal file
65
review_agent/file_summary/services/export_excel.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
from openpyxl import Workbook
|
||||
|
||||
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.export_excel")
|
||||
|
||||
|
||||
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return the batch's ``exports`` directory, creating it if necessary.

    The directory lives under ``batch.work_dir`` when that is set, otherwise
    under ``MEDIA_ROOT/file_summary/<batch_no>``.
    """
    if batch.work_dir:
        base = Path(batch.work_dir)
    else:
        base = Path(settings.MEDIA_ROOT) / "file_summary" / batch.batch_no
    exports = base / "exports"
    exports.mkdir(parents=True, exist_ok=True)
    return exports
|
||||
|
||||
|
||||
def _append_text_safe_row(sheet, values) -> None:
    """Append ``values`` as a row, keeping strings that start with ``=`` as text.

    openpyxl stores such strings as formulas. File and directory names come
    from uploaded archives, so they must not become live formulas when the
    workbook is opened.
    """
    sheet.append(values)
    appended_row = sheet.max_row
    for row in sheet.iter_rows(min_row=appended_row, max_row=appended_row):
        for cell in row:
            if isinstance(cell.value, str) and cell.value.startswith("="):
                cell.data_type = "s"


def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile:
    """Write the batch summary workbook and record it as an export.

    The workbook has a summary sheet with the batch counters and a detail
    sheet with one row per item ordered by ``file_index``. It is saved as
    ``<batch_no>-summary.xlsx`` in the batch exports directory and an
    ``ExportedSummaryFile`` row pointing at it is created and returned.
    """
    logger.info("Excel export generation started", extra={"batch_id": batch.pk})
    workbook = Workbook()

    summary = workbook.active
    summary.title = "汇总信息"
    summary_rows = [
        ["批次号", batch.batch_no],
        ["产品名称", batch.product_name or "-"],
        ["文件总数", batch.total_files],
        ["统计成功", batch.success_files],
        ["统计失败", batch.failed_files],
        ["不支持", batch.unsupported_files],
        ["不确定", batch.uncertain_files],
        ["总页数", batch.total_pages],
    ]
    for summary_row in summary_rows:
        _append_text_safe_row(summary, summary_row)

    detail = workbook.create_sheet("文件明细")
    detail.append(["序号", "目录层级", "文件名", "类型", "页数", "路径", "状态", "重试次数", "异常说明"])
    for item in batch.items.order_by("file_index"):
        _append_text_safe_row(
            detail,
            [
                item.file_index,
                item.directory_level,
                item.file_name,
                item.file_type,
                item.page_count,
                item.relative_path,
                item.statistics_status,
                item.retry_count,
                item.error_message,
            ],
        )

    path = _exports_dir(batch) / f"{batch.batch_no}-summary.xlsx"
    workbook.save(path)
    exported = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=path.name,
        storage_path=str(path),
    )
    logger.info(
        "Excel export generation finished",
        extra={"batch_id": batch.pk, "export_id": exported.pk, "path": str(path)},
    )
    return exported
|
||||
49
review_agent/file_summary/services/inventory.py
Normal file
49
review_agent/file_summary/services/inventory.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from review_agent.models import FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
|
||||
|
||||
|
||||
def _directory_level(relative_path: Path) -> str:
    """Return the parent directories of ``relative_path`` joined with ``/``.

    A path with a single component (or none) has no directory level and
    yields an empty string.
    """
    # Dropping the last component leaves nothing to join for bare file names.
    parent_parts = relative_path.parts[:-1]
    return "/".join(parent_parts)
|
||||
|
||||
|
||||
def scan_files_to_items(*, batch: FileSummaryBatch, roots: list[Path]) -> list[FileSummaryItem]:
    """Create one ``FileSummaryItem`` per discovered file and update batch totals.

    A root that is a file is recorded as is, relative to its parent. A
    directory root is walked recursively in sorted order, skipping files
    whose name starts with ``.`` and files of size zero. Items are numbered
    from 1 in discovery order and start with the ``SKIPPED`` status.
    """
    discovered: list[tuple[Path, Path]] = []
    for raw_root in roots:
        root = Path(raw_root)
        if root.is_file():
            discovered.append((root.parent, root))
            continue
        candidates = sorted(entry for entry in root.rglob("*") if entry.is_file())
        for entry in candidates:
            hidden_or_empty = entry.name.startswith(".") or entry.stat().st_size == 0
            if not hidden_or_empty:
                discovered.append((root, entry))

    created: list[FileSummaryItem] = []
    for position, (root, file_path) in enumerate(discovered, start=1):
        relative = file_path.relative_to(root).as_posix()
        extension = file_path.suffix.lower().lstrip(".")
        created.append(
            FileSummaryItem.objects.create(
                batch=batch,
                file_index=position,
                directory_level=_directory_level(Path(relative)),
                file_name=file_path.name,
                file_type=extension,
                relative_path=relative,
                storage_path=str(file_path),
                statistics_status=FileSummaryItem.StatisticsStatus.SKIPPED,
            )
        )

    total = len(created)
    supported = sum(1 for item in created if item.file_type in SUPPORTED_EXTENSIONS)
    batch.total_files = total
    batch.supported_files = supported
    batch.unsupported_files = total - supported
    batch.save(update_fields=["total_files", "supported_files", "unsupported_files"])
    return created
|
||||
282
review_agent/file_summary/services/page_count.py
Normal file
282
review_agent/file_summary/services/page_count.py
Normal file
@@ -0,0 +1,282 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from xml.etree import ElementTree
|
||||
from zipfile import ZipFile, is_zipfile
|
||||
|
||||
|
||||
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
|
||||
logger = logging.getLogger("review_agent.file_summary.page_count")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PageCountResult:
    """Immutable outcome of counting the pages of one document."""

    # "success", "uncertain", "unsupported" or "failed" in this module.
    status: str
    # Page, sheet or slide count; None when no count was obtained.
    page_count: int | None = None
    # Failure description; empty unless status is "failed".
    error_message: str = ""
|
||||
|
||||
|
||||
def count_document_pages(path: str | Path) -> PageCountResult:
    """Count pages, sheets or slides of a document, chosen by file extension.

    Returns ``"unsupported"`` for extensions outside ``SUPPORTED_EXTENSIONS``,
    ``"success"`` with a count when one is obtained, ``"uncertain"`` when no
    counter produced a positive number, and ``"failed"`` with the exception
    text when anything raises. For ``doc`` and ``ppt`` the result without a
    count comes from ``_ole_uncertain_or_failed``.
    """
    file_path = Path(path)
    ext = file_path.suffix.lower().lstrip(".")
    if ext not in SUPPORTED_EXTENSIONS:
        return PageCountResult(status="unsupported")

    # (library-based counter or None, COM-based counter) per extension.
    # Built at call time so the helpers are looked up when actually needed.
    strategies = {
        "docx": (_count_docx_pages_from_extended_properties, _count_word_pages_with_com),
        "xlsx": (_count_xlsx_sheets, _count_excel_sheets_with_com),
        "xls": (_count_xls_sheets, _count_excel_sheets_with_com),
        "pptx": (_count_pptx_slides, _count_powerpoint_slides_with_com),
        "doc": (None, _count_word_pages_with_com),
        "ppt": (None, _count_powerpoint_slides_with_com),
    }

    try:
        if ext == "pdf":
            from pypdf import PdfReader

            return PageCountResult(status="success", page_count=len(PdfReader(str(file_path)).pages))

        strategy = strategies.get(ext)
        if strategy is not None:
            primary_counter, com_counter = strategy
            pages = primary_counter(file_path) if primary_counter is not None else None
            # COM is only tried when the primary counter gave nothing and the
            # file signature matches the expected container format.
            if not pages and _can_try_com_fallback(file_path, ext):
                pages = com_counter(file_path)
            if pages:
                return PageCountResult(status="success", page_count=pages)
            if primary_counter is None:
                return _ole_uncertain_or_failed(file_path)
            return PageCountResult(status="uncertain")
    except Exception as exc:
        return PageCountResult(status="failed", error_message=str(exc))

    return PageCountResult(status="uncertain")
|
||||
|
||||
|
||||
def _count_docx_pages_from_extended_properties(path: Path) -> int | None:
    """Read the ``Pages`` value from ``docProps/app.xml`` of a DOCX file.

    Returns ``None`` when the archive cannot be read, the entry is missing,
    the XML does not parse, the element is absent or empty, or the value is
    not a positive integer. Read and parse failures are logged as warnings.
    """
    try:
        with ZipFile(path) as archive:
            app_entry = None
            for info in archive.infolist():
                if info.filename == "docProps/app.xml":
                    # Keep overwriting so the last matching entry wins.
                    app_entry = info
            if app_entry is None:
                return None
            content = archive.read(app_entry).decode("utf-8", errors="replace")
    except Exception as exc:
        logger.warning("DOCX extended properties read failed", extra={"path": str(path), "error": str(exc)})
        return None

    # NOTE(review): ElementTree does not guard against maliciously
    # constructed XML; consider a hardened parser for untrusted uploads.
    try:
        root = ElementTree.fromstring(content)
    except ElementTree.ParseError as exc:
        logger.warning("DOCX extended properties parse failed", extra={"path": str(path), "error": str(exc)})
        return None

    pages_node = root.find("{http://schemas.openxmlformats.org/officeDocument/2006/extended-properties}Pages")
    if pages_node is not None and pages_node.text:
        return _positive_int(pages_node.text)
    return None
|
||||
|
||||
|
||||
def _count_xlsx_sheets(path: Path) -> int | None:
    """Return the number of sheets in an XLSX workbook, or ``None``.

    Any failure (including a missing openpyxl) is logged as a warning and
    reported as ``None``; a count of zero is also reported as ``None``.
    """
    try:
        from openpyxl import load_workbook

        book = load_workbook(str(path), read_only=True, data_only=True)
        try:
            sheet_total = len(book.sheetnames)
        finally:
            book.close()
        return _positive_int(sheet_total)
    except Exception as exc:
        logger.warning("XLSX sheet count failed", extra={"path": str(path), "error": str(exc)})
        return None
|
||||
|
||||
|
||||
def _count_xls_sheets(path: Path) -> int | None:
    """Return the number of sheets in a legacy XLS workbook, or ``None``.

    Any failure (including a missing xlrd) is logged as a warning and
    reported as ``None``; a count of zero is also reported as ``None``.
    """
    try:
        import xlrd

        book = xlrd.open_workbook(str(path), on_demand=True)
        try:
            sheet_total = book.nsheets
        finally:
            book.release_resources()
        return _positive_int(sheet_total)
    except Exception as exc:
        logger.warning("XLS sheet count failed", extra={"path": str(path), "error": str(exc)})
        return None
|
||||
|
||||
|
||||
def _count_pptx_slides(path: Path) -> int | None:
    """Return the number of slides in a PPTX presentation, or ``None``.

    Any failure (including a missing python-pptx) is logged as a warning and
    reported as ``None``; a count of zero is also reported as ``None``.
    """
    try:
        from pptx import Presentation

        deck = Presentation(str(path))
        slide_total = len(deck.slides)
        return _positive_int(slide_total)
    except Exception as exc:
        logger.warning("PPTX slide count failed", extra={"path": str(path), "error": str(exc)})
        return None
|
||||
|
||||
|
||||
def _ole_uncertain_or_failed(path: Path) -> PageCountResult:
    """Classify a legacy Office file whose pages could not be counted.

    A file recognised by ``olefile`` as OLE is ``"uncertain"``; one that is
    not is ``"failed"``. If the check itself raises (for example because
    ``olefile`` is not installed) the result is ``"uncertain"``.
    """
    try:
        import olefile

        is_ole = olefile.isOleFile(str(path))
    except Exception as exc:
        logger.warning("OLE validation failed", extra={"path": str(path), "error": str(exc)})
        return PageCountResult(status="uncertain")

    if is_ole:
        return PageCountResult(status="uncertain")
    return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。")
|
||||
|
||||
|
||||
def _can_try_com_fallback(path: Path, ext: str) -> bool:
    """Tell whether the file's container format matches its extension.

    OOXML extensions must be ZIP files, legacy extensions must be OLE files.
    Any other extension, or a failing OLE check, gives ``False``.
    """
    if ext in {"docx", "xlsx", "pptx"}:
        return is_zipfile(path)
    if ext not in {"doc", "xls", "ppt"}:
        return False

    try:
        import olefile

        return olefile.isOleFile(str(path))
    except Exception as exc:
        logger.warning("OLE signature check failed", extra={"path": str(path), "error": str(exc)})
        return False
|
||||
|
||||
|
||||
def _count_word_pages_with_com(path: Path) -> int | None:
    """Ask a Word instance, driven through COM, for the document's page count.

    Returns ``None`` when pywin32 cannot be imported, when any COM call
    fails, or when the reported count is not positive. The document and the
    application are always closed, and COM is uninitialised, before returning.
    """
    try:
        import pythoncom
        import win32com.client
    except Exception as exc:
        logger.info("Word COM page count unavailable", extra={"path": str(path), "error": str(exc)})
        return None

    app = None
    opened = None
    pythoncom.CoInitialize()
    try:
        app = win32com.client.DispatchEx("Word.Application")
        app.Visible = False
        app.DisplayAlerts = 0
        opened = app.Documents.Open(
            str(path.resolve()),
            ReadOnly=True,
            AddToRecentFiles=False,
            ConfirmConversions=False,
        )
        opened.Repaginate()
        # NOTE(review): 2 is presumably Word's wdStatisticPages constant;
        # confirm against the Word object model reference.
        page_total = opened.ComputeStatistics(2)
        return _positive_int(page_total)
    except Exception as exc:
        logger.warning("Word COM page count failed", extra={"path": str(path), "error": str(exc)})
        return None
    finally:
        # Cleanup is best effort: a failure in one step must not prevent the next.
        try:
            if opened is not None:
                opened.Close(False)
        except Exception as exc:
            logger.debug("Word document close failed", extra={"path": str(path), "error": str(exc)})
        try:
            if app is not None:
                app.Quit()
        except Exception as exc:
            logger.debug("Word application quit failed", extra={"path": str(path), "error": str(exc)})
        pythoncom.CoUninitialize()
|
||||
|
||||
|
||||
def _count_powerpoint_slides_with_com(path: Path) -> int | None:
    """Ask a PowerPoint instance, driven through COM, for the slide count.

    Returns ``None`` when pywin32 cannot be imported, when any COM call
    fails, or when the reported count is not positive. The presentation and
    the application are always closed, and COM is uninitialised, before
    returning.
    """
    try:
        import pythoncom
        import win32com.client
    except Exception as exc:
        logger.info("PowerPoint COM slide count unavailable", extra={"path": str(path), "error": str(exc)})
        return None

    app = None
    deck = None
    pythoncom.CoInitialize()
    try:
        app = win32com.client.DispatchEx("PowerPoint.Application")
        deck = app.Presentations.Open(
            str(path.resolve()),
            ReadOnly=True,
            Untitled=False,
            WithWindow=False,
        )
        slide_total = deck.Slides.Count
        return _positive_int(slide_total)
    except Exception as exc:
        logger.warning("PowerPoint COM slide count failed", extra={"path": str(path), "error": str(exc)})
        return None
    finally:
        # Cleanup is best effort: a failure in one step must not prevent the next.
        try:
            if deck is not None:
                deck.Close()
        except Exception as exc:
            logger.debug("PowerPoint presentation close failed", extra={"path": str(path), "error": str(exc)})
        try:
            if app is not None:
                app.Quit()
        except Exception as exc:
            logger.debug("PowerPoint application quit failed", extra={"path": str(path), "error": str(exc)})
        pythoncom.CoUninitialize()
|
||||
|
||||
|
||||
def _count_excel_sheets_with_com(path: Path) -> int | None:
    """Ask an Excel instance, driven through COM, for the worksheet count.

    Returns ``None`` when pywin32 cannot be imported, when any COM call
    fails, or when the reported count is not positive. The workbook and the
    application are always closed, and COM is uninitialised, before returning.
    """
    try:
        import pythoncom
        import win32com.client
    except Exception as exc:
        logger.info("Excel COM sheet count unavailable", extra={"path": str(path), "error": str(exc)})
        return None

    app = None
    book = None
    pythoncom.CoInitialize()
    try:
        app = win32com.client.DispatchEx("Excel.Application")
        app.Visible = False
        app.DisplayAlerts = False
        book = app.Workbooks.Open(str(path.resolve()), ReadOnly=True)
        sheet_total = book.Worksheets.Count
        return _positive_int(sheet_total)
    except Exception as exc:
        logger.warning("Excel COM sheet count failed", extra={"path": str(path), "error": str(exc)})
        return None
    finally:
        # Cleanup is best effort: a failure in one step must not prevent the next.
        try:
            if book is not None:
                book.Close(False)
        except Exception as exc:
            logger.debug("Excel workbook close failed", extra={"path": str(path), "error": str(exc)})
        try:
            if app is not None:
                app.Quit()
        except Exception as exc:
            logger.debug("Excel application quit failed", extra={"path": str(path), "error": str(exc)})
        pythoncom.CoUninitialize()
|
||||
|
||||
|
||||
def _positive_int(value) -> int | None:
    """Convert ``value`` with ``int()`` and return it only if greater than zero.

    Values that ``int()`` rejects with ``TypeError`` or ``ValueError``, and
    results that are zero or negative, give ``None``.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError):
        return None
    if parsed <= 0:
        return None
    return parsed
|
||||
31
review_agent/file_summary/services/product_detect.py
Normal file
31
review_agent/file_summary/services/product_detect.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from review_agent.models import FileSummaryBatch
|
||||
|
||||
|
||||
def detect_product_name(batch: FileSummaryBatch) -> str:
    """Guess the product name of a batch from its items and store it.

    Items are visited in ``file_index`` order. The first item that sits in a
    subdirectory supplies its top-level directory name; otherwise the first
    item whose file stem contains one of the product keywords supplies that
    stem. When nothing matches, an empty string is returned and nothing is
    saved. On a match the batch is saved, and a conversation whose title
    still starts with the default prefix is renamed after the product.
    """
    keywords = ("产品", "试剂盒", "说明书")
    detected = ""
    for item in batch.items.order_by("file_index"):
        segments = Path(item.relative_path).parts
        if len(segments) > 1:
            detected = segments[0]
        else:
            stem = Path(item.file_name).stem
            if any(keyword in stem for keyword in keywords):
                detected = stem
        if detected:
            break

    if not detected:
        return ""

    batch.product_name = detected
    batch.save(update_fields=["product_name"])
    conversation = batch.conversation
    if conversation.title.startswith("新对话"):
        conversation.title = f"{detected}-文件汇总"
        conversation.save(update_fields=["title", "updated_at"])
    return detected
|
||||
76
review_agent/file_summary/services/report.py
Normal file
76
review_agent/file_summary/services/report.py
Normal file
@@ -0,0 +1,76 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.report")
|
||||
|
||||
|
||||
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return the directory that holds a batch's exported files, creating it.

    ``batch.work_dir`` is the base when set; otherwise the base is
    ``MEDIA_ROOT/file_summary/<batch_no>``.
    """
    work_dir = batch.work_dir
    base_dir = Path(work_dir) if work_dir else Path(settings.MEDIA_ROOT) / "file_summary" / batch.batch_no
    target = base_dir.joinpath("exports")
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
def _markdown_cell(value: object) -> str:
    """Render ``value`` so it cannot break out of a Markdown table cell."""
    text = str(value)
    # A raw newline ends the table row and a raw pipe starts a new cell.
    text = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
    return text.replace("|", "\\|")


def build_summary_table(batch: FileSummaryBatch) -> str:
    """Build the Markdown table listing every item of the batch.

    Items appear in ``file_index`` order. An empty directory level or error
    message and a missing page count are shown as ``-``. Cell values are
    escaped so that file names or error messages containing ``|`` or line
    breaks do not corrupt the table.
    """
    lines = [
        "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    for item in batch.items.order_by("file_index"):
        cells = (
            item.file_index,
            item.directory_level or "-",
            item.file_name,
            item.file_type,
            item.page_count if item.page_count is not None else "-",
            item.statistics_status,
            item.error_message or "-",
        )
        lines.append("| " + " | ".join(_markdown_cell(cell) for cell in cells) + " |")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def build_markdown_report(batch: FileSummaryBatch) -> str:
    """Assemble the full Markdown report for a batch.

    The report has four parts separated by blank lines: a title with the
    batch number, the summary counters, the item table from
    ``build_summary_table``, and a fixed processing note.
    """
    title = f"# 文件目录与页数汇总报告\n\n批次号:{batch.batch_no}"

    overview_lines = [
        f"- 产品名称:{batch.product_name or '-'}",
        f"- 文件总数:{batch.total_files}",
        f"- 统计成功:{batch.success_files}",
        f"- 统计失败:{batch.failed_files}",
        f"- 不支持:{batch.unsupported_files}",
        f"- 不确定:{batch.uncertain_files}",
        f"- 总页数:{batch.total_pages}",
    ]
    overview = "## 汇总信息\n\n" + "\n".join(overview_lines)

    details = "## 文件明细\n\n" + build_summary_table(batch)
    notes = "## 处理说明\n\n单文件失败不会阻断批次,失败与不确定文件已在明细中标注。"
    return "\n\n".join([title, overview, details, notes])
|
||||
|
||||
|
||||
def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFile, str]:
    """Write the Markdown report to disk and record it as an export.

    The report is saved as ``<batch_no>-summary.md`` (UTF-8) in the batch
    exports directory.

    Returns:
        The created ``ExportedSummaryFile`` and the Markdown item table.
    """
    logger.info("Markdown report generation started", extra={"batch_id": batch.pk})
    report_text = build_markdown_report(batch)
    report_path = _exports_dir(batch) / f"{batch.batch_no}-summary.md"
    report_path.write_text(report_text, encoding="utf-8")

    export_record = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name=report_path.name,
        storage_path=str(report_path),
    )
    logger.info(
        "Markdown report generation finished",
        extra={"batch_id": batch.pk, "export_id": export_record.pk, "path": str(report_path)},
    )
    summary_table = build_summary_table(batch)
    return export_record, summary_table
|
||||
1
review_agent/file_summary/skills/__init__.py
Normal file
1
review_agent/file_summary/skills/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
69
review_agent/file_summary/skills/archive_extract.py
Normal file
69
review_agent/file_summary/skills/archive_extract.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
from review_agent.models import FileSummaryBatchAttachment
|
||||
|
||||
from ..paths import resolve_storage_path
|
||||
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills.archive_extract")
|
||||
|
||||
|
||||
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
    """Build a filesystem-safe directory name for an attachment's extraction.

    The name is ``<attachment_id>_<stem>``, where the stem of the original
    file name has every run of characters outside ``A-Z a-z 0-9 . _ -``
    replaced by ``_`` and leading/trailing ``.`` and ``_`` removed. An empty
    stem, before or after cleaning, becomes ``archive``.
    """
    raw_stem = Path(binding.attachment.original_name).stem
    if not raw_stem:
        raw_stem = "archive"
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", raw_stem).strip("._")
    if not cleaned:
        cleaned = "archive"
    return f"{binding.attachment_id}_{cleaned}"
|
||||
|
||||
|
||||
class ArchiveExtractSkill(BaseSkill):
    """Workflow skill that unpacks every archive attachment of a batch."""

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Extract all archive attachments into ``<work_dir>/extracted``.

        Fails when the batch has no work directory, or when at least one
        archive was processed but no file came out of any of them. A batch
        without archive attachments succeeds with a count of zero.
        """
        batch = context.batch
        if not batch.work_dir:
            message = "批次工作目录为空,无法解压压缩包。"
            logger.error(
                "Archive extract failed without work dir",
                extra={"batch_id": batch.pk, "batch_no": batch.batch_no},
            )
            return SkillResult(success=False, message=message, data={"extracted_count": 0})

        extraction_root = Path(batch.work_dir) / "extracted"
        archives_seen = 0
        files_extracted = 0
        for binding in FileSummaryBatchAttachment.objects.filter(batch=batch):
            archive_file = resolve_storage_path(binding.attachment.storage_path)
            if archive_file.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
                continue
            archives_seen += 1
            # One directory per attachment keeps archives from overwriting each other.
            destination = extraction_root / _safe_archive_dir_name(binding)
            logger.info(
                "Archive extract started",
                extra={
                    "batch_id": batch.pk,
                    "attachment_id": binding.attachment_id,
                    "path": str(archive_file),
                    "target_dir": str(destination),
                },
            )
            files_extracted += len(extract_archive(archive_file, destination))

        if archives_seen and files_extracted == 0:
            message = "压缩包未解出任何可扫描文件,请检查压缩包内容或格式。"
            logger.warning(
                "Archive extract produced no files",
                extra={"batch_id": batch.pk, "archive_count": archives_seen},
            )
            return SkillResult(success=False, message=message, data={"extracted_count": 0})

        logger.info(
            "Archive extract finished",
            extra={
                "batch_id": batch.pk,
                "archive_count": archives_seen,
                "extracted_count": files_extracted,
            },
        )
        return SkillResult(success=True, data={"extracted_count": files_extracted})
|
||||
52
review_agent/file_summary/skills/attachment_reader.py
Normal file
52
review_agent/file_summary/skills/attachment_reader.py
Normal file
@@ -0,0 +1,52 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Iterable
|
||||
|
||||
from review_agent.models import FileAttachment
|
||||
|
||||
from ..services.attachment_reader import read_attachment_details
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills.attachment_reader")
|
||||
|
||||
|
||||
class AttachmentReaderSkill(BaseSkill):
    """Skill that parses attachments through ``read_attachment_details``."""

    name = "attachment_reader"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Read the active, non-deleted attachments of the batch's conversation."""
        candidates = FileAttachment.objects.filter(
            conversation=context.batch.conversation,
            is_active=True,
        )
        candidates = candidates.exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        return self.run_for_attachments(candidates)

    def run_for_attachments(self, attachments: Iterable[FileAttachment]) -> SkillResult:
        """Parse the given attachments and summarise the outcome.

        Returns:
            A failed result when there is nothing to read. Otherwise a result
            carrying every parsed dict under ``data["attachments"]``, marked
            successful when at least one entry has ``status == "success"``.
        """
        pending = list(attachments)
        logger.info(
            "Attachment reader skill started",
            extra={
                "attachment_count": len(pending),
                "attachment_ids": [entry.pk for entry in pending],
            },
        )
        if not pending:
            # No attachments means no parse results either.
            logger.warning("Attachment reader skill found no attachments")
            return SkillResult(success=False, message="当前对话没有可读取的附件。")

        parsed = [read_attachment_details(entry).to_dict() for entry in pending]
        success_count = sum(1 for entry in parsed if entry["status"] == "success")
        failed_count = len(parsed) - success_count
        has_success = success_count > 0

        logger.info(
            "Attachment reader skill finished",
            extra={
                "success": has_success,
                "success_count": success_count,
                "failed_count": failed_count,
            },
        )
        if has_success:
            summary = "附件解析完成。"
        else:
            summary = "附件解析失败。"
        return SkillResult(success=has_success, data={"attachments": parsed}, message=summary)
|
||||
24
review_agent/file_summary/skills/base.py
Normal file
24
review_agent/file_summary/skills/base.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from review_agent.models import FileSummaryBatch
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WorkflowContext:
    """Immutable bundle of state handed to a skill's ``run`` method."""

    # The file-summary batch the workflow is operating on.
    batch: FileSummaryBatch
|
||||
|
||||
|
||||
@dataclass
class SkillResult:
    """Outcome returned by a skill's ``run`` method."""

    # Whether the skill considers its work successful.
    success: bool
    # Skill-specific payload; every instance gets its own empty dict by default.
    data: dict = field(default_factory=dict)
    # Optional text describing the outcome.
    message: str = ""
|
||||
|
||||
|
||||
class BaseSkill:
    """Base class for workflow skills; subclasses set ``name`` and override ``run``."""

    # Identifier of the skill; empty on the base class.
    name = ""

    def run(self, context: WorkflowContext) -> SkillResult:
        """Execute the skill for ``context``; the base implementation always raises."""
        raise NotImplementedError
|
||||
108
review_agent/file_summary/skills/document_page_count.py
Normal file
108
review_agent/file_summary/skills/document_page_count.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from review_agent.models import FileSummaryItem
|
||||
|
||||
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills.document_page_count")
|
||||
|
||||
|
||||
class DocumentPageCountSkill(BaseSkill):
    """Count pages for every item of a batch and store the totals on the batch."""

    name = "document_page_count"

    # Upper bound on calls to count_document_pages for a single item.
    _MAX_ATTEMPTS = 3

    def run(self, context: WorkflowContext) -> SkillResult:
        """Update each item's statistics fields, then the batch counters.

        Items whose ``file_type`` is not in ``SUPPORTED_EXTENSIONS`` are marked
        unsupported without being counted. The returned result is always
        successful; per-file failures are only reflected in the counters.
        """
        batch = context.batch
        # Insertion order matters: it is reused for update_fields and logging.
        tally = {
            "success_files": 0,
            "failed_files": 0,
            "unsupported_files": 0,
            "uncertain_files": 0,
            "total_pages": 0,
        }
        logger.info("Document page count started", extra={"batch_id": batch.pk})

        for item in batch.items.order_by("file_index"):
            if item.file_type not in SUPPORTED_EXTENSIONS:
                tally["unsupported_files"] += 1
                self._mark_unsupported(batch, item)
                continue

            outcome = self._count_with_retries(batch, item)
            item.statistics_status = outcome.status
            item.page_count = outcome.page_count
            item.error_message = outcome.error_message
            item.save(
                update_fields=[
                    "statistics_status",
                    "page_count",
                    "retry_count",
                    "error_message",
                    "updated_at",
                ]
            )

            status = outcome.status
            if status == FileSummaryItem.StatisticsStatus.SUCCESS:
                tally["success_files"] += 1
                tally["total_pages"] += outcome.page_count or 0
                continue
            if status == FileSummaryItem.StatisticsStatus.UNCERTAIN:
                tally["uncertain_files"] += 1
                continue
            if status == FileSummaryItem.StatisticsStatus.UNSUPPORTED:
                tally["unsupported_files"] += 1
                continue

            # Any other status is treated as a failure.
            tally["failed_files"] += 1
            logger.warning(
                "Document page count failed",
                extra={
                    "batch_id": batch.pk,
                    "item_id": item.pk,
                    "file_name": item.file_name,
                    "error": outcome.error_message,
                },
            )

        for field_name, value in tally.items():
            setattr(batch, field_name, value)
        batch.save(update_fields=list(tally))
        logger.info(
            "Document page count finished",
            extra={"batch_id": batch.pk, **tally},
        )
        return SkillResult(success=True)

    def _mark_unsupported(self, batch, item) -> None:
        """Persist the UNSUPPORTED status for ``item`` and log it."""
        item.statistics_status = FileSummaryItem.StatisticsStatus.UNSUPPORTED
        item.save(update_fields=["statistics_status", "updated_at"])
        logger.info(
            "Document page count unsupported",
            extra={
                "batch_id": batch.pk,
                "item_id": item.pk,
                "file_type": item.file_type,
                "file_name": item.file_name,
            },
        )

    def _count_with_retries(self, batch, item):
        """Call ``count_document_pages`` until it stops reporting "failed".

        ``item.retry_count`` is set to the number of attempts before the last
        one; the caller is responsible for saving the item.
        """
        outcome = None
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            logger.info(
                "Document page count attempt",
                extra={
                    "batch_id": batch.pk,
                    "item_id": item.pk,
                    "attempt": attempt,
                    "storage_path": item.storage_path,
                },
            )
            outcome = count_document_pages(item.storage_path)
            item.retry_count = attempt - 1
            if outcome.status != "failed":
                break
        return outcome
|
||||
69
review_agent/file_summary/skills/file_inventory.py
Normal file
69
review_agent/file_summary/skills/file_inventory.py
Normal file
@@ -0,0 +1,69 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
from review_agent.models import FileSummaryBatchAttachment
|
||||
|
||||
from ..paths import resolve_storage_path
|
||||
from ..services.archive import ARCHIVE_EXTENSIONS
|
||||
from ..services.inventory import scan_files_to_items
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills.file_inventory")
|
||||
|
||||
|
||||
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
    """Build the extraction directory name for an archive binding.

    The name is ``<attachment_id>_<stem>``, where the stem of the attachment's
    original name has every run of characters outside ``A-Za-z0-9._-``
    collapsed to ``_`` and leading/trailing ``.``/``_`` removed. The literal
    ``archive`` is used when nothing usable remains.
    """
    raw_stem = Path(binding.attachment.original_name).stem
    if not raw_stem:
        raw_stem = "archive"
    cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", raw_stem).strip("._")
    if not cleaned:
        cleaned = "archive"
    return f"{binding.attachment_id}_{cleaned}"
|
||||
|
||||
|
||||
class FileInventorySkill(BaseSkill):
    """Collect the scan roots of a batch and turn their files into items."""

    name = "file_inventory"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Scan the batch's attachments into inventory items.

        Non-archive attachments are scanned from their resolved storage path.
        Archive attachments are scanned from
        ``<batch.work_dir>/extracted/<_safe_archive_dir_name(binding)>``.

        Returns:
            A failed ``SkillResult`` when any archive's extraction directory
            does not exist; otherwise a successful result with
            ``data["total_files"]`` set to the number of items returned by
            ``scan_files_to_items``.
        """
        batch = context.batch
        roots: list[Path] = []
        missing_extract_roots: list[str] = []

        # select_related loads each binding's attachment in the same query
        # instead of issuing one extra query per binding inside the loop.
        bindings = FileSummaryBatchAttachment.objects.filter(batch=batch).select_related(
            "attachment"
        )
        for binding in bindings:
            original_path = resolve_storage_path(binding.attachment.storage_path)
            is_archive = original_path.suffix.lower().lstrip(".") in ARCHIVE_EXTENSIONS
            if not is_archive:
                roots.append(original_path)
                continue

            extracted_root = (
                Path(batch.work_dir) / "extracted" / _safe_archive_dir_name(binding)
            )
            if extracted_root.exists():
                roots.append(extracted_root)
            else:
                missing_extract_roots.append(str(extracted_root))

        if missing_extract_roots:
            # An archive without its extraction directory cannot be scanned.
            message = "压缩包解压目录不存在,无法扫描解压后的文件。"
            logger.warning(
                "File inventory missing extracted roots",
                extra={
                    "batch_id": batch.pk,
                    "missing_extract_roots": missing_extract_roots,
                },
            )
            return SkillResult(success=False, message=message)

        logger.info(
            "File inventory started",
            extra={
                "batch_id": batch.pk,
                "root_count": len(roots),
                "roots": [str(root) for root in roots],
            },
        )
        items = scan_files_to_items(batch=batch, roots=roots)
        logger.info(
            "File inventory finished",
            extra={"batch_id": batch.pk, "total_files": len(items)},
        )
        return SkillResult(success=True, data={"total_files": len(items)})
|
||||
22
review_agent/file_summary/skills/product_detect.py
Normal file
22
review_agent/file_summary/skills/product_detect.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from ..services.product_detect import detect_product_name
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills.product_detect")
|
||||
|
||||
|
||||
class ProductDetectSkill(BaseSkill):
    """Skill wrapper around ``detect_product_name``."""

    name = "product_detect"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Detect the batch's product name and return it under ``data["product_name"]``."""
        batch = context.batch
        logger.info("Product detect started", extra={"batch_id": batch.pk})
        detected = detect_product_name(batch)
        finish_extra = {"batch_id": batch.pk, "product_name": detected}
        logger.info("Product detect finished", extra=finish_extra)
        return SkillResult(success=True, data={"product_name": detected})
|
||||
44
review_agent/file_summary/skills/registry.py
Normal file
44
review_agent/file_summary/skills/registry.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills")
|
||||
|
||||
|
||||
class SkillRegistry:
    """Name-indexed collection of skill instances with logged execution."""

    def __init__(self):
        # Maps skill.name -> skill instance.
        self._skills: dict[str, BaseSkill] = {}

    def register(self, skill: BaseSkill) -> None:
        """Store ``skill`` under its name, replacing any previous entry.

        Raises:
            ValueError: if the skill's ``name`` is empty.
        """
        skill_name = skill.name
        if not skill_name:
            raise ValueError("Skill 必须声明 name。")
        self._skills[skill_name] = skill
        logger.info("Skill registered: %s", skill.name, extra={"skill_name": skill.name})

    def get(self, name: str) -> BaseSkill:
        """Return the skill registered as ``name``.

        Raises:
            KeyError: if no skill is registered under ``name``.
        """
        try:
            found = self._skills[name]
        except KeyError as exc:
            raise KeyError(f"Skill 未注册:{name}") from exc
        else:
            return found

    def execute(self, name: str, context: WorkflowContext) -> SkillResult:
        """Run the named skill with ``context`` and log start, crash and finish.

        Exceptions from lookup or from the skill are logged and re-raised.
        """
        batch_id = context.batch.pk
        logger.info("Skill started: %s", name, extra={"skill_name": name, "batch_id": batch_id})
        try:
            skill = self.get(name)
            result = skill.run(context)
        except Exception:
            logger.exception(
                "Skill crashed: %s",
                name,
                extra={"skill_name": name, "batch_id": context.batch.pk},
            )
            raise
        finish_extra = {
            "skill_name": name,
            "batch_id": context.batch.pk,
            "success": result.success,
            "result_message": result.message,
        }
        logger.info("Skill finished: %s", name, extra=finish_extra)
        return result
|
||||
47
review_agent/file_summary/skills/summary_report.py
Normal file
47
review_agent/file_summary/skills/summary_report.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from django.urls import reverse
|
||||
|
||||
from review_agent.models import Message
|
||||
|
||||
from ..services.export_excel import generate_excel_export
|
||||
from ..services.report import generate_markdown_report
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.skills.summary_report")
|
||||
|
||||
|
||||
class SummaryReportSkill(BaseSkill):
    """Generate the Markdown and Excel exports and post them to the conversation."""

    name = "summary_report"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Create both exports and an assistant message linking to them.

        Returns:
            A successful ``SkillResult`` whose data holds the primary keys of
            the Markdown and Excel export records.
        """
        batch = context.batch
        logger.info("Summary report started", extra={"batch_id": batch.pk})

        markdown_export, summary_table = generate_markdown_report(batch)
        excel_export = generate_excel_export(batch)
        export_ids = {
            "markdown_export_id": markdown_export.pk,
            "excel_export_id": excel_export.pk,
        }

        markdown_url = reverse("file_summary_export_download", args=[markdown_export.pk])
        excel_url = reverse("file_summary_export_download", args=[excel_export.pk])
        download_links = f"[下载 Markdown 报告]({markdown_url}) | [下载 Excel 明细]({excel_url})"
        # Heading, summary table and links, separated by blank lines.
        content = "\n\n".join(
            ["文件目录与页数汇总已完成。", f"{summary_table}", download_links]
        )

        Message.objects.create(
            conversation=batch.conversation,
            role=Message.Role.ASSISTANT,
            content=content,
        )
        logger.info(
            "Summary report finished",
            extra={"batch_id": batch.pk, **export_ids},
        )
        return SkillResult(success=True, data=dict(export_ids))
|
||||
112
review_agent/file_summary/storage.py
Normal file
112
review_agent/file_summary/storage.py
Normal file
@@ -0,0 +1,112 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.utils.text import get_valid_filename
|
||||
|
||||
from review_agent.models import Conversation, FileAttachment
|
||||
|
||||
from .constants import ATTACHMENT_ROOT
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.storage")
|
||||
|
||||
|
||||
def _safe_original_name(name: str) -> str:
    """Return a sanitised file name for ``name``.

    Only the final path component is kept and passed through Django's
    ``get_valid_filename``; a random ``upload-<hex>`` name is generated when
    the result is empty.
    """
    base_name = Path(name).name
    cleaned = get_valid_filename(base_name)
    if cleaned:
        return cleaned
    return f"upload-{uuid4().hex}"
|
||||
|
||||
|
||||
def _relative_attachment_path(conversation: Conversation, filename: str, version_no: int) -> Path:
    """Build the storage path of a new attachment version.

    The result is
    ``ATTACHMENT_ROOT/<user_id>/<conversation pk>/attachments/<stem>_v<version>_<8 hex><suffix>``.
    """
    source = Path(filename)
    extension = source.suffix
    base = source.stem
    # A short random token keeps stored names distinct from one another.
    token = uuid4().hex[:8]
    stored_name = f"{base}_v{version_no}_{token}{extension}"
    owner_dir = ATTACHMENT_ROOT / str(conversation.user_id) / str(conversation.pk)
    return owner_dir / "attachments" / stored_name
|
||||
|
||||
|
||||
def _ensure_inside_media_root(path: Path) -> None:
    """Reject paths that resolve outside ``settings.MEDIA_ROOT``.

    Raises:
        ValueError: if the resolved ``path`` is neither MEDIA_ROOT itself nor
            located below it.
    """
    root = Path(settings.MEDIA_ROOT).resolve()
    candidate = path.resolve()
    is_inside = candidate == root or root in candidate.parents
    if not is_inside:
        raise ValueError("上传路径必须位于 MEDIA_ROOT 内。")
|
||||
|
||||
|
||||
@transaction.atomic
def save_uploaded_attachment(*, conversation: Conversation, user, uploaded_file) -> FileAttachment:
    """Store an uploaded file and create a versioned attachment record.

    The file is written below ``settings.MEDIA_ROOT``. Earlier active
    attachments of the same conversation with the same sanitised name are
    deactivated, and the new record gets the next version number.

    Args:
        conversation: Conversation the attachment belongs to.
        user: Uploading user, stored on the record.
        uploaded_file: Object exposing ``name``, ``size``, ``chunks()`` and
            optionally ``content_type``.

    Returns:
        The newly created ``FileAttachment``.

    Raises:
        ValueError: if the computed target path is outside MEDIA_ROOT.
        Exception: any error raised while writing the file or the database
            rows is re-raised after the written file has been removed.
    """
    original_name = _safe_original_name(uploaded_file.name)
    content_type = getattr(uploaded_file, "content_type", "") or ""
    logger.info(
        "Attachment upload save started",
        extra={
            "conversation_id": conversation.pk,
            "user_id": user.pk,
            "original_name": original_name,
            "file_size": uploaded_file.size,
            "content_type": content_type,
        },
    )
    latest = (
        FileAttachment.objects.filter(conversation=conversation, original_name=original_name)
        .order_by("-version_no")
        .first()
    )
    version_no = (latest.version_no if latest else 0) + 1
    relative_path = _relative_attachment_path(conversation, original_name, version_no)
    absolute_path = Path(settings.MEDIA_ROOT) / relative_path
    _ensure_inside_media_root(absolute_path)
    absolute_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with absolute_path.open("wb") as target:
            for chunk in uploaded_file.chunks():
                target.write(chunk)

        FileAttachment.objects.filter(
            conversation=conversation,
            original_name=original_name,
            is_active=True,
        ).update(is_active=False)

        attachment = FileAttachment.objects.create(
            conversation=conversation,
            user=user,
            original_name=original_name,
            version_no=version_no,
            is_active=True,
            storage_path=relative_path.as_posix(),
            file_size=uploaded_file.size,
            content_type=content_type,
        )
    except Exception:
        # The transaction rolls the rows back, but not the file system: remove
        # the (possibly partial) file so a failed upload leaves no orphan.
        # The stored name contains a random token, so this file is ours.
        try:
            absolute_path.unlink()
        except OSError:
            logger.warning(
                "Attachment upload cleanup failed",
                extra={
                    "conversation_id": conversation.pk,
                    "storage_path": relative_path.as_posix(),
                },
            )
        raise

    logger.info(
        "Attachment upload save finished",
        extra={
            "conversation_id": conversation.pk,
            "attachment_id": attachment.pk,
            "version_no": attachment.version_no,
            "storage_path": attachment.storage_path,
        },
    )
    return attachment
|
||||
|
||||
|
||||
def serialize_attachment(attachment: FileAttachment) -> dict[str, object]:
    """Return the JSON-ready dict for an attachment record.

    ``id`` comes from the primary key and ``created_at`` is rendered with
    ``isoformat()``; the other keys are copied from same-named attributes.
    """
    copied_fields = (
        "original_name",
        "version_no",
        "is_active",
        "file_size",
        "content_type",
        "upload_status",
    )
    payload: dict[str, object] = {"id": attachment.pk}
    for field_name in copied_fields:
        payload[field_name] = getattr(attachment, field_name)
    payload["created_at"] = attachment.created_at.isoformat()
    return payload
|
||||
305
review_agent/file_summary/views.py
Normal file
305
review_agent/file_summary/views.py
Normal file
@@ -0,0 +1,305 @@
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.db.models import Count, Q
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from django.http import FileResponse, Http404, JsonResponse
|
||||
from django.views.decorators.http import require_http_methods
|
||||
|
||||
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, Message
|
||||
from review_agent.models import FileSummaryBatch, WorkflowEvent
|
||||
from .events import serialize_event
|
||||
from .paths import resolve_storage_path
|
||||
|
||||
from .storage import save_uploaded_attachment, serialize_attachment
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.views")
|
||||
|
||||
|
||||
def _conversation_for_user(user, conversation_id: int) -> Conversation:
    """Return the user's conversation with the given id.

    Raises:
        Http404: if no conversation with that id belongs to ``user``.
    """
    owned = Conversation.objects.filter(pk=conversation_id, user=user)
    match = owned.first()
    if match:
        return match
    raise Http404("对话不存在。")
|
||||
|
||||
|
||||
@require_http_methods(["POST", "GET"])
@login_required
def attachments(request, conversation_id: int):
    """Upload attachments (POST) or list a conversation's attachments (GET).

    POST expects one or more uploaded files under the ``files`` form field
    and answers 400 when none are supplied. Both methods respond with
    ``{"attachments": [...]}`` built by ``serialize_attachment``. A
    conversation that does not belong to the requesting user yields a 404.
    """
    conversation = _conversation_for_user(request.user, conversation_id)

    if request.method == "POST":
        files = request.FILES.getlist("files")
        if not files:
            return JsonResponse({"error": "请选择至少一个文件。"}, status=400)
        logger.info(
            "Attachment upload request received",
            extra={
                "conversation_id": conversation.pk,
                "user_id": request.user.pk,
                "file_count": len(files),
                "filenames": [uploaded_file.name for uploaded_file in files],
            },
        )
        saved = [
            save_uploaded_attachment(
                conversation=conversation,
                user=request.user,
                uploaded_file=uploaded_file,
            )
            for uploaded_file in files
        ]
        logger.info(
            "Attachment upload request finished",
            extra={
                "conversation_id": conversation.pk,
                "attachment_ids": [attachment.pk for attachment in saved],
            },
        )
        return JsonResponse({"attachments": [serialize_attachment(item) for item in saved]})

    # Evaluate the queryset once: the same rows feed both the logged count and
    # the response, avoiding a separate COUNT query.
    existing = list(
        FileAttachment.objects.filter(conversation=conversation).order_by(
            "original_name",
            "-version_no",
        )
    )
    logger.info(
        "Attachment list requested",
        extra={"conversation_id": conversation.pk, "attachment_count": len(existing)},
    )
    return JsonResponse({"attachments": [serialize_attachment(item) for item in existing]})
|
||||
|
||||
|
||||
@require_http_methods(["DELETE", "PATCH"])
@login_required
def attachment_detail(request, conversation_id: int, attachment_id: int):
    """Update (PATCH) or soft-delete (DELETE) one attachment of the user.

    PATCH accepts a JSON object with optional ``original_name`` (string) and
    ``is_active`` keys. A body that is not valid UTF-8 JSON, is not a JSON
    object, or carries a non-string ``original_name`` is answered with 400.
    DELETE marks the attachment as deleted and inactive; the row is kept.

    Raises:
        Http404: if the conversation or attachment does not belong to the
            requesting user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    attachment = FileAttachment.objects.filter(
        pk=attachment_id,
        conversation=conversation,
        user=request.user,
    ).first()
    if not attachment:
        raise Http404("附件不存在。")

    if request.method == "PATCH":
        try:
            payload = json.loads(request.body.decode("utf-8") or "{}")
        except (UnicodeDecodeError, json.JSONDecodeError):
            return JsonResponse({"error": "JSON 格式错误。"}, status=400)
        # Valid JSON that is not an object (null, list, number, ...) has no
        # keys to read; reject it instead of failing on payload.get().
        if not isinstance(payload, dict):
            return JsonResponse({"error": "JSON 格式错误。"}, status=400)

        requested_name = payload.get("original_name") or ""
        if not isinstance(requested_name, str):
            return JsonResponse({"error": "JSON 格式错误。"}, status=400)

        update_fields = []
        # Keep only the final path component; skip the rename when nothing
        # remains (e.g. "/"), so an empty name is never stored.
        new_name = Path(requested_name.strip()).name
        if new_name:
            attachment.original_name = new_name
            update_fields.append("original_name")
        if "is_active" in payload:
            attachment.is_active = bool(payload["is_active"])
            update_fields.append("is_active")
        if update_fields:
            attachment.save(update_fields=update_fields)
        logger.info(
            "Attachment updated",
            extra={
                "conversation_id": conversation.pk,
                "attachment_id": attachment.pk,
                "update_fields": update_fields,
            },
        )
        return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})

    # DELETE: soft delete by flagging the record.
    attachment.upload_status = FileAttachment.UploadStatus.DELETED
    attachment.is_active = False
    attachment.save(update_fields=["upload_status", "is_active"])
    logger.info(
        "Attachment deleted",
        extra={"conversation_id": conversation.pk, "attachment_id": attachment.pk},
    )
    return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})
|
||||
|
||||
|
||||
@require_http_methods(["GET"])
@login_required
def conversation_list(request):
    """List the user's conversations, newest first, with attachment counts.

    ``attachment_count`` excludes attachments whose upload status is DELETED.
    """
    not_deleted = ~Q(file_attachments__upload_status=FileAttachment.UploadStatus.DELETED)
    queryset = Conversation.objects.filter(user=request.user)
    queryset = queryset.annotate(
        attachment_count=Count("file_attachments", filter=not_deleted)
    )
    queryset = queryset.order_by("-updated_at", "-id")

    rows = []
    for conversation in queryset:
        rows.append(
            {
                "id": conversation.pk,
                "title": conversation.title or "新对话",
                "updated_at": conversation.updated_at.isoformat(),
                "attachment_count": conversation.attachment_count,
            }
        )
    return JsonResponse({"conversations": rows})
|
||||
|
||||
|
||||
@require_http_methods(["GET"])
@login_required
def attachment_download(request, conversation_id: int, attachment_id: int):
    """Stream one of the user's non-deleted attachments as a download.

    Responds with a JSON 404 when the stored file is missing on disk.

    Raises:
        Http404: if the conversation or attachment is not found for the user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    candidates = FileAttachment.objects.filter(
        pk=attachment_id,
        conversation=conversation,
        user=request.user,
    )
    attachment = candidates.exclude(upload_status=FileAttachment.UploadStatus.DELETED).first()
    if not attachment:
        raise Http404("附件不存在。")

    path = resolve_storage_path(attachment.storage_path)
    if path.exists():
        logger.info(
            "Attachment download started",
            extra={"conversation_id": conversation.pk, "attachment_id": attachment.pk},
        )
        response_type = attachment.content_type or "application/octet-stream"
        return FileResponse(
            path.open("rb"),
            as_attachment=True,
            filename=attachment.original_name,
            content_type=response_type,
        )

    logger.warning(
        "Attachment download missing file",
        extra={"attachment_id": attachment.pk, "storage_path": attachment.storage_path},
    )
    return JsonResponse({"error": "文件不存在。"}, status=404)
|
||||
|
||||
|
||||
def _serialize_message(message: Message) -> dict[str, object]:
    """Return the JSON-ready dict for a conversation message."""
    payload: dict[str, object] = {"id": message.pk}
    payload["role"] = message.role
    payload["content"] = message.content
    payload["created_at"] = message.created_at.isoformat()
    return payload
|
||||
|
||||
|
||||
@require_http_methods(["GET"])
@login_required
def conversation_messages(request, conversation_id: int):
    """Return the conversation's messages with an id greater than ``?after``.

    A missing or non-integer ``after`` value is treated as 0. The response
    also carries the id of the newest message in the conversation (0 when
    there is none).
    """
    conversation = _conversation_for_user(request.user, conversation_id)

    raw_after = request.GET.get("after") or "0"
    try:
        after_id = int(raw_after)
    except ValueError:
        after_id = 0

    newer = conversation.messages.filter(pk__gt=after_id).order_by("id")
    messages = list(newer)
    newest_ids = conversation.messages.order_by("-id").values_list("id", flat=True)
    latest_message_id = newest_ids.first() or 0

    logger.info(
        "Conversation incremental messages requested",
        extra={
            "conversation_id": conversation.pk,
            "after_id": after_id,
            "message_count": len(messages),
            "latest_message_id": latest_message_id,
        },
    )
    body = {
        "conversation_id": conversation.pk,
        "latest_message_id": latest_message_id,
        "messages": [_serialize_message(entry) for entry in messages],
    }
    return JsonResponse(body)
|
||||
|
||||
|
||||
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return a batch's summary fields and the state of its workflow nodes.

    Raises:
        Http404: if the batch does not exist for the requesting user.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")

    batch_payload = {
        "id": batch.pk,
        "batch_no": batch.batch_no,
        "status": batch.status,
        "product_name": batch.product_name,
        "total_files": batch.total_files,
        "success_files": batch.success_files,
        "failed_files": batch.failed_files,
        "total_pages": batch.total_pages,
        "error_message": batch.error_message,
    }

    # Node runs are listed in primary-key order.
    node_payloads = []
    for node in batch.node_runs.order_by("id"):
        node_payloads.append(
            {
                "node_code": node.node_code,
                "node_name": node.node_name,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            }
        )
    return JsonResponse({"batch": batch_payload, "nodes": node_payloads})
|
||||
|
||||
|
||||
@require_http_methods(["GET"])
@login_required
def batch_events(request, batch_id: int):
    """Return the batch's workflow events with an id greater than ``?after``.

    A missing or non-integer ``after`` value is treated as 0.

    Raises:
        Http404: if the batch does not exist for the requesting user.
    """
    owned = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user)
    batch = owned.first()
    if not batch:
        raise Http404("批次不存在。")

    raw_after = request.GET.get("after") or "0"
    try:
        after_id = int(raw_after)
    except ValueError:
        after_id = 0

    newer_events = WorkflowEvent.objects.filter(batch=batch, pk__gt=after_id).order_by("id")
    serialized = [serialize_event(event) for event in newer_events]
    return JsonResponse({"events": serialized})
|
||||
|
||||
|
||||
@require_http_methods(["GET"])
@login_required
def export_download(request, export_id: int):
    """Stream an exported summary file that belongs to one of the user's batches.

    Markdown exports are served as ``text/markdown``; every other export type
    is served with the XLSX content type. Responds with a JSON 404 when the
    file is missing on disk.

    Raises:
        Http404: if the export record is not found for the requesting user.
    """
    exported = ExportedSummaryFile.objects.filter(
        pk=export_id,
        batch__user=request.user,
    ).first()
    if not exported:
        raise Http404("导出文件不存在。")

    # NOTE(review): the stored path is used as-is here, while attachment
    # downloads go through resolve_storage_path. Confirm that export records
    # store a directly usable path.
    path = Path(exported.storage_path)
    if not path.exists():
        logger.warning(
            "Export download missing file",
            extra={"export_id": exported.pk, "storage_path": exported.storage_path},
        )
        return JsonResponse({"error": "文件不存在。"}, status=404)

    if exported.export_type == ExportedSummaryFile.ExportType.MARKDOWN:
        content_type = "text/markdown; charset=utf-8"
    else:
        content_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    log_extra = {
        "export_id": exported.pk,
        "batch_id": exported.batch_id,
        "file_name": exported.file_name,
        "content_type": content_type,
    }
    logger.info("Export download started", extra=log_extra)
    return FileResponse(
        path.open("rb"),
        as_attachment=True,
        filename=exported.file_name,
        content_type=content_type,
    )
|
||||
237
review_agent/file_summary/workflow.py
Normal file
237
review_agent/file_summary/workflow.py
Normal file
@@ -0,0 +1,237 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from threading import Thread
|
||||
from uuid import uuid4
|
||||
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
from review_agent.models import (
|
||||
Conversation,
|
||||
FileAttachment,
|
||||
FileSummaryBatch,
|
||||
FileSummaryBatchAttachment,
|
||||
Message,
|
||||
WorkflowNodeRun,
|
||||
)
|
||||
|
||||
from .events import record_event
|
||||
from .services.archive import ARCHIVE_EXTENSIONS
|
||||
from .skills.archive_extract import ArchiveExtractSkill
|
||||
from .skills.base import WorkflowContext
|
||||
from .skills.document_page_count import DocumentPageCountSkill
|
||||
from .skills.file_inventory import FileInventorySkill
|
||||
from .skills.product_detect import ProductDetectSkill
|
||||
from .skills.registry import SkillRegistry
|
||||
from .skills.summary_report import SummaryReportSkill
|
||||
|
||||
|
||||
# Workflow nodes in list order. Each entry is
# (node code, display name, skill name); the skill name is empty for
# "upload" and "complete".
NODE_DEFINITIONS = [
    ("upload", "附件固化", ""),
    ("extract", "压缩包解压", "archive_extract"),
    ("inventory", "文件扫描", "file_inventory"),
    ("page_count", "页数统计", "document_page_count"),
    ("product_detect", "产品识别", "product_detect"),
    ("report", "报告输出", "summary_report"),
    ("complete", "完成", ""),
]
|
||||
|
||||
|
||||
logger = logging.getLogger("review_agent.file_summary.workflow")
|
||||
|
||||
|
||||
def default_skill_registry() -> SkillRegistry:
    """Return a new registry holding one instance of each built-in skill."""
    builtin_skills = (
        ArchiveExtractSkill,
        FileInventorySkill,
        DocumentPageCountSkill,
        ProductDetectSkill,
        SummaryReportSkill,
    )
    registry = SkillRegistry()
    for skill_cls in builtin_skills:
        registry.register(skill_cls())
    return registry
|
||||
|
||||
|
||||
def build_batch_no() -> str:
    """Return a batch number of the form ``FS-<YYYYmmddHHMMSS>-<6 hex chars>``.

    The timestamp is taken from ``timezone.localtime()``.
    """
    timestamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    token = uuid4().hex[:6]
    return f"FS-{timestamp}-{token}"
|
||||
|
||||
|
||||
def build_batch_work_dir(batch_no: str) -> Path:
    """Return ``MEDIA_ROOT/file_summary/work/<batch_no>`` without creating it."""
    media_root = Path(settings.MEDIA_ROOT)
    return media_root.joinpath("file_summary", "work", batch_no)
|
||||
|
||||
|
||||
@transaction.atomic
def create_file_summary_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
) -> FileSummaryBatch:
    """Create a batch, bind the conversation's active attachments and add node runs.

    The active, non-deleted attachments are selected with a row lock, linked
    to the new batch and marked BOUND. One ``WorkflowNodeRun`` is created per
    ``NODE_DEFINITIONS`` entry and a ``workflow_created`` event is recorded.

    Raises:
        ValueError: if the conversation has no usable attachment.
    """
    candidates = FileAttachment.objects.select_for_update().filter(
        conversation=conversation,
        is_active=True,
    )
    candidates = candidates.exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    active_attachments = list(candidates.order_by("original_name", "-created_at"))
    if not active_attachments:
        raise ValueError("当前对话没有可用附件。")
    logger.info(
        "File summary batch creation started",
        extra={
            "conversation_id": conversation.pk,
            "user_id": user.pk,
            "attachment_ids": [entry.pk for entry in active_attachments],
        },
    )

    batch_no = build_batch_no()
    work_dir = build_batch_work_dir(batch_no)
    work_dir.mkdir(parents=True, exist_ok=True)

    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        batch_no=batch_no,
        work_dir=str(work_dir),
    )

    for attachment in active_attachments:
        # The role is derived from the extension of the original file name.
        extension = Path(attachment.original_name).suffix.lower().lstrip(".")
        if extension in ARCHIVE_EXTENSIONS:
            source_role = FileSummaryBatchAttachment.SourceRole.ARCHIVE
        else:
            source_role = FileSummaryBatchAttachment.SourceRole.MULTI_FILE
        FileSummaryBatchAttachment.objects.create(
            batch=batch,
            attachment=attachment,
            source_role=source_role,
        )
        attachment.upload_status = FileAttachment.UploadStatus.BOUND
        attachment.save(update_fields=["upload_status"])

    for node_code, node_name, _skill_name in NODE_DEFINITIONS:
        WorkflowNodeRun.objects.create(batch=batch, node_code=node_code, node_name=node_name)

    created_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", created_payload)
    logger.info(
        "File summary batch created",
        extra={"batch_id": batch.pk, "batch_no": batch.batch_no},
    )
    return batch
|
||||
|
||||
|
||||
class WorkflowExecutor:
    """Runs the node runs of one file-summary batch and persists their progress.

    Every status change is saved on the batch / node row and mirrored through
    ``record_event`` so that listeners can follow the run.
    """

    def __init__(self, batch: FileSummaryBatch, registry: SkillRegistry | None = None):
        """Bind the executor to ``batch``; fall back to the default skill registry."""
        self.batch = batch
        self.registry = registry or default_skill_registry()

    def run(self) -> None:
        """Execute the batch's node runs in ascending ``id`` order.

        The batch is marked RUNNING first. If any node raises, the batch is
        marked FAILED with the error text, a ``workflow_failed`` event is
        recorded and the exception is logged instead of re-raised. Otherwise
        the batch is marked SUCCESS and ``workflow_completed`` is recorded.
        """
        batch = self.batch
        logger.info("Workflow run started", extra={"batch_id": batch.pk})
        batch.status = FileSummaryBatch.Status.RUNNING
        batch.started_at = timezone.now()
        batch.save(update_fields=["status", "started_at"])
        record_event(batch, "workflow_started", {"batch_id": batch.pk})

        try:
            for node_run in batch.node_runs.order_by("id"):
                self._run_node(node_run)
        except Exception as exc:
            failure_text = str(exc)
            logger.exception(
                "Workflow run failed",
                extra={"batch_id": batch.pk, "error": failure_text},
            )
            batch.status = FileSummaryBatch.Status.FAILED
            batch.error_message = failure_text
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(batch, "workflow_failed", {"message": failure_text})
        else:
            batch.status = FileSummaryBatch.Status.SUCCESS
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "finished_at"])
            record_event(batch, "workflow_completed", {"batch_id": batch.pk})
            logger.info("Workflow run completed", extra={"batch_id": batch.pk})

    def _emit_node_progress(self, node: WorkflowNodeRun) -> None:
        """Record a ``node_progress`` event carrying the node's current state."""
        record_event(
            self.batch,
            "node_progress",
            {
                "node_code": node.node_code,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            },
        )

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Run a single node and persist its RUNNING -> SUCCESS/FAILED transition.

        The node's skill is looked up in ``NODE_DEFINITIONS`` by node code; a
        node without a skill name is marked successful without executing
        anything. A skill that raises, or that returns an unsuccessful result,
        marks the node FAILED and the exception propagates to the caller.
        """
        logger.info(
            "Workflow node started",
            extra={
                "batch_id": self.batch.pk,
                "node_code": node.node_code,
                "node_name": node.node_name,
            },
        )
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        self._emit_node_progress(node)

        # First definition whose code matches wins; empty string means "no skill".
        skill_name = ""
        for code, _name, skill in NODE_DEFINITIONS:
            if code == node.node_code:
                skill_name = skill
                break

        if skill_name:
            try:
                result = self.registry.execute(skill_name, WorkflowContext(batch=self.batch))
                if not result.success:
                    logger.warning(
                        "Workflow node skill failed",
                        extra={
                            "batch_id": self.batch.pk,
                            "node_code": node.node_code,
                            "skill_name": skill_name,
                            "result_message": result.message,
                        },
                    )
                    raise RuntimeError(result.message or f"{node.node_name}执行失败")
            except Exception as exc:
                # Progress is intentionally left at its last value on failure.
                node.status = WorkflowNodeRun.Status.FAILED
                node.finished_at = timezone.now()
                node.message = str(exc)
                node.save(update_fields=["status", "finished_at", "message"])
                self._emit_node_progress(node)
                raise

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        self._emit_node_progress(node)
        logger.info(
            "Workflow node finished",
            extra={"batch_id": self.batch.pk, "node_code": node.node_code},
        )
|
||||
|
||||
|
||||
def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None:
    """Start the file-summary workflow for ``batch``.

    Args:
        batch: The batch whose node runs should be executed.
        async_run: When false, run the workflow in the calling thread and
            return once it has finished. When true (default), run it on a
            daemon thread and return immediately.
    """
    executor = WorkflowExecutor(batch)
    if not async_run:
        logger.info("Workflow starting synchronously", extra={"batch_id": batch.pk})
        executor.run()
        return

    def _run_in_worker() -> None:
        # Django keeps one database connection per thread and only cleans it
        # up automatically around a request; a worker thread has to release
        # its own connection, otherwise each workflow run can leave one open.
        from django.db import connections

        try:
            executor.run()
        finally:
            connections.close_all()

    logger.info("Workflow starting asynchronously", extra={"batch_id": batch.pk})
    Thread(target=_run_in_worker, daemon=True).start()
|
||||
89
review_agent/file_summary/workflow_trigger.py
Normal file
89
review_agent/file_summary/workflow_trigger.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from review_agent.models import Conversation, FileAttachment
|
||||
|
||||
|
||||
# Any of these substrings in a message matches the file-summary trigger.
TRIGGER_KEYWORDS = (
    "自动汇总",
    "文件目录",
    "页数",
    "目录与页数",
    "文件清单",
)

# Whole phrases that match the attachment-reader trigger on their own.
ATTACHMENT_READER_KEYWORDS = (
    # verb + "附件"
    "阅读附件", "读取附件", "解析附件", "分析附件", "查看附件",
    # detail / content phrases
    "附件详情", "文件详情", "文件内容", "附件内容",
    # references to a provided or uploaded file
    "简历文件", "提供的文件", "提供的简历", "上传的文件", "上传文件",
    "这个文件", "该文件",
    # summarise / analyse / read phrases
    "总结附件", "总结文件", "分析这个文件", "阅读这个文件",
)

# Words that refer to an attachment; they only match the attachment-reader
# trigger when combined with one of the intent words below.
ATTACHMENT_REFERENCE_KEYWORDS = ("附件", "文件", "简历", "上传")

# Words expressing a wish to read or extract something.
ATTACHMENT_READ_INTENT_KEYWORDS = (
    "阅读", "读取", "读", "解析", "分析",
    "查看", "提取", "整理", "总结", "介绍",
    "项目经历", "工作经历", "经历", "信息", "内容",
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TriggerResult:
    """Immutable outcome of checking whether a message should start a workflow."""

    # True when a workflow should be started for the message.
    should_start: bool
    # Identifier of the workflow to start; empty when nothing should start.
    workflow_type: str = ""
    # Short code explaining why nothing was started; empty on success.
    reason: str = ""
|
||||
|
||||
|
||||
def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the file-summary workflow.

    Returns a result with ``reason="not_matched"`` when no trigger keyword
    occurs in the stripped text, ``reason="missing_attachment"`` when the
    conversation has no active, non-deleted attachment, and otherwise a
    positive result for the ``file_summary`` workflow.
    """
    normalized = (content or "").strip()
    keyword_hit = False
    for keyword in TRIGGER_KEYWORDS:
        if keyword in normalized:
            keyword_hit = True
            break
    if not keyword_hit:
        return TriggerResult(should_start=False, reason="not_matched")

    usable_attachments = FileAttachment.objects.filter(
        conversation=conversation,
        is_active=True,
    ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    if usable_attachments.exists():
        return TriggerResult(should_start=True, workflow_type="file_summary")
    return TriggerResult(should_start=False, reason="missing_attachment")
|
||||
|
||||
|
||||
def evaluate_attachment_reader_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the attachment-reader workflow.

    The text matches when it contains a full reader phrase, or when it
    contains both an attachment reference word and a read-intent word.
    Returns ``reason="not_matched"`` when it does not match,
    ``reason="missing_attachment"`` when the conversation has no active,
    non-deleted attachment, and otherwise a positive result for the
    ``attachment_reader`` workflow.
    """
    text = (content or "").strip()

    def mentions(keywords) -> bool:
        """Return True when any of ``keywords`` occurs in the message text."""
        return any(keyword in text for keyword in keywords)

    if not mentions(ATTACHMENT_READER_KEYWORDS):
        refers_to_attachment = mentions(ATTACHMENT_REFERENCE_KEYWORDS)
        if not (refers_to_attachment and mentions(ATTACHMENT_READ_INTENT_KEYWORDS)):
            return TriggerResult(should_start=False, reason="not_matched")

    usable_attachments = FileAttachment.objects.filter(
        conversation=conversation,
        is_active=True,
    ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    if usable_attachments.exists():
        return TriggerResult(should_start=True, workflow_type="attachment_reader")
    return TriggerResult(should_start=False, reason="missing_attachment")
|
||||
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
import logging
|
||||
from urllib import error, request
|
||||
|
||||
from django.conf import settings
|
||||
@@ -12,6 +13,9 @@ class LLMRequestError(RuntimeError):
|
||||
"""Raised when the remote LLM provider call fails."""
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def generate_reply(conversation, user_message: str) -> str:
|
||||
"""Calls the SiliconFlow OpenAI-compatible chat endpoint and returns assistant text."""
|
||||
|
||||
@@ -53,6 +57,47 @@ def generate_reply(conversation, user_message: str) -> str:
|
||||
raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
||||
|
||||
|
||||
def generate_completion(messages: list[dict[str, str]], *, temperature: float = 0.0) -> str:
    """Call the configured chat-completions endpoint with explicit messages.

    Args:
        messages: Chat messages sent as the ``messages`` field of the payload.
        temperature: Sampling temperature sent with the request.

    Returns:
        The stripped text content of the first choice in the response.

    Raises:
        LLMConfigurationError: If ``LLM_API_KEY`` or ``LLM_MODEL`` is not set.
        LLMRequestError: If the HTTP call fails or times out, or if the
            response body is not JSON of the expected shape.
    """
    if not settings.LLM_API_KEY:
        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
    if not settings.LLM_MODEL:
        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")

    payload = {
        "model": settings.LLM_MODEL,
        "messages": messages,
        "temperature": temperature,
    }
    body = json.dumps(payload).encode("utf-8")
    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"

    http_request = request.Request(
        endpoint,
        data=body,
        headers={
            "Authorization": f"Bearer {settings.LLM_API_KEY}",
            "Content-Type": "application/json",
        },
        method="POST",
    )

    try:
        with request.urlopen(http_request, timeout=60) as response:
            raw_body = response.read()
    except error.HTTPError as exc:
        details = exc.read().decode("utf-8", errors="ignore")
        raise LLMRequestError(f"模型接口调用失败:HTTP {exc.code} {details}") from exc
    except error.URLError as exc:
        raise LLMRequestError(f"模型接口调用失败:{exc.reason}") from exc
    except OSError as exc:
        # A timeout or dropped connection while reading the body is raised as
        # a plain OSError (e.g. TimeoutError), not wrapped in URLError.
        raise LLMRequestError(f"模型接口调用失败:{exc}") from exc

    try:
        data = json.loads(raw_body.decode("utf-8"))
        return data["choices"][0]["message"]["content"].strip()
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as exc:
        # ValueError covers invalid JSON and invalid UTF-8; AttributeError
        # covers a null ``content`` field.
        raise LLMRequestError("模型接口返回格式不符合预期。") from exc
|
||||
|
||||
|
||||
def stream_reply(conversation, user_message: str):
|
||||
"""Streams incremental assistant text from the SiliconFlow chat endpoint."""
|
||||
|
||||
@@ -89,7 +134,11 @@ def stream_reply(conversation, user_message: str):
|
||||
data = line[5:].strip()
|
||||
if data == "[DONE]":
|
||||
break
|
||||
try:
|
||||
payload = json.loads(data)
|
||||
except json.JSONDecodeError:
|
||||
logger.warning("Skipping malformed LLM stream data", extra={"data": data[:200]})
|
||||
continue
|
||||
delta = (
|
||||
payload.get("choices", [{}])[0]
|
||||
.get("delta", {})
|
||||
|
||||
@@ -0,0 +1,481 @@
|
||||
# Generated by Django 5.2.14 on 2026-06-05 17:09
|
||||
|
||||
import django.db.models.deletion
|
||||
from django.conf import settings
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
def _pk():
    """Return the ``id`` primary-key field entry used by every model below."""
    return (
        "id",
        models.BigAutoField(
            auto_created=True,
            primary_key=True,
            serialize=False,
            verbose_name="ID",
        ),
    )


def _cascade_fk(name, to, related_name):
    """Return a ``(name, ForeignKey)`` entry that cascades on delete."""
    return (
        name,
        models.ForeignKey(
            on_delete=django.db.models.deletion.CASCADE,
            related_name=related_name,
            to=to,
        ),
    )


def _index(model_name, fields, name):
    """Return an ``AddIndex`` operation for a plain multi-column index."""
    return migrations.AddIndex(
        model_name=model_name,
        index=models.Index(fields=fields, name=name),
    )


def _unique(model_name, fields, name):
    """Return an ``AddConstraint`` operation for a unique constraint."""
    return migrations.AddConstraint(
        model_name=model_name,
        constraint=models.UniqueConstraint(fields=fields, name=name),
    )


class Migration(migrations.Migration):
    """Creates the file-attachment, file-summary and workflow tables with their indexes."""

    dependencies = [
        ("review_agent", "0001_initial"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        migrations.CreateModel(
            name="FileAttachment",
            fields=[
                _pk(),
                ("original_name", models.CharField(max_length=255)),
                ("version_no", models.PositiveIntegerField(default=1)),
                ("is_active", models.BooleanField(default=True)),
                ("storage_path", models.CharField(max_length=500)),
                ("file_size", models.BigIntegerField(default=0)),
                ("content_type", models.CharField(blank=True, default="", max_length=120)),
                (
                    "upload_status",
                    models.CharField(
                        choices=[
                            ("uploaded", "已上传"),
                            ("bound", "已绑定"),
                            ("deleted", "已删除"),
                        ],
                        default="uploaded",
                        max_length=20,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                _cascade_fk("conversation", "review_agent.conversation", "file_attachments"),
                _cascade_fk("user", settings.AUTH_USER_MODEL, "review_file_attachments"),
            ],
            options={
                "db_table": "ra_file_attachment",
                "ordering": ["-created_at", "-id"],
            },
        ),
        migrations.CreateModel(
            name="FileSummaryBatch",
            fields=[
                _pk(),
                ("batch_no", models.CharField(max_length=64, unique=True)),
                ("product_name", models.CharField(blank=True, default="", max_length=200)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "待执行"),
                            ("running", "执行中"),
                            ("success", "成功"),
                            ("failed", "失败"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("total_files", models.IntegerField(default=0)),
                ("supported_files", models.IntegerField(default=0)),
                ("success_files", models.IntegerField(default=0)),
                ("failed_files", models.IntegerField(default=0)),
                ("unsupported_files", models.IntegerField(default=0)),
                ("uncertain_files", models.IntegerField(default=0)),
                ("total_pages", models.IntegerField(default=0)),
                ("work_dir", models.CharField(blank=True, default="", max_length=500)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                _cascade_fk("conversation", "review_agent.conversation", "file_summary_batches"),
                (
                    "trigger_message",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="triggered_file_summary_batches",
                        to="review_agent.message",
                    ),
                ),
                _cascade_fk("user", settings.AUTH_USER_MODEL, "review_file_summary_batches"),
            ],
            options={
                "db_table": "ra_file_summary_batch",
                "ordering": ["-created_at", "-id"],
            },
        ),
        migrations.CreateModel(
            name="ExportedSummaryFile",
            fields=[
                _pk(),
                (
                    "export_type",
                    models.CharField(
                        choices=[("markdown", "Markdown"), ("excel", "Excel")],
                        max_length=20,
                    ),
                ),
                ("file_name", models.CharField(max_length=255)),
                ("storage_path", models.CharField(max_length=500)),
                (
                    "status",
                    models.CharField(
                        choices=[("success", "成功"), ("failed", "失败")],
                        default="success",
                        max_length=20,
                    ),
                ),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                _cascade_fk("batch", "review_agent.filesummarybatch", "exports"),
            ],
            options={
                "db_table": "ra_exported_summary_file",
                "ordering": ["-created_at", "-id"],
            },
        ),
        migrations.CreateModel(
            name="FileSummaryBatchAttachment",
            fields=[
                _pk(),
                (
                    "source_role",
                    models.CharField(
                        choices=[("archive", "压缩包"), ("multi_file", "多文件")],
                        default="multi_file",
                        max_length=20,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                _cascade_fk("attachment", "review_agent.fileattachment", "batch_bindings"),
                _cascade_fk("batch", "review_agent.filesummarybatch", "batch_attachments"),
            ],
            options={
                "db_table": "ra_file_summary_batch_attachment",
            },
        ),
        migrations.CreateModel(
            name="FileSummaryItem",
            fields=[
                _pk(),
                ("file_index", models.PositiveIntegerField()),
                ("directory_level", models.CharField(blank=True, default="", max_length=300)),
                ("file_name", models.CharField(max_length=255)),
                ("file_type", models.CharField(max_length=20)),
                ("relative_path", models.CharField(max_length=500)),
                ("storage_path", models.CharField(max_length=500)),
                ("page_count", models.IntegerField(blank=True, null=True)),
                (
                    "statistics_status",
                    models.CharField(
                        choices=[
                            ("success", "成功"),
                            ("failed", "失败"),
                            ("unsupported", "不支持"),
                            ("uncertain", "不确定"),
                            ("skipped", "跳过"),
                        ],
                        default="skipped",
                        max_length=20,
                    ),
                ),
                ("retry_count", models.PositiveIntegerField(default=0)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                _cascade_fk("batch", "review_agent.filesummarybatch", "items"),
            ],
            options={
                "db_table": "ra_file_summary_item",
                "ordering": ["file_index", "id"],
            },
        ),
        migrations.CreateModel(
            name="WorkflowEvent",
            fields=[
                _pk(),
                ("event_type", models.CharField(max_length=40)),
                ("payload", models.JSONField(default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                _cascade_fk("batch", "review_agent.filesummarybatch", "events"),
            ],
            options={
                "db_table": "ra_workflow_event",
                "ordering": ["id"],
            },
        ),
        migrations.CreateModel(
            name="WorkflowNodeRun",
            fields=[
                _pk(),
                ("node_code", models.CharField(max_length=40)),
                ("node_name", models.CharField(max_length=80)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "等待中"),
                            ("running", "执行中"),
                            ("retrying", "重试中"),
                            ("success", "成功"),
                            ("failed", "失败"),
                            ("skipped", "跳过"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("progress", models.PositiveIntegerField(default=0)),
                ("message", models.TextField(blank=True, default="")),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                _cascade_fk("batch", "review_agent.filesummarybatch", "node_runs"),
            ],
            options={
                "db_table": "ra_workflow_node_run",
            },
        ),
        # FileAttachment indexes and constraint
        _index("fileattachment", ["conversation", "created_at"], "idx_ra_attachment_conv_created"),
        _index("fileattachment", ["user", "created_at"], "idx_ra_attachment_user_created"),
        _index(
            "fileattachment",
            ["conversation", "original_name", "is_active"],
            "idx_ra_attachment_active",
        ),
        _unique(
            "fileattachment",
            ("conversation", "original_name", "version_no"),
            "uq_ra_attachment_conv_name_version",
        ),
        # FileSummaryBatch indexes
        _index("filesummarybatch", ["conversation", "created_at"], "idx_ra_batch_conv_created"),
        _index("filesummarybatch", ["user", "created_at"], "idx_ra_batch_user_created"),
        _index("filesummarybatch", ["status", "created_at"], "idx_ra_batch_status"),
        # ExportedSummaryFile indexes
        _index("exportedsummaryfile", ["batch", "export_type"], "idx_ra_export_batch_type"),
        _index("exportedsummaryfile", ["batch", "created_at"], "idx_ra_export_batch_created"),
        # FileSummaryBatchAttachment indexes and constraint
        _index("filesummarybatchattachment", ["batch", "created_at"], "idx_ra_batch_attachment_batch"),
        _index("filesummarybatchattachment", ["attachment"], "idx_ra_batch_attach_file"),
        _unique("filesummarybatchattachment", ("batch", "attachment"), "uq_ra_batch_attachment"),
        # FileSummaryItem indexes and constraint
        _index("filesummaryitem", ["batch", "file_index"], "idx_ra_item_batch_index"),
        _index("filesummaryitem", ["batch", "statistics_status"], "idx_ra_item_batch_status"),
        _index("filesummaryitem", ["batch", "file_type"], "idx_ra_item_batch_type"),
        _unique("filesummaryitem", ("batch", "relative_path"), "uq_ra_item_batch_relative_path"),
        # WorkflowEvent indexes
        _index("workflowevent", ["batch", "id"], "idx_ra_event_batch_id"),
        _index("workflowevent", ["batch", "created_at"], "idx_ra_event_batch_created"),
        # WorkflowNodeRun index and constraint
        _index("workflownoderun", ["batch", "status"], "idx_ra_node_batch_status"),
        _unique("workflownoderun", ("batch", "node_code"), "uq_ra_node_batch_code"),
    ]
|
||||
@@ -42,3 +42,293 @@ class Message(models.Model):
|
||||
|
||||
def __str__(self) -> str:
    """Return the role display label and the conversation id, joined by " - "."""
    role_label = self.get_role_display()
    return f"{role_label} - {self.conversation_id}"
|
||||
|
||||
|
||||
class FileAttachment(models.Model):
    """One uploaded version of a file inside a conversation."""

    class UploadStatus(models.TextChoices):
        UPLOADED = "uploaded", "已上传"
        BOUND = "bound", "已绑定"
        DELETED = "deleted", "已删除"

    # Both relations cascade: the row is deleted with its conversation or user.
    conversation = models.ForeignKey(
        Conversation,
        related_name="file_attachments",
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name="review_file_attachments",
        on_delete=models.CASCADE,
    )
    original_name = models.CharField(max_length=255)
    # Unique together with (conversation, original_name); see Meta.constraints.
    version_no = models.PositiveIntegerField(default=1)
    is_active = models.BooleanField(default=True)
    storage_path = models.CharField(max_length=500)
    file_size = models.BigIntegerField(default=0)
    content_type = models.CharField(blank=True, default="", max_length=120)
    upload_status = models.CharField(
        choices=UploadStatus.choices,
        default=UploadStatus.UPLOADED,
        max_length=20,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_file_attachment"
        ordering = ["-created_at", "-id"]
        constraints = [
            models.UniqueConstraint(
                fields=["conversation", "original_name", "version_no"],
                name="uq_ra_attachment_conv_name_version",
            )
        ]
        indexes = [
            models.Index(
                fields=["conversation", "created_at"],
                name="idx_ra_attachment_conv_created",
            ),
            models.Index(
                fields=["user", "created_at"],
                name="idx_ra_attachment_user_created",
            ),
            models.Index(
                fields=["conversation", "original_name", "is_active"],
                name="idx_ra_attachment_active",
            ),
        ]

    def __str__(self) -> str:
        """Return the original file name followed by its version number."""
        return f"{self.original_name} v{self.version_no}"
|
||||
|
||||
|
||||
class FileSummaryBatch(models.Model):
    """One run of the automatic file inventory and page-count workflow."""

    class Status(models.TextChoices):
        PENDING = "pending", "待执行"
        RUNNING = "running", "执行中"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    conversation = models.ForeignKey(
        Conversation,
        related_name="file_summary_batches",
        on_delete=models.CASCADE,
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        related_name="review_file_summary_batches",
        on_delete=models.CASCADE,
    )
    # Optional; set to NULL instead of deleting the batch when the message goes away.
    trigger_message = models.ForeignKey(
        Message,
        related_name="triggered_file_summary_batches",
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
    )
    batch_no = models.CharField(max_length=64, unique=True)
    product_name = models.CharField(blank=True, default="", max_length=200)
    status = models.CharField(choices=Status.choices, default=Status.PENDING, max_length=20)

    # Counters, all starting at zero.
    total_files = models.IntegerField(default=0)
    supported_files = models.IntegerField(default=0)
    success_files = models.IntegerField(default=0)
    failed_files = models.IntegerField(default=0)
    unsupported_files = models.IntegerField(default=0)
    uncertain_files = models.IntegerField(default=0)
    total_pages = models.IntegerField(default=0)

    work_dir = models.CharField(blank=True, default="", max_length=500)
    error_message = models.TextField(blank=True, default="")

    # Lifecycle timestamps; started/finished stay NULL until set.
    created_at = models.DateTimeField(auto_now_add=True)
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)

    class Meta:
        db_table = "ra_file_summary_batch"
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"),
            models.Index(fields=["user", "created_at"], name="idx_ra_batch_user_created"),
            models.Index(fields=["status", "created_at"], name="idx_ra_batch_status"),
        ]

    def __str__(self) -> str:
        """Return the batch number."""
        return self.batch_no
|
||||
|
||||
|
||||
class FileSummaryBatchAttachment(models.Model):
    """Link row recording which attachment versions a workflow batch uses."""

    class SourceRole(models.TextChoices):
        ARCHIVE = "archive", "压缩包"
        MULTI_FILE = "multi_file", "多文件"

    batch = models.ForeignKey(
        FileSummaryBatch,
        related_name="batch_attachments",
        on_delete=models.CASCADE,
    )
    attachment = models.ForeignKey(
        FileAttachment,
        related_name="batch_bindings",
        on_delete=models.CASCADE,
    )
    source_role = models.CharField(
        choices=SourceRole.choices,
        default=SourceRole.MULTI_FILE,
        max_length=20,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_file_summary_batch_attachment"
        # An attachment can be bound to a given batch only once.
        constraints = [
            models.UniqueConstraint(
                fields=["batch", "attachment"],
                name="uq_ra_batch_attachment",
            )
        ]
        indexes = [
            models.Index(
                fields=["batch", "created_at"],
                name="idx_ra_batch_attachment_batch",
            ),
            models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
        ]
|
||||
|
||||
|
||||
class FileSummaryItem(models.Model):
    """One scanned file of a batch together with its page-count result."""

    class StatisticsStatus(models.TextChoices):
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"
        UNSUPPORTED = "unsupported", "不支持"
        UNCERTAIN = "uncertain", "不确定"
        SKIPPED = "skipped", "跳过"

    batch = models.ForeignKey(
        FileSummaryBatch,
        related_name="items",
        on_delete=models.CASCADE,
    )
    file_index = models.PositiveIntegerField()
    directory_level = models.CharField(blank=True, default="", max_length=300)
    file_name = models.CharField(max_length=255)
    file_type = models.CharField(max_length=20)
    # Unique within a batch; see Meta.constraints.
    relative_path = models.CharField(max_length=500)
    storage_path = models.CharField(max_length=500)
    # Nullable: no page count is stored by default.
    page_count = models.IntegerField(blank=True, null=True)
    statistics_status = models.CharField(
        choices=StatisticsStatus.choices,
        default=StatisticsStatus.SKIPPED,
        max_length=20,
    )
    retry_count = models.PositiveIntegerField(default=0)
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_file_summary_item"
        ordering = ["file_index", "id"]
        constraints = [
            models.UniqueConstraint(
                fields=["batch", "relative_path"],
                name="uq_ra_item_batch_relative_path",
            )
        ]
        indexes = [
            models.Index(fields=["batch", "file_index"], name="idx_ra_item_batch_index"),
            models.Index(fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"),
            models.Index(fields=["batch", "file_type"], name="idx_ra_item_batch_type"),
        ]
|
||||
|
||||
|
||||
class WorkflowNodeRun(models.Model):
    """Persisted status of a single workflow node within a batch."""

    class Status(models.TextChoices):
        PENDING = "pending", "等待中"
        RUNNING = "running", "执行中"
        RETRYING = "retrying", "重试中"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"
        SKIPPED = "skipped", "跳过"

    batch = models.ForeignKey(
        FileSummaryBatch,
        related_name="node_runs",
        on_delete=models.CASCADE,
    )
    # Unique within a batch; see Meta.constraints.
    node_code = models.CharField(max_length=40)
    node_name = models.CharField(max_length=80)
    status = models.CharField(choices=Status.choices, default=Status.PENDING, max_length=20)
    progress = models.PositiveIntegerField(default=0)
    message = models.TextField(blank=True, default="")
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)

    class Meta:
        db_table = "ra_workflow_node_run"
        constraints = [
            models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code")
        ]
        indexes = [
            models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
        ]
|
||||
|
||||
|
||||
class WorkflowEvent(models.Model):
    """One persisted workflow event, kept for SSE replay and diagnostics.

    Events belong to a batch and are deleted with it. The default ordering
    is by primary key, i.e. insertion order.
    """

    # Owning batch; reachable from the batch side as ``batch.events``.
    batch = models.ForeignKey(FileSummaryBatch, on_delete=models.CASCADE, related_name="events")
    event_type = models.CharField(max_length=40)
    # Free-form JSON body of the event; defaults to an empty object.
    payload = models.JSONField(default=dict)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_workflow_event"
        ordering = ["id"]
        indexes = [
            models.Index(
                fields=["batch", "id"],
                name="idx_ra_event_batch_id",
            ),
            models.Index(
                fields=["batch", "created_at"],
                name="idx_ra_event_batch_created",
            ),
        ]
|
||||
|
||||
|
||||
class ExportedSummaryFile(models.Model):
    """Record of a report file generated for a batch.

    The row keeps the file's name and storage path so a download view can
    locate it after checking permissions. Newest exports sort first.
    """

    class ExportType(models.TextChoices):
        # Stored value, display label.
        MARKDOWN = "markdown", "Markdown"
        EXCEL = "excel", "Excel"

    class Status(models.TextChoices):
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    # Owning batch; reachable from the batch side as ``batch.exports``.
    batch = models.ForeignKey(FileSummaryBatch, on_delete=models.CASCADE, related_name="exports")
    export_type = models.CharField(
        max_length=20,
        choices=ExportType.choices,
    )
    file_name = models.CharField(max_length=255)
    storage_path = models.CharField(max_length=500)
    status = models.CharField(
        max_length=20,
        choices=Status.choices,
        default=Status.SUCCESS,
    )
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_exported_summary_file"
        # Most recent first; ``-id`` breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(
                fields=["batch", "export_type"],
                name="idx_ra_export_batch_type",
            ),
            models.Index(
                fields=["batch", "created_at"],
                name="idx_ra_export_batch_created",
            ),
        ]
|
||||
|
||||
@@ -1,12 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from django.db.models import Q, QuerySet
|
||||
from django.conf import settings
|
||||
from django.utils import timezone
|
||||
|
||||
from .file_summary.skills.attachment_reader import AttachmentReaderSkill
|
||||
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
|
||||
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
|
||||
from .models import Conversation, Message
|
||||
from .models import Conversation, FileAttachment, Message
|
||||
from .skill_router import route_message_intent
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def list_conversations(user, search: str = "") -> QuerySet[Conversation]:
|
||||
@@ -47,6 +55,14 @@ def append_user_message(conversation: Conversation, content: str) -> Message:
|
||||
role=Message.Role.USER,
|
||||
content=content.strip(),
|
||||
)
|
||||
logger.info(
|
||||
"User message appended",
|
||||
extra={
|
||||
"conversation_id": conversation.pk,
|
||||
"message_id": message.pk,
|
||||
"content_length": len(message.content),
|
||||
},
|
||||
)
|
||||
|
||||
if conversation.messages.filter(role=Message.Role.USER).count() == 1:
|
||||
conversation.title = build_conversation_title(content)
|
||||
@@ -58,11 +74,20 @@ def append_user_message(conversation: Conversation, content: str) -> Message:
|
||||
def append_assistant_message(conversation: Conversation, content: str) -> Message:
|
||||
"""Appends the deterministic assistant reply."""
|
||||
|
||||
return Message.objects.create(
|
||||
message = Message.objects.create(
|
||||
conversation=conversation,
|
||||
role=Message.Role.ASSISTANT,
|
||||
content=content,
|
||||
)
|
||||
logger.info(
|
||||
"Assistant message appended",
|
||||
extra={
|
||||
"conversation_id": conversation.pk,
|
||||
"message_id": message.pk,
|
||||
"content_length": len(content or ""),
|
||||
},
|
||||
)
|
||||
return message
|
||||
|
||||
|
||||
def send_message(conversation: Conversation, content: str) -> tuple[Message, Message]:
|
||||
@@ -88,6 +113,18 @@ def stream_message(conversation: Conversation, content: str):
|
||||
|
||||
user_message = append_user_message(conversation, content)
|
||||
assistant_parts: list[str] = []
|
||||
route = route_message_intent(conversation, content)
|
||||
logger.info(
|
||||
"Stream message started",
|
||||
extra={
|
||||
"conversation_id": conversation.pk,
|
||||
"user_message_id": user_message.pk,
|
||||
"route_action": route.action,
|
||||
"route_source": route.source,
|
||||
"route_confidence": route.confidence,
|
||||
"route_reason": route.reason,
|
||||
},
|
||||
)
|
||||
|
||||
yield sse_event(
|
||||
"meta",
|
||||
@@ -99,13 +136,134 @@ def stream_message(conversation: Conversation, content: str):
|
||||
},
|
||||
)
|
||||
|
||||
if route.starts_file_summary and not _has_active_attachments(conversation):
|
||||
reply_content = "请先在当前对话右侧上传需要汇总的文件或压缩包,然后再发送自动汇总指令。"
|
||||
assistant_message = append_assistant_message(conversation, reply_content)
|
||||
yield sse_event("chunk", {"delta": reply_content})
|
||||
yield sse_event(
|
||||
"done",
|
||||
{
|
||||
"assistant_message_id": assistant_message.pk,
|
||||
"conversation_id": conversation.pk,
|
||||
"title": conversation.title,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
if route.uses_attachment_reader and not _has_active_attachments(conversation):
|
||||
reply_content = "请先在当前对话右侧上传需要阅读的附件,然后再发送解析或阅读附件指令。"
|
||||
assistant_message = append_assistant_message(conversation, reply_content)
|
||||
yield sse_event("chunk", {"delta": reply_content})
|
||||
yield sse_event(
|
||||
"done",
|
||||
{
|
||||
"assistant_message_id": assistant_message.pk,
|
||||
"conversation_id": conversation.pk,
|
||||
"title": conversation.title,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
if route.uses_attachment_reader:
|
||||
attachments = _select_attachments_for_reader(conversation, content)
|
||||
logger.info(
|
||||
"Attachment reader path selected",
|
||||
extra={
|
||||
"conversation_id": conversation.pk,
|
||||
"attachment_count": len(attachments),
|
||||
"attachment_ids": [attachment.pk for attachment in attachments],
|
||||
},
|
||||
)
|
||||
result = AttachmentReaderSkill().run_for_attachments(attachments)
|
||||
reply_content = _format_attachment_reader_reply(result.data.get("attachments", []), result.message)
|
||||
assistant_message = append_assistant_message(conversation, reply_content)
|
||||
yield sse_event("chunk", {"delta": reply_content})
|
||||
yield sse_event(
|
||||
"done",
|
||||
{
|
||||
"assistant_message_id": assistant_message.pk,
|
||||
"conversation_id": conversation.pk,
|
||||
"title": conversation.title,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
if route.starts_file_summary:
|
||||
batch = create_file_summary_batch(
|
||||
conversation=conversation,
|
||||
user=conversation.user,
|
||||
trigger_message=user_message,
|
||||
)
|
||||
start_file_summary_workflow(
|
||||
batch,
|
||||
async_run=getattr(settings, "FILE_SUMMARY_ASYNC", True),
|
||||
)
|
||||
reply_content = f"已启动文件目录与页数自动汇总工作流,批次号:{batch.batch_no}。"
|
||||
assistant_message = append_assistant_message(conversation, reply_content)
|
||||
yield sse_event(
|
||||
"workflow_started",
|
||||
{
|
||||
"workflow_type": "file_summary",
|
||||
"batch_id": batch.pk,
|
||||
"batch_no": batch.batch_no,
|
||||
},
|
||||
)
|
||||
yield sse_event("chunk", {"delta": reply_content})
|
||||
yield sse_event(
|
||||
"done",
|
||||
{
|
||||
"assistant_message_id": assistant_message.pk,
|
||||
"conversation_id": conversation.pk,
|
||||
"title": conversation.title,
|
||||
},
|
||||
)
|
||||
return
|
||||
|
||||
stream_failed = False
|
||||
stream_error = ""
|
||||
try:
|
||||
for chunk in stream_reply(conversation, content):
|
||||
assistant_parts.append(chunk)
|
||||
yield sse_event("chunk", {"delta": chunk})
|
||||
except (LLMConfigurationError, LLMRequestError) as exc:
|
||||
stream_failed = True
|
||||
stream_error = str(exc)
|
||||
logger.warning(
|
||||
"LLM stream failed",
|
||||
extra={"conversation_id": conversation.pk, "error": str(exc)},
|
||||
)
|
||||
except Exception as exc:
|
||||
stream_failed = True
|
||||
stream_error = str(exc)
|
||||
logger.exception(
|
||||
"Unexpected stream failure",
|
||||
extra={"conversation_id": conversation.pk, "error": str(exc)},
|
||||
)
|
||||
|
||||
if stream_failed:
|
||||
try:
|
||||
fallback_reply = generate_reply(conversation, content)
|
||||
assistant_parts = [fallback_reply]
|
||||
logger.info(
|
||||
"Non-stream fallback reply succeeded",
|
||||
extra={"conversation_id": conversation.pk, "content_length": len(fallback_reply)},
|
||||
)
|
||||
yield sse_event("replace", {"content": fallback_reply})
|
||||
except (LLMConfigurationError, LLMRequestError) as exc:
|
||||
fallback = f"模型调用失败:{exc}"
|
||||
assistant_parts = [fallback]
|
||||
logger.warning(
|
||||
"Non-stream fallback reply failed",
|
||||
extra={"conversation_id": conversation.pk, "error": str(exc), "stream_error": stream_error},
|
||||
)
|
||||
yield sse_event("error", {"message": fallback})
|
||||
except Exception as exc:
|
||||
fallback = f"回复生成中断:{stream_error or exc}"
|
||||
assistant_parts.append("\n\n" + fallback)
|
||||
logger.exception(
|
||||
"Non-stream fallback crashed",
|
||||
extra={"conversation_id": conversation.pk, "error": str(exc), "stream_error": stream_error},
|
||||
)
|
||||
yield sse_event("error", {"message": fallback})
|
||||
|
||||
assistant_message = append_assistant_message(conversation, "".join(assistant_parts).strip())
|
||||
@@ -133,6 +291,70 @@ def build_conversation_title(content: str) -> str:
|
||||
return normalized[:24]
|
||||
|
||||
|
||||
def _select_attachments_for_reader(conversation: Conversation, content: str):
    """Choose which attachments the reader skill should process.

    Candidates are the conversation's active attachments whose upload status
    is not DELETED, sorted by original name and then by descending version
    number. When the message text contains the original file name of at
    least one candidate, only those named candidates are returned; otherwise
    the full candidate list is returned.
    """
    queryset = (
        FileAttachment.objects.filter(conversation=conversation, is_active=True)
        .exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        .order_by("original_name", "-version_no")
    )
    candidates = list(queryset)

    # Plain substring match of each file name against the user's message.
    named_in_message = [item for item in candidates if item.original_name in content]
    if named_in_message:
        return named_in_message
    return candidates
|
||||
|
||||
|
||||
def _has_active_attachments(conversation: Conversation) -> bool:
    """Tell whether the conversation has any active attachment that is not DELETED."""
    active = FileAttachment.objects.filter(conversation=conversation, is_active=True)
    usable = active.exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    return usable.exists()
|
||||
|
||||
|
||||
def _format_attachment_reader_reply(attachments: list[dict[str, object]], message: str) -> str:
|
||||
if not attachments:
|
||||
return message or "当前对话没有可读取的附件。"
|
||||
|
||||
lines = ["## 附件解析结果"]
|
||||
for item in attachments:
|
||||
status = item.get("status", "")
|
||||
filename = item.get("filename", "")
|
||||
file_type = item.get("file_type", "")
|
||||
lines.extend(
|
||||
[
|
||||
"",
|
||||
f"### {filename}",
|
||||
f"- 类型:{file_type or '未知'}",
|
||||
f"- 状态:{status}",
|
||||
]
|
||||
)
|
||||
if item.get("error_message"):
|
||||
lines.append(f"- 错误:{item['error_message']}")
|
||||
continue
|
||||
|
||||
preview = str(item.get("preview_text") or "").strip()
|
||||
if preview:
|
||||
lines.extend(["", "摘要预览:", "```text", preview, "```"])
|
||||
|
||||
sections = item.get("sections") or []
|
||||
if sections:
|
||||
lines.append("")
|
||||
lines.append("结构详情:")
|
||||
for section in sections[:8]:
|
||||
if not isinstance(section, dict):
|
||||
continue
|
||||
section_type = section.get("type", "section")
|
||||
name = section.get("name", "")
|
||||
extra = ""
|
||||
if "row_count" in section:
|
||||
extra = f",{section['row_count']} 行"
|
||||
if "column_count" in section:
|
||||
extra += f",{section['column_count']} 列"
|
||||
lines.append(f"- {name}({section_type}{extra})")
|
||||
return "\n".join(lines).strip()
|
||||
|
||||
|
||||
def sse_event(event_name: str, payload: dict[str, object]) -> str:
|
||||
"""Formats one server-sent event frame."""
|
||||
|
||||
|
||||
189
review_agent/skill_router.py
Normal file
189
review_agent/skill_router.py
Normal file
@@ -0,0 +1,189 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
from .file_summary.workflow_trigger import (
|
||||
evaluate_attachment_reader_trigger,
|
||||
evaluate_file_summary_trigger,
|
||||
)
|
||||
from .llm import LLMConfigurationError, LLMRequestError, generate_completion
|
||||
from .models import Conversation, FileAttachment
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ROUTE_ACTIONS = {"normal_chat", "attachment_reader", "file_summary"}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SkillRoute:
    """Immutable routing decision for one user message.

    ``action`` names the chosen path; the remaining fields describe the
    decision (which skill or workflow, how confident, why, and which router
    produced it). The boolean properties are convenience checks on
    ``action``.
    """

    action: str
    skill_name: str = ""
    workflow_type: str = ""
    confidence: float = 0.0
    reason: str = ""
    source: str = "llm"

    def _has_action(self, expected: str) -> bool:
        """Return True when this route's action equals ``expected``."""
        return self.action == expected

    @property
    def uses_attachment_reader(self) -> bool:
        """True when the attachment reader skill should handle the message."""
        return self._has_action("attachment_reader")

    @property
    def starts_file_summary(self) -> bool:
        """True when the file summary workflow should be started."""
        return self._has_action("file_summary")

    @property
    def is_normal_chat(self) -> bool:
        """True when the message is an ordinary chat turn."""
        return self._has_action("normal_chat")
|
||||
|
||||
|
||||
def route_message_intent(conversation: Conversation, content: str) -> SkillRoute:
    """Decide how a user message should be handled.

    The LLM router is asked first, with the conversation's active
    attachments as context. If it is misconfigured, the request fails, or
    its answer cannot be parsed or validated, the keyword rules are used
    instead.

    Args:
        conversation: Conversation the message belongs to.
        content: Raw text of the user message.

    Returns:
        The route chosen by the LLM, or the rule-based route on failure.
    """
    active_files = list(_active_attachments(conversation))
    try:
        decision = _route_with_llm(conversation, content, active_files)
        log_fields = {
            "conversation_id": conversation.pk,
            "action": decision.action,
            "skill_name": decision.skill_name,
            "workflow_type": decision.workflow_type,
            "confidence": decision.confidence,
            "route_source": decision.source,
            "reason": decision.reason,
        }
        logger.info("LLM skill route selected", extra=log_fields)
        return decision
    except (LLMConfigurationError, LLMRequestError, ValueError, json.JSONDecodeError) as exc:
        # Routing must not block the chat: log the failure and fall back to rules.
        failure_fields = {"conversation_id": conversation.pk, "error": str(exc)}
        logger.warning("LLM skill route failed, fallback to rules", extra=failure_fields)
        return _route_with_rules(conversation, content)
|
||||
|
||||
|
||||
def _route_with_llm(
    conversation: Conversation,
    content: str,
    attachments: list[FileAttachment],
) -> SkillRoute:
    """Ask the LLM router which action fits the message.

    Args:
        conversation: Conversation the message belongs to (not used here).
        content: Raw text of the user message.
        attachments: Active attachments listed in the router prompt.

    Returns:
        A route built from the model's JSON answer. When the model picks an
        attachment-based action although no attachment is available, the
        route keeps that action but is tagged ``llm_missing_attachment``.

    Raises:
        ValueError: If the answer contains no parsable JSON object
            (``json.JSONDecodeError``) or names an unknown action.
    """
    prompt_messages = [
        {"role": "system", "content": _router_system_prompt()},
        {
            "role": "user",
            "content": _router_user_prompt(user_message=content, attachments=attachments),
        },
    ]
    raw = generate_completion(prompt_messages, temperature=0.0)
    payload = _parse_json_object(raw)

    action = str(payload.get("action", "normal_chat")).strip()
    if action not in ROUTE_ACTIONS:
        raise ValueError(f"不支持的路由动作:{action}")

    # The two outcomes differ only in the source tag and the default reason.
    needs_attachment = action in {"attachment_reader", "file_summary"}
    if needs_attachment and not attachments:
        source = "llm_missing_attachment"
        default_reason = "LLM 判断需要附件,但当前无附件。"
    else:
        source = "llm"
        default_reason = ""

    return SkillRoute(
        action=action,
        skill_name="attachment_reader" if action == "attachment_reader" else "",
        workflow_type="file_summary" if action == "file_summary" else "",
        confidence=_float_or_zero(payload.get("confidence")),
        reason=str(payload.get("reason") or default_reason),
        source=source,
    )
|
||||
|
||||
|
||||
def _route_with_rules(conversation: Conversation, content: str) -> SkillRoute:
    """Rule-based fallback router.

    The file summary trigger is evaluated first and the attachment reader
    trigger second. A trigger wins when it either asks to start or reports
    ``missing_attachment`` as its reason. If neither wins, the message is
    routed to normal chat. Every route produced here carries a fixed
    confidence of 0.5 and the source ``rule_fallback``.
    """
    summary_check = evaluate_file_summary_trigger(conversation, content)
    summary_wins = summary_check.should_start or summary_check.reason == "missing_attachment"
    if summary_wins:
        return SkillRoute(
            action="file_summary",
            workflow_type="file_summary",
            confidence=0.5,
            reason=summary_check.reason,
            source="rule_fallback",
        )

    reader_check = evaluate_attachment_reader_trigger(conversation, content)
    reader_wins = reader_check.should_start or reader_check.reason == "missing_attachment"
    if reader_wins:
        return SkillRoute(
            action="attachment_reader",
            skill_name="attachment_reader",
            confidence=0.5,
            reason=reader_check.reason,
            source="rule_fallback",
        )

    return SkillRoute(
        action="normal_chat",
        confidence=0.5,
        reason="未匹配到需要调用 Skill 或工作流的意图。",
        source="rule_fallback",
    )
|
||||
|
||||
|
||||
def _active_attachments(conversation: Conversation):
    """Return a queryset of the conversation's active, non-DELETED attachments.

    Rows are ordered by original name, then by descending version number.
    """
    active = FileAttachment.objects.filter(conversation=conversation, is_active=True)
    usable = active.exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    return usable.order_by("original_name", "-version_no")
|
||||
|
||||
|
||||
def _router_system_prompt() -> str:
|
||||
return (
|
||||
"你是审核智能体的工具路由器,只判断是否需要调用工具,不直接回答用户。"
|
||||
"你必须只输出 JSON 对象,不要输出 Markdown。"
|
||||
"可选 action:normal_chat、attachment_reader、file_summary。"
|
||||
"attachment_reader 用于用户要求阅读、提取、分析、总结、查看上传附件内容。"
|
||||
"file_summary 用于用户要求自动汇总文件目录、页数、清单或生成目录页数报告。"
|
||||
"normal_chat 用于不需要读取附件或执行工作流的一般问答。"
|
||||
"输出字段:action、confidence、reason。"
|
||||
)
|
||||
|
||||
|
||||
def _router_user_prompt(*, user_message: str, attachments: list[FileAttachment]) -> str:
|
||||
attachment_lines = [
|
||||
f"- id={attachment.pk}, name={attachment.original_name}, active={attachment.is_active}, status={attachment.upload_status}"
|
||||
for attachment in attachments
|
||||
]
|
||||
attachment_text = "\n".join(attachment_lines) if attachment_lines else "无 active 附件"
|
||||
return (
|
||||
f"用户消息:{user_message}\n\n"
|
||||
f"当前 active 附件:\n{attachment_text}\n\n"
|
||||
"请判断应调用哪个 action。只输出 JSON。"
|
||||
)
|
||||
|
||||
|
||||
def _parse_json_object(raw: str) -> dict:
|
||||
text = (raw or "").strip()
|
||||
if text.startswith("```"):
|
||||
text = text.strip("`").strip()
|
||||
if text.lower().startswith("json"):
|
||||
text = text[4:].strip()
|
||||
start = text.find("{")
|
||||
end = text.rfind("}")
|
||||
if start == -1 or end == -1 or end < start:
|
||||
raise json.JSONDecodeError("未找到 JSON 对象", text, 0)
|
||||
return json.loads(text[start : end + 1])
|
||||
|
||||
|
||||
def _float_or_zero(value) -> float:
|
||||
try:
|
||||
return float(value)
|
||||
except (TypeError, ValueError):
|
||||
return 0.0
|
||||
61
review_agent/urls.py
Normal file
61
review_agent/urls.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from django.urls import path
|
||||
|
||||
from .file_summary.views import (
|
||||
attachment_download,
|
||||
attachment_detail,
|
||||
attachments,
|
||||
batch_events,
|
||||
batch_status,
|
||||
conversation_list,
|
||||
conversation_messages,
|
||||
export_download,
|
||||
)
|
||||
|
||||
|
||||
# URL table of the review-agent API. All views come from ``.file_summary.views``.
urlpatterns = [
    # --- Conversations ---
    path("api/review-agent/conversations/", conversation_list, name="review_agent_conversation_list"),
    # --- Attachments of one conversation ---
    # The collection route is registered under two names with the same view.
    # Requests always resolve to the first entry; both names reverse to the
    # same URL (presumably so callers can reverse by intent — TODO confirm).
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/",
        attachments,
        name="file_summary_attachment_upload",
    ),
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/",
        attachments,
        name="file_summary_attachment_list",
    ),
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/<int:attachment_id>/",
        attachment_detail,
        name="file_summary_attachment_detail",
    ),
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/<int:attachment_id>/download/",
        attachment_download,
        name="file_summary_attachment_download",
    ),
    # --- Messages of one conversation ---
    path(
        "api/review-agent/conversations/<int:conversation_id>/messages/",
        conversation_messages,
        name="review_agent_conversation_messages",
    ),
    # --- File summary batches ---
    path("api/review-agent/file-summary/<int:batch_id>/status/", batch_status, name="file_summary_batch_status"),
    path("api/review-agent/file-summary/<int:batch_id>/events/", batch_events, name="file_summary_batch_events"),
    path(
        "api/review-agent/file-summary/exports/<int:export_id>/download/",
        export_download,
        name="file_summary_export_download",
    ),
]
|
||||
@@ -1,4 +1,5 @@
|
||||
from django.contrib.auth.decorators import login_required
|
||||
from django.db.models import Count, Q
|
||||
from django.http import HttpRequest, HttpResponse, JsonResponse, StreamingHttpResponse
|
||||
from django.shortcuts import redirect, render
|
||||
from django.views.decorators.http import require_http_methods
|
||||
@@ -10,6 +11,7 @@ from .services import (
|
||||
send_message,
|
||||
stream_message,
|
||||
)
|
||||
from .models import Conversation, FileAttachment, FileSummaryBatch
|
||||
|
||||
|
||||
@login_required
|
||||
@@ -49,6 +51,40 @@ def workspace(request: HttpRequest) -> HttpResponse:
|
||||
"conversations": conversations,
|
||||
"current_conversation": current,
|
||||
"messages": current.messages.all() if current else [],
|
||||
"attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [],
|
||||
"summary_batches": FileSummaryBatch.objects.filter(conversation=current).prefetch_related("node_runs").order_by("-created_at")[:5] if current else [],
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@login_required
@require_http_methods(["GET"])
def attachment_manager(request: HttpRequest) -> HttpResponse:
    """Render the attachment management page for the signed-in user.

    The page lists the user's conversations, most recently updated first,
    each annotated with ``attachment_count``, the number of its attachments
    whose upload status is not DELETED. If the ``conversation`` query
    parameter selects a conversation, that conversation's attachments are
    listed too, ordered by original name and descending version number.
    """
    # NOTE(review): the count skips DELETED attachments, while the list below
    # is not filtered by status — confirm this difference is intended.
    not_deleted = ~Q(file_attachments__upload_status=FileAttachment.UploadStatus.DELETED)
    conversations = (
        Conversation.objects.filter(user=request.user)
        .annotate(attachment_count=Count("file_attachments", filter=not_deleted))
        .order_by("-updated_at", "-id")
    )

    selected = get_conversation_for_user(request.user, request.GET.get("conversation"))
    if selected:
        attachments = FileAttachment.objects.filter(conversation=selected).order_by(
            "original_name", "-version_no"
        )
    else:
        attachments = []

    context = {
        "page_title": "附件管理",
        "conversations": conversations,
        "selected_conversation": selected,
        "attachments": attachments,
    }
    return render(request, "attachment_manager.html", context)
|
||||
|
||||
|
||||
@@ -125,10 +125,20 @@ input:focus {
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.app-shell {
|
||||
display: grid;
|
||||
grid-template-rows: 60px minmax(0, 1fr);
|
||||
height: 100vh;
|
||||
min-height: 0;
|
||||
background: var(--bg);
|
||||
}
|
||||
|
||||
.workspace {
|
||||
display: grid;
|
||||
grid-template-columns: 296px minmax(0, 1fr);
|
||||
min-height: 100vh;
|
||||
grid-template-columns: 296px minmax(0, 1fr) 340px;
|
||||
min-height: 0;
|
||||
height: 100%;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.sidebar {
|
||||
@@ -136,6 +146,8 @@ input:focus {
|
||||
flex-direction: column;
|
||||
gap: 24px;
|
||||
padding: 18px;
|
||||
min-height: 0;
|
||||
overflow-y: auto;
|
||||
background: linear-gradient(180deg, var(--sidebar) 0%, var(--sidebar-strong) 100%);
|
||||
border-right: 1px solid var(--line);
|
||||
transition: width 180ms ease, padding 180ms ease, transform 180ms ease;
|
||||
@@ -146,6 +158,12 @@ input:focus {
|
||||
gap: 14px;
|
||||
}
|
||||
|
||||
.sidebar-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.brand {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
@@ -310,8 +328,9 @@ input:focus {
|
||||
|
||||
.chat-shell {
|
||||
display: grid;
|
||||
grid-template-rows: auto minmax(0, 1fr);
|
||||
grid-template-rows: minmax(0, 1fr);
|
||||
min-width: 0;
|
||||
min-height: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
@@ -322,6 +341,8 @@ input:focus {
|
||||
gap: 16px;
|
||||
padding: 0 24px;
|
||||
min-height: 60px;
|
||||
position: relative;
|
||||
z-index: 30;
|
||||
border-bottom: 1px solid var(--line);
|
||||
background: #ffffff;
|
||||
}
|
||||
@@ -346,6 +367,8 @@ input:focus {
|
||||
}
|
||||
|
||||
.tab {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
height: 60px;
|
||||
padding: 0 20px;
|
||||
border: 0;
|
||||
@@ -355,6 +378,7 @@ input:focus {
|
||||
font: inherit;
|
||||
font-weight: 600;
|
||||
border-bottom: 2px solid transparent;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.tab.active {
|
||||
@@ -470,7 +494,7 @@ input:focus {
|
||||
display: grid;
|
||||
grid-template-rows: minmax(0, 1fr) auto;
|
||||
min-height: 0;
|
||||
height: calc(100vh - 60px);
|
||||
height: 100%;
|
||||
background: #ffffff;
|
||||
overflow: hidden;
|
||||
}
|
||||
@@ -560,10 +584,40 @@ input:focus {
|
||||
border-radius: 18px;
|
||||
background: #f8fbff;
|
||||
line-height: 1.7;
|
||||
min-width: 0;
|
||||
max-width: 100%;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.message-bubble p {
|
||||
.message-bubble p,
|
||||
.message-content p {
|
||||
margin: 0;
|
||||
line-height: 1.8;
|
||||
min-width: 0;
|
||||
max-width: 100%;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.message-content {
|
||||
display: grid;
|
||||
gap: 14px;
|
||||
line-height: 1.8;
|
||||
min-width: 0;
|
||||
max-width: 100%;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.message-content a {
|
||||
color: var(--accent);
|
||||
font-weight: 700;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.message-content a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.message-bubble.streaming {
|
||||
@@ -737,7 +791,7 @@ input:focus {
|
||||
}
|
||||
|
||||
.workspace[data-sidebar-state="collapsed"] {
|
||||
grid-template-columns: 88px minmax(0, 1fr);
|
||||
grid-template-columns: 88px minmax(0, 1fr) 340px;
|
||||
}
|
||||
|
||||
.workspace[data-sidebar-state="collapsed"] .brand-text,
|
||||
@@ -760,14 +814,321 @@ input:focus {
|
||||
padding-right: 12px;
|
||||
}
|
||||
|
||||
.workspace[data-sidebar-state="collapsed"] .sidebar-header {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.workspace[data-sidebar-state="collapsed"] .brand {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.summary-panel {
|
||||
display: grid;
|
||||
grid-template-rows: auto auto minmax(0, 1fr);
|
||||
gap: 14px;
|
||||
min-width: 0;
|
||||
max-height: 100%;
|
||||
padding: 16px;
|
||||
overflow: auto;
|
||||
border-left: 1px solid var(--line);
|
||||
background: #ffffff;
|
||||
}
|
||||
|
||||
.summary-section {
|
||||
display: grid;
|
||||
gap: 12px;
|
||||
padding: 14px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
background: var(--panel-soft);
|
||||
}
|
||||
|
||||
.summary-heading,
|
||||
.summary-subheading,
|
||||
.workflow-card header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.summary-heading h2,
|
||||
.summary-subheading h3 {
|
||||
margin: 0;
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
.summary-heading span {
|
||||
color: var(--muted);
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.upload-dropzone {
|
||||
display: grid;
|
||||
place-items: center;
|
||||
gap: 6px;
|
||||
min-height: 112px;
|
||||
padding: 18px;
|
||||
border: 1px dashed var(--accent);
|
||||
border-radius: 8px;
|
||||
background: #f5f9ff;
|
||||
color: var(--text);
|
||||
cursor: pointer;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.upload-dropzone.dragging {
|
||||
border-color: var(--accent-dark);
|
||||
background: #eaf2ff;
|
||||
}
|
||||
|
||||
.upload-dropzone span,
|
||||
.upload-status,
|
||||
.attachment-item span,
|
||||
.workflow-card em,
|
||||
.workflow-card small,
|
||||
.workflow-error {
|
||||
color: var(--muted);
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.attachment-manager-link {
|
||||
display: inline-grid;
|
||||
place-items: center;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 999px;
|
||||
color: var(--accent);
|
||||
text-decoration: none;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.attachment-manager-link:hover {
|
||||
border-color: var(--accent);
|
||||
background: #eaf2ff;
|
||||
}
|
||||
|
||||
.upload-status {
|
||||
margin: 0;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.attachment-list,
|
||||
.workflow-card-list {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.attachment-item,
|
||||
.workflow-card {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
padding: 12px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
background: #ffffff;
|
||||
}
|
||||
|
||||
.attachment-item {
|
||||
grid-template-columns: minmax(0, 1fr) auto;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.attachment-item strong,
|
||||
.workflow-card strong {
|
||||
display: block;
|
||||
overflow-wrap: anywhere;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.attachment-item em,
|
||||
.workflow-status {
|
||||
padding: 3px 8px;
|
||||
border-radius: 999px;
|
||||
background: #eaf2ff;
|
||||
color: var(--accent);
|
||||
font-size: 11px;
|
||||
font-style: normal;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.workflow-card ol {
|
||||
display: grid;
|
||||
gap: 8px;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
list-style: none;
|
||||
}
|
||||
|
||||
.workflow-batch-carousel {
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
.workflow-batch-carousel .workflow-card {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.workflow-batch-carousel .workflow-card.active {
|
||||
display: grid;
|
||||
}
|
||||
|
||||
.workflow-batch-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 8px;
|
||||
min-height: 30px;
|
||||
}
|
||||
|
||||
.workflow-batch-btn {
|
||||
display: inline-grid;
|
||||
place-items: center;
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 999px;
|
||||
background: #ffffff;
|
||||
color: var(--text);
|
||||
cursor: pointer;
|
||||
font-size: 18px;
|
||||
line-height: 1;
|
||||
}
|
||||
|
||||
.workflow-batch-btn:hover {
|
||||
border-color: var(--accent);
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
.workflow-batch-dots {
|
||||
display: flex;
|
||||
flex: 1;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: 6px;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.workflow-batch-dot {
|
||||
width: 7px;
|
||||
height: 7px;
|
||||
padding: 0;
|
||||
border: 0;
|
||||
border-radius: 999px;
|
||||
background: #cbd5e1;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.workflow-batch-dot.active {
|
||||
width: 18px;
|
||||
background: var(--accent);
|
||||
}
|
||||
|
||||
.node-status {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 10px;
|
||||
padding: 8px 0;
|
||||
border-top: 1px solid var(--line);
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.node-status div {
|
||||
display: grid;
|
||||
min-width: 0;
|
||||
gap: 2px;
|
||||
}
|
||||
|
||||
.node-status span,
|
||||
.node-status small,
|
||||
.workflow-error {
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.workflow-error {
|
||||
margin: 0;
|
||||
padding: 8px 10px;
|
||||
border-radius: 6px;
|
||||
background: #fff1f0;
|
||||
color: #b42318;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.status-running,
|
||||
.status-retrying {
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
.status-success {
|
||||
color: #047857;
|
||||
}
|
||||
|
||||
.status-failed {
|
||||
color: var(--danger-text);
|
||||
}
|
||||
|
||||
.panel-empty {
|
||||
padding: 14px;
|
||||
border: 1px dashed var(--line);
|
||||
border-radius: 8px;
|
||||
color: var(--muted);
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.message-bubble table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 13px;
|
||||
line-height: 1.6;
|
||||
table-layout: fixed;
|
||||
}
|
||||
|
||||
.message-bubble th,
|
||||
.message-bubble td {
|
||||
padding: 8px;
|
||||
border: 1px solid var(--line);
|
||||
text-align: left;
|
||||
vertical-align: top;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.message-bubble pre {
|
||||
max-width: 100%;
|
||||
overflow-x: auto;
|
||||
white-space: pre-wrap;
|
||||
overflow-wrap: anywhere;
|
||||
}
|
||||
|
||||
.message-bubble code {
|
||||
white-space: pre-wrap;
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
@media (max-width: 980px) {
|
||||
.app-body {
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.app-shell {
|
||||
grid-template-rows: 60px auto;
|
||||
height: auto;
|
||||
min-height: 100vh;
|
||||
}
|
||||
|
||||
.workspace {
|
||||
grid-template-columns: minmax(0, 1fr);
|
||||
height: auto;
|
||||
min-height: 0;
|
||||
overflow: visible;
|
||||
}
|
||||
|
||||
.sidebar {
|
||||
position: fixed;
|
||||
inset: 0 auto 0 0;
|
||||
inset: 60px auto 0 0;
|
||||
width: 280px;
|
||||
z-index: 20;
|
||||
box-shadow: var(--shadow);
|
||||
@@ -786,10 +1147,6 @@ input:focus {
|
||||
display: inline-flex;
|
||||
}
|
||||
|
||||
.sidebar-toggle {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.topbar,
|
||||
.chat-scroll,
|
||||
.composer-wrap {
|
||||
@@ -798,16 +1155,22 @@ input:focus {
|
||||
}
|
||||
|
||||
.topbar {
|
||||
align-items: flex-start;
|
||||
flex-direction: column;
|
||||
min-height: auto;
|
||||
padding-top: 12px;
|
||||
align-items: center;
|
||||
flex-direction: row;
|
||||
min-height: 60px;
|
||||
padding-top: 0;
|
||||
padding-bottom: 0;
|
||||
}
|
||||
|
||||
.topbar-left {
|
||||
flex: 1 1 auto;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.topbar-right {
|
||||
width: 100%;
|
||||
justify-content: space-between;
|
||||
flex: 0 0 auto;
|
||||
width: auto;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
|
||||
.conversation-header {
|
||||
@@ -815,7 +1178,14 @@ input:focus {
|
||||
}
|
||||
|
||||
.chat-stage {
|
||||
height: calc(100vh - 88px);
|
||||
min-height: calc(100vh - 60px);
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.summary-panel {
|
||||
max-height: none;
|
||||
border-left: 0;
|
||||
border-top: 1px solid var(--line);
|
||||
}
|
||||
|
||||
.chat-scroll {
|
||||
@@ -827,6 +1197,215 @@ input:focus {
|
||||
}
|
||||
}
|
||||
|
||||
.attachment-manager-page {
|
||||
display: grid;
|
||||
align-content: start;
|
||||
gap: 12px;
|
||||
min-height: 0;
|
||||
height: calc(100vh - 60px);
|
||||
overflow-y: auto;
|
||||
padding: 16px 24px 20px;
|
||||
background: var(--bg);
|
||||
}
|
||||
|
||||
.attachment-manager-hero,
|
||||
.attachment-manager-panel,
|
||||
.attachment-manager-content {
|
||||
width: min(1440px, 100%);
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.attachment-manager-hero {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
gap: 16px;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.attachment-manager-hero h1 {
|
||||
margin: 2px 0;
|
||||
font-size: 22px;
|
||||
}
|
||||
|
||||
.attachment-manager-hero p {
|
||||
margin: 0;
|
||||
color: var(--muted);
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.attachment-manager-toolbar {
|
||||
min-height: 66px;
|
||||
}
|
||||
|
||||
.attachment-manager-selectbar {
|
||||
display: grid;
|
||||
grid-template-columns: auto minmax(420px, 680px) auto;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
min-width: min(900px, 60vw);
|
||||
}
|
||||
|
||||
.attachment-manager-selectbar label {
|
||||
color: var(--muted);
|
||||
font-size: 13px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.return-chat-link {
|
||||
padding: 8px 12px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
color: var(--accent);
|
||||
text-decoration: none;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.attachment-manager-panel {
|
||||
display: grid;
|
||||
gap: 10px;
|
||||
padding: 12px 14px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
background: #ffffff;
|
||||
}
|
||||
|
||||
.attachment-manager-panel label {
|
||||
color: var(--muted);
|
||||
font-size: 13px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.attachment-manager-panel select,
|
||||
.attachment-search {
|
||||
min-height: 34px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
background: #ffffff;
|
||||
color: var(--text);
|
||||
font: inherit;
|
||||
}
|
||||
|
||||
.attachment-manager-panel select,
|
||||
.attachment-manager-select-control {
|
||||
width: 100%;
|
||||
height: 38px;
|
||||
padding: 0 12px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 8px;
|
||||
background: #ffffff;
|
||||
color: var(--text);
|
||||
font: inherit;
|
||||
}
|
||||
|
||||
.attachment-manager-select-control:focus {
|
||||
border-color: var(--accent);
|
||||
box-shadow: 0 0 0 3px rgba(58, 114, 216, 0.14);
|
||||
outline: none;
|
||||
}
|
||||
|
||||
.attachment-manager-content {
|
||||
display: grid;
|
||||
gap: 12px;
|
||||
}
|
||||
|
||||
.attachment-manager-split {
|
||||
grid-template-columns: minmax(280px, 360px) minmax(0, 1fr);
|
||||
align-items: start;
|
||||
}
|
||||
|
||||
.attachment-search {
|
||||
width: 220px;
|
||||
padding: 0 10px;
|
||||
}
|
||||
|
||||
.manager-upload-dropzone {
|
||||
min-height: 132px;
|
||||
padding: 14px;
|
||||
}
|
||||
|
||||
.upload-manager-panel .summary-subheading span {
|
||||
color: var(--muted);
|
||||
font-size: 12px;
|
||||
}
|
||||
|
||||
.attachment-table-wrap {
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
.attachment-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 13px;
|
||||
}
|
||||
|
||||
.attachment-table th,
|
||||
.attachment-table td {
|
||||
padding: 10px 8px;
|
||||
border-bottom: 1px solid var(--line);
|
||||
text-align: left;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.attachment-table th {
|
||||
color: var(--muted);
|
||||
font-size: 12px;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.attachment-name {
|
||||
max-width: 360px;
|
||||
overflow-wrap: anywhere;
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.attachment-actions {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.attachment-actions a,
|
||||
.attachment-actions button {
|
||||
min-height: 28px;
|
||||
padding: 4px 8px;
|
||||
border: 1px solid var(--line);
|
||||
border-radius: 6px;
|
||||
background: #ffffff;
|
||||
color: var(--accent);
|
||||
cursor: pointer;
|
||||
font: inherit;
|
||||
font-size: 12px;
|
||||
font-weight: 700;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.attachment-actions a:hover,
|
||||
.attachment-actions button:hover {
|
||||
border-color: var(--accent);
|
||||
background: #eaf2ff;
|
||||
}
|
||||
|
||||
.table-empty,
|
||||
.attachment-manager-empty {
|
||||
color: var(--muted);
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.attachment-manager-empty {
|
||||
min-height: 150px;
|
||||
place-content: center;
|
||||
}
|
||||
|
||||
.attachment-manager-empty h2 {
|
||||
margin: 0;
|
||||
font-size: 18px;
|
||||
}
|
||||
|
||||
.attachment-manager-empty p {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
@media (max-width: 640px) {
|
||||
.tabbar {
|
||||
overflow-x: auto;
|
||||
@@ -876,7 +1455,8 @@ input:focus {
|
||||
}
|
||||
|
||||
.chat-stage {
|
||||
height: calc(100vh - 126px);
|
||||
min-height: calc(100vh - 60px);
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.chat-scroll {
|
||||
@@ -893,6 +1473,30 @@ input:focus {
|
||||
width: 10px;
|
||||
height: 10px;
|
||||
}
|
||||
|
||||
.attachment-manager-page {
|
||||
height: auto;
|
||||
min-height: calc(100vh - 60px);
|
||||
padding: 12px;
|
||||
}
|
||||
|
||||
.attachment-manager-hero {
|
||||
align-items: stretch;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.attachment-manager-selectbar {
|
||||
grid-template-columns: 1fr;
|
||||
min-width: 0;
|
||||
}
|
||||
|
||||
.attachment-manager-split {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.attachment-search {
|
||||
width: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
@keyframes pulse-caret {
|
||||
|
||||
572
static/js/app.js
572
static/js/app.js
@@ -11,7 +11,16 @@
|
||||
var sendButton = document.getElementById("sendButton");
|
||||
var conversationIdInput = document.getElementById("conversationIdInput");
|
||||
var chatStage = document.querySelector(".chat-stage");
|
||||
var summaryPanel = document.getElementById("summaryPanel");
|
||||
var uploadDropzone = document.getElementById("uploadDropzone");
|
||||
var attachmentInput = document.getElementById("attachmentInput");
|
||||
var attachmentList = document.getElementById("attachmentList");
|
||||
var uploadStatus = document.getElementById("uploadStatus");
|
||||
var workflowCardList = document.getElementById("workflowCardList");
|
||||
var nodeAnchors = [];
|
||||
var workflowPollingTimers = {};
|
||||
var WORKFLOW_POLL_INTERVAL_MS = 1500;
|
||||
var latestMessageId = 0;
|
||||
|
||||
if (!workspace) {
|
||||
return;
|
||||
@@ -32,7 +41,7 @@
|
||||
|
||||
function syncSidebarState() {
|
||||
if (isMobile()) {
|
||||
if (workspace.getAttribute("data-sidebar-state") === "collapsed") {
|
||||
if (workspace.getAttribute("data-sidebar-state") !== "closed") {
|
||||
workspace.setAttribute("data-sidebar-state", "closed");
|
||||
}
|
||||
} else if (workspace.getAttribute("data-sidebar-state") === "closed") {
|
||||
@@ -44,6 +53,15 @@
|
||||
nodeAnchors = Array.prototype.slice.call(document.querySelectorAll(".node-anchor"));
|
||||
}
|
||||
|
||||
/**
 * Raise `latestMessageId` to the largest numeric `data-message-id` carried by
 * the message elements currently present in the document.
 *
 * Elements whose attribute does not parse as a base-10 integer are ignored.
 */
function syncLatestMessageIdFromDom() {
  var rendered = document.querySelectorAll(".message[data-message-id]");
  rendered.forEach(function (node) {
    var parsed = parseInt(node.getAttribute("data-message-id"), 10);
    if (Number.isNaN(parsed)) {
      return;
    }
    latestMessageId = Math.max(latestMessageId, parsed);
  });
}
|
||||
|
||||
if (sidebarToggle) {
|
||||
sidebarToggle.addEventListener("click", toggleSidebar);
|
||||
}
|
||||
@@ -147,6 +165,112 @@
|
||||
return escapeHtml(text).replace(/\n/g, "<br>");
|
||||
}
|
||||
|
||||
/**
 * Render the inline Markdown subset used by the fallback renderer: plain text
 * plus `[label](href)` links.
 *
 * The whole input is HTML-escaped exactly once, up front. The label and href
 * captured by the link pattern therefore come out of already-escaped text and
 * are emitted as-is; escaping them a second time would turn `&` into
 * `&amp;amp;` in labels and corrupt query strings in hrefs.
 *
 * Only site-relative paths (`/x`, but not `//host` or `/\host`) and http(s)
 * URLs become anchors. Any other target is reduced to its label text.
 *
 * NOTE(review): relies on `escapeHtml` escaping `&`, `<`, `>` and quotes so
 * the href is safe inside a double-quoted attribute — confirm against its
 * definition.
 *
 * @param {string} text Raw message text; falsy values are treated as "".
 * @returns {string} Escaped markup with supported links turned into anchors.
 */
function renderInlineMarkdown(text) {
  var linkPattern = /\[([^\]]+)\]\(([^)]+)\)/g;
  return escapeHtml(text || "").replace(linkPattern, function (_match, label, href) {
    var isLocalPath = /^\/[^/\\]/.test(href);
    var isHttpUrl = /^https?:\/\//.test(href);
    if (!isLocalPath && !isHttpUrl) {
      return label;
    }
    return '<a href="' + href + '" target="_blank" rel="noopener">' + label + "</a>";
  });
}
|
||||
|
||||
/**
 * Try to render a pipe-delimited Markdown table that starts at
 * `lines[startIndex]`.
 *
 * A table is recognised when the (trimmed) header line begins with `|` and
 * the following line contains `---`. Body rows are consumed for as long as
 * the trimmed line begins with `|`. Empty body cells are rendered as "-".
 *
 * @param {string[]} lines All lines of the message.
 * @param {number} startIndex Index of the candidate header line.
 * @returns {{html: string, nextIndex: number}|null} The table markup and the
 *   index of the first line after the table, or null when no table starts here.
 */
function renderMarkdownTable(lines, startIndex) {
  var headerLine = lines[startIndex].trim();
  var followingLine = lines[startIndex + 1];
  var separatorLine = followingLine ? followingLine.trim() : "";
  var looksLikeTable = headerLine.charAt(0) === "|" && separatorLine.indexOf("---") !== -1;
  if (!looksLikeTable) {
    return null;
  }

  // Strip one leading and one trailing pipe, then split into trimmed cells.
  function splitRow(rowText) {
    var inner = rowText.trim().replace(/^\|/, "").replace(/\|$/, "");
    return inner.split("|").map(function (part) {
      return part.trim();
    });
  }

  var headCells = splitRow(headerLine).map(function (cell) {
    return "<th>" + renderInlineMarkdown(cell) + "</th>";
  });

  var bodyRows = [];
  var cursor = startIndex + 2;
  for (; cursor < lines.length && lines[cursor].trim().charAt(0) === "|"; cursor += 1) {
    var rowCells = splitRow(lines[cursor]).map(function (cell) {
      return "<td>" + renderInlineMarkdown(cell || "-") + "</td>";
    });
    bodyRows.push("<tr>" + rowCells.join("") + "</tr>");
  }

  var markup =
    "<table><thead><tr>" +
    headCells.join("") +
    "</tr></thead><tbody>" +
    bodyRows.join("") +
    "</tbody></table>";
  return { html: markup, nextIndex: cursor };
}
|
||||
|
||||
/**
 * Minimal Markdown fallback: renders tables via `renderMarkdownTable` and
 * groups every other run of non-blank lines into a `<p>` whose line breaks
 * become `<br>`.
 *
 * @param {string} text Raw message text; falsy values are treated as "".
 * @returns {string} Rendered HTML.
 */
function renderBasicMarkdown(text) {
  var sourceLines = (text || "").split(/\r?\n/);
  var pieces = [];
  var pending = [];

  // Emit the buffered paragraph lines (if any) as one <p> element.
  function emitPending() {
    if (pending.length === 0) {
      return;
    }
    var inline = renderInlineMarkdown(pending.join("\n"));
    pieces.push("<p>" + inline.replace(/\n/g, "<br>") + "</p>");
    pending = [];
  }

  var cursor = 0;
  while (cursor < sourceLines.length) {
    var parsedTable = renderMarkdownTable(sourceLines, cursor);
    if (parsedTable) {
      emitPending();
      pieces.push(parsedTable.html);
      cursor = parsedTable.nextIndex;
      continue;
    }
    var current = sourceLines[cursor];
    if (current.trim()) {
      pending.push(current);
    } else {
      // A blank line terminates the current paragraph.
      emitPending();
    }
    cursor += 1;
  }
  emitPending();
  return pieces.join("");
}
|
||||
|
||||
/**
 * Convert assistant message text to HTML.
 *
 * Uses `window.marked` + `window.DOMPurify` when both are present, otherwise
 * the built-in basic renderer. If rendering throws, the error is logged and
 * the text is returned through `nl2br` instead.
 *
 * @param {string} text Raw assistant text; falsy values are treated as "".
 * @returns {string} HTML for insertion into the message bubble.
 */
function renderAssistantContent(text) {
  var source = text || "";
  try {
    var librariesLoaded = Boolean(window.marked && window.DOMPurify);
    if (!librariesLoaded) {
      return renderBasicMarkdown(source);
    }
    return window.DOMPurify.sanitize(window.marked.parse(source));
  } catch (error) {
    console.error("Markdown render failed", error);
    return nl2br(source);
  }
}
|
||||
|
||||
/**
 * Render the assistant messages already present in the page.
 *
 * For each assistant bubble that has both a `.markdown-content` target and a
 * `.message-raw` source, the raw text is rendered into the target once; the
 * target is then flagged with `data-rendered="true"` so it is skipped later.
 */
function renderExistingAssistantMessages() {
  var bubbles = document.querySelectorAll(".message.assistant .message-bubble");
  bubbles.forEach(function (bubble) {
    var target = bubble.querySelector(".markdown-content");
    var rawSource = bubble.querySelector(".message-raw");
    if (!target || !rawSource) {
      return;
    }
    if (target.dataset.rendered === "true") {
      return;
    }
    // Prefer the `content` fragment when the raw element exposes one.
    var markdown = rawSource.content ? rawSource.content.textContent : rawSource.textContent;
    target.innerHTML = renderAssistantContent(markdown);
    target.dataset.rendered = "true";
  });
}
|
||||
|
||||
function scrollChatToBottom() {
|
||||
if (chatScroll) {
|
||||
chatScroll.scrollTop = chatScroll.scrollHeight;
|
||||
@@ -157,6 +281,9 @@
|
||||
var article = document.createElement("article");
|
||||
article.className = "message " + role;
|
||||
article.id = messageId;
|
||||
if (typeof messageId === "number") {
|
||||
article.setAttribute("data-message-id", messageId);
|
||||
}
|
||||
if (label) {
|
||||
article.setAttribute("data-node-label", label);
|
||||
}
|
||||
@@ -168,8 +295,11 @@
|
||||
var bubble = document.createElement("div");
|
||||
bubble.className = "message-bubble";
|
||||
|
||||
var text = document.createElement("p");
|
||||
text.innerHTML = nl2br(content);
|
||||
var text = document.createElement(role === "assistant" ? "div" : "p");
|
||||
if (role === "assistant") {
|
||||
text.className = "message-content markdown-content";
|
||||
}
|
||||
text.innerHTML = role === "assistant" ? renderAssistantContent(content) : nl2br(content);
|
||||
bubble.appendChild(text);
|
||||
|
||||
article.appendChild(avatar);
|
||||
@@ -178,6 +308,48 @@
|
||||
return { article: article, bubble: bubble, text: text };
|
||||
}
|
||||
|
||||
/**
 * Add a server-provided message to the conversation unless an element with
 * the same `data-message-id` is already present.
 *
 * The node label is "AI " or "用户 " followed by the 1-based position the new
 * message takes among `.message` elements. User messages also get a node-rail
 * entry via `appendNode`.
 *
 * @param {{id: number, role: string, content: string}} message
 */
function appendConversationMessage(message) {
  if (!message) {
    return;
  }
  var duplicate = document.querySelector('.message[data-message-id="' + message.id + '"]');
  if (duplicate) {
    return;
  }

  var ordinal = document.querySelectorAll(".message").length + 1;
  var prefix = message.role === "assistant" ? "AI " : "用户 ";
  var label = prefix + ordinal;

  var created = createMessage(message.role, message.content || "", "message-" + message.id, label);
  created.article.setAttribute("data-message-id", message.id);
  latestMessageId = Math.max(latestMessageId, message.id);

  if (message.role === "user") {
    appendNode(created.article.id, label, true);
  }
}
|
||||
|
||||
/**
 * Fetch messages newer than `latestMessageId` for the current conversation
 * and append them to the chat.
 *
 * Does nothing when there is no conversation, no summary panel, or no message
 * URL template. Non-OK responses are ignored; thrown errors are logged.
 *
 * @returns {Promise<void>}
 */
async function refreshConversationMessages() {
  var conversationId = currentConversationId();
  if (!(conversationId && summaryPanel)) {
    return;
  }
  var endpoint = templateUrl("data-message-url-template", "__conversation_id__", conversationId);
  if (!endpoint) {
    return;
  }

  try {
    var response = await fetch(endpoint + "?after=" + latestMessageId, { cache: "no-store" });
    if (!response.ok) {
      return;
    }
    var body = await response.json();
    var incoming = body.messages || [];
    incoming.forEach(appendConversationMessage);
    if (body.latest_message_id) {
      latestMessageId = Math.max(latestMessageId, body.latest_message_id);
    }
    // Bring the node rail and scroll position in line with the new content.
    syncNodeRailVisibility();
    bindNodeAnchorClicks();
    setActiveNode();
    scrollChatToBottom();
  } catch (error) {
    console.error("Conversation message refresh failed", error);
  }
}
|
||||
|
||||
function appendNode(targetId, title, isLatest) {
|
||||
if (!nodeRail) {
|
||||
return;
|
||||
@@ -203,7 +375,7 @@
|
||||
return;
|
||||
}
|
||||
var encodedTitle = title;
|
||||
var existing = document.querySelector('.history-item[href*="conversation=' + conversationId + '"]');
|
||||
var existing = document.querySelector('.history-item[data-conversation-id="' + conversationId + '"]');
|
||||
var list = document.querySelector(".history-list");
|
||||
var currentTime = new Date();
|
||||
var month = String(currentTime.getMonth() + 1).padStart(2, "0");
|
||||
@@ -237,6 +409,7 @@
|
||||
|
||||
var item = document.createElement("a");
|
||||
item.className = "history-item active";
|
||||
item.setAttribute("data-conversation-id", conversationId);
|
||||
item.href = "/?conversation=" + conversationId;
|
||||
item.innerHTML =
|
||||
'<span class="history-title">' +
|
||||
@@ -271,6 +444,327 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Read the active conversation id from the hidden composer input.
 *
 * @returns {string} The input's value, or "" when the input is absent.
 */
function currentConversationId() {
  if (!conversationIdInput) {
    return "";
  }
  return conversationIdInput.value;
}
|
||||
|
||||
/**
 * Build a URL from a template stored in an attribute of the summary panel.
 *
 * @param {string} attributeName Attribute on the summary panel holding the template.
 * @param {string} token Placeholder text inside the template.
 * @param {string|number} value Value substituted for the first occurrence of the placeholder.
 * @returns {string} The resolved URL, or "" when the panel or the attribute is
 *   missing (previously a missing attribute raised a TypeError on `null.replace`).
 */
function templateUrl(attributeName, token, value) {
  if (!summaryPanel) {
    return "";
  }
  var template = summaryPanel.getAttribute(attributeName);
  if (!template) {
    return "";
  }
  return template.replace(token, value);
}
|
||||
|
||||
/**
 * Replace the contents of the attachment list with one row per attachment,
 * or an empty-state placeholder when there are none.
 *
 * Every server-provided field that is interpolated into markup is escaped,
 * including the version number and file size, so a non-numeric value cannot
 * inject HTML. A missing or non-array argument is treated as an empty list.
 *
 * @param {Array<{id: *, original_name: string, version_no: *, file_size: *,
 *   upload_status: string, is_active: boolean}>} attachments
 */
function renderAttachments(attachments) {
  if (!attachmentList) {
    return;
  }
  var items = Array.isArray(attachments) ? attachments : [];
  attachmentList.innerHTML = "";
  if (!items.length) {
    attachmentList.innerHTML = '<div class="panel-empty">暂无附件</div>';
    return;
  }
  items.forEach(function (attachment) {
    var item = document.createElement("div");
    item.className = "attachment-item";
    item.setAttribute("data-attachment-id", attachment.id);
    item.innerHTML =
      "<div><strong>" +
      escapeHtml(attachment.original_name) +
      "</strong><span>v" +
      escapeHtml(String(attachment.version_no)) +
      " · " +
      escapeHtml(String(attachment.file_size)) +
      " bytes · " +
      escapeHtml(attachment.upload_status) +
      "</span></div>" +
      (attachment.is_active ? "<em>active</em>" : "");
    attachmentList.appendChild(item);
  });
}
|
||||
|
||||
/**
 * Reload the attachment list of the current conversation from the server.
 *
 * Does nothing without a conversation or summary panel. Non-OK responses are
 * ignored. Network and JSON errors are logged instead of propagating, matching
 * the other refresh helpers, so a failed refresh cannot surface as an
 * unhandled rejection or be misreported by a caller as a failed upload.
 *
 * @returns {Promise<void>}
 */
async function refreshAttachments() {
  var conversationId = currentConversationId();
  if (!conversationId || !summaryPanel) {
    return;
  }
  try {
    var response = await fetch(templateUrl("data-attachment-url-template", "__conversation_id__", conversationId));
    if (!response.ok) {
      return;
    }
    var payload = await response.json();
    renderAttachments(payload.attachments || []);
  } catch (error) {
    console.error("Attachment refresh failed", error);
  }
}
|
||||
|
||||
/**
 * Upload the given files as attachments of the current conversation and
 * refresh the attachment list.
 *
 * Changes from the earlier behaviour:
 * - an empty or missing file list returns silently instead of showing the
 *   "select a conversation" hint, which had nothing to do with the cause;
 * - a missing composer form no longer throws before the request is made;
 * - a failure of the follow-up list refresh is logged but is not reported to
 *   the user as a failed upload;
 * - upload errors are logged in addition to updating the status text.
 *
 * @param {FileList|File[]} files Files chosen or dropped by the user.
 * @returns {Promise<void>}
 */
async function uploadFiles(files) {
  if (!files || !files.length) {
    return;
  }
  var conversationId = currentConversationId();
  if (!conversationId || !summaryPanel) {
    if (uploadStatus) {
      uploadStatus.textContent = "请先创建或选择一个对话。";
    }
    return;
  }

  var data = new FormData();
  Array.prototype.forEach.call(files, function (file) {
    data.append("files", file);
  });
  // The CSRF token is taken from the composer form's hidden field.
  var csrf = composer ? new FormData(composer).get("csrfmiddlewaretoken") : "";
  if (uploadStatus) {
    uploadStatus.textContent = "正在上传 " + files.length + " 个文件...";
  }

  try {
    var response = await fetch(templateUrl("data-attachment-url-template", "__conversation_id__", conversationId), {
      method: "POST",
      headers: { "X-CSRFToken": csrf || "" },
      body: data,
    });
    if (!response.ok) {
      throw new Error("上传失败。");
    }
    var payload = await response.json();
    renderAttachments(payload.attachments || []);
    if (uploadStatus) {
      uploadStatus.textContent = "上传完成,可发送自动汇总提示词。";
    }
  } catch (error) {
    console.error("Attachment upload failed", error);
    if (uploadStatus) {
      uploadStatus.textContent = "上传失败,请重试。";
    }
    return;
  }

  // The upload already succeeded; a refresh problem must not undo that message.
  try {
    await refreshAttachments();
  } catch (refreshError) {
    console.error("Attachment refresh after upload failed", refreshError);
  }
}
|
||||
|
||||
/**
 * Return the workflow card for a batch, creating it when necessary.
 *
 * Any `.panel-empty` placeholder in the list is removed. A newly created card
 * starts in the "running" state, is inserted at the front of the list and
 * becomes the active carousel entry.
 *
 * @param {{batch_id: *, batch_no: string}} batch
 * @returns {Element|null} The card, or null when the list or batch is missing.
 */
function ensureWorkflowCard(batch) {
  if (!workflowCardList || !batch) {
    return null;
  }

  var placeholder = workflowCardList.querySelector(".panel-empty");
  if (placeholder) {
    placeholder.remove();
  }

  var existingCard = workflowCardList.querySelector('[data-batch-id="' + batch.batch_id + '"]');
  if (existingCard) {
    return existingCard;
  }

  var heading = escapeHtml(batch.batch_no || "文件汇总");
  var freshCard = document.createElement("article");
  freshCard.className = "workflow-card";
  freshCard.setAttribute("data-batch-id", batch.batch_id);
  freshCard.innerHTML = [
    "<header><strong>",
    heading,
    '</strong><span class="workflow-status status-running">running</span></header><ol></ol>',
  ].join("");
  workflowCardList.prepend(freshCard);
  refreshWorkflowBatchCarousel(0);
  return freshCard;
}
|
||||
|
||||
/**
 * List the workflow cards currently inside the card list.
 *
 * @returns {Element[]} A real array of `.workflow-card` elements (empty when
 *   the list container is absent).
 */
function workflowCards() {
  if (workflowCardList) {
    var found = workflowCardList.querySelectorAll(".workflow-card");
    return Array.prototype.slice.call(found);
  }
  return [];
}
|
||||
|
||||
/**
 * Append the previous/next buttons and the dot container to the workflow
 * card list, unless the list is missing or the controls already exist.
 */
function ensureWorkflowBatchControls() {
  if (!workflowCardList) {
    return;
  }
  if (workflowCardList.querySelector(".workflow-batch-controls")) {
    return;
  }
  var bar = document.createElement("div");
  bar.className = "workflow-batch-controls";
  bar.innerHTML = [
    '<button type="button" class="workflow-batch-btn" data-workflow-action="prev" aria-label="上一个工作流">‹</button>',
    '<div class="workflow-batch-dots" aria-label="工作流批次"></div>',
    '<button type="button" class="workflow-batch-btn" data-workflow-action="next" aria-label="下一个工作流">›</button>',
  ].join("");
  workflowCardList.appendChild(bar);
}
|
||||
|
||||
/**
 * Make the card at `index` the active carousel entry.
 *
 * The index is clamped to the available cards and stored in the list's
 * `data-active-index`. Each card gets its `active` class, `data-workflow-index`
 * and `aria-hidden` updated; each dot gets its `active` class and
 * `aria-current` updated.
 *
 * @param {number} index Requested card position.
 */
function selectWorkflowBatchIndex(index) {
  var cardNodes = workflowCards();
  if (!workflowCardList || cardNodes.length === 0) {
    return;
  }

  var chosen = Math.max(0, Math.min(index, cardNodes.length - 1));
  workflowCardList.setAttribute("data-active-index", chosen);

  for (var position = 0; position < cardNodes.length; position += 1) {
    var cardNode = cardNodes[position];
    var cardSelected = position === chosen;
    cardNode.classList.toggle("active", cardSelected);
    cardNode.setAttribute("data-workflow-index", position);
    cardNode.setAttribute("aria-hidden", cardSelected ? "false" : "true");
  }

  var dotContainer = workflowCardList.querySelector(".workflow-batch-dots");
  if (!dotContainer) {
    return;
  }
  var dotNodes = dotContainer.querySelectorAll("[data-workflow-index-dot]");
  dotNodes.forEach(function (dotNode) {
    var dotPosition = parseInt(dotNode.getAttribute("data-workflow-index-dot"), 10);
    var dotSelected = dotPosition === chosen;
    dotNode.classList.toggle("active", dotSelected);
    dotNode.setAttribute("aria-current", dotSelected ? "true" : "false");
  });
}
|
||||
|
||||
/**
 * Rebuild the carousel chrome around the workflow cards and re-apply the
 * active selection.
 *
 * Adds the carousel class and controls, regenerates one dot per card and then
 * selects `preferredIndex` when it is a number, otherwise the index stored in
 * `data-active-index` (falling back to 0 when that does not parse).
 *
 * @param {number} [preferredIndex] Card position to activate.
 */
function refreshWorkflowBatchCarousel(preferredIndex) {
  var cardNodes = workflowCards();
  if (!workflowCardList || cardNodes.length === 0) {
    return;
  }

  workflowCardList.classList.add("workflow-batch-carousel");
  ensureWorkflowBatchControls();

  var dotContainer = workflowCardList.querySelector(".workflow-batch-dots");
  if (dotContainer) {
    dotContainer.innerHTML = "";
    for (var position = 0; position < cardNodes.length; position += 1) {
      var cardNode = cardNodes[position];
      cardNode.setAttribute("data-workflow-index", position);

      var heading = cardNode.querySelector("strong");
      var headingText = heading ? heading.textContent.trim() : "工作流";

      var dotNode = document.createElement("button");
      dotNode.type = "button";
      dotNode.className = "workflow-batch-dot";
      dotNode.setAttribute("data-workflow-index-dot", position);
      dotNode.setAttribute("aria-label", "查看" + headingText + "状态");
      dotContainer.appendChild(dotNode);
    }
  }

  var target;
  if (typeof preferredIndex === "number") {
    target = preferredIndex;
  } else {
    target = parseInt(workflowCardList.getAttribute("data-active-index") || "0", 10);
  }
  if (Number.isNaN(target)) {
    target = 0;
  }
  selectWorkflowBatchIndex(target);
}
|
||||
|
||||
/**
 * Fetch the status of one workflow batch and update its card.
 *
 * Hardening over the earlier version:
 * - JSON parsing happens inside the try block, so a malformed body is logged
 *   and reported as "" instead of rejecting inside the polling timer;
 * - a payload without `batch` is logged and reported as "";
 * - the node progress value is escaped before it is placed in markup;
 * - a card lacking the status element or the node list no longer throws.
 *
 * @param {string|number} batchId Batch to refresh.
 * @returns {Promise<string>} The batch status, or "" when it could not be
 *   determined (no panel, no id, request/parse failure, missing batch).
 */
async function refreshWorkflowCard(batchId) {
  if (!summaryPanel || !batchId) {
    return "";
  }

  var payload;
  try {
    var response = await fetch(templateUrl("data-status-url-template", "__batch_id__", batchId), {
      cache: "no-store",
    });
    if (!response.ok) {
      console.error("Workflow status refresh returned non-OK", { batchId: batchId, status: response.status });
      return "";
    }
    payload = await response.json();
  } catch (error) {
    console.error("Workflow status refresh failed", { batchId: batchId, error: error });
    return "";
  }

  var batch = payload && payload.batch;
  if (!batch) {
    console.error("Workflow status payload has no batch", { batchId: batchId });
    return "";
  }
  var batchStatus = batch.status || "";

  var card = ensureWorkflowCard({
    batch_id: batch.id,
    batch_no: batch.batch_no,
  });
  if (!card) {
    return batchStatus;
  }

  var status = card.querySelector(".workflow-status");
  if (status) {
    status.textContent = batch.status;
    status.className = "workflow-status status-" + batch.status;
  }

  var list = card.querySelector("ol");
  var batchError = card.querySelector(".workflow-error");
  if (batch.error_message) {
    if (!batchError) {
      batchError = document.createElement("p");
      batchError.className = "workflow-error";
      // Sits directly above the node list (appended last when there is no list).
      card.insertBefore(batchError, list);
    }
    batchError.textContent = batch.error_message;
  } else if (batchError) {
    batchError.remove();
  }

  if (list) {
    list.innerHTML = "";
    (payload.nodes || []).forEach(function (node) {
      var item = document.createElement("li");
      item.className = "node-status status-" + node.status;
      item.setAttribute("data-node-code", node.node_code);
      item.innerHTML =
        "<div><span>" +
        escapeHtml(node.node_name) +
        "</span>" +
        (node.message ? "<small>" + escapeHtml(node.message) + "</small>" : "") +
        "</div><em>" +
        escapeHtml(String(node.progress)) +
        "%</em>";
      list.appendChild(item);
    });
  }

  refreshWorkflowBatchCarousel();
  return batchStatus;
}
|
||||
|
||||
/**
 * Install one delegated click handler on the workflow card list that drives
 * the carousel: the prev/next buttons step through the cards with
 * wrap-around, and a dot jumps straight to its card.
 */
function bindWorkflowBatchCarouselControls() {
  if (!workflowCardList) {
    return;
  }

  function onCarouselClick(event) {
    var total = workflowCards().length;
    if (total === 0) {
      return;
    }

    var stepper = event.target.closest("[data-workflow-action]");
    var dot = event.target.closest("[data-workflow-index-dot]");
    var active = parseInt(workflowCardList.getAttribute("data-active-index") || "0", 10);
    if (Number.isNaN(active)) {
      active = 0;
    }

    if (stepper) {
      var forward = stepper.getAttribute("data-workflow-action") === "next";
      var target = forward ? (active + 1) % total : (active - 1 + total) % total;
      selectWorkflowBatchIndex(target);
      return;
    }
    if (dot) {
      selectWorkflowBatchIndex(parseInt(dot.getAttribute("data-workflow-index-dot"), 10));
    }
  }

  workflowCardList.addEventListener("click", onCarouselClick);
}
|
||||
|
||||
/**
 * Tell whether a workflow status means the batch has finished.
 *
 * @param {string} status Batch status text.
 * @returns {boolean} True only for exactly "success" or "failed".
 */
function isWorkflowTerminalStatus(status) {
  var terminalStatuses = ["success", "failed"];
  return terminalStatuses.indexOf(status) !== -1;
}
|
||||
|
||||
/**
 * Cancel the polling interval registered for a batch, if there is one, and
 * forget it.
 *
 * @param {string|number} batchId Batch whose polling should stop.
 */
function stopWorkflowPolling(batchId) {
  var timer = workflowPollingTimers[batchId];
  if (timer) {
    window.clearInterval(timer);
    delete workflowPollingTimers[batchId];
  }
}
|
||||
|
||||
/**
 * Start polling the status of a workflow batch: once immediately, then every
 * `WORKFLOW_POLL_INTERVAL_MS` until the batch reaches a terminal status.
 *
 * Does nothing when `batchId` is empty or the batch is already being polled.
 *
 * The immediate check and the interval share one tick function. A tick is
 * skipped while a previous request is still in flight, so slow responses do
 * not pile up, and the terminal-state handling (stop polling, reload the
 * conversation messages) runs only for the first tick that observes it.
 *
 * @param {string|number} batchId Batch to poll.
 */
function startWorkflowPolling(batchId) {
  if (!batchId || workflowPollingTimers[batchId]) {
    return;
  }

  var requestInFlight = false;

  async function pollOnce() {
    if (requestInFlight) {
      return;
    }
    requestInFlight = true;
    try {
      var status = await refreshWorkflowCard(batchId);
      // The timer entry is gone once another tick has handled the terminal state.
      if (isWorkflowTerminalStatus(status) && workflowPollingTimers[batchId]) {
        stopWorkflowPolling(batchId);
        refreshConversationMessages();
      }
    } catch (error) {
      console.error("Workflow polling tick failed", { batchId: batchId, error: error });
    } finally {
      requestInFlight = false;
    }
  }

  workflowPollingTimers[batchId] = window.setInterval(pollOnce, WORKFLOW_POLL_INTERVAL_MS);
  pollOnce();
}
|
||||
|
||||
/**
 * Start polling for every workflow card in the list whose displayed status
 * text is not a terminal status (a card without a status element counts as
 * not finished).
 */
function refreshRunningWorkflowCards() {
  if (!workflowCardList) {
    return;
  }
  var cardNodes = workflowCardList.querySelectorAll(".workflow-card");
  cardNodes.forEach(function (cardNode) {
    var batchId = cardNode.getAttribute("data-batch-id");
    var statusNode = cardNode.querySelector(".workflow-status");
    var shownStatus = "";
    if (statusNode) {
      shownStatus = statusNode.textContent.trim();
    }
    if (isWorkflowTerminalStatus(shownStatus)) {
      return;
    }
    startWorkflowPolling(batchId);
  });
}
|
||||
|
||||
async function streamChat(event) {
|
||||
event.preventDefault();
|
||||
if (!composer || !promptInput || !sendButton || !chatStage) {
|
||||
@@ -344,8 +838,19 @@
|
||||
return;
|
||||
}
|
||||
|
||||
var payload = JSON.parse(dataText);
|
||||
var payload;
|
||||
try {
|
||||
payload = JSON.parse(dataText);
|
||||
} catch (error) {
|
||||
console.error("SSE frame parse failed", { error: error, frame: frame });
|
||||
return;
|
||||
}
|
||||
if (eventName === "meta") {
|
||||
if (payload.user_message_id) {
|
||||
userMessage.article.id = "message-" + payload.user_message_id;
|
||||
userMessage.article.setAttribute("data-message-id", payload.user_message_id);
|
||||
latestMessageId = Math.max(latestMessageId, payload.user_message_id);
|
||||
}
|
||||
if (payload.conversation_id) {
|
||||
conversationIdInput.value = payload.conversation_id;
|
||||
window.history.replaceState({}, "", "/?conversation=" + payload.conversation_id);
|
||||
@@ -356,14 +861,23 @@
|
||||
}
|
||||
} else if (eventName === "chunk") {
|
||||
assistantText += payload.delta || "";
|
||||
assistantMessage.text.innerHTML = nl2br(assistantText);
|
||||
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
|
||||
scrollChatToBottom();
|
||||
} else if (eventName === "replace") {
|
||||
assistantText = payload.content || "";
|
||||
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
|
||||
scrollChatToBottom();
|
||||
} else if (eventName === "error") {
|
||||
assistantText = payload.message || "模型调用失败。";
|
||||
assistantMessage.text.innerHTML = nl2br(assistantText);
|
||||
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
|
||||
} else if (eventName === "workflow_started") {
|
||||
ensureWorkflowCard(payload);
|
||||
startWorkflowPolling(payload.batch_id);
|
||||
} else if (eventName === "done") {
|
||||
if (payload.assistant_message_id) {
|
||||
assistantMessage.article.id = "message-" + payload.assistant_message_id;
|
||||
assistantMessage.article.setAttribute("data-message-id", payload.assistant_message_id);
|
||||
latestMessageId = Math.max(latestMessageId, payload.assistant_message_id);
|
||||
}
|
||||
if (payload.title) {
|
||||
setConversationTitle(payload.title);
|
||||
@@ -388,8 +902,29 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Submit the composer when Enter is pressed in the prompt input.
 *
 * Enter combined with Ctrl is left to the browser. Enter pressed while an
 * input method editor is composing (for example confirming a pinyin
 * candidate) is ignored, so choosing a candidate does not send a
 * half-written message.
 */
function bindPromptKeyboardShortcuts() {
  if (!promptInput || !composer) {
    return;
  }
  promptInput.addEventListener("keydown", function (event) {
    // keyCode 229 covers browsers that do not set isComposing during composition.
    if (event.isComposing || event.keyCode === 229) {
      return;
    }
    if (event.key !== "Enter" || event.ctrlKey) {
      return;
    }
    event.preventDefault();
    if (typeof composer.requestSubmit === "function") {
      composer.requestSubmit();
    } else {
      // Fallback for browsers without requestSubmit: fire the submit handlers directly.
      composer.dispatchEvent(new Event("submit", { cancelable: true }));
    }
  });
}
|
||||
|
||||
syncNodeRailVisibility();
|
||||
syncLatestMessageIdFromDom();
|
||||
bindNodeAnchorClicks();
|
||||
renderExistingAssistantMessages();
|
||||
refreshWorkflowBatchCarousel(0);
|
||||
bindWorkflowBatchCarouselControls();
|
||||
refreshRunningWorkflowCards();
|
||||
|
||||
if (chatScroll) {
|
||||
chatScroll.addEventListener("scroll", setActiveNode, { passive: true });
|
||||
@@ -399,6 +934,29 @@
|
||||
if (composer) {
|
||||
composer.addEventListener("submit", streamChat);
|
||||
}
|
||||
bindPromptKeyboardShortcuts();
|
||||
|
||||
if (uploadDropzone && attachmentInput) {
|
||||
uploadDropzone.addEventListener("click", function () {
|
||||
attachmentInput.click();
|
||||
});
|
||||
uploadDropzone.addEventListener("dragover", function (event) {
|
||||
event.preventDefault();
|
||||
uploadDropzone.classList.add("dragging");
|
||||
});
|
||||
uploadDropzone.addEventListener("dragleave", function () {
|
||||
uploadDropzone.classList.remove("dragging");
|
||||
});
|
||||
uploadDropzone.addEventListener("drop", function (event) {
|
||||
event.preventDefault();
|
||||
uploadDropzone.classList.remove("dragging");
|
||||
uploadFiles(event.dataTransfer.files);
|
||||
});
|
||||
attachmentInput.addEventListener("change", function () {
|
||||
uploadFiles(attachmentInput.files);
|
||||
attachmentInput.value = "";
|
||||
});
|
||||
}
|
||||
|
||||
window.addEventListener("resize", syncSidebarState);
|
||||
syncSidebarState();
|
||||
|
||||
147
static/js/attachment_manager.js
Normal file
147
static/js/attachment_manager.js
Normal file
@@ -0,0 +1,147 @@
|
||||
(function () {
|
||||
var page = document.querySelector(".attachment-manager-page");
|
||||
if (!page) {
|
||||
return;
|
||||
}
|
||||
|
||||
var conversationSelect = document.getElementById("attachmentConversationSelect");
|
||||
var uploadDropzone = document.getElementById("managerUploadDropzone");
|
||||
var attachmentInput = document.getElementById("managerAttachmentInput");
|
||||
var uploadStatus = document.getElementById("managerUploadStatus");
|
||||
var searchInput = document.getElementById("attachmentSearch");
|
||||
var table = document.getElementById("attachmentManagerTable");
|
||||
|
||||
/**
 * Read the CSRF token from the `csrftoken` cookie.
 *
 * Everything after the `csrftoken=` prefix is treated as the value, so a value
 * that itself contains `=` is returned intact rather than truncated.
 *
 * @returns {string} The URI-decoded token, or "" when the cookie is absent.
 */
function csrfToken() {
  var prefix = "csrftoken=";
  var entries = document.cookie.split("; ");
  for (var i = 0; i < entries.length; i += 1) {
    if (entries[i].indexOf(prefix) === 0) {
      return decodeURIComponent(entries[i].slice(prefix.length));
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Build the attachment manager URL for a conversation.
 *
 * @param {string} id Conversation id; a falsy value yields the bare page URL.
 * @returns {string} The page URL, with the id URI-encoded in the query string.
 */
function selectedConversationUrl(id) {
  if (!id) {
    return "/attachments/";
  }
  return "/attachments/?conversation=" + encodeURIComponent(id);
}
|
||||
|
||||
/**
 * Send a JSON PATCH request to the URL stored in the row's `data-update-url`.
 *
 * @param {Element} row Table row carrying the `data-update-url` attribute.
 * @param {Object} payload Fields to send, serialized as the JSON request body.
 * @returns {Promise<*>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK.
 */
async function patchAttachment(row, payload) {
  var endpoint = row.getAttribute("data-update-url");
  var requestOptions = {
    method: "PATCH",
    headers: {
      "Content-Type": "application/json",
      "X-CSRFToken": csrfToken(),
    },
    body: JSON.stringify(payload),
  };
  var result = await fetch(endpoint, requestOptions);
  if (result.ok) {
    return result.json();
  }
  throw new Error("附件更新失败。");
}
|
||||
|
||||
/**
 * Send a DELETE request to the URL stored in the row's `data-update-url`.
 *
 * @param {Element} row Table row carrying the `data-update-url` attribute.
 * @returns {Promise<void>} Resolves once the server answered with an OK status.
 * @throws {Error} When the response status is not OK.
 */
async function deleteAttachment(row) {
  var endpoint = row.getAttribute("data-update-url");
  var result = await fetch(endpoint, {
    method: "DELETE",
    headers: { "X-CSRFToken": csrfToken() },
  });
  if (result.ok) {
    return;
  }
  throw new Error("附件删除失败。");
}
|
||||
|
||||
/**
 * Upload the given files to the dropzone's `data-upload-url` as multipart form
 * data (one `files` entry per file) and reload the page on success.
 *
 * Progress and failure are reported through the `uploadStatus` element when it
 * exists; any error is caught here and never propagates to the caller.
 *
 * @param {FileList|Array<File>} files Files to send; empty input is a no-op.
 */
async function uploadFiles(files) {
  if (!uploadDropzone || !files || !files.length) {
    return;
  }

  var payload = new FormData();
  for (var index = 0; index < files.length; index += 1) {
    payload.append("files", files[index]);
  }

  if (uploadStatus) {
    uploadStatus.textContent = "上传中...";
  }

  try {
    var endpoint = uploadDropzone.getAttribute("data-upload-url");
    var result = await fetch(endpoint, {
      method: "POST",
      headers: { "X-CSRFToken": csrfToken() },
      body: payload,
    });
    if (!result.ok) {
      throw new Error("上传失败。");
    }
    // The attachment table is server-rendered, so refresh to show new rows.
    window.location.reload();
  } catch (error) {
    if (uploadStatus) {
      uploadStatus.textContent = "上传失败,请重试。";
    }
  }
}
|
||||
|
||||
// Navigate to the chosen conversation's attachment page when the select changes.
if (conversationSelect) {
  conversationSelect.addEventListener("change", function () {
    var destination = selectedConversationUrl(conversationSelect.value);
    window.location.href = destination;
  });
}
|
||||
|
||||
// Wire the upload dropzone: click or keyboard activation opens the file picker,
// drag-and-drop uploads the dropped files, and the picker uploads its selection.
if (uploadDropzone && attachmentInput) {
  var openFilePicker = function () {
    attachmentInput.click();
  };

  uploadDropzone.addEventListener("click", openFilePicker);

  // The dropzone is focusable and announced as a button, so it must also react
  // to Enter and Space like a native button does.
  uploadDropzone.addEventListener("keydown", function (event) {
    if (event.target !== uploadDropzone) {
      return;
    }
    if (event.key === "Enter" || event.key === " ") {
      // Prevent Space from scrolling the page.
      event.preventDefault();
      openFilePicker();
    }
  });

  uploadDropzone.addEventListener("dragover", function (event) {
    // Cancelling dragover is what marks the element as a valid drop target.
    event.preventDefault();
    uploadDropzone.classList.add("dragging");
  });

  uploadDropzone.addEventListener("dragleave", function () {
    uploadDropzone.classList.remove("dragging");
  });

  uploadDropzone.addEventListener("drop", function (event) {
    // Stop the browser from navigating to the dropped file.
    event.preventDefault();
    uploadDropzone.classList.remove("dragging");
    uploadFiles(event.dataTransfer.files);
  });

  attachmentInput.addEventListener("change", function () {
    uploadFiles(attachmentInput.files);
    // Reset so selecting the same file again still fires "change".
    attachmentInput.value = "";
  });
}
|
||||
|
||||
// Client-side filter: hide attachment rows whose name does not contain the
// (case-insensitive, trimmed) search text; an empty search shows every row.
if (searchInput && table) {
  searchInput.addEventListener("input", function () {
    var needle = searchInput.value.trim().toLowerCase();
    var rows = table.querySelectorAll("tbody tr[data-attachment-id]");
    rows.forEach(function (row) {
      // Fall back to the whole row's text when there is no name cell.
      var nameSource = row.querySelector(".attachment-name") || row;
      var haystack = nameSource.textContent.toLowerCase();
      row.hidden = needle && haystack.indexOf(needle) === -1;
    });
  });
}
|
||||
|
||||
// Delegated click handler for the per-row action buttons (edit / toggle /
// delete). Each successful action reloads the page because the table is
// server-rendered; failures are surfaced with an alert.
if (table) {
  table.addEventListener("click", async function (event) {
    var actionButton = event.target.closest("[data-attachment-action]");
    if (!actionButton) {
      return;
    }
    var row = actionButton.closest("tr[data-attachment-id]");
    if (!row) {
      return;
    }
    var action = actionButton.getAttribute("data-attachment-action");

    try {
      if (action === "edit") {
        var nameCell = row.querySelector(".attachment-name");
        var currentName = nameCell ? nameCell.textContent.trim() : "";
        // prompt() returns null when the dialog is cancelled.
        var answer = window.prompt("请输入新的附件展示名", currentName);
        var nextName = answer ? answer.trim() : "";
        // Skip blank names and no-op renames instead of sending a request
        // and reloading for nothing.
        if (nextName && nextName !== currentName) {
          await patchAttachment(row, { original_name: nextName });
          window.location.reload();
        }
      } else if (action === "toggle") {
        // The button label shows the action it will perform: "启用" means the
        // attachment is currently disabled and should become active.
        await patchAttachment(row, { is_active: actionButton.textContent.trim() === "启用" });
        window.location.reload();
      } else if (action === "delete" && window.confirm("确认删除该附件?")) {
        await deleteAttachment(row);
        window.location.reload();
      }
    } catch (error) {
      window.alert(error.message || "附件操作失败。");
    }
  });
}
|
||||
})();
|
||||
139
templates/attachment_manager.html
Normal file
139
templates/attachment_manager.html
Normal file
@@ -0,0 +1,139 @@
|
||||
{% extends "base.html" %}
|
||||
{% load static %}
|
||||
|
||||
{% block title %}附件管理 - DEMO-AGENT V2{% endblock %}
|
||||
{% block body_class %}app-body{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<main class="app-shell">
|
||||
<header class="topbar">
|
||||
<div class="topbar-left">
|
||||
<div class="tabbar" role="tablist" aria-label="页面切换">
|
||||
<a class="tab" href="/" role="tab" aria-selected="false">首页</a>
|
||||
<button class="tab" type="button" role="tab" aria-selected="false">知识库管理</button>
|
||||
<a class="tab" href="/" role="tab" aria-selected="false">审核智能体</a>
|
||||
<a class="tab active" href="{% url 'attachment_manager' %}" role="tab" aria-selected="true">附件管理</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="topbar-right">
|
||||
<div class="user-menu">
|
||||
<button class="user-menu-trigger" type="button">
|
||||
<span class="avatar large">{{ request.user.username|slice:":1"|upper }}</span>
|
||||
<div class="user-copy">
|
||||
<strong>{{ request.user.username }}</strong>
|
||||
<span>当前登录用户</span>
|
||||
</div>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<section
|
||||
class="attachment-manager-page"
|
||||
data-selected-conversation="{% if selected_conversation %}{{ selected_conversation.pk }}{% endif %}"
|
||||
>
|
||||
<header class="attachment-manager-hero attachment-manager-toolbar">
|
||||
<div>
|
||||
<p class="eyebrow">附件管理</p>
|
||||
<h1>附件管理</h1>
|
||||
<p>管理各对话下上传的审核资料、版本、状态和下载。</p>
|
||||
</div>
|
||||
<div class="attachment-manager-selectbar">
|
||||
<label for="attachmentConversationSelect">对话</label>
|
||||
<select class="attachment-manager-select-control" id="attachmentConversationSelect">
|
||||
<option value="">请选择对话</option>
|
||||
{% for conversation in conversations %}
|
||||
<option
|
||||
value="{{ conversation.pk }}"
|
||||
{% if selected_conversation and selected_conversation.pk == conversation.pk %}selected{% endif %}
|
||||
>
|
||||
{{ conversation.title|default:"新对话" }} · {{ conversation.updated_at|date:"m月d日 H:i" }} · {{ conversation.attachment_count }} 个附件
|
||||
</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
{% if selected_conversation %}
|
||||
<a class="return-chat-link" href="{% url 'home' %}?conversation={{ selected_conversation.pk }}">返回对话</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</header>
|
||||
|
||||
{% if selected_conversation %}
|
||||
<div class="attachment-manager-content attachment-manager-split">
|
||||
<section class="attachment-manager-panel upload-manager-panel">
|
||||
<div class="summary-subheading">
|
||||
<h3>上传附件</h3>
|
||||
<span>{{ selected_conversation.title|default:"新对话" }}</span>
|
||||
</div>
|
||||
<div
|
||||
class="upload-dropzone manager-upload-dropzone"
|
||||
id="managerUploadDropzone"
|
||||
data-upload-url="{% url 'file_summary_attachment_upload' selected_conversation.pk %}"
|
||||
tabindex="0"
|
||||
role="button"
|
||||
>
|
||||
<input id="managerAttachmentInput" type="file" multiple hidden>
|
||||
<strong>拖拽文件到这里</strong>
|
||||
<span>支持 doc、docx、xls、xlsx、ppt、pptx、pdf、zip、7z、rar</span>
|
||||
</div>
|
||||
<p class="upload-status" id="managerUploadStatus">上传后会归属到当前选择的对话。</p>
|
||||
</section>
|
||||
|
||||
<section class="attachment-manager-panel">
|
||||
<div class="summary-subheading">
|
||||
<h3>附件列表</h3>
|
||||
<input class="attachment-search" id="attachmentSearch" type="search" placeholder="搜索文件名">
|
||||
</div>
|
||||
<div class="attachment-table-wrap">
|
||||
<table class="attachment-table" id="attachmentManagerTable">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>状态</th>
|
||||
<th>文件名</th>
|
||||
<th>版本</th>
|
||||
<th>大小</th>
|
||||
<th>上传时间</th>
|
||||
<th>操作</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for attachment in attachments %}
|
||||
<tr
|
||||
data-attachment-id="{{ attachment.pk }}"
|
||||
data-update-url="{% url 'file_summary_attachment_detail' selected_conversation.pk attachment.pk %}"
|
||||
data-download-url="{% url 'file_summary_attachment_download' selected_conversation.pk attachment.pk %}"
|
||||
>
|
||||
<td>{% if attachment.is_active %}启用{% else %}禁用{% endif %}</td>
|
||||
<td class="attachment-name">{{ attachment.original_name }}</td>
|
||||
<td>v{{ attachment.version_no }}</td>
|
||||
<td>{{ attachment.file_size }} bytes</td>
|
||||
<td>{{ attachment.created_at|date:"Y-m-d H:i" }}</td>
|
||||
<td class="attachment-actions">
|
||||
<a href="{% url 'file_summary_attachment_download' selected_conversation.pk attachment.pk %}">下载</a>
|
||||
<button type="button" data-attachment-action="edit">编辑</button>
|
||||
<button type="button" data-attachment-action="toggle">{% if attachment.is_active %}禁用{% else %}启用{% endif %}</button>
|
||||
<button type="button" data-attachment-action="delete">删除</button>
|
||||
</td>
|
||||
</tr>
|
||||
{% empty %}
|
||||
<tr>
|
||||
<td colspan="6" class="table-empty">当前对话暂无附件</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</section>
|
||||
</div>
|
||||
{% else %}
|
||||
<section class="attachment-manager-panel attachment-manager-empty attachment-manager-content">
|
||||
<h2>请选择一个对话查看附件</h2>
|
||||
<p>通过上方下拉框选择对话后,可上传、下载、编辑、启用禁用或删除附件。</p>
|
||||
</section>
|
||||
{% endif %}
|
||||
</section>
|
||||
</main>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script src="{% static 'js/attachment_manager.js' %}"></script>
|
||||
{% endblock %}
|
||||
@@ -5,65 +5,14 @@
|
||||
{% block body_class %}app-body{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<main class="workspace" data-sidebar-state="open">
|
||||
<aside class="sidebar" id="sidebar">
|
||||
<div class="sidebar-top">
|
||||
<button class="icon-button sidebar-toggle" type="button" id="sidebarToggle" aria-label="折叠侧边栏">
|
||||
<span></span>
|
||||
<span></span>
|
||||
</button>
|
||||
<div class="brand">
|
||||
<span class="brand-mark">审</span>
|
||||
<div class="brand-copy">
|
||||
<strong class="brand-text">审核智能体</strong>
|
||||
<span class="brand-subtitle">临床注册文件审核工作台</span>
|
||||
</div>
|
||||
</div>
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
<input type="hidden" name="action" value="new_conversation">
|
||||
<button class="new-chat" type="submit">+ 新对话</button>
|
||||
</form>
|
||||
<form class="search-form" method="get">
|
||||
<label class="sr-only" for="conversationSearch">搜索会话</label>
|
||||
<input id="conversationSearch" type="text" name="q" value="{{ search_query }}" placeholder="搜索会话...">
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="sidebar-group">
|
||||
<p class="sidebar-label">对话记录</p>
|
||||
<nav class="history-list" aria-label="对话历史">
|
||||
{% for conversation in conversations %}
|
||||
<a
|
||||
class="history-item{% if current_conversation and current_conversation.pk == conversation.pk %} active{% endif %}"
|
||||
href="/?conversation={{ conversation.pk }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}"
|
||||
>
|
||||
<span class="history-title">{{ conversation.title|default:"新对话" }}</span>
|
||||
<span class="history-meta">{{ conversation.updated_at|date:"m月d日 H:i" }}</span>
|
||||
</a>
|
||||
{% empty %}
|
||||
<div class="history-empty">
|
||||
<p>暂无会话记录</p>
|
||||
<span>点击上方“新对话”开始审核。</span>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</nav>
|
||||
</div>
|
||||
|
||||
</aside>
|
||||
|
||||
<section class="chat-shell">
|
||||
<main class="app-shell">
|
||||
<header class="topbar">
|
||||
<div class="topbar-left">
|
||||
<button class="icon-button mobile-toggle" type="button" id="mobileSidebarToggle" aria-label="展开侧边栏">
|
||||
<span></span>
|
||||
<span></span>
|
||||
</button>
|
||||
<div class="tabbar" role="tablist" aria-label="页面切换">
|
||||
<button class="tab" type="button" role="tab" aria-selected="false">首页</button>
|
||||
<a class="tab" href="/" role="tab" aria-selected="false">首页</a>
|
||||
<button class="tab" type="button" role="tab" aria-selected="false">知识库管理</button>
|
||||
<button class="tab active" type="button" role="tab" aria-selected="true">审核智能体</button>
|
||||
<button class="tab" type="button" role="tab" aria-selected="false">视频实时监测</button>
|
||||
<a class="tab active" href="/" role="tab" aria-selected="true">审核智能体</a>
|
||||
<a class="tab" href="{% url 'attachment_manager' %}" role="tab" aria-selected="false">附件管理</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -92,6 +41,57 @@
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<section class="workspace" data-sidebar-state="open">
|
||||
<aside class="sidebar" id="sidebar">
|
||||
<div class="sidebar-top">
|
||||
<div class="sidebar-header">
|
||||
<button class="icon-button sidebar-toggle" type="button" id="sidebarToggle" aria-label="折叠侧边栏">
|
||||
<span></span>
|
||||
<span></span>
|
||||
</button>
|
||||
<div class="brand">
|
||||
<span class="brand-mark">审</span>
|
||||
<div class="brand-copy">
|
||||
<strong class="brand-text">审核智能体</strong>
|
||||
<span class="brand-subtitle">临床注册文件审核工作台</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<form method="post">
|
||||
{% csrf_token %}
|
||||
<input type="hidden" name="action" value="new_conversation">
|
||||
<button class="new-chat" type="submit">+ 新对话</button>
|
||||
</form>
|
||||
<form class="search-form" method="get">
|
||||
<label class="sr-only" for="conversationSearch">搜索会话</label>
|
||||
<input id="conversationSearch" type="text" name="q" value="{{ search_query }}" placeholder="搜索会话...">
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<div class="sidebar-group">
|
||||
<p class="sidebar-label">对话记录</p>
|
||||
<nav class="history-list" aria-label="对话历史">
|
||||
{% for conversation in conversations %}
|
||||
<a
|
||||
class="history-item{% if current_conversation and current_conversation.pk == conversation.pk %} active{% endif %}"
|
||||
data-conversation-id="{{ conversation.pk }}"
|
||||
href="/?conversation={{ conversation.pk }}{% if search_query %}&q={{ search_query|urlencode }}{% endif %}"
|
||||
>
|
||||
<span class="history-title">{{ conversation.title|default:"新对话" }}</span>
|
||||
<span class="history-meta">{{ conversation.updated_at|date:"m月d日 H:i" }}</span>
|
||||
</a>
|
||||
{% empty %}
|
||||
<div class="history-empty">
|
||||
<p>暂无会话记录</p>
|
||||
<span>点击上方“新对话”开始审核。</span>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</nav>
|
||||
</div>
|
||||
|
||||
</aside>
|
||||
|
||||
<section class="chat-shell">
|
||||
<section class="chat-stage" data-stream-url="{% url 'chat_stream' %}">
|
||||
<div class="chat-scroll-wrap">
|
||||
<div class="chat-scroll" id="chatScroll">
|
||||
@@ -108,13 +108,19 @@
|
||||
<article
|
||||
class="message {{ message.role }}"
|
||||
id="message-{{ message.pk }}"
|
||||
data-message-id="{{ message.pk }}"
|
||||
data-node-label="{% if message.role == 'assistant' %}AI{% else %}用户{% endif %} {{ forloop.counter }}"
|
||||
>
|
||||
<div class="message-avatar{% if message.role == 'user' %} user-mark{% endif %}">
|
||||
{% if message.role == "assistant" %}AI{% else %}{{ request.user.username|slice:":1"|upper }}{% endif %}
|
||||
</div>
|
||||
<div class="message-bubble">
|
||||
{% if message.role == "assistant" %}
|
||||
<div class="message-content markdown-content"></div>
|
||||
<template class="message-raw">{{ message.content }}</template>
|
||||
{% else %}
|
||||
<p>{{ message.content|linebreaksbr }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</article>
|
||||
{% endfor %}
|
||||
@@ -164,9 +170,112 @@
|
||||
</div>
|
||||
</section>
|
||||
</section>
|
||||
|
||||
<aside
|
||||
class="summary-panel"
|
||||
id="summaryPanel"
|
||||
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
|
||||
data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/"
|
||||
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
|
||||
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
|
||||
>
|
||||
<section class="summary-section upload-section">
|
||||
<div class="summary-heading">
|
||||
<h2>文件汇总</h2>
|
||||
<span>当前对话</span>
|
||||
</div>
|
||||
<div class="upload-dropzone" id="uploadDropzone" tabindex="0" role="button">
|
||||
<input id="attachmentInput" type="file" multiple hidden>
|
||||
<strong>拖拽文件到这里</strong>
|
||||
<span>支持多文件、zip、7z、rar</span>
|
||||
</div>
|
||||
<p class="upload-status" id="uploadStatus">上传后发送“自动汇总文件目录与页数”启动工作流。</p>
|
||||
</section>
|
||||
|
||||
<section class="summary-section attachment-section">
|
||||
<div class="summary-subheading">
|
||||
<h3>附件</h3>
|
||||
<a
|
||||
class="attachment-manager-link"
|
||||
href="{% url 'attachment_manager' %}{% if current_conversation %}?conversation={{ current_conversation.pk }}{% endif %}"
|
||||
aria-label="打开附件管理页面"
|
||||
>↗</a>
|
||||
</div>
|
||||
<div class="attachment-list" id="attachmentList">
|
||||
{% for attachment in attachments %}
|
||||
<div class="attachment-item" data-attachment-id="{{ attachment.pk }}">
|
||||
<div>
|
||||
<strong>{{ attachment.original_name }}</strong>
|
||||
<span>v{{ attachment.version_no }} · {{ attachment.file_size }} bytes · {{ attachment.upload_status }}</span>
|
||||
</div>
|
||||
{% if attachment.is_active %}<em>active</em>{% endif %}
|
||||
</div>
|
||||
{% empty %}
|
||||
<div class="panel-empty">暂无附件</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<section class="summary-section workflow-section">
|
||||
<div class="summary-subheading">
|
||||
<h3>工作流</h3>
|
||||
</div>
|
||||
<div class="workflow-card-list workflow-batch-carousel" id="workflowCardList" data-active-index="0">
|
||||
{% for batch in summary_batches %}
|
||||
<article
|
||||
class="workflow-card{% if forloop.first %} active{% endif %}"
|
||||
data-batch-id="{{ batch.pk }}"
|
||||
data-workflow-index="{{ forloop.counter0 }}"
|
||||
aria-hidden="{% if forloop.first %}false{% else %}true{% endif %}"
|
||||
>
|
||||
<header>
|
||||
<strong>{{ batch.batch_no }}</strong>
|
||||
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
|
||||
</header>
|
||||
{% if batch.error_message %}
|
||||
<p class="workflow-error">{{ batch.error_message }}</p>
|
||||
{% endif %}
|
||||
<ol>
|
||||
{% for node in batch.node_runs.all %}
|
||||
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
|
||||
<div>
|
||||
<span>{{ node.node_name }}</span>
|
||||
{% if node.message %}<small>{{ node.message }}</small>{% endif %}
|
||||
</div>
|
||||
<em>{{ node.progress }}%</em>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ol>
|
||||
</article>
|
||||
{% empty %}
|
||||
<div class="panel-empty">暂无工作流</div>
|
||||
{% endfor %}
|
||||
{% if summary_batches %}
|
||||
<div class="workflow-batch-controls">
|
||||
<button type="button" class="workflow-batch-btn" data-workflow-action="prev" aria-label="上一个工作流">‹</button>
|
||||
<div class="workflow-batch-dots" aria-label="工作流批次">
|
||||
{% for batch in summary_batches %}
|
||||
<button
|
||||
type="button"
|
||||
class="workflow-batch-dot{% if forloop.first %} active{% endif %}"
|
||||
data-workflow-index-dot="{{ forloop.counter0 }}"
|
||||
aria-label="查看{{ batch.batch_no }}状态"
|
||||
aria-current="{% if forloop.first %}true{% else %}false{% endif %}"
|
||||
></button>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<button type="button" class="workflow-batch-btn" data-workflow-action="next" aria-label="下一个工作流">›</button>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</section>
|
||||
</aside>
|
||||
</section>
|
||||
</main>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.6/dist/purify.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.12/marked.min.js"></script>
|
||||
<script src="{% static 'js/app.js' %}"></script>
|
||||
{% endblock %}
|
||||
|
||||
111
tests/test_attachment_reader.py
Normal file
111
tests/test_attachment_reader.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from django.conf import settings
|
||||
|
||||
from review_agent.models import Conversation, FileAttachment
|
||||
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_read_attachment_extracts_text_file_details(settings, tmp_path, django_user_model):
    """A UTF-8 text attachment is read back with status, type, preview and a text section."""
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Point MEDIA_ROOT at the temp dir so the relative storage_path resolves there.
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    stored_as = Path("uploads") / "note.txt"
    on_disk = tmp_path / stored_as
    on_disk.parent.mkdir(parents=True)
    on_disk.write_text("产品名称:智能审核\n关键结论:可以解析附件详情", encoding="utf-8")

    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="note.txt",
        storage_path=stored_as.as_posix(),
        file_size=on_disk.stat().st_size,
        content_type="text/plain",
    )

    details = read_attachment_details(attachment)

    assert details.status == "success"
    assert details.filename == "note.txt"
    assert details.file_type == "txt"
    assert "智能审核" in details.preview_text
    assert details.sections[0]["type"] == "text"
|
||||
|
||||
|
||||
def test_read_attachment_extracts_docx_and_xlsx_details(settings, tmp_path, django_user_model):
    """Word and Excel attachments are parsed into preview text and sheet rows."""
    from docx import Document
    from openpyxl import Workbook

    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    upload_dir = tmp_path / "uploads"
    upload_dir.mkdir(parents=True)

    # Word fixture: one heading plus one paragraph.
    word_file = upload_dir / "summary.docx"
    word_doc = Document()
    word_doc.add_heading("项目摘要", level=1)
    word_doc.add_paragraph("这是 Word 附件里的正文。")
    word_doc.save(word_file)
    word_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="summary.docx",
        storage_path="uploads/summary.docx",
        file_size=word_file.stat().st_size,
    )

    # Excel fixture: a single named sheet with a header row and one data row.
    excel_file = upload_dir / "inventory.xlsx"
    book = Workbook()
    first_sheet = book.active
    first_sheet.title = "清单"
    for cells in (["文件名", "页数"], ["a.pdf", 3]):
        first_sheet.append(cells)
    book.save(excel_file)
    excel_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="inventory.xlsx",
        storage_path="uploads/inventory.xlsx",
        file_size=excel_file.stat().st_size,
    )

    word_details = read_attachment_details(word_attachment)
    excel_details = read_attachment_details(excel_attachment)

    assert word_details.status == "success"
    assert "项目摘要" in word_details.preview_text
    assert "Word 附件里的正文" in word_details.preview_text
    assert excel_details.status == "success"
    assert excel_details.sections[0]["name"] == "清单"
    # The numeric cell is expected back as a string.
    assert excel_details.sections[0]["rows"][1] == ["a.pdf", "3"]
|
||||
|
||||
|
||||
def test_attachment_reader_skill_returns_structured_details(settings, tmp_path, django_user_model):
    """The skill wrapper reports success and exposes per-attachment filename and preview."""
    from review_agent.file_summary.skills.attachment_reader import AttachmentReaderSkill

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    text_file = tmp_path / "uploads" / "readme.txt"
    text_file.parent.mkdir(parents=True)
    text_file.write_text("请读取这个附件。", encoding="utf-8")

    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="readme.txt",
        storage_path="uploads/readme.txt",
        file_size=text_file.stat().st_size,
    )

    outcome = AttachmentReaderSkill().run_for_attachments([attachment])

    assert outcome.success is True
    first_entry = outcome.data["attachments"][0]
    assert first_entry["filename"] == "readme.txt"
    assert "请读取这个附件" in first_entry["preview_text"]
|
||||
25
tests/test_file_summary_archive.py
Normal file
25
tests/test_file_summary_archive.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from zipfile import ZipFile
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.services.archive import extract_archive
|
||||
|
||||
|
||||
def test_extract_zip_preserves_safe_paths(tmp_path):
    """A zip with a nested, well-behaved member is extracted under the target directory."""
    zip_file = tmp_path / "safe.zip"
    with ZipFile(zip_file, "w") as bundle:
        bundle.writestr("dir/a.txt", "content")

    destination = tmp_path / "out"
    expected_member = destination / "dir" / "a.txt"

    extracted = extract_archive(zip_file, destination)

    assert extracted == [expected_member]
    assert expected_member.read_text(encoding="utf-8") == "content"
|
||||
|
||||
|
||||
def test_extract_zip_rejects_path_traversal(tmp_path):
    """A zip member whose name starts with ``../`` makes extraction raise ValueError."""
    zip_file = tmp_path / "evil.zip"
    with ZipFile(zip_file, "w") as bundle:
        bundle.writestr("../evil.txt", "bad")

    destination = tmp_path / "out"
    with pytest.raises(ValueError):
        extract_archive(zip_file, destination)
|
||||
60
tests/test_file_summary_e2e.py
Normal file
60
tests/test_file_summary_e2e.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from review_agent.models import Conversation, Message
|
||||
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def _browser_path() -> str | None:
|
||||
candidates = [
|
||||
Path(r"C:\Program Files\Google\Chrome\Application\chrome.exe"),
|
||||
Path(r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"),
|
||||
]
|
||||
for candidate in candidates:
|
||||
if candidate.exists():
|
||||
return str(candidate)
|
||||
return None
|
||||
|
||||
|
||||
def test_file_summary_panel_desktop_and_mobile_with_playwright(live_server, django_user_model):
    """Log in through a real browser and check the summary panel at desktop and mobile sizes.

    Skipped when Playwright is not installed or no local Chrome/Edge executable is found.
    """
    playwright_api = pytest.importorskip("playwright.sync_api")
    executable_path = _browser_path()
    if not executable_path:
        pytest.skip("No Chrome or Edge executable available for Playwright E2E.")

    username, password = "e2e_user", "e2e-pass-123"
    account = django_user_model.objects.create_user(username=username, password=password)
    chat = Conversation.objects.create(user=account, title="E2E 会话")
    # Seed an assistant reply containing a Markdown table and a download link so
    # the client-side Markdown rendering can be asserted below.
    Message.objects.create(
        conversation=chat,
        role=Message.Role.ASSISTANT,
        content=(
            "文件目录与页数汇总已完成。\n\n"
            "| 序号 | 文件名 | 页数 | 状态 |\n"
            "| --- | --- | --- | --- |\n"
            "| 1 | a.pdf | 4 | success |\n\n"
            "[下载 Markdown 报告](/api/review-agent/file-summary/exports/1/download/)"
        ),
    )

    expect = playwright_api.expect
    download_link = '.message.assistant a[href="/api/review-agent/file-summary/exports/1/download/"]'

    with playwright_api.sync_playwright() as p:
        browser = p.chromium.launch(headless=True, executable_path=executable_path)
        page = browser.new_page(viewport={"width": 1440, "height": 900})

        # Sign in through the login form.
        page.goto(f"{live_server.url}/login/")
        page.fill('input[name="username"]', username)
        page.fill('input[name="password"]', password)
        page.click('button[type="submit"]')
        page.wait_for_url(f"{live_server.url}/")

        # Desktop layout: panel, upload area, workflow list and rendered Markdown.
        expect(page.locator("#summaryPanel")).to_be_visible()
        expect(page.locator("#uploadDropzone")).to_be_visible()
        expect(page.locator("#workflowCardList")).to_be_visible()
        expect(page.locator(".message.assistant table")).to_be_visible()
        expect(page.locator(download_link)).to_be_visible()

        # Mobile layout: panel still visible, sidebar moved out of the viewport.
        page.set_viewport_size({"width": 390, "height": 844})
        expect(page.locator("#summaryPanel")).to_be_visible()
        expect(page.locator("#sidebar")).not_to_be_in_viewport()
        browser.close()
|
||||
225
tests/test_file_summary_frontend.py
Normal file
225
tests/test_file_summary_frontend.py
Normal file
@@ -0,0 +1,225 @@
|
||||
import pytest
|
||||
from django.urls import reverse
|
||||
|
||||
from review_agent.models import Conversation, FileAttachment, FileSummaryBatch, Message, WorkflowNodeRun
|
||||
|
||||
|
||||
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_workspace_renders_summary_panel(client, django_user_model):
    """The home page for a conversation contains the summary panel hooks used by the front end."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    Message.objects.create(
        conversation=chat,
        role=Message.Role.ASSISTANT,
        content="| 序号 | 文件名 |\n| --- | --- |\n| 1 | a.pdf |\n\n[下载](/api/review-agent/file-summary/exports/1/download/)",
    )
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")

    assert response.status_code == 200
    content = response.content.decode("utf-8")
    expected_fragments = (
        'id="summaryPanel"',
        'id="uploadDropzone"',
        'id="workflowCardList"',
        'data-conversation-id="',
        'data-message-id="',
        'data-message-url-template="',
        'class="message-content markdown-content"',
        'class="message-raw"',
        "自动汇总文件目录与页数",
    )
    for fragment in expected_fragments:
        assert fragment in content
|
||||
|
||||
|
||||
def test_workspace_links_to_attachment_manager(client, django_user_model):
    """The home page links to the attachment manager and no longer shows the video tab."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")

    assert response.status_code == 200
    content = response.content.decode("utf-8")
    manager_href = f'href="{reverse("attachment_manager")}?conversation={chat.pk}"'
    assert "附件管理" in content
    assert "视频实时监测" not in content
    assert manager_href in content
    assert 'class="attachment-manager-link"' in content
|
||||
|
||||
|
||||
def test_attachment_manager_requires_conversation_selection(client, django_user_model):
    """Without a ``conversation`` query parameter the page shows the picker and an empty-state hint."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    Conversation.objects.create(user=owner, title="待选择会话")
    client.force_login(owner)

    response = client.get(reverse("attachment_manager"))

    assert response.status_code == 200
    content = response.content.decode("utf-8")
    for fragment in (
        "附件管理",
        "请选择一个对话查看附件",
        "待选择会话",
        'id="attachmentConversationSelect"',
    ):
        assert fragment in content
|
||||
|
||||
|
||||
def test_attachment_manager_selects_conversation_and_lists_attachments(client, django_user_model):
    """Selecting a conversation lists its attachment with download/edit/delete actions."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="资料会话")
    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "a.docx",
        "storage_path": "x/a.docx",
        "file_size": 128,
        "is_active": True,
    }
    FileAttachment.objects.create(**attachment_fields)
    client.force_login(owner)

    manager_url = f"{reverse('attachment_manager')}?conversation={chat.pk}"
    response = client.get(manager_url)

    assert response.status_code == 200
    html = response.content.decode("utf-8")
    for fragment in ("资料会话", "a.docx", "下载", "编辑", "删除", "attachment-manager-split"):
        assert fragment in html
    # The page must also link back to the workspace of the same conversation.
    workspace_link = reverse("home") + f"?conversation={chat.pk}"
    assert workspace_link in html
|
||||
|
||||
|
||||
def test_attachment_manager_uses_compact_admin_layout(client, django_user_model):
    """The attachment manager renders the compact layout hooks and the CSS defines them.

    Checks both the rendered HTML (toolbar, content and select-control class
    names) and the stylesheet at ``static/css/login.css`` (matching selectors
    plus ``align-content: start``). The stylesheet path is relative to the
    current working directory.
    """
    user = django_user_model.objects.create_user(username="owner", password="pass")
    Conversation.objects.create(user=user, title="紧凑会话")
    client.force_login(user)

    response = client.get(reverse("attachment_manager"))

    assert response.status_code == 200
    content = response.content.decode("utf-8")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector (avoids ResourceWarning).
    with open("static/css/login.css", encoding="utf-8") as stylesheet:
        css = stylesheet.read()
    assert "attachment-manager-toolbar" in content
    assert "attachment-manager-content" in content
    assert "attachment-manager-select-control" in content
    assert ".attachment-manager-page" in css
    assert "align-content: start" in css
    assert ".attachment-manager-toolbar" in css
    assert ".attachment-manager-select-control" in css
    assert ".attachment-manager-split" in css
|
||||
|
||||
|
||||
def test_workspace_renders_workflow_history_as_batch_carousel(client, django_user_model):
    """Workflow batches render as a carousel, the later-created batch appearing first."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    older_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-OLDER",
        status=FileSummaryBatch.Status.SUCCESS,
    )
    latest_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-LATEST",
        status=FileSummaryBatch.Status.FAILED,
        error_message="解压失败",
    )
    # One node run per batch: a successful upload and a failed extraction.
    node_run_specs = (
        {
            "batch": older_batch,
            "node_code": "upload",
            "node_name": "附件固化",
            "status": WorkflowNodeRun.Status.SUCCESS,
            "progress": 100,
            "message": "附件固化完成",
        },
        {
            "batch": latest_batch,
            "node_code": "extract",
            "node_name": "压缩包解压",
            "status": WorkflowNodeRun.Status.FAILED,
            "progress": 10,
            "message": "压缩包损坏",
        },
    )
    for spec in node_run_specs:
        WorkflowNodeRun.objects.create(**spec)
    client.force_login(owner)

    response = client.get(f"{reverse('home')}?conversation={chat.pk}")

    assert response.status_code == 200
    html = response.content.decode("utf-8")
    carousel_markers = (
        "workflow-batch-carousel",
        'class="workflow-card active"',
        'data-workflow-index="0"',
        'data-workflow-action="prev"',
        'data-workflow-action="next"',
    )
    for marker in carousel_markers:
        assert marker in html
    latest_position = html.index("FS-LATEST")
    older_position = html.index("FS-OLDER")
    assert latest_position < older_position
    assert "压缩包损坏" in html
|
||||
|
||||
|
||||
def test_frontend_prevents_long_message_overflow():
    """The stylesheet contains the rules that keep long messages inside their bubble.

    Reads ``static/css/login.css`` relative to the current working directory.
    """
    # Context manager closes the handle promptly (the bare open().read()
    # form leaks it until garbage collection and triggers ResourceWarning).
    with open("static/css/login.css", encoding="utf-8") as stylesheet:
        css = stylesheet.read()

    assert ".message-bubble" in css
    assert "overflow-wrap: anywhere" in css
    assert "word-break: break-word" in css
|
||||
|
||||
|
||||
def test_frontend_polls_running_workflow_cards():
    """The frontend script contains the workflow polling entry points.

    Reads ``static/js/app.js`` relative to the current working directory.
    """
    # Context manager guarantees the file handle is closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()

    assert "startWorkflowPolling" in script
    assert "setInterval" in script
    assert "refreshRunningWorkflowCards" in script
|
||||
|
||||
|
||||
def test_frontend_updates_sidebar_conversation_by_stable_id():
    """The frontend script sets and queries ``data-conversation-id`` on history items.

    Reads ``static/js/app.js`` relative to the current working directory.
    """
    # Context manager guarantees the file handle is closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()

    assert "data-conversation-id" in script
    assert "setAttribute(\"data-conversation-id\"" in script
    assert ".history-item[data-conversation-id=" in script
|
||||
|
||||
|
||||
def test_frontend_refreshes_generated_workflow_messages():
    """The frontend script contains the message-refresh identifiers.

    Reads ``static/js/app.js`` relative to the current working directory.
    """
    # Context manager guarantees the file handle is closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()

    assert "refreshConversationMessages" in script
    assert "latestMessageId" in script
    assert "data-message-url-template" in script
|
||||
|
||||
|
||||
def test_frontend_can_replace_partial_stream_content():
    """The frontend script handles a ``replace`` stream event by overwriting the text.

    Reads ``static/js/app.js`` relative to the current working directory.
    """
    # Context manager guarantees the file handle is closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()

    assert 'eventName === "replace"' in script
    assert "assistantText = payload.content" in script
|
||||
|
||||
|
||||
def test_frontend_enter_sends_and_ctrl_enter_inserts_newline():
    """The frontend script wires Enter / Ctrl+Enter handling to the composer.

    Reads ``static/js/app.js`` relative to the current working directory.
    """
    # Context manager guarantees the file handle is closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()

    assert "bindPromptKeyboardShortcuts" in script
    assert "event.key === \"Enter\"" in script
    assert "event.ctrlKey" in script
    assert "composer.requestSubmit()" in script
|
||||
|
||||
|
||||
def test_frontend_renders_workflow_error_messages():
    """Script and stylesheet both contain the workflow error rendering hooks.

    Reads ``static/js/app.js`` and ``static/css/login.css`` relative to the
    current working directory.
    """
    # Context managers guarantee both handles are closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()
    with open("static/css/login.css", encoding="utf-8") as stylesheet:
        css = stylesheet.read()

    assert "payload.batch.error_message" in script
    assert "workflow-error" in script
    assert "node.message" in script
    assert ".workflow-error" in css
|
||||
|
||||
|
||||
def test_frontend_renders_workflow_batches_as_carousel():
    """Script and stylesheet both contain the workflow batch carousel hooks.

    Reads ``static/js/app.js`` and ``static/css/login.css`` relative to the
    current working directory.
    """
    # Context managers guarantee both handles are closed after reading.
    with open("static/js/app.js", encoding="utf-8") as source:
        script = source.read()
    with open("static/css/login.css", encoding="utf-8") as stylesheet:
        css = stylesheet.read()

    assert "selectWorkflowBatchIndex" in script
    assert "refreshWorkflowBatchCarousel" in script
    assert "data-workflow-action" in script
    assert "workflow-batch-carousel" in script
    assert ".workflow-batch-controls" in css
    assert ".workflow-card.active" in css
|
||||
24
tests/test_file_summary_inventory.py
Normal file
24
tests/test_file_summary_inventory.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.services.inventory import scan_files_to_items
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_scan_files_to_items_preserves_relative_paths(tmp_path, django_user_model):
    """Scanning a root yields one item per file, keyed by its path relative to the root."""
    work_root = tmp_path / "work"
    nested_dir = work_root / "a"
    nested_dir.mkdir(parents=True)
    (nested_dir / "one.pdf").write_bytes(b"pdf")
    (work_root / "two.txt").write_text("x", encoding="utf-8")
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-I")

    scanned = scan_files_to_items(batch=batch, roots=[work_root])

    relative_paths = [entry.relative_path for entry in scanned]
    assert relative_paths == ["a/one.pdf", "two.txt"]
    stored_count = FileSummaryItem.objects.filter(batch=batch).count()
    assert stored_count == 2
    first_entry = scanned[0]
    assert first_entry.statistics_status == FileSummaryItem.StatisticsStatus.SKIPPED
|
||||
113
tests/test_file_summary_models.py
Normal file
113
tests/test_file_summary_models.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import pytest
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.db import IntegrityError, transaction
|
||||
|
||||
from review_agent.models import (
|
||||
Conversation,
|
||||
ExportedSummaryFile,
|
||||
FileAttachment,
|
||||
FileSummaryBatch,
|
||||
FileSummaryBatchAttachment,
|
||||
FileSummaryItem,
|
||||
)
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def create_user(username="u1"):
    """Create and return a user named *username* with the fixed password ``"pass"``."""
    user_model = get_user_model()
    return user_model.objects.create_user(username=username, password="pass")
|
||||
|
||||
|
||||
def test_attachment_versions_are_unique_per_conversation_and_name():
    """Reusing a version number for the same conversation and file name is rejected."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    # Fields shared by every attachment version created below.
    shared_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "资料.docx",
    }

    version_one = FileAttachment.objects.create(
        version_no=1,
        is_active=False,
        storage_path="media/a.docx",
        file_size=10,
        **shared_fields,
    )
    version_two = FileAttachment.objects.create(
        version_no=2,
        storage_path="media/b.docx",
        file_size=12,
        **shared_fields,
    )

    assert version_one.version_no == 1
    assert version_two.version_no == 2

    # The inner atomic block confines the failed insert so it does not
    # break the surrounding test transaction.
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileAttachment.objects.create(
                version_no=2,
                storage_path="media/c.docx",
                file_size=14,
                **shared_fields,
            )
|
||||
|
||||
|
||||
def test_batch_attachment_and_item_unique_constraints():
    """Duplicate batch/attachment links and duplicate item paths in a batch are rejected."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="资料.docx",
        storage_path="media/a.docx",
        file_size=10,
    )
    batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-001",
    )

    # Linking the same attachment to the same batch twice must fail.
    FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment)
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment)

    # Two items in one batch with the same relative path must fail, even
    # though their file_index values differ.
    item_fields = {
        "batch": batch,
        "file_name": "资料.docx",
        "file_type": "docx",
        "relative_path": "资料.docx",
        "storage_path": "media/a.docx",
    }
    FileSummaryItem.objects.create(file_index=1, **item_fields)
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileSummaryItem.objects.create(file_index=2, **item_fields)
|
||||
|
||||
|
||||
def test_exported_file_traces_to_user_and_conversation():
    """An exported file reaches its user and conversation through its batch."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-002",
    )
    export_fields = {
        "batch": batch,
        "export_type": ExportedSummaryFile.ExportType.MARKDOWN,
        "file_name": "summary.md",
        "storage_path": "media/summary.md",
    }
    exported = ExportedSummaryFile.objects.create(**export_fields)

    owning_batch = exported.batch
    assert owning_batch.user == owner
    assert exported.batch.conversation == chat
|
||||
180
tests/test_file_summary_page_count.py
Normal file
180
tests/test_file_summary_page_count.py
Normal file
@@ -0,0 +1,180 @@
|
||||
import pytest
|
||||
import shutil
|
||||
from zipfile import ZipFile
|
||||
from docx import Document
|
||||
from openpyxl import Workbook
|
||||
from pptx import Presentation
|
||||
|
||||
from review_agent.file_summary.services.page_count import count_document_pages
|
||||
from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill
|
||||
from review_agent.file_summary.skills.base import WorkflowContext
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_count_document_pages_for_office_formats(tmp_path):
    """Freshly generated docx, xlsx and pptx files are counted as expected."""
    # Empty Word document.
    word_file = tmp_path / "a.docx"
    Document().save(word_file)

    # Workbook with its default sheet plus one extra sheet.
    excel_file = tmp_path / "a.xlsx"
    book = Workbook()
    book.create_sheet("第二页")
    book.save(excel_file)

    # Presentation with a single slide.
    slides_file = tmp_path / "a.pptx"
    deck = Presentation()
    deck.slides.add_slide(deck.slide_layouts[6])
    deck.save(slides_file)

    word_result = count_document_pages(word_file)
    assert word_result.status in {"success", "uncertain"}
    excel_result = count_document_pages(excel_file)
    assert excel_result.page_count == 2
    slides_result = count_document_pages(slides_file)
    assert slides_result.page_count == 1
|
||||
|
||||
|
||||
def test_count_docx_pages_from_extended_properties(tmp_path):
    """A ``<Pages>`` value in ``docProps/app.xml`` is reported as the page count."""
    docx_path = tmp_path / "with-pages.docx"
    Document().save(docx_path)
    properties_member = "docProps/app.xml"
    app_xml = (
        '<?xml version="1.0" encoding="UTF-8"?>'
        '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">'
        "<Pages>7</Pages>"
        "</Properties>"
    )
    rewritten = tmp_path / "rewritten.docx"
    # Copy every archive member except the extended properties, then append
    # a replacement properties part that declares seven pages.
    with ZipFile(docx_path) as source:
        with ZipFile(rewritten, "w") as target:
            for member in source.infolist():
                if member.filename == properties_member:
                    continue
                target.writestr(member, source.read(member.filename))
            target.writestr(properties_member, app_xml)
    shutil.move(rewritten, docx_path)

    outcome = count_document_pages(docx_path)

    assert outcome.status == "success"
    assert outcome.page_count == 7
|
||||
|
||||
|
||||
def test_count_docx_pages_uses_word_com_fallback(monkeypatch, tmp_path):
    """When the extended-properties lookup yields nothing, the Word COM count is used."""

    def no_extended_page_count(path):
        return None

    def word_com_page_count(path):
        return 22

    docx_path = tmp_path / "without-pages.docx"
    Document().save(docx_path)
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_docx_pages_from_extended_properties",
        no_extended_page_count,
    )
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_word_pages_with_com",
        word_com_page_count,
    )

    outcome = count_document_pages(docx_path)

    assert outcome.status == "success"
    assert outcome.page_count == 22
|
||||
|
||||
|
||||
def test_count_doc_pages_uses_word_com_fallback(monkeypatch, tmp_path):
    """A legacy ``.doc`` file is counted through the (stubbed) Word COM helper."""

    def allow_com_fallback(path, ext):
        return True

    def word_com_page_count(path):
        return 5

    legacy_doc = tmp_path / "legacy.doc"
    legacy_doc.write_bytes(b"legacy-doc-placeholder")
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._can_try_com_fallback",
        allow_com_fallback,
    )
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_word_pages_with_com",
        word_com_page_count,
    )

    outcome = count_document_pages(legacy_doc)

    assert outcome.status == "success"
    assert outcome.page_count == 5
|
||||
|
||||
|
||||
def test_count_ppt_pages_uses_powerpoint_com_fallback(monkeypatch, tmp_path):
    """A legacy ``.ppt`` file is counted through the (stubbed) PowerPoint COM helper."""

    def allow_com_fallback(path, ext):
        return True

    def powerpoint_com_slide_count(path):
        return 9

    legacy_ppt = tmp_path / "legacy.ppt"
    legacy_ppt.write_bytes(b"legacy-ppt-placeholder")
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._can_try_com_fallback",
        allow_com_fallback,
    )
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_powerpoint_slides_with_com",
        powerpoint_com_slide_count,
    )

    outcome = count_document_pages(legacy_ppt)

    assert outcome.status == "success"
    assert outcome.page_count == 9
|
||||
|
||||
|
||||
def test_count_excel_pages_uses_excel_com_fallback(monkeypatch, tmp_path):
    """A legacy ``.xls`` file is counted through the (stubbed) Excel COM helper."""

    def allow_com_fallback(path, ext):
        return True

    def excel_com_sheet_count(path):
        return 3

    legacy_xls = tmp_path / "legacy.xls"
    legacy_xls.write_bytes(b"legacy-xls-placeholder")
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._can_try_com_fallback",
        allow_com_fallback,
    )
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_excel_sheets_with_com",
        excel_com_sheet_count,
    )

    outcome = count_document_pages(legacy_xls)

    assert outcome.status == "success"
    assert outcome.page_count == 3
|
||||
|
||||
|
||||
def test_invalid_xlsx_does_not_start_excel_com(monkeypatch, tmp_path):
    """A file named ``.xlsx`` with invalid content is "uncertain" without calling Excel COM."""
    broken_workbook = tmp_path / "broken.xlsx"
    broken_workbook.write_bytes(b"not a real workbook")

    def fail_if_called(path):
        # Reaching this stub means the COM helper was invoked for a bad file.
        raise AssertionError("Excel COM should not start for invalid xlsx signatures")

    com_helper = "review_agent.file_summary.services.page_count._count_excel_sheets_with_com"
    monkeypatch.setattr(com_helper, fail_if_called)

    outcome = count_document_pages(broken_workbook)

    assert outcome.status == "uncertain"
|
||||
|
||||
|
||||
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
    """The page-count skill marks an xlsx item SUCCESS and a txt item UNSUPPORTED."""
    workbook_path = tmp_path / "a.xlsx"
    Workbook().save(workbook_path)
    text_path = tmp_path / "a.txt"
    text_path.write_text("x", encoding="utf-8")
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-P")
    workbook_item = FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        file_name="a.xlsx",
        file_type="xlsx",
        relative_path="a.xlsx",
        storage_path=str(workbook_path),
    )
    text_item = FileSummaryItem.objects.create(
        batch=batch,
        file_index=2,
        file_name="a.txt",
        file_type="txt",
        relative_path="a.txt",
        storage_path=str(text_path),
    )

    skill = DocumentPageCountSkill()
    outcome = skill.run(WorkflowContext(batch=batch))

    # Reload both rows so the assertions see what the skill persisted.
    workbook_item.refresh_from_db()
    text_item.refresh_from_db()
    statuses = FileSummaryItem.StatisticsStatus
    assert outcome.success is True
    assert workbook_item.statistics_status == statuses.SUCCESS
    assert text_item.statistics_status == statuses.UNSUPPORTED
|
||||
29
tests/test_file_summary_product_detect.py
Normal file
29
tests/test_file_summary_product_detect.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.services.product_detect import detect_product_name
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_detect_product_name_from_top_level_directory(django_user_model):
    """The top-level directory of an item path becomes the product name and retitles the conversation."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="新对话 06-06")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-D")
    item_fields = {
        "batch": batch,
        "file_index": 1,
        "file_name": "说明书.docx",
        "file_type": "docx",
        "relative_path": "甲型试剂盒/说明书.docx",
        "storage_path": "x",
    }
    FileSummaryItem.objects.create(**item_fields)

    detected = detect_product_name(batch)

    # Reload both rows so the assertions see the persisted values.
    batch.refresh_from_db()
    chat.refresh_from_db()
    assert detected == "甲型试剂盒"
    assert batch.product_name == "甲型试剂盒"
    assert chat.title == "甲型试剂盒-文件汇总"
|
||||
82
tests/test_file_summary_report.py
Normal file
82
tests/test_file_summary_report.py
Normal file
@@ -0,0 +1,82 @@
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
from openpyxl import load_workbook
|
||||
|
||||
from review_agent.file_summary.services.export_excel import generate_excel_export
|
||||
from review_agent.file_summary.services.report import generate_markdown_report
|
||||
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem, Message
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def make_batch(tmp_path, django_user_model):
    """Create and return a batch rooted at *tmp_path* with one successful xlsx item."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch_fields = {
        "conversation": chat,
        "user": owner,
        "batch_no": "FS-R",
        "work_dir": str(tmp_path),
        "total_files": 1,
        "success_files": 1,
        "total_pages": 2,
    }
    batch = FileSummaryBatch.objects.create(**batch_fields)
    item_path = tmp_path / "a.xlsx"
    FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        file_name="a.xlsx",
        file_type="xlsx",
        relative_path="a.xlsx",
        storage_path=str(item_path),
        page_count=2,
        statistics_status=FileSummaryItem.StatisticsStatus.SUCCESS,
    )
    return batch
|
||||
|
||||
|
||||
def test_generate_markdown_report_creates_export_and_summary(tmp_path, django_user_model):
    """The Markdown report is written to disk and the summary carries the table header."""
    batch = make_batch(tmp_path, django_user_model)

    exported, summary = generate_markdown_report(batch)

    report_file = Path(exported.storage_path)
    assert exported.export_type == "markdown"
    assert report_file.exists()
    assert "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |" in summary
    report_text = Path(exported.storage_path).read_text(encoding="utf-8")
    assert "a.xlsx" in report_text
|
||||
|
||||
|
||||
def test_generate_excel_export_contains_summary_and_items(tmp_path, django_user_model):
    """The Excel export has a summary sheet and a detail sheet listing the item."""
    batch = make_batch(tmp_path, django_user_model)

    exported = generate_excel_export(batch)

    book = load_workbook(exported.storage_path)
    assert book.sheetnames == ["汇总信息", "文件明细"]
    detail_sheet = book["文件明细"]
    # Cell C2 is expected to hold the file name of the batch's only item.
    assert detail_sheet["C2"].value == "a.xlsx"
|
||||
|
||||
|
||||
def test_workflow_report_node_writes_assistant_message(tmp_path, settings, django_user_model):
    """Running the workflow synchronously leaves an assistant message in the conversation."""
    from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
    from review_agent.models import FileAttachment

    settings.MEDIA_ROOT = tmp_path
    settings.FILE_SUMMARY_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    source_file = tmp_path / "a.xlsx"
    source_file.write_bytes(b"not a real workbook")
    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "a.txt",
        "storage_path": str(source_file),
        "file_size": source_file.stat().st_size,
    }
    FileAttachment.objects.create(**attachment_fields)
    batch = create_file_summary_batch(conversation=chat, user=owner)
    # Point the batch at a scratch directory inside the test's temp folder.
    batch.work_dir = str(tmp_path / "batch")
    batch.save(update_fields=["work_dir"])

    start_file_summary_workflow(batch, async_run=False)

    assistant_messages = Message.objects.filter(conversation=chat, role=Message.Role.ASSISTANT)
    assert assistant_messages.exists()
|
||||
46
tests/test_file_summary_skills.py
Normal file
46
tests/test_file_summary_skills.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import pytest
|
||||
import logging
|
||||
|
||||
from review_agent.file_summary.skills.base import BaseSkill, SkillResult, WorkflowContext
|
||||
from review_agent.file_summary.skills.registry import SkillRegistry
|
||||
|
||||
|
||||
class EchoSkill(BaseSkill):
    """Minimal skill used by the registry tests; it reports the batch id it was run with."""

    name = "echo"

    def run(self, context):
        """Return a successful result whose data holds ``context.batch.id``."""
        payload = {"batch_id": context.batch.id}
        return SkillResult(success=True, data=payload)
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_skill_registry_executes_registered_skill(django_user_model):
    """Executing a registered skill by name returns that skill's result."""
    from review_agent.models import Conversation, FileSummaryBatch

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-X")
    registry = SkillRegistry()
    registry.register(EchoSkill())
    context = WorkflowContext(batch=batch)

    outcome = registry.execute("echo", context)

    assert outcome.success is True
    expected_data = {"batch_id": batch.id}
    assert outcome.data == expected_data
|
||||
|
||||
|
||||
@pytest.mark.django_db
def test_skill_registry_logs_skill_lifecycle(caplog, django_user_model):
    """Executing a skill logs a "Skill started" and a "Skill finished" message naming it."""
    from review_agent.models import Conversation, FileSummaryBatch

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-LOG")
    registry = SkillRegistry()
    registry.register(EchoSkill())

    with caplog.at_level(logging.INFO, logger="review_agent.file_summary"):
        registry.execute("echo", WorkflowContext(batch=batch))

    logged = [record.getMessage() for record in caplog.records]
    started = [line for line in logged if "Skill started" in line and "echo" in line]
    finished = [line for line in logged if "Skill finished" in line and "echo" in line]
    assert started
    assert finished
|
||||
48
tests/test_file_summary_storage.py
Normal file
48
tests/test_file_summary_storage.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.storage import save_uploaded_attachment
|
||||
from review_agent.models import Conversation, FileAttachment
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_save_uploaded_attachment_versions_same_name(settings, tmp_path, django_user_model):
    """Uploading the same file name twice creates version 2 and deactivates version 1."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    original = save_uploaded_attachment(
        conversation=chat,
        user=owner,
        uploaded_file=SimpleUploadedFile("资料.docx", b"first"),
    )
    replacement = save_uploaded_attachment(
        conversation=chat,
        user=owner,
        uploaded_file=SimpleUploadedFile("资料.docx", b"second"),
    )

    # Reload the first row: the second upload is expected to have changed it.
    original.refresh_from_db()
    assert original.version_no == 1
    assert original.is_active is False
    assert replacement.version_no == 2
    assert replacement.is_active is True
    stored_versions = FileAttachment.objects.filter(conversation=chat).count()
    assert stored_versions == 2
    stored_file = tmp_path / replacement.storage_path
    assert stored_file.read_bytes() == b"second"
|
||||
|
||||
|
||||
def test_save_uploaded_attachment_rejects_path_traversal(settings, tmp_path, django_user_model):
    """An upload named with ``../`` is stored under MEDIA_ROOT with no ``..`` in its path."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    traversal_upload = SimpleUploadedFile("../资料.docx", b"content")

    attachment = save_uploaded_attachment(
        conversation=chat,
        user=owner,
        uploaded_file=traversal_upload,
    )

    assert ".." not in attachment.storage_path
    stored_file = tmp_path / attachment.storage_path
    assert stored_file.exists()
|
||||
73
tests/test_file_summary_trigger.py
Normal file
73
tests/test_file_summary_trigger.py
Normal file
@@ -0,0 +1,73 @@
|
||||
import pytest
|
||||
|
||||
from review_agent.file_summary.workflow_trigger import (
|
||||
evaluate_attachment_reader_trigger,
|
||||
evaluate_file_summary_trigger,
|
||||
)
|
||||
from review_agent.models import Conversation, FileAttachment
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_trigger_matches_keywords_only_when_active_attachment_exists(django_user_model):
    """The file-summary trigger needs both a matching prompt and an attachment."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_prompt = "请自动汇总文件目录与页数"

    # Matching prompt but nothing uploaded yet.
    without_attachment = evaluate_file_summary_trigger(chat, summary_prompt)
    assert without_attachment.should_start is False
    assert without_attachment.reason == "missing_attachment"

    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )

    # Same prompt once an attachment exists.
    with_attachment = evaluate_file_summary_trigger(chat, summary_prompt)
    assert with_attachment.should_start is True
    assert with_attachment.workflow_type == "file_summary"

    # An unrelated prompt does not start the workflow.
    unrelated = evaluate_file_summary_trigger(chat, "你好,帮我解释法规")
    assert unrelated.should_start is False
    assert unrelated.reason == "not_matched"
|
||||
|
||||
|
||||
def test_attachment_reader_trigger_matches_file_content_phrases(django_user_model):
    """A prompt about file content starts the attachment reader only once a file exists."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    content_prompt = "根据提供的简历文件内容,简要介绍"

    before_upload = evaluate_attachment_reader_trigger(chat, content_prompt)
    assert before_upload.should_start is False
    assert before_upload.reason == "missing_attachment"

    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "resume.docx",
        "storage_path": "x/resume.docx",
        "file_size": 1,
    }
    FileAttachment.objects.create(**attachment_fields)

    after_upload = evaluate_attachment_reader_trigger(chat, content_prompt)
    assert after_upload.should_start is True
    assert after_upload.workflow_type == "attachment_reader"
|
||||
|
||||
|
||||
def test_attachment_reader_trigger_matches_resume_project_experience_request(django_user_model):
    """A request to read project experience from the attached resume starts the reader."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "resume.docx",
        "storage_path": "x/resume.docx",
        "file_size": 1,
    }
    FileAttachment.objects.create(**attachment_fields)

    decision = evaluate_attachment_reader_trigger(chat, "阅读下附件简历中的项目经历")

    assert decision.should_start is True
    assert decision.workflow_type == "attachment_reader"
|
||||
260
tests/test_file_summary_views.py
Normal file
260
tests/test_file_summary_views.py
Normal file
@@ -0,0 +1,260 @@
|
||||
from django.core.files.uploadedfile import SimpleUploadedFile
|
||||
from django.urls import reverse
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from review_agent.models import (
|
||||
Conversation,
|
||||
ExportedSummaryFile,
|
||||
FileAttachment,
|
||||
FileSummaryBatch,
|
||||
Message,
|
||||
WorkflowNodeRun,
|
||||
)
|
||||
|
||||
|
||||
# Module-level mark: applies the ``django_db`` marker to every test in this module.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_upload_attachments_requires_conversation_owner(client, settings, tmp_path, django_user_model):
    """Uploading to a conversation owned by another user is answered with 404."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    intruder = django_user_model.objects.create_user(username="other", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    client.force_login(intruder)
    upload_url = reverse("file_summary_attachment_upload", args=[chat.pk])
    payload = {"files": [SimpleUploadedFile("a.docx", b"a")]}

    response = client.post(upload_url, payload)

    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_attachment_api_requires_login(client, django_user_model):
    """An unauthenticated request to the attachment list is answered with a 302."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    list_url = reverse("file_summary_attachment_list", args=[chat.pk])

    # No login is performed on the client before this request.
    response = client.get(list_url)

    assert response.status_code == 302
|
||||
|
||||
|
||||
def test_upload_and_list_current_conversation_attachments(client, settings, tmp_path, django_user_model):
    """Two files uploaded in one request are returned by the list endpoint."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    client.force_login(owner)
    upload_url = reverse("file_summary_attachment_upload", args=[chat.pk])
    uploads = [
        SimpleUploadedFile("a.docx", b"a", content_type="application/docx"),
        SimpleUploadedFile("b.zip", b"b", content_type="application/zip"),
    ]

    upload_response = client.post(upload_url, {"files": uploads})
    list_response = client.get(reverse("file_summary_attachment_list", args=[chat.pk]))

    assert upload_response.status_code == 200
    first_uploaded = upload_response.json()["attachments"][0]
    assert first_uploaded["original_name"] == "a.docx"
    listed = list_response.json()["attachments"]
    assert len(listed) == 2
|
||||
|
||||
|
||||
def test_delete_attachment_is_logical_and_scoped(client, settings, tmp_path, django_user_model):
    """DELETE flags the attachment as deleted/inactive; the row can still be reloaded."""
    settings.MEDIA_ROOT = tmp_path
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")
    record_fields = {
        "conversation": conversation,
        "user": account,
        "original_name": "a.docx",
        "storage_path": "x/a.docx",
        "file_size": 1,
    }
    attachment = FileAttachment.objects.create(**record_fields)
    client.force_login(account)
    detail_url = reverse("file_summary_attachment_detail", args=[conversation.pk, attachment.pk])

    response = client.delete(detail_url)

    # refresh_from_db() succeeding shows the record was not physically removed.
    attachment.refresh_from_db()
    assert response.status_code == 200
    assert attachment.upload_status == FileAttachment.UploadStatus.DELETED
    assert attachment.is_active is False
|
||||
|
||||
|
||||
def test_export_download_requires_batch_owner(client, tmp_path, django_user_model):
    """Export downloads are restricted to the batch owner.

    A different user receives 404; the owner receives a 200 response served
    as an attachment named ``summary.md`` with a ``text/markdown`` content type.
    """
    create_user = django_user_model.objects.create_user
    owner = create_user(username="owner", password="pass")
    other = create_user(username="other", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-DL")

    # Put a real file on disk so the download view has something to serve.
    report_path = tmp_path / "summary.md"
    report_path.write_text("ok", encoding="utf-8")
    export_fields = {
        "batch": batch,
        "export_type": ExportedSummaryFile.ExportType.MARKDOWN,
        "file_name": "summary.md",
        "storage_path": str(report_path),
    }
    exported = ExportedSummaryFile.objects.create(**export_fields)
    download_url = reverse("file_summary_export_download", args=[exported.pk])

    client.force_login(other)
    denied = client.get(download_url)
    assert denied.status_code == 404

    client.force_login(owner)
    allowed = client.get(download_url)
    assert allowed.status_code == 200
    disposition = allowed["Content-Disposition"]
    assert "attachment" in disposition
    assert "summary.md" in disposition
    assert allowed["Content-Type"].startswith("text/markdown")
|
||||
|
||||
|
||||
def test_conversation_messages_returns_incremental_messages(client, django_user_model):
    """The messages endpoint is owner-only and honours the ``after`` query parameter.

    A non-owner gets 404. For the owner, ``?after=<first message id>`` returns
    only the second message and reports it as ``latest_message_id``.
    """
    create_user = django_user_model.objects.create_user
    owner = create_user(username="owner", password="pass")
    other = create_user(username="other", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    earlier = Message.objects.create(
        conversation=conversation,
        role=Message.Role.USER,
        content="用户消息",
    )
    later = Message.objects.create(
        conversation=conversation,
        role=Message.Role.ASSISTANT,
        content="报告消息",
    )
    messages_url = reverse("review_agent_conversation_messages", args=[conversation.pk])

    client.force_login(other)
    denied = client.get(messages_url)
    assert denied.status_code == 404

    client.force_login(owner)
    response = client.get(f"{messages_url}?after={earlier.pk}")

    assert response.status_code == 200
    payload = response.json()
    assert payload["latest_message_id"] == later.pk
    expected_entry = {
        "id": later.pk,
        "role": Message.Role.ASSISTANT,
        "content": "报告消息",
        "created_at": later.created_at.isoformat(),
    }
    assert payload["messages"] == [expected_entry]
|
||||
|
||||
|
||||
def test_batch_status_exposes_batch_and_node_errors(client, django_user_model):
    """The batch status payload carries the batch error and the node's message."""
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")
    failed_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=account,
        batch_no="FS-ERR",
        status=FileSummaryBatch.Status.FAILED,
        error_message="压缩包解压失败",
    )
    node_fields = {
        "batch": failed_batch,
        "node_code": "extract",
        "node_name": "压缩包解压",
        "status": WorkflowNodeRun.Status.FAILED,
        "progress": 10,
        "message": "未解出任何可扫描文件",
    }
    WorkflowNodeRun.objects.create(**node_fields)
    client.force_login(account)
    status_url = reverse("file_summary_batch_status", args=[failed_batch.pk])

    response = client.get(status_url)

    assert response.status_code == 200
    payload = response.json()
    assert payload["batch"]["error_message"] == "压缩包解压失败"
    first_node = payload["nodes"][0]
    assert first_node["message"] == "未解出任何可扫描文件"
|
||||
|
||||
|
||||
def test_conversation_list_api_returns_owned_conversations_with_attachment_counts(client, django_user_model):
    """The conversation list is scoped to the caller and reports an attachment count.

    The owner's conversation holds one default attachment and one marked
    deleted/inactive; the expected ``attachment_count`` is 1, and the other
    user's conversation must not appear in the listing.
    """
    create_user = django_user_model.objects.create_user
    owner = create_user(username="owner", password="pass")
    other = create_user(username="other", password="pass")
    owned = Conversation.objects.create(user=owner, title="有附件会话")
    Conversation.objects.create(user=other, title="其他用户会话")

    shared_fields = {"conversation": owned, "user": owner, "file_size": 1}
    FileAttachment.objects.create(
        original_name="a.docx",
        storage_path="x/a.docx",
        **shared_fields,
    )
    FileAttachment.objects.create(
        original_name="deleted.docx",
        storage_path="x/deleted.docx",
        upload_status=FileAttachment.UploadStatus.DELETED,
        is_active=False,
        **shared_fields,
    )
    client.force_login(owner)

    response = client.get(reverse("review_agent_conversation_list"))

    assert response.status_code == 200
    conversations = response.json()["conversations"]
    titles = [entry["title"] for entry in conversations]
    assert titles == ["有附件会话"]
    assert conversations[0]["attachment_count"] == 1
|
||||
|
||||
|
||||
def test_patch_attachment_updates_name_and_active_state(client, django_user_model):
    """PATCH with a JSON body renames the attachment and switches it to inactive."""
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")
    attachment = FileAttachment.objects.create(
        conversation=conversation,
        user=account,
        original_name="old.docx",
        storage_path="x/old.docx",
        file_size=1,
        is_active=True,
    )
    client.force_login(account)
    detail_url = reverse("file_summary_attachment_detail", args=[conversation.pk, attachment.pk])
    body = json.dumps({"original_name": "new.docx", "is_active": False})

    response = client.patch(detail_url, data=body, content_type="application/json")

    # Check both the persisted row and the echoed response payload.
    attachment.refresh_from_db()
    assert response.status_code == 200
    assert attachment.original_name == "new.docx"
    assert attachment.is_active is False
    echoed = response.json()["attachment"]
    assert echoed["original_name"] == "new.docx"
|
||||
|
||||
|
||||
def test_attachment_download_requires_owner_and_returns_file(client, settings, tmp_path, django_user_model):
    """Attachment downloads are owner-only and stream the stored bytes.

    A different user gets 404; the owner gets a 200 attachment response named
    ``a.docx`` whose streamed content equals what was written to disk.
    """
    settings.MEDIA_ROOT = tmp_path
    create_user = django_user_model.objects.create_user
    owner = create_user(username="owner", password="pass")
    other = create_user(username="other", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")

    stored_file = tmp_path / "uploads" / "a.docx"
    stored_file.parent.mkdir(parents=True)
    stored_file.write_bytes(b"attachment-content")
    attachment = FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="a.docx",
        storage_path=str(stored_file),
        file_size=stored_file.stat().st_size,
        content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    download_url = reverse("file_summary_attachment_download", args=[conversation.pk, attachment.pk])

    client.force_login(other)
    denied = client.get(download_url)
    assert denied.status_code == 404

    client.force_login(owner)
    allowed = client.get(download_url)
    assert allowed.status_code == 200
    disposition = allowed["Content-Disposition"]
    assert "attachment" in disposition
    assert "a.docx" in disposition
    # The response is consumed through streaming_content, chunk by chunk.
    body = b"".join(allowed.streaming_content)
    assert body == b"attachment-content"
|
||||
319
tests/test_file_summary_workflow.py
Normal file
319
tests/test_file_summary_workflow.py
Normal file
@@ -0,0 +1,319 @@
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from zipfile import ZipFile
|
||||
|
||||
from review_agent.file_summary.services import archive as archive_service
|
||||
from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
|
||||
from review_agent.skill_router import SkillRoute
|
||||
from review_agent.models import (
|
||||
Conversation,
|
||||
FileAttachment,
|
||||
FileSummaryBatch,
|
||||
FileSummaryBatchAttachment,
|
||||
Message,
|
||||
WorkflowEvent,
|
||||
WorkflowNodeRun,
|
||||
)
|
||||
from review_agent.services import stream_message
|
||||
|
||||
|
||||
# Module-level mark: applies ``django_db`` to every test defined in this file.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user_model):
    """Creating a batch binds only the active attachment and sets up workflow records.

    Expectations: the batch starts PENDING, exactly one batch-attachment link
    exists and points at the active attachment, that attachment becomes BOUND,
    a work directory is set, at least six node runs exist, and a
    ``workflow_created`` event was recorded.
    """
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")
    trigger = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总")
    common = {"conversation": conversation, "user": account, "file_size": 1}
    active = FileAttachment.objects.create(
        original_name="a.docx",
        storage_path="x/a.docx",
        **common,
    )
    # Inactive attachment that the batch is expected to leave unbound.
    FileAttachment.objects.create(
        original_name="old.docx",
        is_active=False,
        storage_path="x/old.docx",
        **common,
    )

    batch = create_file_summary_batch(conversation=conversation, user=account, trigger_message=trigger)

    assert batch.status == FileSummaryBatch.Status.PENDING
    # .get() also fails if more than one attachment was linked to the batch.
    bound_link = FileSummaryBatchAttachment.objects.get(batch=batch)
    assert bound_link.attachment == active
    active.refresh_from_db()
    assert active.upload_status == FileAttachment.UploadStatus.BOUND
    assert batch.work_dir
    node_total = WorkflowNodeRun.objects.filter(batch=batch).count()
    assert node_total >= 6
    created_events = WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created")
    assert created_events.exists()
|
||||
|
||||
|
||||
def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_model):
    """With ``async_run=False`` the batch is SUCCESS once the start call has returned."""
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")
    trigger = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总")
    attachment_fields = {
        "conversation": conversation,
        "user": account,
        "original_name": "a.docx",
        "storage_path": "x/a.docx",
        "file_size": 1,
    }
    FileAttachment.objects.create(**attachment_fields)
    batch = create_file_summary_batch(conversation=conversation, user=account, trigger_message=trigger)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.status == FileSummaryBatch.Status.SUCCESS
    completed_events = WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed")
    assert completed_events.exists()
|
||||
|
||||
|
||||
def test_workflow_extracts_archive_and_scans_extracted_files(settings, tmp_path, django_user_model):
    """A zip attachment is unpacked and only its inner file appears as a batch item."""
    settings.MEDIA_ROOT = tmp_path
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")

    # Build a zip containing a single minimal PDF nested in a folder.
    zip_location = tmp_path / "upload.zip"
    with ZipFile(zip_location, "w") as bundle:
        bundle.writestr("folder/a.pdf", b"%PDF-1.4\n%%EOF")
    FileAttachment.objects.create(
        conversation=conversation,
        user=account,
        original_name="upload.zip",
        storage_path=str(zip_location),
        file_size=zip_location.stat().st_size,
    )
    batch = create_file_summary_batch(conversation=conversation, user=account)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.total_files == 1
    only_item = batch.items.get()
    assert only_item.file_name == "a.pdf"
    zip_items = batch.items.filter(file_type="zip")
    assert not zip_items.exists()
|
||||
|
||||
|
||||
def test_workflow_marks_archive_extract_failure_visible(settings, tmp_path, django_user_model):
    """An empty zip fails the batch and surfaces the reason in three places.

    The failure text must show up in the batch ``error_message``, in the
    ``extract`` node's message, and in the latest FAILED ``node_progress``
    event payload.
    """
    settings.MEDIA_ROOT = tmp_path
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")

    # Opening and immediately closing the writer leaves a valid zip with no entries.
    empty_zip = tmp_path / "empty.zip"
    ZipFile(empty_zip, "w").close()
    FileAttachment.objects.create(
        conversation=conversation,
        user=account,
        original_name="empty.zip",
        storage_path=str(empty_zip),
        file_size=empty_zip.stat().st_size,
    )
    batch = create_file_summary_batch(conversation=conversation, user=account)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    extract_node = batch.node_runs.get(node_code="extract")
    assert batch.status == FileSummaryBatch.Status.FAILED
    assert "未解出任何可扫描文件" in batch.error_message
    assert extract_node.status == WorkflowNodeRun.Status.FAILED
    assert "未解出任何可扫描文件" in extract_node.message

    failed_progress_events = WorkflowEvent.objects.filter(
        batch=batch,
        event_type="node_progress",
        payload__status=WorkflowNodeRun.Status.FAILED,
    )
    failed_event = failed_progress_events.latest("id")
    assert "未解出任何可扫描文件" in failed_event.payload["message"]
|
||||
|
||||
|
||||
def test_rar_extract_uses_python_libarchive_before_7z(monkeypatch, tmp_path):
    """``extract_archive`` on a ``.rar`` calls the libarchive extractor and not 7z.

    Both extractors are replaced by recording fakes; the libarchive fake
    produces one file, and the recorded calls must contain that call only.
    """
    archive_path = tmp_path / "sample.rar"
    archive_path.write_bytes(b"rar")
    target_dir = tmp_path / "out"
    calls = []

    def fake_libarchive_extract(path: Path, target: Path):
        # Record the call, then emulate a successful extraction of one file.
        calls.append(("libarchive", path, target))
        target.mkdir(parents=True, exist_ok=True)
        produced = target / "a.docx"
        produced.write_bytes(b"doc")
        return [produced]

    def fake_7z_extract(path: Path, target: Path):
        # Only reached if the code under test falls through to 7z.
        calls.append(("7z", path, target))
        return []

    replacements = (
        ("_extract_rar_with_libarchive", fake_libarchive_extract),
        ("_extract_rar_with_7z", fake_7z_extract),
    )
    for attribute, stand_in in replacements:
        monkeypatch.setattr(archive_service, attribute, stand_in)

    extracted = archive_service.extract_archive(archive_path, target_dir)

    extracted_names = [entry.name for entry in extracted]
    assert extracted_names == ["a.docx"]
    assert calls == [("libarchive", archive_path, target_dir)]
|
||||
|
||||
|
||||
def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model):
    """A file-summary prompt makes the stream announce a workflow and create a batch."""
    settings.FILE_SUMMARY_ASYNC = False
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")
    attachment_fields = {
        "conversation": conversation,
        "user": account,
        "original_name": "a.docx",
        "storage_path": "x/a.docx",
        "file_size": 1,
    }
    FileAttachment.objects.create(**attachment_fields)

    # Drain the generator so every frame has been produced before asserting.
    frames = list(stream_message(conversation, "请自动汇总文件目录与页数"))

    joined = "".join(frames)
    assert "workflow_started" in joined
    assert "\"workflow_type\": \"file_summary\"" in joined
    created_batches = FileSummaryBatch.objects.filter(conversation=conversation)
    assert created_batches.exists()
|
||||
|
||||
|
||||
def test_stream_message_uses_normal_llm_path_when_not_triggered(monkeypatch, django_user_model):
    """A plain greeting is answered via ``stream_reply`` without starting a workflow."""
    account = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=account, title="会话")

    def fake_stream_reply(conversation, content):
        # Single canned chunk in place of the real LLM stream.
        yield "普通回复"

    monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply)

    joined = "".join(list(stream_message(chat, "你好")))

    assert "普通回复" in joined
    assert "workflow_started" not in joined
|
||||
|
||||
|
||||
def test_stream_message_meta_uses_first_prompt_title_for_new_conversation(monkeypatch, django_user_model):
    """The first and last streamed frames both carry the first prompt as the title."""
    account = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=account, title="新对话 01-01 10:00")

    def fake_stream_reply(conversation, content):
        # Single canned chunk in place of the real LLM stream.
        yield "普通回复"

    monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply)

    frames = list(stream_message(chat, "这是第一条新对话消息"))

    opening_frame, closing_frame = frames[0], frames[-1]
    assert '"title": "这是第一条新对话消息"' in opening_frame
    assert '"title": "这是第一条新对话消息"' in closing_frame
|
||||
|
||||
|
||||
def test_stream_message_reads_active_attachment_when_requested(settings, tmp_path, django_user_model):
    """Asking to read the attachment streams its parsed content, not a workflow.

    The attachment is stored with a path relative to ``MEDIA_ROOT``; the
    streamed output must include the result heading, the file name and text
    taken from the file body.
    """
    settings.MEDIA_ROOT = tmp_path
    account = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=account, title="会话")

    text_file = tmp_path / "uploads" / "detail.txt"
    text_file.parent.mkdir(parents=True)
    text_file.write_text("合同编号:RA-2026\n结论:附件阅读成功", encoding="utf-8")
    FileAttachment.objects.create(
        conversation=conversation,
        user=account,
        original_name="detail.txt",
        storage_path="uploads/detail.txt",
        file_size=text_file.stat().st_size,
    )

    frames = list(stream_message(conversation, "请阅读附件并给出详情"))

    joined = "".join(frames)
    assert "附件解析结果" in joined
    assert "detail.txt" in joined
    assert "RA-2026" in joined
    assert "workflow_started" not in joined
|
||||
|
||||
|
||||
def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkeypatch, django_user_model):
    """A stream that dies mid-way is replaced by the non-streaming reply.

    The frames must contain the partial chunk, a ``replace`` marker, the full
    fallback text and ``done``; the stored assistant message must hold only
    the fallback text.
    """
    account = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=account, title="会话")

    def broken_stream_reply(conversation, content):
        # Emit one chunk, then fail the way a dropped provider connection would.
        yield "已生成部分内容"
        raise RuntimeError("provider connection reset")

    def complete_reply(conversation, content):
        return "非流式完整回复"

    monkeypatch.setattr("review_agent.services.stream_reply", broken_stream_reply)
    monkeypatch.setattr("review_agent.services.generate_reply", complete_reply)

    frames = list(stream_message(chat, "普通问题"))

    joined = "".join(frames)
    assert "已生成部分内容" in joined
    assert "replace" in joined
    assert "非流式完整回复" in joined
    assert "done" in joined
    stored_reply = Message.objects.get(conversation=chat, role=Message.Role.ASSISTANT)
    assert stored_reply.content == "非流式完整回复"
|
||||
|
||||
|
||||
def test_stream_message_uses_llm_router_for_attachment_reader(
    monkeypatch,
    settings,
    tmp_path,
    django_user_model,
):
    """A router decision of ``attachment_reader`` makes the stream include attachment text.

    ``route_message_intent`` is patched to return that route, so the prompt
    wording itself does not have to mention the attachment.
    """
    settings.MEDIA_ROOT = tmp_path
    account = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=account, title="会话")

    resume_file = tmp_path / "uploads" / "resume.txt"
    resume_file.parent.mkdir(parents=True)
    resume_file.write_text("项目经历:负责审核智能体附件解析模块。", encoding="utf-8")
    FileAttachment.objects.create(
        conversation=chat,
        user=account,
        original_name="resume.txt",
        storage_path="uploads/resume.txt",
        file_size=resume_file.stat().st_size,
    )

    def route_to_attachment_reader(conversation, content):
        # Canned routing decision standing in for the real router.
        return SkillRoute(
            action="attachment_reader",
            skill_name="attachment_reader",
            confidence=0.91,
            reason="需要读取上传简历。",
            source="llm",
        )

    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_attachment_reader)

    frames = list(stream_message(chat, "帮我整理其中的项目经历"))

    joined = "".join(frames)
    assert "附件解析结果" in joined
    assert "审核智能体附件解析模块" in joined
    assert "模型调用失败" not in joined
|
||||
|
||||
|
||||
def test_stream_message_uses_llm_router_for_file_summary(monkeypatch, settings, django_user_model):
    """A router decision of ``file_summary`` starts the workflow and creates a batch.

    ``route_message_intent`` is patched, so the prompt carries no explicit
    summary keywords.
    """
    settings.FILE_SUMMARY_ASYNC = False
    account = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=account, title="会话")
    attachment_fields = {
        "conversation": chat,
        "user": account,
        "original_name": "a.docx",
        "storage_path": "x/a.docx",
        "file_size": 1,
    }
    FileAttachment.objects.create(**attachment_fields)

    def route_to_file_summary(conversation, content):
        # Canned routing decision standing in for the real router.
        return SkillRoute(
            action="file_summary",
            workflow_type="file_summary",
            confidence=0.93,
            reason="需要执行文件目录与页数汇总。",
            source="llm",
        )

    monkeypatch.setattr("review_agent.services.route_message_intent", route_to_file_summary)

    frames = list(stream_message(chat, "处理一下这批资料"))

    joined = "".join(frames)
    assert "workflow_started" in joined
    assert "\"workflow_type\": \"file_summary\"" in joined
    created_batches = FileSummaryBatch.objects.filter(conversation=chat)
    assert created_batches.exists()
|
||||
41
tests/test_llm_streaming.py
Normal file
41
tests/test_llm_streaming.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import io
|
||||
from urllib import request
|
||||
|
||||
import pytest
|
||||
|
||||
from review_agent.llm import stream_reply
|
||||
from review_agent.models import Conversation
|
||||
|
||||
|
||||
# Module-level mark: applies ``django_db`` to every test defined in this file.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
class FakeStreamingResponse:
    """Iterable context manager standing in for a streaming HTTP response.

    Iterating yields raw server-sent-event lines as ``bytes``. With no
    argument the payload is ``DEFAULT_CHUNKS``: a content delta ``"A"``, a
    malformed (non-JSON) data line, a content delta ``"B"`` and the
    ``[DONE]`` terminator.

    Args:
        chunks: Optional iterable of ``bytes`` lines to yield in place of
            ``DEFAULT_CHUNKS``, so other tests can reuse the fake with a
            different payload.
    """

    DEFAULT_CHUNKS = (
        b'data: {"choices":[{"delta":{"content":"A"}}]}\n\n',
        b"data: not-json\n\n",
        b'data: {"choices":[{"delta":{"content":"B"}}]}\n\n',
        b"data: [DONE]\n\n",
    )

    def __init__(self, chunks=None):
        # Snapshot into a tuple so the response can be iterated repeatedly
        # and is unaffected by later mutation of the caller's sequence.
        self._chunks = tuple(self.DEFAULT_CHUNKS if chunks is None else chunks)

    def __iter__(self):
        """Return a fresh iterator over the configured chunks."""
        return iter(self._chunks)

    def __enter__(self):
        """Return the response itself, as ``with ... as response`` expects."""
        return self

    def __exit__(self, exc_type, exc, traceback):
        """Return ``False`` so exceptions raised inside the ``with`` body propagate."""
        return False
|
||||
|
||||
|
||||
def test_stream_reply_skips_malformed_sse_data(monkeypatch, settings, django_user_model):
    """``stream_reply`` yields only the valid deltas and ignores the non-JSON line."""
    llm_settings = {
        "LLM_API_KEY": "key",
        "LLM_MODEL": "model",
        "LLM_BASE_URL": "https://example.test/v1",
    }
    for setting_name, setting_value in llm_settings.items():
        setattr(settings, setting_name, setting_value)

    def fake_urlopen(req, timeout):
        # Replaces the network call with the canned SSE stream.
        return FakeStreamingResponse()

    monkeypatch.setattr(request, "urlopen", fake_urlopen)
    account = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=account, title="会话")

    chunks = list(stream_reply(chat, "你好"))

    assert chunks == ["A", "B"]
|
||||
Reference in New Issue
Block a user