merge: 自动汇总文件目录页数

This commit is contained in:
2026-06-06 10:28:14 +08:00
48 changed files with 3029 additions and 8 deletions

View File

@@ -18,3 +18,20 @@ python manage.py runserver
- 登录页http://127.0.0.1:8000/login/ - 登录页http://127.0.0.1:8000/login/
- 首页http://127.0.0.1:8000/ - 首页http://127.0.0.1:8000/
- 管理后台http://127.0.0.1:8000/admin/ - 管理后台http://127.0.0.1:8000/admin/
## 文件汇总依赖
自动汇总文件目录与页数功能使用轻量 Python 库读取 PDF、Word、Excel、PowerPoint 文件。
Docker 或生产环境如需处理 `.7z`、`.rar` 压缩包,还需要安装系统 `7z`/`p7zip`
命令,并确认以下命令可用:
```bash
7z
7z i
```
LibreOffice 不是必需依赖,仅作为未来增强老格式文档解析的可选能力。
上传原始文件、批次工作目录和导出文件默认存储在 Django `MEDIA_ROOT` 下的
`file_summary/users/<user_id>/<conversation_id>/` 或批次 `work_dir` 目录中。生产环境
需要把 `MEDIA_ROOT` 挂载到持久化卷,并纳入备份或归档策略。

View File

@@ -1,11 +1,12 @@
from django.contrib import admin from django.contrib import admin
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
from django.urls import path from django.urls import include, path
from review_agent.views import stream_chat, workspace from review_agent.views import stream_chat, workspace
urlpatterns = [ urlpatterns = [
path("", workspace, name="home"), path("", workspace, name="home"),
path("", include("review_agent.urls")),
path("chat/stream/", stream_chat, name="chat_stream"), path("chat/stream/", stream_chat, name="chat_stream"),
path( path(
"login/", "login/",

View File

@@ -0,0 +1,74 @@
# 自动汇总前端线框图
## 评审目标
在实现三栏页面前,先确认审核智能体工作台的信息架构、右侧文件汇总面板、工作流状态展示和移动端降级方式。
## 桌面端布局
```mermaid
flowchart LR
A["左栏:会话列表<br/>新对话 / 搜索 / 历史会话"] --> B["中栏:聊天区<br/>顶部导航 / 消息流 / 输入框"]
B --> C["右栏:文件汇总面板"]
C --> C1["上半区:上传区<br/>拖拽上传 / 选择文件 / 上传状态"]
C --> C2["中段:当前对话附件<br/>文件名 / 版本 / 大小 / 状态 / 删除"]
C --> C3["下半区:工作流卡片<br/>批次号 / 节点进度 / 下载入口"]
```
## 右侧面板结构
```mermaid
flowchart TB
P["文件汇总面板"] --> U["上传拖拽区"]
U --> U0["无附件:提示上传文件或压缩包"]
U --> U1["上传中:显示文件名和处理中状态"]
U --> U2["上传失败:展示错误并允许重试"]
P --> L["附件列表"]
L --> L1["active 版本优先展示"]
L --> L2["历史版本保留展示"]
L --> L3["逻辑删除后从默认候选移除"]
P --> W["工作流卡片列表"]
W --> W1["运行中:节点逐项更新"]
W --> W2["成功:展示 Markdown/Excel 下载"]
W --> W3["失败:展示失败节点和错误说明"]
```
## 工作流状态流转
```mermaid
stateDiagram-v2
[*] --> Pending: 用户上传附件
Pending --> Running: 发送自动汇总提示词
Running --> Extracting: 固化附件
Extracting --> Scanning: 解压完成或跳过
Scanning --> Counting: 生成文件清单
Counting --> Detecting: 页数统计完成
Detecting --> Reporting: 产品名识别完成
Reporting --> Success: 生成报告与下载
Running --> Failed: 批次级异常
Extracting --> Failed: 解压安全检查失败
Reporting --> Failed: 报告生成失败
Success --> Restored: 刷新页面后状态恢复
Failed --> Restored: 刷新页面后状态恢复
```
## 移动端布局
```mermaid
flowchart TB
M["移动端工作台"] --> T["顶部:侧栏按钮 / 当前页面 / 用户菜单"]
T --> Chat["聊天区优先展示"]
Chat --> Composer["底部输入框"]
T --> Drawer["会话侧栏抽屉"]
Chat --> Panel["文件汇总面板下移或折叠"]
Panel --> Upload["上传区"]
Panel --> Workflow["工作流卡片"]
```
## 关键评审点
- 桌面端保持左侧会话、中间聊天、右侧文件汇总三栏,不改变现有聊天主路径。
- 右侧面板上半部分用于上传和附件列表,下半部分用于批次工作流卡片。
- 工作流卡片节点顺序固定为:附件固化、压缩包解压、文件扫描、页数统计、产品识别、报告输出、完成。
- 助手消息中的文件汇总结果使用安全 Markdown 渲染,用户消息仍按纯文本转义。
- 移动端优先保证聊天可用,文件汇总面板折叠或下移,不能遮挡输入框。

3
pytest.ini Normal file
View File

@@ -0,0 +1,3 @@
[pytest]
DJANGO_SETTINGS_MODULE = config.settings
python_files = tests.py test_*.py *_tests.py

View File

@@ -1 +1,8 @@
Django>=5.0,<6.0 Django>=5.0,<6.0
pypdf>=5.0
python-docx>=1.1
python-pptx>=1.0
openpyxl>=3.1
xlrd>=2.0
olefile>=0.47
py7zr>=0.21

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,4 @@
from pathlib import Path
# Relative directory prefix for uploaded attachments; the storage module joins
# it beneath MEDIA_ROOT together with the user id and conversation id.
ATTACHMENT_ROOT = Path("file_summary") / "users"

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from review_agent.models import FileSummaryBatch, WorkflowEvent
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Persist one workflow event for ``batch`` and return the created row.

    A missing (or otherwise falsy) ``payload`` is stored as an empty dict.
    """
    event_payload = payload if payload else {}
    return WorkflowEvent.objects.create(
        batch=batch,
        event_type=event_type,
        payload=event_payload,
    )
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Turn a workflow event into a JSON-friendly dict.

    The creation timestamp is rendered with ``isoformat()``.
    """
    created_at = event.created_at.isoformat()
    return dict(
        id=event.pk,
        event_type=event.event_type,
        payload=event.payload,
        created_at=created_at,
    )

View File

@@ -0,0 +1,12 @@
from __future__ import annotations
from pathlib import Path
from django.conf import settings
def resolve_storage_path(storage_path: str) -> Path:
    """Return ``storage_path`` unchanged when absolute, otherwise anchored at ``MEDIA_ROOT``."""
    candidate = Path(storage_path)
    return candidate if candidate.is_absolute() else Path(settings.MEDIA_ROOT) / candidate

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import subprocess
from pathlib import Path
from zipfile import ZipFile
import py7zr
# Lower-case suffixes (without the dot) that extract_archive accepts; any other
# suffix makes it return an empty list.
ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"}
def _ensure_inside_target(path: Path, target_dir: Path) -> None:
    """Raise ``ValueError`` unless ``path`` resolves to ``target_dir`` or somewhere beneath it.

    Both paths are resolved first, so ``..`` segments and symlinks are taken
    into account before the containment check.
    """
    root = target_dir.resolve()
    candidate = path.resolve()
    if candidate == root or root in candidate.parents:
        return
    raise ValueError("解压路径必须位于批次工作目录内。")
def _safe_member_path(target_dir: Path, member_name: str) -> Path:
    """Join an archive member name onto ``target_dir`` and return the result.

    Raises ``ValueError`` (via ``_ensure_inside_target``) when the joined path
    would land outside ``target_dir``.
    """
    member_path = target_dir / member_name
    _ensure_inside_target(member_path, target_dir)
    return member_path
def extract_archive(archive_path: str | Path, target_dir: str | Path) -> list[Path]:
    """Extract ``archive_path`` into ``target_dir`` and return the extracted file paths.

    ``target_dir`` is created if needed, even when the archive type turns out
    to be unsupported. A suffix outside ``ARCHIVE_EXTENSIONS`` yields an empty
    list instead of an error.
    """
    source = Path(archive_path)
    destination = Path(target_dir)
    destination.mkdir(parents=True, exist_ok=True)

    suffix = source.suffix.lower().lstrip(".")
    if suffix not in ARCHIVE_EXTENSIONS:
        return []

    # Everything accepted above that is not zip/7z goes to the rar extractor.
    extractor = {"zip": _extract_zip, "7z": _extract_7z}.get(suffix, _extract_rar)
    return extractor(source, destination)
def _extract_zip(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a zip archive member by member and return the written file paths.

    Each member path is validated with ``_safe_member_path`` before anything is
    written, so an entry that would escape ``target_dir`` raises ``ValueError``.
    Directory entries are created but not included in the returned list.
    """
    import shutil

    extracted: list[Path] = []
    with ZipFile(archive_path) as archive:
        for member in archive.infolist():
            destination = _safe_member_path(target_dir, member.filename)
            if member.is_dir():
                destination.mkdir(parents=True, exist_ok=True)
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            with archive.open(member) as source, destination.open("wb") as target:
                # Stream in chunks so a large member is never held fully in memory.
                shutil.copyfileobj(source, target)
            extracted.append(destination)
    return extracted
def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a 7z archive with py7zr and return the member paths that are files.

    Every member name is checked against ``target_dir`` before extraction
    starts; a name that would escape it raises ``ValueError`` and nothing is
    extracted.
    """
    with py7zr.SevenZipFile(archive_path, mode="r") as archive:
        member_names = archive.getnames()
        for member_name in member_names:
            _safe_member_path(target_dir, member_name)
        archive.extractall(path=target_dir)

    extracted: list[Path] = []
    for member_name in member_names:
        candidate = target_dir / member_name
        if candidate.is_file():
            extracted.append(candidate)
    return extracted
def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a rar archive with the system ``7z`` command.

    Returns every regular file found under ``target_dir`` after extraction.

    Raises:
        RuntimeError: when the ``7z`` executable is not installed, or when it
            exits with a non-zero status (its stderr/stdout is the message).
        ValueError: when a file under ``target_dir`` resolves outside it
            (for example through a symlink).
    """
    command = ["7z", "x", f"-o{target_dir}", str(archive_path), "-y"]
    try:
        result = subprocess.run(command, check=False, capture_output=True, text=True)
    except FileNotFoundError as exc:
        # A missing 7z binary is a deployment problem; report it the same way
        # as any other extraction failure instead of leaking a raw OS error.
        raise RuntimeError("rar 解压失败:未找到系统 7z 命令。") from exc
    if result.returncode != 0:
        raise RuntimeError(result.stderr or result.stdout or "rar 解压失败")

    extracted = [path for path in target_dir.rglob("*") if path.is_file()]
    # NOTE(review): this check runs after 7z has already written to disk, so it
    # detects entries that resolve outside target_dir but cannot prevent them.
    for path in extracted:
        _ensure_inside_target(path, target_dir)
    return extracted

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
from pathlib import Path
from openpyxl import Workbook
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return the batch's ``exports`` directory, creating it when missing.

    The directory lives under ``batch.work_dir`` when that is set, otherwise
    under the relative path ``media/file_summary/<batch_no>``.
    """
    if batch.work_dir:
        base = Path(batch.work_dir)
    else:
        base = Path("media") / "file_summary" / batch.batch_no
    exports = base / "exports"
    exports.mkdir(parents=True, exist_ok=True)
    return exports
def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile:
    """Write the batch summary to an .xlsx file and record it as an export.

    The workbook has two sheets: batch-level totals and one detail row per
    item ordered by ``file_index``. Returns the created ``ExportedSummaryFile``.
    """
    workbook = Workbook()

    overview = workbook.active
    overview.title = "汇总信息"
    overview_rows = (
        ("批次号", batch.batch_no),
        ("产品名称", batch.product_name or "-"),
        ("文件总数", batch.total_files),
        ("统计成功", batch.success_files),
        ("统计失败", batch.failed_files),
        ("不支持", batch.unsupported_files),
        ("不确定", batch.uncertain_files),
        ("总页数", batch.total_pages),
    )
    for label, value in overview_rows:
        overview.append([label, value])

    detail = workbook.create_sheet("文件明细")
    detail.append(["序号", "目录层级", "文件名", "类型", "页数", "路径", "状态", "重试次数", "异常说明"])
    # Item attributes in the same left-to-right order as the header row above.
    columns = (
        "file_index",
        "directory_level",
        "file_name",
        "file_type",
        "page_count",
        "relative_path",
        "statistics_status",
        "retry_count",
        "error_message",
    )
    for item in batch.items.order_by("file_index"):
        detail.append([getattr(item, column) for column in columns])

    output_path = _exports_dir(batch) / f"{batch.batch_no}-summary.xlsx"
    workbook.save(output_path)
    return ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=output_path.name,
        storage_path=str(output_path),
    )

View File

@@ -0,0 +1,49 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatch, FileSummaryItem
# Lower-case file types counted as "supported" when scan_files_to_items tallies
# the batch totals; every other type is counted as unsupported.
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
def _directory_level(relative_path: Path) -> str:
    """Return the parent directories of ``relative_path`` joined with ``/``.

    A bare file name (or an empty path) has no parent part and yields ``""``.
    """
    parent_parts = relative_path.parts[:-1]
    return "/".join(parent_parts)
def scan_files_to_items(*, batch: FileSummaryBatch, roots: list[Path]) -> list[FileSummaryItem]:
    """Create one ``FileSummaryItem`` per discovered file and update the batch totals.

    A root that is itself a file is always recorded, relative to its parent
    directory. A directory root is walked recursively in sorted order, skipping
    files whose name starts with ``.`` and files of size zero. Items are
    numbered from 1 in discovery order and start in the SKIPPED status.
    """
    discovered: list[tuple[Path, Path]] = []
    for raw_root in roots:
        base = Path(raw_root)
        if base.is_file():
            discovered.append((base.parent, base))
            continue
        candidates = sorted(entry for entry in base.rglob("*") if entry.is_file())
        discovered.extend(
            (base, entry)
            for entry in candidates
            if not (entry.name.startswith(".") or entry.stat().st_size == 0)
        )

    created: list[FileSummaryItem] = []
    for position, (base, file_path) in enumerate(discovered, start=1):
        relative = file_path.relative_to(base).as_posix()
        created.append(
            FileSummaryItem.objects.create(
                batch=batch,
                file_index=position,
                directory_level=_directory_level(Path(relative)),
                file_name=file_path.name,
                file_type=file_path.suffix.lower().lstrip("."),
                relative_path=relative,
                storage_path=str(file_path),
                statistics_status=FileSummaryItem.StatisticsStatus.SKIPPED,
            )
        )

    supported = sum(1 for item in created if item.file_type in SUPPORTED_EXTENSIONS)
    batch.total_files = len(created)
    batch.supported_files = supported
    batch.unsupported_files = len(created) - supported
    batch.save(update_fields=["total_files", "supported_files", "unsupported_files"])
    return created

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
# Lower-case suffixes (without the dot) that count_document_pages tries to
# handle; any other suffix is reported with status "unsupported".
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
@dataclass(frozen=True)
class PageCountResult:
    """Immutable outcome of counting the pages of one document."""

    # One of the status strings produced by count_document_pages:
    # "success", "uncertain", "unsupported" or "failed".
    status: str
    # Page (or sheet / slide) count; None when no count was obtained.
    page_count: int | None = None
    # Human-readable failure reason; empty when nothing went wrong.
    error_message: str = ""
def _docx_cached_page_count(file_path: Path) -> int | None:
    """Read the cached ``<Pages>`` value from a .docx package's docProps/app.xml.

    The value is whatever the authoring application stored when the file was
    last saved. Returns ``None`` when the part or the element is missing, or
    when the stored value is not a positive integer.
    """
    import re
    from zipfile import ZipFile

    with ZipFile(file_path) as package:
        try:
            with package.open("docProps/app.xml") as part:
                # app.xml is tiny in practice; cap the read because the file is
                # user-supplied.
                xml_bytes = part.read(1 << 20)
        except KeyError:
            return None
    # A regex is enough for one numeric element and avoids running an XML
    # parser over untrusted input.
    match = re.search(rb"<(?:\w+:)?Pages>\s*(\d+)\s*</(?:\w+:)?Pages>", xml_bytes)
    if match is None:
        return None
    pages = int(match.group(1))
    return pages if pages > 0 else None


def count_document_pages(path: str | Path) -> PageCountResult:
    """Count the pages (or sheets / slides) of a document, chosen by file suffix.

    Returns a ``PageCountResult`` whose status is:

    * ``"unsupported"`` – suffix not in ``SUPPORTED_EXTENSIONS``;
    * ``"success"`` – a count was obtained (PDF pages, workbook sheets,
      presentation slides, or the page count stored in a .docx);
    * ``"uncertain"`` – the file looks valid but exposes no page count
      (legacy .doc/.ppt, or a .docx without stored page metadata);
    * ``"failed"`` – parsing raised; ``error_message`` holds the exception text.

    This function never raises for parser errors; they are reported as "failed".
    """
    file_path = Path(path)
    ext = file_path.suffix.lower().lstrip(".")
    if ext not in SUPPORTED_EXTENSIONS:
        return PageCountResult(status="unsupported")
    try:
        if ext == "pdf":
            from pypdf import PdfReader

            return PageCountResult(status="success", page_count=len(PdfReader(str(file_path)).pages))
        if ext == "docx":
            from docx import Document

            # Opening with python-docx validates the package. Its core
            # properties carry no page count, so fall back to the cached value
            # in docProps/app.xml.
            properties = Document(str(file_path)).core_properties
            pages = getattr(properties, "pages", None) or _docx_cached_page_count(file_path)
            if pages:
                return PageCountResult(status="success", page_count=pages)
            return PageCountResult(status="uncertain")
        if ext == "xlsx":
            from openpyxl import load_workbook

            workbook = load_workbook(str(file_path), read_only=True, data_only=True)
            try:
                sheet_total = len(workbook.sheetnames)
            finally:
                # Read-only workbooks keep the file open until closed explicitly.
                workbook.close()
            return PageCountResult(status="success", page_count=sheet_total)
        if ext == "xls":
            import xlrd

            workbook = xlrd.open_workbook(str(file_path), on_demand=True)
            try:
                sheet_total = workbook.nsheets
            finally:
                # on_demand workbooks hold resources until released.
                workbook.release_resources()
            return PageCountResult(status="success", page_count=sheet_total)
        if ext == "pptx":
            from pptx import Presentation

            return PageCountResult(status="success", page_count=len(Presentation(str(file_path)).slides))
        if ext in {"doc", "ppt"}:
            import olefile

            # Legacy binary formats: only confirm the OLE container is valid.
            if olefile.isOleFile(str(file_path)):
                return PageCountResult(status="uncertain")
            return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。")
    except Exception as exc:
        # Deliberately broad: one unreadable file must not abort the batch.
        return PageCountResult(status="failed", error_message=str(exc))
    return PageCountResult(status="uncertain")

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatch
def detect_product_name(batch: FileSummaryBatch) -> str:
    """Guess the product name from the batch items and store it on the batch.

    Items are examined in ``file_index`` order; the first one that yields a
    name wins. An item nested in a directory yields its top-level directory
    name. Otherwise its file stem is used if it contains one of the product
    keywords. Returns ``""`` without saving anything when no item matches.
    A conversation whose title still starts with "新对话" is renamed after the
    detected product.
    """
    keywords = ("产品", "试剂盒", "说明书")

    def candidate_name(item) -> str:
        parts = Path(item.relative_path).parts
        if len(parts) > 1:
            return parts[0]
        stem = Path(item.file_name).stem
        return stem if any(keyword in stem for keyword in keywords) else ""

    product_name = next(
        (name for name in map(candidate_name, batch.items.order_by("file_index")) if name),
        "",
    )
    if not product_name:
        return ""

    batch.product_name = product_name
    batch.save(update_fields=["product_name"])

    conversation = batch.conversation
    if conversation.title.startswith("新对话"):
        conversation.title = f"{product_name}-文件汇总"
        conversation.save(update_fields=["title", "updated_at"])
    return product_name

View File

@@ -0,0 +1,65 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return (and create if necessary) the directory that holds this batch's exports.

    Uses ``<work_dir>/exports`` when the batch has a work directory, otherwise
    the relative path ``media/file_summary/<batch_no>/exports``.
    """
    fallback_root = Path("media") / "file_summary" / batch.batch_no
    export_root = Path(batch.work_dir) if batch.work_dir else fallback_root
    target = export_root / "exports"
    target.mkdir(parents=True, exist_ok=True)
    return target
def _table_cell(value: object) -> str:
    """Render ``value`` as text that is safe inside one Markdown table cell.

    Pipes are escaped and line breaks collapsed to spaces, because either one
    would otherwise split the row.
    """
    text = str(value)
    for line_break in ("\r\n", "\n", "\r"):
        text = text.replace(line_break, " ")
    return text.replace("|", "\\|")


def build_summary_table(batch: FileSummaryBatch) -> str:
    """Build the Markdown table listing every item of ``batch``.

    Rows follow ``file_index`` order. A missing directory, page count or error
    message is shown as ``-``. File names and error messages come from user
    uploads and parser exceptions, so each cell is passed through
    ``_table_cell`` to keep the table well-formed.
    """
    lines = [
        "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    for item in batch.items.order_by("file_index"):
        cells = (
            item.file_index,
            item.directory_level or "-",
            item.file_name,
            item.file_type,
            item.page_count if item.page_count is not None else "-",
            item.statistics_status,
            item.error_message or "-",
        )
        lines.append("| " + " | ".join(_table_cell(cell) for cell in cells) + " |")
    return "\n".join(lines)
def build_markdown_report(batch: FileSummaryBatch) -> str:
    """Assemble the full Markdown report for ``batch``.

    The report has four sections separated by blank lines: title with batch
    number, summary totals, the per-file table, and a fixed processing note.
    """
    title = f"# 文件目录与页数汇总报告\n\n批次号:{batch.batch_no}"

    bullets = [
        f"- 产品名称:{batch.product_name or '-'}",
        f"- 文件总数:{batch.total_files}",
        f"- 统计成功:{batch.success_files}",
        f"- 统计失败:{batch.failed_files}",
        f"- 不支持:{batch.unsupported_files}",
        f"- 不确定:{batch.uncertain_files}",
        f"- 总页数:{batch.total_pages}",
    ]
    overview = "## 汇总信息\n\n" + "\n".join(bullets)

    details = "## 文件明细\n\n" + build_summary_table(batch)
    notes = "## 处理说明\n\n单文件失败不会阻断批次,失败与不确定文件已在明细中标注。"

    return "\n\n".join((title, overview, details, notes))
def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFile, str]:
    """Write the Markdown report to the exports directory and record the export.

    Returns the created ``ExportedSummaryFile`` together with the Markdown
    summary table (the table only, not the whole report).
    """
    report_text = build_markdown_report(batch)
    report_path = _exports_dir(batch) / f"{batch.batch_no}-summary.md"
    report_path.write_text(report_text, encoding="utf-8")

    export_record = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name=report_path.name,
        storage_path=str(report_path),
    )
    summary_table = build_summary_table(batch)
    return export_record, summary_table

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,26 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
from .base import BaseSkill, SkillResult, WorkflowContext
class ArchiveExtractSkill(BaseSkill):
    """Skill that unpacks every archive attachment of the batch into its work directory."""

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Extract all archive attachments and report how many files were produced.

        When the batch has no work directory, nothing is extracted and the
        result carries ``extracted_count`` 0. Non-archive attachments are
        skipped. Extraction errors propagate to the caller.
        """
        # Test the raw value: a Path object is always truthy, so checking
        # Path(work_dir or "") would never detect a missing work directory and
        # would extract into the current working directory instead.
        work_dir = context.batch.work_dir
        if not work_dir:
            return SkillResult(success=True, data={"extracted_count": 0})

        target_dir = Path(work_dir)
        extracted_count = 0
        bindings = FileSummaryBatchAttachment.objects.filter(batch=context.batch).select_related(
            "attachment"
        )
        for binding in bindings:
            path = resolve_storage_path(binding.attachment.storage_path)
            if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
                continue
            extracted_count += len(extract_archive(path, target_dir))
        return SkillResult(success=True, data={"extracted_count": extracted_count})

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
from dataclasses import dataclass, field
from review_agent.models import FileSummaryBatch
@dataclass(frozen=True)
class WorkflowContext:
    """Immutable bundle of inputs handed to a skill when it runs."""

    # The batch the skill operates on.
    batch: FileSummaryBatch
@dataclass
class SkillResult:
    """Outcome returned by a skill run."""

    # False makes the workflow executor treat the node as failed.
    success: bool
    # Skill-specific result values; each instance gets its own dict.
    data: dict = field(default_factory=dict)
    # Optional explanation; the executor uses it as the failure message.
    message: str = ""
class BaseSkill:
    """Base class for workflow skills; subclasses set ``name`` and implement ``run``."""

    # Registry key; the registry rejects skills whose name is empty.
    name = ""

    def run(self, context: WorkflowContext) -> SkillResult:
        """Execute the skill for ``context``; must be overridden."""
        raise NotImplementedError

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
from review_agent.models import FileSummaryItem
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
from .base import BaseSkill, SkillResult, WorkflowContext
class DocumentPageCountSkill(BaseSkill):
    """Skill that counts pages for every batch item and rolls the totals up to the batch."""

    name = "document_page_count"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Count pages item by item, then store per-status totals on the batch.

        Items of an unsupported type are only marked UNSUPPORTED. Every other
        item is counted with up to three attempts (retrying only on "failed");
        ``retry_count`` records how many retries were used. A failing item
        never aborts the run, so the result is always successful.
        """
        statuses = FileSummaryItem.StatisticsStatus
        batch = context.batch
        tally = {"success": 0, "failed": 0, "unsupported": 0, "uncertain": 0}
        page_total = 0

        for item in batch.items.order_by("file_index"):
            if item.file_type not in SUPPORTED_EXTENSIONS:
                item.statistics_status = statuses.UNSUPPORTED
                tally["unsupported"] += 1
                item.save(update_fields=["statistics_status", "updated_at"])
                continue

            result = None
            for retries_used in range(3):
                result = count_document_pages(item.storage_path)
                item.retry_count = retries_used
                if result.status != "failed":
                    break

            item.statistics_status = result.status
            item.page_count = result.page_count
            item.error_message = result.error_message
            item.save(
                update_fields=[
                    "statistics_status",
                    "page_count",
                    "retry_count",
                    "error_message",
                    "updated_at",
                ]
            )

            if result.status == statuses.SUCCESS:
                tally["success"] += 1
                page_total += result.page_count or 0
            elif result.status == statuses.UNCERTAIN:
                tally["uncertain"] += 1
            elif result.status == statuses.UNSUPPORTED:
                tally["unsupported"] += 1
            else:
                tally["failed"] += 1

        batch.success_files = tally["success"]
        batch.failed_files = tally["failed"]
        batch.unsupported_files = tally["unsupported"]
        batch.uncertain_files = tally["uncertain"]
        batch.total_pages = page_total
        batch.save(
            update_fields=[
                "success_files",
                "failed_files",
                "unsupported_files",
                "uncertain_files",
                "total_pages",
            ]
        )
        return SkillResult(success=True)

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.inventory import scan_files_to_items
from .base import BaseSkill, SkillResult, WorkflowContext
class FileInventorySkill(BaseSkill):
    """Skill that turns the batch's attachments into ``FileSummaryItem`` rows."""

    name = "file_inventory"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Scan every bound attachment path and report the number of items created."""
        # NOTE(review): only the attachment paths are scanned. Files that the
        # archive_extract skill unpacked into batch.work_dir are not among
        # these roots - confirm whether work_dir should be scanned as well.
        bindings = FileSummaryBatchAttachment.objects.filter(batch=context.batch)
        roots = []
        for binding in bindings:
            roots.append(resolve_storage_path(binding.attachment.storage_path))
        created = scan_files_to_items(batch=context.batch, roots=roots)
        return SkillResult(success=True, data={"total_files": len(created)})

View File

@@ -0,0 +1,12 @@
from __future__ import annotations
from ..services.product_detect import detect_product_name
from .base import BaseSkill, SkillResult, WorkflowContext
class ProductDetectSkill(BaseSkill):
    """Skill wrapper around ``detect_product_name``."""

    name = "product_detect"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Detect the product name for the batch; an empty name still counts as success."""
        detected = detect_product_name(context.batch)
        payload = {"product_name": detected}
        return SkillResult(success=True, data=payload)

View File

@@ -0,0 +1,22 @@
from __future__ import annotations
from .base import BaseSkill, SkillResult, WorkflowContext
class SkillRegistry:
    """Name-keyed collection of skill instances."""

    def __init__(self):
        self._skills: dict[str, BaseSkill] = {}

    def register(self, skill: BaseSkill) -> None:
        """Add ``skill`` under its ``name``, replacing any skill registered under that name.

        Raises ``ValueError`` when the skill declares no name.
        """
        skill_name = skill.name
        if not skill_name:
            raise ValueError("Skill 必须声明 name。")
        self._skills[skill_name] = skill

    def get(self, name: str) -> BaseSkill:
        """Return the skill registered as ``name``; raise ``KeyError`` when unknown."""
        try:
            skill = self._skills[name]
        except KeyError as exc:
            raise KeyError(f"Skill 未注册:{name}") from exc
        else:
            return skill

    def execute(self, name: str, context: WorkflowContext) -> SkillResult:
        """Look up the skill called ``name`` and run it with ``context``."""
        skill = self.get(name)
        return skill.run(context)

View File

@@ -0,0 +1,33 @@
from __future__ import annotations
from django.urls import reverse
from review_agent.models import Message
from ..services.export_excel import generate_excel_export
from ..services.report import generate_markdown_report
from .base import BaseSkill, SkillResult, WorkflowContext
class SummaryReportSkill(BaseSkill):
    """Skill that writes both report files and posts the result into the conversation."""

    name = "summary_report"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Generate the Markdown and Excel exports and add an assistant message.

        The message contains the summary table plus download links for both
        exports. The result data carries the primary keys of the two exports.
        """
        batch = context.batch
        markdown_export, summary_table = generate_markdown_report(batch)
        excel_export = generate_excel_export(batch)

        markdown_url = reverse("file_summary_export_download", args=[markdown_export.pk])
        excel_url = reverse("file_summary_export_download", args=[excel_export.pk])

        download_links = f"[下载 Markdown 报告]({markdown_url}) | [下载 Excel 明细]({excel_url})"
        body = "\n\n".join(("文件目录与页数汇总已完成。", f"{summary_table}", download_links))
        Message.objects.create(
            conversation=batch.conversation,
            role=Message.Role.ASSISTANT,
            content=body,
        )

        export_ids = {
            "markdown_export_id": markdown_export.pk,
            "excel_export_id": excel_export.pk,
        }
        return SkillResult(success=True, data=export_ids)

View File

@@ -0,0 +1,88 @@
from __future__ import annotations
from pathlib import Path
from uuid import uuid4
from django.conf import settings
from django.db import transaction
from django.utils.text import get_valid_filename
from review_agent.models import Conversation, FileAttachment
from .constants import ATTACHMENT_ROOT
def _safe_original_name(name: str) -> str:
    """Return a filesystem-safe version of an uploaded file's name.

    Only the final path component is kept and then sanitised with Django's
    ``get_valid_filename``. When no usable name can be derived (empty name,
    ``.``/``..``, or only stripped characters), a random ``upload-<hex>``
    name is returned instead.
    """
    from django.core.exceptions import SuspiciousFileOperation

    try:
        clean = get_valid_filename(Path(name or "").name)
    except SuspiciousFileOperation:
        # get_valid_filename raises rather than returning "" for unusable
        # names; fall through to the generated name, as the fallback intends.
        clean = ""
    return clean or f"upload-{uuid4().hex}"
def _relative_attachment_path(conversation: Conversation, filename: str, version_no: int) -> Path:
    """Build the storage path (relative to ``MEDIA_ROOT``) for one attachment version.

    Layout: ``<ATTACHMENT_ROOT>/<user_id>/<conversation_id>/attachments/<stored name>``.
    The stored name is ``<stem>_v<version>_<8 random hex chars><suffix>``.
    """
    name = Path(filename)
    stored_name = f"{name.stem}_v{version_no}_{uuid4().hex[:8]}{name.suffix}"
    return ATTACHMENT_ROOT.joinpath(
        str(conversation.user_id),
        str(conversation.pk),
        "attachments",
        stored_name,
    )
def _ensure_inside_media_root(path: Path) -> None:
    """Raise ``ValueError`` unless ``path`` resolves to ``MEDIA_ROOT`` or a location below it."""
    root = Path(settings.MEDIA_ROOT).resolve()
    candidate = path.resolve()
    if candidate == root or root in candidate.parents:
        return
    raise ValueError("上传路径必须位于 MEDIA_ROOT 内。")
@transaction.atomic
def save_uploaded_attachment(*, conversation: Conversation, user, uploaded_file) -> FileAttachment:
    """Store an uploaded file on disk and create its versioned attachment record.

    The version number is one above the highest existing version with the same
    sanitised name in this conversation. Earlier active versions of that name
    are deactivated, and the new record is created as the active one.

    Raises ``ValueError`` when the target path would fall outside ``MEDIA_ROOT``.
    """
    original_name = _safe_original_name(uploaded_file.name)
    same_name = FileAttachment.objects.filter(
        conversation=conversation,
        original_name=original_name,
    )
    latest = same_name.order_by("-version_no").first()
    version_no = latest.version_no + 1 if latest else 1

    relative_path = _relative_attachment_path(conversation, original_name, version_no)
    absolute_path = Path(settings.MEDIA_ROOT) / relative_path
    _ensure_inside_media_root(absolute_path)
    absolute_path.parent.mkdir(parents=True, exist_ok=True)

    # Write chunk by chunk so the upload is never held in memory as a whole.
    with absolute_path.open("wb") as target:
        for chunk in uploaded_file.chunks():
            target.write(chunk)

    same_name.filter(is_active=True).update(is_active=False)
    content_type = getattr(uploaded_file, "content_type", "") or ""
    return FileAttachment.objects.create(
        conversation=conversation,
        user=user,
        original_name=original_name,
        version_no=version_no,
        is_active=True,
        storage_path=relative_path.as_posix(),
        file_size=uploaded_file.size,
        content_type=content_type,
    )
def serialize_attachment(attachment: FileAttachment) -> dict[str, object]:
    """Convert an attachment into a JSON-friendly dict (timestamp as ISO text)."""
    copied_fields = (
        "original_name",
        "version_no",
        "is_active",
        "file_size",
        "content_type",
        "upload_status",
    )
    data: dict[str, object] = {"id": attachment.pk}
    for field_name in copied_fields:
        data[field_name] = getattr(attachment, field_name)
    data["created_at"] = attachment.created_at.isoformat()
    return data

View File

@@ -0,0 +1,124 @@
from django.contrib.auth.decorators import login_required
from pathlib import Path
from django.http import FileResponse, Http404, JsonResponse
from django.views.decorators.http import require_http_methods
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment
from review_agent.models import FileSummaryBatch, WorkflowEvent
from .events import serialize_event
from .storage import save_uploaded_attachment, serialize_attachment
def _conversation_for_user(user, conversation_id: int) -> Conversation:
    """Return the conversation with this id owned by ``user``, or raise ``Http404``."""
    owned = Conversation.objects.filter(pk=conversation_id, user=user)
    conversation = owned.first()
    if conversation:
        return conversation
    raise Http404("对话不存在。")
@require_http_methods(["POST", "GET"])
@login_required
def attachments(request, conversation_id: int):
    """List (GET) or upload (POST) the attachments of one of the user's conversations.

    POST expects one or more files in the ``files`` field and answers 400 when
    none are sent. Both methods respond with ``{"attachments": [...]}``.
    """
    conversation = _conversation_for_user(request.user, conversation_id)

    if request.method != "POST":
        listing = FileAttachment.objects.filter(conversation=conversation).order_by(
            "original_name",
            "-version_no",
        )
        return JsonResponse({"attachments": [serialize_attachment(row) for row in listing]})

    uploads = request.FILES.getlist("files")
    if not uploads:
        return JsonResponse({"error": "请选择至少一个文件。"}, status=400)

    stored = []
    for upload in uploads:
        stored.append(
            save_uploaded_attachment(
                conversation=conversation,
                user=request.user,
                uploaded_file=upload,
            )
        )
    return JsonResponse({"attachments": [serialize_attachment(row) for row in stored]})
@require_http_methods(["DELETE"])
@login_required
def attachment_detail(request, conversation_id: int, attachment_id: int):
    """Soft-delete one attachment: mark it DELETED and inactive; the row is kept.

    Answers 404 when the conversation or the attachment does not belong to
    the requesting user.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    matches = FileAttachment.objects.filter(
        pk=attachment_id,
        conversation=conversation,
        user=request.user,
    )
    attachment = matches.first()
    if attachment is None:
        raise Http404("附件不存在。")

    attachment.is_active = False
    attachment.upload_status = FileAttachment.UploadStatus.DELETED
    attachment.save(update_fields=["upload_status", "is_active"])

    payload = {"ok": True, "attachment": serialize_attachment(attachment)}
    return JsonResponse(payload)
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return the current state of a batch and of its workflow nodes as JSON.

    Answers 404 when the batch does not exist or belongs to another user.
    Nodes are listed in creation (id) order.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if batch is None:
        raise Http404("批次不存在。")

    batch_payload = {
        "id": batch.pk,
        "batch_no": batch.batch_no,
        "status": batch.status,
        "product_name": batch.product_name,
        "total_files": batch.total_files,
        "success_files": batch.success_files,
        "failed_files": batch.failed_files,
        "total_pages": batch.total_pages,
    }
    node_payloads = []
    for node in batch.node_runs.order_by("id"):
        node_payloads.append(
            {
                "node_code": node.node_code,
                "node_name": node.node_name,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            }
        )
    return JsonResponse({"batch": batch_payload, "nodes": node_payloads})
@require_http_methods(["GET"])
@login_required
def batch_events(request, batch_id: int):
    """Return the batch's workflow events with an id greater than ``?after=``.

    A missing, empty or non-numeric ``after`` value is treated as 0, which
    returns every event. Answers 404 for unknown or foreign batches.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if batch is None:
        raise Http404("批次不存在。")

    raw_cursor = request.GET.get("after") or "0"
    try:
        cursor = int(raw_cursor)
    except ValueError:
        cursor = 0

    newer_events = WorkflowEvent.objects.filter(batch=batch, pk__gt=cursor).order_by("id")
    serialized = [serialize_event(event) for event in newer_events]
    return JsonResponse({"events": serialized})
@require_http_methods(["GET"])
@login_required
def export_download(request, export_id: int):
    """Send an exported summary file as a download.

    Raises ``Http404`` when the export record is unknown or belongs to another
    user's batch; answers a JSON 404 when the record exists but its file is
    gone from disk.
    """
    owned_exports = ExportedSummaryFile.objects.filter(
        pk=export_id,
        batch__user=request.user,
    )
    exported = owned_exports.first()
    if exported is None:
        raise Http404("导出文件不存在。")

    file_path = Path(exported.storage_path)
    if file_path.exists():
        # FileResponse takes ownership of the handle it is given.
        return FileResponse(file_path.open("rb"), as_attachment=True, filename=exported.file_name)
    return JsonResponse({"error": "文件不存在。"}, status=404)

View File

@@ -0,0 +1,154 @@
from __future__ import annotations
from threading import Thread
from uuid import uuid4
from django.db import transaction
from django.utils import timezone
from review_agent.models import (
Conversation,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
Message,
WorkflowNodeRun,
)
from .events import record_event
from .skills.archive_extract import ArchiveExtractSkill
from .skills.base import WorkflowContext
from .skills.document_page_count import DocumentPageCountSkill
from .skills.file_inventory import FileInventorySkill
from .skills.product_detect import ProductDetectSkill
from .skills.registry import SkillRegistry
from .skills.summary_report import SummaryReportSkill
# Workflow nodes as (node_code, display name, skill name) tuples. One
# WorkflowNodeRun row is created per entry when a batch is created. An empty
# skill name means the executor runs no skill for that node and just marks it
# as done.
NODE_DEFINITIONS = [
("upload", "附件固化", ""),
("extract", "压缩包解压", "archive_extract"),
("inventory", "文件扫描", "file_inventory"),
("page_count", "页数统计", "document_page_count"),
("product_detect", "产品识别", "product_detect"),
("report", "报告输出", "summary_report"),
("complete", "完成", ""),
]
def default_skill_registry() -> SkillRegistry:
    """Return a fresh registry populated with the five built-in workflow skills."""
    builtin_skills = (
        ArchiveExtractSkill,
        FileInventorySkill,
        DocumentPageCountSkill,
        ProductDetectSkill,
        SummaryReportSkill,
    )
    registry = SkillRegistry()
    for skill_class in builtin_skills:
        registry.register(skill_class())
    return registry
def build_batch_no() -> str:
    """Return a batch number of the form ``FS-<local timestamp>-<6 random hex chars>``."""
    timestamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    random_part = uuid4().hex[:6]
    return f"FS-{timestamp}-{random_part}"
@transaction.atomic
def create_file_summary_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
) -> FileSummaryBatch:
    """Create a batch for the conversation's active attachments.

    Inside one transaction this locks the active, non-deleted attachments,
    binds each of them to a new batch (marking it BOUND), creates one node-run
    row per entry of ``NODE_DEFINITIONS`` and records a ``workflow_created``
    event.

    Raises ``ValueError`` when the conversation has no usable attachment.
    """
    candidates = (
        FileAttachment.objects.select_for_update()
        .filter(conversation=conversation, is_active=True)
        .exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        .order_by("original_name", "-created_at")
    )
    active_attachments = list(candidates)
    if not active_attachments:
        raise ValueError("当前对话没有可用附件。")

    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        batch_no=build_batch_no(),
    )

    for attachment in active_attachments:
        FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment)
        attachment.upload_status = FileAttachment.UploadStatus.BOUND
        attachment.save(update_fields=["upload_status"])

    for definition in NODE_DEFINITIONS:
        node_code, node_name = definition[0], definition[1]
        WorkflowNodeRun.objects.create(batch=batch, node_code=node_code, node_name=node_name)

    created_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", created_payload)
    return batch
class WorkflowExecutor:
    """Runs the nodes of one batch in order and keeps batch, node and event rows updated."""

    def __init__(self, batch: FileSummaryBatch, registry: SkillRegistry | None = None):
        self.batch = batch
        self.registry = registry or default_skill_registry()

    def run(self) -> None:
        """Execute every node of the batch; never raises.

        Any exception from a node marks the batch FAILED with the exception
        text and records a ``workflow_failed`` event. Otherwise the batch ends
        as SUCCESS with a ``workflow_completed`` event.
        """
        batch = self.batch
        batch.status = FileSummaryBatch.Status.RUNNING
        batch.started_at = timezone.now()
        batch.save(update_fields=["status", "started_at"])
        record_event(batch, "workflow_started", {"batch_id": batch.pk})

        try:
            for node in batch.node_runs.order_by("id"):
                self._run_node(node)
        except Exception as exc:
            failure_text = str(exc)
            batch.status = FileSummaryBatch.Status.FAILED
            batch.error_message = failure_text
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(batch, "workflow_failed", {"message": failure_text})
        else:
            batch.status = FileSummaryBatch.Status.SUCCESS
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "finished_at"])
            record_event(batch, "workflow_completed", {"batch_id": batch.pk})

    def _emit_progress(self, node: WorkflowNodeRun) -> None:
        """Record a ``node_progress`` event carrying the node's current state."""
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress},
        )

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Run one node: mark it running, execute its skill (if any), mark it done.

        Raises ``RuntimeError`` when the skill reports ``success=False``;
        exceptions raised by the skill itself propagate unchanged.
        """
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        self._emit_progress(node)

        # The first definition with a matching code decides which skill runs.
        skill_name = ""
        for code, _display_name, configured_skill in NODE_DEFINITIONS:
            if code == node.node_code:
                skill_name = configured_skill
                break

        # NOTE(review): when the skill raises or fails, this node row is left
        # in RUNNING status; only the batch is marked failed by run(). Confirm
        # whether the node should get a failed status as well.
        if skill_name:
            outcome = self.registry.execute(skill_name, WorkflowContext(batch=self.batch))
            if not outcome.success:
                raise RuntimeError(outcome.message or f"{node.node_name}执行失败")

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        self._emit_progress(node)
def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None:
    """Run the workflow for ``batch``, inline or in a background thread.

    With ``async_run=False`` the workflow runs in the calling thread and this
    function returns once it has finished. Otherwise a daemon thread is
    started and the call returns immediately.
    """
    executor = WorkflowExecutor(batch)
    if not async_run:
        executor.run()
        return

    def _run_and_release() -> None:
        from django.db import connections

        try:
            executor.run()
        finally:
            # The worker thread gets its own database connections, and
            # Django's request cycle never cleans them up; close them here so
            # every run does not leak a connection.
            connections.close_all()

    # NOTE(review): a daemon thread is killed when the process exits, which can
    # leave the batch in the running state.
    Thread(target=_run_and_release, daemon=True).start()

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
from dataclasses import dataclass
from review_agent.models import Conversation, FileAttachment
# Substrings that mark a user message as a file-summary request; a message
# matches when it contains at least one of them.
TRIGGER_KEYWORDS = ("自动汇总", "文件目录", "页数", "目录与页数", "文件清单")
@dataclass(frozen=True)
class TriggerResult:
    """Immutable decision on whether a message should start a workflow."""

    # True when the workflow should be started.
    should_start: bool
    # Workflow identifier; set to "file_summary" when should_start is True.
    workflow_type: str = ""
    # Why the workflow is not started ("not_matched" or "missing_attachment").
    reason: str = ""
def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the file-summary workflow.

    The message must contain one of ``TRIGGER_KEYWORDS`` and the conversation
    must have at least one active, non-deleted attachment. Otherwise the
    result carries the reason ``"not_matched"`` or ``"missing_attachment"``.
    """
    text = (content or "").strip()
    if all(keyword not in text for keyword in TRIGGER_KEYWORDS):
        return TriggerResult(should_start=False, reason="not_matched")

    usable_attachments = FileAttachment.objects.filter(
        conversation=conversation,
        is_active=True,
    ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    if usable_attachments.exists():
        return TriggerResult(should_start=True, workflow_type="file_summary")
    return TriggerResult(should_start=False, reason="missing_attachment")

View File

@@ -0,0 +1,481 @@
# Generated by Django 5.2.14 on 2026-06-05 17:09
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """Create the tables, indexes and unique constraints for file summaries.

    Adds seven models to ``review_agent``: FileAttachment, FileSummaryBatch,
    ExportedSummaryFile, FileSummaryBatchAttachment, FileSummaryItem,
    WorkflowEvent and WorkflowNodeRun. The header comment of this file marks it
    as generated by Django, so the operations below should mirror the model
    definitions rather than be edited by hand.
    """

    # Must run after the app's initial migration and after the (swappable)
    # user model exists, because several tables reference both.
    dependencies = [
        ("review_agent", "0001_initial"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # --- Table creation -------------------------------------------------
        migrations.CreateModel(
            name="FileAttachment",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("original_name", models.CharField(max_length=255)),
                ("version_no", models.PositiveIntegerField(default=1)),
                ("is_active", models.BooleanField(default=True)),
                ("storage_path", models.CharField(max_length=500)),
                ("file_size", models.BigIntegerField(default=0)),
                (
                    "content_type",
                    models.CharField(blank=True, default="", max_length=120),
                ),
                (
                    "upload_status",
                    models.CharField(
                        choices=[
                            ("uploaded", "已上传"),
                            ("bound", "已绑定"),
                            ("deleted", "已删除"),
                        ],
                        default="uploaded",
                        max_length=20,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "conversation",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="file_attachments",
                        to="review_agent.conversation",
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="review_file_attachments",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_attachment",
                "ordering": ["-created_at", "-id"],
            },
        ),
        migrations.CreateModel(
            name="FileSummaryBatch",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("batch_no", models.CharField(max_length=64, unique=True)),
                (
                    "product_name",
                    models.CharField(blank=True, default="", max_length=200),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "待执行"),
                            ("running", "执行中"),
                            ("success", "成功"),
                            ("failed", "失败"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("total_files", models.IntegerField(default=0)),
                ("supported_files", models.IntegerField(default=0)),
                ("success_files", models.IntegerField(default=0)),
                ("failed_files", models.IntegerField(default=0)),
                ("unsupported_files", models.IntegerField(default=0)),
                ("uncertain_files", models.IntegerField(default=0)),
                ("total_pages", models.IntegerField(default=0)),
                ("work_dir", models.CharField(blank=True, default="", max_length=500)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                (
                    "conversation",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="file_summary_batches",
                        to="review_agent.conversation",
                    ),
                ),
                (
                    "trigger_message",
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name="triggered_file_summary_batches",
                        to="review_agent.message",
                    ),
                ),
                (
                    "user",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="review_file_summary_batches",
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_summary_batch",
                "ordering": ["-created_at", "-id"],
            },
        ),
        migrations.CreateModel(
            name="ExportedSummaryFile",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "export_type",
                    models.CharField(
                        choices=[("markdown", "Markdown"), ("excel", "Excel")],
                        max_length=20,
                    ),
                ),
                ("file_name", models.CharField(max_length=255)),
                ("storage_path", models.CharField(max_length=500)),
                (
                    "status",
                    models.CharField(
                        choices=[("success", "成功"), ("failed", "失败")],
                        default="success",
                        max_length=20,
                    ),
                ),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="exports",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_exported_summary_file",
                "ordering": ["-created_at", "-id"],
            },
        ),
        migrations.CreateModel(
            name="FileSummaryBatchAttachment",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "source_role",
                    models.CharField(
                        choices=[("archive", "压缩包"), ("multi_file", "多文件")],
                        default="multi_file",
                        max_length=20,
                    ),
                ),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "attachment",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="batch_bindings",
                        to="review_agent.fileattachment",
                    ),
                ),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="batch_attachments",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_summary_batch_attachment",
            },
        ),
        migrations.CreateModel(
            name="FileSummaryItem",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("file_index", models.PositiveIntegerField()),
                (
                    "directory_level",
                    models.CharField(blank=True, default="", max_length=300),
                ),
                ("file_name", models.CharField(max_length=255)),
                ("file_type", models.CharField(max_length=20)),
                ("relative_path", models.CharField(max_length=500)),
                ("storage_path", models.CharField(max_length=500)),
                ("page_count", models.IntegerField(blank=True, null=True)),
                (
                    "statistics_status",
                    models.CharField(
                        choices=[
                            ("success", "成功"),
                            ("failed", "失败"),
                            ("unsupported", "不支持"),
                            ("uncertain", "不确定"),
                            ("skipped", "跳过"),
                        ],
                        default="skipped",
                        max_length=20,
                    ),
                ),
                ("retry_count", models.PositiveIntegerField(default=0)),
                ("error_message", models.TextField(blank=True, default="")),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="items",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_file_summary_item",
                "ordering": ["file_index", "id"],
            },
        ),
        migrations.CreateModel(
            name="WorkflowEvent",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("event_type", models.CharField(max_length=40)),
                ("payload", models.JSONField(default=dict)),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="events",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_workflow_event",
                "ordering": ["id"],
            },
        ),
        migrations.CreateModel(
            name="WorkflowNodeRun",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("node_code", models.CharField(max_length=40)),
                ("node_name", models.CharField(max_length=80)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("pending", "等待中"),
                            ("running", "执行中"),
                            ("retrying", "重试中"),
                            ("success", "成功"),
                            ("failed", "失败"),
                            ("skipped", "跳过"),
                        ],
                        default="pending",
                        max_length=20,
                    ),
                ),
                ("progress", models.PositiveIntegerField(default=0)),
                ("message", models.TextField(blank=True, default="")),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                (
                    "batch",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="node_runs",
                        to="review_agent.filesummarybatch",
                    ),
                ),
            ],
            options={
                "db_table": "ra_workflow_node_run",
            },
        ),
        # --- Indexes and unique constraints, grouped per model --------------
        migrations.AddIndex(
            model_name="fileattachment",
            index=models.Index(
                fields=["conversation", "created_at"],
                name="idx_ra_attachment_conv_created",
            ),
        ),
        migrations.AddIndex(
            model_name="fileattachment",
            index=models.Index(
                fields=["user", "created_at"], name="idx_ra_attachment_user_created"
            ),
        ),
        migrations.AddIndex(
            model_name="fileattachment",
            index=models.Index(
                fields=["conversation", "original_name", "is_active"],
                name="idx_ra_attachment_active",
            ),
        ),
        migrations.AddConstraint(
            model_name="fileattachment",
            constraint=models.UniqueConstraint(
                fields=("conversation", "original_name", "version_no"),
                name="uq_ra_attachment_conv_name_version",
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatch",
            index=models.Index(
                fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatch",
            index=models.Index(
                fields=["user", "created_at"], name="idx_ra_batch_user_created"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatch",
            index=models.Index(
                fields=["status", "created_at"], name="idx_ra_batch_status"
            ),
        ),
        migrations.AddIndex(
            model_name="exportedsummaryfile",
            index=models.Index(
                fields=["batch", "export_type"], name="idx_ra_export_batch_type"
            ),
        ),
        migrations.AddIndex(
            model_name="exportedsummaryfile",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_export_batch_created"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatchattachment",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_batch_attachment_batch"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummarybatchattachment",
            index=models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
        ),
        migrations.AddConstraint(
            model_name="filesummarybatchattachment",
            constraint=models.UniqueConstraint(
                fields=("batch", "attachment"), name="uq_ra_batch_attachment"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummaryitem",
            index=models.Index(
                fields=["batch", "file_index"], name="idx_ra_item_batch_index"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummaryitem",
            index=models.Index(
                fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"
            ),
        ),
        migrations.AddIndex(
            model_name="filesummaryitem",
            index=models.Index(
                fields=["batch", "file_type"], name="idx_ra_item_batch_type"
            ),
        ),
        migrations.AddConstraint(
            model_name="filesummaryitem",
            constraint=models.UniqueConstraint(
                fields=("batch", "relative_path"), name="uq_ra_item_batch_relative_path"
            ),
        ),
        migrations.AddIndex(
            model_name="workflowevent",
            index=models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
        ),
        migrations.AddIndex(
            model_name="workflowevent",
            index=models.Index(
                fields=["batch", "created_at"], name="idx_ra_event_batch_created"
            ),
        ),
        migrations.AddIndex(
            model_name="workflownoderun",
            index=models.Index(
                fields=["batch", "status"], name="idx_ra_node_batch_status"
            ),
        ),
        migrations.AddConstraint(
            model_name="workflownoderun",
            constraint=models.UniqueConstraint(
                fields=("batch", "node_code"), name="uq_ra_node_batch_code"
            ),
        ),
    ]

View File

@@ -42,3 +42,293 @@ class Message(models.Model):
def __str__(self) -> str: def __str__(self) -> str:
return f"{self.get_role_display()} - {self.conversation_id}" return f"{self.get_role_display()} - {self.conversation_id}"
class FileAttachment(models.Model):
    """Stores an uploaded file version for one conversation.

    A row is unique per (conversation, original_name, version_no), so several
    versions of the same file name can coexist inside one conversation. Rows
    are deleted together with their conversation or user (CASCADE).
    """

    class UploadStatus(models.TextChoices):
        # Each member is (stored value, display label).
        UPLOADED = "uploaded", "已上传"
        BOUND = "bound", "已绑定"
        DELETED = "deleted", "已删除"

    conversation = models.ForeignKey(
        Conversation,
        on_delete=models.CASCADE,
        related_name="file_attachments",
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="review_file_attachments",
    )
    # File name as provided at upload time (part of the uniqueness key).
    original_name = models.CharField(max_length=255)
    # Version counter within (conversation, original_name); starts at 1.
    version_no = models.PositiveIntegerField(default=1)
    # NOTE(review): presumably flags the current version of a file name; the
    # code that toggles it is not visible here -- confirm against the upload view.
    is_active = models.BooleanField(default=True)
    # Location of the stored file. NOTE(review): whether this is absolute or
    # relative to MEDIA_ROOT is not visible from this model -- confirm.
    storage_path = models.CharField(max_length=500)
    file_size = models.BigIntegerField(default=0)
    content_type = models.CharField(max_length=120, blank=True, default="")
    upload_status = models.CharField(
        max_length=20,
        choices=UploadStatus.choices,
        default=UploadStatus.UPLOADED,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_file_attachment"
        # Newest first; id breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        constraints = [
            models.UniqueConstraint(
                fields=["conversation", "original_name", "version_no"],
                name="uq_ra_attachment_conv_name_version",
            )
        ]
        indexes = [
            models.Index(
                fields=["conversation", "created_at"],
                name="idx_ra_attachment_conv_created",
            ),
            models.Index(
                fields=["user", "created_at"],
                name="idx_ra_attachment_user_created",
            ),
            models.Index(
                fields=["conversation", "original_name", "is_active"],
                name="idx_ra_attachment_active",
            ),
        ]

    def __str__(self) -> str:
        """Return the file name followed by its version, e.g. ``report.pdf v2``."""
        return f"{self.original_name} v{self.version_no}"
class FileSummaryBatch(models.Model):
    """Tracks one automatic file inventory and page-count workflow run.

    Each batch belongs to a conversation and a user, optionally remembers the
    chat message that triggered it, and carries aggregate counters plus
    lifecycle timestamps. ``batch_no`` is globally unique.
    """

    class Status(models.TextChoices):
        # Each member is (stored value, display label).
        PENDING = "pending", "待执行"
        RUNNING = "running", "执行中"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    conversation = models.ForeignKey(
        Conversation,
        on_delete=models.CASCADE,
        related_name="file_summary_batches",
    )
    user = models.ForeignKey(
        settings.AUTH_USER_MODEL,
        on_delete=models.CASCADE,
        related_name="review_file_summary_batches",
    )
    # Kept nullable with SET_NULL so deleting the triggering message does not
    # delete the batch.
    trigger_message = models.ForeignKey(
        Message,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name="triggered_file_summary_batches",
    )
    batch_no = models.CharField(max_length=64, unique=True)
    product_name = models.CharField(max_length=200, blank=True, default="")
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
    # Aggregate counters, all defaulting to 0. NOTE(review): presumably filled
    # in by the workflow nodes; the writers are not visible in this model.
    total_files = models.IntegerField(default=0)
    supported_files = models.IntegerField(default=0)
    success_files = models.IntegerField(default=0)
    failed_files = models.IntegerField(default=0)
    unsupported_files = models.IntegerField(default=0)
    uncertain_files = models.IntegerField(default=0)
    total_pages = models.IntegerField(default=0)
    # Working directory path for this batch; empty until one is assigned.
    work_dir = models.CharField(max_length=500, blank=True, default="")
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    # Null until the run starts / finishes.
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        db_table = "ra_file_summary_batch"
        # Newest first; id breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"),
            models.Index(fields=["user", "created_at"], name="idx_ra_batch_user_created"),
            models.Index(fields=["status", "created_at"], name="idx_ra_batch_status"),
        ]

    def __str__(self) -> str:
        """Return the batch number."""
        return self.batch_no
class FileSummaryBatchAttachment(models.Model):
    """Binds a workflow batch to the exact attachment versions it uses.

    A given attachment can be bound to a given batch only once
    (``uq_ra_batch_attachment``). Bindings are removed when either side is
    deleted (CASCADE on both foreign keys).
    """

    class SourceRole(models.TextChoices):
        # Each member is (stored value, display label).
        ARCHIVE = "archive", "压缩包"
        MULTI_FILE = "multi_file", "多文件"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="batch_attachments",
    )
    attachment = models.ForeignKey(
        FileAttachment,
        on_delete=models.CASCADE,
        related_name="batch_bindings",
    )
    # How the attachment takes part in the batch; defaults to MULTI_FILE.
    source_role = models.CharField(
        max_length=20,
        choices=SourceRole.choices,
        default=SourceRole.MULTI_FILE,
    )
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_file_summary_batch_attachment"
        constraints = [
            models.UniqueConstraint(
                fields=["batch", "attachment"],
                name="uq_ra_batch_attachment",
            )
        ]
        indexes = [
            models.Index(
                fields=["batch", "created_at"],
                name="idx_ra_batch_attachment_batch",
            ),
            models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
        ]
class FileSummaryItem(models.Model):
    """Stores one scanned file and its page-count result.

    Items belong to a batch and are unique per (batch, relative_path). The
    default ordering is by ``file_index`` and then ``id``.
    """

    class StatisticsStatus(models.TextChoices):
        # Each member is (stored value, display label).
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"
        UNSUPPORTED = "unsupported", "不支持"
        UNCERTAIN = "uncertain", "不确定"
        SKIPPED = "skipped", "跳过"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="items",
    )
    # Position of the file within the batch listing (required, no default).
    file_index = models.PositiveIntegerField()
    directory_level = models.CharField(max_length=300, blank=True, default="")
    file_name = models.CharField(max_length=255)
    file_type = models.CharField(max_length=20)
    # Path used for the per-batch uniqueness constraint.
    relative_path = models.CharField(max_length=500)
    storage_path = models.CharField(max_length=500)
    # Nullable: no page count is stored until one is available.
    page_count = models.IntegerField(null=True, blank=True)
    statistics_status = models.CharField(
        max_length=20,
        choices=StatisticsStatus.choices,
        default=StatisticsStatus.SKIPPED,
    )
    retry_count = models.PositiveIntegerField(default=0)
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed automatically on every save (auto_now).
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        db_table = "ra_file_summary_item"
        ordering = ["file_index", "id"]
        constraints = [
            models.UniqueConstraint(
                fields=["batch", "relative_path"],
                name="uq_ra_item_batch_relative_path",
            )
        ]
        indexes = [
            models.Index(fields=["batch", "file_index"], name="idx_ra_item_batch_index"),
            models.Index(fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"),
            models.Index(fields=["batch", "file_type"], name="idx_ra_item_batch_type"),
        ]
class WorkflowNodeRun(models.Model):
    """Stores recoverable status for one workflow node.

    There is at most one row per (batch, node_code); see
    ``uq_ra_node_batch_code``. No default ordering is declared.
    """

    class Status(models.TextChoices):
        # Each member is (stored value, display label).
        PENDING = "pending", "等待中"
        RUNNING = "running", "执行中"
        RETRYING = "retrying", "重试中"
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"
        SKIPPED = "skipped", "跳过"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="node_runs",
    )
    # Machine identifier of the node (unique within a batch) and its display name.
    node_code = models.CharField(max_length=40)
    node_name = models.CharField(max_length=80)
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
    # NOTE(review): the executor writes 100 on success, so this looks like a
    # 0-100 percentage -- the range is not enforced by the field.
    progress = models.PositiveIntegerField(default=0)
    message = models.TextField(blank=True, default="")
    # Null until the node starts / finishes.
    started_at = models.DateTimeField(null=True, blank=True)
    finished_at = models.DateTimeField(null=True, blank=True)

    class Meta:
        db_table = "ra_workflow_node_run"
        constraints = [
            models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code")
        ]
        indexes = [
            models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
        ]
class WorkflowEvent(models.Model):
    """Persists workflow events for SSE replay and diagnostics.

    Events are ordered by primary key, i.e. in insertion order, and are
    deleted together with their batch (CASCADE).
    """

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="events",
    )
    # Short event name, e.g. "node_progress" as recorded by the workflow executor.
    event_type = models.CharField(max_length=40)
    # Arbitrary JSON body; defaults to a fresh empty dict per row.
    payload = models.JSONField(default=dict)
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_workflow_event"
        ordering = ["id"]
        indexes = [
            models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
            models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"),
        ]
class ExportedSummaryFile(models.Model):
    """Stores generated report files for permission-checked download.

    Each row describes one export (Markdown or Excel) produced for a batch,
    including where it is stored and whether generation succeeded.
    """

    class ExportType(models.TextChoices):
        # Each member is (stored value, display label).
        MARKDOWN = "markdown", "Markdown"
        EXCEL = "excel", "Excel"

    class Status(models.TextChoices):
        # Each member is (stored value, display label).
        SUCCESS = "success", "成功"
        FAILED = "failed", "失败"

    batch = models.ForeignKey(
        FileSummaryBatch,
        on_delete=models.CASCADE,
        related_name="exports",
    )
    # Required: there is no default export type.
    export_type = models.CharField(max_length=20, choices=ExportType.choices)
    file_name = models.CharField(max_length=255)
    storage_path = models.CharField(max_length=500)
    status = models.CharField(max_length=20, choices=Status.choices, default=Status.SUCCESS)
    error_message = models.TextField(blank=True, default="")
    created_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        db_table = "ra_exported_summary_file"
        # Newest first; id breaks ties between equal timestamps.
        ordering = ["-created_at", "-id"]
        indexes = [
            models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"),
            models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"),
        ]

View File

@@ -3,8 +3,11 @@ from __future__ import annotations
import json import json
from django.db.models import Q, QuerySet from django.db.models import Q, QuerySet
from django.conf import settings
from django.utils import timezone from django.utils import timezone
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from .file_summary.workflow_trigger import evaluate_file_summary_trigger
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
from .models import Conversation, Message from .models import Conversation, Message
@@ -88,6 +91,7 @@ def stream_message(conversation: Conversation, content: str):
user_message = append_user_message(conversation, content) user_message = append_user_message(conversation, content)
assistant_parts: list[str] = [] assistant_parts: list[str] = []
trigger = evaluate_file_summary_trigger(conversation, content)
yield sse_event( yield sse_event(
"meta", "meta",
@@ -99,6 +103,51 @@ def stream_message(conversation: Conversation, content: str):
}, },
) )
if trigger.reason == "missing_attachment":
reply_content = "请先在当前对话右侧上传需要汇总的文件或压缩包,然后再发送自动汇总指令。"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
if trigger.should_start:
batch = create_file_summary_batch(
conversation=conversation,
user=conversation.user,
trigger_message=user_message,
)
start_file_summary_workflow(
batch,
async_run=getattr(settings, "FILE_SUMMARY_ASYNC", True),
)
reply_content = f"已启动文件目录与页数自动汇总工作流,批次号:{batch.batch_no}"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event(
"workflow_started",
{
"workflow_type": "file_summary",
"batch_id": batch.pk,
"batch_no": batch.batch_no,
},
)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
try: try:
for chunk in stream_reply(conversation, content): for chunk in stream_reply(conversation, content):
assistant_parts.append(chunk) assistant_parts.append(chunk)

37
review_agent/urls.py Normal file
View File

@@ -0,0 +1,37 @@
from django.urls import path
from .file_summary.views import attachment_detail, attachments, batch_events, batch_status, export_download
# Routes for the file-summary API. All handlers come from .file_summary.views.
urlpatterns = [
    # NOTE(review): the next two entries use the same pattern and the same
    # view, differing only in their name. Both names reverse to the same URL;
    # presumably the view dispatches on the HTTP method -- confirm in the view.
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/",
        attachments,
        name="file_summary_attachment_upload",
    ),
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/",
        attachments,
        name="file_summary_attachment_list",
    ),
    # Operations on a single attachment of a conversation.
    path(
        "api/review-agent/conversations/<int:conversation_id>/attachments/<int:attachment_id>/",
        attachment_detail,
        name="file_summary_attachment_detail",
    ),
    # Batch status and event endpoints, addressed by batch primary key.
    path(
        "api/review-agent/file-summary/<int:batch_id>/status/",
        batch_status,
        name="file_summary_batch_status",
    ),
    path(
        "api/review-agent/file-summary/<int:batch_id>/events/",
        batch_events,
        name="file_summary_batch_events",
    ),
    # Download of one generated export file.
    path(
        "api/review-agent/file-summary/exports/<int:export_id>/download/",
        export_download,
        name="file_summary_export_download",
    ),
]

View File

@@ -10,6 +10,7 @@ from .services import (
send_message, send_message,
stream_message, stream_message,
) )
from .models import FileAttachment, FileSummaryBatch
@login_required @login_required
@@ -49,6 +50,8 @@ def workspace(request: HttpRequest) -> HttpResponse:
"conversations": conversations, "conversations": conversations,
"current_conversation": current, "current_conversation": current,
"messages": current.messages.all() if current else [], "messages": current.messages.all() if current else [],
"attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [],
"summary_batches": FileSummaryBatch.objects.filter(conversation=current).prefetch_related("node_runs").order_by("-created_at")[:5] if current else [],
}, },
) )

View File

@@ -127,7 +127,7 @@ input:focus {
.workspace { .workspace {
display: grid; display: grid;
grid-template-columns: 296px minmax(0, 1fr); grid-template-columns: 296px minmax(0, 1fr) 340px;
min-height: 100vh; min-height: 100vh;
} }
@@ -760,9 +760,176 @@ input:focus {
padding-right: 12px; padding-right: 12px;
} }
/* Right-hand file summary panel: three stacked rows, scrolls on its own. */
.summary-panel {
  display: grid;
  grid-template-rows: auto auto minmax(0, 1fr);
  gap: 14px;
  min-width: 0;
  max-height: 100vh;
  padding: 16px;
  overflow: auto;
  border-left: 1px solid var(--line);
  background: #ffffff;
}

/* Card-like container used for each section of the panel. */
.summary-section {
  display: grid;
  gap: 12px;
  padding: 14px;
  border: 1px solid var(--line);
  border-radius: 8px;
  background: var(--panel-soft);
}

/* Header rows: title on the left, secondary content on the right. */
.summary-heading,
.summary-subheading,
.workflow-card header {
  display: flex;
  align-items: center;
  justify-content: space-between;
  gap: 12px;
}

.summary-heading h2,
.summary-subheading h3 {
  margin: 0;
  font-size: 16px;
}

.summary-heading span {
  color: var(--muted);
  font-size: 12px;
}

/* Upload drop zone; the .dragging modifier is the drag-over highlight. */
.upload-dropzone {
  display: grid;
  place-items: center;
  gap: 6px;
  min-height: 112px;
  padding: 18px;
  border: 1px dashed var(--accent);
  border-radius: 8px;
  background: #f5f9ff;
  color: var(--text);
  cursor: pointer;
  text-align: center;
}

.upload-dropzone.dragging {
  border-color: var(--accent-dark);
  background: #eaf2ff;
}

/* Shared muted, small text style for secondary labels. */
.upload-dropzone span,
.upload-status,
.attachment-item span,
.workflow-card em {
  color: var(--muted);
  font-size: 12px;
}

.upload-status {
  margin: 0;
  line-height: 1.5;
}

/* Attachment list and workflow card list share the same stacked layout. */
.attachment-list,
.workflow-card-list {
  display: grid;
  gap: 10px;
}

.attachment-item,
.workflow-card {
  display: grid;
  gap: 10px;
  padding: 12px;
  border: 1px solid var(--line);
  border-radius: 8px;
  background: #ffffff;
}

/* Attachment row: flexible text column plus an auto-sized badge column. */
.attachment-item {
  grid-template-columns: minmax(0, 1fr) auto;
  align-items: center;
}

/* Long file names and batch numbers may wrap anywhere. */
.attachment-item strong,
.workflow-card strong {
  display: block;
  overflow-wrap: anywhere;
  font-size: 13px;
}

/* Pill-shaped badge used for the "active" marker and the workflow status. */
.attachment-item em,
.workflow-status {
  padding: 3px 8px;
  border-radius: 999px;
  background: #eaf2ff;
  color: var(--accent);
  font-size: 11px;
  font-style: normal;
  font-weight: 700;
}

/* Node list inside a workflow card, rendered without list markers. */
.workflow-card ol {
  display: grid;
  gap: 8px;
  margin: 0;
  padding: 0;
  list-style: none;
}

.node-status {
  display: flex;
  align-items: center;
  justify-content: space-between;
  gap: 10px;
  padding: 8px 0;
  border-top: 1px solid var(--line);
  font-size: 13px;
}

/* Status colours; the class suffix is the status value set by the script. */
.status-running,
.status-retrying {
  color: var(--accent);
}

.status-success {
  color: #047857;
}

.status-failed {
  color: var(--danger-text);
}

/* Placeholder shown when a list has no entries. */
.panel-empty {
  padding: 14px;
  border: 1px dashed var(--line);
  border-radius: 8px;
  color: var(--muted);
  text-align: center;
}

/* Tables rendered inside chat message bubbles. */
.message-bubble table {
  width: 100%;
  border-collapse: collapse;
  font-size: 13px;
}

.message-bubble th,
.message-bubble td {
  padding: 8px;
  border: 1px solid var(--line);
  text-align: left;
  vertical-align: top;
}
@media (max-width: 980px) { @media (max-width: 980px) {
.workspace { .workspace {
grid-template-columns: minmax(0, 1fr); grid-template-columns: minmax(0, 1fr);
min-height: 100vh;
overflow: auto;
} }
.sidebar { .sidebar {
@@ -815,7 +982,14 @@ input:focus {
} }
.chat-stage { .chat-stage {
height: calc(100vh - 88px); min-height: calc(100vh - 88px);
height: auto;
}
.summary-panel {
max-height: none;
border-left: 0;
border-top: 1px solid var(--line);
} }
.chat-scroll { .chat-scroll {

View File

@@ -11,6 +11,12 @@
var sendButton = document.getElementById("sendButton"); var sendButton = document.getElementById("sendButton");
var conversationIdInput = document.getElementById("conversationIdInput"); var conversationIdInput = document.getElementById("conversationIdInput");
var chatStage = document.querySelector(".chat-stage"); var chatStage = document.querySelector(".chat-stage");
var summaryPanel = document.getElementById("summaryPanel");
var uploadDropzone = document.getElementById("uploadDropzone");
var attachmentInput = document.getElementById("attachmentInput");
var attachmentList = document.getElementById("attachmentList");
var uploadStatus = document.getElementById("uploadStatus");
var workflowCardList = document.getElementById("workflowCardList");
var nodeAnchors = []; var nodeAnchors = [];
if (!workspace) { if (!workspace) {
@@ -32,7 +38,7 @@
function syncSidebarState() { function syncSidebarState() {
if (isMobile()) { if (isMobile()) {
if (workspace.getAttribute("data-sidebar-state") === "collapsed") { if (workspace.getAttribute("data-sidebar-state") !== "closed") {
workspace.setAttribute("data-sidebar-state", "closed"); workspace.setAttribute("data-sidebar-state", "closed");
} }
} else if (workspace.getAttribute("data-sidebar-state") === "closed") { } else if (workspace.getAttribute("data-sidebar-state") === "closed") {
@@ -147,6 +153,13 @@
return escapeHtml(text).replace(/\n/g, "<br>"); return escapeHtml(text).replace(/\n/g, "<br>");
} }
function renderAssistantContent(text) {
  // Turn assistant text into HTML. When both `marked` and `DOMPurify` are
  // present on `window`, the text is parsed as Markdown and the result is
  // sanitized; otherwise it falls back to nl2br(). A falsy `text` is treated
  // as an empty string in both paths.
  var source = text || "";
  var canRenderMarkdown = window.marked && window.DOMPurify;
  if (!canRenderMarkdown) {
    return nl2br(source);
  }
  var parsedHtml = window.marked.parse(source);
  return window.DOMPurify.sanitize(parsedHtml);
}
function scrollChatToBottom() { function scrollChatToBottom() {
if (chatScroll) { if (chatScroll) {
chatScroll.scrollTop = chatScroll.scrollHeight; chatScroll.scrollTop = chatScroll.scrollHeight;
@@ -169,7 +182,7 @@
bubble.className = "message-bubble"; bubble.className = "message-bubble";
var text = document.createElement("p"); var text = document.createElement("p");
text.innerHTML = nl2br(content); text.innerHTML = role === "assistant" ? renderAssistantContent(content) : nl2br(content);
bubble.appendChild(text); bubble.appendChild(text);
article.appendChild(avatar); article.appendChild(avatar);
@@ -271,6 +284,149 @@
} }
} }
function currentConversationId() {
  // Value of the conversationIdInput element, or "" when that element is missing.
  if (!conversationIdInput) {
    return "";
  }
  return conversationIdInput.value;
}
function templateUrl(attributeName, token, value) {
  // Build a URL from a template stored in a data attribute of the summary
  // panel by replacing the first occurrence of `token` with `value`.
  //
  // Returns "" when the panel is missing or the attribute is absent or empty.
  // getAttribute() returns null for a missing attribute, which previously made
  // the .replace() call throw a TypeError.
  if (!summaryPanel) {
    return "";
  }
  var template = summaryPanel.getAttribute(attributeName);
  if (!template) {
    return "";
  }
  // Encode the value so it can never alter the path structure; numeric ids
  // (the only values passed by the visible callers) are unchanged by this.
  return template.replace(token, encodeURIComponent(String(value)));
}
function renderAttachments(attachments) {
  // Replace the contents of the attachment list with one row per attachment,
  // or with an empty-state placeholder when there is nothing to show.
  //
  // `attachments` is expected to be an array of objects with id,
  // original_name, version_no, file_size, upload_status and is_active.
  // Anything that is not an array is treated as an empty list instead of
  // throwing on `.length`.
  if (!attachmentList) {
    return;
  }
  var rows = Array.isArray(attachments) ? attachments : [];
  attachmentList.innerHTML = "";
  if (!rows.length) {
    attachmentList.innerHTML = '<div class="panel-empty">暂无附件</div>';
    return;
  }
  rows.forEach(function (attachment) {
    var item = document.createElement("div");
    item.className = "attachment-item";
    item.setAttribute("data-attachment-id", attachment.id);
    // Every server-provided value is escaped before it reaches innerHTML,
    // including the numeric fields, so a malformed payload cannot inject markup.
    item.innerHTML =
      "<div><strong>" +
      escapeHtml(attachment.original_name) +
      "</strong><span>v" +
      escapeHtml(String(attachment.version_no)) +
      " · " +
      escapeHtml(String(attachment.file_size)) +
      " bytes · " +
      escapeHtml(attachment.upload_status) +
      "</span></div>" +
      (attachment.is_active ? "<em>active</em>" : "");
    attachmentList.appendChild(item);
  });
}
async function refreshAttachments() {
  // Reload the attachment list of the current conversation from the server
  // and re-render it. Does nothing without a conversation id or summary
  // panel, and leaves the list untouched on a non-OK response.
  var conversationId = currentConversationId();
  if (!(conversationId && summaryPanel)) {
    return;
  }
  var listUrl = templateUrl("data-attachment-url-template", "__conversation_id__", conversationId);
  var response = await fetch(listUrl);
  if (response.ok) {
    var payload = await response.json();
    renderAttachments(payload.attachments || []);
  }
}
async function uploadFiles(files) {
  // Upload the given files (a FileList or array of File objects) to the
  // current conversation and refresh the attachment list.
  //
  // Status messages are written to the uploadStatus element when it exists.
  // Fixes over the previous version:
  //  - an empty selection no longer shows the "select a conversation" hint;
  //  - a missing composer form no longer throws before the request is made;
  //  - a failed follow-up refresh no longer reports a successful upload as failed.
  var conversationId = currentConversationId();
  if (!conversationId || !summaryPanel) {
    if (uploadStatus) {
      uploadStatus.textContent = "请先创建或选择一个对话。";
    }
    return;
  }
  if (!files || !files.length) {
    // Nothing was selected or dropped; there is nothing to upload.
    return;
  }
  var data = new FormData();
  Array.prototype.forEach.call(files, function (file) {
    data.append("files", file);
  });
  // The CSRF token is read from the composer form; fall back to an empty
  // header value (the server then rejects the request) instead of throwing.
  var csrf = composer ? new FormData(composer).get("csrfmiddlewaretoken") : "";
  if (uploadStatus) {
    uploadStatus.textContent = "正在上传 " + files.length + " 个文件...";
  }
  try {
    var response = await fetch(templateUrl("data-attachment-url-template", "__conversation_id__", conversationId), {
      method: "POST",
      headers: { "X-CSRFToken": csrf || "" },
      body: data,
    });
    if (!response.ok) {
      throw new Error("上传失败。");
    }
    var payload = await response.json();
    renderAttachments(payload.attachments || []);
    if (uploadStatus) {
      uploadStatus.textContent = "上传完成,可发送自动汇总提示词。";
    }
  } catch (error) {
    if (uploadStatus) {
      uploadStatus.textContent = "上传失败,请重试。";
    }
    return;
  }
  try {
    // Best-effort resync with the server's list. The upload already succeeded
    // and its response has been rendered, so a failure here is ignored on purpose.
    await refreshAttachments();
  } catch (refreshError) {
    // Keep the list rendered from the upload response.
  }
}
function ensureWorkflowCard(batch) {
  // Return the workflow card for `batch` (an object with batch_id and
  // batch_no), creating it and prepending it to the list when it does not
  // exist yet. Any empty-state placeholder in the list is removed first.
  // Returns null when the list element or the batch is missing.
  if (!workflowCardList || !batch) {
    return null;
  }
  var placeholder = workflowCardList.querySelector(".panel-empty");
  if (placeholder) {
    placeholder.remove();
  }
  var selector = '[data-batch-id="' + batch.batch_id + '"]';
  var existing = workflowCardList.querySelector(selector);
  if (existing) {
    return existing;
  }
  // New cards start in the "running" state with an empty node list.
  var created = document.createElement("article");
  created.className = "workflow-card";
  created.setAttribute("data-batch-id", batch.batch_id);
  var title = escapeHtml(batch.batch_no || "文件汇总");
  created.innerHTML =
    "<header><strong>" +
    title +
    '</strong><span class="workflow-status status-running">running</span></header><ol></ol>';
  workflowCardList.prepend(created);
  return created;
}
async function refreshWorkflowCard(batchId) {
  // Fetch the status of one batch and update (or create) its workflow card:
  // the status badge and the per-node progress list.
  //
  // The function never rejects. It is called without `await` from the stream
  // handler, so a network or JSON error would otherwise surface as an
  // unhandled promise rejection. On any failure the card is left as it is.
  if (!summaryPanel || !batchId) {
    return;
  }
  var payload;
  try {
    var response = await fetch(templateUrl("data-status-url-template", "__batch_id__", batchId));
    if (!response.ok) {
      return;
    }
    payload = await response.json();
  } catch (error) {
    return;
  }
  if (!payload || !payload.batch) {
    // Unexpected response shape; nothing to render.
    return;
  }
  var card = ensureWorkflowCard({
    batch_id: payload.batch.id,
    batch_no: payload.batch.batch_no,
  });
  if (!card) {
    return;
  }
  var status = card.querySelector(".workflow-status");
  status.textContent = payload.batch.status;
  status.className = "workflow-status status-" + payload.batch.status;
  var list = card.querySelector("ol");
  list.innerHTML = "";
  (payload.nodes || []).forEach(function (node) {
    var item = document.createElement("li");
    item.className = "node-status status-" + node.status;
    item.setAttribute("data-node-code", node.node_code);
    // Escape every value placed into innerHTML, including the numeric progress.
    item.innerHTML =
      "<span>" + escapeHtml(node.node_name) + "</span><em>" + escapeHtml(String(node.progress)) + "%</em>";
    list.appendChild(item);
  });
}
async function streamChat(event) { async function streamChat(event) {
event.preventDefault(); event.preventDefault();
if (!composer || !promptInput || !sendButton || !chatStage) { if (!composer || !promptInput || !sendButton || !chatStage) {
@@ -356,11 +512,14 @@
} }
} else if (eventName === "chunk") { } else if (eventName === "chunk") {
assistantText += payload.delta || ""; assistantText += payload.delta || "";
assistantMessage.text.innerHTML = nl2br(assistantText); assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
scrollChatToBottom(); scrollChatToBottom();
} else if (eventName === "error") { } else if (eventName === "error") {
assistantText = payload.message || "模型调用失败。"; assistantText = payload.message || "模型调用失败。";
assistantMessage.text.innerHTML = nl2br(assistantText); assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
} else if (eventName === "workflow_started") {
ensureWorkflowCard(payload);
refreshWorkflowCard(payload.batch_id);
} else if (eventName === "done") { } else if (eventName === "done") {
if (payload.assistant_message_id) { if (payload.assistant_message_id) {
assistantMessage.article.id = "message-" + payload.assistant_message_id; assistantMessage.article.id = "message-" + payload.assistant_message_id;
@@ -400,6 +559,28 @@
composer.addEventListener("submit", streamChat); composer.addEventListener("submit", streamChat);
} }
// Wire the upload dropzone: click / keyboard activation opens the file
// picker, drag-and-drop and picker selection both hand files to uploadFiles().
if (uploadDropzone && attachmentInput) {
  uploadDropzone.addEventListener("click", function (event) {
    // The hidden input sits inside the dropzone, so the click dispatched by
    // attachmentInput.click() bubbles back here; ignore it to avoid re-entry.
    if (event.target === attachmentInput) {
      return;
    }
    attachmentInput.click();
  });
  // The dropzone is exposed as role="button" with tabindex="0", so it must
  // also react to Enter and Space like a native button.
  uploadDropzone.addEventListener("keydown", function (event) {
    if (event.target !== uploadDropzone) {
      return;
    }
    if (event.key === "Enter" || event.key === " ") {
      event.preventDefault();
      attachmentInput.click();
    }
  });
  uploadDropzone.addEventListener("dragover", function (event) {
    // preventDefault marks the element as a valid drop target.
    event.preventDefault();
    uploadDropzone.classList.add("dragging");
  });
  uploadDropzone.addEventListener("dragleave", function () {
    uploadDropzone.classList.remove("dragging");
  });
  uploadDropzone.addEventListener("drop", function (event) {
    event.preventDefault();
    uploadDropzone.classList.remove("dragging");
    uploadFiles(event.dataTransfer.files);
  });
  attachmentInput.addEventListener("change", function () {
    uploadFiles(attachmentInput.files);
    // Reset so that picking the same file again fires another change event.
    attachmentInput.value = "";
  });
}
window.addEventListener("resize", syncSidebarState); window.addEventListener("resize", syncSidebarState);
syncSidebarState(); syncSidebarState();
})(); })();

View File

@@ -164,9 +164,77 @@
</div> </div>
</section> </section>
</section> </section>
{# File-summary side panel. app.js substitutes the __conversation_id__ / __batch_id__ placeholders of the attachment and status URL templates before calling fetch(). #}
<aside
class="summary-panel"
id="summaryPanel"
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
>
{# Upload area: app.js attaches click and drag-and-drop handlers to #uploadDropzone and reads files from the hidden #attachmentInput; #uploadStatus shows progress text. #}
<section class="summary-section upload-section">
<div class="summary-heading">
<h2>文件汇总</h2>
<span>当前对话</span>
</div>
<div class="upload-dropzone" id="uploadDropzone" tabindex="0" role="button">
<input id="attachmentInput" type="file" multiple hidden>
<strong>拖拽文件到这里</strong>
<span>支持多文件、zip、7z、rar</span>
</div>
<p class="upload-status" id="uploadStatus">上传后发送“自动汇总文件目录与页数”启动工作流。</p>
</section>
{# Server-rendered attachment list; the empty branch renders the .panel-empty placeholder. #}
<section class="summary-section attachment-section">
<div class="summary-subheading">
<h3>附件</h3>
</div>
<div class="attachment-list" id="attachmentList">
{% for attachment in attachments %}
<div class="attachment-item" data-attachment-id="{{ attachment.pk }}">
<div>
<strong>{{ attachment.original_name }}</strong>
<span>v{{ attachment.version_no }} · {{ attachment.file_size }} bytes · {{ attachment.upload_status }}</span>
</div>
{% if attachment.is_active %}<em>active</em>{% endif %}
</div>
{% empty %}
<div class="panel-empty">暂无附件</div>
{% endfor %}
</div>
</section>
{# One card per batch in summary_batches. app.js finds a card through data-batch-id and rewrites its .workflow-status and <ol> contents when refreshing. #}
<section class="summary-section workflow-section">
<div class="summary-subheading">
<h3>工作流</h3>
</div>
<div class="workflow-card-list" id="workflowCardList">
{% for batch in summary_batches %}
<article class="workflow-card" data-batch-id="{{ batch.pk }}">
<header>
<strong>{{ batch.batch_no }}</strong>
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
</header>
<ol>
{% for node in batch.node_runs.all %}
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
<span>{{ node.node_name }}</span>
<em>{{ node.progress }}%</em>
</li>
{% endfor %}
</ol>
</article>
{% empty %}
<div class="panel-empty">暂无工作流</div>
{% endfor %}
</div>
</section>
</aside>
</main> </main>
{% endblock %} {% endblock %}
{% block scripts %} {% block scripts %}
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.6/dist/purify.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.12/marked.min.js"></script>
<script src="{% static 'js/app.js' %}"></script> <script src="{% static 'js/app.js' %}"></script>
{% endblock %} {% endblock %}

View File

@@ -0,0 +1,25 @@
from zipfile import ZipFile
import pytest
from review_agent.file_summary.services.archive import extract_archive
def test_extract_zip_preserves_safe_paths(tmp_path):
    """A zip member inside a sub-directory is extracted at the same relative path."""
    source_zip = tmp_path / "safe.zip"
    with ZipFile(source_zip, "w") as handle:
        handle.writestr("dir/a.txt", "content")
    destination = tmp_path / "out"
    expected_file = destination / "dir" / "a.txt"

    result = extract_archive(source_zip, destination)

    assert result == [expected_file]
    assert expected_file.read_text(encoding="utf-8") == "content"
def test_extract_zip_rejects_path_traversal(tmp_path):
    """A zip member whose name climbs out of the target makes extraction raise ValueError."""
    malicious_zip = tmp_path / "evil.zip"
    with ZipFile(malicious_zip, "w") as handle:
        handle.writestr("../evil.txt", "bad")
    destination = tmp_path / "out"

    with pytest.raises(ValueError):
        extract_archive(malicious_zip, destination)

View File

@@ -0,0 +1,22 @@
import pytest
from django.urls import reverse
from review_agent.models import Conversation
pytestmark = pytest.mark.django_db
def test_workspace_renders_summary_panel(client, django_user_model):
    """The workspace page for a conversation contains the file-summary panel markup."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    client.force_login(owner)
    url = reverse("home") + "?conversation=" + str(conversation.pk)

    response = client.get(url)

    assert response.status_code == 200
    page = response.content.decode("utf-8")
    expected_fragments = (
        'id="summaryPanel"',
        'id="uploadDropzone"',
        'id="workflowCardList"',
        "自动汇总文件目录与页数",
    )
    for fragment in expected_fragments:
        assert fragment in page

View File

@@ -0,0 +1,24 @@
from pathlib import Path
import pytest
from review_agent.file_summary.services.inventory import scan_files_to_items
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_scan_files_to_items_preserves_relative_paths(tmp_path, django_user_model):
    """Scanning a work directory yields one item per file, keyed by its root-relative path."""
    work_root = tmp_path / "work"
    nested_dir = work_root / "a"
    nested_dir.mkdir(parents=True)
    (nested_dir / "one.pdf").write_bytes(b"pdf")
    (work_root / "two.txt").write_text("x", encoding="utf-8")
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-I")

    scanned = scan_files_to_items(batch=batch, roots=[work_root])

    relative_paths = [entry.relative_path for entry in scanned]
    assert relative_paths == ["a/one.pdf", "two.txt"]
    assert FileSummaryItem.objects.filter(batch=batch).count() == 2
    assert scanned[0].statistics_status == FileSummaryItem.StatisticsStatus.SKIPPED

View File

@@ -0,0 +1,113 @@
import pytest
from django.contrib.auth import get_user_model
from django.db import IntegrityError, transaction
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
FileSummaryItem,
)
pytestmark = pytest.mark.django_db
def create_user(username="u1"):
    """Create and return a user of the active user model with the password "pass"."""
    user_model = get_user_model()
    return user_model.objects.create_user(username=username, password="pass")
def test_attachment_versions_are_unique_per_conversation_and_name():
    """Distinct version numbers coexist for one file name; a repeated one raises IntegrityError."""
    owner = create_user()
    conversation = Conversation.objects.create(user=owner, title="会话")
    # Fields shared by every attachment row created below.
    shared_fields = {
        "conversation": conversation,
        "user": owner,
        "original_name": "资料.docx",
    }

    version_one = FileAttachment.objects.create(
        version_no=1,
        is_active=False,
        storage_path="media/a.docx",
        file_size=10,
        **shared_fields,
    )
    version_two = FileAttachment.objects.create(
        version_no=2,
        storage_path="media/b.docx",
        file_size=12,
        **shared_fields,
    )

    assert version_one.version_no == 1
    assert version_two.version_no == 2
    # atomic() keeps the failed INSERT from poisoning the surrounding test transaction.
    with pytest.raises(IntegrityError), transaction.atomic():
        FileAttachment.objects.create(
            version_no=2,
            storage_path="media/c.docx",
            file_size=14,
            **shared_fields,
        )
def test_batch_attachment_and_item_unique_constraints():
    """Duplicate batch/attachment links and duplicate item paths in a batch raise IntegrityError."""
    owner = create_user()
    conversation = Conversation.objects.create(user=owner, title="会话")
    attachment = FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="资料.docx",
        storage_path="media/a.docx",
        file_size=10,
    )
    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-001",
    )

    # Linking the same attachment to the same batch twice must fail.
    link_fields = {"batch": batch, "attachment": attachment}
    FileSummaryBatchAttachment.objects.create(**link_fields)
    with pytest.raises(IntegrityError), transaction.atomic():
        FileSummaryBatchAttachment.objects.create(**link_fields)

    # A second item with the same relative_path in the batch must fail,
    # even though its file_index differs.
    item_fields = {
        "batch": batch,
        "file_name": "资料.docx",
        "file_type": "docx",
        "relative_path": "资料.docx",
        "storage_path": "media/a.docx",
    }
    FileSummaryItem.objects.create(file_index=1, **item_fields)
    with pytest.raises(IntegrityError), transaction.atomic():
        FileSummaryItem.objects.create(file_index=2, **item_fields)
def test_exported_file_traces_to_user_and_conversation():
    """An exported file reaches its owning user and conversation through its batch."""
    owner = create_user()
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=owner,
        batch_no="FS-002",
    )

    export_record = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name="summary.md",
        storage_path="media/summary.md",
    )

    parent_batch = export_record.batch
    assert parent_batch.user == owner
    assert parent_batch.conversation == conversation

View File

@@ -0,0 +1,66 @@
import pytest
from docx import Document
from openpyxl import Workbook
from pptx import Presentation
from review_agent.file_summary.services.page_count import count_document_pages
from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill
from review_agent.file_summary.skills.base import WorkflowContext
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_count_document_pages_for_office_formats(tmp_path):
    """Page counting handles freshly generated docx, xlsx and pptx files."""
    word_file = tmp_path / "a.docx"
    Document().save(word_file)

    # A new workbook starts with one sheet; adding one more gives two.
    excel_file = tmp_path / "a.xlsx"
    book = Workbook()
    book.create_sheet("第二页")
    book.save(excel_file)

    # One slide built from layout index 6.
    slides_file = tmp_path / "a.pptx"
    deck = Presentation()
    deck.slides.add_slide(deck.slide_layouts[6])
    deck.save(slides_file)

    assert count_document_pages(word_file).status in {"success", "uncertain"}
    assert count_document_pages(excel_file).page_count == 2
    assert count_document_pages(slides_file).page_count == 1
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
    """The page-count skill marks an xlsx item SUCCESS and a txt item UNSUPPORTED."""
    excel_file = tmp_path / "a.xlsx"
    Workbook().save(excel_file)
    text_file = tmp_path / "a.txt"
    text_file.write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-P")

    def add_item(index, name, kind, location):
        # Register one on-disk file as an item of the batch.
        return FileSummaryItem.objects.create(
            batch=batch,
            file_index=index,
            file_name=name,
            file_type=kind,
            relative_path=name,
            storage_path=str(location),
        )

    excel_item = add_item(1, "a.xlsx", "xlsx", excel_file)
    text_item = add_item(2, "a.txt", "txt", text_file)

    outcome = DocumentPageCountSkill().run(WorkflowContext(batch=batch))

    excel_item.refresh_from_db()
    text_item.refresh_from_db()
    assert outcome.success is True
    assert excel_item.statistics_status == FileSummaryItem.StatisticsStatus.SUCCESS
    assert text_item.statistics_status == FileSummaryItem.StatisticsStatus.UNSUPPORTED

View File

@@ -0,0 +1,29 @@
import pytest
from review_agent.file_summary.services.product_detect import detect_product_name
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_detect_product_name_from_top_level_directory(django_user_model):
    """The top-level directory of an item path becomes the product name and retitles the conversation."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="新对话 06-06")
    batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-D")
    FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        file_name="说明书.docx",
        file_type="docx",
        relative_path="甲型试剂盒/说明书.docx",
        storage_path="x",
    )

    detected = detect_product_name(batch)

    # Reload both rows to observe what was persisted.
    batch.refresh_from_db()
    conversation.refresh_from_db()
    assert detected == "甲型试剂盒"
    assert batch.product_name == "甲型试剂盒"
    assert conversation.title == "甲型试剂盒-文件汇总"

View File

@@ -0,0 +1,82 @@
from pathlib import Path
import pytest
from openpyxl import load_workbook
from review_agent.file_summary.services.export_excel import generate_excel_export
from review_agent.file_summary.services.report import generate_markdown_report
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem, Message
pytestmark = pytest.mark.django_db
def make_batch(tmp_path, django_user_model):
    """Build a batch rooted at ``tmp_path`` with one successfully counted xlsx item."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch_fields = {
        "conversation": conversation,
        "user": owner,
        "batch_no": "FS-R",
        "work_dir": str(tmp_path),
        "total_files": 1,
        "success_files": 1,
        "total_pages": 2,
    }
    batch = FileSummaryBatch.objects.create(**batch_fields)
    item_location = tmp_path / "a.xlsx"
    FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        file_name="a.xlsx",
        file_type="xlsx",
        relative_path="a.xlsx",
        storage_path=str(item_location),
        page_count=2,
        statistics_status=FileSummaryItem.StatisticsStatus.SUCCESS,
    )
    return batch
def test_generate_markdown_report_creates_export_and_summary(tmp_path, django_user_model):
    """The markdown report is written to disk and its summary carries the table header."""
    batch = make_batch(tmp_path, django_user_model)

    export_record, summary_text = generate_markdown_report(batch)

    report_file = Path(export_record.storage_path)
    assert export_record.export_type == "markdown"
    assert report_file.exists()
    assert "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |" in summary_text
    assert "a.xlsx" in report_file.read_text(encoding="utf-8")
def test_generate_excel_export_contains_summary_and_items(tmp_path, django_user_model):
    """The Excel export has a summary sheet and a detail sheet listing the item."""
    batch = make_batch(tmp_path, django_user_model)

    export_record = generate_excel_export(batch)

    book = load_workbook(export_record.storage_path)
    assert book.sheetnames == ["汇总信息", "文件明细"]
    detail_sheet = book["文件明细"]
    assert detail_sheet["C2"].value == "a.xlsx"
def test_workflow_report_node_writes_assistant_message(tmp_path, settings, django_user_model):
    """A synchronous workflow run leaves an assistant message in the conversation."""
    from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
    from review_agent.models import FileAttachment

    settings.MEDIA_ROOT = tmp_path
    settings.FILE_SUMMARY_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")

    # NOTE(review): the file on disk is "a.xlsx" with non-workbook bytes while the
    # attachment is recorded as "a.txt" -- presumably deliberate; confirm.
    uploaded = tmp_path / "a.xlsx"
    uploaded.write_bytes(b"not a real workbook")
    FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="a.txt",
        storage_path=str(uploaded),
        file_size=uploaded.stat().st_size,
    )

    batch = create_file_summary_batch(conversation=conversation, user=owner)
    batch.work_dir = str(tmp_path / "batch")
    batch.save(update_fields=["work_dir"])
    start_file_summary_workflow(batch, async_run=False)

    assistant_messages = Message.objects.filter(conversation=conversation, role=Message.Role.ASSISTANT)
    assert assistant_messages.exists()

View File

@@ -0,0 +1,27 @@
import pytest
from review_agent.file_summary.skills.base import BaseSkill, SkillResult, WorkflowContext
from review_agent.file_summary.skills.registry import SkillRegistry
class EchoSkill(BaseSkill):
    """Minimal skill that reports the id of the batch it was run against."""

    name = "echo"

    def run(self, context):
        """Return a successful result whose data holds the context's batch id."""
        payload = {"batch_id": context.batch.id}
        return SkillResult(success=True, data=payload)
@pytest.mark.django_db
def test_skill_registry_executes_registered_skill(django_user_model):
    """A skill registered under its name can be executed through the registry."""
    from review_agent.models import Conversation, FileSummaryBatch

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-X")
    skills = SkillRegistry()
    skills.register(EchoSkill())

    outcome = skills.execute("echo", WorkflowContext(batch=batch))

    assert outcome.success is True
    assert outcome.data == {"batch_id": batch.id}

View File

@@ -0,0 +1,48 @@
from django.core.files.uploadedfile import SimpleUploadedFile
import pytest
from review_agent.file_summary.storage import save_uploaded_attachment
from review_agent.models import Conversation, FileAttachment
pytestmark = pytest.mark.django_db
def test_save_uploaded_attachment_versions_same_name(settings, tmp_path, django_user_model):
    """Re-uploading a file name creates version 2 and deactivates version 1."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")

    def upload(payload):
        # Upload bytes under the same file name each time.
        return save_uploaded_attachment(
            conversation=conversation,
            user=owner,
            uploaded_file=SimpleUploadedFile("资料.docx", payload),
        )

    older = upload(b"first")
    newer = upload(b"second")

    # The first row is changed by the second upload, so reload it.
    older.refresh_from_db()
    assert older.version_no == 1
    assert older.is_active is False
    assert newer.version_no == 2
    assert newer.is_active is True
    assert FileAttachment.objects.filter(conversation=conversation).count() == 2
    assert (tmp_path / newer.storage_path).read_bytes() == b"second"
def test_save_uploaded_attachment_rejects_path_traversal(settings, tmp_path, django_user_model):
    """An upload named with ``../`` is stored under MEDIA_ROOT without ``..`` in its path."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    hostile_upload = SimpleUploadedFile("../资料.docx", b"content")

    saved = save_uploaded_attachment(
        conversation=conversation,
        user=owner,
        uploaded_file=hostile_upload,
    )

    assert ".." not in saved.storage_path
    assert (tmp_path / saved.storage_path).exists()

View File

@@ -0,0 +1,32 @@
import pytest
from review_agent.file_summary.workflow_trigger import evaluate_file_summary_trigger
from review_agent.models import Conversation, FileAttachment
pytestmark = pytest.mark.django_db
def test_trigger_matches_keywords_only_when_active_attachment_exists(django_user_model):
    """The trigger needs both a matching prompt and an attachment in the conversation."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    trigger_prompt = "请自动汇总文件目录与页数"

    # Matching prompt, but nothing uploaded yet.
    without_file = evaluate_file_summary_trigger(conversation, trigger_prompt)
    assert without_file.should_start is False
    assert without_file.reason == "missing_attachment"

    FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )

    # Same prompt now that an attachment exists.
    with_file = evaluate_file_summary_trigger(conversation, trigger_prompt)
    assert with_file.should_start is True
    assert with_file.workflow_type == "file_summary"

    # An unrelated prompt never starts the workflow.
    unrelated = evaluate_file_summary_trigger(conversation, "你好,帮我解释法规")
    assert unrelated.should_start is False
    assert unrelated.reason == "not_matched"

View File

@@ -0,0 +1,98 @@
from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse
import pytest
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, FileSummaryBatch
pytestmark = pytest.mark.django_db
def test_upload_attachments_requires_conversation_owner(client, settings, tmp_path, django_user_model):
    """Uploading to another user's conversation answers 404."""
    settings.MEDIA_ROOT = tmp_path
    make_user = django_user_model.objects.create_user
    owner = make_user(username="owner", password="pass")
    intruder = make_user(username="other", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    client.force_login(intruder)
    upload_url = reverse("file_summary_attachment_upload", args=[conversation.pk])

    response = client.post(upload_url, {"files": [SimpleUploadedFile("a.docx", b"a")]})

    assert response.status_code == 404
def test_attachment_api_requires_login(client, django_user_model):
    """An anonymous request to the attachment list is redirected (302)."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    list_url = reverse("file_summary_attachment_list", args=[conversation.pk])

    response = client.get(list_url)

    assert response.status_code == 302
def test_upload_and_list_current_conversation_attachments(client, settings, tmp_path, django_user_model):
    """Two files uploaded in one request are both returned by the list endpoint."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    client.force_login(owner)
    route_args = [conversation.pk]
    uploads = [
        SimpleUploadedFile("a.docx", b"a", content_type="application/docx"),
        SimpleUploadedFile("b.zip", b"b", content_type="application/zip"),
    ]

    upload_response = client.post(
        reverse("file_summary_attachment_upload", args=route_args),
        {"files": uploads},
    )
    list_response = client.get(reverse("file_summary_attachment_list", args=route_args))

    assert upload_response.status_code == 200
    uploaded = upload_response.json()["attachments"]
    assert uploaded[0]["original_name"] == "a.docx"
    listed = list_response.json()["attachments"]
    assert len(listed) == 2
def test_delete_attachment_is_logical_and_scoped(client, settings, tmp_path, django_user_model):
    """DELETE keeps the row but marks it DELETED and inactive."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    attachment = FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )
    client.force_login(owner)
    detail_url = reverse("file_summary_attachment_detail", args=[conversation.pk, attachment.pk])

    response = client.delete(detail_url)

    # refresh_from_db succeeding shows the row still exists after the request.
    attachment.refresh_from_db()
    assert response.status_code == 200
    assert attachment.upload_status == FileAttachment.UploadStatus.DELETED
    assert attachment.is_active is False
def test_export_download_requires_batch_owner(client, tmp_path, django_user_model):
    """Only the batch owner can download an export; other users get 404."""
    make_user = django_user_model.objects.create_user
    owner = make_user(username="owner", password="pass")
    stranger = make_user(username="other", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=conversation, user=owner, batch_no="FS-DL")
    report_file = tmp_path / "summary.md"
    report_file.write_text("ok", encoding="utf-8")
    export_record = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name="summary.md",
        storage_path=str(report_file),
    )
    download_url = reverse("file_summary_export_download", args=[export_record.pk])

    client.force_login(stranger)
    refused = client.get(download_url)
    assert refused.status_code == 404

    client.force_login(owner)
    granted = client.get(download_url)
    assert granted.status_code == 200

View File

@@ -0,0 +1,102 @@
import pytest
from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from review_agent.models import (
Conversation,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
Message,
WorkflowEvent,
WorkflowNodeRun,
)
from review_agent.services import stream_message
pytestmark = pytest.mark.django_db
def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user_model):
    """Batch creation binds only the active attachment and seeds nodes plus a creation event."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    trigger = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总")
    ownership = {"conversation": conversation, "user": owner}
    current = FileAttachment.objects.create(
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
        **ownership,
    )
    # Inactive attachment that must not be bound to the batch.
    FileAttachment.objects.create(
        original_name="old.docx",
        is_active=False,
        storage_path="x/old.docx",
        file_size=1,
        **ownership,
    )

    batch = create_file_summary_batch(conversation=conversation, user=owner, trigger_message=trigger)

    assert batch.status == FileSummaryBatch.Status.PENDING
    # .get() also proves exactly one attachment was linked.
    bound_link = FileSummaryBatchAttachment.objects.get(batch=batch)
    assert bound_link.attachment == current
    current.refresh_from_db()
    assert current.upload_status == FileAttachment.UploadStatus.BOUND
    assert WorkflowNodeRun.objects.filter(batch=batch).count() >= 6
    assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created").exists()
def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_model):
    """With ``async_run=False`` the workflow finishes before the call returns."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    trigger = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总")
    FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )
    batch = create_file_summary_batch(conversation=conversation, user=owner, trigger_message=trigger)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.status == FileSummaryBatch.Status.SUCCESS
    completion_events = WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed")
    assert completion_events.exists()
def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model):
    """A trigger prompt with an attachment streams workflow metadata and creates a batch."""
    settings.FILE_SUMMARY_ASYNC = False
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")
    FileAttachment.objects.create(
        conversation=conversation,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )

    # Drain the generator so every frame is produced before asserting.
    streamed = "".join(list(stream_message(conversation, "请自动汇总文件目录与页数")))

    assert "workflow_started" in streamed
    assert "\"workflow_type\": \"file_summary\"" in streamed
    assert FileSummaryBatch.objects.filter(conversation=conversation).exists()
def test_stream_message_uses_normal_llm_path_when_not_triggered(monkeypatch, django_user_model):
    """A non-trigger prompt is answered through ``stream_reply`` with no workflow frame."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")

    def fake_stream_reply(conversation, content):
        # Stand-in for the LLM call: emits one fixed chunk.
        yield "普通回复"

    monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply)

    streamed = "".join(list(stream_message(conversation, "你好")))

    assert "普通回复" in streamed
    assert "workflow_started" not in streamed