merge: 自动汇总文件目录页数

This commit is contained in:
2026-06-06 10:28:14 +08:00
48 changed files with 3029 additions and 8 deletions

View File

@@ -18,3 +18,20 @@ python manage.py runserver
- 登录页：http://127.0.0.1:8000/login/
- 首页：http://127.0.0.1:8000/
- 管理后台：http://127.0.0.1:8000/admin/
## 文件汇总依赖
自动汇总文件目录与页数功能使用轻量 Python 库读取 PDF、Word、Excel、PowerPoint 文件。
Docker 或生产环境如需处理 `.7z`、`.rar` 压缩包，还需要安装系统 `7z`/`p7zip`
命令,并确认以下命令可用:
```bash
7z
7z i
```
LibreOffice 不是必需依赖,仅作为未来增强老格式文档解析的可选能力。
上传原始文件、批次工作目录和导出文件默认存储在 Django `MEDIA_ROOT` 下的
`file_summary/users/<user_id>/<conversation_id>/` 或批次 `work_dir` 目录中。生产环境
需要把 `MEDIA_ROOT` 挂载到持久化卷,并纳入备份或归档策略。

View File

@@ -1,11 +1,12 @@
from django.contrib import admin
from django.contrib.auth.views import LoginView, LogoutView, PasswordChangeView
from django.urls import path
from django.urls import include, path
from review_agent.views import stream_chat, workspace
urlpatterns = [
path("", workspace, name="home"),
path("", include("review_agent.urls")),
path("chat/stream/", stream_chat, name="chat_stream"),
path(
"login/",

View File

@@ -0,0 +1,74 @@
# 自动汇总前端线框图
## 评审目标
在实现三栏页面前,先确认审核智能体工作台的信息架构、右侧文件汇总面板、工作流状态展示和移动端降级方式。
## 桌面端布局
```mermaid
flowchart LR
A["左栏:会话列表<br/>新对话 / 搜索 / 历史会话"] --> B["中栏:聊天区<br/>顶部导航 / 消息流 / 输入框"]
B --> C["右栏:文件汇总面板"]
C --> C1["上半区:上传区<br/>拖拽上传 / 选择文件 / 上传状态"]
C --> C2["中段:当前对话附件<br/>文件名 / 版本 / 大小 / 状态 / 删除"]
C --> C3["下半区:工作流卡片<br/>批次号 / 节点进度 / 下载入口"]
```
## 右侧面板结构
```mermaid
flowchart TB
P["文件汇总面板"] --> U["上传拖拽区"]
U --> U0["无附件:提示上传文件或压缩包"]
U --> U1["上传中:显示文件名和处理中状态"]
U --> U2["上传失败:展示错误并允许重试"]
P --> L["附件列表"]
L --> L1["active 版本优先展示"]
L --> L2["历史版本保留展示"]
L --> L3["逻辑删除后从默认候选移除"]
P --> W["工作流卡片列表"]
W --> W1["运行中:节点逐项更新"]
W --> W2["成功:展示 Markdown/Excel 下载"]
W --> W3["失败:展示失败节点和错误说明"]
```
## 工作流状态流转
```mermaid
stateDiagram-v2
[*] --> Pending: 用户上传附件
Pending --> Running: 发送自动汇总提示词
Running --> Extracting: 固化附件
Extracting --> Scanning: 解压完成或跳过
Scanning --> Counting: 生成文件清单
Counting --> Detecting: 页数统计完成
Detecting --> Reporting: 产品名识别完成
Reporting --> Success: 生成报告与下载
Running --> Failed: 批次级异常
Extracting --> Failed: 解压安全检查失败
Reporting --> Failed: 报告生成失败
Success --> Restored: 刷新页面后状态恢复
Failed --> Restored: 刷新页面后状态恢复
```
## 移动端布局
```mermaid
flowchart TB
M["移动端工作台"] --> T["顶部:侧栏按钮 / 当前页面 / 用户菜单"]
T --> Chat["聊天区优先展示"]
Chat --> Composer["底部输入框"]
T --> Drawer["会话侧栏抽屉"]
Chat --> Panel["文件汇总面板下移或折叠"]
Panel --> Upload["上传区"]
Panel --> Workflow["工作流卡片"]
```
## 关键评审点
- 桌面端保持左侧会话、中间聊天、右侧文件汇总三栏,不改变现有聊天主路径。
- 右侧面板上半部分用于上传和附件列表,下半部分用于批次工作流卡片。
- 工作流卡片节点顺序固定为:附件固化、压缩包解压、文件扫描、页数统计、产品识别、报告输出、完成。
- 助手消息中的文件汇总结果使用安全 Markdown 渲染,用户消息仍按纯文本转义。
- 移动端优先保证聊天可用,文件汇总面板折叠或下移,不能遮挡输入框。

3
pytest.ini Normal file
View File

@@ -0,0 +1,3 @@
[pytest]
DJANGO_SETTINGS_MODULE = config.settings
python_files = tests.py test_*.py *_tests.py

View File

@@ -1 +1,8 @@
Django>=5.0,<6.0
pypdf>=5.0
python-docx>=1.1
python-pptx>=1.0
openpyxl>=3.1
xlrd>=2.0
olefile>=0.47
py7zr>=0.21

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,4 @@
from pathlib import Path
# Root directory for uploaded attachments, expressed relative to MEDIA_ROOT.
ATTACHMENT_ROOT = Path("file_summary") / "users"

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from review_agent.models import FileSummaryBatch, WorkflowEvent
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Persist one workflow event for ``batch`` and return the created row.

    A missing or empty ``payload`` is stored as an empty dict.
    """
    event_payload = payload if payload else {}
    return WorkflowEvent.objects.create(
        batch=batch,
        event_type=event_type,
        payload=event_payload,
    )
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert a workflow event into a JSON-serialisable dict.

    ``created_at`` is rendered with ``isoformat()``.
    """
    serialized: dict[str, object] = {"id": event.pk}
    serialized["event_type"] = event.event_type
    serialized["payload"] = event.payload
    serialized["created_at"] = event.created_at.isoformat()
    return serialized

View File

@@ -0,0 +1,12 @@
from __future__ import annotations
from pathlib import Path
from django.conf import settings
def resolve_storage_path(storage_path: str) -> Path:
    """Turn a stored ``storage_path`` value into a filesystem path.

    Absolute paths are returned unchanged; relative ones are anchored at
    ``settings.MEDIA_ROOT``.
    """
    candidate = Path(storage_path)
    return candidate if candidate.is_absolute() else Path(settings.MEDIA_ROOT) / candidate

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,77 @@
from __future__ import annotations
import subprocess
from pathlib import Path
from zipfile import ZipFile
import py7zr
# Lower-case file extensions (without the dot) that extract_archive can unpack.
ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"}
def _ensure_inside_target(path: Path, target_dir: Path) -> None:
    """Raise ``ValueError`` unless ``path`` resolves to ``target_dir`` or below it.

    Both paths are resolved first, so ``..`` segments and symlinks are
    taken into account.
    """
    base = target_dir.resolve()
    candidate = path.resolve()
    if candidate == base:
        return
    if base in candidate.parents:
        return
    raise ValueError("解压路径必须位于批次工作目录内。")
def _safe_member_path(target_dir: Path, member_name: str) -> Path:
    """Return where archive member ``member_name`` lands under ``target_dir``.

    Raises:
        ValueError: if the resulting path would fall outside ``target_dir``.
    """
    member_destination = target_dir.joinpath(member_name)
    _ensure_inside_target(member_destination, target_dir)
    return member_destination
def extract_archive(archive_path: str | Path, target_dir: str | Path) -> list[Path]:
    """Extract ``archive_path`` into ``target_dir`` and return the extracted files.

    ``target_dir`` is created (with parents) before the extension is checked.
    A file whose extension is not in ``ARCHIVE_EXTENSIONS`` yields an empty
    list. The extension is matched case-insensitively.
    """
    source = Path(archive_path)
    destination_root = Path(target_dir)
    destination_root.mkdir(parents=True, exist_ok=True)

    extension = source.suffix.lower().lstrip(".")
    if extension not in ARCHIVE_EXTENSIONS:
        return []

    # Any recognised extension without an explicit entry falls through to rar.
    extractors = {"zip": _extract_zip, "7z": _extract_7z}
    extractor = extractors.get(extension, _extract_rar)
    return extractor(source, destination_root)
def _extract_zip(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a zip archive member by member into ``target_dir``.

    Every member name is validated with ``_safe_member_path`` before it is
    written, so an entry that would escape ``target_dir`` aborts extraction.

    Args:
        archive_path: Path of the zip file to read.
        target_dir: Directory that receives the extracted tree.

    Returns:
        The paths of the regular files that were written (directories are
        created but not listed).

    Raises:
        ValueError: if a member would be written outside ``target_dir``.
    """
    import shutil

    extracted: list[Path] = []
    with ZipFile(archive_path) as archive:
        for member in archive.infolist():
            destination = _safe_member_path(target_dir, member.filename)
            if member.is_dir():
                destination.mkdir(parents=True, exist_ok=True)
                continue
            destination.parent.mkdir(parents=True, exist_ok=True)
            with archive.open(member) as source, destination.open("wb") as target:
                # Stream in chunks instead of loading the whole member into
                # memory; uploaded archives may contain very large files.
                shutil.copyfileobj(source, target)
            extracted.append(destination)
    return extracted
def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a 7z archive with py7zr and return the files it produced.

    All member names are validated before anything is extracted.

    Raises:
        ValueError: if any member name would resolve outside ``target_dir``.
    """
    with py7zr.SevenZipFile(archive_path, mode="r") as archive:
        member_names = archive.getnames()
        for member_name in member_names:
            _safe_member_path(target_dir, member_name)
        archive.extractall(path=target_dir)

    # Directory entries are listed by the archive too; keep regular files only.
    extracted: list[Path] = []
    for member_name in member_names:
        candidate = target_dir / member_name
        if candidate.is_file():
            extracted.append(candidate)
    return extracted
def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]:
    """Extract a rar archive by invoking the system ``7z`` command.

    Returns:
        Every regular file found under ``target_dir`` after extraction.

    Raises:
        RuntimeError: if ``7z`` exits with a non-zero status.
        ValueError: if a file under ``target_dir`` resolves outside of it.
    """
    command = ["7z", "x", f"-o{target_dir}", str(archive_path), "-y"]
    completed = subprocess.run(command, check=False, capture_output=True, text=True)
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr or completed.stdout or "rar 解压失败")

    # NOTE(review): the whole directory is scanned, so files that were already
    # present before this extraction are returned as well, and the containment
    # check only runs after 7z has written to disk — confirm this is intended.
    extracted: list[Path] = []
    for entry in target_dir.rglob("*"):
        if entry.is_file():
            extracted.append(entry)
    for entry in extracted:
        _ensure_inside_target(entry, target_dir)
    return extracted

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
from pathlib import Path
from openpyxl import Workbook
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return the ``exports`` directory of ``batch``, creating it if needed.

    The directory lives under ``batch.work_dir`` when that is set, otherwise
    under ``media/file_summary/<batch_no>``.
    """
    if batch.work_dir:
        base_dir = Path(batch.work_dir)
    else:
        base_dir = Path("media") / "file_summary" / batch.batch_no
    exports = base_dir / "exports"
    exports.mkdir(parents=True, exist_ok=True)
    return exports
def generate_excel_export(batch: FileSummaryBatch) -> ExportedSummaryFile:
    """Write the Excel summary of ``batch`` and register it as an export.

    The workbook has a summary sheet with the batch counters and a detail
    sheet with one row per item, ordered by ``file_index``. It is saved as
    ``<batch_no>-summary.xlsx`` in the batch exports directory.
    """
    workbook = Workbook()

    summary_sheet = workbook.active
    summary_sheet.title = "汇总信息"
    summary_rows = (
        ("批次号", batch.batch_no),
        ("产品名称", batch.product_name or "-"),
        ("文件总数", batch.total_files),
        ("统计成功", batch.success_files),
        ("统计失败", batch.failed_files),
        ("不支持", batch.unsupported_files),
        ("不确定", batch.uncertain_files),
        ("总页数", batch.total_pages),
    )
    for label, value in summary_rows:
        summary_sheet.append([label, value])

    detail_sheet = workbook.create_sheet("文件明细")
    detail_sheet.append(
        ["序号", "目录层级", "文件名", "类型", "页数", "路径", "状态", "重试次数", "异常说明"]
    )
    # Item attributes in the same order as the header row above.
    detail_fields = (
        "file_index",
        "directory_level",
        "file_name",
        "file_type",
        "page_count",
        "relative_path",
        "statistics_status",
        "retry_count",
        "error_message",
    )
    for item in batch.items.order_by("file_index"):
        detail_sheet.append([getattr(item, field_name) for field_name in detail_fields])

    output_path = _exports_dir(batch) / f"{batch.batch_no}-summary.xlsx"
    workbook.save(output_path)
    return ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.EXCEL,
        file_name=output_path.name,
        storage_path=str(output_path),
    )

View File

@@ -0,0 +1,49 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatch, FileSummaryItem
# Lower-case extensions (without the dot) counted as "supported" in batch totals.
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
def _directory_level(relative_path: Path) -> str:
    """Return the parent directories of ``relative_path`` joined with ``/``.

    A path with no parent component yields an empty string.
    """
    parent_parts = relative_path.parts[:-1]
    return "/".join(parent_parts) if parent_parts else ""
def scan_files_to_items(*, batch: FileSummaryBatch, roots: list[Path]) -> list[FileSummaryItem]:
    """Create one ``FileSummaryItem`` per file found under ``roots``.

    A root that is a file is taken as-is. A directory root is walked
    recursively in sorted order, skipping dot-files and empty files. Items
    are numbered from 1 in discovery order and created with status SKIPPED.
    The batch's ``total_files``, ``supported_files`` and
    ``unsupported_files`` counters are then updated and saved.

    Returns:
        The created items, in index order.
    """
    discovered: list[tuple[Path, Path]] = []
    for raw_root in roots:
        root_path = Path(raw_root)
        if root_path.is_file():
            discovered.append((root_path.parent, root_path))
            continue
        candidates = sorted(entry for entry in root_path.rglob("*") if entry.is_file())
        discovered.extend(
            (root_path, entry)
            for entry in candidates
            if not entry.name.startswith(".") and entry.stat().st_size != 0
        )

    created: list[FileSummaryItem] = []
    file_index = 0
    for base, file_path in discovered:
        file_index += 1
        relative = file_path.relative_to(base).as_posix()
        created.append(
            FileSummaryItem.objects.create(
                batch=batch,
                file_index=file_index,
                directory_level=_directory_level(Path(relative)),
                file_name=file_path.name,
                file_type=file_path.suffix.lower().lstrip("."),
                relative_path=relative,
                storage_path=str(file_path),
                statistics_status=FileSummaryItem.StatisticsStatus.SKIPPED,
            )
        )

    total = len(created)
    supported = sum(1 for item in created if item.file_type in SUPPORTED_EXTENSIONS)
    batch.total_files = total
    batch.supported_files = supported
    batch.unsupported_files = total - supported
    batch.save(update_fields=["total_files", "supported_files", "unsupported_files"])
    return created

View File

@@ -0,0 +1,59 @@
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
# Lower-case extensions (without the dot) for which a page count is attempted.
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}


@dataclass(frozen=True)
class PageCountResult:
    """Outcome of counting the pages of a single document."""

    # One of "success", "uncertain", "unsupported" or "failed".
    status: str
    # Pages, sheets or slides; only set when status is "success".
    page_count: int | None = None
    # Error text; only set when status is "failed".
    error_message: str = ""


def _docx_page_count(file_path: Path) -> PageCountResult:
    """Read the page count cached in ``docProps/app.xml`` of a .docx package.

    python-docx core properties expose no page count, so the extended
    properties part is read directly. The value is whatever the producing
    application last stored; a missing or non-positive value is "uncertain".
    """
    import re
    import zipfile

    with zipfile.ZipFile(file_path) as package:
        names = set(package.namelist())
        # Every OPC package must carry a content-types part.
        if "[Content_Types].xml" not in names:
            return PageCountResult(status="failed", error_message="不是有效的 docx 文件。")
        if "docProps/app.xml" not in names:
            return PageCountResult(status="uncertain")
        with package.open("docProps/app.xml") as stream:
            # app.xml is tiny in practice; cap the read to stay safe against
            # maliciously inflated members.
            app_xml = stream.read(1 << 16)

    match = re.search(rb"<(?:[A-Za-z_][\w.-]*:)?Pages\s*>\s*(\d+)\s*<", app_xml)
    if match and int(match.group(1)) > 0:
        return PageCountResult(status="success", page_count=int(match.group(1)))
    return PageCountResult(status="uncertain")


def count_document_pages(path: str | Path) -> PageCountResult:
    """Count the pages (or sheets / slides) of the document at ``path``.

    The format is chosen from the file extension, case-insensitively:

    * pdf: number of pages.
    * docx: page count stored in the document's extended properties.
    * xlsx / xls: number of worksheets.
    * pptx: number of slides.
    * doc / ppt: "uncertain" for a valid OLE file, "failed" otherwise.

    Any exception raised while reading the file is converted into a
    "failed" result carrying the exception text; this function does not
    raise for unreadable documents.

    Returns:
        A ``PageCountResult``; status "unsupported" for unknown extensions.
    """
    file_path = Path(path)
    ext = file_path.suffix.lower().lstrip(".")
    if ext not in SUPPORTED_EXTENSIONS:
        return PageCountResult(status="unsupported")
    try:
        if ext == "pdf":
            from pypdf import PdfReader

            return PageCountResult(status="success", page_count=len(PdfReader(str(file_path)).pages))
        if ext == "docx":
            return _docx_page_count(file_path)
        if ext == "xlsx":
            from openpyxl import load_workbook

            workbook = load_workbook(str(file_path), read_only=True, data_only=True)
            try:
                return PageCountResult(status="success", page_count=len(workbook.sheetnames))
            finally:
                # Read-only workbooks keep the file handle open until closed.
                workbook.close()
        if ext == "xls":
            import xlrd

            workbook = xlrd.open_workbook(str(file_path), on_demand=True)
            try:
                return PageCountResult(status="success", page_count=workbook.nsheets)
            finally:
                # on_demand=True keeps resources open until released.
                workbook.release_resources()
        if ext == "pptx":
            from pptx import Presentation

            return PageCountResult(status="success", page_count=len(Presentation(str(file_path)).slides))
        if ext in {"doc", "ppt"}:
            import olefile

            if olefile.isOleFile(str(file_path)):
                return PageCountResult(status="uncertain")
            return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。")
    except Exception as exc:
        # Per-file boundary: one unreadable document must not abort the batch.
        return PageCountResult(status="failed", error_message=str(exc))
    return PageCountResult(status="uncertain")

View File

@@ -0,0 +1,31 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatch
def detect_product_name(batch: FileSummaryBatch) -> str:
    """Guess the product name of ``batch`` from its items and store it.

    Items are examined in ``file_index`` order. The first item located in a
    sub-directory supplies its top-level directory name; otherwise the first
    file whose stem contains one of the product keywords supplies its stem.
    When a name is found it is saved on the batch, and a conversation whose
    title still starts with the default prefix is retitled.

    Returns:
        The detected name, or an empty string when nothing matched (in which
        case nothing is saved).
    """
    keywords = ("产品", "试剂盒", "说明书")
    detected = ""
    for item in batch.items.order_by("file_index"):
        path_parts = Path(item.relative_path).parts
        if len(path_parts) > 1:
            detected = path_parts[0]
            break
        stem = Path(item.file_name).stem
        if any(keyword in stem for keyword in keywords):
            detected = stem
            break

    if not detected:
        return ""

    batch.product_name = detected
    batch.save(update_fields=["product_name"])

    conversation = batch.conversation
    if conversation.title.startswith("新对话"):
        conversation.title = f"{detected}-文件汇总"
        conversation.save(update_fields=["title", "updated_at"])
    return detected

View File

@@ -0,0 +1,65 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import ExportedSummaryFile, FileSummaryBatch
def _exports_dir(batch: FileSummaryBatch) -> Path:
    """Return the directory that holds exported files for ``batch``.

    Uses ``<work_dir>/exports`` when the batch has a work directory and
    ``media/file_summary/<batch_no>/exports`` otherwise. The directory is
    created on demand.
    """
    work_dir = batch.work_dir
    root = Path(work_dir) if work_dir else Path("media") / "file_summary" / batch.batch_no
    target = root.joinpath("exports")
    target.mkdir(parents=True, exist_ok=True)
    return target
def _markdown_cell(value: object) -> str:
    """Render ``value`` so it cannot break out of a Markdown table cell."""
    # Newlines end the table row and a bare pipe starts a new cell.
    single_line = " ".join(str(value).splitlines())
    return single_line.replace("|", "\\|")


def build_summary_table(batch: FileSummaryBatch) -> str:
    """Build the Markdown table listing every item of ``batch``.

    Rows follow ``file_index`` order. An empty directory level, a missing
    page count and an empty error message are shown as ``-``. Text cells are
    escaped so that pipes or line breaks in file names and error messages do
    not corrupt the table layout.

    Returns:
        The table as a newline-joined string (header rows included).
    """
    lines = [
        "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |",
        "| --- | --- | --- | --- | --- | --- | --- |",
    ]
    for item in batch.items.order_by("file_index"):
        lines.append(
            "| {index} | {directory} | {name} | {file_type} | {pages} | {status} | {error} |".format(
                index=item.file_index,
                directory=_markdown_cell(item.directory_level or "-"),
                name=_markdown_cell(item.file_name),
                file_type=_markdown_cell(item.file_type),
                pages=item.page_count if item.page_count is not None else "-",
                status=_markdown_cell(item.statistics_status),
                error=_markdown_cell(item.error_message or "-"),
            )
        )
    return "\n".join(lines)
def build_markdown_report(batch: FileSummaryBatch) -> str:
    """Compose the full Markdown report for ``batch``.

    The report has four sections separated by blank lines: title with batch
    number, summary counters, the per-file detail table, and a fixed
    processing note.
    """
    title_section = f"# 文件目录与页数汇总报告\n\n批次号：{batch.batch_no}"

    summary_bullets = [
        f"- 产品名称：{batch.product_name or '-'}",
        f"- 文件总数：{batch.total_files}",
        f"- 统计成功：{batch.success_files}",
        f"- 统计失败：{batch.failed_files}",
        f"- 不支持：{batch.unsupported_files}",
        f"- 不确定：{batch.uncertain_files}",
        f"- 总页数：{batch.total_pages}",
    ]
    summary_section = "## 汇总信息\n\n" + "\n".join(summary_bullets)

    detail_section = "## 文件明细\n\n" + build_summary_table(batch)
    notes_section = "## 处理说明\n\n单文件失败不会阻断批次，失败与不确定文件已在明细中标注。"

    sections = (title_section, summary_section, detail_section, notes_section)
    return "\n\n".join(sections)
def generate_markdown_report(batch: FileSummaryBatch) -> tuple[ExportedSummaryFile, str]:
    """Write the Markdown report of ``batch`` to disk and register the export.

    The file is saved as ``<batch_no>-summary.md`` (UTF-8) in the batch
    exports directory.

    Returns:
        A tuple of the created export record and the detail table alone
        (not the full report).
    """
    report_text = build_markdown_report(batch)
    report_path = _exports_dir(batch) / f"{batch.batch_no}-summary.md"
    report_path.write_text(report_text, encoding="utf-8")

    export_record = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name=report_path.name,
        storage_path=str(report_path),
    )
    table_only = build_summary_table(batch)
    return export_record, table_only

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,26 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
from .base import BaseSkill, SkillResult, WorkflowContext
class ArchiveExtractSkill(BaseSkill):
    """Workflow skill that unpacks every archive attachment bound to a batch."""

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Extract all archive attachments into the batch work directory.

        Attachments whose extension is not in ``ARCHIVE_EXTENSIONS`` are
        ignored. When the batch has no work directory nothing is extracted.

        Returns:
            A successful ``SkillResult`` whose ``extracted_count`` is the
            number of files reported by the extractors.
        """
        work_dir = context.batch.work_dir
        if not work_dir:
            # Test the raw value: Path("") becomes Path("."), which is always
            # truthy, so checking the Path would extract into the process CWD.
            return SkillResult(success=True, data={"extracted_count": 0})

        target_dir = Path(work_dir)
        extracted_count = 0
        for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
            path = resolve_storage_path(binding.attachment.storage_path)
            if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
                continue
            extracted_count += len(extract_archive(path, target_dir))
        return SkillResult(success=True, data={"extracted_count": extracted_count})

View File

@@ -0,0 +1,24 @@
from __future__ import annotations
from dataclasses import dataclass, field
from review_agent.models import FileSummaryBatch
@dataclass(frozen=True)
class WorkflowContext:
    """Immutable input handed to every skill when it runs."""

    # The batch the skill operates on.
    batch: FileSummaryBatch
@dataclass
class SkillResult:
    """Value returned by a skill run."""

    # Whether the skill finished successfully.
    success: bool
    # Skill-specific output; each instance gets its own dict.
    data: dict = field(default_factory=dict)
    # Optional human-readable message.
    message: str = ""
class BaseSkill:
    """Base class for workflow skills; subclasses set ``name`` and override ``run``."""

    # Identifier of the skill; empty on the base class.
    name = ""

    def run(self, context: WorkflowContext) -> SkillResult:
        """Execute the skill for ``context``; must be overridden."""
        raise NotImplementedError()

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
from review_agent.models import FileSummaryItem
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
from .base import BaseSkill, SkillResult, WorkflowContext
class DocumentPageCountSkill(BaseSkill):
    """Workflow skill that counts pages for every item of a batch."""

    name = "document_page_count"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Count pages per item, persist each outcome and update batch totals.

        Items with an unsupported extension are marked UNSUPPORTED without
        being opened. Every other item is counted up to three times, stopping
        at the first result that is not "failed"; ``retry_count`` records how
        many retries were needed. A failing file never fails the skill.
        """
        statuses = FileSummaryItem.StatisticsStatus
        max_attempts = 3
        succeeded = 0
        failed = 0
        unsupported = 0
        uncertain = 0
        pages_total = 0

        for item in context.batch.items.order_by("file_index"):
            if item.file_type not in SUPPORTED_EXTENSIONS:
                item.statistics_status = statuses.UNSUPPORTED
                unsupported += 1
                item.save(update_fields=["statistics_status", "updated_at"])
                continue

            result = None
            for retries_so_far in range(max_attempts):
                result = count_document_pages(item.storage_path)
                item.retry_count = retries_so_far
                if result.status != "failed":
                    break

            item.statistics_status = result.status
            item.page_count = result.page_count
            item.error_message = result.error_message
            item.save(
                update_fields=[
                    "statistics_status",
                    "page_count",
                    "retry_count",
                    "error_message",
                    "updated_at",
                ]
            )

            if result.status == statuses.SUCCESS:
                succeeded += 1
                pages_total += result.page_count or 0
            elif result.status == statuses.UNCERTAIN:
                uncertain += 1
            elif result.status == statuses.UNSUPPORTED:
                unsupported += 1
            else:
                failed += 1

        batch = context.batch
        batch.success_files = succeeded
        batch.failed_files = failed
        batch.unsupported_files = unsupported
        batch.uncertain_files = uncertain
        batch.total_pages = pages_total
        batch.save(
            update_fields=[
                "success_files",
                "failed_files",
                "unsupported_files",
                "uncertain_files",
                "total_pages",
            ]
        )
        return SkillResult(success=True)

View File

@@ -0,0 +1,21 @@
from __future__ import annotations
from pathlib import Path
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.inventory import scan_files_to_items
from .base import BaseSkill, SkillResult, WorkflowContext
class FileInventorySkill(BaseSkill):
    """Workflow skill that turns the batch attachments into inventory items."""

    name = "file_inventory"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Scan the storage path of every bound attachment and create items.

        Returns:
            A successful ``SkillResult`` with ``total_files`` set to the
            number of items created.
        """
        # NOTE(review): only attachment paths are scanned here; confirm that
        # files extracted from archives are meant to be picked up elsewhere.
        bindings = FileSummaryBatchAttachment.objects.filter(batch=context.batch)
        roots = []
        for binding in bindings:
            roots.append(resolve_storage_path(binding.attachment.storage_path))
        created_items = scan_files_to_items(batch=context.batch, roots=roots)
        return SkillResult(success=True, data={"total_files": len(created_items)})

View File

@@ -0,0 +1,12 @@
from __future__ import annotations
from ..services.product_detect import detect_product_name
from .base import BaseSkill, SkillResult, WorkflowContext
class ProductDetectSkill(BaseSkill):
    """Workflow skill that detects and stores the product name of a batch."""

    name = "product_detect"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Run product detection; always succeeds, even when no name is found."""
        detected = detect_product_name(context.batch)
        payload = {"product_name": detected}
        return SkillResult(success=True, data=payload)

View File

@@ -0,0 +1,22 @@
from __future__ import annotations
from .base import BaseSkill, SkillResult, WorkflowContext
class SkillRegistry:
    """Name-indexed collection of skill instances."""

    def __init__(self):
        # Registered skills keyed by their ``name``.
        self._skills: dict[str, BaseSkill] = {}

    def register(self, skill: BaseSkill) -> None:
        """Add ``skill`` under its name, replacing any earlier registration.

        Raises:
            ValueError: if the skill has an empty ``name``.
        """
        skill_name = skill.name
        if not skill_name:
            raise ValueError("Skill 必须声明 name。")
        self._skills[skill_name] = skill

    def get(self, name: str) -> BaseSkill:
        """Return the skill registered as ``name``.

        Raises:
            KeyError: if no such skill is registered.
        """
        try:
            found = self._skills[name]
        except KeyError as missing:
            raise KeyError(f"Skill 未注册：{name}") from missing
        return found

    def execute(self, name: str, context: WorkflowContext) -> SkillResult:
        """Look up the skill called ``name`` and run it with ``context``."""
        skill = self.get(name)
        return skill.run(context)

View File

@@ -0,0 +1,33 @@
from __future__ import annotations
from django.urls import reverse
from review_agent.models import Message
from ..services.export_excel import generate_excel_export
from ..services.report import generate_markdown_report
from .base import BaseSkill, SkillResult, WorkflowContext
class SummaryReportSkill(BaseSkill):
    """Workflow skill that produces the exports and posts the result message."""

    name = "summary_report"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Generate the Markdown and Excel exports and announce them in chat.

        An assistant message containing the detail table and both download
        links is added to the batch conversation.

        Returns:
            A successful ``SkillResult`` carrying the ids of both exports.
        """
        batch = context.batch
        markdown_export, summary_table = generate_markdown_report(batch)
        excel_export = generate_excel_export(batch)

        route_name = "file_summary_export_download"
        markdown_url = reverse(route_name, args=[markdown_export.pk])
        excel_url = reverse(route_name, args=[excel_export.pk])

        message_parts = [
            "文件目录与页数汇总已完成。",
            f"{summary_table}",
            f"[下载 Markdown 报告]({markdown_url}) | [下载 Excel 明细]({excel_url})",
        ]
        Message.objects.create(
            conversation=batch.conversation,
            role=Message.Role.ASSISTANT,
            content="\n\n".join(message_parts),
        )

        export_ids = {
            "markdown_export_id": markdown_export.pk,
            "excel_export_id": excel_export.pk,
        }
        return SkillResult(success=True, data=export_ids)

View File

@@ -0,0 +1,88 @@
from __future__ import annotations
from pathlib import Path
from uuid import uuid4
from django.conf import settings
from django.db import transaction
from django.utils.text import get_valid_filename
from review_agent.models import Conversation, FileAttachment
from .constants import ATTACHMENT_ROOT
def _safe_original_name(name: str) -> str:
    """Return a filesystem-safe version of an uploaded file's name.

    Only the final path component is kept and it is sanitised with Django's
    ``get_valid_filename``. When nothing usable remains (empty name, ``.``,
    ``..`` or a name made only of stripped characters) a random
    ``upload-<hex>`` name is generated instead.
    """
    from django.core.exceptions import SuspiciousFileOperation

    try:
        clean = get_valid_filename(Path(name or "").name)
    except SuspiciousFileOperation:
        # get_valid_filename raises rather than returning "" when no valid
        # name can be derived, so the fallback has to be applied here.
        clean = ""
    return clean or f"upload-{uuid4().hex}"
def _relative_attachment_path(conversation: Conversation, filename: str, version_no: int) -> Path:
    """Build the storage path for one attachment version.

    The result has the shape
    ``<ATTACHMENT_ROOT>/<user_id>/<conversation_pk>/attachments/<stem>_v<version>_<8 hex><suffix>``;
    the random hex part keeps stored names unique.
    """
    original = Path(filename)
    unique_tag = uuid4().hex[:8]
    stored_name = f"{original.stem}_v{version_no}_{unique_tag}{original.suffix}"
    return ATTACHMENT_ROOT.joinpath(
        str(conversation.user_id),
        str(conversation.pk),
        "attachments",
        stored_name,
    )
def _ensure_inside_media_root(path: Path) -> None:
    """Raise ``ValueError`` unless ``path`` resolves to MEDIA_ROOT or below it."""
    root = Path(settings.MEDIA_ROOT).resolve()
    candidate = path.resolve()
    if candidate == root:
        return
    if root in candidate.parents:
        return
    raise ValueError("上传路径必须位于 MEDIA_ROOT 内。")
@transaction.atomic
def save_uploaded_attachment(*, conversation: Conversation, user, uploaded_file) -> FileAttachment:
    """Store an uploaded file on disk and create its versioned attachment row.

    The version number is one higher than the latest attachment with the same
    sanitised name in the conversation. Earlier active versions of that name
    are deactivated and the new record becomes the active one.

    Raises:
        ValueError: if the destination would fall outside MEDIA_ROOT.
    """
    original_name = _safe_original_name(uploaded_file.name)

    same_name = FileAttachment.objects.filter(
        conversation=conversation,
        original_name=original_name,
    )
    previous = same_name.order_by("-version_no").first()
    version_no = previous.version_no + 1 if previous else 1

    relative_path = _relative_attachment_path(conversation, original_name, version_no)
    destination = Path(settings.MEDIA_ROOT) / relative_path
    _ensure_inside_media_root(destination)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("wb") as output:
        output.writelines(uploaded_file.chunks())

    # Only one version per name may stay active.
    FileAttachment.objects.filter(
        conversation=conversation,
        original_name=original_name,
        is_active=True,
    ).update(is_active=False)

    content_type = getattr(uploaded_file, "content_type", "") or ""
    return FileAttachment.objects.create(
        conversation=conversation,
        user=user,
        original_name=original_name,
        version_no=version_no,
        is_active=True,
        storage_path=relative_path.as_posix(),
        file_size=uploaded_file.size,
        content_type=content_type,
    )
def serialize_attachment(attachment: FileAttachment) -> dict[str, object]:
    """Convert an attachment into a JSON-serialisable dict.

    The storage path is deliberately not part of the output; ``created_at``
    is rendered with ``isoformat()``.
    """
    copied_fields = (
        "original_name",
        "version_no",
        "is_active",
        "file_size",
        "content_type",
        "upload_status",
    )
    serialized: dict[str, object] = {"id": attachment.pk}
    for field_name in copied_fields:
        serialized[field_name] = getattr(attachment, field_name)
    serialized["created_at"] = attachment.created_at.isoformat()
    return serialized

View File

@@ -0,0 +1,124 @@
from django.contrib.auth.decorators import login_required
from pathlib import Path
from django.http import FileResponse, Http404, JsonResponse
from django.views.decorators.http import require_http_methods
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment
from review_agent.models import FileSummaryBatch, WorkflowEvent
from .events import serialize_event
from .storage import save_uploaded_attachment, serialize_attachment
def _conversation_for_user(user, conversation_id: int) -> Conversation:
    """Return the conversation ``conversation_id`` owned by ``user``.

    Raises:
        Http404: if there is no such conversation for this user.
    """
    owned = Conversation.objects.filter(pk=conversation_id, user=user)
    conversation = owned.first()
    if conversation:
        return conversation
    raise Http404("对话不存在。")
@require_http_methods(["POST", "GET"])
@login_required
def attachments(request, conversation_id: int):
    """List (GET) or upload (POST) the attachments of a conversation.

    POST reads the uploaded files from the ``files`` form field and answers
    400 when none were sent. GET returns every attachment of the
    conversation ordered by name, newest version first.
    """
    conversation = _conversation_for_user(request.user, conversation_id)

    if request.method != "POST":
        listing = FileAttachment.objects.filter(conversation=conversation).order_by(
            "original_name",
            "-version_no",
        )
        return JsonResponse({"attachments": [serialize_attachment(item) for item in listing]})

    uploads = request.FILES.getlist("files")
    if not uploads:
        return JsonResponse({"error": "请选择至少一个文件。"}, status=400)

    saved = []
    for uploaded_file in uploads:
        saved.append(
            save_uploaded_attachment(
                conversation=conversation,
                user=request.user,
                uploaded_file=uploaded_file,
            )
        )
    return JsonResponse({"attachments": [serialize_attachment(item) for item in saved]})
@require_http_methods(["DELETE"])
@login_required
def attachment_detail(request, conversation_id: int, attachment_id: int):
    """Soft-delete one attachment of the current user's conversation.

    The row is kept: its status becomes DELETED and it is deactivated.
    Answers 404 when the conversation or the attachment is not found.
    """
    conversation = _conversation_for_user(request.user, conversation_id)
    matches = FileAttachment.objects.filter(
        pk=attachment_id,
        conversation=conversation,
        user=request.user,
    )
    attachment = matches.first()
    if attachment is None:
        raise Http404("附件不存在。")

    attachment.is_active = False
    attachment.upload_status = FileAttachment.UploadStatus.DELETED
    attachment.save(update_fields=["upload_status", "is_active"])
    return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return the current state of a batch and of its workflow nodes as JSON.

    Answers 404 when the batch does not exist or belongs to another user.
    Nodes are listed in creation (id) order.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")

    batch_payload = {
        "id": batch.pk,
        "batch_no": batch.batch_no,
        "status": batch.status,
        "product_name": batch.product_name,
        "total_files": batch.total_files,
        "success_files": batch.success_files,
        "failed_files": batch.failed_files,
        "total_pages": batch.total_pages,
    }

    node_payloads = []
    for node in batch.node_runs.order_by("id"):
        node_payloads.append(
            {
                "node_code": node.node_code,
                "node_name": node.node_name,
                "status": node.status,
                "progress": node.progress,
                "message": node.message,
            }
        )

    return JsonResponse({"batch": batch_payload, "nodes": node_payloads})
@require_http_methods(["GET"])
@login_required
def batch_events(request, batch_id: int):
    """Return the workflow events of a batch newer than the ``after`` cursor.

    ``after`` is an event id taken from the query string; a missing, empty
    or non-numeric value is treated as 0. Events are returned in id order.
    Answers 404 when the batch does not exist or belongs to another user.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not batch:
        raise Http404("批次不存在。")

    raw_cursor = request.GET.get("after") or "0"
    try:
        cursor = int(raw_cursor)
    except ValueError:
        cursor = 0

    newer_events = WorkflowEvent.objects.filter(batch=batch, pk__gt=cursor).order_by("id")
    serialized = [serialize_event(event) for event in newer_events]
    return JsonResponse({"events": serialized})
@require_http_methods(["GET"])
@login_required
def export_download(request, export_id: int):
    """Stream an exported summary file owned by the current user.

    Answers 404 (Http404) when the export record does not exist or belongs
    to a batch of another user, and a JSON 404 when the record exists but
    its file is missing on disk.
    """
    exported = ExportedSummaryFile.objects.filter(
        pk=export_id,
        batch__user=request.user,
    ).first()
    if not exported:
        raise Http404("导出文件不存在。")
    try:
        # Open directly instead of exists()-then-open(): avoids the race with
        # a concurrent cleanup and a 500 when the path is not a regular file.
        handle = Path(exported.storage_path).open("rb")
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return JsonResponse({"error": "文件不存在。"}, status=404)
    # FileResponse takes ownership of the handle and closes it.
    return FileResponse(handle, as_attachment=True, filename=exported.file_name)

View File

@@ -0,0 +1,154 @@
from __future__ import annotations
from threading import Thread
from uuid import uuid4
from django.db import transaction
from django.utils import timezone
from review_agent.models import (
Conversation,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
Message,
WorkflowNodeRun,
)
from .events import record_event
from .skills.archive_extract import ArchiveExtractSkill
from .skills.base import WorkflowContext
from .skills.document_page_count import DocumentPageCountSkill
from .skills.file_inventory import FileInventorySkill
from .skills.product_detect import ProductDetectSkill
from .skills.registry import SkillRegistry
from .skills.summary_report import SummaryReportSkill
# Ordered workflow nodes as (node_code, display name, skill name) tuples.
# An empty skill name marks a node that runs no skill.
NODE_DEFINITIONS = [
    ("upload", "附件固化", ""),
    ("extract", "压缩包解压", "archive_extract"),
    ("inventory", "文件扫描", "file_inventory"),
    ("page_count", "页数统计", "document_page_count"),
    ("product_detect", "产品识别", "product_detect"),
    ("report", "报告输出", "summary_report"),
    ("complete", "完成", ""),
]
def default_skill_registry() -> SkillRegistry:
    """Return a new registry holding one instance of every built-in skill."""
    builtin_skills = (
        ArchiveExtractSkill,
        FileInventorySkill,
        DocumentPageCountSkill,
        ProductDetectSkill,
        SummaryReportSkill,
    )
    registry = SkillRegistry()
    for skill_class in builtin_skills:
        registry.register(skill_class())
    return registry
def build_batch_no() -> str:
    """Return a batch number of the form ``FS-<local YYYYmmddHHMMSS>-<6 hex>``."""
    timestamp = timezone.localtime().strftime("%Y%m%d%H%M%S")
    random_suffix = uuid4().hex[:6]
    return f"FS-{timestamp}-{random_suffix}"
@transaction.atomic
def create_file_summary_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
) -> FileSummaryBatch:
    """Create a batch from the conversation's active attachments.

    Inside one transaction the active, non-deleted attachments are locked,
    bound to a new batch and marked BOUND; one node run is created per entry
    of ``NODE_DEFINITIONS`` and a ``workflow_created`` event is recorded.

    Raises:
        ValueError: if the conversation has no usable attachment.
    """
    candidates = (
        FileAttachment.objects.select_for_update()
        .filter(conversation=conversation, is_active=True)
        .exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        .order_by("original_name", "-created_at")
    )
    active_attachments = list(candidates)
    if not active_attachments:
        raise ValueError("当前对话没有可用附件。")

    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        batch_no=build_batch_no(),
    )

    for attachment in active_attachments:
        FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment)
        attachment.upload_status = FileAttachment.UploadStatus.BOUND
        attachment.save(update_fields=["upload_status"])

    for definition in NODE_DEFINITIONS:
        node_code, node_name = definition[0], definition[1]
        WorkflowNodeRun.objects.create(batch=batch, node_code=node_code, node_name=node_name)

    created_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", created_payload)
    return batch
class WorkflowExecutor:
    """Runs the nodes of a file-summary batch in order and records events."""

    def __init__(self, batch: FileSummaryBatch, registry: SkillRegistry | None = None):
        self.batch = batch
        # Fall back to the built-in skills when no registry is supplied.
        self.registry = registry if registry else default_skill_registry()

    def run(self) -> None:
        """Execute every node; mark the batch SUCCESS or FAILED accordingly.

        Any exception raised by a node stops the run, is stored as the batch
        error message and reported through a ``workflow_failed`` event; it is
        not re-raised.
        """
        batch = self.batch
        batch.status = FileSummaryBatch.Status.RUNNING
        batch.started_at = timezone.now()
        batch.save(update_fields=["status", "started_at"])
        record_event(batch, "workflow_started", {"batch_id": batch.pk})

        try:
            for node in batch.node_runs.order_by("id"):
                self._run_node(node)
        except Exception as exc:
            batch.status = FileSummaryBatch.Status.FAILED
            batch.error_message = str(exc)
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(batch, "workflow_failed", {"message": str(exc)})
        else:
            batch.status = FileSummaryBatch.Status.SUCCESS
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "finished_at"])
            record_event(batch, "workflow_completed", {"batch_id": batch.pk})

    def _emit_node_progress(self, node: WorkflowNodeRun) -> None:
        """Record a ``node_progress`` event with the node's current state."""
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress},
        )

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Run one node: mark it running, execute its skill, mark it done.

        Raises:
            RuntimeError: if the node's skill reports ``success=False``.
        """
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = timezone.now()
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        self._emit_node_progress(node)

        # Nodes without a matching definition or with an empty skill name
        # run no skill.
        skill_name = ""
        for code, _name, skill in NODE_DEFINITIONS:
            if code == node.node_code:
                skill_name = skill
                break

        if skill_name:
            # NOTE(review): when the skill fails or raises, the node row keeps
            # its RUNNING status; only the batch is marked failed by run().
            outcome = self.registry.execute(skill_name, WorkflowContext(batch=self.batch))
            if not outcome.success:
                raise RuntimeError(outcome.message or f"{node.node_name}执行失败")

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        self._emit_node_progress(node)
def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None:
    """Run the workflow for ``batch``, in the background by default.

    Args:
        batch: The batch to process.
        async_run: When true the executor runs in a daemon thread and this
            function returns immediately; when false it runs inline.
    """
    executor = WorkflowExecutor(batch)
    if not async_run:
        executor.run()
        return

    def _run_and_release() -> None:
        from django.db import connections

        try:
            executor.run()
        finally:
            # Connections opened in this thread are outside the request
            # cycle, so Django never closes them; release them explicitly.
            connections.close_all()

    Thread(target=_run_and_release, daemon=True).start()

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
from dataclasses import dataclass
from review_agent.models import Conversation, FileAttachment
# Substrings that mark a chat message as a request for the file-summary workflow.
TRIGGER_KEYWORDS = ("自动汇总", "文件目录", "页数", "目录与页数", "文件清单")
@dataclass(frozen=True)
class TriggerResult:
    """Immutable decision on whether a message starts a workflow."""

    # True when a workflow should be started.
    should_start: bool
    # Workflow identifier; empty when nothing is started.
    workflow_type: str = ""
    # Why no workflow is started; empty otherwise.
    reason: str = ""
def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the file-summary workflow.

    Returns:
        * ``reason="not_matched"`` when the text has no trigger keyword;
        * ``reason="missing_attachment"`` when it does but the conversation
          has no active, non-deleted attachment;
        * ``should_start=True`` with ``workflow_type="file_summary"`` otherwise.
    """
    text = (content or "").strip()
    keyword_found = False
    for keyword in TRIGGER_KEYWORDS:
        if keyword in text:
            keyword_found = True
            break
    if not keyword_found:
        return TriggerResult(should_start=False, reason="not_matched")

    usable_attachments = FileAttachment.objects.filter(
        conversation=conversation,
        is_active=True,
    ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    if usable_attachments.exists():
        return TriggerResult(should_start=True, workflow_type="file_summary")
    return TriggerResult(should_start=False, reason="missing_attachment")

View File

@@ -0,0 +1,481 @@
# Generated by Django 5.2.14 on 2026-06-05 17:09
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
# Schema migration that creates the file-summary tables (attachments, batches,
# exported files, batch/attachment bindings, summary items, workflow events,
# workflow node runs) and then adds their indexes and unique constraints.
# Runs after the app's initial migration and the configured user model.
dependencies = [
("review_agent", "0001_initial"),
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
# Table ra_file_attachment: one uploaded file version per row.
migrations.CreateModel(
name="FileAttachment",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("original_name", models.CharField(max_length=255)),
("version_no", models.PositiveIntegerField(default=1)),
("is_active", models.BooleanField(default=True)),
("storage_path", models.CharField(max_length=500)),
("file_size", models.BigIntegerField(default=0)),
(
"content_type",
models.CharField(blank=True, default="", max_length=120),
),
(
"upload_status",
models.CharField(
choices=[
("uploaded", "已上传"),
("bound", "已绑定"),
("deleted", "已删除"),
],
default="uploaded",
max_length=20,
),
),
("created_at", models.DateTimeField(auto_now_add=True)),
(
"conversation",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="file_attachments",
to="review_agent.conversation",
),
),
(
"user",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="review_file_attachments",
to=settings.AUTH_USER_MODEL,
),
),
],
options={
"db_table": "ra_file_attachment",
"ordering": ["-created_at", "-id"],
},
),
# Table ra_file_summary_batch: one workflow run with its counters and timestamps.
migrations.CreateModel(
name="FileSummaryBatch",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("batch_no", models.CharField(max_length=64, unique=True)),
(
"product_name",
models.CharField(blank=True, default="", max_length=200),
),
(
"status",
models.CharField(
choices=[
("pending", "待执行"),
("running", "执行中"),
("success", "成功"),
("failed", "失败"),
],
default="pending",
max_length=20,
),
),
("total_files", models.IntegerField(default=0)),
("supported_files", models.IntegerField(default=0)),
("success_files", models.IntegerField(default=0)),
("failed_files", models.IntegerField(default=0)),
("unsupported_files", models.IntegerField(default=0)),
("uncertain_files", models.IntegerField(default=0)),
("total_pages", models.IntegerField(default=0)),
("work_dir", models.CharField(blank=True, default="", max_length=500)),
("error_message", models.TextField(blank=True, default="")),
("created_at", models.DateTimeField(auto_now_add=True)),
("started_at", models.DateTimeField(blank=True, null=True)),
("finished_at", models.DateTimeField(blank=True, null=True)),
(
"conversation",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="file_summary_batches",
to="review_agent.conversation",
),
),
(
"trigger_message",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
related_name="triggered_file_summary_batches",
to="review_agent.message",
),
),
(
"user",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="review_file_summary_batches",
to=settings.AUTH_USER_MODEL,
),
),
],
options={
"db_table": "ra_file_summary_batch",
"ordering": ["-created_at", "-id"],
},
),
# Table ra_exported_summary_file: generated Markdown/Excel exports of a batch.
migrations.CreateModel(
name="ExportedSummaryFile",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"export_type",
models.CharField(
choices=[("markdown", "Markdown"), ("excel", "Excel")],
max_length=20,
),
),
("file_name", models.CharField(max_length=255)),
("storage_path", models.CharField(max_length=500)),
(
"status",
models.CharField(
choices=[("success", "成功"), ("failed", "失败")],
default="success",
max_length=20,
),
),
("error_message", models.TextField(blank=True, default="")),
("created_at", models.DateTimeField(auto_now_add=True)),
(
"batch",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="exports",
to="review_agent.filesummarybatch",
),
),
],
options={
"db_table": "ra_exported_summary_file",
"ordering": ["-created_at", "-id"],
},
),
# Table ra_file_summary_batch_attachment: links a batch to the attachments it uses.
migrations.CreateModel(
name="FileSummaryBatchAttachment",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"source_role",
models.CharField(
choices=[("archive", "压缩包"), ("multi_file", "多文件")],
default="multi_file",
max_length=20,
),
),
("created_at", models.DateTimeField(auto_now_add=True)),
(
"attachment",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="batch_bindings",
to="review_agent.fileattachment",
),
),
(
"batch",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="batch_attachments",
to="review_agent.filesummarybatch",
),
),
],
options={
"db_table": "ra_file_summary_batch_attachment",
},
),
# Table ra_file_summary_item: one scanned file with its page-count result.
migrations.CreateModel(
name="FileSummaryItem",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("file_index", models.PositiveIntegerField()),
(
"directory_level",
models.CharField(blank=True, default="", max_length=300),
),
("file_name", models.CharField(max_length=255)),
("file_type", models.CharField(max_length=20)),
("relative_path", models.CharField(max_length=500)),
("storage_path", models.CharField(max_length=500)),
("page_count", models.IntegerField(blank=True, null=True)),
(
"statistics_status",
models.CharField(
choices=[
("success", "成功"),
("failed", "失败"),
("unsupported", "不支持"),
("uncertain", "不确定"),
("skipped", "跳过"),
],
default="skipped",
max_length=20,
),
),
("retry_count", models.PositiveIntegerField(default=0)),
("error_message", models.TextField(blank=True, default="")),
("created_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
(
"batch",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="items",
to="review_agent.filesummarybatch",
),
),
],
options={
"db_table": "ra_file_summary_item",
"ordering": ["file_index", "id"],
},
),
# Table ra_workflow_event: persisted workflow events, ordered by id.
migrations.CreateModel(
name="WorkflowEvent",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("event_type", models.CharField(max_length=40)),
("payload", models.JSONField(default=dict)),
("created_at", models.DateTimeField(auto_now_add=True)),
(
"batch",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="events",
to="review_agent.filesummarybatch",
),
),
],
options={
"db_table": "ra_workflow_event",
"ordering": ["id"],
},
),
# Table ra_workflow_node_run: status and progress of one workflow node per batch.
migrations.CreateModel(
name="WorkflowNodeRun",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("node_code", models.CharField(max_length=40)),
("node_name", models.CharField(max_length=80)),
(
"status",
models.CharField(
choices=[
("pending", "等待中"),
("running", "执行中"),
("retrying", "重试中"),
("success", "成功"),
("failed", "失败"),
("skipped", "跳过"),
],
default="pending",
max_length=20,
),
),
("progress", models.PositiveIntegerField(default=0)),
("message", models.TextField(blank=True, default="")),
("started_at", models.DateTimeField(blank=True, null=True)),
("finished_at", models.DateTimeField(blank=True, null=True)),
(
"batch",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="node_runs",
to="review_agent.filesummarybatch",
),
),
],
options={
"db_table": "ra_workflow_node_run",
},
),
# Indexes and unique constraints for the tables created above, grouped by model.
migrations.AddIndex(
model_name="fileattachment",
index=models.Index(
fields=["conversation", "created_at"],
name="idx_ra_attachment_conv_created",
),
),
migrations.AddIndex(
model_name="fileattachment",
index=models.Index(
fields=["user", "created_at"], name="idx_ra_attachment_user_created"
),
),
migrations.AddIndex(
model_name="fileattachment",
index=models.Index(
fields=["conversation", "original_name", "is_active"],
name="idx_ra_attachment_active",
),
),
migrations.AddConstraint(
model_name="fileattachment",
constraint=models.UniqueConstraint(
fields=("conversation", "original_name", "version_no"),
name="uq_ra_attachment_conv_name_version",
),
),
migrations.AddIndex(
model_name="filesummarybatch",
index=models.Index(
fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"
),
),
migrations.AddIndex(
model_name="filesummarybatch",
index=models.Index(
fields=["user", "created_at"], name="idx_ra_batch_user_created"
),
),
migrations.AddIndex(
model_name="filesummarybatch",
index=models.Index(
fields=["status", "created_at"], name="idx_ra_batch_status"
),
),
migrations.AddIndex(
model_name="exportedsummaryfile",
index=models.Index(
fields=["batch", "export_type"], name="idx_ra_export_batch_type"
),
),
migrations.AddIndex(
model_name="exportedsummaryfile",
index=models.Index(
fields=["batch", "created_at"], name="idx_ra_export_batch_created"
),
),
migrations.AddIndex(
model_name="filesummarybatchattachment",
index=models.Index(
fields=["batch", "created_at"], name="idx_ra_batch_attachment_batch"
),
),
migrations.AddIndex(
model_name="filesummarybatchattachment",
index=models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
),
migrations.AddConstraint(
model_name="filesummarybatchattachment",
constraint=models.UniqueConstraint(
fields=("batch", "attachment"), name="uq_ra_batch_attachment"
),
),
migrations.AddIndex(
model_name="filesummaryitem",
index=models.Index(
fields=["batch", "file_index"], name="idx_ra_item_batch_index"
),
),
migrations.AddIndex(
model_name="filesummaryitem",
index=models.Index(
fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"
),
),
migrations.AddIndex(
model_name="filesummaryitem",
index=models.Index(
fields=["batch", "file_type"], name="idx_ra_item_batch_type"
),
),
migrations.AddConstraint(
model_name="filesummaryitem",
constraint=models.UniqueConstraint(
fields=("batch", "relative_path"), name="uq_ra_item_batch_relative_path"
),
),
migrations.AddIndex(
model_name="workflowevent",
index=models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
),
migrations.AddIndex(
model_name="workflowevent",
index=models.Index(
fields=["batch", "created_at"], name="idx_ra_event_batch_created"
),
),
migrations.AddIndex(
model_name="workflownoderun",
index=models.Index(
fields=["batch", "status"], name="idx_ra_node_batch_status"
),
),
migrations.AddConstraint(
model_name="workflownoderun",
constraint=models.UniqueConstraint(
fields=("batch", "node_code"), name="uq_ra_node_batch_code"
),
),
]

View File

@@ -42,3 +42,293 @@ class Message(models.Model):
def __str__(self) -> str:
return f"{self.get_role_display()} - {self.conversation_id}"
class FileAttachment(models.Model):
"""Stores an uploaded file version for one conversation."""
# Values stored in `upload_status`; the second item of each pair is the
# display label.
class UploadStatus(models.TextChoices):
UPLOADED = "uploaded", "已上传"
BOUND = "bound", "已绑定"
DELETED = "deleted", "已删除"
# Owning conversation; rows are deleted together with it (CASCADE).
conversation = models.ForeignKey(
Conversation,
on_delete=models.CASCADE,
related_name="file_attachments",
)
# Owning user; rows are deleted together with the user (CASCADE).
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="review_file_attachments",
)
# `original_name` plus `version_no` identify one version inside a conversation
# (enforced by the unique constraint in Meta).
original_name = models.CharField(max_length=255)
version_no = models.PositiveIntegerField(default=1)
# NOTE(review): presumably flags the version currently in effect for this
# file name — confirm against the upload service.
is_active = models.BooleanField(default=True)
# NOTE(review): whether this path is absolute or relative to MEDIA_ROOT is not
# visible in this file — confirm.
storage_path = models.CharField(max_length=500)
file_size = models.BigIntegerField(default=0)
content_type = models.CharField(max_length=120, blank=True, default="")
upload_status = models.CharField(
max_length=20,
choices=UploadStatus.choices,
default=UploadStatus.UPLOADED,
)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = "ra_file_attachment"
# Newest first; id breaks ties between equal timestamps.
ordering = ["-created_at", "-id"]
constraints = [
models.UniqueConstraint(
fields=["conversation", "original_name", "version_no"],
name="uq_ra_attachment_conv_name_version",
)
]
indexes = [
models.Index(
fields=["conversation", "created_at"],
name="idx_ra_attachment_conv_created",
),
models.Index(
fields=["user", "created_at"],
name="idx_ra_attachment_user_created",
),
models.Index(
fields=["conversation", "original_name", "is_active"],
name="idx_ra_attachment_active",
),
]
def __str__(self) -> str:
return f"{self.original_name} v{self.version_no}"
class FileSummaryBatch(models.Model):
"""Tracks one automatic file inventory and page-count workflow run."""
# Values stored in `status`; the second item of each pair is the display label.
class Status(models.TextChoices):
PENDING = "pending", "待执行"
RUNNING = "running", "执行中"
SUCCESS = "success", "成功"
FAILED = "failed", "失败"
# Owning conversation; batches are deleted together with it (CASCADE).
conversation = models.ForeignKey(
Conversation,
on_delete=models.CASCADE,
related_name="file_summary_batches",
)
# Owning user; batches are deleted together with the user (CASCADE).
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="review_file_summary_batches",
)
# Chat message linked to this batch; optional, and set to NULL (rather than
# deleting the batch) when that message is deleted.
trigger_message = models.ForeignKey(
Message,
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="triggered_file_summary_batches",
)
# Unique across all batches.
batch_no = models.CharField(max_length=64, unique=True)
product_name = models.CharField(max_length=200, blank=True, default="")
status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
# Aggregate counters; all default to 0.
# NOTE(review): presumably filled in by the workflow nodes — confirm where
# they are updated.
total_files = models.IntegerField(default=0)
supported_files = models.IntegerField(default=0)
success_files = models.IntegerField(default=0)
failed_files = models.IntegerField(default=0)
unsupported_files = models.IntegerField(default=0)
uncertain_files = models.IntegerField(default=0)
total_pages = models.IntegerField(default=0)
work_dir = models.CharField(max_length=500, blank=True, default="")
error_message = models.TextField(blank=True, default="")
created_at = models.DateTimeField(auto_now_add=True)
# Both timestamps are nullable, so a batch can exist before it has started
# or finished.
started_at = models.DateTimeField(null=True, blank=True)
finished_at = models.DateTimeField(null=True, blank=True)
class Meta:
db_table = "ra_file_summary_batch"
# Newest first; id breaks ties between equal timestamps.
ordering = ["-created_at", "-id"]
indexes = [
models.Index(fields=["conversation", "created_at"], name="idx_ra_batch_conv_created"),
models.Index(fields=["user", "created_at"], name="idx_ra_batch_user_created"),
models.Index(fields=["status", "created_at"], name="idx_ra_batch_status"),
]
def __str__(self) -> str:
return self.batch_no
class FileSummaryBatchAttachment(models.Model):
"""Binds a workflow batch to the exact attachment versions it uses."""
# Values stored in `source_role`; the second item of each pair is the display
# label.
class SourceRole(models.TextChoices):
ARCHIVE = "archive", "压缩包"
MULTI_FILE = "multi_file", "多文件"
# The binding is deleted when either side (batch or attachment) is deleted.
batch = models.ForeignKey(
FileSummaryBatch,
on_delete=models.CASCADE,
related_name="batch_attachments",
)
attachment = models.ForeignKey(
FileAttachment,
on_delete=models.CASCADE,
related_name="batch_bindings",
)
source_role = models.CharField(
max_length=20,
choices=SourceRole.choices,
default=SourceRole.MULTI_FILE,
)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = "ra_file_summary_batch_attachment"
# An attachment can be bound to a given batch at most once.
constraints = [
models.UniqueConstraint(
fields=["batch", "attachment"],
name="uq_ra_batch_attachment",
)
]
indexes = [
models.Index(
fields=["batch", "created_at"],
name="idx_ra_batch_attachment_batch",
),
models.Index(fields=["attachment"], name="idx_ra_batch_attach_file"),
]
class FileSummaryItem(models.Model):
"""Stores one scanned file and its page-count result."""
# Values stored in `statistics_status`; the second item of each pair is the
# display label.
class StatisticsStatus(models.TextChoices):
SUCCESS = "success", "成功"
FAILED = "failed", "失败"
UNSUPPORTED = "unsupported", "不支持"
UNCERTAIN = "uncertain", "不确定"
SKIPPED = "skipped", "跳过"
# Owning batch; items are deleted together with it (CASCADE).
batch = models.ForeignKey(
FileSummaryBatch,
on_delete=models.CASCADE,
related_name="items",
)
# Position used for the default ordering of items (see Meta.ordering).
file_index = models.PositiveIntegerField()
directory_level = models.CharField(max_length=300, blank=True, default="")
file_name = models.CharField(max_length=255)
file_type = models.CharField(max_length=20)
# Unique within a batch (see the constraint in Meta).
relative_path = models.CharField(max_length=500)
storage_path = models.CharField(max_length=500)
# Nullable: an item can exist without a page count.
page_count = models.IntegerField(null=True, blank=True)
statistics_status = models.CharField(
max_length=20,
choices=StatisticsStatus.choices,
default=StatisticsStatus.SKIPPED,
)
retry_count = models.PositiveIntegerField(default=0)
error_message = models.TextField(blank=True, default="")
created_at = models.DateTimeField(auto_now_add=True)
updated_at = models.DateTimeField(auto_now=True)
class Meta:
db_table = "ra_file_summary_item"
ordering = ["file_index", "id"]
constraints = [
models.UniqueConstraint(
fields=["batch", "relative_path"],
name="uq_ra_item_batch_relative_path",
)
]
indexes = [
models.Index(fields=["batch", "file_index"], name="idx_ra_item_batch_index"),
models.Index(fields=["batch", "statistics_status"], name="idx_ra_item_batch_status"),
models.Index(fields=["batch", "file_type"], name="idx_ra_item_batch_type"),
]
class WorkflowNodeRun(models.Model):
"""Stores recoverable status for one workflow node."""
# Values stored in `status`; the second item of each pair is the display label.
class Status(models.TextChoices):
PENDING = "pending", "等待中"
RUNNING = "running", "执行中"
RETRYING = "retrying", "重试中"
SUCCESS = "success", "成功"
FAILED = "failed", "失败"
SKIPPED = "skipped", "跳过"
# Owning batch; node runs are deleted together with it (CASCADE).
batch = models.ForeignKey(
FileSummaryBatch,
on_delete=models.CASCADE,
related_name="node_runs",
)
# `node_code` is unique within a batch (see the constraint in Meta);
# `node_name` is the accompanying display name.
node_code = models.CharField(max_length=40)
node_name = models.CharField(max_length=80)
status = models.CharField(max_length=20, choices=Status.choices, default=Status.PENDING)
# NOTE(review): the UI renders this value followed by "%", so it is presumably
# a 0-100 percentage; no range is enforced here.
progress = models.PositiveIntegerField(default=0)
message = models.TextField(blank=True, default="")
started_at = models.DateTimeField(null=True, blank=True)
finished_at = models.DateTimeField(null=True, blank=True)
class Meta:
db_table = "ra_workflow_node_run"
constraints = [
models.UniqueConstraint(fields=["batch", "node_code"], name="uq_ra_node_batch_code")
]
indexes = [
models.Index(fields=["batch", "status"], name="idx_ra_node_batch_status"),
]
class WorkflowEvent(models.Model):
"""Persists workflow events for SSE replay and diagnostics."""
# Owning batch; events are deleted together with it (CASCADE).
batch = models.ForeignKey(
FileSummaryBatch,
on_delete=models.CASCADE,
related_name="events",
)
event_type = models.CharField(max_length=40)
# Free-form JSON body of the event; defaults to an empty dict.
payload = models.JSONField(default=dict)
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = "ra_workflow_event"
# Ascending id, i.e. insertion order.
ordering = ["id"]
indexes = [
models.Index(fields=["batch", "id"], name="idx_ra_event_batch_id"),
models.Index(fields=["batch", "created_at"], name="idx_ra_event_batch_created"),
]
class ExportedSummaryFile(models.Model):
"""Stores generated report files for permission-checked download."""
# Values stored in `export_type`.
class ExportType(models.TextChoices):
MARKDOWN = "markdown", "Markdown"
EXCEL = "excel", "Excel"
# Values stored in `status`; the second item of each pair is the display label.
class Status(models.TextChoices):
SUCCESS = "success", "成功"
FAILED = "failed", "失败"
# Owning batch; exports are deleted together with it (CASCADE).
batch = models.ForeignKey(
FileSummaryBatch,
on_delete=models.CASCADE,
related_name="exports",
)
# No default: the export type must be given when a row is created.
export_type = models.CharField(max_length=20, choices=ExportType.choices)
file_name = models.CharField(max_length=255)
storage_path = models.CharField(max_length=500)
status = models.CharField(max_length=20, choices=Status.choices, default=Status.SUCCESS)
error_message = models.TextField(blank=True, default="")
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
db_table = "ra_exported_summary_file"
# Newest first; id breaks ties between equal timestamps.
ordering = ["-created_at", "-id"]
indexes = [
models.Index(fields=["batch", "export_type"], name="idx_ra_export_batch_type"),
models.Index(fields=["batch", "created_at"], name="idx_ra_export_batch_created"),
]

View File

@@ -3,8 +3,11 @@ from __future__ import annotations
import json
from django.db.models import Q, QuerySet
from django.conf import settings
from django.utils import timezone
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from .file_summary.workflow_trigger import evaluate_file_summary_trigger
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
from .models import Conversation, Message
@@ -88,6 +91,7 @@ def stream_message(conversation: Conversation, content: str):
user_message = append_user_message(conversation, content)
assistant_parts: list[str] = []
trigger = evaluate_file_summary_trigger(conversation, content)
yield sse_event(
"meta",
@@ -99,6 +103,51 @@ def stream_message(conversation: Conversation, content: str):
},
)
if trigger.reason == "missing_attachment":
reply_content = "请先在当前对话右侧上传需要汇总的文件或压缩包,然后再发送自动汇总指令。"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
if trigger.should_start:
batch = create_file_summary_batch(
conversation=conversation,
user=conversation.user,
trigger_message=user_message,
)
start_file_summary_workflow(
batch,
async_run=getattr(settings, "FILE_SUMMARY_ASYNC", True),
)
reply_content = f"已启动文件目录与页数自动汇总工作流,批次号:{batch.batch_no}"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event(
"workflow_started",
{
"workflow_type": "file_summary",
"batch_id": batch.pk,
"batch_no": batch.batch_no,
},
)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
try:
for chunk in stream_reply(conversation, content):
assistant_parts.append(chunk)

37
review_agent/urls.py Normal file
View File

@@ -0,0 +1,37 @@
from django.urls import path
from .file_summary.views import attachment_detail, attachments, batch_events, batch_status, export_download
# Routes of the file-summary API: conversation attachments (collection and
# single item), batch status, batch events, and export download.
urlpatterns = [
# NOTE(review): this entry and the next one share the same route string and
# the same view. Django resolves a request to the first matching pattern, so
# the second entry only adds another name for reverse(); presumably the view
# dispatches on the HTTP method — confirm in file_summary.views.
path(
"api/review-agent/conversations/<int:conversation_id>/attachments/",
attachments,
name="file_summary_attachment_upload",
),
path(
"api/review-agent/conversations/<int:conversation_id>/attachments/",
attachments,
name="file_summary_attachment_list",
),
path(
"api/review-agent/conversations/<int:conversation_id>/attachments/<int:attachment_id>/",
attachment_detail,
name="file_summary_attachment_detail",
),
path(
"api/review-agent/file-summary/<int:batch_id>/status/",
batch_status,
name="file_summary_batch_status",
),
path(
"api/review-agent/file-summary/<int:batch_id>/events/",
batch_events,
name="file_summary_batch_events",
),
path(
"api/review-agent/file-summary/exports/<int:export_id>/download/",
export_download,
name="file_summary_export_download",
),
]

View File

@@ -10,6 +10,7 @@ from .services import (
send_message,
stream_message,
)
from .models import FileAttachment, FileSummaryBatch
@login_required
@@ -49,6 +50,8 @@ def workspace(request: HttpRequest) -> HttpResponse:
"conversations": conversations,
"current_conversation": current,
"messages": current.messages.all() if current else [],
"attachments": FileAttachment.objects.filter(conversation=current).order_by("original_name", "-version_no") if current else [],
"summary_batches": FileSummaryBatch.objects.filter(conversation=current).prefetch_related("node_runs").order_by("-created_at")[:5] if current else [],
},
)

View File

@@ -127,7 +127,7 @@ input:focus {
.workspace {
display: grid;
grid-template-columns: 296px minmax(0, 1fr);
grid-template-columns: 296px minmax(0, 1fr) 340px;
min-height: 100vh;
}
@@ -760,9 +760,176 @@ input:focus {
padding-right: 12px;
}
.summary-panel {
display: grid;
grid-template-rows: auto auto minmax(0, 1fr);
gap: 14px;
min-width: 0;
max-height: 100vh;
padding: 16px;
overflow: auto;
border-left: 1px solid var(--line);
background: #ffffff;
}
.summary-section {
display: grid;
gap: 12px;
padding: 14px;
border: 1px solid var(--line);
border-radius: 8px;
background: var(--panel-soft);
}
.summary-heading,
.summary-subheading,
.workflow-card header {
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
}
.summary-heading h2,
.summary-subheading h3 {
margin: 0;
font-size: 16px;
}
.summary-heading span {
color: var(--muted);
font-size: 12px;
}
.upload-dropzone {
display: grid;
place-items: center;
gap: 6px;
min-height: 112px;
padding: 18px;
border: 1px dashed var(--accent);
border-radius: 8px;
background: #f5f9ff;
color: var(--text);
cursor: pointer;
text-align: center;
}
.upload-dropzone.dragging {
border-color: var(--accent-dark);
background: #eaf2ff;
}
.upload-dropzone span,
.upload-status,
.attachment-item span,
.workflow-card em {
color: var(--muted);
font-size: 12px;
}
.upload-status {
margin: 0;
line-height: 1.5;
}
.attachment-list,
.workflow-card-list {
display: grid;
gap: 10px;
}
.attachment-item,
.workflow-card {
display: grid;
gap: 10px;
padding: 12px;
border: 1px solid var(--line);
border-radius: 8px;
background: #ffffff;
}
.attachment-item {
grid-template-columns: minmax(0, 1fr) auto;
align-items: center;
}
.attachment-item strong,
.workflow-card strong {
display: block;
overflow-wrap: anywhere;
font-size: 13px;
}
.attachment-item em,
.workflow-status {
padding: 3px 8px;
border-radius: 999px;
background: #eaf2ff;
color: var(--accent);
font-size: 11px;
font-style: normal;
font-weight: 700;
}
.workflow-card ol {
display: grid;
gap: 8px;
margin: 0;
padding: 0;
list-style: none;
}
.node-status {
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
padding: 8px 0;
border-top: 1px solid var(--line);
font-size: 13px;
}
.status-running,
.status-retrying {
color: var(--accent);
}
.status-success {
color: #047857;
}
.status-failed {
color: var(--danger-text);
}
.panel-empty {
padding: 14px;
border: 1px dashed var(--line);
border-radius: 8px;
color: var(--muted);
text-align: center;
}
.message-bubble table {
width: 100%;
border-collapse: collapse;
font-size: 13px;
}
.message-bubble th,
.message-bubble td {
padding: 8px;
border: 1px solid var(--line);
text-align: left;
vertical-align: top;
}
@media (max-width: 980px) {
.workspace {
grid-template-columns: minmax(0, 1fr);
min-height: 100vh;
overflow: auto;
}
.sidebar {
@@ -815,7 +982,14 @@ input:focus {
}
.chat-stage {
height: calc(100vh - 88px);
min-height: calc(100vh - 88px);
height: auto;
}
.summary-panel {
max-height: none;
border-left: 0;
border-top: 1px solid var(--line);
}
.chat-scroll {
@@ -889,7 +1063,7 @@ input:focus {
width: 20px;
}
.node-dot {
.node-dot {
width: 10px;
height: 10px;
}

View File

@@ -11,6 +11,12 @@
var sendButton = document.getElementById("sendButton");
var conversationIdInput = document.getElementById("conversationIdInput");
var chatStage = document.querySelector(".chat-stage");
var summaryPanel = document.getElementById("summaryPanel");
var uploadDropzone = document.getElementById("uploadDropzone");
var attachmentInput = document.getElementById("attachmentInput");
var attachmentList = document.getElementById("attachmentList");
var uploadStatus = document.getElementById("uploadStatus");
var workflowCardList = document.getElementById("workflowCardList");
var nodeAnchors = [];
if (!workspace) {
@@ -32,7 +38,7 @@
function syncSidebarState() {
if (isMobile()) {
if (workspace.getAttribute("data-sidebar-state") === "collapsed") {
if (workspace.getAttribute("data-sidebar-state") !== "closed") {
workspace.setAttribute("data-sidebar-state", "closed");
}
} else if (workspace.getAttribute("data-sidebar-state") === "closed") {
@@ -147,6 +153,13 @@
return escapeHtml(text).replace(/\n/g, "<br>");
}
// Turn assistant text into HTML for the message bubble. When both `marked`
// and `DOMPurify` are present on `window`, the text is parsed as Markdown and
// the result is sanitized; otherwise it goes through nl2br(). Missing text is
// treated as an empty string.
function renderAssistantContent(text) {
  var source = text || "";
  var canRenderMarkdown = Boolean(window.marked && window.DOMPurify);
  if (!canRenderMarkdown) {
    return nl2br(source);
  }
  var rawHtml = window.marked.parse(source);
  return window.DOMPurify.sanitize(rawHtml);
}
function scrollChatToBottom() {
if (chatScroll) {
chatScroll.scrollTop = chatScroll.scrollHeight;
@@ -169,7 +182,7 @@
bubble.className = "message-bubble";
var text = document.createElement("p");
text.innerHTML = nl2br(content);
text.innerHTML = role === "assistant" ? renderAssistantContent(content) : nl2br(content);
bubble.appendChild(text);
article.appendChild(avatar);
@@ -271,6 +284,149 @@
}
}
// Return the conversation id held by the composer's hidden input, or an empty
// string when that input is not on the page.
function currentConversationId() {
  if (!conversationIdInput) {
    return "";
  }
  return conversationIdInput.value;
}
// Build a URL from a template stored in a data attribute of the summary
// panel: the first occurrence of `token` in the attribute value is replaced
// with `value`. Returns an empty string when the panel is not on the page.
function templateUrl(attributeName, token, value) {
  if (summaryPanel) {
    var template = summaryPanel.getAttribute(attributeName);
    return template.replace(token, value);
  }
  return "";
}
// Rebuild the attachment list of the summary panel.
//
// `attachments` is an array of objects carrying `id`, `original_name`,
// `version_no`, `file_size`, `upload_status` and `is_active`; a missing or
// empty array renders the "暂无附件" placeholder. Does nothing when the list
// container is not on the page.
function renderAttachments(attachments) {
  if (!attachmentList) {
    return;
  }
  // Tolerate null/undefined so a malformed response cannot throw here.
  var entries = attachments || [];
  attachmentList.innerHTML = "";
  if (!entries.length) {
    attachmentList.innerHTML = '<div class="panel-empty">暂无附件</div>';
    return;
  }
  entries.forEach(function (attachment) {
    var item = document.createElement("div");
    item.className = "attachment-item";
    item.setAttribute("data-attachment-id", attachment.id);
    // Every response value is escaped before it reaches innerHTML, including
    // the ones that are expected to be numeric.
    item.innerHTML =
      "<div><strong>" +
      escapeHtml(attachment.original_name) +
      "</strong><span>v" +
      escapeHtml(String(attachment.version_no)) +
      " · " +
      escapeHtml(String(attachment.file_size)) +
      " bytes · " +
      escapeHtml(attachment.upload_status) +
      "</span></div>" +
      (attachment.is_active ? "<em>active</em>" : "");
    attachmentList.appendChild(item);
  });
}
// Fetch the attachments of the current conversation and re-render the list.
// Silently does nothing when there is no current conversation, no summary
// panel, or the server answers with a non-OK status.
async function refreshAttachments() {
  var conversationId = currentConversationId();
  if (!summaryPanel || !conversationId) {
    return;
  }
  var listUrl = templateUrl("data-attachment-url-template", "__conversation_id__", conversationId);
  var response = await fetch(listUrl);
  if (response.ok) {
    var payload = await response.json();
    renderAttachments(payload.attachments || []);
  }
}
// Upload `files` (a FileList or array of File objects) to the current
// conversation and refresh the attachment list.
//
// Progress and errors are reported through the #uploadStatus element when it
// exists. An empty selection is ignored; a missing conversation or summary
// panel shows a hint instead of uploading.
async function uploadFiles(files) {
  function setStatus(message) {
    if (uploadStatus) {
      uploadStatus.textContent = message;
    }
  }

  // Nothing to upload (e.g. a drag that carried no files): stay silent rather
  // than showing the unrelated "select a conversation" hint.
  if (!files || !files.length) {
    return;
  }
  var conversationId = currentConversationId();
  if (!conversationId || !summaryPanel) {
    setStatus("请先创建或选择一个对话。");
    return;
  }

  var data = new FormData();
  Array.prototype.forEach.call(files, function (file) {
    data.append("files", file);
  });
  // The CSRF token lives in the composer form; send the header only when a
  // token is actually available instead of throwing on a missing form.
  var csrf = composer ? new FormData(composer).get("csrfmiddlewaretoken") : null;
  var headers = {};
  if (csrf) {
    headers["X-CSRFToken"] = csrf;
  }
  setStatus("正在上传 " + files.length + " 个文件...");

  try {
    var response = await fetch(templateUrl("data-attachment-url-template", "__conversation_id__", conversationId), {
      method: "POST",
      headers: headers,
      body: data,
    });
    if (!response.ok) {
      throw new Error("上传失败。");
    }
    var payload = await response.json();
    renderAttachments(payload.attachments || []);
  } catch (error) {
    setStatus("上传失败,请重试。");
    return;
  }

  setStatus("上传完成,可发送自动汇总提示词。");
  try {
    await refreshAttachments();
  } catch (error) {
    // The upload itself succeeded and the list was already rendered from the
    // upload response, so a failed follow-up refresh must not be reported as
    // an upload failure.
  }
}
// Return the workflow card for `batch` (an object with `batch_id` and an
// optional `batch_no`), creating it at the top of the card list when it does
// not exist yet. Any "empty" placeholder in the list is removed first.
// Returns null when the card list is missing or no batch is given.
function ensureWorkflowCard(batch) {
  if (!batch || !workflowCardList) {
    return null;
  }
  var placeholder = workflowCardList.querySelector(".panel-empty");
  if (placeholder) {
    placeholder.remove();
  }
  var selector = '[data-batch-id="' + batch.batch_id + '"]';
  var existing = workflowCardList.querySelector(selector);
  if (existing) {
    return existing;
  }
  var title = escapeHtml(batch.batch_no || "文件汇总");
  var created = document.createElement("article");
  created.className = "workflow-card";
  created.setAttribute("data-batch-id", batch.batch_id);
  created.innerHTML =
    "<header><strong>" +
    title +
    '</strong><span class="workflow-status status-running">running</span></header><ol></ol>';
  workflowCardList.prepend(created);
  return created;
}
// Fetch the status of batch `batchId` and update (or create) its workflow
// card: the status badge and one list entry per workflow node.
// Does nothing when the summary panel or batch id is missing, when the server
// answers with a non-OK status, or when the response carries no batch.
async function refreshWorkflowCard(batchId) {
  if (!summaryPanel || !batchId) {
    return;
  }
  var response = await fetch(templateUrl("data-status-url-template", "__batch_id__", batchId));
  if (!response.ok) {
    return;
  }
  var payload = await response.json();
  var batch = payload && payload.batch;
  if (!batch) {
    // Unexpected response shape: leave the existing card untouched.
    return;
  }
  var card = ensureWorkflowCard({
    batch_id: batch.id,
    batch_no: batch.batch_no,
  });
  if (!card) {
    return;
  }
  // ensureWorkflowCard may hand back a card that was not built by it, so the
  // badge and the node list are looked up defensively.
  var status = card.querySelector(".workflow-status");
  if (status) {
    status.textContent = batch.status;
    status.className = "workflow-status status-" + batch.status;
  }
  var list = card.querySelector("ol");
  if (!list) {
    return;
  }
  list.innerHTML = "";
  (payload.nodes || []).forEach(function (node) {
    var item = document.createElement("li");
    item.className = "node-status status-" + node.status;
    item.setAttribute("data-node-code", node.node_code);
    // Escape every response value that is written through innerHTML.
    item.innerHTML =
      "<span>" +
      escapeHtml(node.node_name) +
      "</span><em>" +
      escapeHtml(String(node.progress)) +
      "%</em>";
    list.appendChild(item);
  });
}
async function streamChat(event) {
event.preventDefault();
if (!composer || !promptInput || !sendButton || !chatStage) {
@@ -356,11 +512,14 @@
}
} else if (eventName === "chunk") {
assistantText += payload.delta || "";
assistantMessage.text.innerHTML = nl2br(assistantText);
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
scrollChatToBottom();
} else if (eventName === "error") {
assistantText = payload.message || "模型调用失败。";
assistantMessage.text.innerHTML = nl2br(assistantText);
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
} else if (eventName === "workflow_started") {
ensureWorkflowCard(payload);
refreshWorkflowCard(payload.batch_id);
} else if (eventName === "done") {
if (payload.assistant_message_id) {
assistantMessage.article.id = "message-" + payload.assistant_message_id;
@@ -400,6 +559,28 @@
composer.addEventListener("submit", streamChat);
}
// Wire the upload dropzone: click / keyboard activation opens the hidden file
// input, drag-and-drop and the input's change event both hand files to uploadFiles().
if (uploadDropzone && attachmentInput) {
  var openFilePicker = function () {
    attachmentInput.click();
  };
  uploadDropzone.addEventListener("click", openFilePicker);
  // The dropzone is exposed as role="button" with tabindex="0", so it must
  // also react to Enter and Space like a native button does.
  uploadDropzone.addEventListener("keydown", function (event) {
    if (event.key === "Enter" || event.key === " ") {
      // Prevent Space from scrolling the page.
      event.preventDefault();
      openFilePicker();
    }
  });
  uploadDropzone.addEventListener("dragover", function (event) {
    // preventDefault marks the element as a valid drop target.
    event.preventDefault();
    uploadDropzone.classList.add("dragging");
  });
  uploadDropzone.addEventListener("dragleave", function () {
    uploadDropzone.classList.remove("dragging");
  });
  uploadDropzone.addEventListener("drop", function (event) {
    // Stop the browser from navigating to the dropped file.
    event.preventDefault();
    uploadDropzone.classList.remove("dragging");
    uploadFiles(event.dataTransfer.files);
  });
  attachmentInput.addEventListener("change", function () {
    uploadFiles(attachmentInput.files);
    // Reset so selecting the same file again still fires "change".
    attachmentInput.value = "";
  });
}
window.addEventListener("resize", syncSidebarState);
syncSidebarState();
})();

View File

@@ -164,9 +164,77 @@
</div>
</section>
</section>
<!-- File-summary side panel: upload area, attachment list and workflow cards for the current conversation. -->
<!-- The data-*-url-template attributes carry API URL patterns; the script substitutes the __batch_id__ placeholder of the status template, and presumably fills the other placeholders the same way. -->
<aside
class="summary-panel"
id="summaryPanel"
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
>
<!-- Upload section: the dropzone wraps a hidden multi-file input that the script opens on click. -->
<section class="summary-section upload-section">
<div class="summary-heading">
<h2>文件汇总</h2>
<span>当前对话</span>
</div>
<div class="upload-dropzone" id="uploadDropzone" tabindex="0" role="button">
<input id="attachmentInput" type="file" multiple hidden>
<strong>拖拽文件到这里</strong>
<span>支持多文件、zip、7z、rar</span>
</div>
<!-- Status line below the dropzone; its initial text tells the user which prompt starts the workflow. -->
<p class="upload-status" id="uploadStatus">上传后发送“自动汇总文件目录与页数”启动工作流。</p>
</section>
<!-- Attachment section: one row per entry of the "attachments" context variable, with an empty-state fallback. -->
<section class="summary-section attachment-section">
<div class="summary-subheading">
<h3>附件</h3>
</div>
<div class="attachment-list" id="attachmentList">
{% for attachment in attachments %}
<div class="attachment-item" data-attachment-id="{{ attachment.pk }}">
<div>
<strong>{{ attachment.original_name }}</strong>
<span>v{{ attachment.version_no }} · {{ attachment.file_size }} bytes · {{ attachment.upload_status }}</span>
</div>
<!-- Badge shown only for the attachment flagged is_active. -->
{% if attachment.is_active %}<em>active</em>{% endif %}
</div>
{% empty %}
<div class="panel-empty">暂无附件</div>
{% endfor %}
</div>
</section>
<!-- Workflow section: one card per entry of "summary_batches"; the header/status/ol structure matches the cards the script creates and refreshes at runtime. -->
<section class="summary-section workflow-section">
<div class="summary-subheading">
<h3>工作流</h3>
</div>
<div class="workflow-card-list" id="workflowCardList">
{% for batch in summary_batches %}
<article class="workflow-card" data-batch-id="{{ batch.pk }}">
<header>
<strong>{{ batch.batch_no }}</strong>
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
</header>
<!-- Node rows: one list item per related node run, showing its name and progress percentage. -->
<ol>
{% for node in batch.node_runs.all %}
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
<span>{{ node.node_name }}</span>
<em>{{ node.progress }}%</em>
</li>
{% endfor %}
</ol>
</article>
{% empty %}
<div class="panel-empty">暂无工作流</div>
{% endfor %}
</div>
</section>
</aside>
</main>
{% endblock %}
{% block scripts %}
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.6/dist/purify.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.12/marked.min.js"></script>
<script src="{% static 'js/app.js' %}"></script>
{% endblock %}

View File

@@ -0,0 +1,25 @@
from zipfile import ZipFile
import pytest
from review_agent.file_summary.services.archive import extract_archive
def test_extract_zip_preserves_safe_paths(tmp_path):
    """A zip with only in-tree members is extracted with its directory layout intact."""
    zip_file = tmp_path / "safe.zip"
    with ZipFile(zip_file, "w") as handle:
        handle.writestr("dir/a.txt", "content")
    destination = tmp_path / "out"
    expected_file = destination / "dir" / "a.txt"

    result = extract_archive(zip_file, destination)

    # The service reports exactly the extracted member, at its nested location.
    assert result == [expected_file]
    assert expected_file.read_text(encoding="utf-8") == "content"
def test_extract_zip_rejects_path_traversal(tmp_path):
    """A member name that climbs out of the target directory makes extraction raise ValueError."""
    zip_file = tmp_path / "evil.zip"
    with ZipFile(zip_file, "w") as handle:
        handle.writestr("../evil.txt", "bad")
    destination = tmp_path / "out"

    with pytest.raises(ValueError):
        extract_archive(zip_file, destination)

View File

@@ -0,0 +1,22 @@
import pytest
from django.urls import reverse
from review_agent.models import Conversation
pytestmark = pytest.mark.django_db
def test_workspace_renders_summary_panel(client, django_user_model):
    """The workspace page for a conversation contains the file-summary panel markup."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    client.force_login(owner)

    page_url = f"{reverse('home')}?conversation={chat.pk}"
    response = client.get(page_url)

    assert response.status_code == 200
    html = response.content.decode("utf-8")
    # Panel container, dropzone, workflow list and the trigger-prompt hint must all be rendered.
    expected_markers = (
        'id="summaryPanel"',
        'id="uploadDropzone"',
        'id="workflowCardList"',
        "自动汇总文件目录与页数",
    )
    for marker in expected_markers:
        assert marker in html

View File

@@ -0,0 +1,24 @@
from pathlib import Path
import pytest
from review_agent.file_summary.services.inventory import scan_files_to_items
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_scan_files_to_items_preserves_relative_paths(tmp_path, django_user_model):
    """Scanning a work directory yields one persisted item per file, keyed by root-relative path."""
    work_root = tmp_path / "work"
    nested_dir = work_root / "a"
    nested_dir.mkdir(parents=True)
    (nested_dir / "one.pdf").write_bytes(b"pdf")
    (work_root / "two.txt").write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-I")

    scanned = scan_files_to_items(batch=batch, roots=[work_root])

    relative_paths = [entry.relative_path for entry in scanned]
    assert relative_paths == ["a/one.pdf", "two.txt"]
    assert FileSummaryItem.objects.filter(batch=batch).count() == 2
    # Freshly scanned items have not been page-counted yet.
    assert scanned[0].statistics_status == FileSummaryItem.StatisticsStatus.SKIPPED

View File

@@ -0,0 +1,113 @@
import pytest
from django.contrib.auth import get_user_model
from django.db import IntegrityError, transaction
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
FileSummaryItem,
)
pytestmark = pytest.mark.django_db
def create_user(username="u1"):
    """Create and return a user of the active user model with a fixed test password."""
    user_model = get_user_model()
    return user_model.objects.create_user(username=username, password="pass")
def test_attachment_versions_are_unique_per_conversation_and_name():
    """Two versions of one file name may coexist; repeating a version number is rejected."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    # Fields shared by every attachment created in this test.
    shared = {"conversation": chat, "user": owner, "original_name": "资料.docx"}

    first = FileAttachment.objects.create(
        version_no=1,
        is_active=False,
        storage_path="media/a.docx",
        file_size=10,
        **shared,
    )
    second = FileAttachment.objects.create(
        version_no=2,
        storage_path="media/b.docx",
        file_size=12,
        **shared,
    )

    assert first.version_no == 1
    assert second.version_no == 2

    # atomic() confines the failed INSERT so the outer test transaction stays usable.
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileAttachment.objects.create(
                version_no=2,
                storage_path="media/c.docx",
                file_size=14,
                **shared,
            )
def test_batch_attachment_and_item_unique_constraints():
    """Duplicate batch-attachment links and duplicate item paths within a batch are rejected."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="资料.docx",
        storage_path="media/a.docx",
        file_size=10,
    )
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-001")

    # Linking the same attachment to the same batch twice must fail.
    link_fields = {"batch": batch, "attachment": attachment}
    FileSummaryBatchAttachment.objects.create(**link_fields)
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileSummaryBatchAttachment.objects.create(**link_fields)

    # A second item with the same relative path in the batch must fail, even with a new index.
    item_fields = {
        "batch": batch,
        "file_name": "资料.docx",
        "file_type": "docx",
        "relative_path": "资料.docx",
        "storage_path": "media/a.docx",
    }
    FileSummaryItem.objects.create(file_index=1, **item_fields)
    with pytest.raises(IntegrityError):
        with transaction.atomic():
            FileSummaryItem.objects.create(file_index=2, **item_fields)
def test_exported_file_traces_to_user_and_conversation():
    """An exported file can be traced to its owner and conversation through its batch."""
    owner = create_user()
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-002")

    export = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name="summary.md",
        storage_path="media/summary.md",
    )

    parent_batch = export.batch
    assert parent_batch.user == owner
    assert parent_batch.conversation == chat

View File

@@ -0,0 +1,66 @@
import pytest
from docx import Document
from openpyxl import Workbook
from pptx import Presentation
from review_agent.file_summary.services.page_count import count_document_pages
from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill
from review_agent.file_summary.skills.base import WorkflowContext
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_count_document_pages_for_office_formats(tmp_path):
    """Page counting handles docx, xlsx (one page per sheet) and pptx (one page per slide)."""
    # Empty Word document.
    word_file = tmp_path / "a.docx"
    Document().save(word_file)

    # Workbook with the default sheet plus one extra sheet.
    excel_file = tmp_path / "a.xlsx"
    book = Workbook()
    book.create_sheet("第二页")
    book.save(excel_file)

    # Deck with a single slide.
    slides_file = tmp_path / "a.pptx"
    deck = Presentation()
    deck.slides.add_slide(deck.slide_layouts[6])
    deck.save(slides_file)

    word_result = count_document_pages(word_file)
    assert word_result.status in {"success", "uncertain"}
    assert count_document_pages(excel_file).page_count == 2
    assert count_document_pages(slides_file).page_count == 1
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
    """The page-count skill marks countable files SUCCESS and unknown types UNSUPPORTED."""
    excel_file = tmp_path / "a.xlsx"
    Workbook().save(excel_file)
    text_file = tmp_path / "a.txt"
    text_file.write_text("x", encoding="utf-8")

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-P")

    excel_item = FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        file_name="a.xlsx",
        file_type="xlsx",
        relative_path="a.xlsx",
        storage_path=str(excel_file),
    )
    text_item = FileSummaryItem.objects.create(
        batch=batch,
        file_index=2,
        file_name="a.txt",
        file_type="txt",
        relative_path="a.txt",
        storage_path=str(text_file),
    )

    outcome = DocumentPageCountSkill().run(WorkflowContext(batch=batch))

    # The skill persists its verdicts; reload both rows before checking them.
    for item in (excel_item, text_item):
        item.refresh_from_db()
    assert outcome.success is True
    assert excel_item.statistics_status == FileSummaryItem.StatisticsStatus.SUCCESS
    assert text_item.statistics_status == FileSummaryItem.StatisticsStatus.UNSUPPORTED

View File

@@ -0,0 +1,29 @@
import pytest
from review_agent.file_summary.services.product_detect import detect_product_name
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
pytestmark = pytest.mark.django_db
def test_detect_product_name_from_top_level_directory(django_user_model):
    """The top-level directory of the batch items becomes the product name and retitles the conversation."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="新对话 06-06")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-D")
    FileSummaryItem.objects.create(
        batch=batch,
        file_index=1,
        file_name="说明书.docx",
        file_type="docx",
        relative_path="甲型试剂盒/说明书.docx",
        storage_path="x",
    )

    detected = detect_product_name(batch)

    # Detection writes to both the batch and its conversation; reload before asserting.
    batch.refresh_from_db()
    chat.refresh_from_db()
    assert detected == "甲型试剂盒"
    assert batch.product_name == "甲型试剂盒"
    assert chat.title == "甲型试剂盒-文件汇总"

View File

@@ -0,0 +1,82 @@
from pathlib import Path
import pytest
from openpyxl import load_workbook
from review_agent.file_summary.services.export_excel import generate_excel_export
from review_agent.file_summary.services.report import generate_markdown_report
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem, Message
pytestmark = pytest.mark.django_db
def make_batch(tmp_path, django_user_model):
    """Build a one-file batch (a successfully counted 2-page xlsx) whose work dir is ``tmp_path``."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch_fields = {
        "conversation": chat,
        "user": owner,
        "batch_no": "FS-R",
        "work_dir": str(tmp_path),
        "total_files": 1,
        "success_files": 1,
        "total_pages": 2,
    }
    new_batch = FileSummaryBatch.objects.create(**batch_fields)
    FileSummaryItem.objects.create(
        batch=new_batch,
        file_index=1,
        file_name="a.xlsx",
        file_type="xlsx",
        relative_path="a.xlsx",
        storage_path=str(tmp_path / "a.xlsx"),
        page_count=2,
        statistics_status=FileSummaryItem.StatisticsStatus.SUCCESS,
    )
    return new_batch
def test_generate_markdown_report_creates_export_and_summary(tmp_path, django_user_model):
    """The Markdown report is written to disk and the returned summary carries the table header."""
    batch = make_batch(tmp_path, django_user_model)

    export, summary_text = generate_markdown_report(batch)

    report_file = Path(export.storage_path)
    assert export.export_type == "markdown"
    assert report_file.exists()
    assert "| 序号 | 目录层级 | 文件名 | 类型 | 页数 | 状态 | 异常说明 |" in summary_text
    assert "a.xlsx" in report_file.read_text(encoding="utf-8")
def test_generate_excel_export_contains_summary_and_items(tmp_path, django_user_model):
    """The Excel export has a summary sheet and a detail sheet listing the batch's file."""
    batch = make_batch(tmp_path, django_user_model)

    export = generate_excel_export(batch)

    book = load_workbook(export.storage_path)
    assert book.sheetnames == ["汇总信息", "文件明细"]
    # Row 1 is presumably the header row, so C2 holds the first item's file name.
    detail_sheet = book["文件明细"]
    assert detail_sheet["C2"].value == "a.xlsx"
def test_workflow_report_node_writes_assistant_message(tmp_path, settings, django_user_model):
    """Running the workflow synchronously leaves an assistant message in the conversation."""
    from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
    from review_agent.models import FileAttachment

    # Keep all workflow output inside the per-test directory and run inline.
    settings.MEDIA_ROOT = tmp_path
    settings.FILE_SUMMARY_ASYNC = False

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    source_file = tmp_path / "a.xlsx"
    source_file.write_bytes(b"not a real workbook")
    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="a.txt",
        storage_path=str(source_file),
        file_size=source_file.stat().st_size,
    )

    batch = create_file_summary_batch(conversation=chat, user=owner)
    batch.work_dir = str(tmp_path / "batch")
    batch.save(update_fields=["work_dir"])
    start_file_summary_workflow(batch, async_run=False)

    assistant_messages = Message.objects.filter(conversation=chat, role=Message.Role.ASSISTANT)
    assert assistant_messages.exists()

View File

@@ -0,0 +1,27 @@
import pytest
from review_agent.file_summary.skills.base import BaseSkill, SkillResult, WorkflowContext
from review_agent.file_summary.skills.registry import SkillRegistry
class EchoSkill(BaseSkill):
    """Minimal skill used by the registry test; it reports the id of the context's batch."""

    name = "echo"

    def run(self, context):
        """Return a successful result whose data holds ``context.batch.id``."""
        payload = {"batch_id": context.batch.id}
        return SkillResult(success=True, data=payload)
@pytest.mark.django_db
def test_skill_registry_executes_registered_skill(django_user_model):
    """A skill registered under its name can be executed through the registry."""
    from review_agent.models import Conversation, FileSummaryBatch

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-X")

    skills = SkillRegistry()
    skills.register(EchoSkill())
    context = WorkflowContext(batch=batch)
    outcome = skills.execute("echo", context)

    assert outcome.success is True
    assert outcome.data == {"batch_id": batch.id}

View File

@@ -0,0 +1,48 @@
from django.core.files.uploadedfile import SimpleUploadedFile
import pytest
from review_agent.file_summary.storage import save_uploaded_attachment
from review_agent.models import Conversation, FileAttachment
pytestmark = pytest.mark.django_db
def test_save_uploaded_attachment_versions_same_name(settings, tmp_path, django_user_model):
    """Re-uploading a file name creates version 2 and deactivates version 1."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    def upload(payload):
        # Same file name every time; only the content differs.
        return save_uploaded_attachment(
            conversation=chat,
            user=owner,
            uploaded_file=SimpleUploadedFile("资料.docx", payload),
        )

    first = upload(b"first")
    second = upload(b"second")

    # The second upload changes the first row in the database; reload it.
    first.refresh_from_db()
    assert first.version_no == 1
    assert first.is_active is False
    assert second.version_no == 2
    assert second.is_active is True
    assert FileAttachment.objects.filter(conversation=chat).count() == 2
    stored_file = tmp_path / second.storage_path
    assert stored_file.read_bytes() == b"second"
def test_save_uploaded_attachment_rejects_path_traversal(settings, tmp_path, django_user_model):
    """An upload named with ``..`` is stored under MEDIA_ROOT with no parent reference in its path."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    hostile_upload = SimpleUploadedFile("../资料.docx", b"content")

    stored = save_uploaded_attachment(
        conversation=chat,
        user=owner,
        uploaded_file=hostile_upload,
    )

    assert ".." not in stored.storage_path
    stored_file = tmp_path / stored.storage_path
    assert stored_file.exists()

View File

@@ -0,0 +1,32 @@
import pytest
from review_agent.file_summary.workflow_trigger import evaluate_file_summary_trigger
from review_agent.models import Conversation, FileAttachment
pytestmark = pytest.mark.django_db
def test_trigger_matches_keywords_only_when_active_attachment_exists(django_user_model):
    """The trigger fires only for a matching prompt in a conversation that has an attachment."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    trigger_prompt = "请自动汇总文件目录与页数"

    # Matching prompt but nothing uploaded yet.
    without_file = evaluate_file_summary_trigger(chat, trigger_prompt)
    assert without_file.should_start is False
    assert without_file.reason == "missing_attachment"

    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )

    # Matching prompt with an attachment present.
    with_file = evaluate_file_summary_trigger(chat, trigger_prompt)
    assert with_file.should_start is True
    assert with_file.workflow_type == "file_summary"

    # Unrelated prompt: the attachment alone does not trigger anything.
    unrelated = evaluate_file_summary_trigger(chat, "你好,帮我解释法规")
    assert unrelated.should_start is False
    assert unrelated.reason == "not_matched"

View File

@@ -0,0 +1,98 @@
from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse
import pytest
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, FileSummaryBatch
pytestmark = pytest.mark.django_db
def test_upload_attachments_requires_conversation_owner(client, settings, tmp_path, django_user_model):
    """Uploading into another user's conversation answers 404."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    intruder = django_user_model.objects.create_user(username="other", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    client.force_login(intruder)

    upload_url = reverse("file_summary_attachment_upload", args=[chat.pk])
    form_data = {"files": [SimpleUploadedFile("a.docx", b"a")]}
    response = client.post(upload_url, form_data)

    assert response.status_code == 404
def test_attachment_api_requires_login(client, django_user_model):
    """An anonymous request to the attachment list is redirected (302)."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    list_url = reverse("file_summary_attachment_list", args=[chat.pk])
    response = client.get(list_url)

    assert response.status_code == 302
def test_upload_and_list_current_conversation_attachments(client, settings, tmp_path, django_user_model):
    """The owner can upload several files at once and then sees them all in the list."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    client.force_login(owner)

    uploads = [
        SimpleUploadedFile("a.docx", b"a", content_type="application/docx"),
        SimpleUploadedFile("b.zip", b"b", content_type="application/zip"),
    ]
    upload_url = reverse("file_summary_attachment_upload", args=[chat.pk])
    upload_response = client.post(upload_url, {"files": uploads})
    list_url = reverse("file_summary_attachment_list", args=[chat.pk])
    list_response = client.get(list_url)

    assert upload_response.status_code == 200
    uploaded = upload_response.json()["attachments"]
    assert uploaded[0]["original_name"] == "a.docx"
    listed = list_response.json()["attachments"]
    assert len(listed) == 2
def test_delete_attachment_is_logical_and_scoped(client, settings, tmp_path, django_user_model):
    """DELETE keeps the attachment row but marks it deleted and inactive."""
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    target = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )
    client.force_login(owner)

    detail_url = reverse("file_summary_attachment_detail", args=[chat.pk, target.pk])
    response = client.delete(detail_url)

    # refresh_from_db would raise if the row had been physically removed.
    target.refresh_from_db()
    assert response.status_code == 200
    assert target.upload_status == FileAttachment.UploadStatus.DELETED
    assert target.is_active is False
def test_export_download_requires_batch_owner(client, tmp_path, django_user_model):
    """Only the batch owner can download an export; any other user gets 404."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    intruder = django_user_model.objects.create_user(username="other", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    batch = FileSummaryBatch.objects.create(conversation=chat, user=owner, batch_no="FS-DL")

    report_file = tmp_path / "summary.md"
    report_file.write_text("ok", encoding="utf-8")
    export = ExportedSummaryFile.objects.create(
        batch=batch,
        export_type=ExportedSummaryFile.ExportType.MARKDOWN,
        file_name="summary.md",
        storage_path=str(report_file),
    )
    download_url = reverse("file_summary_export_download", args=[export.pk])

    # A different logged-in user must not learn that the export exists.
    client.force_login(intruder)
    denied = client.get(download_url)
    assert denied.status_code == 404

    client.force_login(owner)
    allowed = client.get(download_url)
    assert allowed.status_code == 200

View File

@@ -0,0 +1,102 @@
import pytest
from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from review_agent.models import (
Conversation,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
Message,
WorkflowEvent,
WorkflowNodeRun,
)
from review_agent.services import stream_message
pytestmark = pytest.mark.django_db
def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user_model):
    """Creating a batch binds only active attachments and seeds node runs plus a creation event."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    prompt = Message.objects.create(conversation=chat, role=Message.Role.USER, content="自动汇总")

    current = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )
    # Inactive attachment that must be left out of the batch.
    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="old.docx",
        is_active=False,
        storage_path="x/old.docx",
        file_size=1,
    )

    batch = create_file_summary_batch(conversation=chat, user=owner, trigger_message=prompt)

    assert batch.status == FileSummaryBatch.Status.PENDING
    # .get() also proves exactly one attachment was bound.
    bound_link = FileSummaryBatchAttachment.objects.get(batch=batch)
    assert bound_link.attachment == current
    current.refresh_from_db()
    assert current.upload_status == FileAttachment.UploadStatus.BOUND
    node_total = WorkflowNodeRun.objects.filter(batch=batch).count()
    assert node_total >= 6
    created_events = WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created")
    assert created_events.exists()
def test_start_file_summary_workflow_runs_synchronously_for_tests(settings, tmp_path, django_user_model):
    """With ``async_run=False`` the workflow finishes inline and records a completion event.

    MEDIA_ROOT is redirected to ``tmp_path`` so that anything the workflow
    writes (work directories, exported reports) stays inside the per-test
    directory instead of the project's real media folder.
    """
    settings.MEDIA_ROOT = tmp_path
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    message = Message.objects.create(conversation=conversation, role=Message.Role.USER, content="自动汇总")
    FileAttachment.objects.create(
        conversation=conversation,
        user=user,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )
    batch = create_file_summary_batch(conversation=conversation, user=user, trigger_message=message)

    start_file_summary_workflow(batch, async_run=False)

    # The workflow updates the row itself; reload to observe the final status.
    batch.refresh_from_db()
    assert batch.status == FileSummaryBatch.Status.SUCCESS
    assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed").exists()
def test_stream_message_returns_workflow_meta_when_triggered(settings, tmp_path, django_user_model):
    """A trigger prompt makes ``stream_message`` emit workflow metadata and create a batch.

    The workflow runs inline (``FILE_SUMMARY_ASYNC = False``), so MEDIA_ROOT is
    redirected to ``tmp_path`` to keep its output out of the real media folder.
    """
    settings.MEDIA_ROOT = tmp_path
    settings.FILE_SUMMARY_ASYNC = False
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title="会话")
    FileAttachment.objects.create(
        conversation=conversation,
        user=user,
        original_name="a.docx",
        storage_path="x/a.docx",
        file_size=1,
    )

    # Drain the generator so every streamed frame is produced.
    frames = list(stream_message(conversation, "请自动汇总文件目录与页数"))
    joined = "".join(frames)

    assert "workflow_started" in joined
    assert "\"workflow_type\": \"file_summary\"" in joined
    assert FileSummaryBatch.objects.filter(conversation=conversation).exists()
def test_stream_message_uses_normal_llm_path_when_not_triggered(monkeypatch, django_user_model):
    """A prompt that does not trigger the workflow is answered through ``stream_reply``."""
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    def fake_stream_reply(conversation, content):
        # Stand-in for the LLM call: a single fixed chunk.
        yield "普通回复"

    monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply)

    streamed = "".join(list(stream_message(chat, "你好")))

    assert "普通回复" in streamed
    assert "workflow_started" not in streamed