fix(file-summary): 同步压缩包工作流状态与结果刷新

This commit is contained in:
2026-06-06 19:45:49 +08:00
parent daa0642142
commit 7e561ea213
12 changed files with 560 additions and 32 deletions

View File

@@ -1,5 +1,6 @@
from __future__ import annotations
import logging
import subprocess
from pathlib import Path
from zipfile import ZipFile
@@ -9,6 +10,8 @@ import py7zr
ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"}
logger = logging.getLogger("review_agent.file_summary.services.archive")
def _ensure_inside_target(path: Path, target_dir: Path) -> None:
target = target_dir.resolve()
@@ -63,6 +66,51 @@ def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]:
def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]:
try:
extracted = _extract_rar_with_libarchive(archive_path, target_dir)
except Exception as exc:
logger.warning(
"RAR libarchive extract failed, falling back to 7z",
extra={"archive_path": str(archive_path), "target_dir": str(target_dir), "error": str(exc)},
)
else:
if extracted:
return extracted
logger.info(
"RAR libarchive extract produced no files, falling back to 7z",
extra={"archive_path": str(archive_path), "target_dir": str(target_dir)},
)
return _extract_rar_with_7z(archive_path, target_dir)
def _extract_rar_with_libarchive(archive_path: Path, target_dir: Path) -> list[Path]:
try:
import libarchive
except ImportError as exc:
raise RuntimeError("未安装 libarchive跳过 Python RAR 解压。") from exc
extracted: list[Path] = []
with libarchive.file_reader(str(archive_path)) as entries:
for entry in entries:
destination = _safe_member_path(target_dir, entry.pathname)
if entry.isdir:
destination.mkdir(parents=True, exist_ok=True)
continue
if not entry.isfile:
logger.info(
"RAR libarchive skipped non-regular entry",
extra={"archive_path": str(archive_path), "entry": entry.pathname},
)
continue
destination.parent.mkdir(parents=True, exist_ok=True)
with destination.open("wb") as target:
for block in entry.get_blocks():
target.write(block)
extracted.append(destination)
return extracted
def _extract_rar_with_7z(archive_path: Path, target_dir: Path) -> list[Path]:
result = subprocess.run(
["7z", "x", f"-o{target_dir}", str(archive_path), "-y"],
check=False,

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
import logging
from pathlib import Path
import re
from review_agent.models import FileSummaryBatchAttachment
@@ -13,34 +14,56 @@ from .base import BaseSkill, SkillResult, WorkflowContext
logger = logging.getLogger("review_agent.file_summary.skills.archive_extract")
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
stem = Path(binding.attachment.original_name).stem or "archive"
safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive"
return f"{binding.attachment_id}_{safe_stem}"
class ArchiveExtractSkill(BaseSkill):
name = "archive_extract"
def run(self, context: WorkflowContext) -> SkillResult:
extracted_count = 0
target_dir = Path(context.batch.work_dir or "")
if not target_dir:
logger.info(
"Archive extract skipped without work dir",
if not context.batch.work_dir:
message = "批次工作目录为空,无法解压压缩包。"
logger.error(
"Archive extract failed without work dir",
extra={"batch_id": context.batch.pk, "batch_no": context.batch.batch_no},
)
return SkillResult(success=True, data={"extracted_count": 0})
return SkillResult(success=False, message=message, data={"extracted_count": 0})
target_root = Path(context.batch.work_dir)
archive_count = 0
for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
path = resolve_storage_path(binding.attachment.storage_path)
if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
continue
archive_count += 1
target_dir = target_root / "extracted" / _safe_archive_dir_name(binding)
logger.info(
"Archive extract started",
extra={
"batch_id": context.batch.pk,
"attachment_id": binding.attachment_id,
"path": str(path),
"target_dir": str(target_dir),
},
)
extracted_count += len(extract_archive(path, target_dir))
if archive_count and extracted_count == 0:
message = "压缩包未解出任何可扫描文件,请检查压缩包内容或格式。"
logger.warning(
"Archive extract produced no files",
extra={"batch_id": context.batch.pk, "archive_count": archive_count},
)
return SkillResult(success=False, message=message, data={"extracted_count": 0})
logger.info(
"Archive extract finished",
extra={"batch_id": context.batch.pk, "extracted_count": extracted_count},
extra={
"batch_id": context.batch.pk,
"archive_count": archive_count,
"extracted_count": extracted_count,
},
)
return SkillResult(success=True, data={"extracted_count": extracted_count})

View File

@@ -2,10 +2,12 @@ from __future__ import annotations
import logging
from pathlib import Path
import re
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.archive import ARCHIVE_EXTENSIONS
from ..services.inventory import scan_files_to_items
from .base import BaseSkill, SkillResult, WorkflowContext
@@ -13,14 +15,44 @@ from .base import BaseSkill, SkillResult, WorkflowContext
logger = logging.getLogger("review_agent.file_summary.skills.file_inventory")
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
stem = Path(binding.attachment.original_name).stem or "archive"
safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive"
return f"{binding.attachment_id}_{safe_stem}"
class FileInventorySkill(BaseSkill):
name = "file_inventory"
def run(self, context: WorkflowContext) -> SkillResult:
roots = [
resolve_storage_path(binding.attachment.storage_path)
for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch)
]
roots: list[Path] = []
missing_extract_roots: list[str] = []
for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
original_path = resolve_storage_path(binding.attachment.storage_path)
is_archive = original_path.suffix.lower().lstrip(".") in ARCHIVE_EXTENSIONS
if not is_archive:
roots.append(original_path)
continue
extracted_root = (
Path(context.batch.work_dir)
/ "extracted"
/ _safe_archive_dir_name(binding)
)
if extracted_root.exists():
roots.append(extracted_root)
else:
missing_extract_roots.append(str(extracted_root))
if missing_extract_roots:
message = "压缩包解压目录不存在,无法扫描解压后的文件。"
logger.warning(
"File inventory missing extracted roots",
extra={
"batch_id": context.batch.pk,
"missing_extract_roots": missing_extract_roots,
},
)
return SkillResult(success=False, message=message)
logger.info(
"File inventory started",
extra={

View File

@@ -5,7 +5,7 @@ from pathlib import Path
from django.http import FileResponse, Http404, JsonResponse
from django.views.decorators.http import require_http_methods
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, Message
from review_agent.models import FileSummaryBatch, WorkflowEvent
from .events import serialize_event
@@ -90,6 +90,47 @@ def attachment_detail(request, conversation_id: int, attachment_id: int):
return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})
def _serialize_message(message: Message) -> dict[str, object]:
return {
"id": message.pk,
"role": message.role,
"content": message.content,
"created_at": message.created_at.isoformat(),
}
@require_http_methods(["GET"])
@login_required
def conversation_messages(request, conversation_id: int):
conversation = _conversation_for_user(request.user, conversation_id)
after = request.GET.get("after") or "0"
try:
after_id = int(after)
except ValueError:
after_id = 0
messages = list(conversation.messages.filter(pk__gt=after_id).order_by("id"))
latest_message_id = (
conversation.messages.order_by("-id").values_list("id", flat=True).first() or 0
)
logger.info(
"Conversation incremental messages requested",
extra={
"conversation_id": conversation.pk,
"after_id": after_id,
"message_count": len(messages),
"latest_message_id": latest_message_id,
},
)
return JsonResponse(
{
"conversation_id": conversation.pk,
"latest_message_id": latest_message_id,
"messages": [_serialize_message(message) for message in messages],
}
)
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
@@ -107,6 +148,7 @@ def batch_status(request, batch_id: int):
"success_files": batch.success_files,
"failed_files": batch.failed_files,
"total_pages": batch.total_pages,
"error_message": batch.error_message,
},
"nodes": [
{

View File

@@ -1,9 +1,11 @@
from __future__ import annotations
import logging
from pathlib import Path
from threading import Thread
from uuid import uuid4
from django.conf import settings
from django.db import transaction
from django.utils import timezone
@@ -17,6 +19,7 @@ from review_agent.models import (
)
from .events import record_event
from .services.archive import ARCHIVE_EXTENSIONS
from .skills.archive_extract import ArchiveExtractSkill
from .skills.base import WorkflowContext
from .skills.document_page_count import DocumentPageCountSkill
@@ -54,6 +57,10 @@ def build_batch_no() -> str:
return f"FS-{timezone.localtime().strftime('%Y%m%d%H%M%S')}-{uuid4().hex[:6]}"
def build_batch_work_dir(batch_no: str) -> Path:
return Path(settings.MEDIA_ROOT) / "file_summary" / "work" / batch_no
@transaction.atomic
def create_file_summary_batch(
*,
@@ -78,15 +85,29 @@ def create_file_summary_batch(
},
)
batch_no = build_batch_no()
work_dir = build_batch_work_dir(batch_no)
work_dir.mkdir(parents=True, exist_ok=True)
batch = FileSummaryBatch.objects.create(
conversation=conversation,
user=user,
trigger_message=trigger_message,
batch_no=build_batch_no(),
batch_no=batch_no,
work_dir=str(work_dir),
)
for attachment in active_attachments:
FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment)
source_role = (
FileSummaryBatchAttachment.SourceRole.ARCHIVE
if Path(attachment.original_name).suffix.lower().lstrip(".") in ARCHIVE_EXTENSIONS
else FileSummaryBatchAttachment.SourceRole.MULTI_FILE
)
FileSummaryBatchAttachment.objects.create(
batch=batch,
attachment=attachment,
source_role=source_role,
)
attachment.upload_status = FileAttachment.UploadStatus.BOUND
attachment.save(update_fields=["upload_status"])
@@ -152,7 +173,7 @@ class WorkflowExecutor:
record_event(
self.batch,
"node_progress",
{"node_code": node.node_code, "status": node.status, "progress": node.progress},
{"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
)
skill_name = next(
@@ -160,6 +181,7 @@ class WorkflowExecutor:
"",
)
if skill_name:
try:
result = self.registry.execute(skill_name, WorkflowContext(batch=self.batch))
if not result.success:
logger.warning(
@@ -172,6 +194,22 @@ class WorkflowExecutor:
},
)
raise RuntimeError(result.message or f"{node.node_name}执行失败")
except Exception as exc:
node.status = WorkflowNodeRun.Status.FAILED
node.finished_at = timezone.now()
node.message = str(exc)
node.save(update_fields=["status", "finished_at", "message"])
record_event(
self.batch,
"node_progress",
{
"node_code": node.node_code,
"status": node.status,
"progress": node.progress,
"message": node.message,
},
)
raise
node.status = WorkflowNodeRun.Status.SUCCESS
node.progress = 100
@@ -181,7 +219,7 @@ class WorkflowExecutor:
record_event(
self.batch,
"node_progress",
{"node_code": node.node_code, "status": node.status, "progress": node.progress},
{"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message},
)
logger.info(
"Workflow node finished",

View File

@@ -1,6 +1,13 @@
from django.urls import path
from .file_summary.views import attachment_detail, attachments, batch_events, batch_status, export_download
from .file_summary.views import (
attachment_detail,
attachments,
batch_events,
batch_status,
conversation_messages,
export_download,
)
urlpatterns = [
@@ -19,6 +26,11 @@ urlpatterns = [
attachment_detail,
name="file_summary_attachment_detail",
),
path(
"api/review-agent/conversations/<int:conversation_id>/messages/",
conversation_messages,
name="review_agent_conversation_messages",
),
path(
"api/review-agent/file-summary/<int:batch_id>/status/",
batch_status,

View File

@@ -882,7 +882,9 @@ input:focus {
.upload-dropzone span,
.upload-status,
.attachment-item span,
.workflow-card em {
.workflow-card em,
.workflow-card small,
.workflow-error {
color: var(--muted);
font-size: 12px;
}
@@ -949,6 +951,28 @@ input:focus {
font-size: 13px;
}
.node-status div {
display: grid;
min-width: 0;
gap: 2px;
}
.node-status span,
.node-status small,
.workflow-error {
overflow-wrap: anywhere;
word-break: break-word;
}
.workflow-error {
margin: 0;
padding: 8px 10px;
border-radius: 6px;
background: #fff1f0;
color: #b42318;
line-height: 1.5;
}
.status-running,
.status-retrying {
color: var(--accent);

View File

@@ -20,6 +20,7 @@
var nodeAnchors = [];
var workflowPollingTimers = {};
var WORKFLOW_POLL_INTERVAL_MS = 1500;
var latestMessageId = 0;
if (!workspace) {
return;
@@ -52,6 +53,15 @@
nodeAnchors = Array.prototype.slice.call(document.querySelectorAll(".node-anchor"));
}
function syncLatestMessageIdFromDom() {
document.querySelectorAll(".message[data-message-id]").forEach(function (message) {
var id = parseInt(message.getAttribute("data-message-id"), 10);
if (!Number.isNaN(id)) {
latestMessageId = Math.max(latestMessageId, id);
}
});
}
if (sidebarToggle) {
sidebarToggle.addEventListener("click", toggleSidebar);
}
@@ -271,6 +281,9 @@
var article = document.createElement("article");
article.className = "message " + role;
article.id = messageId;
if (typeof messageId === "number") {
article.setAttribute("data-message-id", messageId);
}
if (label) {
article.setAttribute("data-node-label", label);
}
@@ -295,6 +308,48 @@
return { article: article, bubble: bubble, text: text };
}
function appendConversationMessage(message) {
if (!message || document.querySelector('.message[data-message-id="' + message.id + '"]')) {
return;
}
var label = message.role === "assistant" ? "AI " : "用户 ";
label += document.querySelectorAll(".message").length + 1;
var created = createMessage(message.role, message.content || "", "message-" + message.id, label);
created.article.setAttribute("data-message-id", message.id);
latestMessageId = Math.max(latestMessageId, message.id);
if (message.role === "user") {
appendNode(created.article.id, label, true);
}
}
async function refreshConversationMessages() {
var conversationId = currentConversationId();
if (!conversationId || !summaryPanel) {
return;
}
var url = templateUrl("data-message-url-template", "__conversation_id__", conversationId);
if (!url) {
return;
}
try {
var response = await fetch(url + "?after=" + latestMessageId, { cache: "no-store" });
if (!response.ok) {
return;
}
var payload = await response.json();
(payload.messages || []).forEach(appendConversationMessage);
if (payload.latest_message_id) {
latestMessageId = Math.max(latestMessageId, payload.latest_message_id);
}
syncNodeRailVisibility();
bindNodeAnchorClicks();
setActiveNode();
scrollChatToBottom();
} catch (error) {
console.error("Conversation message refresh failed", error);
}
}
function appendNode(targetId, title, isLatest) {
if (!nodeRail) {
return;
@@ -530,13 +585,31 @@
var status = card.querySelector(".workflow-status");
status.textContent = payload.batch.status;
status.className = "workflow-status status-" + payload.batch.status;
var batchError = card.querySelector(".workflow-error");
if (payload.batch.error_message) {
if (!batchError) {
batchError = document.createElement("p");
batchError.className = "workflow-error";
card.insertBefore(batchError, card.querySelector("ol"));
}
batchError.textContent = payload.batch.error_message;
} else if (batchError) {
batchError.remove();
}
var list = card.querySelector("ol");
list.innerHTML = "";
(payload.nodes || []).forEach(function (node) {
var item = document.createElement("li");
item.className = "node-status status-" + node.status;
item.setAttribute("data-node-code", node.node_code);
item.innerHTML = "<span>" + escapeHtml(node.node_name) + "</span><em>" + node.progress + "%</em>";
item.innerHTML =
'<div><span>' +
escapeHtml(node.node_name) +
"</span>" +
(node.message ? "<small>" + escapeHtml(node.message) + "</small>" : "") +
"</div><em>" +
node.progress +
"%</em>";
list.appendChild(item);
});
return payload.batch.status || "";
@@ -561,11 +634,13 @@
workflowPollingTimers[batchId] = window.setInterval(async function () {
var status = await refreshWorkflowCard(batchId);
if (isWorkflowTerminalStatus(status)) {
refreshConversationMessages();
stopWorkflowPolling(batchId);
}
}, WORKFLOW_POLL_INTERVAL_MS);
refreshWorkflowCard(batchId).then(function (status) {
if (isWorkflowTerminalStatus(status)) {
refreshConversationMessages();
stopWorkflowPolling(batchId);
}
});
@@ -666,6 +741,11 @@
return;
}
if (eventName === "meta") {
if (payload.user_message_id) {
userMessage.article.id = "message-" + payload.user_message_id;
userMessage.article.setAttribute("data-message-id", payload.user_message_id);
latestMessageId = Math.max(latestMessageId, payload.user_message_id);
}
if (payload.conversation_id) {
conversationIdInput.value = payload.conversation_id;
window.history.replaceState({}, "", "/?conversation=" + payload.conversation_id);
@@ -678,6 +758,10 @@
assistantText += payload.delta || "";
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
scrollChatToBottom();
} else if (eventName === "replace") {
assistantText = payload.content || "";
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
scrollChatToBottom();
} else if (eventName === "error") {
assistantText = payload.message || "模型调用失败。";
assistantMessage.text.innerHTML = renderAssistantContent(assistantText);
@@ -687,6 +771,8 @@
} else if (eventName === "done") {
if (payload.assistant_message_id) {
assistantMessage.article.id = "message-" + payload.assistant_message_id;
assistantMessage.article.setAttribute("data-message-id", payload.assistant_message_id);
latestMessageId = Math.max(latestMessageId, payload.assistant_message_id);
}
if (payload.title) {
setConversationTitle(payload.title);
@@ -711,7 +797,24 @@
}
}
function bindPromptKeyboardShortcuts() {
if (!promptInput || !composer) {
return;
}
promptInput.addEventListener("keydown", function (event) {
if (event.key === "Enter" && !event.ctrlKey) {
event.preventDefault();
if (typeof composer.requestSubmit === "function") {
composer.requestSubmit();
} else {
composer.dispatchEvent(new Event("submit", { cancelable: true }));
}
}
});
}
syncNodeRailVisibility();
syncLatestMessageIdFromDom();
bindNodeAnchorClicks();
renderExistingAssistantMessages();
refreshRunningWorkflowCards();
@@ -724,6 +827,7 @@
if (composer) {
composer.addEventListener("submit", streamChat);
}
bindPromptKeyboardShortcuts();
if (uploadDropzone && attachmentInput) {
uploadDropzone.addEventListener("click", function () {

View File

@@ -108,6 +108,7 @@
<article
class="message {{ message.role }}"
id="message-{{ message.pk }}"
data-message-id="{{ message.pk }}"
data-node-label="{% if message.role == 'assistant' %}AI{% else %}用户{% endif %} {{ forloop.counter }}"
>
<div class="message-avatar{% if message.role == 'user' %} user-mark{% endif %}">
@@ -174,6 +175,7 @@
class="summary-panel"
id="summaryPanel"
data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/"
data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/"
data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/"
data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/"
>
@@ -220,10 +222,16 @@
<strong>{{ batch.batch_no }}</strong>
<span class="workflow-status status-{{ batch.status }}">{{ batch.status }}</span>
</header>
{% if batch.error_message %}
<p class="workflow-error">{{ batch.error_message }}</p>
{% endif %}
<ol>
{% for node in batch.node_runs.all %}
<li class="node-status status-{{ node.status }}" data-node-code="{{ node.node_code }}">
<div>
<span>{{ node.node_name }}</span>
{% if node.message %}<small>{{ node.message }}</small>{% endif %}
</div>
<em>{{ node.progress }}%</em>
</li>
{% endfor %}

View File

@@ -25,6 +25,8 @@ def test_workspace_renders_summary_panel(client, django_user_model):
assert 'id="uploadDropzone"' in content
assert 'id="workflowCardList"' in content
assert 'data-conversation-id="' in content
assert 'data-message-id="' in content
assert 'data-message-url-template="' in content
assert 'class="message-content markdown-content"' in content
assert 'class="message-raw"' in content
assert "自动汇总文件目录与页数" in content
@@ -52,3 +54,37 @@ def test_frontend_updates_sidebar_conversation_by_stable_id():
assert "data-conversation-id" in script
assert "setAttribute(\"data-conversation-id\"" in script
assert ".history-item[data-conversation-id=" in script
def test_frontend_refreshes_generated_workflow_messages():
script = open("static/js/app.js", encoding="utf-8").read()
assert "refreshConversationMessages" in script
assert "latestMessageId" in script
assert "data-message-url-template" in script
def test_frontend_can_replace_partial_stream_content():
script = open("static/js/app.js", encoding="utf-8").read()
assert 'eventName === "replace"' in script
assert "assistantText = payload.content" in script
def test_frontend_enter_sends_and_ctrl_enter_inserts_newline():
script = open("static/js/app.js", encoding="utf-8").read()
assert "bindPromptKeyboardShortcuts" in script
assert "event.key === \"Enter\"" in script
assert "event.ctrlKey" in script
assert "composer.requestSubmit()" in script
def test_frontend_renders_workflow_error_messages():
script = open("static/js/app.js", encoding="utf-8").read()
css = open("static/css/login.css", encoding="utf-8").read()
assert "payload.batch.error_message" in script
assert "workflow-error" in script
assert "node.message" in script
assert ".workflow-error" in css

View File

@@ -2,7 +2,14 @@ from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse
import pytest
from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, FileSummaryBatch
from review_agent.models import (
Conversation,
ExportedSummaryFile,
FileAttachment,
FileSummaryBatch,
Message,
WorkflowNodeRun,
)
pytestmark = pytest.mark.django_db
@@ -99,3 +106,68 @@ def test_export_download_requires_batch_owner(client, tmp_path, django_user_mode
assert "attachment" in allowed["Content-Disposition"]
assert "summary.md" in allowed["Content-Disposition"]
assert allowed["Content-Type"].startswith("text/markdown")
def test_conversation_messages_returns_incremental_messages(client, django_user_model):
owner = django_user_model.objects.create_user(username="owner", password="pass")
other = django_user_model.objects.create_user(username="other", password="pass")
conversation = Conversation.objects.create(user=owner, title="会话")
first = Message.objects.create(
conversation=conversation,
role=Message.Role.USER,
content="用户消息",
)
second = Message.objects.create(
conversation=conversation,
role=Message.Role.ASSISTANT,
content="报告消息",
)
client.force_login(other)
denied = client.get(reverse("review_agent_conversation_messages", args=[conversation.pk]))
assert denied.status_code == 404
client.force_login(owner)
response = client.get(
f"{reverse('review_agent_conversation_messages', args=[conversation.pk])}?after={first.pk}"
)
assert response.status_code == 200
payload = response.json()
assert payload["latest_message_id"] == second.pk
assert payload["messages"] == [
{
"id": second.pk,
"role": Message.Role.ASSISTANT,
"content": "报告消息",
"created_at": second.created_at.isoformat(),
}
]
def test_batch_status_exposes_batch_and_node_errors(client, django_user_model):
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
batch = FileSummaryBatch.objects.create(
conversation=conversation,
user=user,
batch_no="FS-ERR",
status=FileSummaryBatch.Status.FAILED,
error_message="压缩包解压失败",
)
WorkflowNodeRun.objects.create(
batch=batch,
node_code="extract",
node_name="压缩包解压",
status=WorkflowNodeRun.Status.FAILED,
progress=10,
message="未解出任何可扫描文件",
)
client.force_login(user)
response = client.get(reverse("file_summary_batch_status", args=[batch.pk]))
assert response.status_code == 200
payload = response.json()
assert payload["batch"]["error_message"] == "压缩包解压失败"
assert payload["nodes"][0]["message"] == "未解出任何可扫描文件"

View File

@@ -1,5 +1,8 @@
import pytest
from pathlib import Path
from zipfile import ZipFile
from review_agent.file_summary.services import archive as archive_service
from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from review_agent.skill_router import SkillRoute
from review_agent.models import (
@@ -43,6 +46,7 @@ def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user
assert FileSummaryBatchAttachment.objects.get(batch=batch).attachment == active
active.refresh_from_db()
assert active.upload_status == FileAttachment.UploadStatus.BOUND
assert batch.work_dir
assert WorkflowNodeRun.objects.filter(batch=batch).count() >= 6
assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created").exists()
@@ -67,6 +71,88 @@ def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_mo
assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed").exists()
def test_workflow_extracts_archive_and_scans_extracted_files(settings, tmp_path, django_user_model):
settings.MEDIA_ROOT = tmp_path
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
archive_path = tmp_path / "upload.zip"
with ZipFile(archive_path, "w") as archive:
archive.writestr("folder/a.pdf", b"%PDF-1.4\n%%EOF")
FileAttachment.objects.create(
conversation=conversation,
user=user,
original_name="upload.zip",
storage_path=str(archive_path),
file_size=archive_path.stat().st_size,
)
batch = create_file_summary_batch(conversation=conversation, user=user)
start_file_summary_workflow(batch, async_run=False)
batch.refresh_from_db()
assert batch.total_files == 1
assert batch.items.get().file_name == "a.pdf"
assert not batch.items.filter(file_type="zip").exists()
def test_workflow_marks_archive_extract_failure_visible(settings, tmp_path, django_user_model):
settings.MEDIA_ROOT = tmp_path
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
archive_path = tmp_path / "empty.zip"
with ZipFile(archive_path, "w"):
pass
FileAttachment.objects.create(
conversation=conversation,
user=user,
original_name="empty.zip",
storage_path=str(archive_path),
file_size=archive_path.stat().st_size,
)
batch = create_file_summary_batch(conversation=conversation, user=user)
start_file_summary_workflow(batch, async_run=False)
batch.refresh_from_db()
extract_node = batch.node_runs.get(node_code="extract")
assert batch.status == FileSummaryBatch.Status.FAILED
assert "未解出任何可扫描文件" in batch.error_message
assert extract_node.status == WorkflowNodeRun.Status.FAILED
assert "未解出任何可扫描文件" in extract_node.message
failed_event = WorkflowEvent.objects.filter(
batch=batch,
event_type="node_progress",
payload__status=WorkflowNodeRun.Status.FAILED,
).latest("id")
assert "未解出任何可扫描文件" in failed_event.payload["message"]
def test_rar_extract_uses_python_libarchive_before_7z(monkeypatch, tmp_path):
archive_path = tmp_path / "sample.rar"
archive_path.write_bytes(b"rar")
target_dir = tmp_path / "out"
calls = []
def fake_libarchive_extract(path: Path, target: Path):
calls.append(("libarchive", path, target))
extracted = target / "a.docx"
extracted.parent.mkdir(parents=True, exist_ok=True)
extracted.write_bytes(b"doc")
return [extracted]
def fake_7z_extract(path: Path, target: Path):
calls.append(("7z", path, target))
return []
monkeypatch.setattr(archive_service, "_extract_rar_with_libarchive", fake_libarchive_extract)
monkeypatch.setattr(archive_service, "_extract_rar_with_7z", fake_7z_extract)
extracted = archive_service.extract_archive(archive_path, target_dir)
assert [path.name for path in extracted] == ["a.docx"]
assert calls == [("libarchive", archive_path, target_dir)]
def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model):
settings.FILE_SUMMARY_ASYNC = False
user = django_user_model.objects.create_user(username="owner", password="pass")
@@ -142,7 +228,7 @@ def test_stream_message_reads_active_attachment_when_requested(settings, tmp_pat
assert "workflow_started" not in joined
def test_stream_message_returns_error_event_when_unexpected_stream_error(monkeypatch, django_user_model):
def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkeypatch, django_user_model):
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
@@ -151,14 +237,17 @@ def test_stream_message_returns_error_event_when_unexpected_stream_error(monkeyp
raise RuntimeError("provider connection reset")
monkeypatch.setattr("review_agent.services.stream_reply", broken_stream_reply)
monkeypatch.setattr("review_agent.services.generate_reply", lambda conversation, content: "非流式完整回复")
frames = list(stream_message(conversation, "普通问题"))
joined = "".join(frames)
assert "已生成部分内容" in joined
assert "回复生成中断" in joined
assert "replace" in joined
assert "非流式完整回复" in joined
assert "done" in joined
assert Message.objects.filter(conversation=conversation, role=Message.Role.ASSISTANT).exists()
assistant_message = Message.objects.get(conversation=conversation, role=Message.Role.ASSISTANT)
assert assistant_message.content == "非流式完整回复"
def test_stream_message_uses_llm_router_for_attachment_reader(