From 7e561ea21340f193cc8758c28d35deeb5ddae13a Mon Sep 17 00:00:00 2001 From: bruce Date: Sat, 6 Jun 2026 19:45:49 +0800 Subject: [PATCH] =?UTF-8?q?fix(file-summary):=20=E5=90=8C=E6=AD=A5?= =?UTF-8?q?=E5=8E=8B=E7=BC=A9=E5=8C=85=E5=B7=A5=E4=BD=9C=E6=B5=81=E7=8A=B6?= =?UTF-8?q?=E6=80=81=E4=B8=8E=E7=BB=93=E6=9E=9C=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- review_agent/file_summary/services/archive.py | 48 ++++++++ .../file_summary/skills/archive_extract.py | 35 +++++- .../file_summary/skills/file_inventory.py | 40 ++++++- review_agent/file_summary/views.py | 44 +++++++- review_agent/file_summary/workflow.py | 64 ++++++++--- review_agent/urls.py | 14 ++- static/css/login.css | 26 ++++- static/js/app.js | 106 +++++++++++++++++- templates/home.html | 10 +- tests/test_file_summary_frontend.py | 36 ++++++ tests/test_file_summary_views.py | 74 +++++++++++- tests/test_file_summary_workflow.py | 95 +++++++++++++++- 12 files changed, 560 insertions(+), 32 deletions(-) diff --git a/review_agent/file_summary/services/archive.py b/review_agent/file_summary/services/archive.py index 9e554e8..531336b 100644 --- a/review_agent/file_summary/services/archive.py +++ b/review_agent/file_summary/services/archive.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging import subprocess from pathlib import Path from zipfile import ZipFile @@ -9,6 +10,8 @@ import py7zr ARCHIVE_EXTENSIONS = {"zip", "7z", "rar"} +logger = logging.getLogger("review_agent.file_summary.services.archive") + def _ensure_inside_target(path: Path, target_dir: Path) -> None: target = target_dir.resolve() @@ -63,6 +66,51 @@ def _extract_7z(archive_path: Path, target_dir: Path) -> list[Path]: def _extract_rar(archive_path: Path, target_dir: Path) -> list[Path]: + try: + extracted = _extract_rar_with_libarchive(archive_path, target_dir) + except Exception as exc: + logger.warning( + "RAR libarchive extract failed, falling back to 7z", + extra={"archive_path": str(archive_path), "target_dir": str(target_dir), "error": str(exc)}, + ) + else: + if extracted: + return extracted + logger.info( + "RAR libarchive extract produced no files, falling back to 7z", + extra={"archive_path": str(archive_path), "target_dir": str(target_dir)}, + ) + return _extract_rar_with_7z(archive_path, target_dir) + + +def _extract_rar_with_libarchive(archive_path: Path, target_dir: Path) -> list[Path]: + try: + import libarchive + except ImportError as exc: + raise RuntimeError("未安装 libarchive,跳过 Python RAR 解压。") from exc + + extracted: list[Path] = [] + with libarchive.file_reader(str(archive_path)) as entries: + for entry in entries: + destination = _safe_member_path(target_dir, entry.pathname) + if entry.isdir: + destination.mkdir(parents=True, exist_ok=True) + continue + if not entry.isfile: + logger.info( + "RAR libarchive skipped non-regular entry", + extra={"archive_path": str(archive_path), "entry": entry.pathname}, + ) + continue + destination.parent.mkdir(parents=True, exist_ok=True) + with destination.open("wb") as target: + for block in entry.get_blocks(): + target.write(block) + extracted.append(destination) + return extracted + + +def _extract_rar_with_7z(archive_path: Path, target_dir: Path) -> list[Path]: result = subprocess.run( ["7z", "x", f"-o{target_dir}", str(archive_path), "-y"], check=False, diff --git a/review_agent/file_summary/skills/archive_extract.py b/review_agent/file_summary/skills/archive_extract.py index 6e12f6f..bf2c71b 100644 --- a/review_agent/file_summary/skills/archive_extract.py +++ b/review_agent/file_summary/skills/archive_extract.py @@ -2,6 +2,7 @@ from __future__ import annotations import logging from pathlib import Path +import re from review_agent.models import FileSummaryBatchAttachment @@ -13,34 +14,56 @@ from .base import BaseSkill, SkillResult, WorkflowContext logger = logging.getLogger("review_agent.file_summary.skills.archive_extract") +def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str: + stem = Path(binding.attachment.original_name).stem or "archive" + safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive" + return f"{binding.attachment_id}_{safe_stem}" + + class ArchiveExtractSkill(BaseSkill): name = "archive_extract" def run(self, context: WorkflowContext) -> SkillResult: extracted_count = 0 - target_dir = Path(context.batch.work_dir or "") - if not target_dir: - logger.info( - "Archive extract skipped without work dir", + if not context.batch.work_dir: + message = "批次工作目录为空,无法解压压缩包。" + logger.error( + "Archive extract failed without work dir", extra={"batch_id": context.batch.pk, "batch_no": context.batch.batch_no}, ) - return SkillResult(success=True, data={"extracted_count": 0}) + return SkillResult(success=False, message=message, data={"extracted_count": 0}) + target_root = Path(context.batch.work_dir) + archive_count = 0 for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch): path = resolve_storage_path(binding.attachment.storage_path) if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS: continue + archive_count += 1 + target_dir = target_root / "extracted" / _safe_archive_dir_name(binding) logger.info( "Archive extract started", extra={ "batch_id": context.batch.pk, "attachment_id": binding.attachment_id, "path": str(path), + "target_dir": str(target_dir), }, ) extracted_count += len(extract_archive(path, target_dir)) + if archive_count and extracted_count == 0: + message = "压缩包未解出任何可扫描文件,请检查压缩包内容或格式。" + logger.warning( + "Archive extract produced no files", + extra={"batch_id": context.batch.pk, "archive_count": archive_count}, + ) + return SkillResult(success=False, message=message, data={"extracted_count": 0}) logger.info( "Archive extract finished", - extra={"batch_id": context.batch.pk, "extracted_count": extracted_count}, + extra={ + "batch_id": context.batch.pk, + "archive_count": archive_count, + "extracted_count": extracted_count, + }, ) return SkillResult(success=True, data={"extracted_count": extracted_count}) diff --git a/review_agent/file_summary/skills/file_inventory.py b/review_agent/file_summary/skills/file_inventory.py index a705e9f..0de852c 100644 --- a/review_agent/file_summary/skills/file_inventory.py +++ b/review_agent/file_summary/skills/file_inventory.py @@ -2,10 +2,12 @@ from __future__ import annotations import logging from pathlib import Path +import re from review_agent.models import FileSummaryBatchAttachment from ..paths import resolve_storage_path +from ..services.archive import ARCHIVE_EXTENSIONS from ..services.inventory import scan_files_to_items from .base import BaseSkill, SkillResult, WorkflowContext @@ -13,14 +15,44 @@ from .base import BaseSkill, SkillResult, WorkflowContext logger = logging.getLogger("review_agent.file_summary.skills.file_inventory") +def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str: + stem = Path(binding.attachment.original_name).stem or "archive" + safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive" + return f"{binding.attachment_id}_{safe_stem}" + + class FileInventorySkill(BaseSkill): name = "file_inventory" def run(self, context: WorkflowContext) -> SkillResult: - roots = [ - resolve_storage_path(binding.attachment.storage_path) - for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch) - ] + roots: list[Path] = [] + missing_extract_roots: list[str] = [] + for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch): + original_path = resolve_storage_path(binding.attachment.storage_path) + is_archive = original_path.suffix.lower().lstrip(".") in ARCHIVE_EXTENSIONS + if not is_archive: + roots.append(original_path) + continue + + extracted_root = ( + Path(context.batch.work_dir) + / "extracted" + / _safe_archive_dir_name(binding) + ) + if extracted_root.exists(): + roots.append(extracted_root) + else: + missing_extract_roots.append(str(extracted_root)) + if missing_extract_roots: + message = "压缩包解压目录不存在,无法扫描解压后的文件。" + logger.warning( + "File inventory missing extracted roots", + extra={ + "batch_id": context.batch.pk, + "missing_extract_roots": missing_extract_roots, + }, + ) + return SkillResult(success=False, message=message) logger.info( "File inventory started", extra={ diff --git a/review_agent/file_summary/views.py b/review_agent/file_summary/views.py index a8a57b1..fa27e52 100644 --- a/review_agent/file_summary/views.py +++ b/review_agent/file_summary/views.py @@ -5,7 +5,7 @@ from pathlib import Path from django.http import FileResponse, Http404, JsonResponse from django.views.decorators.http import require_http_methods -from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment +from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, Message from review_agent.models import FileSummaryBatch, WorkflowEvent from .events import serialize_event @@ -90,6 +90,47 @@ def attachment_detail(request, conversation_id: int, attachment_id: int): return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)}) +def _serialize_message(message: Message) -> dict[str, object]: + return { + "id": message.pk, + "role": message.role, + "content": message.content, + "created_at": message.created_at.isoformat(), + } + + +@require_http_methods(["GET"]) +@login_required +def conversation_messages(request, conversation_id: int): + conversation = _conversation_for_user(request.user, conversation_id) + after = request.GET.get("after") or "0" + try: + after_id = int(after) + except ValueError: + after_id = 0 + + messages = list(conversation.messages.filter(pk__gt=after_id).order_by("id")) + latest_message_id = ( + conversation.messages.order_by("-id").values_list("id", flat=True).first() or 0 + ) + logger.info( + "Conversation incremental messages requested", + extra={ + "conversation_id": conversation.pk, + "after_id": after_id, + "message_count": len(messages), + "latest_message_id": latest_message_id, + }, + ) + return JsonResponse( + { + "conversation_id": conversation.pk, + "latest_message_id": latest_message_id, + "messages": [_serialize_message(message) for message in messages], + } + ) + + @require_http_methods(["GET"]) @login_required def batch_status(request, batch_id: int): @@ -107,6 +148,7 @@ def batch_status(request, batch_id: int): "success_files": batch.success_files, "failed_files": batch.failed_files, "total_pages": batch.total_pages, + "error_message": batch.error_message, }, "nodes": [ { diff --git a/review_agent/file_summary/workflow.py b/review_agent/file_summary/workflow.py index 8bfa147..5184ad9 100644 --- a/review_agent/file_summary/workflow.py +++ b/review_agent/file_summary/workflow.py @@ -1,9 +1,11 @@ from __future__ import annotations import logging +from pathlib import Path from threading import Thread from uuid import uuid4 +from django.conf import settings from django.db import transaction from django.utils import timezone @@ -17,6 +19,7 @@ from review_agent.models import ( ) from .events import record_event +from .services.archive import ARCHIVE_EXTENSIONS from .skills.archive_extract import ArchiveExtractSkill from .skills.base import WorkflowContext from .skills.document_page_count import DocumentPageCountSkill @@ -54,6 +57,10 @@ def build_batch_no() -> str: return f"FS-{timezone.localtime().strftime('%Y%m%d%H%M%S')}-{uuid4().hex[:6]}" +def build_batch_work_dir(batch_no: str) -> Path: + return Path(settings.MEDIA_ROOT) / "file_summary" / "work" / batch_no + + @transaction.atomic def create_file_summary_batch( *, @@ -78,15 +85,29 @@ def create_file_summary_batch( }, ) + batch_no = build_batch_no() + work_dir = build_batch_work_dir(batch_no) + work_dir.mkdir(parents=True, exist_ok=True) + batch = FileSummaryBatch.objects.create( conversation=conversation, user=user, trigger_message=trigger_message, - batch_no=build_batch_no(), + batch_no=batch_no, + work_dir=str(work_dir), ) for attachment in active_attachments: - FileSummaryBatchAttachment.objects.create(batch=batch, attachment=attachment) + source_role = ( + FileSummaryBatchAttachment.SourceRole.ARCHIVE + if Path(attachment.original_name).suffix.lower().lstrip(".") in ARCHIVE_EXTENSIONS + else FileSummaryBatchAttachment.SourceRole.MULTI_FILE + ) + FileSummaryBatchAttachment.objects.create( + batch=batch, + attachment=attachment, + source_role=source_role, + ) attachment.upload_status = FileAttachment.UploadStatus.BOUND attachment.save(update_fields=["upload_status"]) @@ -152,7 +173,7 @@ class WorkflowExecutor: record_event( self.batch, "node_progress", - {"node_code": node.node_code, "status": node.status, "progress": node.progress}, + {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message}, ) skill_name = next( @@ -160,18 +181,35 @@ class WorkflowExecutor: "", ) if skill_name: - result = self.registry.execute(skill_name, WorkflowContext(batch=self.batch)) - if not result.success: - logger.warning( - "Workflow node skill failed", - extra={ - "batch_id": self.batch.pk, + try: + result = self.registry.execute(skill_name, WorkflowContext(batch=self.batch)) + if not result.success: + logger.warning( + "Workflow node skill failed", + extra={ + "batch_id": self.batch.pk, + "node_code": node.node_code, + "skill_name": skill_name, + "result_message": result.message, + }, + ) + raise RuntimeError(result.message or f"{node.node_name}执行失败") + except Exception as exc: + node.status = WorkflowNodeRun.Status.FAILED + node.finished_at = timezone.now() + node.message = str(exc) + node.save(update_fields=["status", "finished_at", "message"]) + record_event( + self.batch, + "node_progress", + { "node_code": node.node_code, - "skill_name": skill_name, - "result_message": result.message, + "status": node.status, + "progress": node.progress, + "message": node.message, }, ) - raise RuntimeError(result.message or f"{node.node_name}执行失败") + raise node.status = WorkflowNodeRun.Status.SUCCESS node.progress = 100 @@ -181,7 +219,7 @@ class WorkflowExecutor: record_event( self.batch, "node_progress", - {"node_code": node.node_code, "status": node.status, "progress": node.progress}, + {"node_code": node.node_code, "status": node.status, "progress": node.progress, "message": node.message}, ) logger.info( "Workflow node finished", diff --git a/review_agent/urls.py b/review_agent/urls.py index 737071d..418bb88 100644 --- a/review_agent/urls.py +++ b/review_agent/urls.py @@ -1,6 +1,13 @@ from django.urls import path -from .file_summary.views import attachment_detail, attachments, batch_events, batch_status, export_download +from .file_summary.views import ( + attachment_detail, + attachments, + batch_events, + batch_status, + conversation_messages, + export_download, +) urlpatterns = [ @@ -19,6 +26,11 @@ urlpatterns = [ attachment_detail, name="file_summary_attachment_detail", ), + path( + "api/review-agent/conversations//messages/", + conversation_messages, + name="review_agent_conversation_messages", + ), path( "api/review-agent/file-summary//status/", batch_status, diff --git a/static/css/login.css b/static/css/login.css index 48a725a..a177290 100644 --- a/static/css/login.css +++ b/static/css/login.css @@ -882,7 +882,9 @@ input:focus { .upload-dropzone span, .upload-status, .attachment-item span, -.workflow-card em { +.workflow-card em, +.workflow-card small, +.workflow-error { color: var(--muted); font-size: 12px; } @@ -949,6 +951,28 @@ input:focus { font-size: 13px; } +.node-status div { + display: grid; + min-width: 0; + gap: 2px; +} + +.node-status span, +.node-status small, +.workflow-error { + overflow-wrap: anywhere; + word-break: break-word; +} + +.workflow-error { + margin: 0; + padding: 8px 10px; + border-radius: 6px; + background: #fff1f0; + color: #b42318; + line-height: 1.5; +} + .status-running, .status-retrying { color: var(--accent); diff --git a/static/js/app.js b/static/js/app.js index 79d9cf6..ca9ae78 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -20,6 +20,7 @@ var nodeAnchors = []; var workflowPollingTimers = {}; var WORKFLOW_POLL_INTERVAL_MS = 1500; + var latestMessageId = 0; if (!workspace) { return; @@ -52,6 +53,15 @@ nodeAnchors = Array.prototype.slice.call(document.querySelectorAll(".node-anchor")); } + function syncLatestMessageIdFromDom() { + document.querySelectorAll(".message[data-message-id]").forEach(function (message) { + var id = parseInt(message.getAttribute("data-message-id"), 10); + if (!Number.isNaN(id)) { + latestMessageId = Math.max(latestMessageId, id); + } + }); + } + if (sidebarToggle) { sidebarToggle.addEventListener("click", toggleSidebar); } @@ -271,6 +281,9 @@ var article = document.createElement("article"); article.className = "message " + role; article.id = messageId; + if (typeof messageId === "number") { + article.setAttribute("data-message-id", messageId); + } if (label) { article.setAttribute("data-node-label", label); } @@ -295,6 +308,48 @@ return { article: article, bubble: bubble, text: text }; } + function appendConversationMessage(message) { + if (!message || document.querySelector('.message[data-message-id="' + message.id + '"]')) { + return; + } + var label = message.role === "assistant" ? "AI " : "用户 "; + label += document.querySelectorAll(".message").length + 1; + var created = createMessage(message.role, message.content || "", "message-" + message.id, label); + created.article.setAttribute("data-message-id", message.id); + latestMessageId = Math.max(latestMessageId, message.id); + if (message.role === "user") { + appendNode(created.article.id, label, true); + } + } + + async function refreshConversationMessages() { + var conversationId = currentConversationId(); + if (!conversationId || !summaryPanel) { + return; + } + var url = templateUrl("data-message-url-template", "__conversation_id__", conversationId); + if (!url) { + return; + } + try { + var response = await fetch(url + "?after=" + latestMessageId, { cache: "no-store" }); + if (!response.ok) { + return; + } + var payload = await response.json(); + (payload.messages || []).forEach(appendConversationMessage); + if (payload.latest_message_id) { + latestMessageId = Math.max(latestMessageId, payload.latest_message_id); + } + syncNodeRailVisibility(); + bindNodeAnchorClicks(); + setActiveNode(); + scrollChatToBottom(); + } catch (error) { + console.error("Conversation message refresh failed", error); + } + } + function appendNode(targetId, title, isLatest) { if (!nodeRail) { return; @@ -530,13 +585,31 @@ var status = card.querySelector(".workflow-status"); status.textContent = payload.batch.status; status.className = "workflow-status status-" + payload.batch.status; + var batchError = card.querySelector(".workflow-error"); + if (payload.batch.error_message) { + if (!batchError) { + batchError = document.createElement("p"); + batchError.className = "workflow-error"; + card.insertBefore(batchError, card.querySelector("ol")); + } + batchError.textContent = payload.batch.error_message; + } else if (batchError) { + batchError.remove(); + } var list = card.querySelector("ol"); list.innerHTML = ""; (payload.nodes || []).forEach(function (node) { var item = document.createElement("li"); item.className = "node-status status-" + node.status; item.setAttribute("data-node-code", node.node_code); - item.innerHTML = "" + escapeHtml(node.node_name) + "" + node.progress + "%"; + item.innerHTML = + '
' + + escapeHtml(node.node_name) + + "" + + (node.message ? "" + escapeHtml(node.message) + "" : "") + + "
" + + node.progress + + "%"; list.appendChild(item); }); return payload.batch.status || ""; @@ -561,11 +634,13 @@ workflowPollingTimers[batchId] = window.setInterval(async function () { var status = await refreshWorkflowCard(batchId); if (isWorkflowTerminalStatus(status)) { + refreshConversationMessages(); stopWorkflowPolling(batchId); } }, WORKFLOW_POLL_INTERVAL_MS); refreshWorkflowCard(batchId).then(function (status) { if (isWorkflowTerminalStatus(status)) { + refreshConversationMessages(); stopWorkflowPolling(batchId); } }); @@ -666,6 +741,11 @@ return; } if (eventName === "meta") { + if (payload.user_message_id) { + userMessage.article.id = "message-" + payload.user_message_id; + userMessage.article.setAttribute("data-message-id", payload.user_message_id); + latestMessageId = Math.max(latestMessageId, payload.user_message_id); + } if (payload.conversation_id) { conversationIdInput.value = payload.conversation_id; window.history.replaceState({}, "", "/?conversation=" + payload.conversation_id); @@ -678,6 +758,10 @@ assistantText += payload.delta || ""; assistantMessage.text.innerHTML = renderAssistantContent(assistantText); scrollChatToBottom(); + } else if (eventName === "replace") { + assistantText = payload.content || ""; + assistantMessage.text.innerHTML = renderAssistantContent(assistantText); + scrollChatToBottom(); } else if (eventName === "error") { assistantText = payload.message || "模型调用失败。"; assistantMessage.text.innerHTML = renderAssistantContent(assistantText); @@ -687,6 +771,8 @@ } else if (eventName === "done") { if (payload.assistant_message_id) { assistantMessage.article.id = "message-" + payload.assistant_message_id; + assistantMessage.article.setAttribute("data-message-id", payload.assistant_message_id); + latestMessageId = Math.max(latestMessageId, payload.assistant_message_id); } if (payload.title) { setConversationTitle(payload.title); @@ -711,7 +797,24 @@ } } + function bindPromptKeyboardShortcuts() { + if (!promptInput || !composer) { + return; + } + promptInput.addEventListener("keydown", function (event) { + if (event.key === "Enter" && !event.ctrlKey) { + event.preventDefault(); + if (typeof composer.requestSubmit === "function") { + composer.requestSubmit(); + } else { + composer.dispatchEvent(new Event("submit", { cancelable: true })); + } + } + }); + } + syncNodeRailVisibility(); + syncLatestMessageIdFromDom(); bindNodeAnchorClicks(); renderExistingAssistantMessages(); refreshRunningWorkflowCards(); @@ -724,6 +827,7 @@ if (composer) { composer.addEventListener("submit", streamChat); } + bindPromptKeyboardShortcuts(); if (uploadDropzone && attachmentInput) { uploadDropzone.addEventListener("click", function () { diff --git a/templates/home.html b/templates/home.html index 9c6d482..be9f2e5 100644 --- a/templates/home.html +++ b/templates/home.html @@ -108,6 +108,7 @@
@@ -174,6 +175,7 @@ class="summary-panel" id="summaryPanel" data-attachment-url-template="/api/review-agent/conversations/__conversation_id__/attachments/" + data-message-url-template="/api/review-agent/conversations/__conversation_id__/messages/" data-status-url-template="/api/review-agent/file-summary/__batch_id__/status/" data-events-url-template="/api/review-agent/file-summary/__batch_id__/events/" > @@ -220,10 +222,16 @@ {{ batch.batch_no }} {{ batch.status }} + {% if batch.error_message %} +

{{ batch.error_message }}

+ {% endif %}
    {% for node in batch.node_runs.all %}
  1. - {{ node.node_name }} +
    + {{ node.node_name }} + {% if node.message %}{{ node.message }}{% endif %} +
    {{ node.progress }}%
  2. {% endfor %} diff --git a/tests/test_file_summary_frontend.py b/tests/test_file_summary_frontend.py index 4f46de1..20de8bf 100644 --- a/tests/test_file_summary_frontend.py +++ b/tests/test_file_summary_frontend.py @@ -25,6 +25,8 @@ def test_workspace_renders_summary_panel(client, django_user_model): assert 'id="uploadDropzone"' in content assert 'id="workflowCardList"' in content assert 'data-conversation-id="' in content + assert 'data-message-id="' in content + assert 'data-message-url-template="' in content assert 'class="message-content markdown-content"' in content assert 'class="message-raw"' in content assert "自动汇总文件目录与页数" in content @@ -52,3 +54,37 @@ def test_frontend_updates_sidebar_conversation_by_stable_id(): assert "data-conversation-id" in script assert "setAttribute(\"data-conversation-id\"" in script assert ".history-item[data-conversation-id=" in script + + +def test_frontend_refreshes_generated_workflow_messages(): + script = open("static/js/app.js", encoding="utf-8").read() + + assert "refreshConversationMessages" in script + assert "latestMessageId" in script + assert "data-message-url-template" in script + + +def test_frontend_can_replace_partial_stream_content(): + script = open("static/js/app.js", encoding="utf-8").read() + + assert 'eventName === "replace"' in script + assert "assistantText = payload.content" in script + + +def test_frontend_enter_sends_and_ctrl_enter_inserts_newline(): + script = open("static/js/app.js", encoding="utf-8").read() + + assert "bindPromptKeyboardShortcuts" in script + assert "event.key === \"Enter\"" in script + assert "event.ctrlKey" in script + assert "composer.requestSubmit()" in script + + +def test_frontend_renders_workflow_error_messages(): + script = open("static/js/app.js", encoding="utf-8").read() + css = open("static/css/login.css", encoding="utf-8").read() + + assert "payload.batch.error_message" in script + assert "workflow-error" in script + assert "node.message" in script + assert ".workflow-error" in css diff --git a/tests/test_file_summary_views.py b/tests/test_file_summary_views.py index 588d466..d88e872 100644 --- a/tests/test_file_summary_views.py +++ b/tests/test_file_summary_views.py @@ -2,7 +2,14 @@ from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse import pytest -from review_agent.models import Conversation, ExportedSummaryFile, FileAttachment, FileSummaryBatch +from review_agent.models import ( + Conversation, + ExportedSummaryFile, + FileAttachment, + FileSummaryBatch, + Message, + WorkflowNodeRun, +) pytestmark = pytest.mark.django_db @@ -99,3 +106,68 @@ def test_export_download_requires_batch_owner(client, tmp_path, django_user_mode assert "attachment" in allowed["Content-Disposition"] assert "summary.md" in allowed["Content-Disposition"] assert allowed["Content-Type"].startswith("text/markdown") + + +def test_conversation_messages_returns_incremental_messages(client, django_user_model): + owner = django_user_model.objects.create_user(username="owner", password="pass") + other = django_user_model.objects.create_user(username="other", password="pass") + conversation = Conversation.objects.create(user=owner, title="会话") + first = Message.objects.create( + conversation=conversation, + role=Message.Role.USER, + content="用户消息", + ) + second = Message.objects.create( + conversation=conversation, + role=Message.Role.ASSISTANT, + content="报告消息", + ) + + client.force_login(other) + denied = client.get(reverse("review_agent_conversation_messages", args=[conversation.pk])) + assert denied.status_code == 404 + + client.force_login(owner) + response = client.get( + f"{reverse('review_agent_conversation_messages', args=[conversation.pk])}?after={first.pk}" + ) + + assert response.status_code == 200 + payload = response.json() + assert payload["latest_message_id"] == second.pk + assert payload["messages"] == [ + { + "id": second.pk, + "role": Message.Role.ASSISTANT, + "content": "报告消息", + "created_at": second.created_at.isoformat(), + } + ] + + +def test_batch_status_exposes_batch_and_node_errors(client, django_user_model): + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + batch = FileSummaryBatch.objects.create( + conversation=conversation, + user=user, + batch_no="FS-ERR", + status=FileSummaryBatch.Status.FAILED, + error_message="压缩包解压失败", + ) + WorkflowNodeRun.objects.create( + batch=batch, + node_code="extract", + node_name="压缩包解压", + status=WorkflowNodeRun.Status.FAILED, + progress=10, + message="未解出任何可扫描文件", + ) + client.force_login(user) + + response = client.get(reverse("file_summary_batch_status", args=[batch.pk])) + + assert response.status_code == 200 + payload = response.json() + assert payload["batch"]["error_message"] == "压缩包解压失败" + assert payload["nodes"][0]["message"] == "未解出任何可扫描文件" diff --git a/tests/test_file_summary_workflow.py b/tests/test_file_summary_workflow.py index b80e490..fbe855d 100644 --- a/tests/test_file_summary_workflow.py +++ b/tests/test_file_summary_workflow.py @@ -1,5 +1,8 @@ import pytest +from pathlib import Path +from zipfile import ZipFile +from review_agent.file_summary.services import archive as archive_service from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow from review_agent.skill_router import SkillRoute from review_agent.models import ( @@ -43,6 +46,7 @@ def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user assert FileSummaryBatchAttachment.objects.get(batch=batch).attachment == active active.refresh_from_db() assert active.upload_status == FileAttachment.UploadStatus.BOUND + assert batch.work_dir assert WorkflowNodeRun.objects.filter(batch=batch).count() >= 6 assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created").exists() @@ -67,6 +71,88 @@ def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_mo assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed").exists() +def test_workflow_extracts_archive_and_scans_extracted_files(settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + archive_path = tmp_path / "upload.zip" + with ZipFile(archive_path, "w") as archive: + archive.writestr("folder/a.pdf", b"%PDF-1.4\n%%EOF") + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="upload.zip", + storage_path=str(archive_path), + file_size=archive_path.stat().st_size, + ) + batch = create_file_summary_batch(conversation=conversation, user=user) + + start_file_summary_workflow(batch, async_run=False) + + batch.refresh_from_db() + assert batch.total_files == 1 + assert batch.items.get().file_name == "a.pdf" + assert not batch.items.filter(file_type="zip").exists() + + +def test_workflow_marks_archive_extract_failure_visible(settings, tmp_path, django_user_model): + settings.MEDIA_ROOT = tmp_path + user = django_user_model.objects.create_user(username="owner", password="pass") + conversation = Conversation.objects.create(user=user, title="会话") + archive_path = tmp_path / "empty.zip" + with ZipFile(archive_path, "w"): + pass + FileAttachment.objects.create( + conversation=conversation, + user=user, + original_name="empty.zip", + storage_path=str(archive_path), + file_size=archive_path.stat().st_size, + ) + batch = create_file_summary_batch(conversation=conversation, user=user) + + start_file_summary_workflow(batch, async_run=False) + + batch.refresh_from_db() + extract_node = batch.node_runs.get(node_code="extract") + assert batch.status == FileSummaryBatch.Status.FAILED + assert "未解出任何可扫描文件" in batch.error_message + assert extract_node.status == WorkflowNodeRun.Status.FAILED + assert "未解出任何可扫描文件" in extract_node.message + failed_event = WorkflowEvent.objects.filter( + batch=batch, + event_type="node_progress", + payload__status=WorkflowNodeRun.Status.FAILED, + ).latest("id") + assert "未解出任何可扫描文件" in failed_event.payload["message"] + + +def test_rar_extract_uses_python_libarchive_before_7z(monkeypatch, tmp_path): + archive_path = tmp_path / "sample.rar" + archive_path.write_bytes(b"rar") + target_dir = tmp_path / "out" + calls = [] + + def fake_libarchive_extract(path: Path, target: Path): + calls.append(("libarchive", path, target)) + extracted = target / "a.docx" + extracted.parent.mkdir(parents=True, exist_ok=True) + extracted.write_bytes(b"doc") + return [extracted] + + def fake_7z_extract(path: Path, target: Path): + calls.append(("7z", path, target)) + return [] + + monkeypatch.setattr(archive_service, "_extract_rar_with_libarchive", fake_libarchive_extract) + monkeypatch.setattr(archive_service, "_extract_rar_with_7z", fake_7z_extract) + + extracted = archive_service.extract_archive(archive_path, target_dir) + + assert [path.name for path in extracted] == ["a.docx"] + assert calls == [("libarchive", archive_path, target_dir)] + + def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model): settings.FILE_SUMMARY_ASYNC = False user = django_user_model.objects.create_user(username="owner", password="pass") @@ -142,7 +228,7 @@ def test_stream_message_reads_active_attachment_when_requested(settings, tmp_pat assert "workflow_started" not in joined -def test_stream_message_returns_error_event_when_unexpected_stream_error(monkeypatch, django_user_model): +def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkeypatch, django_user_model): user = django_user_model.objects.create_user(username="owner", password="pass") conversation = Conversation.objects.create(user=user, title="会话") @@ -151,14 +237,17 @@ def test_stream_message_returns_error_event_when_unexpected_stream_error(monkeyp raise RuntimeError("provider connection reset") monkeypatch.setattr("review_agent.services.stream_reply", broken_stream_reply) + monkeypatch.setattr("review_agent.services.generate_reply", lambda conversation, content: "非流式完整回复") frames = list(stream_message(conversation, "普通问题")) joined = "".join(frames) assert "已生成部分内容" in joined - assert "回复生成中断" in joined + assert "replace" in joined + assert "非流式完整回复" in joined assert "done" in joined - assert Message.objects.filter(conversation=conversation, role=Message.Role.ASSISTANT).exists() + assistant_message = Message.objects.get(conversation=conversation, role=Message.Role.ASSISTANT) + assert assistant_message.content == "非流式完整回复" def test_stream_message_uses_llm_router_for_attachment_reader(