fix(file-summary): 同步压缩包工作流状态与结果刷新

This commit is contained in:
2026-06-06 19:45:49 +08:00
parent daa0642142
commit 7e561ea213
12 changed files with 560 additions and 32 deletions

View File

@@ -1,5 +1,8 @@
import pytest
from pathlib import Path
from zipfile import ZipFile
from review_agent.file_summary.services import archive as archive_service
from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from review_agent.skill_router import SkillRoute
from review_agent.models import (
@@ -43,6 +46,7 @@ def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user
assert FileSummaryBatchAttachment.objects.get(batch=batch).attachment == active
active.refresh_from_db()
assert active.upload_status == FileAttachment.UploadStatus.BOUND
assert batch.work_dir
assert WorkflowNodeRun.objects.filter(batch=batch).count() >= 6
assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created").exists()
@@ -67,6 +71,88 @@ def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_mo
assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed").exists()
def test_workflow_extracts_archive_and_scans_extracted_files(settings, tmp_path, django_user_model):
    """A zip attachment yields batch items for its members, not for the zip itself.

    Builds a one-member archive, attaches it to a conversation, runs the
    file-summary workflow with ``async_run=False`` and checks that the batch
    reports exactly one file named ``a.pdf`` and no item of type ``zip``.
    """
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # The single member sits inside a sub-folder of the archive.
    zip_path = tmp_path / "upload.zip"
    with ZipFile(zip_path, "w") as zip_file:
        zip_file.writestr("folder/a.pdf", b"%PDF-1.4\n%%EOF")

    # Size is read after the archive has been closed (fully written).
    zip_size = zip_path.stat().st_size
    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="upload.zip",
        storage_path=str(zip_path),
        file_size=zip_size,
    )

    batch = create_file_summary_batch(conversation=chat, user=owner)
    start_file_summary_workflow(batch, async_run=False)
    batch.refresh_from_db()

    assert batch.total_files == 1
    only_item = batch.items.get()
    assert only_item.file_name == "a.pdf"
    zip_items = batch.items.filter(file_type="zip")
    assert not zip_items.exists()
def test_workflow_marks_archive_extract_failure_visible(settings, tmp_path, django_user_model):
    """An archive with no members fails the batch and surfaces the reason.

    After running the workflow with ``async_run=False`` on an empty zip, the
    same message fragment must appear on the batch, on the ``extract`` node
    run, and in the payload of the latest failed ``node_progress`` event.
    """
    # Fragment expected in every user-visible failure message checked below.
    failure_fragment = "未解出任何可扫描文件"

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # Opening and closing the archive without writing produces an empty zip.
    zip_path = tmp_path / "empty.zip"
    with ZipFile(zip_path, "w"):
        pass

    zip_size = zip_path.stat().st_size
    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="empty.zip",
        storage_path=str(zip_path),
        file_size=zip_size,
    )

    batch = create_file_summary_batch(conversation=chat, user=owner)
    start_file_summary_workflow(batch, async_run=False)
    batch.refresh_from_db()

    node = batch.node_runs.get(node_code="extract")
    assert batch.status == FileSummaryBatch.Status.FAILED
    assert failure_fragment in batch.error_message
    assert node.status == WorkflowNodeRun.Status.FAILED
    assert failure_fragment in node.message

    failed_progress_events = WorkflowEvent.objects.filter(
        batch=batch,
        event_type="node_progress",
        payload__status=WorkflowNodeRun.Status.FAILED,
    )
    newest_failure = failed_progress_events.latest("id")
    assert failure_fragment in newest_failure.payload["message"]
def test_rar_extract_uses_python_libarchive_before_7z(monkeypatch, tmp_path):
    """``extract_archive`` on a ``.rar`` path uses the libarchive extractor only.

    Both RAR back-ends are replaced with recording fakes; the libarchive fake
    produces one file, and the recorded calls must contain the libarchive
    invocation and nothing else.
    """
    rar_path = tmp_path / "sample.rar"
    rar_path.write_bytes(b"rar")
    out_dir = tmp_path / "out"
    invocations = []

    def libarchive_stub(path: Path, target: Path):
        # Record the call, then materialise a single extracted file.
        invocations.append(("libarchive", path, target))
        produced = target / "a.docx"
        produced.parent.mkdir(parents=True, exist_ok=True)
        produced.write_bytes(b"doc")
        return [produced]

    def seven_zip_stub(path: Path, target: Path):
        # Only records; any call here would show up in the final assertion.
        invocations.append(("7z", path, target))
        return []

    monkeypatch.setattr(archive_service, "_extract_rar_with_libarchive", libarchive_stub)
    monkeypatch.setattr(archive_service, "_extract_rar_with_7z", seven_zip_stub)

    result = archive_service.extract_archive(rar_path, out_dir)

    result_names = [entry.name for entry in result]
    assert result_names == ["a.docx"]
    assert invocations == [("libarchive", rar_path, out_dir)]
def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model):
settings.FILE_SUMMARY_ASYNC = False
user = django_user_model.objects.create_user(username="owner", password="pass")
@@ -142,7 +228,7 @@ def test_stream_message_reads_active_attachment_when_requested(settings, tmp_pat
assert "workflow_started" not in joined
def test_stream_message_returns_error_event_when_unexpected_stream_error(monkeypatch, django_user_model):
def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkeypatch, django_user_model):
user = django_user_model.objects.create_user(username="owner", password="pass")
conversation = Conversation.objects.create(user=user, title="会话")
@@ -151,14 +237,17 @@ def test_stream_message_returns_error_event_when_unexpected_stream_error(monkeyp
raise RuntimeError("provider connection reset")
monkeypatch.setattr("review_agent.services.stream_reply", broken_stream_reply)
monkeypatch.setattr("review_agent.services.generate_reply", lambda conversation, content: "非流式完整回复")
frames = list(stream_message(conversation, "普通问题"))
joined = "".join(frames)
assert "已生成部分内容" in joined
assert "回复生成中断" in joined
assert "replace" in joined
assert "非流式完整回复" in joined
assert "done" in joined
assert Message.objects.filter(conversation=conversation, role=Message.Role.ASSISTANT).exists()
assistant_message = Message.objects.get(conversation=conversation, role=Message.Role.ASSISTANT)
assert assistant_message.content == "非流式完整回复"
def test_stream_message_uses_llm_router_for_attachment_reader(