"""Tests for the file-summary workflow and the ``stream_message`` entry point.

Covers batch creation, synchronous workflow execution, archive extraction,
completion notifications, and how ``stream_message`` routes between the
file-summary workflow, the attachment reader and the plain LLM reply path.
"""

from pathlib import Path
from zipfile import ZipFile

import pytest

from review_agent.file_summary.services import archive as archive_service
from review_agent.file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from review_agent.skill_router import SkillRoute
from review_agent.models import (
    Conversation,
    FileAttachment,
    FileSummaryBatch,
    FileSummaryBatchAttachment,
    Message,
    WorkflowEvent,
    WorkflowNodeRun,
)
from review_agent.services import stream_message

# Every test in this module is marked as needing database access.
pytestmark = pytest.mark.django_db


def _make_user_and_conversation(django_user_model, title="会话"):
    """Create the ``owner`` user and a conversation owned by that user.

    Returns:
        A ``(user, conversation)`` tuple.
    """
    user = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=user, title=title)
    return user, conversation


def _create_attachment(
    conversation,
    user,
    original_name="a.docx",
    storage_path="x/a.docx",
    file_size=1,
    **extra_fields,
):
    """Create a ``FileAttachment`` on *conversation*.

    The defaults are the placeholder document most tests use; ``extra_fields``
    is forwarded verbatim to ``FileAttachment.objects.create``.
    """
    return FileAttachment.objects.create(
        conversation=conversation,
        user=user,
        original_name=original_name,
        storage_path=storage_path,
        file_size=file_size,
        **extra_fields,
    )


def test_create_batch_binds_active_attachments_and_initializes_nodes(django_user_model):
    """Creating a batch binds only the active attachment and seeds nodes/events."""
    user, conversation = _make_user_and_conversation(django_user_model)
    message = Message.objects.create(
        conversation=conversation,
        role=Message.Role.USER,
        content="自动汇总",
    )
    active = _create_attachment(conversation, user)
    # Inactive attachment: must not be bound to the batch.
    _create_attachment(
        conversation,
        user,
        original_name="old.docx",
        storage_path="x/old.docx",
        is_active=False,
    )

    batch = create_file_summary_batch(conversation=conversation, user=user, trigger_message=message)

    assert batch.status == FileSummaryBatch.Status.PENDING
    # ``.get`` also proves exactly one attachment was bound.
    assert FileSummaryBatchAttachment.objects.get(batch=batch).attachment == active
    active.refresh_from_db()
    assert active.upload_status == FileAttachment.UploadStatus.BOUND
    assert batch.work_dir
    assert WorkflowNodeRun.objects.filter(batch=batch).count() >= 6
    assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_created").exists()


def test_start_file_summary_workflow_runs_synchronously_for_tests(django_user_model):
    """With ``async_run=False`` the batch ends in SUCCESS with a completion event."""
    user, conversation = _make_user_and_conversation(django_user_model)
    message = Message.objects.create(
        conversation=conversation,
        role=Message.Role.USER,
        content="自动汇总",
    )
    _create_attachment(conversation, user)
    batch = create_file_summary_batch(conversation=conversation, user=user, trigger_message=message)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.status == FileSummaryBatch.Status.SUCCESS
    assert WorkflowEvent.objects.filter(batch=batch, event_type="workflow_completed").exists()


def test_file_summary_workflow_dispatches_completion_notification(monkeypatch, django_user_model):
    """Running the workflow calls the notification dispatcher with batch context."""
    user, conversation = _make_user_and_conversation(django_user_model)
    _create_attachment(conversation, user)
    batch = create_file_summary_batch(conversation=conversation, user=user)
    calls = []

    def fake_dispatch(context):
        calls.append(context)

    monkeypatch.setattr(
        "review_agent.file_summary.workflow.dispatch_workflow_notification",
        fake_dispatch,
    )

    start_file_summary_workflow(batch, async_run=False)

    assert calls
    assert calls[-1].workflow_type == "file_summary"
    assert calls[-1].workflow_batch_id == batch.pk


def test_workflow_extracts_archive_and_scans_extracted_files(settings, tmp_path, django_user_model):
    """A zip upload yields an item for the contained PDF and none for the zip."""
    settings.MEDIA_ROOT = tmp_path
    user, conversation = _make_user_and_conversation(django_user_model)
    archive_path = tmp_path / "upload.zip"
    with ZipFile(archive_path, "w") as archive:
        archive.writestr("folder/a.pdf", b"%PDF-1.4\n%%EOF")
    _create_attachment(
        conversation,
        user,
        original_name="upload.zip",
        storage_path=str(archive_path),
        file_size=archive_path.stat().st_size,
    )
    batch = create_file_summary_batch(conversation=conversation, user=user)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    assert batch.total_files == 1
    assert batch.items.get().file_name == "a.pdf"
    assert not batch.items.filter(file_type="zip").exists()


def test_workflow_marks_archive_extract_failure_visible(settings, tmp_path, django_user_model):
    """An empty zip fails the batch and surfaces the message on batch, node and event."""
    settings.MEDIA_ROOT = tmp_path
    user, conversation = _make_user_and_conversation(django_user_model)
    archive_path = tmp_path / "empty.zip"
    # Write a valid archive that contains no members.
    with ZipFile(archive_path, "w"):
        pass
    _create_attachment(
        conversation,
        user,
        original_name="empty.zip",
        storage_path=str(archive_path),
        file_size=archive_path.stat().st_size,
    )
    batch = create_file_summary_batch(conversation=conversation, user=user)

    start_file_summary_workflow(batch, async_run=False)

    batch.refresh_from_db()
    extract_node = batch.node_runs.get(node_code="extract")
    assert batch.status == FileSummaryBatch.Status.FAILED
    assert "未解出任何可扫描文件" in batch.error_message
    assert extract_node.status == WorkflowNodeRun.Status.FAILED
    assert "未解出任何可扫描文件" in extract_node.message
    failed_event = WorkflowEvent.objects.filter(
        batch=batch,
        event_type="node_progress",
        payload__status=WorkflowNodeRun.Status.FAILED,
    ).latest("id")
    assert "未解出任何可扫描文件" in failed_event.payload["message"]


def test_rar_extract_uses_python_libarchive_before_7z(monkeypatch, tmp_path):
    """When the libarchive extractor returns files, the 7z extractor is not called."""
    archive_path = tmp_path / "sample.rar"
    archive_path.write_bytes(b"rar")
    target_dir = tmp_path / "out"
    calls = []

    def fake_libarchive_extract(path: Path, target: Path):
        calls.append(("libarchive", path, target))
        extracted = target / "a.docx"
        extracted.parent.mkdir(parents=True, exist_ok=True)
        extracted.write_bytes(b"doc")
        return [extracted]

    def fake_7z_extract(path: Path, target: Path):
        calls.append(("7z", path, target))
        return []

    monkeypatch.setattr(archive_service, "_extract_rar_with_libarchive", fake_libarchive_extract)
    monkeypatch.setattr(archive_service, "_extract_rar_with_7z", fake_7z_extract)

    extracted = archive_service.extract_archive(archive_path, target_dir)

    assert [path.name for path in extracted] == ["a.docx"]
    # Only the libarchive fake may have been recorded.
    assert calls == [("libarchive", archive_path, target_dir)]


def test_stream_message_returns_workflow_meta_when_triggered(settings, django_user_model):
    """A file-summary prompt emits workflow frames and creates a batch."""
    settings.FILE_SUMMARY_ASYNC = False
    user, conversation = _make_user_and_conversation(django_user_model)
    _create_attachment(conversation, user)

    frames = list(stream_message(conversation, "请自动汇总文件目录与页数"))

    joined = "".join(frames)
    assert "workflow_started" in joined
    assert "\"workflow_type\": \"file_summary\"" in joined
    assert FileSummaryBatch.objects.filter(conversation=conversation).exists()


def test_stream_message_uses_normal_llm_path_when_not_triggered(monkeypatch, django_user_model):
    """A plain greeting streams the stubbed reply and starts no workflow."""
    _, conversation = _make_user_and_conversation(django_user_model)

    def fake_stream_reply(conversation, content):
        yield "普通回复"

    monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply)

    frames = list(stream_message(conversation, "你好"))

    joined = "".join(frames)
    assert "普通回复" in joined
    assert "workflow_started" not in joined


def test_stream_message_meta_uses_first_prompt_title_for_new_conversation(monkeypatch, django_user_model):
    """The first and last frames carry the first prompt as the conversation title."""
    _, conversation = _make_user_and_conversation(django_user_model, title="新对话 01-01 10:00")

    def fake_stream_reply(conversation, content):
        yield "普通回复"

    monkeypatch.setattr("review_agent.services.stream_reply", fake_stream_reply)

    frames = list(stream_message(conversation, "这是第一条新对话消息"))

    assert '"title": "这是第一条新对话消息"' in frames[0]
    assert '"title": "这是第一条新对话消息"' in frames[-1]


def test_stream_message_reads_active_attachment_when_requested(settings, tmp_path, django_user_model):
    """Asking to read the attachment streams its parsed content, not a workflow."""
    settings.MEDIA_ROOT = tmp_path
    user, conversation = _make_user_and_conversation(django_user_model)
    attachment_path = tmp_path / "uploads" / "detail.txt"
    attachment_path.parent.mkdir(parents=True)
    attachment_path.write_text("合同编号:RA-2026\n结论:附件阅读成功", encoding="utf-8")
    _create_attachment(
        conversation,
        user,
        original_name="detail.txt",
        storage_path="uploads/detail.txt",
        file_size=attachment_path.stat().st_size,
    )

    frames = list(stream_message(conversation, "请阅读附件并给出详情"))

    joined = "".join(frames)
    assert "附件解析结果" in joined
    assert "detail.txt" in joined
    assert "RA-2026" in joined
    assert "workflow_started" not in joined


def test_stream_message_falls_back_to_non_stream_reply_when_stream_breaks(monkeypatch, django_user_model):
    """If streaming raises mid-way, the non-stream reply replaces the partial text."""
    _, conversation = _make_user_and_conversation(django_user_model)

    def broken_stream_reply(conversation, content):
        yield "已生成部分内容"
        raise RuntimeError("provider connection reset")

    monkeypatch.setattr("review_agent.services.stream_reply", broken_stream_reply)
    monkeypatch.setattr(
        "review_agent.services.generate_reply",
        lambda conversation, content: "非流式完整回复",
    )

    frames = list(stream_message(conversation, "普通问题"))

    joined = "".join(frames)
    assert "已生成部分内容" in joined
    assert "replace" in joined
    assert "非流式完整回复" in joined
    assert "done" in joined
    # The stored assistant message holds the fallback text, not the partial stream.
    assistant_message = Message.objects.get(conversation=conversation, role=Message.Role.ASSISTANT)
    assert assistant_message.content == "非流式完整回复"


def test_stream_message_uses_llm_router_for_attachment_reader(
    monkeypatch,
    settings,
    tmp_path,
    django_user_model,
):
    """A stubbed ``attachment_reader`` route makes the stream include the file text."""
    settings.MEDIA_ROOT = tmp_path
    user, conversation = _make_user_and_conversation(django_user_model)
    attachment_path = tmp_path / "uploads" / "resume.txt"
    attachment_path.parent.mkdir(parents=True)
    attachment_path.write_text("项目经历:负责审核智能体附件解析模块。", encoding="utf-8")
    _create_attachment(
        conversation,
        user,
        original_name="resume.txt",
        storage_path="uploads/resume.txt",
        file_size=attachment_path.stat().st_size,
    )
    monkeypatch.setattr(
        "review_agent.services.route_message_intent",
        lambda conversation, content: SkillRoute(
            action="attachment_reader",
            skill_name="attachment_reader",
            confidence=0.91,
            reason="需要读取上传简历。",
            source="llm",
        ),
    )

    frames = list(stream_message(conversation, "帮我整理其中的项目经历"))

    joined = "".join(frames)
    assert "附件解析结果" in joined
    assert "审核智能体附件解析模块" in joined
    assert "模型调用失败" not in joined


def test_stream_message_uses_llm_router_for_file_summary(monkeypatch, settings, django_user_model):
    """A stubbed ``file_summary`` route starts the workflow for a generic prompt."""
    settings.FILE_SUMMARY_ASYNC = False
    user, conversation = _make_user_and_conversation(django_user_model)
    _create_attachment(conversation, user)
    monkeypatch.setattr(
        "review_agent.services.route_message_intent",
        lambda conversation, content: SkillRoute(
            action="file_summary",
            workflow_type="file_summary",
            confidence=0.93,
            reason="需要执行文件目录与页数汇总。",
            source="llm",
        ),
    )

    frames = list(stream_message(conversation, "处理一下这批资料"))

    joined = "".join(frames)
    assert "workflow_started" in joined
    assert "\"workflow_type\": \"file_summary\"" in joined
    assert FileSummaryBatch.objects.filter(conversation=conversation).exists()