feat(attachments): 增加附件阅读解析能力
This commit is contained in:
111
tests/test_attachment_reader.py
Normal file
111
tests/test_attachment_reader.py
Normal file
@@ -0,0 +1,111 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from django.conf import settings
|
||||
|
||||
from review_agent.models import Conversation, FileAttachment
|
||||
|
||||
|
||||
# Module-wide pytest marker: applies pytest-django's ``django_db`` mark to
# every test in this file so each one is allowed to use the test database.
pytestmark = pytest.mark.django_db
|
||||
|
||||
|
||||
def test_read_attachment_extracts_text_file_details(settings, tmp_path, django_user_model):
    """Check that a plain-text attachment is read into a successful result.

    A UTF-8 text file is written under a temporary MEDIA_ROOT, registered as
    a ``FileAttachment``, and passed to ``read_attachment_details``. The
    result must report success, the filename, the ``txt`` file type, a
    preview containing the file's text, and a first section of type ``text``.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Point media storage at the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # Path stored on the model is relative; the file itself lives under tmp_path.
    stored_rel = Path("uploads", "note.txt")
    on_disk = tmp_path / stored_rel
    on_disk.parent.mkdir(parents=True)
    on_disk.write_text("产品名称:智能审核\n关键结论:可以解析附件详情", encoding="utf-8")

    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "note.txt",
        "storage_path": stored_rel.as_posix(),
        "file_size": on_disk.stat().st_size,
        "content_type": "text/plain",
    }
    note = FileAttachment.objects.create(**attachment_fields)

    details = read_attachment_details(note)

    assert details.status == "success"
    assert details.filename == "note.txt"
    assert details.file_type == "txt"
    assert "智能审核" in details.preview_text
    assert details.sections[0]["type"] == "text"
|
||||
|
||||
|
||||
def test_read_attachment_extracts_docx_and_xlsx_details(settings, tmp_path, django_user_model):
    """Check that Word and Excel attachments are both read successfully.

    A ``.docx`` with a heading and a paragraph, and an ``.xlsx`` with one
    named sheet holding two rows, are created under a temporary MEDIA_ROOT.
    The docx preview must contain the heading and body text; the xlsx result
    must expose the sheet name and the second row with its values as strings.
    """
    from docx import Document
    from openpyxl import Workbook

    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Point media storage at the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # --- Word document fixture -------------------------------------------
    word_file = tmp_path / "uploads" / "summary.docx"
    word_file.parent.mkdir(parents=True)
    word_doc = Document()
    word_doc.add_heading("项目摘要", level=1)
    word_doc.add_paragraph("这是 Word 附件里的正文。")
    word_doc.save(word_file)
    word_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="summary.docx",
        storage_path="uploads/summary.docx",
        file_size=word_file.stat().st_size,
    )

    # --- Excel workbook fixture (same uploads directory, already created) --
    excel_file = tmp_path / "uploads" / "inventory.xlsx"
    book = Workbook()
    active_sheet = book.active
    active_sheet.title = "清单"
    for row in (["文件名", "页数"], ["a.pdf", 3]):
        active_sheet.append(row)
    book.save(excel_file)
    excel_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="inventory.xlsx",
        storage_path="uploads/inventory.xlsx",
        file_size=excel_file.stat().st_size,
    )

    word_details = read_attachment_details(word_attachment)
    excel_details = read_attachment_details(excel_attachment)

    assert word_details.status == "success"
    assert "项目摘要" in word_details.preview_text
    assert "Word 附件里的正文" in word_details.preview_text
    assert excel_details.status == "success"
    assert excel_details.sections[0]["name"] == "清单"
    # The numeric cell is expected back as the string "3".
    assert excel_details.sections[0]["rows"][1] == ["a.pdf", "3"]
|
||||
|
||||
|
||||
def test_attachment_reader_skill_returns_structured_details(settings, tmp_path, django_user_model):
    """Check that ``AttachmentReaderSkill`` returns per-attachment details.

    A single text attachment is passed to ``run_for_attachments``. The result
    must be flagged successful, and the first entry under
    ``data["attachments"]`` must carry the filename and a preview containing
    the file's text.
    """
    from review_agent.file_summary.skills.attachment_reader import AttachmentReaderSkill

    # Point media storage at the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    readme = tmp_path / "uploads" / "readme.txt"
    readme.parent.mkdir(parents=True)
    readme.write_text("请读取这个附件。", encoding="utf-8")

    readme_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="readme.txt",
        storage_path="uploads/readme.txt",
        file_size=readme.stat().st_size,
    )

    skill = AttachmentReaderSkill()
    outcome = skill.run_for_attachments([readme_attachment])

    assert outcome.success is True
    first_entry = outcome.data["attachments"][0]
    assert first_entry["filename"] == "readme.txt"
    assert "请读取这个附件" in outcome.data["attachments"][0]["preview_text"]
|
||||
@@ -100,3 +100,27 @@ def test_stream_message_uses_normal_llm_path_when_not_triggered(monkeypatch, dja
|
||||
joined = "".join(frames)
|
||||
assert "普通回复" in joined
|
||||
assert "workflow_started" not in joined
|
||||
|
||||
|
||||
def test_stream_message_reads_active_attachment_when_requested(settings, tmp_path, django_user_model):
    """Check that asking to read an attachment streams its parsed details.

    A text attachment is attached to the conversation, then a message asking
    to read the attachment is streamed. The joined frames must contain the
    attachment-result heading, the filename and a value from the file body,
    and must not contain the ``workflow_started`` marker.
    """
    # Point media storage at the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    detail_file = tmp_path / "uploads" / "detail.txt"
    detail_file.parent.mkdir(parents=True)
    detail_file.write_text("合同编号:RA-2026\n结论:附件阅读成功", encoding="utf-8")

    # The attachment only needs to exist on the conversation; the returned
    # instance is not used directly.
    FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="detail.txt",
        storage_path="uploads/detail.txt",
        file_size=detail_file.stat().st_size,
    )

    streamed = "".join(list(stream_message(chat, "请阅读附件并给出详情")))

    for expected_fragment in ("附件解析结果", "detail.txt", "RA-2026"):
        assert expected_fragment in streamed
    assert "workflow_started" not in streamed
|
||||
|
||||
Reference in New Issue
Block a user