# File: DEMO-AGENT/tests/test_attachment_reader.py
# (Python source; the original listing reported 147 lines, 5.6 KiB)

from pathlib import Path
import pytest
from django.conf import settings
from review_agent.models import Conversation, FileAttachment
pytestmark = pytest.mark.django_db
def test_read_attachment_extracts_text_file_details(settings, tmp_path, django_user_model):
    """Check that a plain-text attachment is read into a successful result.

    A UTF-8 ``note.txt`` is written under ``tmp_path`` (which is installed as
    ``MEDIA_ROOT``), registered as a ``FileAttachment`` and handed to
    ``read_attachment_details``. The test asserts on the reported status,
    filename, file type, preview text and the type of the first section.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # The attachment row stores the path relative to tmp_path, while the
    # file itself is written at the corresponding absolute location.
    stored_path = Path("uploads") / "note.txt"
    file_on_disk = tmp_path / stored_path
    file_on_disk.parent.mkdir(parents=True)
    file_on_disk.write_text("产品名称:智能审核\n关键结论:可以解析附件详情", encoding="utf-8")

    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "note.txt",
        "storage_path": stored_path.as_posix(),
        "file_size": file_on_disk.stat().st_size,
        "content_type": "text/plain",
    }
    note = FileAttachment.objects.create(**attachment_fields)

    details = read_attachment_details(note)

    assert details.status == "success"
    assert details.filename == "note.txt"
    assert details.file_type == "txt"
    assert "智能审核" in details.preview_text
    first_section = details.sections[0]
    assert first_section["type"] == "text"
def test_read_attachment_extracts_docx_and_xlsx_details(settings, tmp_path, django_user_model):
    """Check that Word and Excel attachments are both read successfully.

    Builds a ``.docx`` (one heading plus one paragraph) and an ``.xlsx`` (one
    sheet with a header row and a data row) under ``tmp_path``, registers
    each as a ``FileAttachment`` and passes them to
    ``read_attachment_details``. The Word result must expose both pieces of
    text in its preview; the Excel result must expose the sheet name and the
    data row in its first section.
    """
    from docx import Document
    from openpyxl import Workbook
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # Both fixture files share one upload directory below tmp_path.
    uploads_dir = tmp_path / "uploads"

    # Word fixture: a level-1 heading followed by a single paragraph.
    word_file = uploads_dir / "summary.docx"
    word_file.parent.mkdir(parents=True)
    word_doc = Document()
    word_doc.add_heading("项目摘要", level=1)
    word_doc.add_paragraph("这是 Word 附件里的正文。")
    word_doc.save(word_file)
    word_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="summary.docx",
        storage_path="uploads/summary.docx",
        file_size=word_file.stat().st_size,
    )

    # Excel fixture: the active sheet is renamed and given two rows.
    excel_file = uploads_dir / "inventory.xlsx"
    book = Workbook()
    listing = book.active
    listing.title = "清单"
    for row in (["文件名", "页数"], ["a.pdf", 3]):
        listing.append(row)
    book.save(excel_file)
    excel_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="inventory.xlsx",
        storage_path="uploads/inventory.xlsx",
        file_size=excel_file.stat().st_size,
    )

    word_details = read_attachment_details(word_attachment)
    excel_details = read_attachment_details(excel_attachment)

    assert word_details.status == "success"
    assert "项目摘要" in word_details.preview_text
    assert "Word 附件里的正文" in word_details.preview_text

    assert excel_details.status == "success"
    first_sheet = excel_details.sections[0]
    assert first_sheet["name"] == "清单"
    # The integer cell 3 is expected to come back as the string "3".
    assert first_sheet["rows"][1] == ["a.pdf", "3"]
def test_attachment_reader_skill_returns_structured_details(settings, tmp_path, django_user_model):
    """Check that ``AttachmentReaderSkill`` reports details for an attachment.

    A small UTF-8 ``readme.txt`` is written under ``tmp_path`` and registered
    as a ``FileAttachment``. Running the skill on a one-element list must
    succeed and return, under ``data["attachments"]``, an entry carrying the
    file name and a preview containing the file's text.
    """
    from review_agent.file_summary.skills.attachment_reader import AttachmentReaderSkill

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    readme_file = tmp_path / "uploads" / "readme.txt"
    readme_file.parent.mkdir(parents=True)
    readme_file.write_text("请读取这个附件。", encoding="utf-8")

    readme = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="readme.txt",
        storage_path="uploads/readme.txt",
        file_size=readme_file.stat().st_size,
    )

    skill = AttachmentReaderSkill()
    outcome = skill.run_for_attachments([readme])

    assert outcome.success is True
    assert outcome.data["attachments"][0]["filename"] == "readme.txt"
    assert "请读取这个附件" in outcome.data["attachments"][0]["preview_text"]
def test_read_attachment_extracts_files_inside_rar(monkeypatch, settings, tmp_path, django_user_model):
    """Check that a ``.rar`` attachment is reported through its extracted files.

    The archive on disk holds only the placeholder bytes ``b"rar"``, so
    ``extract_archive`` in the attachment reader module is replaced with a
    stub that writes one text file into the target directory and returns it.
    The result must be successful, typed as ``rar``, name the extracted file
    in its first section and include that file's text in the preview.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    rar_file = tmp_path / "uploads" / "第1章_监管信息.rar"
    rar_file.parent.mkdir(parents=True)
    rar_file.write_bytes(b"rar")

    rar_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="第1章_监管信息.rar",
        storage_path="uploads/第1章_监管信息.rar",
        file_size=rar_file.stat().st_size,
    )

    def stub_extract_archive(path: Path, target_dir: Path):
        # Stand-in for the real extractor: ignore the archive and drop a
        # single known text file into the extraction directory.
        manual = target_dir / "说明书.txt"
        manual.write_text("产品名称:甲胎蛋白检测试剂盒", encoding="utf-8")
        return [manual]

    monkeypatch.setattr(
        "review_agent.file_summary.services.attachment_reader.extract_archive",
        stub_extract_archive,
    )

    details = read_attachment_details(rar_attachment)

    assert details.status == "success"
    assert details.file_type == "rar"
    first_section = details.sections[0]
    assert "说明书.txt" in first_section["name"]
    assert "甲胎蛋白检测试剂盒" in details.preview_text