147 lines
5.6 KiB
Python
147 lines
5.6 KiB
Python
from pathlib import Path
|
|
|
|
import pytest
|
|
from django.conf import settings
|
|
|
|
from review_agent.models import Conversation, FileAttachment
|
|
|
|
|
|
# Every test in this module creates rows through the Django ORM (users,
# conversations, attachments), so mark the whole module as needing DB access.
pytestmark = pytest.mark.django_db
|
|
|
|
|
|
def test_read_attachment_extracts_text_file_details(settings, tmp_path, django_user_model):
    """Check that a UTF-8 ``.txt`` attachment under MEDIA_ROOT is read successfully.

    A text file is written to ``tmp_path/uploads/note.txt`` and registered as a
    ``FileAttachment``. The reader result must report success, the original
    filename, the ``txt`` file type, the file's text in the preview, and a
    first section whose type is ``text``.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # Keep the relative location separate: it is what gets stored on the model.
    stored_as = Path("uploads") / "note.txt"
    on_disk = tmp_path / stored_as
    on_disk.parent.mkdir(parents=True)
    on_disk.write_text("产品名称：智能审核\n关键结论：可以解析附件详情", encoding="utf-8")

    attachment_fields = {
        "conversation": chat,
        "user": owner,
        "original_name": "note.txt",
        "storage_path": stored_as.as_posix(),
        "file_size": on_disk.stat().st_size,
        "content_type": "text/plain",
    }
    note = FileAttachment.objects.create(**attachment_fields)

    details = read_attachment_details(note)

    assert details.status == "success"
    assert details.filename == "note.txt"
    assert details.file_type == "txt"
    assert "智能审核" in details.preview_text
    leading_section = details.sections[0]
    assert leading_section["type"] == "text"
|
|
|
|
|
|
def test_read_attachment_extracts_docx_and_xlsx_details(settings, tmp_path, django_user_model):
    """Check that Word and Excel attachments are both read successfully.

    A ``.docx`` with one heading and one paragraph, and an ``.xlsx`` with a
    single sheet of two rows, are generated under ``tmp_path/uploads``. The
    Word preview must contain both pieces of text. The Excel result must
    expose the sheet name and return the data row with the numeric cell
    rendered as the string ``"3"``.
    """
    from docx import Document
    from openpyxl import Workbook

    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # Both generated files live in the same upload directory.
    upload_dir = tmp_path / "uploads"
    upload_dir.mkdir(parents=True)

    # --- Word document ---
    word_file = upload_dir / "summary.docx"
    word_doc = Document()
    word_doc.add_heading("项目摘要", level=1)
    word_doc.add_paragraph("这是 Word 附件里的正文。")
    word_doc.save(word_file)
    word_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="summary.docx",
        storage_path="uploads/summary.docx",
        file_size=word_file.stat().st_size,
    )

    # --- Excel workbook ---
    excel_file = upload_dir / "inventory.xlsx"
    book = Workbook()
    listing = book.active
    listing.title = "清单"
    for row in (["文件名", "页数"], ["a.pdf", 3]):
        listing.append(row)
    book.save(excel_file)
    excel_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="inventory.xlsx",
        storage_path="uploads/inventory.xlsx",
        file_size=excel_file.stat().st_size,
    )

    word_details = read_attachment_details(word_attachment)
    excel_details = read_attachment_details(excel_attachment)

    assert word_details.status == "success"
    assert "项目摘要" in word_details.preview_text
    assert "Word 附件里的正文" in word_details.preview_text

    assert excel_details.status == "success"
    first_sheet = excel_details.sections[0]
    assert first_sheet["name"] == "清单"
    assert first_sheet["rows"][1] == ["a.pdf", "3"]
|
|
|
|
|
|
def test_attachment_reader_skill_returns_structured_details(settings, tmp_path, django_user_model):
    """Check that ``AttachmentReaderSkill`` returns per-attachment details.

    A single text attachment is passed to ``run_for_attachments``. The result
    must be flagged successful, and the first entry of
    ``data["attachments"]`` must carry the filename and a preview containing
    the file's text.
    """
    from review_agent.file_summary.skills.attachment_reader import AttachmentReaderSkill

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    readme = tmp_path / "uploads" / "readme.txt"
    readme.parent.mkdir(parents=True)
    readme.write_text("请读取这个附件。", encoding="utf-8")

    readme_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="readme.txt",
        storage_path="uploads/readme.txt",
        file_size=readme.stat().st_size,
    )

    skill = AttachmentReaderSkill()
    outcome = skill.run_for_attachments([readme_attachment])

    assert outcome.success is True
    first_entry = outcome.data["attachments"][0]
    assert first_entry["filename"] == "readme.txt"
    assert "请读取这个附件" in first_entry["preview_text"]
|
|
|
|
|
|
def test_read_attachment_extracts_files_inside_rar(monkeypatch, settings, tmp_path, django_user_model):
    """Check that files inside a ``.rar`` attachment are read.

    The archive on disk is only a placeholder (the bytes ``b"rar"``). The
    reader module's ``extract_archive`` is monkeypatched with a stub that
    writes one text file into the target directory. The result must report
    success, the ``rar`` file type, a first section named after the extracted
    file, and a preview containing that file's text.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    archive_name = "第1章_监管信息.rar"
    archive_file = tmp_path / "uploads" / archive_name
    archive_file.parent.mkdir(parents=True)
    # Not a real RAR payload; extraction is stubbed out below.
    archive_file.write_bytes(b"rar")

    rar_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name=archive_name,
        storage_path=f"uploads/{archive_name}",
        file_size=archive_file.stat().st_size,
    )

    def _stub_extract_archive(path: Path, target_dir: Path):
        # Parameter names are kept as-is in case the reader passes them by
        # keyword. Writes one text file and returns it as the sole path.
        member = target_dir / "说明书.txt"
        member.write_text("产品名称：甲胎蛋白检测试剂盒", encoding="utf-8")
        return [member]

    patch_target = "review_agent.file_summary.services.attachment_reader.extract_archive"
    monkeypatch.setattr(patch_target, _stub_extract_archive)

    details = read_attachment_details(rar_attachment)

    assert details.status == "success"
    assert details.file_type == "rar"
    leading_section = details.sections[0]
    assert "说明书.txt" in leading_section["name"]
    assert "甲胎蛋白检测试剂盒" in details.preview_text
|