# DEMO-AGENT/tests/test_attachment_reader.py

from pathlib import Path

import pytest
from django.conf import settings

from review_agent.models import Conversation, FileAttachment


# Module-level marker: applies pytest-django's ``django_db`` mark to every
# test in this file, since each one creates user/conversation/attachment rows.
pytestmark = pytest.mark.django_db


def test_read_attachment_extracts_text_file_details(settings, tmp_path, django_user_model):
    """A plain-text upload is reported as a successful ``txt`` attachment.

    Writes a UTF-8 note below a temporary ``MEDIA_ROOT``, registers it as a
    ``FileAttachment`` and checks the status, filename, file type, preview
    text and first section type returned by ``read_attachment_details``.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # The relative path is what gets stored on the model; the absolute path is
    # where the bytes actually live (presumably resolved against MEDIA_ROOT by
    # the reader -- the resolution itself is not visible in this test).
    stored_as = Path("uploads") / "note.txt"
    note_file = tmp_path / stored_as
    note_file.parent.mkdir(parents=True)
    note_file.write_text("产品名称：智能审核\n关键结论：可以解析附件详情", encoding="utf-8")
    size_on_disk = note_file.stat().st_size

    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="note.txt",
        storage_path=stored_as.as_posix(),
        file_size=size_on_disk,
        content_type="text/plain",
    )

    details = read_attachment_details(attachment)

    assert details.status == "success"
    assert details.filename == "note.txt"
    assert details.file_type == "txt"
    assert "智能审核" in details.preview_text
    assert details.sections[0]["type"] == "text"


def test_read_attachment_extracts_docx_and_xlsx_details(settings, tmp_path, django_user_model):
    """Word and Excel uploads are both read into successful results.

    Builds a ``.docx`` (one heading, one paragraph) and an ``.xlsx`` (one
    sheet with a header row and a data row) under a temporary ``MEDIA_ROOT``,
    then checks that the Word text shows up in the preview and that the
    workbook's first section carries the sheet name and stringified cells.
    """
    from docx import Document
    from openpyxl import Workbook

    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    # Both fixture files share one uploads directory.
    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)

    # Arrange: Word document.
    word_file = uploads_dir / "summary.docx"
    word_doc = Document()
    word_doc.add_heading("项目摘要", level=1)
    word_doc.add_paragraph("这是 Word 附件里的正文。")
    word_doc.save(word_file)
    word_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="summary.docx",
        storage_path="uploads/summary.docx",
        file_size=word_file.stat().st_size,
    )

    # Arrange: Excel workbook with a renamed active sheet.
    excel_file = uploads_dir / "inventory.xlsx"
    book = Workbook()
    inventory = book.active
    inventory.title = "清单"
    for row in (["文件名", "页数"], ["a.pdf", 3]):
        inventory.append(row)
    book.save(excel_file)
    excel_attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="inventory.xlsx",
        storage_path="uploads/inventory.xlsx",
        file_size=excel_file.stat().st_size,
    )

    # Act.
    word_details = read_attachment_details(word_attachment)
    excel_details = read_attachment_details(excel_attachment)

    # Assert: heading and paragraph text both reach the preview.
    assert word_details.status == "success"
    assert "项目摘要" in word_details.preview_text
    assert "Word 附件里的正文" in word_details.preview_text

    # Assert: the integer cell 3 is expected back as the string "3".
    assert excel_details.status == "success"
    assert excel_details.sections[0]["name"] == "清单"
    assert excel_details.sections[0]["rows"][1] == ["a.pdf", "3"]


def test_attachment_reader_skill_returns_structured_details(settings, tmp_path, django_user_model):
    """``AttachmentReaderSkill`` returns per-attachment details in ``data``.

    Registers a single text attachment, runs the skill over it and checks
    that the result is flagged successful and that the first entry under
    ``data["attachments"]`` has the expected filename and preview text.
    """
    from review_agent.file_summary.skills.attachment_reader import AttachmentReaderSkill

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)
    readme_file = uploads_dir / "readme.txt"
    readme_file.write_text("请读取这个附件。", encoding="utf-8")

    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="readme.txt",
        storage_path="uploads/readme.txt",
        file_size=readme_file.stat().st_size,
    )

    skill = AttachmentReaderSkill()
    outcome = skill.run_for_attachments([attachment])

    assert outcome.success is True
    assert outcome.data["attachments"][0]["filename"] == "readme.txt"
    assert "请读取这个附件" in outcome.data["attachments"][0]["preview_text"]


def test_read_attachment_extracts_files_inside_rar(monkeypatch, settings, tmp_path, django_user_model):
    """A ``.rar`` upload surfaces the text of the files extracted from it.

    The file on disk holds only the placeholder bytes ``b"rar"``; the
    module-level ``extract_archive`` in the reader service is replaced with a
    stub that writes one text file into the target directory. The test then
    checks the reported status, file type, first section name and preview.
    """
    from review_agent.file_summary.services.attachment_reader import read_attachment_details

    def stub_extract_archive(path: Path, target_dir: Path):
        # Ignores the archive itself and fabricates a single extracted member.
        member = target_dir / "说明书.txt"
        member.write_text("产品名称：甲胎蛋白检测试剂盒", encoding="utf-8")
        return [member]

    # Swap out extraction so the placeholder archive never has to be parsed.
    monkeypatch.setattr(
        "review_agent.file_summary.services.attachment_reader.extract_archive",
        stub_extract_archive,
    )

    # Redirect media storage to the per-test temporary directory.
    settings.MEDIA_ROOT = tmp_path

    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")

    uploads_dir = tmp_path / "uploads"
    uploads_dir.mkdir(parents=True)
    rar_file = uploads_dir / "第1章_监管信息.rar"
    rar_file.write_bytes(b"rar")

    attachment = FileAttachment.objects.create(
        conversation=chat,
        user=owner,
        original_name="第1章_监管信息.rar",
        storage_path="uploads/第1章_监管信息.rar",
        file_size=rar_file.stat().st_size,
    )

    details = read_attachment_details(attachment)

    assert details.status == "success"
    assert details.file_type == "rar"
    assert "说明书.txt" in details.sections[0]["name"]
    assert "甲胎蛋白检测试剂盒" in details.preview_text