181 lines
5.9 KiB
Python
181 lines
5.9 KiB
Python
import pytest
|
|
import shutil
|
|
from zipfile import ZipFile
|
|
from docx import Document
|
|
from openpyxl import Workbook
|
|
from pptx import Presentation
|
|
|
|
from review_agent.file_summary.services.page_count import count_document_pages
|
|
from review_agent.file_summary.skills.document_page_count import DocumentPageCountSkill
|
|
from review_agent.file_summary.skills.base import WorkflowContext
|
|
from review_agent.models import Conversation, FileSummaryBatch, FileSummaryItem
|
|
|
|
|
|
# Module-level pytest marker: applies `django_db` to every test in this module.
pytestmark = pytest.mark.django_db
|
|
|
|
|
|
def test_count_document_pages_for_office_formats(tmp_path):
    """Count pages for freshly generated .docx, .xlsx and .pptx files.

    The Word result only has to report a status of "success" or "uncertain";
    the workbook must count 2 and the presentation must count 1.
    """
    # Word: an empty document written by python-docx.
    word_file = tmp_path / "a.docx"
    blank_document = Document()
    blank_document.save(word_file)

    # Excel: a new workbook with one extra sheet added (2 expected below).
    excel_file = tmp_path / "a.xlsx"
    book = Workbook()
    book.create_sheet("第二页")
    book.save(excel_file)

    # PowerPoint: exactly one slide, built from layout index 6.
    deck_file = tmp_path / "a.pptx"
    deck = Presentation()
    chosen_layout = deck.slide_layouts[6]
    deck.slides.add_slide(chosen_layout)
    deck.save(deck_file)

    # Each file is counted right before its own assertion.
    word_result = count_document_pages(word_file)
    assert word_result.status in {"success", "uncertain"}

    excel_result = count_document_pages(excel_file)
    assert excel_result.page_count == 2

    deck_result = count_document_pages(deck_file)
    assert deck_result.page_count == 1
|
|
|
|
|
|
def test_count_docx_pages_from_extended_properties(tmp_path):
    """Expect the <Pages> value written into docProps/app.xml to be reported.

    A document saved by python-docx is re-zipped with its ``docProps/app.xml``
    swapped for one declaring 7 pages; the count must then be a "success"
    with ``page_count == 7``.
    """
    target_docx = tmp_path / "with-pages.docx"
    Document().save(target_docx)

    properties_xml = "".join(
        (
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">',
            "<Pages>7</Pages>",
            "</Properties>",
        )
    )

    app_xml_name = "docProps/app.xml"
    staged_copy = tmp_path / "rewritten.docx"
    with ZipFile(target_docx) as original_zip, ZipFile(staged_copy, "w") as patched_zip:
        # Copy every member except the existing app.xml, then append ours.
        kept_members = [
            member
            for member in original_zip.infolist()
            if member.filename != app_xml_name
        ]
        for member in kept_members:
            patched_zip.writestr(member, original_zip.read(member.filename))
        patched_zip.writestr(app_xml_name, properties_xml)

    # Both archives are closed here; put the patched copy in the original's place.
    shutil.move(staged_copy, target_docx)

    outcome = count_document_pages(target_docx)

    assert outcome.status == "success"
    assert outcome.page_count == 7
|
|
|
|
|
|
def test_count_docx_pages_uses_word_com_fallback(monkeypatch, tmp_path):
    """Expect the Word COM count when extended properties yield nothing.

    The extended-properties reader is stubbed to return ``None`` and the Word
    COM counter is stubbed to return 22; the result must be a "success" with
    ``page_count == 22``.
    """
    docx_file = tmp_path / "without-pages.docx"
    Document().save(docx_file)

    def no_extended_properties(path):
        return None

    def word_com_reports_22(path):
        return 22

    stubs = (
        (
            "review_agent.file_summary.services.page_count._count_docx_pages_from_extended_properties",
            no_extended_properties,
        ),
        (
            "review_agent.file_summary.services.page_count._count_word_pages_with_com",
            word_com_reports_22,
        ),
    )
    for dotted_target, replacement in stubs:
        monkeypatch.setattr(dotted_target, replacement)

    outcome = count_document_pages(docx_file)

    assert outcome.status == "success"
    assert outcome.page_count == 22
|
|
|
|
|
|
def test_count_doc_pages_uses_word_com_fallback(monkeypatch, tmp_path):
    """Expect a legacy .doc file to be counted through the Word COM stub.

    ``_can_try_com_fallback`` is forced to ``True`` and the Word COM counter
    is stubbed to return 5; the result must be a "success" with
    ``page_count == 5``.
    """
    legacy_file = tmp_path / "legacy.doc"
    # Placeholder bytes only; the COM counter that would open the file is stubbed.
    legacy_file.write_bytes(b"legacy-doc-placeholder")

    def allow_com_fallback(path, ext):
        return True

    def word_com_reports_5(path):
        return 5

    stubs = (
        (
            "review_agent.file_summary.services.page_count._can_try_com_fallback",
            allow_com_fallback,
        ),
        (
            "review_agent.file_summary.services.page_count._count_word_pages_with_com",
            word_com_reports_5,
        ),
    )
    for dotted_target, replacement in stubs:
        monkeypatch.setattr(dotted_target, replacement)

    outcome = count_document_pages(legacy_file)

    assert outcome.status == "success"
    assert outcome.page_count == 5
|
|
|
|
|
|
def test_count_ppt_pages_uses_powerpoint_com_fallback(monkeypatch, tmp_path):
    """Expect a legacy .ppt file to be counted through the PowerPoint COM stub.

    ``_can_try_com_fallback`` is forced to ``True`` and the PowerPoint slide
    counter is stubbed to return 9; the result must be a "success" with
    ``page_count == 9``.
    """
    legacy_file = tmp_path / "legacy.ppt"
    # Placeholder bytes only; the COM counter that would open the file is stubbed.
    legacy_file.write_bytes(b"legacy-ppt-placeholder")

    def allow_com_fallback(path, ext):
        return True

    def powerpoint_com_reports_9(path):
        return 9

    stubs = (
        (
            "review_agent.file_summary.services.page_count._can_try_com_fallback",
            allow_com_fallback,
        ),
        (
            "review_agent.file_summary.services.page_count._count_powerpoint_slides_with_com",
            powerpoint_com_reports_9,
        ),
    )
    for dotted_target, replacement in stubs:
        monkeypatch.setattr(dotted_target, replacement)

    outcome = count_document_pages(legacy_file)

    assert outcome.status == "success"
    assert outcome.page_count == 9
|
|
|
|
|
|
def test_count_excel_pages_uses_excel_com_fallback(monkeypatch, tmp_path):
    """Expect a legacy .xls file to be counted through the Excel COM stub.

    ``_can_try_com_fallback`` is forced to ``True`` and the Excel sheet
    counter is stubbed to return 3; the result must be a "success" with
    ``page_count == 3``.
    """
    legacy_file = tmp_path / "legacy.xls"
    # Placeholder bytes only; the COM counter that would open the file is stubbed.
    legacy_file.write_bytes(b"legacy-xls-placeholder")

    def allow_com_fallback(path, ext):
        return True

    def excel_com_reports_3(path):
        return 3

    stubs = (
        (
            "review_agent.file_summary.services.page_count._can_try_com_fallback",
            allow_com_fallback,
        ),
        (
            "review_agent.file_summary.services.page_count._count_excel_sheets_with_com",
            excel_com_reports_3,
        ),
    )
    for dotted_target, replacement in stubs:
        monkeypatch.setattr(dotted_target, replacement)

    outcome = count_document_pages(legacy_file)

    assert outcome.status == "success"
    assert outcome.page_count == 3
|
|
|
|
|
|
def test_invalid_xlsx_does_not_start_excel_com(monkeypatch, tmp_path):
    """Expect an "uncertain" result, without Excel COM, for a bogus .xlsx.

    The Excel COM counter is replaced by a stub that raises, so any attempt
    to reach it makes the test fail.
    """
    bogus_workbook = tmp_path / "broken.xlsx"
    bogus_workbook.write_bytes(b"not a real workbook")

    def forbid_excel_com(path):
        # Reaching this stub means the COM path was taken for an invalid file.
        raise AssertionError("Excel COM should not start for invalid xlsx signatures")

    com_target = "review_agent.file_summary.services.page_count._count_excel_sheets_with_com"
    monkeypatch.setattr(com_target, forbid_excel_com)

    outcome = count_document_pages(bogus_workbook)

    assert outcome.status == "uncertain"
|
|
|
|
|
|
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
    """Run the page-count skill over a batch holding one .xlsx and one .txt.

    After the run the skill result must report success, the xlsx item must be
    marked SUCCESS and the txt item UNSUPPORTED.
    """
    # Files on disk that the batch items point at.
    spreadsheet = tmp_path / "a.xlsx"
    Workbook().save(spreadsheet)
    text_file = tmp_path / "a.txt"
    text_file.write_text("x", encoding="utf-8")

    # Database rows: owner -> conversation -> batch -> two items.
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    chat = Conversation.objects.create(user=owner, title="会话")
    summary_batch = FileSummaryBatch.objects.create(
        conversation=chat,
        user=owner,
        batch_no="FS-P",
    )

    def make_item(index, name, kind, stored_at):
        # Both items reuse the file name as their relative path.
        return FileSummaryItem.objects.create(
            batch=summary_batch,
            file_index=index,
            file_name=name,
            file_type=kind,
            relative_path=name,
            storage_path=str(stored_at),
        )

    spreadsheet_item = make_item(1, "a.xlsx", "xlsx", spreadsheet)
    text_item = make_item(2, "a.txt", "txt", text_file)

    context = WorkflowContext(batch=summary_batch)
    outcome = DocumentPageCountSkill().run(context)

    # Reload both rows so the assertions see what the skill persisted.
    spreadsheet_item.refresh_from_db()
    text_item.refresh_from_db()

    assert outcome.success is True
    statuses = FileSummaryItem.StatisticsStatus
    assert spreadsheet_item.statistics_status == statuses.SUCCESS
    assert text_item.statistics_status == statuses.UNSUPPORTED
|