fix(file-summary): 补强 Office 页数统计
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
import pytest
|
||||
import shutil
|
||||
from zipfile import ZipFile
|
||||
from docx import Document
|
||||
from openpyxl import Workbook
|
||||
from pptx import Presentation
|
||||
@@ -31,6 +33,89 @@ def test_count_document_pages_for_office_formats(tmp_path):
|
||||
assert count_document_pages(pptx_path).page_count == 1
|
||||
|
||||
|
||||
def test_count_docx_pages_from_extended_properties(tmp_path):
    """Check that a ``<Pages>`` value in ``docProps/app.xml`` becomes the page count.

    A document created with ``Document()`` is saved, then its archive is
    rebuilt with a hand-written ``docProps/app.xml`` declaring seven pages.
    ``count_document_pages`` must report ``"success"`` and ``7``.
    """
    app_part = "docProps/app.xml"
    docx_path = tmp_path / "with-pages.docx"
    rewritten = tmp_path / "rewritten.docx"
    Document().save(docx_path)

    app_xml = "".join(
        [
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">',
            "<Pages>7</Pages>",
            "</Properties>",
        ]
    )

    # Copy every archive member except the extended-properties part, then
    # append the replacement part so it is the only app.xml in the new file.
    with ZipFile(docx_path) as source:
        with ZipFile(rewritten, "w") as target:
            for info in source.infolist():
                if info.filename == app_part:
                    continue
                target.writestr(info, source.read(info.filename))
            target.writestr(app_part, app_xml)

    # Put the rebuilt archive back under the original file name.
    shutil.move(rewritten, docx_path)

    result = count_document_pages(docx_path)

    assert result.status == "success"
    assert result.page_count == 7
|
||||
|
||||
|
||||
def test_count_docx_pages_uses_word_com_fallback(monkeypatch, tmp_path):
    """Check that a .docx count comes from the Word COM helper when properties give none.

    The extended-properties reader is stubbed to return ``None`` and the Word
    COM counter is stubbed to return ``22``; ``count_document_pages`` must
    then report ``"success"`` with ``22`` pages.
    """

    def no_page_metadata(path):
        return None

    def word_com_page_count(path):
        return 22

    docx_path = tmp_path / "without-pages.docx"
    Document().save(docx_path)

    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_docx_pages_from_extended_properties",
        no_page_metadata,
    )
    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_word_pages_with_com",
        word_com_page_count,
    )

    result = count_document_pages(docx_path)

    assert result.status == "success"
    assert result.page_count == 22
|
||||
|
||||
|
||||
def test_count_doc_pages_uses_word_com_fallback(monkeypatch, tmp_path):
    """Check that a .doc file's page count is taken from the Word COM helper.

    The file holds placeholder bytes only, and the Word COM counter is
    stubbed to return ``5``; ``count_document_pages`` must report
    ``"success"`` with ``5`` pages.
    """

    def word_com_page_count(path):
        return 5

    doc_path = tmp_path / "legacy.doc"
    doc_path.write_bytes(b"legacy-doc-placeholder")

    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_word_pages_with_com",
        word_com_page_count,
    )

    result = count_document_pages(doc_path)

    assert result.status == "success"
    assert result.page_count == 5
|
||||
|
||||
|
||||
def test_count_ppt_pages_uses_powerpoint_com_fallback(monkeypatch, tmp_path):
    """Check that a .ppt file's count is taken from the PowerPoint COM helper.

    The file holds placeholder bytes only, and the PowerPoint COM slide
    counter is stubbed to return ``9``; ``count_document_pages`` must report
    ``"success"`` with a page count of ``9``.
    """

    def powerpoint_com_slide_count(path):
        return 9

    ppt_path = tmp_path / "legacy.ppt"
    ppt_path.write_bytes(b"legacy-ppt-placeholder")

    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_powerpoint_slides_with_com",
        powerpoint_com_slide_count,
    )

    result = count_document_pages(ppt_path)

    assert result.status == "success"
    assert result.page_count == 9
|
||||
|
||||
|
||||
def test_count_excel_pages_uses_excel_com_fallback(monkeypatch, tmp_path):
    """Check that a .xls file's count is taken from the Excel COM helper.

    The file holds placeholder bytes only, and the Excel COM sheet counter is
    stubbed to return ``3``; ``count_document_pages`` must report
    ``"success"`` with a page count of ``3``.
    """

    def excel_com_sheet_count(path):
        return 3

    xls_path = tmp_path / "legacy.xls"
    xls_path.write_bytes(b"legacy-xls-placeholder")

    monkeypatch.setattr(
        "review_agent.file_summary.services.page_count._count_excel_sheets_with_com",
        excel_com_sheet_count,
    )

    result = count_document_pages(xls_path)

    assert result.status == "success"
    assert result.page_count == 3
|
||||
|
||||
|
||||
def test_document_page_count_skill_marks_unsupported_and_success(tmp_path, django_user_model):
|
||||
xlsx_path = tmp_path / "a.xlsx"
|
||||
workbook = Workbook()
|
||||
|
||||
Reference in New Issue
Block a user