from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path

# Lower-case file extensions (without the leading dot) that
# count_document_pages knows how to dispatch on.
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}


@dataclass(frozen=True)
class PageCountResult:
    """Outcome of a page-count attempt for a single document.

    ``status`` is one of the strings produced by ``count_document_pages``:
    ``"success"`` (``page_count`` is set), ``"uncertain"`` (the file was
    recognised but no count could be determined), ``"failed"``
    (``error_message`` describes the problem) or ``"unsupported"`` (the
    extension is not in ``SUPPORTED_EXTENSIONS``).
    """

    status: str
    page_count: int | None = None
    error_message: str = ""


def count_document_pages(path: str | Path) -> PageCountResult:
    """Return the page (or sheet / slide) count of the document at *path*.

    The handler is selected purely from the file extension, compared
    case-insensitively:

    * ``pdf``  - number of pages reported by ``pypdf``.
    * ``docx`` - the ``pages`` attribute of the document's core properties,
      when present and truthy; otherwise ``"uncertain"``.
    * ``xlsx`` / ``xls`` - number of worksheets.
    * ``pptx`` - number of slides.
    * ``doc`` / ``ppt`` - only validated as OLE containers; the result is
      ``"uncertain"`` for a valid OLE file and ``"failed"`` otherwise.

    Third-party parsers are imported lazily inside each branch, so a missing
    optional dependency is reported as a ``"failed"`` result rather than
    breaking the import of this module.

    Args:
        path: Location of the document on disk.

    Returns:
        A ``PageCountResult``. This function does not raise for parser,
        I/O or import errors; they are converted into a ``"failed"`` result.
    """
    file_path = Path(path)
    ext = file_path.suffix.lower().lstrip(".")
    if ext not in SUPPORTED_EXTENSIONS:
        return PageCountResult(status="unsupported")

    source = str(file_path)
    try:
        if ext == "pdf":
            from pypdf import PdfReader

            return PageCountResult(
                status="success",
                page_count=len(PdfReader(source).pages),
            )

        if ext == "docx":
            from docx import Document

            properties = Document(source).core_properties
            # NOTE(review): this relies on core_properties exposing a
            # `pages` attribute; when it is absent or falsy the result is
            # "uncertain". Confirm the installed python-docx provides it.
            pages = getattr(properties, "pages", None)
            if pages:
                return PageCountResult(status="success", page_count=pages)
            return PageCountResult(status="uncertain")

        if ext == "xlsx":
            from openpyxl import load_workbook

            workbook = load_workbook(source, read_only=True, data_only=True)
            try:
                sheet_count = len(workbook.sheetnames)
            finally:
                # Read-only workbooks keep the underlying archive open
                # until they are closed explicitly.
                workbook.close()
            return PageCountResult(status="success", page_count=sheet_count)

        if ext == "xls":
            import xlrd

            workbook = xlrd.open_workbook(source, on_demand=True)
            try:
                sheet_count = workbook.nsheets
            finally:
                # on_demand mode may hold the file (or an mmap of it) open
                # until resources are released.
                workbook.release_resources()
            return PageCountResult(status="success", page_count=sheet_count)

        if ext == "pptx":
            from pptx import Presentation

            return PageCountResult(
                status="success",
                page_count=len(Presentation(source).slides),
            )

        if ext in {"doc", "ppt"}:
            import olefile

            # Legacy binary formats: only the container is validated here,
            # no page count is extracted.
            if olefile.isOleFile(source):
                return PageCountResult(status="uncertain")
            return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。")
    except Exception as exc:
        # Boundary handler: any parser, I/O or import error becomes a
        # "failed" result. Fall back to the exception class name so the
        # message is never empty.
        return PageCountResult(
            status="failed",
            error_message=str(exc) or type(exc).__name__,
        )

    # Defensive fallback for an extension that is listed in
    # SUPPORTED_EXTENSIONS but has no branch above.
    return PageCountResult(status="uncertain")