"""Best-effort page counting for PDF and Microsoft Office documents."""
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
|
|
|
|
# File extensions (lower-case, without the leading dot) that
# count_document_pages dispatches on; any other suffix is reported with
# status "unsupported".
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}
|
|
|
|
|
|
@dataclass(frozen=True)
class PageCountResult:
    """Immutable outcome of a page-count attempt.

    Field order is part of the interface: instances may be built
    positionally as ``PageCountResult(status, page_count, error_message)``.
    """

    # Outcome label chosen by the code that builds the result.
    status: str
    # Number of pages counted; None when no count is available.
    page_count: int | None = None
    # Human-readable failure detail; empty when there is nothing to report.
    error_message: str = ""
|
|
|
|
|
|
def _read_docx_page_count(file_path: Path) -> int | None:
    """Return the page count recorded in a .docx package, or None if absent.

    The count is read from the ``Pages`` element of the extended-properties
    part (``docProps/app.xml``). It is whatever the authoring application
    stored on last save, so it is metadata rather than a fresh layout pass.

    Raises:
        zipfile.BadZipFile: if the file is not a ZIP container.
        xml.etree.ElementTree.ParseError: if ``docProps/app.xml`` is malformed.
        OSError: if the file cannot be opened.
    """
    import zipfile
    from xml.etree import ElementTree

    with zipfile.ZipFile(file_path) as package:
        try:
            raw_xml = package.read("docProps/app.xml")
        except KeyError:
            # The extended-properties part is optional in a package.
            return None

    pages_text = ElementTree.fromstring(raw_xml).findtext(
        "{http://schemas.openxmlformats.org/officeDocument/2006/"
        "extended-properties}Pages"
    )
    try:
        pages = int(pages_text)
    except (TypeError, ValueError):
        # Element missing (None) or not a number.
        return None
    # A zero or negative value carries no usable information.
    return pages if pages > 0 else None


def count_document_pages(path: str | Path) -> PageCountResult:
    """Count the pages (or sheets / slides) of a document, best effort.

    The format is chosen from the file suffix, case-insensitively:

    * ``pdf``  - number of pages reported by ``pypdf``.
    * ``docx`` - page count stored in the package metadata; "uncertain"
      when the metadata does not carry one.
    * ``xlsx`` / ``xls`` - number of worksheets.
    * ``pptx`` - number of slides.
    * ``doc`` / ``ppt`` - only checked for being an OLE container; a valid
      container yields "uncertain", anything else "failed".

    Third-party readers are imported lazily inside each branch, so a missing
    package surfaces as a "failed" result rather than an import error at
    module load.

    Args:
        path: Location of the document.

    Returns:
        A PageCountResult whose ``status`` is "success", "uncertain",
        "unsupported" or "failed". ``page_count`` is set only on success and
        ``error_message`` only on failure. No exception derived from
        ``Exception`` escapes this function.
    """
    file_path = Path(path)
    ext = file_path.suffix.lower().lstrip(".")
    if ext not in SUPPORTED_EXTENSIONS:
        return PageCountResult(status="unsupported")

    try:
        if ext == "pdf":
            from pypdf import PdfReader

            reader = PdfReader(str(file_path))
            return PageCountResult(status="success", page_count=len(reader.pages))

        if ext == "docx":
            pages = _read_docx_page_count(file_path)
            if pages is not None:
                return PageCountResult(status="success", page_count=pages)
            return PageCountResult(status="uncertain")

        if ext == "xlsx":
            from openpyxl import load_workbook

            workbook = load_workbook(str(file_path), read_only=True, data_only=True)
            try:
                sheet_count = len(workbook.sheetnames)
            finally:
                # Read-only workbooks keep the file open until closed.
                workbook.close()
            return PageCountResult(status="success", page_count=sheet_count)

        if ext == "xls":
            import xlrd

            workbook = xlrd.open_workbook(str(file_path), on_demand=True)
            try:
                sheet_count = workbook.nsheets
            finally:
                # on_demand mode holds resources until explicitly released.
                workbook.release_resources()
            return PageCountResult(status="success", page_count=sheet_count)

        if ext == "pptx":
            from pptx import Presentation

            presentation = Presentation(str(file_path))
            return PageCountResult(status="success", page_count=len(presentation.slides))

        if ext in {"doc", "ppt"}:
            import olefile

            if olefile.isOleFile(str(file_path)):
                return PageCountResult(status="uncertain")
            return PageCountResult(status="failed", error_message="不是有效的 OLE 文件。")
    except Exception as exc:
        # Boundary: convert any reader/import/IO error into a result object.
        # Some exceptions stringify to "", so fall back to the class name.
        return PageCountResult(
            status="failed",
            error_message=str(exc) or type(exc).__name__,
        )

    # Reached only if SUPPORTED_EXTENSIONS lists a suffix with no branch above.
    return PageCountResult(status="uncertain")
|