Files
DEMO-AGENT/review_agent/file_summary/skills/document_page_count.py

109 lines
4.0 KiB
Python

from __future__ import annotations
import logging
from review_agent.models import FileSummaryItem
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
from .base import BaseSkill, SkillResult, WorkflowContext
# Module-level logger shared by everything in this file; the logger name is
# spelled out as a literal dotted path rather than derived from __name__.
logger = logging.getLogger("review_agent.file_summary.skills.document_page_count")
class DocumentPageCountSkill(BaseSkill):
    """Count pages for every file in a batch and record the totals.

    Items of ``context.batch`` are visited in ``file_index`` order. An item
    whose ``file_type`` is not in ``SUPPORTED_EXTENSIONS`` is marked
    unsupported; every other item is passed to ``count_document_pages``,
    which is retried while it reports the status ``"failed"``. The outcome is
    saved on the item, and the per-status tallies plus the page total are
    saved on the batch once all items have been processed.
    """

    name = "document_page_count"

    # Maximum number of count_document_pages calls per file (the first try
    # plus retries). Subclasses may override it; values below 1 are treated
    # as 1 so that a result always exists.
    MAX_ATTEMPTS = 3

    def run(self, context: WorkflowContext) -> SkillResult:
        """Process all items of ``context.batch`` and persist the statistics.

        Args:
            context: Workflow context whose ``batch`` provides the items to
                count and receives the aggregated counters.

        Returns:
            ``SkillResult(success=True)``; per-file failures are recorded on
            the items and in ``failed_files`` rather than failing the skill.
        """
        batch = context.batch
        batch_id = batch.pk
        success_files = failed_files = unsupported_files = uncertain_files = 0
        total_pages = 0

        logger.info("Document page count started", extra={"batch_id": batch_id})

        for item in batch.items.order_by("file_index"):
            if item.file_type not in SUPPORTED_EXTENSIONS:
                unsupported_files += 1
                self._mark_unsupported(item, batch_id)
                continue

            result = self._count_with_retries(item, batch_id)
            self._store_result(item, result)

            if result.status == FileSummaryItem.StatisticsStatus.SUCCESS:
                success_files += 1
                # page_count may be empty even on success; count it as 0.
                total_pages += result.page_count or 0
            elif result.status == FileSummaryItem.StatisticsStatus.UNCERTAIN:
                uncertain_files += 1
            elif result.status == FileSummaryItem.StatisticsStatus.UNSUPPORTED:
                unsupported_files += 1
            else:
                # Any status not handled above is tallied as a failure.
                failed_files += 1
                logger.warning(
                    "Document page count failed",
                    extra={
                        "batch_id": batch_id,
                        "item_id": item.pk,
                        "file_name": item.file_name,
                        "error": result.error_message,
                    },
                )

        batch.success_files = success_files
        batch.failed_files = failed_files
        batch.unsupported_files = unsupported_files
        batch.uncertain_files = uncertain_files
        batch.total_pages = total_pages
        batch.save(
            update_fields=[
                "success_files",
                "failed_files",
                "unsupported_files",
                "uncertain_files",
                "total_pages",
            ]
        )

        logger.info(
            "Document page count finished",
            extra={
                "batch_id": batch_id,
                "success_files": success_files,
                "failed_files": failed_files,
                "unsupported_files": unsupported_files,
                "uncertain_files": uncertain_files,
                "total_pages": total_pages,
            },
        )
        return SkillResult(success=True)

    def _mark_unsupported(self, item: FileSummaryItem, batch_id) -> None:
        """Flag ``item`` as unsupported, save that status and log it."""
        item.statistics_status = FileSummaryItem.StatisticsStatus.UNSUPPORTED
        # NOTE(review): "updated_at" is listed presumably so that the item's
        # timestamp is written together with the status - confirm on the model.
        item.save(update_fields=["statistics_status", "updated_at"])
        logger.info(
            "Document page count unsupported",
            extra={
                "batch_id": batch_id,
                "item_id": item.pk,
                "file_type": item.file_type,
                "file_name": item.file_name,
            },
        )

    def _count_with_retries(self, item: FileSummaryItem, batch_id):
        """Call ``count_document_pages`` until it stops reporting ``"failed"``.

        At most ``MAX_ATTEMPTS`` calls are made. ``item.retry_count`` is set
        (but not saved here) to the number of retries performed, i.e. the
        attempt number minus one. The last result is returned, whether or
        not it still has the status ``"failed"``.
        """
        attempts = max(1, self.MAX_ATTEMPTS)
        result = None
        for attempt in range(1, attempts + 1):
            logger.info(
                "Document page count attempt",
                extra={
                    "batch_id": batch_id,
                    "item_id": item.pk,
                    "attempt": attempt,
                    "storage_path": item.storage_path,
                },
            )
            result = count_document_pages(item.storage_path)
            item.retry_count = attempt - 1
            if result.status != "failed":
                break
        return result

    def _store_result(self, item: FileSummaryItem, result) -> None:
        """Copy status, page count and error message onto ``item`` and save.

        ``retry_count`` is part of the saved fields because
        ``_count_with_retries`` has already updated it on the instance.
        """
        item.statistics_status = result.status
        item.page_count = result.page_count
        item.error_message = result.error_message
        item.save(
            update_fields=[
                "statistics_status",
                "page_count",
                "retry_count",
                "error_message",
                "updated_at",
            ]
        )