109 lines
4.0 KiB
Python
109 lines
4.0 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
|
|
from review_agent.models import FileSummaryItem
|
|
|
|
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
|
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
|
|
|
|
|
# Module-level logger shared by this skill; the logger name is a hard-coded
# dotted string rather than being derived from __name__.
logger = logging.getLogger("review_agent.file_summary.skills.document_page_count")
|
|
|
|
|
|
class DocumentPageCountSkill(BaseSkill):
    """Count the pages of every item in a batch and store the batch totals.

    Each item is updated and saved individually; the aggregated counters are
    written to the batch once all items have been visited.
    """

    name = "document_page_count"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Process all items of ``context.batch`` in ``file_index`` order.

        Items whose ``file_type`` is not in ``SUPPORTED_EXTENSIONS`` are marked
        as unsupported without calling the page counter. Every other item is
        passed to ``count_document_pages`` up to three times; the loop stops
        as soon as the returned status is not ``"failed"``. The last result is
        copied onto the item (status, page count, error message) together with
        the number of retries that were needed, and the item is saved.

        Args:
            context: Workflow context whose ``batch`` provides the items and
                receives the aggregated counters.

        Returns:
            ``SkillResult(success=True)`` in every case; per-item failures are
            only reflected in the counters and in the warning log.
        """
        batch = context.batch
        status_enum = FileSummaryItem.StatisticsStatus

        # Insertion order matters: it is reused for the batch ``update_fields``
        # list and for the key order of the final log record.
        totals = {
            "success_files": 0,
            "failed_files": 0,
            "unsupported_files": 0,
            "uncertain_files": 0,
            "total_pages": 0,
        }

        logger.info("Document page count started", extra={"batch_id": batch.pk})

        for entry in batch.items.order_by("file_index"):
            if entry.file_type not in SUPPORTED_EXTENSIONS:
                # Unsupported type: record the status and move on without
                # attempting to count pages.
                entry.statistics_status = status_enum.UNSUPPORTED
                totals["unsupported_files"] += 1
                entry.save(update_fields=["statistics_status", "updated_at"])
                logger.info(
                    "Document page count unsupported",
                    extra={
                        "batch_id": batch.pk,
                        "item_id": entry.pk,
                        "file_type": entry.file_type,
                        "file_name": entry.file_name,
                    },
                )
                continue

            # At most three attempts; ``retry_count`` holds how many retries
            # preceded the attempt whose result is kept (0 for the first try).
            for retries_so_far in range(3):
                logger.info(
                    "Document page count attempt",
                    extra={
                        "batch_id": batch.pk,
                        "item_id": entry.pk,
                        "attempt": retries_so_far + 1,
                        "storage_path": entry.storage_path,
                    },
                )
                outcome = count_document_pages(entry.storage_path)
                entry.retry_count = retries_so_far
                if outcome.status != "failed":
                    break

            # Persist whatever the last attempt produced, failed or not.
            entry.statistics_status = outcome.status
            entry.page_count = outcome.page_count
            entry.error_message = outcome.error_message
            entry.save(
                update_fields=[
                    "statistics_status",
                    "page_count",
                    "retry_count",
                    "error_message",
                    "updated_at",
                ]
            )

            if outcome.status == status_enum.SUCCESS:
                totals["success_files"] += 1
                # A missing page count contributes nothing to the total.
                totals["total_pages"] += outcome.page_count or 0
                continue
            if outcome.status == status_enum.UNCERTAIN:
                totals["uncertain_files"] += 1
                continue
            if outcome.status == status_enum.UNSUPPORTED:
                totals["unsupported_files"] += 1
                continue

            # Any remaining status is tallied as a failure.
            totals["failed_files"] += 1
            logger.warning(
                "Document page count failed",
                extra={
                    "batch_id": batch.pk,
                    "item_id": entry.pk,
                    "file_name": entry.file_name,
                    "error": outcome.error_message,
                },
            )

        for field_name, value in totals.items():
            setattr(batch, field_name, value)
        batch.save(update_fields=list(totals))

        logger.info(
            "Document page count finished",
            extra={"batch_id": batch.pk, **totals},
        )
        return SkillResult(success=True)
|