from __future__ import annotations

import logging
from pathlib import Path

from review_agent.models import FileSummaryBatchAttachment

from ..paths import resolve_storage_path
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
from .base import BaseSkill, SkillResult, WorkflowContext

logger = logging.getLogger("review_agent.file_summary.skills.archive_extract")


class ArchiveExtractSkill(BaseSkill):
    """Skill that extracts archive attachments of a batch into its work dir."""

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Extract every archive attachment bound to ``context.batch``.

        Attachments whose file suffix (lower-cased, without the leading dot)
        is not in ``ARCHIVE_EXTENSIONS`` are ignored. Each remaining
        attachment is passed to ``extract_archive`` together with the batch
        work directory.

        Args:
            context: Workflow context; ``context.batch`` supplies ``work_dir``,
                ``pk`` and ``batch_no``.

        Returns:
            A successful ``SkillResult`` whose ``data["extracted_count"]`` is
            the sum of ``len(extract_archive(...))`` over all processed
            archives, or ``0`` when the batch has no work directory.
        """
        # Check the raw value, not a ``Path``: ``Path("")`` becomes ``Path(".")``
        # and ``Path`` objects are always truthy, so testing the ``Path`` would
        # never skip and would extract into the current working directory.
        work_dir = context.batch.work_dir
        if not work_dir:
            logger.info(
                "Archive extract skipped without work dir",
                extra={
                    "batch_id": context.batch.pk,
                    "batch_no": context.batch.batch_no,
                },
            )
            return SkillResult(success=True, data={"extracted_count": 0})

        target_dir = Path(work_dir)
        extracted_count = 0

        for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
            path = resolve_storage_path(binding.attachment.storage_path)

            # Only the final suffix is inspected (e.g. "x.tar.gz" yields "gz").
            # NOTE(review): confirm ARCHIVE_EXTENSIONS lists such trailing
            # suffixes if multi-part extensions must be supported.
            if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
                continue

            logger.info(
                "Archive extract started",
                extra={
                    "batch_id": context.batch.pk,
                    "attachment_id": binding.attachment_id,
                    "path": str(path),
                },
            )
            # presumably extract_archive returns the collection of extracted
            # entries; only its length is used here.
            extracted_count += len(extract_archive(path, target_dir))

        logger.info(
            "Archive extract finished",
            extra={"batch_id": context.batch.pk, "extracted_count": extracted_count},
        )
        return SkillResult(success=True, data={"extracted_count": extracted_count})