from __future__ import annotations import logging from pathlib import Path import re from review_agent.models import FileSummaryBatchAttachment from ..paths import resolve_storage_path from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive from .base import BaseSkill, SkillResult, WorkflowContext logger = logging.getLogger("review_agent.file_summary.skills.archive_extract") def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str: stem = Path(binding.attachment.original_name).stem or "archive" safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive" return f"{binding.attachment_id}_{safe_stem}" class ArchiveExtractSkill(BaseSkill): name = "archive_extract" def run(self, context: WorkflowContext) -> SkillResult: extracted_count = 0 if not context.batch.work_dir: message = "批次工作目录为空,无法解压压缩包。" logger.error( "Archive extract failed without work dir", extra={"batch_id": context.batch.pk, "batch_no": context.batch.batch_no}, ) return SkillResult(success=False, message=message, data={"extracted_count": 0}) target_root = Path(context.batch.work_dir) archive_count = 0 for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch): path = resolve_storage_path(binding.attachment.storage_path) if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS: continue archive_count += 1 target_dir = target_root / "extracted" / _safe_archive_dir_name(binding) logger.info( "Archive extract started", extra={ "batch_id": context.batch.pk, "attachment_id": binding.attachment_id, "path": str(path), "target_dir": str(target_dir), }, ) extracted_count += len(extract_archive(path, target_dir)) if archive_count and extracted_count == 0: message = "压缩包未解出任何可扫描文件,请检查压缩包内容或格式。" logger.warning( "Archive extract produced no files", extra={"batch_id": context.batch.pk, "archive_count": archive_count}, ) return SkillResult(success=False, message=message, data={"extracted_count": 0}) logger.info( "Archive extract finished", extra={ "batch_id": context.batch.pk, "archive_count": archive_count, "extracted_count": extracted_count, }, ) return SkillResult(success=True, data={"extracted_count": extracted_count})