Files
DEMO-AGENT/review_agent/file_summary/skills/archive_extract.py

70 lines
2.8 KiB
Python

from __future__ import annotations
import logging
from pathlib import Path
import re
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
from .base import BaseSkill, SkillResult, WorkflowContext
logger = logging.getLogger("review_agent.file_summary.skills.archive_extract")
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
stem = Path(binding.attachment.original_name).stem or "archive"
safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive"
return f"{binding.attachment_id}_{safe_stem}"
class ArchiveExtractSkill(BaseSkill):
name = "archive_extract"
def run(self, context: WorkflowContext) -> SkillResult:
extracted_count = 0
if not context.batch.work_dir:
message = "批次工作目录为空,无法解压压缩包。"
logger.error(
"Archive extract failed without work dir",
extra={"batch_id": context.batch.pk, "batch_no": context.batch.batch_no},
)
return SkillResult(success=False, message=message, data={"extracted_count": 0})
target_root = Path(context.batch.work_dir)
archive_count = 0
for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
path = resolve_storage_path(binding.attachment.storage_path)
if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
continue
archive_count += 1
target_dir = target_root / "extracted" / _safe_archive_dir_name(binding)
logger.info(
"Archive extract started",
extra={
"batch_id": context.batch.pk,
"attachment_id": binding.attachment_id,
"path": str(path),
"target_dir": str(target_dir),
},
)
extracted_count += len(extract_archive(path, target_dir))
if archive_count and extracted_count == 0:
message = "压缩包未解出任何可扫描文件,请检查压缩包内容或格式。"
logger.warning(
"Archive extract produced no files",
extra={"batch_id": context.batch.pk, "archive_count": archive_count},
)
return SkillResult(success=False, message=message, data={"extracted_count": 0})
logger.info(
"Archive extract finished",
extra={
"batch_id": context.batch.pk,
"archive_count": archive_count,
"extracted_count": extracted_count,
},
)
return SkillResult(success=True, data={"extracted_count": extracted_count})