70 lines
2.8 KiB
Python
70 lines
2.8 KiB
Python
from __future__ import annotations
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
import re
|
|
|
|
from review_agent.models import FileSummaryBatchAttachment
|
|
|
|
from ..paths import resolve_storage_path
|
|
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
|
|
from .base import BaseSkill, SkillResult, WorkflowContext
|
|
|
|
|
|
# Module-level logger. The channel name is spelled out explicitly instead of
# being derived from ``__name__``.
logger = logging.getLogger("review_agent.file_summary.skills.archive_extract")
|
|
|
|
|
|
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
    """Return a filesystem-friendly directory name for one archive attachment.

    The result has the form ``<attachment_id>_<stem>``. The stem is taken from
    the attachment's original file name (last suffix removed), every run of
    characters outside ``A-Za-z0-9._-`` is collapsed into a single underscore,
    and leading/trailing dots and underscores are trimmed. Whenever the stem
    ends up empty, the literal ``"archive"`` is used instead.
    """
    raw_stem = Path(binding.attachment.original_name).stem
    if not raw_stem:
        raw_stem = "archive"

    # Replace anything that is not a plain ASCII letter, digit, dot, underscore
    # or hyphen, then drop dots/underscores hanging off either end.
    sanitized = re.sub(r"[^A-Za-z0-9._-]+", "_", raw_stem)
    sanitized = sanitized.strip("._")
    if not sanitized:
        # e.g. a name made up entirely of non-ASCII characters
        sanitized = "archive"

    return f"{binding.attachment_id}_{sanitized}"
|
|
|
|
|
|
class ArchiveExtractSkill(BaseSkill):
    """Skill that unpacks the archive attachments bound to a batch.

    Each attachment whose stored file extension is listed in
    ``ARCHIVE_EXTENSIONS`` is extracted into its own directory under
    ``<batch.work_dir>/extracted/``.
    """

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Extract every archive attachment of ``context.batch``.

        Args:
            context: Workflow context; ``context.batch`` supplies ``work_dir``,
                ``pk`` and ``batch_no``.

        Returns:
            A ``SkillResult`` whose ``data["extracted_count"]`` is the summed
            length of the values returned by ``extract_archive``. The result
            is unsuccessful when the batch has no work directory, or when at
            least one archive was found but nothing was extracted from any of
            them. A batch without any archive attachment succeeds with a count
            of 0.
        """
        batch = context.batch

        if not batch.work_dir:
            message = "批次工作目录为空,无法解压压缩包。"
            logger.error(
                "Archive extract failed without work dir",
                extra={"batch_id": batch.pk, "batch_no": batch.batch_no},
            )
            return SkillResult(success=False, message=message, data={"extracted_count": 0})

        target_root = Path(batch.work_dir)
        extracted_count = 0
        archive_count = 0

        # Join the attachment row up front: the loop reads
        # ``binding.attachment`` for every binding, which would otherwise
        # trigger one additional query per attachment.
        bindings = FileSummaryBatchAttachment.objects.filter(batch=batch).select_related(
            "attachment"
        )

        for binding in bindings:
            path = resolve_storage_path(binding.attachment.storage_path)
            # Only files with a known archive extension are handled here.
            if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
                continue

            archive_count += 1
            target_dir = target_root / "extracted" / _safe_archive_dir_name(binding)
            logger.info(
                "Archive extract started",
                extra={
                    "batch_id": batch.pk,
                    "attachment_id": binding.attachment_id,
                    "path": str(path),
                    "target_dir": str(target_dir),
                },
            )
            extracted_count += len(extract_archive(path, target_dir))

        # Archives were present but yielded nothing: report it as a failure so
        # the caller does not continue with an empty extraction result.
        if archive_count and extracted_count == 0:
            message = "压缩包未解出任何可扫描文件,请检查压缩包内容或格式。"
            logger.warning(
                "Archive extract produced no files",
                extra={"batch_id": batch.pk, "archive_count": archive_count},
            )
            return SkillResult(success=False, message=message, data={"extracted_count": 0})

        logger.info(
            "Archive extract finished",
            extra={
                "batch_id": batch.pk,
                "archive_count": archive_count,
                "extracted_count": extracted_count,
            },
        )
        return SkillResult(success=True, data={"extracted_count": extracted_count})
|