fix(file-summary): 同步压缩包工作流状态与结果刷新

This commit is contained in:
2026-06-06 19:45:49 +08:00
parent daa0642142
commit 7e561ea213
12 changed files with 560 additions and 32 deletions

View File

@@ -2,10 +2,12 @@ from __future__ import annotations
import logging
from pathlib import Path
import re
from review_agent.models import FileSummaryBatchAttachment
from ..paths import resolve_storage_path
from ..services.archive import ARCHIVE_EXTENSIONS
from ..services.inventory import scan_files_to_items
from .base import BaseSkill, SkillResult, WorkflowContext
@@ -13,14 +15,44 @@ from .base import BaseSkill, SkillResult, WorkflowContext
# Module-level logger for this skill module; the logger name is a fixed dotted string.
logger = logging.getLogger("review_agent.file_summary.skills.file_inventory")
def _safe_archive_dir_name(binding: FileSummaryBatchAttachment) -> str:
    """Return a filesystem-friendly directory name for an archive attachment.

    The result has the form ``"<attachment_id>_<safe_stem>"``. ``safe_stem`` is
    the stem of the attachment's original file name with every run of
    characters outside ``A-Z a-z 0-9 . _ -`` collapsed into a single ``_``,
    and with leading/trailing ``.`` and ``_`` removed. If nothing usable
    remains (or the original name is missing/empty), ``"archive"`` is used.

    Args:
        binding: Object exposing ``attachment.original_name`` and
            ``attachment_id``.

    Returns:
        The directory name as a string.
    """
    # A missing name is treated like an empty one so the "archive" fallback
    # applies instead of Path(None) raising TypeError.
    original_name = binding.attachment.original_name or ""
    stem = Path(original_name).stem or "archive"
    # Collapse each run of disallowed characters into one underscore, then
    # trim dots/underscores from both ends so the name never starts or ends
    # with them.
    safe_stem = re.sub(r"[^A-Za-z0-9._-]+", "_", stem).strip("._") or "archive"
    return f"{binding.attachment_id}_{safe_stem}"
class FileInventorySkill(BaseSkill):
name = "file_inventory"
def run(self, context: WorkflowContext) -> SkillResult:
roots = [
resolve_storage_path(binding.attachment.storage_path)
for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch)
]
roots: list[Path] = []
missing_extract_roots: list[str] = []
for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
original_path = resolve_storage_path(binding.attachment.storage_path)
is_archive = original_path.suffix.lower().lstrip(".") in ARCHIVE_EXTENSIONS
if not is_archive:
roots.append(original_path)
continue
extracted_root = (
Path(context.batch.work_dir)
/ "extracted"
/ _safe_archive_dir_name(binding)
)
if extracted_root.exists():
roots.append(extracted_root)
else:
missing_extract_roots.append(str(extracted_root))
if missing_extract_roots:
message = "压缩包解压目录不存在,无法扫描解压后的文件。"
logger.warning(
"File inventory missing extracted roots",
extra={
"batch_id": context.batch.pk,
"missing_extract_roots": missing_extract_roots,
},
)
return SkillResult(success=False, message=message)
logger.info(
"File inventory started",
extra={