from __future__ import annotations

from pathlib import Path

from review_agent.models import FileSummaryBatch, FileSummaryItem

# Lower-case extensions (without the leading dot) that are counted as
# "supported" when the batch totals are computed.
SUPPORTED_EXTENSIONS = {"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx"}


def _directory_level(relative_path: Path) -> str:
    """Return the parent components of *relative_path* joined with ``/``.

    A path made of a single component (a bare file name) yields ``""``.
    """
    # Dropping the last part leaves an empty tuple for a bare file name, and
    # joining an empty tuple gives the empty string.
    return "/".join(relative_path.parts[:-1])


def _collect_files(roots: list[Path]) -> list[tuple[Path, Path]]:
    """Return ``(base, file)`` pairs for every file found under *roots*.

    A root that is itself a file is paired with its parent directory and is
    taken as-is. A directory root is walked recursively in sorted order, and
    files whose name starts with ``.`` or whose size is zero are left out.
    """
    found: list[tuple[Path, Path]] = []
    for entry in roots:
        base = Path(entry)
        if base.is_file():
            found.append((base.parent, base))
            continue

        candidates = [node for node in base.rglob("*") if node.is_file()]
        candidates.sort()
        for candidate in candidates:
            # The name test comes first so dot-files are never stat()-ed.
            if not candidate.name.startswith(".") and candidate.stat().st_size != 0:
                found.append((base, candidate))
    return found


def scan_files_to_items(*, batch: FileSummaryBatch, roots: list[Path]) -> list[FileSummaryItem]:
    """Create one ``FileSummaryItem`` per discovered file and update *batch*.

    Args:
        batch: Batch the new items are attached to. Its ``total_files``,
            ``supported_files`` and ``unsupported_files`` attributes are
            overwritten and saved.
        roots: Files and/or directories to scan, processed in the given order.

    Returns:
        The created items, in discovery order. ``file_index`` starts at 1 and
        every item is created with ``StatisticsStatus.SKIPPED``.
    """
    created: list[FileSummaryItem] = []
    for position, (base, file_path) in enumerate(_collect_files(roots), start=1):
        relative_path = file_path.relative_to(base)
        extension = file_path.suffix.lower().lstrip(".")
        created.append(
            FileSummaryItem.objects.create(
                batch=batch,
                file_index=position,
                directory_level=_directory_level(relative_path),
                file_name=file_path.name,
                file_type=extension,
                relative_path=relative_path.as_posix(),
                storage_path=str(file_path),
                statistics_status=FileSummaryItem.StatisticsStatus.SKIPPED,
            )
        )

    total_count = len(created)
    supported_count = len(
        [entry for entry in created if entry.file_type in SUPPORTED_EXTENSIONS]
    )
    batch.total_files = total_count
    batch.supported_files = supported_count
    batch.unsupported_files = total_count - supported_count
    batch.save(update_fields=["total_files", "supported_files", "unsupported_files"])
    return created