from __future__ import annotations

import hashlib
from pathlib import Path

from django.conf import settings

from review_agent.models import (
    RegulatoryInfoPackageArtifact,
    RegulatoryInfoPackageBatch,
)

# Read size used when hashing files, so large files are never loaded whole.
_HASH_CHUNK_SIZE = 1024 * 1024


def build_batch_work_dir(
    batch: RegulatoryInfoPackageBatch | None = None,
    *,
    batch_no: str = "",
) -> Path:
    """Build the default work directory path for a package batch.

    Args:
        batch: Batch whose ``user_id``, ``conversation_id`` and ``batch_no``
            form the path. When given, ``batch_no`` is ignored.
        batch_no: Batch number used only when ``batch`` is not given.

    Returns:
        ``MEDIA_ROOT/regulatory_info_package/<user_id>/<conversation_id>/<batch_no>``
        when ``batch`` is given, otherwise
        ``MEDIA_ROOT/regulatory_info_package/<batch_no>``. The directory is
        not created.
    """
    base = Path(settings.MEDIA_ROOT) / "regulatory_info_package"
    if batch:
        return (
            base
            / str(batch.user_id)
            / str(batch.conversation_id)
            / batch.batch_no
        )
    return base / batch_no


def _resolve_batch_root(batch: RegulatoryInfoPackageBatch) -> Path:
    """Return the batch's work directory, or the default layout if unset."""
    if batch.work_dir:
        return Path(batch.work_dir)
    return build_batch_work_dir(batch)


def ensure_batch_subdir(batch: RegulatoryInfoPackageBatch, name: str) -> Path:
    """Create (if needed) and return a direct child directory of the work dir.

    Only the final path component of ``name`` is used, so nested or absolute
    names collapse to a single directory directly under the work directory.

    Args:
        batch: Batch that owns the work directory.
        name: Sub-directory name; only ``Path(name).name`` is used.

    Returns:
        The path of the sub-directory, which exists on return.

    Raises:
        ValueError: If the resulting path escapes the batch work directory
            (for example when ``name`` is ``".."``).
    """
    target = _resolve_batch_root(batch) / Path(name).name
    ensure_within_work_dir(batch, target)
    target.mkdir(parents=True, exist_ok=True)
    return target


def ensure_within_work_dir(
    batch: RegulatoryInfoPackageBatch,
    path: str | Path,
) -> Path:
    """Validate that ``path`` lies inside the batch work directory.

    The work directory is ``batch.work_dir`` when set, otherwise the default
    location from ``build_batch_work_dir``. This is the same rule
    ``ensure_batch_subdir`` uses, so both functions agree on the root even
    for batches whose ``work_dir`` has not been populated.

    Args:
        batch: Batch that owns the work directory.
        path: Path to validate.

    Returns:
        The resolved form of ``path``.

    Raises:
        ValueError: If the resolved path is neither the work directory itself
            nor located beneath it.
    """
    root = _resolve_batch_root(batch).resolve()
    target = Path(path).resolve()
    # Compare resolved paths so ".." segments and symlinks cannot be used to
    # step outside the work directory.
    if target != root and root not in target.parents:
        raise ValueError("输出路径必须位于当前材料包批次工作目录内。")
    return target


def compute_file_sha256(path: str | Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in fixed-size chunks rather than all at once.
    """
    digest = hashlib.sha256()
    with Path(path).open("rb") as handle:
        for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def create_artifact_for_file(
    batch: RegulatoryInfoPackageBatch,
    *,
    path: str | Path,
    artifact_type: str,
    file_format: str,
    name: str = "",
    metadata: dict | None = None,
    created_by_node: str = "",
) -> RegulatoryInfoPackageArtifact:
    """Create an artifact record for a file inside the batch work directory.

    Args:
        batch: Batch the artifact belongs to.
        path: Location of the file; must be inside the batch work directory.
        artifact_type: Stored as the artifact's ``artifact_type``.
        file_format: Stored as the artifact's ``file_format``.
        name: Display name; defaults to the file name without its suffix.
        metadata: Extra metadata; an empty dict is stored when omitted.
        created_by_node: Stored as the artifact's ``created_by_node``.

    Returns:
        The newly created ``RegulatoryInfoPackageArtifact``. Its
        ``storage_path`` is the resolved path. When the file does not exist,
        ``file_size`` is ``0`` and ``content_hash`` is an empty string.

    Raises:
        ValueError: If ``path`` is outside the batch work directory.
    """
    file_path = ensure_within_work_dir(batch, path)
    # Check existence once so the recorded size and hash cannot disagree about
    # whether the file was present.
    file_exists = file_path.exists()
    file_size = file_path.stat().st_size if file_exists else 0
    content_hash = compute_file_sha256(file_path) if file_exists else ""
    return RegulatoryInfoPackageArtifact.objects.create(
        batch=batch,
        artifact_type=artifact_type,
        file_format=file_format,
        name=name or file_path.stem,
        file_name=file_path.name,
        storage_path=str(file_path),
        file_size=file_size,
        content_hash=content_hash,
        metadata=metadata or {},
        created_by_node=created_by_node,
    )