feat(file-summary): 实现文件处理技能链路
This commit is contained in:
1
review_agent/file_summary/skills/__init__.py
Normal file
1
review_agent/file_summary/skills/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
26
review_agent/file_summary/skills/archive_extract.py
Normal file
26
review_agent/file_summary/skills/archive_extract.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from review_agent.models import FileSummaryBatchAttachment
|
||||
|
||||
from ..paths import resolve_storage_path
|
||||
from ..services.archive import ARCHIVE_EXTENSIONS, extract_archive
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
class ArchiveExtractSkill(BaseSkill):
    """Extract every archive attachment of a batch into the batch work directory."""

    name = "archive_extract"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Unpack the batch's archive attachments and report how many entries came out.

        Args:
            context: Workflow context whose ``batch`` supplies ``work_dir`` and
                is used to look up the attachment bindings.

        Returns:
            A successful ``SkillResult`` whose ``data["extracted_count"]`` is
            the sum of ``len(extract_archive(...))`` over all archive
            attachments. The count is 0 when the batch has no work directory.
        """
        # The emptiness check must be made on the raw value: ``Path("")``
        # normalises to ``Path(".")`` and Path objects are always truthy, so
        # testing the Path would never trigger and archives would be unpacked
        # into the process's current working directory.
        work_dir = context.batch.work_dir
        if not work_dir:
            return SkillResult(success=True, data={"extracted_count": 0})

        target_dir = Path(work_dir)
        extracted_count = 0
        for binding in FileSummaryBatchAttachment.objects.filter(batch=context.batch):
            path = resolve_storage_path(binding.attachment.storage_path)
            # ARCHIVE_EXTENSIONS is compared against the lower-cased suffix
            # without its leading dot.
            if path.suffix.lower().lstrip(".") not in ARCHIVE_EXTENSIONS:
                continue
            extracted_count += len(extract_archive(path, target_dir))
        return SkillResult(success=True, data={"extracted_count": extracted_count})
|
||||
24
review_agent/file_summary/skills/base.py
Normal file
24
review_agent/file_summary/skills/base.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from review_agent.models import FileSummaryBatch
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class WorkflowContext:
    """Immutable bundle of inputs handed to a skill's ``run`` method.

    Attributes:
        batch: The ``FileSummaryBatch`` the skill operates on.
    """

    batch: FileSummaryBatch
|
||||
|
||||
|
||||
@dataclass
class SkillResult:
    """Outcome object returned by a skill's ``run`` method.

    Attributes:
        success: Whether the skill reports success.
        data: Free-form payload; each instance gets its own empty dict when
            none is supplied.
        message: Optional text accompanying the result; empty by default.
    """

    success: bool
    data: dict = field(default_factory=dict)
    message: str = ""
|
||||
|
||||
|
||||
class BaseSkill:
    """Base class for skills; subclasses set ``name`` and override ``run``."""

    # Identifier of the skill; empty on the base class.
    name = ""

    def run(self, context: WorkflowContext) -> SkillResult:
        """Execute the skill against ``context``.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError
|
||||
64
review_agent/file_summary/skills/document_page_count.py
Normal file
64
review_agent/file_summary/skills/document_page_count.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from review_agent.models import FileSummaryItem
|
||||
|
||||
from ..services.page_count import SUPPORTED_EXTENSIONS, count_document_pages
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
class DocumentPageCountSkill(BaseSkill):
    """Count pages for each item of a batch and store per-item and batch totals."""

    name = "document_page_count"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Walk the batch items in ``file_index`` order and record page statistics.

        Items whose ``file_type`` is not in ``SUPPORTED_EXTENSIONS`` are marked
        unsupported without being counted. Every other item is passed to
        ``count_document_pages`` up to three times, stopping at the first
        result whose status is not ``"failed"``. The aggregated counters are
        then saved on the batch.

        Returns:
            ``SkillResult(success=True)`` once all items have been processed.
        """
        batch = context.batch
        statuses = FileSummaryItem.StatisticsStatus
        # Insertion order doubles as the batch ``update_fields`` order below.
        tallies = {
            "success_files": 0,
            "failed_files": 0,
            "unsupported_files": 0,
            "uncertain_files": 0,
            "total_pages": 0,
        }

        for item in batch.items.order_by("file_index"):
            if item.file_type not in SUPPORTED_EXTENSIONS:
                item.statistics_status = statuses.UNSUPPORTED
                tallies["unsupported_files"] += 1
                item.save(update_fields=["statistics_status", "updated_at"])
                continue

            outcome = None
            # ``retries_used`` is the number of attempts made before this one.
            for retries_used in range(3):
                outcome = count_document_pages(item.storage_path)
                item.retry_count = retries_used
                if outcome.status != "failed":
                    break

            item.statistics_status = outcome.status
            item.page_count = outcome.page_count
            item.error_message = outcome.error_message
            item.save(
                update_fields=[
                    "statistics_status",
                    "page_count",
                    "retry_count",
                    "error_message",
                    "updated_at",
                ]
            )

            final_status = outcome.status
            if final_status == statuses.SUCCESS:
                tallies["success_files"] += 1
                tallies["total_pages"] += outcome.page_count or 0
            elif final_status == statuses.UNCERTAIN:
                tallies["uncertain_files"] += 1
            elif final_status == statuses.UNSUPPORTED:
                tallies["unsupported_files"] += 1
            else:
                # Any other status is tallied as a failure.
                tallies["failed_files"] += 1

        for field_name, value in tallies.items():
            setattr(batch, field_name, value)
        batch.save(update_fields=list(tallies))
        return SkillResult(success=True)
|
||||
21
review_agent/file_summary/skills/file_inventory.py
Normal file
21
review_agent/file_summary/skills/file_inventory.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from review_agent.models import FileSummaryBatchAttachment
|
||||
|
||||
from ..paths import resolve_storage_path
|
||||
from ..services.inventory import scan_files_to_items
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
class FileInventorySkill(BaseSkill):
    """Build the item inventory of a batch from its attachment storage paths."""

    name = "file_inventory"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Resolve each attachment's storage path and scan them into items.

        Returns:
            A successful ``SkillResult`` whose ``data["total_files"]`` is the
            length of what ``scan_files_to_items`` returned.
        """
        bindings = FileSummaryBatchAttachment.objects.filter(batch=context.batch)
        roots = []
        for binding in bindings:
            roots.append(resolve_storage_path(binding.attachment.storage_path))

        scanned = scan_files_to_items(batch=context.batch, roots=roots)
        return SkillResult(success=True, data={"total_files": len(scanned)})
|
||||
12
review_agent/file_summary/skills/product_detect.py
Normal file
12
review_agent/file_summary/skills/product_detect.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from ..services.product_detect import detect_product_name
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
class ProductDetectSkill(BaseSkill):
    """Skill wrapper around ``detect_product_name``."""

    name = "product_detect"

    def run(self, context: WorkflowContext) -> SkillResult:
        """Run product detection for the batch and report the result.

        Returns:
            A successful ``SkillResult`` whose ``data["product_name"]`` is
            whatever ``detect_product_name(context.batch)`` returned.
        """
        payload = {"product_name": detect_product_name(context.batch)}
        return SkillResult(success=True, data=payload)
|
||||
22
review_agent/file_summary/skills/registry.py
Normal file
22
review_agent/file_summary/skills/registry.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from .base import BaseSkill, SkillResult, WorkflowContext
|
||||
|
||||
|
||||
class SkillRegistry:
    """Name-keyed collection of skill instances."""

    def __init__(self):
        """Start with no registered skills."""
        self._skills: dict[str, BaseSkill] = {}

    def register(self, skill: BaseSkill) -> None:
        """Store ``skill`` under ``skill.name``, replacing any earlier entry.

        Raises:
            ValueError: If ``skill.name`` is empty or otherwise falsy.
        """
        key = skill.name
        if not key:
            raise ValueError("Skill 必须声明 name。")
        self._skills[key] = skill

    def get(self, name: str) -> BaseSkill:
        """Return the skill registered as ``name``.

        Raises:
            KeyError: If nothing is registered under ``name``; the original
                lookup error is chained as the cause.
        """
        try:
            found = self._skills[name]
        except KeyError as missing:
            raise KeyError(f"Skill 未注册:{name}") from missing
        else:
            return found

    def execute(self, name: str, context: WorkflowContext) -> SkillResult:
        """Look up the skill called ``name`` and return its ``run(context)`` result."""
        skill = self.get(name)
        return skill.run(context)
|
||||
Reference in New Issue
Block a user