feat(file-summary): 接入文件汇总工作流触发

This commit is contained in:
2026-06-06 01:16:22 +08:00
parent eb87d9040d
commit 51e7c0c007
8 changed files with 416 additions and 1 deletions

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from review_agent.models import FileSummaryBatch, WorkflowEvent
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Persist one workflow event row for ``batch``.

    Args:
        batch: Batch the event belongs to.
        event_type: Event type identifier stored on the row.
        payload: Optional event data; any falsy value (``None`` or an empty
            dict) is stored as a fresh empty dict.

    Returns:
        The newly created ``WorkflowEvent``.
    """
    stored_payload = payload if payload else {}
    return WorkflowEvent.objects.create(
        batch=batch,
        event_type=event_type,
        payload=stored_payload,
    )
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert a workflow event into a JSON-friendly dictionary.

    Args:
        event: Event whose ``pk``, ``event_type``, ``payload`` and
            ``created_at`` attributes are read.

    Returns:
        A dict with the keys ``id``, ``event_type``, ``payload`` and
        ``created_at`` (in that order); the timestamp is rendered with
        ``isoformat()``.
    """
    return dict(
        id=event.pk,
        event_type=event.event_type,
        payload=event.payload,
        created_at=event.created_at.isoformat(),
    )

View File

@@ -3,6 +3,8 @@ from django.http import Http404, JsonResponse
from django.views.decorators.http import require_http_methods
from review_agent.models import Conversation, FileAttachment
from review_agent.models import FileSummaryBatch, WorkflowEvent
from .events import serialize_event
from .storage import save_uploaded_attachment, serialize_attachment
@@ -56,3 +58,50 @@ def attachment_detail(request, conversation_id: int, attachment_id: int):
attachment.is_active = False
attachment.save(update_fields=["upload_status", "is_active"])
return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return a JSON snapshot of one batch and its workflow nodes.

    Only batches whose ``user`` is the requesting user are visible; anything
    else raises ``Http404``.

    Args:
        request: Incoming request; ``request.user`` scopes the lookup.
        batch_id: Primary key of the batch to report on.

    Returns:
        ``JsonResponse`` with a ``batch`` summary object and a ``nodes`` list
        ordered by node id.

    Raises:
        Http404: No batch with this id belongs to the requesting user.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if batch is None:
        raise Http404("批次不存在。")

    batch_summary = {
        "id": batch.pk,
        "batch_no": batch.batch_no,
        "status": batch.status,
        "product_name": batch.product_name,
        "total_files": batch.total_files,
        "success_files": batch.success_files,
        "failed_files": batch.failed_files,
        "total_pages": batch.total_pages,
    }

    node_summaries = []
    for node_run in batch.node_runs.order_by("id"):
        node_summaries.append(
            {
                "node_code": node_run.node_code,
                "node_name": node_run.node_name,
                "status": node_run.status,
                "progress": node_run.progress,
                "message": node_run.message,
            }
        )

    return JsonResponse({"batch": batch_summary, "nodes": node_summaries})
@require_http_methods(["GET"])
@login_required
def batch_events(request, batch_id: int):
    """Return the events of a batch that are newer than a cursor id.

    The cursor is read from the ``after`` query parameter. A missing, empty
    or non-integer value is treated as ``0``.

    Args:
        request: Incoming request; ``request.user`` scopes the batch lookup.
        batch_id: Primary key of the batch whose events are listed.

    Returns:
        ``JsonResponse`` of the form ``{"events": [...]}`` with events whose
        primary key is greater than the cursor, ordered by id.

    Raises:
        Http404: No batch with this id belongs to the requesting user.
    """
    batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if batch is None:
        raise Http404("批次不存在。")

    raw_cursor = request.GET.get("after") or "0"
    try:
        after_id = int(raw_cursor)
    except ValueError:
        # An unparsable cursor restarts from the beginning instead of failing.
        after_id = 0

    newer_events = WorkflowEvent.objects.filter(batch=batch, pk__gt=after_id).order_by("id")
    serialized = list(map(serialize_event, newer_events))
    return JsonResponse({"events": serialized})

View File

@@ -0,0 +1,127 @@
from __future__ import annotations
from threading import Thread
from uuid import uuid4
from django.db import transaction
from django.utils import timezone
from review_agent.models import (
Conversation,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
Message,
WorkflowNodeRun,
)
from .events import record_event
# Ordered (node_code, node_name) pairs describing the workflow pipeline.
# create_file_summary_batch creates one WorkflowNodeRun per entry, in this
# order, for every new batch. node_name is the Chinese display label.
# NOTE(review): the executor iterates node runs ordered by id, so this list
# order is presumably also the execution order — confirm ids are sequential.
NODE_DEFINITIONS = [
("upload", "附件固化"),  # label: attachment persistence
("extract", "压缩包解压"),  # label: archive extraction
("inventory", "文件扫描"),  # label: file scan
("page_count", "页数统计"),  # label: page counting
("product_detect", "产品识别"),  # label: product detection
("report", "报告输出"),  # label: report output
("complete", "完成"),  # label: done
]
def build_batch_no() -> str:
    """Generate a batch number shaped like ``FS-<YYYYmmddHHMMSS>-<6 hex chars>``.

    The timestamp comes from ``timezone.localtime()`` and the suffix is the
    first six hex digits of a random UUID4.
    """
    stamp = timezone.localtime().strftime('%Y%m%d%H%M%S')
    suffix = uuid4().hex[:6]
    return f"FS-{stamp}-{suffix}"
@transaction.atomic
def create_file_summary_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
) -> FileSummaryBatch:
    """Create a file-summary batch from the conversation's usable attachments.

    Runs inside a single transaction. It selects the conversation's active,
    non-deleted attachments with ``select_for_update()``, creates the batch,
    links every attachment to it and marks the attachment ``BOUND``, creates
    one node run per ``NODE_DEFINITIONS`` entry, and records a
    ``workflow_created`` event.

    Args:
        conversation: Conversation whose attachments are bound to the batch.
        user: Owner stored on the batch.
        trigger_message: Optional message stored on the batch as its trigger.

    Returns:
        The newly created ``FileSummaryBatch``.

    Raises:
        ValueError: The conversation has no active, non-deleted attachment.
    """
    upload_states = FileAttachment.UploadStatus
    candidates = (
        FileAttachment.objects.select_for_update()
        .filter(conversation=conversation, is_active=True)
        .exclude(upload_status=upload_states.DELETED)
        .order_by("original_name", "-created_at")
    )
    # Materialise the queryset up front so the emptiness check and the
    # binding loop below work on the same set of rows.
    active_attachments = list(candidates)
    if not active_attachments:
        raise ValueError("当前对话没有可用附件。")

    batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        batch_no=build_batch_no(),
    )

    for bound in active_attachments:
        FileSummaryBatchAttachment.objects.create(batch=batch, attachment=bound)
        bound.upload_status = upload_states.BOUND
        bound.save(update_fields=["upload_status"])

    for node_code, node_name in NODE_DEFINITIONS:
        WorkflowNodeRun.objects.create(batch=batch, node_code=node_code, node_name=node_name)

    created_payload = {"batch_id": batch.pk, "batch_no": batch.batch_no}
    record_event(batch, "workflow_created", created_payload)
    return batch
class WorkflowExecutor:
    """Run the node runs of one batch in id order, persisting state and events."""

    def __init__(self, batch: FileSummaryBatch):
        self.batch = batch

    def run(self) -> None:
        """Execute the whole workflow for ``self.batch``.

        Marks the batch ``RUNNING``, runs every node ordered by id, then marks
        the batch ``SUCCESS``. If any node raises, the batch is marked
        ``FAILED`` with the exception text, a ``workflow_failed`` event is
        recorded, and the exception is not re-raised.
        """
        batch = self.batch
        batch.status = FileSummaryBatch.Status.RUNNING
        batch.started_at = timezone.now()
        batch.save(update_fields=["status", "started_at"])
        record_event(batch, "workflow_started", {"batch_id": batch.pk})

        try:
            for node_run in batch.node_runs.order_by("id"):
                self._run_node(node_run)
        except Exception as exc:
            failure_text = str(exc)
            batch.status = FileSummaryBatch.Status.FAILED
            batch.error_message = failure_text
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(batch, "workflow_failed", {"message": failure_text})
        else:
            batch.status = FileSummaryBatch.Status.SUCCESS
            batch.finished_at = timezone.now()
            batch.save(update_fields=["status", "finished_at"])
            record_event(batch, "workflow_completed", {"batch_id": batch.pk})

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Move one node through RUNNING (10%) to SUCCESS (100%).

        No node-specific work happens between the two transitions; each
        transition is saved and followed by a ``node_progress`` event.
        """
        started = timezone.now()
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = started
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        self._emit_node_progress(node)

        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        self._emit_node_progress(node)

    def _emit_node_progress(self, node: WorkflowNodeRun) -> None:
        """Record a ``node_progress`` event mirroring the node's current state."""
        snapshot = {
            "node_code": node.node_code,
            "status": node.status,
            "progress": node.progress,
        }
        record_event(self.batch, "node_progress", snapshot)
def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None:
    """Start the file-summary workflow for ``batch``.

    Args:
        batch: Batch whose node runs should be executed.
        async_run: When true (default) the workflow runs on a daemon thread
            and this function returns immediately; when false it runs
            synchronously in the calling thread.
    """
    executor = WorkflowExecutor(batch)
    if not async_run:
        executor.run()
        return

    def _run_and_release() -> None:
        # Django keeps one database connection per thread, and a worker
        # thread is outside the request/response cycle that normally closes
        # it. Close this thread's connections explicitly when the run ends
        # so each workflow does not leave an open connection behind.
        from django.db import connections

        try:
            executor.run()
        finally:
            connections.close_all()

    # NOTE(review): if a caller invokes this inside a still-open transaction,
    # the worker thread may not see the batch rows yet; consider deferring the
    # start with transaction.on_commit — confirm against the call sites.
    Thread(target=_run_and_release, daemon=True).start()

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
from dataclasses import dataclass
from review_agent.models import Conversation, FileAttachment
# Substrings that mark a user message as a file-summary request; matching is
# a plain ``in`` test inside evaluate_file_summary_trigger. Rough meanings, in
# order: "auto summary", "file directory", "page count",
# "directory and page count", "file list".
# NOTE(review): "目录与页数" already contains "页数", so it can never be the only match.
TRIGGER_KEYWORDS = ("自动汇总", "文件目录", "页数", "目录与页数", "文件清单")
@dataclass(frozen=True)
class TriggerResult:
    """Immutable outcome of evaluating whether a message starts a workflow."""

    # True when a workflow should be started for the evaluated message.
    should_start: bool
    # Identifier of the workflow to start; left empty when nothing starts.
    workflow_type: str = ""
    # Short code explaining a negative decision; empty on success.
    reason: str = ""
def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether a message should start the file-summary workflow.

    Args:
        conversation: Conversation whose attachments are checked.
        content: Message text; ``None`` or empty is treated as no text.

    Returns:
        ``TriggerResult`` with ``should_start=True`` and workflow type
        ``"file_summary"`` when the text contains a trigger keyword and the
        conversation has at least one active, non-deleted attachment.
        Otherwise ``should_start=False`` with reason ``"not_matched"`` (no
        keyword found) or ``"missing_attachment"``.
    """
    normalized = (content or "").strip()

    keyword_found = False
    for keyword in TRIGGER_KEYWORDS:
        if keyword in normalized:
            keyword_found = True
            break
    if not keyword_found:
        # The attachment query is skipped entirely for unrelated messages.
        return TriggerResult(should_start=False, reason="not_matched")

    usable_attachments = FileAttachment.objects.filter(
        conversation=conversation,
        is_active=True,
    ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    if usable_attachments.exists():
        return TriggerResult(should_start=True, workflow_type="file_summary")
    return TriggerResult(should_start=False, reason="missing_attachment")