feat(file-summary): 接入文件汇总工作流触发

This commit is contained in:
2026-06-06 01:16:22 +08:00
parent eb87d9040d
commit 51e7c0c007
8 changed files with 416 additions and 1 deletions

View File

@@ -0,0 +1,16 @@
from __future__ import annotations
from review_agent.models import FileSummaryBatch, WorkflowEvent
def record_event(batch: FileSummaryBatch, event_type: str, payload: dict | None = None) -> WorkflowEvent:
    """Persist one workflow event for ``batch`` and return the created row.

    Args:
        batch: Batch the event belongs to.
        event_type: Short event identifier stored on the row.
        payload: Optional event data; any falsy value (``None`` or an empty
            dict) is stored as a fresh empty dict.

    Returns:
        The ``WorkflowEvent`` created through ``WorkflowEvent.objects.create``.
    """
    event_payload = payload if payload else {}
    created_event = WorkflowEvent.objects.create(
        batch=batch,
        event_type=event_type,
        payload=event_payload,
    )
    return created_event
def serialize_event(event: WorkflowEvent) -> dict[str, object]:
    """Convert a workflow event into a JSON-friendly dictionary.

    The result carries the event's primary key, its type, its payload object
    (not copied) and the creation time rendered with ``isoformat()``.
    """
    serialized: dict[str, object] = {}
    serialized["id"] = event.pk
    serialized["event_type"] = event.event_type
    serialized["payload"] = event.payload
    serialized["created_at"] = event.created_at.isoformat()
    return serialized

View File

@@ -3,6 +3,8 @@ from django.http import Http404, JsonResponse
from django.views.decorators.http import require_http_methods
from review_agent.models import Conversation, FileAttachment
from review_agent.models import FileSummaryBatch, WorkflowEvent
from .events import serialize_event
from .storage import save_uploaded_attachment, serialize_attachment
@@ -56,3 +58,50 @@ def attachment_detail(request, conversation_id: int, attachment_id: int):
attachment.is_active = False
attachment.save(update_fields=["upload_status", "is_active"])
return JsonResponse({"ok": True, "attachment": serialize_attachment(attachment)})
@require_http_methods(["GET"])
@login_required
def batch_status(request, batch_id: int):
    """Return a JSON snapshot of one file-summary batch and its node runs.

    The batch is looked up by primary key and by the requesting user; when no
    such row exists an ``Http404`` is raised. Node runs are listed in
    ascending ``id`` order.
    """
    owned_batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not owned_batch:
        raise Http404("批次不存在。")

    # Summary counters and identity of the batch itself.
    batch_payload = {
        "id": owned_batch.pk,
        "batch_no": owned_batch.batch_no,
        "status": owned_batch.status,
        "product_name": owned_batch.product_name,
        "total_files": owned_batch.total_files,
        "success_files": owned_batch.success_files,
        "failed_files": owned_batch.failed_files,
        "total_pages": owned_batch.total_pages,
    }

    # One entry per workflow node, in creation (id) order.
    node_payloads = []
    for node_run in owned_batch.node_runs.order_by("id"):
        node_payloads.append(
            {
                "node_code": node_run.node_code,
                "node_name": node_run.node_name,
                "status": node_run.status,
                "progress": node_run.progress,
                "message": node_run.message,
            }
        )

    return JsonResponse({"batch": batch_payload, "nodes": node_payloads})
@require_http_methods(["GET"])
@login_required
def batch_events(request, batch_id: int):
    """Return the workflow events of a batch newer than an optional cursor.

    The ``after`` query parameter is read as an event id; a missing, empty or
    non-integer value falls back to ``0``. Events with a primary key greater
    than the cursor are returned in ascending ``id`` order. An ``Http404`` is
    raised when the batch does not exist for the requesting user.
    """
    owned_batch = FileSummaryBatch.objects.filter(pk=batch_id, user=request.user).first()
    if not owned_batch:
        raise Http404("批次不存在。")

    raw_cursor = request.GET.get("after") or "0"
    try:
        cursor_id = int(raw_cursor)
    except ValueError:
        # Unparseable cursor: fall back to returning every event.
        cursor_id = 0

    newer_events = WorkflowEvent.objects.filter(batch=owned_batch, pk__gt=cursor_id).order_by("id")
    serialized_events = []
    for workflow_event in newer_events:
        serialized_events.append(serialize_event(workflow_event))
    return JsonResponse({"events": serialized_events})

View File

@@ -0,0 +1,127 @@
from __future__ import annotations
from threading import Thread
from uuid import uuid4
from django.db import transaction
from django.utils import timezone
from review_agent.models import (
Conversation,
FileAttachment,
FileSummaryBatch,
FileSummaryBatchAttachment,
Message,
WorkflowNodeRun,
)
from .events import record_event
# Workflow nodes as (node_code, node_name) pairs. create_file_summary_batch
# creates one WorkflowNodeRun per entry, iterating this list in order.
# node_name is a display label and is also embedded in the per-node progress
# messages written by WorkflowExecutor._run_node.
NODE_DEFINITIONS = [
    ("upload", "附件固化"),
    ("extract", "压缩包解压"),
    ("inventory", "文件扫描"),
    ("page_count", "页数统计"),
    ("product_detect", "产品识别"),
    ("report", "报告输出"),
    ("complete", "完成"),
]
def build_batch_no() -> str:
    """Build a batch number of the form ``FS-<YYYYmmddHHMMSS>-<6 hex chars>``.

    The timestamp comes from ``timezone.localtime()`` and the suffix is the
    first six hex characters of a fresh ``uuid4``.
    """
    timestamp = timezone.localtime().strftime('%Y%m%d%H%M%S')
    random_suffix = uuid4().hex[:6]
    return "FS-" + timestamp + "-" + random_suffix
@transaction.atomic
def create_file_summary_batch(
    *,
    conversation: Conversation,
    user,
    trigger_message: Message | None = None,
) -> FileSummaryBatch:
    """Create a file-summary batch covering a conversation's usable attachments.

    Runs inside ``transaction.atomic``. The conversation's active, non-deleted
    attachments are fetched with ``select_for_update()``; each one is linked to
    the new batch and its upload status is set to ``BOUND``. One
    ``WorkflowNodeRun`` is then created per ``NODE_DEFINITIONS`` entry and a
    ``workflow_created`` event is recorded.

    Args:
        conversation: Conversation whose attachments are bound to the batch.
        user: Owner stored on the batch.
        trigger_message: Optional message stored on the batch as its trigger.

    Returns:
        The newly created ``FileSummaryBatch``.

    Raises:
        ValueError: If the conversation has no usable attachment.
    """
    usable_attachments = (
        FileAttachment.objects.select_for_update()
        .filter(conversation=conversation, is_active=True)
        .exclude(upload_status=FileAttachment.UploadStatus.DELETED)
        .order_by("original_name", "-created_at")
    )
    attachments_to_bind = list(usable_attachments)
    if len(attachments_to_bind) == 0:
        raise ValueError("当前对话没有可用附件。")

    new_batch = FileSummaryBatch.objects.create(
        conversation=conversation,
        user=user,
        trigger_message=trigger_message,
        batch_no=build_batch_no(),
    )

    # Link every attachment to the batch and flag it as bound, one at a time.
    bound_status = FileAttachment.UploadStatus.BOUND
    for item in attachments_to_bind:
        FileSummaryBatchAttachment.objects.create(batch=new_batch, attachment=item)
        item.upload_status = bound_status
        item.save(update_fields=["upload_status"])

    # Pre-create the node runs in definition order.
    for node_code, node_name in NODE_DEFINITIONS:
        WorkflowNodeRun.objects.create(batch=new_batch, node_code=node_code, node_name=node_name)

    created_payload = {"batch_id": new_batch.pk, "batch_no": new_batch.batch_no}
    record_event(new_batch, "workflow_created", created_payload)
    return new_batch
class WorkflowExecutor:
    """Run every node of one file-summary batch and persist its progress.

    The executor marks the batch RUNNING, walks its node runs in ascending
    ``id`` order, and finally marks the batch SUCCESS or FAILED. Every
    transition is saved on the model and mirrored as a workflow event through
    ``record_event``.
    """

    def __init__(self, batch: FileSummaryBatch):
        # Batch instance that is mutated and saved as the workflow progresses.
        self.batch = batch

    def run(self) -> None:
        """Execute all nodes of the batch and record the final outcome.

        Any exception raised while running nodes is caught here: the batch is
        marked FAILED with ``str(exc)`` as its error message, a
        ``workflow_failed`` event is recorded, the traceback is logged, and
        the method returns normally.
        """
        # Local import so the logging dependency travels with this block.
        import logging

        self.batch.status = FileSummaryBatch.Status.RUNNING
        self.batch.started_at = timezone.now()
        self.batch.save(update_fields=["status", "started_at"])
        record_event(self.batch, "workflow_started", {"batch_id": self.batch.pk})
        try:
            for node in self.batch.node_runs.order_by("id"):
                self._run_node(node)
        except Exception as exc:
            # Top-level boundary of the workflow (possibly a background
            # thread): only str(exc) is persisted, so log the full traceback
            # here or it is lost.
            logging.getLogger(__name__).exception(
                "File summary workflow failed for batch %s", self.batch.pk
            )
            self.batch.status = FileSummaryBatch.Status.FAILED
            self.batch.error_message = str(exc)
            self.batch.finished_at = timezone.now()
            self.batch.save(update_fields=["status", "error_message", "finished_at"])
            record_event(self.batch, "workflow_failed", {"message": str(exc)})
            return
        self.batch.status = FileSummaryBatch.Status.SUCCESS
        self.batch.finished_at = timezone.now()
        self.batch.save(update_fields=["status", "finished_at"])
        record_event(self.batch, "workflow_completed", {"batch_id": self.batch.pk})

    def _run_node(self, node: WorkflowNodeRun) -> None:
        """Move one node through RUNNING (10%) to SUCCESS (100%).

        Each transition is saved and emitted as a ``node_progress`` event. No
        node-specific work is performed between the two transitions here.
        """
        now = timezone.now()
        node.status = WorkflowNodeRun.Status.RUNNING
        node.progress = 10
        node.started_at = now
        node.message = f"{node.node_name}处理中"
        node.save(update_fields=["status", "progress", "started_at", "message"])
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress},
        )
        # NOTE(review): if an exception escapes before the SUCCESS save below,
        # the node row keeps its RUNNING status; only the batch is marked
        # FAILED by run().
        node.status = WorkflowNodeRun.Status.SUCCESS
        node.progress = 100
        node.finished_at = timezone.now()
        node.message = f"{node.node_name}完成"
        node.save(update_fields=["status", "progress", "finished_at", "message"])
        record_event(
            self.batch,
            "node_progress",
            {"node_code": node.node_code, "status": node.status, "progress": node.progress},
        )
def start_file_summary_workflow(batch: FileSummaryBatch, *, async_run: bool = True) -> None:
    """Run the file-summary workflow for ``batch``, inline or in a thread.

    Args:
        batch: Batch to execute.
        async_run: When false, the workflow runs synchronously in the calling
            thread and this function returns after it finishes. When true
            (default), it runs in a daemon thread and this function returns
            immediately.
    """
    executor = WorkflowExecutor(batch)
    if not async_run:
        executor.run()
        return

    # Local import so this block brings its own dependency into scope.
    from django.db import connections

    def _run_and_release_connections() -> None:
        # Django database connections are per-thread, and a manually spawned
        # thread is outside the request/response cycle that normally cleans
        # them up, so close whatever this worker opened once it is done.
        try:
            executor.run()
        finally:
            connections.close_all()

    Thread(target=_run_and_release_connections, daemon=True).start()

View File

@@ -0,0 +1,30 @@
from __future__ import annotations
from dataclasses import dataclass
from review_agent.models import Conversation, FileAttachment
# Phrases that mark a chat message as a file-summary request;
# evaluate_file_summary_trigger matches them as plain substrings.
# NOTE(review): "页数" is itself a substring of "目录与页数", so the longer
# entry never changes the match result.
TRIGGER_KEYWORDS = ("自动汇总", "文件目录", "页数", "目录与页数", "文件清单")
@dataclass(frozen=True)
class TriggerResult:
    """Immutable outcome of evaluating whether a message starts a workflow."""

    # True when a workflow should be started for the message.
    should_start: bool
    # Workflow identifier; evaluate_file_summary_trigger sets "file_summary"
    # on a positive result and leaves it empty otherwise.
    workflow_type: str = ""
    # Why no workflow is started: "not_matched" or "missing_attachment" in
    # evaluate_file_summary_trigger; empty on a positive result.
    reason: str = ""
def evaluate_file_summary_trigger(conversation: Conversation, content: str) -> TriggerResult:
    """Decide whether ``content`` should start the file-summary workflow.

    Returns a ``TriggerResult`` with:
      * ``reason="not_matched"`` when no trigger keyword occurs in the text;
      * ``reason="missing_attachment"`` when a keyword matches but the
        conversation has no active, non-deleted attachment;
      * ``should_start=True`` and ``workflow_type="file_summary"`` otherwise.
    """
    normalized = content.strip() if content else ""

    keyword_found = False
    for keyword in TRIGGER_KEYWORDS:
        if keyword in normalized:
            keyword_found = True
            break
    if not keyword_found:
        return TriggerResult(should_start=False, reason="not_matched")

    # The attachment lookup only happens once a keyword has matched.
    usable_attachments = FileAttachment.objects.filter(
        conversation=conversation,
        is_active=True,
    ).exclude(upload_status=FileAttachment.UploadStatus.DELETED)
    if usable_attachments.exists():
        return TriggerResult(should_start=True, workflow_type="file_summary")
    return TriggerResult(should_start=False, reason="missing_attachment")

View File

@@ -3,8 +3,11 @@ from __future__ import annotations
import json
from django.db.models import Q, QuerySet
from django.conf import settings
from django.utils import timezone
from .file_summary.workflow import create_file_summary_batch, start_file_summary_workflow
from .file_summary.workflow_trigger import evaluate_file_summary_trigger
from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
from .models import Conversation, Message
@@ -88,6 +91,7 @@ def stream_message(conversation: Conversation, content: str):
user_message = append_user_message(conversation, content)
assistant_parts: list[str] = []
trigger = evaluate_file_summary_trigger(conversation, content)
yield sse_event(
"meta",
@@ -99,6 +103,51 @@ def stream_message(conversation: Conversation, content: str):
},
)
if trigger.reason == "missing_attachment":
reply_content = "请先在当前对话右侧上传需要汇总的文件或压缩包,然后再发送自动汇总指令。"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
if trigger.should_start:
batch = create_file_summary_batch(
conversation=conversation,
user=conversation.user,
trigger_message=user_message,
)
start_file_summary_workflow(
batch,
async_run=getattr(settings, "FILE_SUMMARY_ASYNC", True),
)
reply_content = f"已启动文件目录与页数自动汇总工作流,批次号:{batch.batch_no}"
assistant_message = append_assistant_message(conversation, reply_content)
yield sse_event(
"workflow_started",
{
"workflow_type": "file_summary",
"batch_id": batch.pk,
"batch_no": batch.batch_no,
},
)
yield sse_event("chunk", {"delta": reply_content})
yield sse_event(
"done",
{
"assistant_message_id": assistant_message.pk,
"conversation_id": conversation.pk,
"title": conversation.title,
},
)
return
try:
for chunk in stream_reply(conversation, content):
assistant_parts.append(chunk)

View File

@@ -1,6 +1,6 @@
from django.urls import path
from .file_summary.views import attachment_detail, attachments
from .file_summary.views import attachment_detail, attachments, batch_events, batch_status
urlpatterns = [
@@ -19,4 +19,14 @@ urlpatterns = [
attachment_detail,
name="file_summary_attachment_detail",
),
path(
"api/review-agent/file-summary/<int:batch_id>/status/",
batch_status,
name="file_summary_batch_status",
),
path(
"api/review-agent/file-summary/<int:batch_id>/events/",
batch_events,
name="file_summary_batch_events",
),
]