feat(chat): 支持流式回复与用户节点导航

2026-06-05 00:11:53 +08:00
parent 84e045f5ab
commit 7ab5aad938
8 changed files with 676 additions and 15 deletions
--- a/review_agent/llm.py
+++ b/review_agent/llm.py
@@ -53,6 +53,57 @@ def generate_reply(conversation, user_message: str) -> str:
        raise LLMRequestError("模型接口返回格式不符合预期。") from exc


+def stream_reply(conversation, user_message: str):
+    """Streams incremental assistant text from the SiliconFlow chat endpoint."""
+
+    if not settings.LLM_API_KEY:
+        raise LLMConfigurationError("缺少 LLM_API_KEY 配置。")
+    if not settings.LLM_MODEL:
+        raise LLMConfigurationError("缺少 LLM_MODEL 配置。")
+
+    payload = {
+        "model": settings.LLM_MODEL,
+        "messages": build_messages(conversation, user_message),
+        "temperature": 0.3,
+        "stream": True,
+    }
+    body = json.dumps(payload).encode("utf-8")
+    endpoint = f"{settings.LLM_BASE_URL.rstrip('/')}/chat/completions"
+
+    http_request = request.Request(
+        endpoint,
+        data=body,
+        headers={
+            "Authorization": f"Bearer {settings.LLM_API_KEY}",
+            "Content-Type": "application/json",
+        },
+        method="POST",
+    )
+
+    try:
+        with request.urlopen(http_request, timeout=300) as response:
+            for raw_line in response:
+                line = raw_line.decode("utf-8", errors="ignore").strip()
+                if not line or not line.startswith("data:"):
+                    continue
+                data = line[5:].strip()
+                if data == "[DONE]":
+                    break
+                payload = json.loads(data)
+                delta = (
+                    payload.get("choices", [{}])[0]
+                    .get("delta", {})
+                    .get("content", "")
+                )
+                if delta:
+                    yield delta
+    except error.HTTPError as exc:
+        details = exc.read().decode("utf-8", errors="ignore")
+        raise LLMRequestError(f"模型接口调用失败：HTTP {exc.code} {details}") from exc
+    except error.URLError as exc:
+        raise LLMRequestError(f"模型接口调用失败：{exc.reason}") from exc
+
+
 def build_messages(conversation, latest_user_message: str) -> list[dict[str, str]]:
    """Builds system and conversation history messages for the provider call."""

--- a/review_agent/services.py
+++ b/review_agent/services.py
@@ -1,9 +1,11 @@
 from __future__ import annotations

+import json
+
 from django.db.models import Q, QuerySet
 from django.utils import timezone

-from .llm import LLMConfigurationError, LLMRequestError, generate_reply
+from .llm import LLMConfigurationError, LLMRequestError, generate_reply, stream_reply
 from .models import Conversation, Message


@@ -81,6 +83,47 @@ def send_message(conversation: Conversation, content: str) -> tuple[Message, Mes
    return user_message, assistant_message


+def stream_message(conversation: Conversation, content: str):
+    """Yields SSE events while collecting a streamed assistant reply."""
+
+    user_message = append_user_message(conversation, content)
+    assistant_parts: list[str] = []
+
+    yield sse_event(
+        "meta",
+        {
+            "conversation_id": conversation.pk,
+            "title": conversation.title or build_conversation_title(content),
+            "user_message_id": user_message.pk,
+            "user_message": user_message.content,
+        },
+    )
+
+    try:
+        for chunk in stream_reply(conversation, content):
+            assistant_parts.append(chunk)
+            yield sse_event("chunk", {"delta": chunk})
+    except (LLMConfigurationError, LLMRequestError) as exc:
+        fallback = f"模型调用失败：{exc}"
+        assistant_parts = [fallback]
+        yield sse_event("error", {"message": fallback})
+
+    assistant_message = append_assistant_message(conversation, "".join(assistant_parts).strip())
+
+    if conversation.title.startswith("新对话"):
+        conversation.title = build_conversation_title(content)
+        conversation.save(update_fields=["title", "updated_at"])
+
+    yield sse_event(
+        "done",
+        {
+            "assistant_message_id": assistant_message.pk,
+            "conversation_id": conversation.pk,
+            "title": conversation.title,
+        },
+    )
+
+
 def build_conversation_title(content: str) -> str:
    """Creates a concise title from the first user message."""

@@ -88,3 +131,9 @@ def build_conversation_title(content: str) -> str:
    if not normalized:
        return "新对话"
    return normalized[:24]
+
+
+def sse_event(event_name: str, payload: dict[str, object]) -> str:
+    """Formats one server-sent event frame."""
+
+    return f"event: {event_name}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n"
--- a/review_agent/views.py
+++ b/review_agent/views.py
@@ -1,9 +1,15 @@
 from django.contrib.auth.decorators import login_required
-from django.http import HttpRequest, HttpResponse
+from django.http import HttpRequest, HttpResponse, JsonResponse, StreamingHttpResponse
 from django.shortcuts import redirect, render
 from django.views.decorators.http import require_http_methods

-from .services import create_conversation, get_conversation_for_user, list_conversations, send_message
+from .services import (
+    create_conversation,
+    get_conversation_for_user,
+    list_conversations,
+    send_message,
+    stream_message,
+)


@login_required
@@ -45,3 +51,25 @@ def workspace(request: HttpRequest) -> HttpResponse:
            "messages": current.messages.all() if current else [],
        },
    )
+
+
+@login_required
+@require_http_methods(["POST"])
+def stream_chat(request: HttpRequest) -> HttpResponse:
+    """Streams one assistant reply so the UI can render incremental output."""
+
+    content = (request.POST.get("prompt") or "").strip()
+    if not content:
+        return JsonResponse({"error": "消息内容不能为空。"}, status=400)
+
+    conversation = get_conversation_for_user(request.user, request.POST.get("conversation_id"))
+    if not conversation:
+        conversation = create_conversation(request.user)
+
+    response = StreamingHttpResponse(
+        streaming_content=stream_message(conversation, content),
+        content_type="text/event-stream",
+    )
+    response["Cache-Control"] = "no-cache"
+    response["X-Accel-Buffering"] = "no"
+    return response