fix(agent): 增强 LLM 流式回复兜底

2026-06-06 19:45:13 +08:00
parent c78ff3a1fd
commit daa0642142
3 changed files with 82 additions and 7 deletions
--- a/review_agent/llm.py
+++ b/review_agent/llm.py
@@ -1,4 +1,5 @@
 import json
 import logging
 from urllib import error, request
 from django.conf import settings
@@ -12,6 +13,9 @@ class LLMRequestError(RuntimeError):
    """Raised when the remote LLM provider call fails."""
 logger = logging.getLogger(__name__)
 def generate_reply(conversation, user_message: str) -> str:
    """Calls the SiliconFlow OpenAI-compatible chat endpoint and returns assistant text."""
@@ -130,7 +134,11 @@ def stream_reply(conversation, user_message: str):
                data = line[5:].strip()
                if data == "[DONE]":
                    break
                try:
                    payload = json.loads(data)
                except json.JSONDecodeError:
                    logger.warning("Skipping malformed LLM stream data", extra={"data": data[:200]})
                    continue
                delta = (
                    payload.get("choices", [{}])[0]
                    .get("delta", {})
--- a/review_agent/services.py
+++ b/review_agent/services.py
@@ -219,25 +219,51 @@ def stream_message(conversation: Conversation, content: str):
        )
        return
    stream_failed = False
    stream_error = ""
    try:
        for chunk in stream_reply(conversation, content):
            assistant_parts.append(chunk)
            yield sse_event("chunk", {"delta": chunk})
    except (LLMConfigurationError, LLMRequestError) as exc:
-        fallback = f"模型调用失败：{exc}"
+        stream_failed = True
-        assistant_parts = [fallback]
+        stream_error = str(exc)
        logger.warning(
            "LLM stream failed",
            extra={"conversation_id": conversation.pk, "error": str(exc)},
        )
        yield sse_event("error", {"message": fallback})
    except Exception as exc:
-        fallback = f"回复生成中断：{exc}"
+        stream_failed = True
-        assistant_parts.append("\n\n" + fallback)
+        stream_error = str(exc)
        logger.exception(
            "Unexpected stream failure",
            extra={"conversation_id": conversation.pk, "error": str(exc)},
        )
    if stream_failed:
        try:
            fallback_reply = generate_reply(conversation, content)
            assistant_parts = [fallback_reply]
            logger.info(
                "Non-stream fallback reply succeeded",
                extra={"conversation_id": conversation.pk, "content_length": len(fallback_reply)},
            )
            yield sse_event("replace", {"content": fallback_reply})
        except (LLMConfigurationError, LLMRequestError) as exc:
            fallback = f"模型调用失败：{exc}"
            assistant_parts = [fallback]
            logger.warning(
                "Non-stream fallback reply failed",
                extra={"conversation_id": conversation.pk, "error": str(exc), "stream_error": stream_error},
            )
            yield sse_event("error", {"message": fallback})
        except Exception as exc:
            fallback = f"回复生成中断：{stream_error or exc}"
            assistant_parts.append("\n\n" + fallback)
            logger.exception(
                "Non-stream fallback crashed",
                extra={"conversation_id": conversation.pk, "error": str(exc), "stream_error": stream_error},
            )
            yield sse_event("error", {"message": fallback})
    assistant_message = append_assistant_message(conversation, "".join(assistant_parts).strip())
--- a/tests/test_llm_streaming.py
+++ b/tests/test_llm_streaming.py
@@ -0,0 +1,41 @@
 import io
 from urllib import request
 import pytest
 from review_agent.llm import stream_reply
 from review_agent.models import Conversation
 # Apply pytest-django's ``django_db`` marker to every test in this module;
 # the test below creates a user row and a Conversation row.
 pytestmark = pytest.mark.django_db
 class FakeStreamingResponse:
    """Canned streaming response used in place of a real ``urlopen`` result.

    Iterating yields four fixed SSE-style byte lines and the object can be
    used in a ``with`` statement, entering as itself.
    """

    # Frames in emission order: a valid delta ("A"), a payload that is not
    # JSON, a second valid delta ("B"), and the "[DONE]" terminator.
    _FRAMES = (
        b'data: {"choices":[{"delta":{"content":"A"}}]}\n\n',
        b"data: not-json\n\n",
        b'data: {"choices":[{"delta":{"content":"B"}}]}\n\n',
        b"data: [DONE]\n\n",
    )

    def __enter__(self):
        """Enter the ``with`` block, handing back this same object."""
        return self

    def __exit__(self, exc_type, exc, traceback):
        """Leave the ``with`` block without suppressing any exception."""
        # A falsy return lets an in-flight exception keep propagating.
        return False

    def __iter__(self):
        """Return a fresh iterator over the canned frames on every call."""
        return iter(self._FRAMES)
 def test_stream_reply_skips_malformed_sse_data(monkeypatch, settings, django_user_model):
    """Check that ``stream_reply`` yields only the well-formed deltas.

    The fake response emits "A", a non-JSON ``data:`` line, "B", and
    ``[DONE]``; the collected chunks must be exactly ``["A", "B"]``.
    """

    def fake_urlopen(req, timeout):
        # Serve the canned stream instead of performing a network request.
        return FakeStreamingResponse()

    # LLM settings overridden for this test (presumably read by
    # stream_reply -- its body is not part of this file).
    settings.LLM_API_KEY = "key"
    settings.LLM_MODEL = "model"
    settings.LLM_BASE_URL = "https://example.test/v1"
    monkeypatch.setattr(request, "urlopen", fake_urlopen)

    # A conversation needs an owning user, so create both rows.
    owner = django_user_model.objects.create_user(username="owner", password="pass")
    conversation = Conversation.objects.create(user=owner, title="会话")

    received = list(stream_reply(conversation, "你好"))
    assert received == ["A", "B"]