Remove thought output

pull/1269/head
foxplaying 2025-09-26 23:20:18 +08:00 committed by GitHub
parent 5cc1e56904
commit a0ba989710
1 changed file with 21 additions and 9 deletions


@@ -176,20 +176,25 @@ def _process_delta(
delta: GenerateContentResponse,
fc_delta_buffer: io.StringIO,
tool_calls_buffer: list[tuple[str, str, dict[str, Any]]],
resp: APIResponse | None = None,
):
if not hasattr(delta, "candidates") or not delta.candidates:
raise RespParseException(delta, "Failed to parse the response: missing candidates field")
if delta.text:
fc_delta_buffer.write(delta.text)
# Handle thought (a Gemini-specific field)
for c in getattr(delta, "candidates", []):
if c.content and getattr(c.content, "parts", None):
for p in c.content.parts:
if getattr(p, "thought", False) and getattr(p, "text", None):
# Write the thought into the buffer so resp.content is never left empty
# Save it to reasoning_content
if resp is not None:
resp.reasoning_content = (resp.reasoning_content or "") + p.text
elif getattr(p, "text", None):
# Write normal output to the buffer
fc_delta_buffer.write(p.text)
if delta.text:
fc_delta_buffer.write(delta.text)
if delta.function_calls: # Why not use hasattr here? Because this attribute always exists, even when it's empty
for call in delta.function_calls:
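Between the hunks: the change above stops writing Gemini "thought" parts into the visible output buffer and accumulates them on the shared APIResponse instead. A minimal, self-contained sketch of that routing, using hypothetical stand-in classes rather than the real google-genai response types:

import io
from dataclasses import dataclass

@dataclass
class Part:  # stand-in for a Gemini content part
    text: str | None = None
    thought: bool = False  # Gemini marks reasoning parts with thought=True

@dataclass
class Resp:  # stand-in for APIResponse
    reasoning_content: str | None = None

def route_part(p: Part, buffer: io.StringIO, resp: Resp) -> None:
    if p.thought and p.text:
        # reasoning is kept out of the visible output
        resp.reasoning_content = (resp.reasoning_content or "") + p.text
    elif p.text:
        buffer.write(p.text)  # normal text still goes to the visible buffer

buf, r = io.StringIO(), Resp()
for part in (Part("planning...", thought=True), Part("Hello!")):
    route_part(part, buf, r)
assert buf.getvalue() == "Hello!" and r.reasoning_content == "planning..."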
@@ -213,9 +218,11 @@ def _build_stream_api_resp(
_fc_delta_buffer: io.StringIO,
_tool_calls_buffer: list[tuple[str, str, dict]],
last_resp: GenerateContentResponse | None = None, # pass in last_resp
resp: APIResponse | None = None,
) -> APIResponse:
# sourcery skip: simplify-len-comparison, use-assigned-variable
resp = APIResponse()
if resp is None:
resp = APIResponse()
if _fc_delta_buffer.tell() > 0:
# If the main content buffer is non-empty, write it into the APIResponse object
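The `if resp is None` guard replaces the old unconditional `resp = APIResponse()`: when the stream handler passes in the accumulator it has been filling, re-creating the object here would silently discard the reasoning_content collected by _process_delta. A generic sketch of this optional-accumulator pattern (the dict stands in for APIResponse; names are illustrative):

def build_result(shared=None):
    # reuse the caller's accumulator when given, otherwise start fresh
    result = shared if shared is not None else {}
    return result

acc = {"reasoning_content": "chain of thought"}
assert build_result(acc) is acc  # accumulated reasoning survives
assert build_result() == {}      # standalone callers still get a fresh object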
@@ -244,7 +251,8 @@ def _build_stream_api_resp(
reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None)
if str(reason).endswith("MAX_TOKENS"):
if resp.content and resp.content.strip():
has_visible_output = bool(resp.content and resp.content.strip())
if has_visible_output:
logger.warning(
"⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n"
" 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!"
@@ -254,9 +262,10 @@ def _build_stream_api_resp(
"⚠ Gemini 响应因达到 max_tokens 限制被截断,\n"
" 请修改模型 max_tokens 配置!"
)
if not resp.content and not resp.tool_calls:
raise EmptyResponseException()
if not getattr(resp, "reasoning_content", None):
raise EmptyResponseException()
return resp
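With reasoning saved separately, the emptiness check now treats a response as empty only when visible content, tool calls, and reasoning_content are all absent. A hedged reduction of that rule (the project raises EmptyResponseException; the bare function below is illustrative only):

def is_empty_response(content, tool_calls, reasoning_content) -> bool:
    # reject only when every output channel is empty
    return not content and not tool_calls and not reasoning_content

assert is_empty_response("", [], None)            # nothing at all -> empty
assert not is_empty_response("", [], "thinking")  # reasoning alone now counts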
@@ -274,7 +283,8 @@ async def _default_stream_response_handler(
_tool_calls_buffer: list[tuple[str, str, dict]] = [] # tool-call buffer for storing received tool calls
_usage_record = None # usage record
last_resp: GenerateContentResponse | None = None # keep the last chunk
resp = APIResponse()
def _insure_buffer_closed():
if _fc_delta_buffer and not _fc_delta_buffer.closed:
_fc_delta_buffer.close()
@@ -290,6 +300,7 @@ async def _default_stream_response_handler(
chunk,
_fc_delta_buffer,
_tool_calls_buffer,
resp=resp,
)
if chunk.usage_metadata:
@@ -305,6 +316,7 @@ async def _default_stream_response_handler(
_fc_delta_buffer,
_tool_calls_buffer,
last_resp=last_resp,
resp=resp,
), _usage_record
except Exception:
# Ensure the buffer gets closed
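Taken together, the hunks thread a single shared resp object from per-chunk processing through final assembly. An end-to-end miniature of that flow, assuming the handler works as the hunks above suggest (MiniResp and both helpers are hypothetical stand-ins, not the project's API):

import io

class MiniResp:  # hypothetical stand-in for APIResponse
    content: str | None = None
    reasoning_content: str | None = None

def process_chunk(text: str, is_thought: bool, buf: io.StringIO, resp: MiniResp) -> None:
    if is_thought:
        resp.reasoning_content = (resp.reasoning_content or "") + text
    else:
        buf.write(text)

def finalize(buf: io.StringIO, resp: MiniResp) -> MiniResp:
    if buf.tell() > 0:
        resp.content = buf.getvalue()
    if not resp.content and not resp.reasoning_content:
        raise ValueError("empty response")  # EmptyResponseException in the project
    return resp

buf, resp = io.StringIO(), MiniResp()
for text, thought in [("outline the reply", True), ("Hi there!", False)]:
    process_chunk(text, thought, buf, resp)
print(finalize(buf, resp).content)  # -> Hi there!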