diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py
index 0713a33a..a047c383 100644
--- a/src/llm_models/model_client/gemini_client.py
+++ b/src/llm_models/model_client/gemini_client.py
@@ -176,20 +176,24 @@
 def _process_delta(
     delta: GenerateContentResponse,
     fc_delta_buffer: io.StringIO,
     tool_calls_buffer: list[tuple[str, str, dict[str, Any]]],
+    resp: APIResponse | None = None,
 ):
     if not hasattr(delta, "candidates") or not delta.candidates:
         raise RespParseException(delta, "Failed to parse response: missing candidates field")
-    if delta.text:
-        fc_delta_buffer.write(delta.text)
-
     # Handle thought parts (a Gemini-specific field)
     for c in getattr(delta, "candidates", []):
         if c.content and getattr(c.content, "parts", None):
             for p in c.content.parts:
                 if getattr(p, "thought", False) and getattr(p, "text", None):
-                    # Write thought into the buffer so resp.content is never empty
+                    # Save thought text to reasoning_content instead of the visible buffer
+                    if resp is not None:
+                        resp.reasoning_content = (resp.reasoning_content or "") + p.text
+                elif getattr(p, "text", None):
+                    # Visible output goes to the buffer. delta.text is deliberately not
+                    # written again below: it is just the concatenation of these
+                    # non-thought parts, so writing both would duplicate the content.
                     fc_delta_buffer.write(p.text)
     if delta.function_calls:
         # No hasattr check needed: this attribute always exists, even when empty
         for call in delta.function_calls:
@@ -213,9 +217,11 @@ def _build_stream_api_resp(
     _fc_delta_buffer: io.StringIO,
     _tool_calls_buffer: list[tuple[str, str, dict]],
     last_resp: GenerateContentResponse | None = None,  # last_resp is passed in
+    resp: APIResponse | None = None,
 ) -> APIResponse:
     # sourcery skip: simplify-len-comparison, use-assigned-variable
-    resp = APIResponse()
+    if resp is None:
+        resp = APIResponse()
 
     if _fc_delta_buffer.tell() > 0:
         # If the content buffer is non-empty, write it into the APIResponse object
@@ -244,7 +250,8 @@ def _build_stream_api_resp(
         reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None)
         if str(reason).endswith("MAX_TOKENS"):
-            if resp.content and resp.content.strip():
+            has_visible_output = bool(resp.content and resp.content.strip())
+            if has_visible_output:
                 logger.warning(
                     "⚠ Gemini response was partially truncated by the max_tokens limit,\n"
                     "  this may affect the reply; consider raising the model's max_tokens setting!"
                 )
@@ -254,9 +261,10 @@
                 "⚠ Gemini response was truncated by the max_tokens limit,\n"
                 "  please raise the model's max_tokens setting!"
             )
-
+
     if not resp.content and not resp.tool_calls:
-        raise EmptyResponseException()
+        if not getattr(resp, "reasoning_content", None):
+            raise EmptyResponseException()
     return resp
 
 
@@ -274,7 +282,8 @@ async def _default_stream_response_handler(
     _tool_calls_buffer: list[tuple[str, str, dict]] = []  # Buffer for received tool calls
    _usage_record = None  # Usage record
     last_resp: GenerateContentResponse | None = None  # Keep the last chunk
-
+    resp = APIResponse()
+
     def _insure_buffer_closed():
         if _fc_delta_buffer and not _fc_delta_buffer.closed:
             _fc_delta_buffer.close()
@@ -290,6 +299,7 @@ async def _default_stream_response_handler(
                 chunk,
                 _fc_delta_buffer,
                 _tool_calls_buffer,
+                resp=resp,
             )
 
             if chunk.usage_metadata:
@@ -305,6 +315,7 @@ async def _default_stream_response_handler(
                 _fc_delta_buffer,
                 _tool_calls_buffer,
                 last_resp=last_resp,
+                resp=resp,
             ), _usage_record
         except Exception:
             # Ensure the buffer is closed
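
A minimal, self-contained sketch (separate from the patch itself) of the part-routing behavior the first hunk implements. FakePart, FakeContent, FakeCandidate, and FakeChunk are hypothetical stand-ins for the google-genai types, not the real SDK classes; the .text property assumes the SDK convention that it concatenates only non-thought parts, which is also the reason writing both the per-part text and delta.text would duplicate visible output.

import io
from dataclasses import dataclass, field


@dataclass
class FakePart:
    # Hypothetical stand-in for a Part: text payload plus a "thought"
    # flag marking internal reasoning output.
    text: str | None = None
    thought: bool = False


@dataclass
class FakeContent:
    parts: list[FakePart] = field(default_factory=list)


@dataclass
class FakeCandidate:
    content: FakeContent | None = None


@dataclass
class FakeChunk:
    # Hypothetical stand-in for a streaming GenerateContentResponse chunk.
    candidates: list[FakeCandidate] = field(default_factory=list)

    @property
    def text(self) -> str | None:
        # Assumed SDK convention: .text concatenates only non-thought parts.
        joined = "".join(
            p.text
            for c in self.candidates
            if c.content
            for p in c.content.parts
            if p.text and not p.thought
        )
        return joined or None


def route_parts(chunk: FakeChunk, buffer: io.StringIO, reasoning: list[str]) -> None:
    # Mirrors the per-part routing in _process_delta: thought text accumulates
    # separately, visible text is written to the content buffer exactly once.
    for c in chunk.candidates:
        if c.content and c.content.parts:
            for p in c.content.parts:
                if p.thought and p.text:
                    reasoning.append(p.text)
                elif p.text:
                    buffer.write(p.text)


buffer = io.StringIO()
reasoning: list[str] = []
chunk = FakeChunk(
    candidates=[
        FakeCandidate(
            content=FakeContent(
                parts=[
                    FakePart(text="weighing the options... ", thought=True),
                    FakePart(text="The answer is 42."),
                ]
            )
        )
    ]
)
route_parts(chunk, buffer, reasoning)
assert "".join(reasoning) == "weighing the options... "
assert buffer.getvalue() == "The answer is 42."
# chunk.text equals exactly what the per-part loop already wrote, so writing
# it to the buffer as well would double the visible content.
assert chunk.text == buffer.getvalue()

The final assert makes the deduplication argument in the first hunk concrete: under these assumptions, a handler should consume either the per-part texts or delta.text, never both.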