From a0ba9897107d0f9864a643fa472c3eedd77a66af Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 26 Sep 2025 23:20:18 +0800 Subject: [PATCH 1/9] =?UTF-8?q?=E7=A7=BB=E9=99=A4thought=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 30 ++++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 0713a33a..a047c383 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -176,20 +176,25 @@ def _process_delta( delta: GenerateContentResponse, fc_delta_buffer: io.StringIO, tool_calls_buffer: list[tuple[str, str, dict[str, Any]]], + resp: APIResponse | None = None, ): if not hasattr(delta, "candidates") or not delta.candidates: raise RespParseException(delta, "响应解析失败,缺失candidates字段") - if delta.text: - fc_delta_buffer.write(delta.text) - # 处理 thought(Gemini 的特殊字段) for c in getattr(delta, "candidates", []): if c.content and getattr(c.content, "parts", None): for p in c.content.parts: if getattr(p, "thought", False) and getattr(p, "text", None): - # 把 thought 写入 buffer,避免 resp.content 永远为空 + # 保存到 reasoning_content + if resp is not None: + resp.reasoning_content = (resp.reasoning_content or "") + p.text + elif getattr(p, "text", None): + # 正常输出写入 buffer fc_delta_buffer.write(p.text) + + if delta.text: + fc_delta_buffer.write(delta.text) if delta.function_calls: # 为什么不用hasattr呢,是因为这个属性一定有,即使是个空的 for call in delta.function_calls: @@ -213,9 +218,11 @@ def _build_stream_api_resp( _fc_delta_buffer: io.StringIO, _tool_calls_buffer: list[tuple[str, str, dict]], last_resp: GenerateContentResponse | None = None, # 传入 last_resp + resp: APIResponse | None = None, ) -> APIResponse: # sourcery skip: simplify-len-comparison, use-assigned-variable - resp = APIResponse() + if resp is None: + resp = APIResponse() if _fc_delta_buffer.tell() > 0: # 如果正式内容缓冲区不为空,则将其写入APIResponse对象 @@ -244,7 +251,8 @@ def _build_stream_api_resp( reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) if str(reason).endswith("MAX_TOKENS"): - if resp.content and resp.content.strip(): + has_visible_output = bool(resp.content and resp.content.strip()) + if has_visible_output: logger.warning( "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" @@ -254,9 +262,10 @@ def _build_stream_api_resp( "⚠ Gemini 响应因达到 max_tokens 限制被截断,\n" " 请修改模型 max_tokens 配置!" 
) - + if not resp.content and not resp.tool_calls: - raise EmptyResponseException() + if not getattr(resp, "reasoning_content", None): + raise EmptyResponseException() return resp @@ -274,7 +283,8 @@ async def _default_stream_response_handler( _tool_calls_buffer: list[tuple[str, str, dict]] = [] # 工具调用缓冲区,用于存储接收到的工具调用 _usage_record = None # 使用情况记录 last_resp: GenerateContentResponse | None = None # 保存最后一个 chunk - + resp = APIResponse() + def _insure_buffer_closed(): if _fc_delta_buffer and not _fc_delta_buffer.closed: _fc_delta_buffer.close() @@ -290,6 +300,7 @@ async def _default_stream_response_handler( chunk, _fc_delta_buffer, _tool_calls_buffer, + resp=resp, ) if chunk.usage_metadata: @@ -305,6 +316,7 @@ async def _default_stream_response_handler( _fc_delta_buffer, _tool_calls_buffer, last_resp=last_resp, + resp=resp, ), _usage_record except Exception: # 确保缓冲区被关闭 From 7597af64d8de9250c74178c0e061c2804aa1319c Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 26 Sep 2025 23:26:11 +0800 Subject: [PATCH 2/9] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99?= =?UTF-8?q?=E5=86=99=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index a047c383..2a95f765 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -192,9 +192,6 @@ def _process_delta( elif getattr(p, "text", None): # 正常输出写入 buffer fc_delta_buffer.write(p.text) - - if delta.text: - fc_delta_buffer.write(delta.text) if delta.function_calls: # 为什么不用hasattr呢,是因为这个属性一定有,即使是个空的 for call in delta.function_calls: From 2009ee3b5e9bfd77c3dbb734e6298a557bf9703e Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 28 Sep 2025 07:58:16 +0800 Subject: [PATCH 3/9] =?UTF-8?q?=E5=90=88=E5=B9=B6=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index be47f9be..9656127d 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -255,10 +255,7 @@ def _build_stream_api_resp( " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" ) else: - logger.warning( - "⚠ Gemini 响应因达到 max_tokens 限制被截断,\n" - " 请修改模型 max_tokens 配置!" 
- ) + logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): From 8cad5bb45dfee27add46b925636210470014abc8 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 28 Sep 2025 13:37:06 +0800 Subject: [PATCH 4/9] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 9656127d..89a3c53b 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -244,9 +244,12 @@ def _build_stream_api_resp( # 检查是否因为 max_tokens 截断 reason = None if last_resp and getattr(last_resp, "candidates", None): - c0 = last_resp.candidates[0] - reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) - + for c in last_resp.candidates: + fr = getattr(c, "finish_reason", None) or getattr(c, "finishReason", None) + if fr: + reason = str(fr) + break + if str(reason).endswith("MAX_TOKENS"): has_visible_output = bool(resp.content and resp.content.strip()) if has_visible_output: From d7c093f21c8c6acad0fab2403a09427e2b76f1d2 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 3 Oct 2025 02:31:23 +0800 Subject: [PATCH 5/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=88=AA=E6=96=AD?= =?UTF-8?q?=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/openai_client.py | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py index 148ec8cb..dd92b9e8 100644 --- a/src/llm_models/model_client/openai_client.py +++ b/src/llm_models/model_client/openai_client.py @@ -199,6 +199,7 @@ def _build_stream_api_resp( _fc_delta_buffer: io.StringIO, _rc_delta_buffer: io.StringIO, _tool_calls_buffer: list[tuple[str, str, io.StringIO]], + finish_reason: str | None = None, ) -> APIResponse: resp = APIResponse() @@ -236,6 +237,16 @@ def _build_stream_api_resp( resp.tool_calls.append(ToolCall(call_id, function_name, arguments)) + # 检查 max_tokens 截断 + if finish_reason == "length": + if resp.content and resp.content.strip(): + logger.warning( + "⚠ OpenAI 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" 
+ ) + else: + logger.warning("⚠ OpenAI 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + if not resp.content and not resp.tool_calls: raise EmptyResponseException() @@ -258,6 +269,7 @@ async def _default_stream_response_handler( _fc_delta_buffer = io.StringIO() # 正式内容缓冲区,用于存储接收到的正式内容 _tool_calls_buffer: list[tuple[str, str, io.StringIO]] = [] # 工具调用缓冲区,用于存储接收到的工具调用 _usage_record = None # 使用情况记录 + finish_reason: str | None = None # 记录最后的 finish_reason def _insure_buffer_closed(): # 确保缓冲区被关闭 @@ -285,6 +297,9 @@ async def _default_stream_response_handler( continue # 跳过本帧,避免访问 choices[0] delta = event.choices[0].delta # 获取当前块的delta内容 + if hasattr(event.choices[0], "finish_reason") and event.choices[0].finish_reason: + finish_reason = event.choices[0].finish_reason + if hasattr(delta, "reasoning_content") and delta.reasoning_content: # type: ignore # 标记:有独立的推理内容块 _has_rc_attr_flag = True @@ -311,6 +326,7 @@ async def _default_stream_response_handler( _fc_delta_buffer, _rc_delta_buffer, _tool_calls_buffer, + finish_reason=finish_reason, ), _usage_record except Exception: # 确保缓冲区被关闭 @@ -381,6 +397,23 @@ def _default_normal_response_parser( # 将原始响应存储在原始数据中 api_response.raw_data = resp + # 检查 max_tokens 截断 + try: + choice0 = resp.choices[0] + reason = getattr(choice0, "finish_reason", None) + if reason and reason == "length": + has_real_output = bool(api_response.content and api_response.content.strip()) + if has_real_output: + logger.warning( + "⚠ OpenAI 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" + ) + else: + logger.warning("⚠ OpenAI 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + return api_response, _usage_record + except Exception as e: + logger.debug(f"检查 MAX_TOKENS 截断时异常: {e}") + if not api_response.content and not api_response.tool_calls: raise EmptyResponseException() From 3231f4f2f8042e47c79f6714d78651e067bd39d2 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 17 Oct 2025 23:11:34 +0800 Subject: [PATCH 6/9] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=AF=A6=E7=BB=86?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 40 ++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 89a3c53b..02889373 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -249,16 +249,29 @@ def _build_stream_api_resp( if fr: reason = str(fr) break - + if str(reason).endswith("MAX_TOKENS"): + warn_target = "max_tokens" + try: + usage_meta = getattr(last_resp, "usage_metadata", None) + if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): + for detail in usage_meta.prompt_tokens_details: + modality = str(getattr(detail, "modality", "")).upper() + token_count = getattr(detail, "token_count", 0) + if "IMAGE" in modality and token_count > 0: + warn_target = "img_tokens" + break + except Exception as ee: + logger.debug(f"检测 img_tokens 信息失败: {ee}") + has_visible_output = bool(resp.content and resp.content.strip()) if has_visible_output: logger.warning( - "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" - " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" + f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" + f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" 
) else: - logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): @@ -379,6 +392,19 @@ def _default_normal_response_parser( c0 = resp.candidates[0] reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) if reason and "MAX_TOKENS" in str(reason): + warn_target = "max_tokens" + try: + usage_meta = getattr(resp, "usage_metadata", None) + if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): + for detail in usage_meta.prompt_tokens_details: + modality = str(getattr(detail, "modality", "")).upper() + token_count = getattr(detail, "token_count", 0) + if "IMAGE" in modality and token_count > 0: + warn_target = "img_tokens" + break + except Exception as ee: + logger.debug(f"检测 img_tokens 信息失败: {ee}") + # 检查第二个及之后的 parts 是否有内容 has_real_output = False if getattr(c0, "content", None) and getattr(c0.content, "parts", None): @@ -392,11 +418,11 @@ def _default_normal_response_parser( if has_real_output: logger.warning( - "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" - " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" + f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" + f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" ) else: - logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") return api_response, _usage_record except Exception as e: From c0a7cc2102c39f6cbb439cae27fc801496781e67 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 17 Oct 2025 23:14:33 +0800 Subject: [PATCH 7/9] Update gemini_client.py --- src/llm_models/model_client/gemini_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 02889373..87ca8697 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -271,7 +271,7 @@ def _build_stream_api_resp( f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" ) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): @@ -422,7 +422,7 @@ def _default_normal_response_parser( f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" 
) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") return api_response, _usage_record except Exception as e: From d5696c12d4a6a8c3e1a3d2ba4d05a2b9c8513b83 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 19 Oct 2025 00:50:32 +0800 Subject: [PATCH 8/9] =?UTF-8?q?=E5=A2=9E=E5=8A=A0Search=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 87ca8697..98f24473 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -13,6 +13,7 @@ from google.genai.types import ( ContentUnion, ThinkingConfig, Tool, + GoogleSearch, GenerateContentConfig, EmbedContentResponse, EmbedContentConfig, @@ -564,6 +565,15 @@ class GeminiClient(BaseClient): tools = _convert_tool_options(tool_options) if tool_options else None # 解析并裁剪 thinking_budget tb = self.clamp_thinking_budget(extra_params, model_info.model_identifier) + # 检测是否为带 -search 的模型 + enable_google_search = False + model_identifier = model_info.model_identifier + if model_identifier.endswith("-search"): + enable_google_search = True + # 去掉后缀并更新模型ID + model_identifier = model_identifier.removesuffix("-search") + model_info.model_identifier = model_identifier + logger.info(f"模型已启用 GoogleSearch 功能:{model_identifier}") # 将response_format转换为Gemini API所需的格式 generation_config_dict = { @@ -586,6 +596,17 @@ class GeminiClient(BaseClient): elif response_format and response_format.format_type in (RespFormatType.JSON_OBJ, RespFormatType.JSON_SCHEMA): generation_config_dict["response_mime_type"] = "application/json" generation_config_dict["response_schema"] = response_format.to_dict() + # 自动启用 GoogleSearch grounding_tool + if enable_google_search: + grounding_tool = Tool(google_search=GoogleSearch()) + if "tools" in generation_config_dict: + existing = generation_config_dict["tools"] + if isinstance(existing, list): + existing.append(grounding_tool) + else: + generation_config_dict["tools"] = [existing, grounding_tool] + else: + generation_config_dict["tools"] = [grounding_tool] generation_config = GenerateContentConfig(**generation_config_dict) From 9662d818a702fe6d70074675b5a8b8ce518ab0cb Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 26 Oct 2025 15:01:18 +0800 Subject: [PATCH 9/9] =?UTF-8?q?=E6=81=A2=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 40 ++++---------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 98f24473..b83c3b8f 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -250,29 +250,16 @@ def _build_stream_api_resp( if fr: reason = str(fr) break - + if str(reason).endswith("MAX_TOKENS"): - warn_target = "max_tokens" - try: - usage_meta = getattr(last_resp, "usage_metadata", None) - if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): - for detail in usage_meta.prompt_tokens_details: - modality = str(getattr(detail, 
"modality", "")).upper() - token_count = getattr(detail, "token_count", 0) - if "IMAGE" in modality and token_count > 0: - warn_target = "img_tokens" - break - except Exception as ee: - logger.debug(f"检测 img_tokens 信息失败: {ee}") - has_visible_output = bool(resp.content and resp.content.strip()) if has_visible_output: logger.warning( - f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" - f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" + "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" ) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") + logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): @@ -393,19 +380,6 @@ def _default_normal_response_parser( c0 = resp.candidates[0] reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) if reason and "MAX_TOKENS" in str(reason): - warn_target = "max_tokens" - try: - usage_meta = getattr(resp, "usage_metadata", None) - if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): - for detail in usage_meta.prompt_tokens_details: - modality = str(getattr(detail, "modality", "")).upper() - token_count = getattr(detail, "token_count", 0) - if "IMAGE" in modality and token_count > 0: - warn_target = "img_tokens" - break - except Exception as ee: - logger.debug(f"检测 img_tokens 信息失败: {ee}") - # 检查第二个及之后的 parts 是否有内容 has_real_output = False if getattr(c0, "content", None) and getattr(c0.content, "parts", None): @@ -419,11 +393,11 @@ def _default_normal_response_parser( if has_real_output: logger.warning( - f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" - f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" + "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" ) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") + logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") return api_response, _usage_record except Exception as e: