From 2a4facee0cdec495e24603e8ee6659c6534de6e0 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Thu, 21 Aug 2025 05:29:45 +0800 Subject: [PATCH 1/4] =?UTF-8?q?fix(gemini):=20=E5=AF=B9=20thinking=5Fbudge?= =?UTF-8?q?t=20=E4=B8=8D=E5=90=8C=E6=A8=A1=E5=9E=8B=E7=9A=84=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 72 ++++++++++++++++---- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index c0d4588b..299bb080 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -44,10 +44,13 @@ from ..payload_content.tool_option import ToolOption, ToolParam, ToolCall logger = get_logger("Gemini客户端") -# gemini_thinking参数 -GEMINI_THINKING_BUDGET_MIN = 512 -GEMINI_THINKING_BUDGET_MAX = 24576 -DEFAULT_THINKING_BUDGET = 1024 +# gemini_thinking参数(默认范围) +# 不同模型的思考预算范围配置 +THINKING_BUDGET_LIMITS = { + "gemini-2.5-flash": {"min": 1, "max": 24576, "can_disable": True}, + "gemini-2.5-flash-lite": {"min": 512, "max": 24576, "can_disable": True}, + "gemini-2.5-pro": {"min": 128, "max": 32768, "can_disable": False}, +} gemini_safe_settings = [ SafetySetting(category=HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=HarmBlockThreshold.BLOCK_NONE), @@ -333,6 +336,49 @@ class GeminiClient(BaseClient): api_key=api_provider.api_key, ) # 这里和openai不一样,gemini会自己决定自己是否需要retry + # 思维预算特殊值 + TB_DYNAMIC_MODE = -1 + TB_DISABLE_OR_MIN = 0 + + @staticmethod + def clamp_thinking_budget(tb: int, model_id: str): + """ + 按模型限制思考预算范围,仅支持指定的模型(支持带数字后缀的新版本) + """ + # 精确匹配或更精确的包含匹配 + limits = None + matched_key = None + + # 首先尝试精确匹配 + if model_id in THINKING_BUDGET_LIMITS: + limits = THINKING_BUDGET_LIMITS[model_id] + matched_key = model_id + else: + # 如果没有精确匹配,尝试更精确的包含匹配 + # 按键的长度降序排序,优先匹配更长的键 + sorted_keys = sorted(THINKING_BUDGET_LIMITS.keys(), key=len, reverse=True) + for key in sorted_keys: + # 使用更精确的匹配逻辑:键必须是模型ID的一部分,但不能是部分匹配 + if key in model_id and (model_id == key or model_id.startswith(key + "-")): + limits = THINKING_BUDGET_LIMITS[key] + matched_key = key + break + + if limits is None: + raise ValueError(f"模型 {model_id} 不支持 ThinkingConfig") + if tb == GeminiClient.TB_DYNAMIC_MODE: + return GeminiClient.TB_DYNAMIC_MODE # 动态思考模式 + if tb == GeminiClient.TB_DISABLE_OR_MIN: + if limits["can_disable"]: + # 允许禁用思考预算 + return GeminiClient.TB_DISABLE_OR_MIN + else: + # 不允许禁用,返回最小值 + return limits["min"] + + # 正常范围裁剪 + return max(limits["min"], min(tb, limits["max"])) + async def get_response( self, model_info: ModelInfo, @@ -379,17 +425,15 @@ class GeminiClient(BaseClient): # 将tool_options转换为Gemini API所需的格式 tools = _convert_tool_options(tool_options) if tool_options else None # 将response_format转换为Gemini API所需的格式 - try: - if extra_params and "thinking_budget" in extra_params: - tb = extra_params["thinking_budget"] - tb = int(tb) # 尝试转换为整数 - else: - tb = int(max_tokens / 2) - except (TypeError, ValueError) as e: - logger.warning(f"无效的thinking_budget值 {extra_params.get('thinking_budget') if extra_params else None},使用默认值 {DEFAULT_THINKING_BUDGET}: {e}") - tb = DEFAULT_THINKING_BUDGET + tb = int(max_tokens / 2) # 默认值 + if extra_params and "thinking_budget" in extra_params: + try: + tb = int(extra_params["thinking_budget"]) + except (ValueError, TypeError): + logger.warning(f"无效的 thinking_budget 值 {extra_params['thinking_budget']},将使用默认值") - tb = max(GEMINI_THINKING_BUDGET_MIN, min(tb, GEMINI_THINKING_BUDGET_MAX)) # 限制在合法范围(512-24576) + # 裁剪到模型支持的范围 + tb = self.clamp_thinking_budget(tb, model_info.model_identifier) generation_config_dict = { "max_output_tokens": max_tokens, From 068b2dc3d688915b7d3a8cbcc4418fb8ffd1859d Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Thu, 21 Aug 2025 05:47:25 +0800 Subject: [PATCH 2/4] =?UTF-8?q?fix(gemini):=20=E5=AF=B9=20thinking=5Fbudge?= =?UTF-8?q?t=20=E4=B8=8D=E5=90=8C=E6=A8=A1=E5=9E=8B=E7=9A=84=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 299bb080..8afddee0 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -348,7 +348,7 @@ class GeminiClient(BaseClient): # 精确匹配或更精确的包含匹配 limits = None matched_key = None - + # 首先尝试精确匹配 if model_id in THINKING_BUDGET_LIMITS: limits = THINKING_BUDGET_LIMITS[model_id] From ea64f1b2eda4a4869018e153bdc01b5b56562acd Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Thu, 21 Aug 2025 05:53:03 +0800 Subject: [PATCH 3/4] =?UTF-8?q?fix(gemini):=20=E5=AF=B9=20thinking=5Fbudge?= =?UTF-8?q?t=20=E4=B8=8D=E5=90=8C=E6=A8=A1=E5=9E=8B=E7=9A=84=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 8afddee0..c927060f 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -345,21 +345,19 @@ class GeminiClient(BaseClient): """ 按模型限制思考预算范围,仅支持指定的模型(支持带数字后缀的新版本) """ - # 精确匹配或更精确的包含匹配 limits = None matched_key = None - # 首先尝试精确匹配 + # 优先尝试精确匹配 if model_id in THINKING_BUDGET_LIMITS: limits = THINKING_BUDGET_LIMITS[model_id] matched_key = model_id else: - # 如果没有精确匹配,尝试更精确的包含匹配 - # 按键的长度降序排序,优先匹配更长的键 + # 按 key 长度倒序,保证更长的(更具体的,如 -lite)优先 sorted_keys = sorted(THINKING_BUDGET_LIMITS.keys(), key=len, reverse=True) for key in sorted_keys: - # 使用更精确的匹配逻辑:键必须是模型ID的一部分,但不能是部分匹配 - if key in model_id and (model_id == key or model_id.startswith(key + "-")): + # 必须满足:完全等于 或者 前缀匹配(带 "-" 边界) + if model_id == key or model_id.startswith(key + "-"): limits = THINKING_BUDGET_LIMITS[key] matched_key = key break From a8ecd9bfa3bcbe1ac8af405001cc9eee440f6eb4 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Thu, 21 Aug 2025 08:42:27 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=8C=E5=A2=9E=E5=8A=A0=E5=90=8E=E5=A4=84?= =?UTF-8?q?=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 48 +++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index c927060f..93a41a3d 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -337,8 +337,8 @@ class GeminiClient(BaseClient): ) # 这里和openai不一样,gemini会自己决定自己是否需要retry # 思维预算特殊值 - TB_DYNAMIC_MODE = -1 - TB_DISABLE_OR_MIN = 0 + THINKING_BUDGET_AUTO = -1 # 自动调整思考预算,由模型决定 + THINKING_BUDGET_DISABLED = 0 # 禁用思考预算(如果模型允许禁用) @staticmethod def clamp_thinking_budget(tb: int, model_id: str): @@ -358,24 +358,25 @@ class GeminiClient(BaseClient): for key in sorted_keys: # 必须满足:完全等于 或者 前缀匹配(带 "-" 边界) if model_id == key or model_id.startswith(key + "-"): - limits = THINKING_BUDGET_LIMITS[key] - matched_key = key - break + limits = THINKING_BUDGET_LIMITS[key] + matched_key = key + break - if limits is None: - raise ValueError(f"模型 {model_id} 不支持 ThinkingConfig") - if tb == GeminiClient.TB_DYNAMIC_MODE: - return GeminiClient.TB_DYNAMIC_MODE # 动态思考模式 - if tb == GeminiClient.TB_DISABLE_OR_MIN: - if limits["can_disable"]: - # 允许禁用思考预算 - return GeminiClient.TB_DISABLE_OR_MIN - else: - # 不允许禁用,返回最小值 - return limits["min"] + # 特殊值处理 + if tb == GeminiClient.THINKING_BUDGET_AUTO: + return GeminiClient.THINKING_BUDGET_AUTO + if tb == GeminiClient.THINKING_BUDGET_DISABLED: + if limits and limits.get("can_disable", False): + return GeminiClient.THINKING_BUDGET_DISABLED + return limits["min"] if limits else GeminiClient.THINKING_BUDGET_AUTO - # 正常范围裁剪 - return max(limits["min"], min(tb, limits["max"])) + # 已知模型裁剪到范围 + if limits: + return max(limits["min"], min(tb, limits["max"])) + + # 未知模型,返回动态模式 + logger.warning(f"模型 {model_id} 未在 THINKING_BUDGET_LIMITS 中定义,将使用动态模式 tb=-1 兼容。") + return GeminiClient.THINKING_BUDGET_AUTO async def get_response( self, @@ -422,17 +423,20 @@ class GeminiClient(BaseClient): messages = _convert_messages(message_list) # 将tool_options转换为Gemini API所需的格式 tools = _convert_tool_options(tool_options) if tool_options else None - # 将response_format转换为Gemini API所需的格式 - tb = int(max_tokens / 2) # 默认值 + + tb = GeminiClient.THINKING_BUDGET_AUTO + #空处理 if extra_params and "thinking_budget" in extra_params: try: tb = int(extra_params["thinking_budget"]) except (ValueError, TypeError): - logger.warning(f"无效的 thinking_budget 值 {extra_params['thinking_budget']},将使用默认值") - + logger.warning( + f"无效的 thinking_budget 值 {extra_params['thinking_budget']},将使用默认动态模式 {tb}" + ) # 裁剪到模型支持的范围 tb = self.clamp_thinking_budget(tb, model_info.model_identifier) + # 将response_format转换为Gemini API所需的格式 generation_config_dict = { "max_output_tokens": max_tokens, "temperature": temperature,