From 00ff43625b09ad5d4b2f4222f246edc934bec954 Mon Sep 17 00:00:00 2001
From: foxplaying
Date: Wed, 19 Nov 2025 18:21:40 +0800
Subject: [PATCH] Adapt to Gemini 3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 requirements.txt                             |  4 +-
 src/llm_models/model_client/gemini_client.py | 69 ++++++++++++++------
 template/model_config_template.toml          | 23 ++++++-
 3 files changed, 73 insertions(+), 23 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 8a0d22c2..37cb855d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ aiohttp-cors>=0.8.1
 colorama>=0.4.6
 faiss-cpu>=1.11.0
 fastapi>=0.116.0
-google-genai>=1.39.1
+google-genai>=1.51.0
 jieba>=0.42.1
 json-repair>=0.47.6
 maim-message
@@ -27,4 +27,4 @@ tomlkit>=0.13.3
 urllib3>=2.5.0
 uvicorn>=0.35.0
 msgpack
-zstandard
\ No newline at end of file
+zstandard
diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py
index 444c5671..e630784a 100644
--- a/src/llm_models/model_client/gemini_client.py
+++ b/src/llm_models/model_client/gemini_client.py
@@ -256,15 +256,16 @@ def _build_stream_api_resp(
            reason = str(fr)
            break
 
-    if str(reason).endswith("MAX_TOKENS"):
+    if reason and "MAX_TOKENS" in reason:
+        model_dbg = getattr(last_resp, "model_version", None) or getattr(last_resp, "modelVersion", None)
         has_visible_output = bool(resp.content and resp.content.strip())
         if has_visible_output:
-            logger.warning(
-                "⚠ The Gemini response was partially truncated by the max_tokens limit,\n"
-                "  this may affect the reply content; consider adjusting the model's max_tokens setting!"
+            logger.info(
+                f"Model {model_dbg} response was partially truncated by the max_tokens limit,\n"
+                "this may affect the reply content; consider raising the model's max_tokens setting!"
             )
         else:
-            logger.warning("⚠ The Gemini response was truncated by the max_tokens limit,\n  please adjust the model's max_tokens setting!")
+            logger.warning(f"⚠ Model {model_dbg} response was truncated by the max_tokens limit,\n  please adjust the model's max_tokens setting!")
 
     if not resp.content and not resp.tool_calls:
         if not getattr(resp, "reasoning_content", None):
@@ -396,13 +397,14 @@ def _default_normal_response_parser(
         if not has_real_output and getattr(resp, "text", None):
             has_real_output = True
 
+        model_dbg = getattr(resp, "model_version", None) or getattr(resp, "modelVersion", None)
         if has_real_output:
-            logger.warning(
-                "⚠ The Gemini response was partially truncated by the max_tokens limit,\n"
-                "  this may affect the reply content; consider adjusting the model's max_tokens setting!"
+            logger.info(
+                f"Model {model_dbg} response was partially truncated by the max_tokens limit,\n"
+                "this may affect the reply content; consider raising the model's max_tokens setting!"
             )
         else:
-            logger.warning("⚠ The Gemini response was truncated by the max_tokens limit,\n  please adjust the model's max_tokens setting!")
+            logger.warning(f"⚠ Model {model_dbg} response was truncated by the max_tokens limit,\n  please adjust the model's max_tokens setting!")
 
         return api_response, _usage_record
     except Exception as e:
@@ -448,6 +450,20 @@ class GeminiClient(BaseClient):
         """
         Clamp the thinking budget to each model's limits; only the specified models are supported (including newer versions with numeric suffixes)
         """
+        if model_id.startswith("gemini-3"):
+            # Gemini 3 uses thinking_level
+            thinking_level = "high"  # default to the high thinking level
+
+            if extra_params and "thinking_level" in extra_params:
+                tl = str(extra_params["thinking_level"]).lower()
+                if tl in ("low", "high"):
+                    thinking_level = tl
+                else:
+                    logger.warning(f"Invalid thinking_level: {tl}, falling back to the default 'high'")
+
+            # Gemini 3 series models do not support disabling thinking
+            return thinking_level
+
         limits = None
 
         # Handle incoming parameters
@@ -543,7 +559,7 @@ class GeminiClient(BaseClient):
         # Convert tool_options into the format required by the Gemini API
         tools = _convert_tool_options(tool_options) if tool_options else None
         # Parse and clamp the thinking_budget
-        tb = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
+        think = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
         # Detect whether this is a model with a -search suffix
         enable_google_search = False
         model_identifier = model_info.model_identifier
@@ -559,12 +575,20 @@ class GeminiClient(BaseClient):
             "max_output_tokens": max_tokens,
             "temperature": temperature,
             "response_modalities": ["TEXT"],
-            "thinking_config": ThinkingConfig(
-                include_thoughts=True,
-                thinking_budget=tb,
-            ),
             "safety_settings": gemini_safe_settings,  # prevent empty-reply issues
         }
+
+        # Choose thinking_config parameters based on the model family
+        thinking_kwargs = {"include_thoughts": True}
+        if model_identifier.startswith("gemini-3"):
+            thinking_kwargs["thinking_level"] = think
+        else:
+            thinking_kwargs["thinking_budget"] = think
+        try:
+            generation_config_dict["thinking_config"] = ThinkingConfig(**thinking_kwargs)
+        except Exception:
+            logger.warning("The current SDK does not support thinking_level; please update the google-genai package manually")
+            generation_config_dict["thinking_config"] = ThinkingConfig(include_thoughts=True)
         if tools:
             generation_config_dict["tools"] = Tool(function_declarations=tools)
         if messages[1]:
@@ -707,7 +731,7 @@ class GeminiClient(BaseClient):
         :return: the transcription response
         """
         # Parse and clamp the thinking_budget
-        tb = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
+        think = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
 
         # Build the prompt + audio input
         prompt = "Generate a transcript of the speech. The language of the transcript should **match the language of the speech**."
@@ -724,12 +748,19 @@ class GeminiClient(BaseClient):
         generation_config_dict = {
             "max_output_tokens": max_tokens,
             "response_modalities": ["TEXT"],
-            "thinking_config": ThinkingConfig(
-                include_thoughts=True,
-                thinking_budget=tb,
-            ),
             "safety_settings": gemini_safe_settings,
         }
+
+        thinking_kwargs = {"include_thoughts": True}
+        if model_info.model_identifier.startswith("gemini-3"):
+            thinking_kwargs["thinking_level"] = think
+        else:
+            thinking_kwargs["thinking_budget"] = think
+        try:
+            generation_config_dict["thinking_config"] = ThinkingConfig(**thinking_kwargs)
+        except Exception:
+            logger.warning("The current SDK does not support thinking_level; please update the google-genai package manually")
+            generation_config_dict["thinking_config"] = ThinkingConfig(include_thoughts=True)
 
         generate_content_config = GenerateContentConfig(**generation_config_dict)
         try:
diff --git a/template/model_config_template.toml b/template/model_config_template.toml
index 6d956ace..8603a33b 100644
--- a/template/model_config_template.toml
+++ b/template/model_config_template.toml
@@ -1,5 +1,5 @@
 [inner]
-version = "1.7.8"
+version = "1.7.9"
 
 # Config file version numbering follows the same rules as bot_config.toml
 
@@ -66,6 +66,26 @@ price_out = 3.0
 [models.extra_params] # optional extra parameter configuration
 enable_thinking = true # whether to enable thinking
 
+[[models]]
+model_identifier = "gemini-3-pro-preview"
+name = "google-gemini-3-pro-preview"
+api_provider = "Google"
+price_in = 0.0
+price_out = 0.0
+[models.extra_params]
+thinking_level = "high" # defaults to the high thinking level
+
+[[models]]
+model_identifier = "gemini-2.5-pro"
+name = "google-gemini-2.5-pro"
+api_provider = "Google"
+price_in = 0.0
+price_out = 0.0
+[models.extra_params]
+thinking_budget = -1 # let the model decide
+# thinking_budget = 0 disables thinking (if possible)
+
+
 [[models]]
 model_identifier = "deepseek-ai/DeepSeek-R1"
 name = "siliconflow-deepseek-r1"
@@ -73,7 +93,6 @@ api_provider = "SiliconFlow"
 price_in = 4.0
 price_out = 16.0
 
-
 [[models]]
 model_identifier = "Qwen/Qwen3-30B-A3B-Instruct-2507"
 name = "qwen3-30b"
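
Reviewer note: a minimal standalone sketch of the selection logic this patch introduces, runnable outside the repo. The resolve_thinking helper and the local ThinkingConfig dataclass below are illustrative stand-ins, not repository code (the real client builds google.genai.types.ThinkingConfig inside generation_config_dict). Gemini 3 models take a categorical thinking_level ("low" or "high", defaulting to "high", with no way to disable thinking), while older models keep the numeric thinking_budget, where -1 lets the model decide.

    # Standalone sketch of the thinking-config selection introduced by this patch.
    # NOTE: resolve_thinking and this local ThinkingConfig are illustrative
    # stand-ins for google.genai.types.ThinkingConfig, not code from the repo.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class ThinkingConfig:
        include_thoughts: bool = True
        thinking_budget: Optional[int] = None   # numeric budget (pre-Gemini-3 models)
        thinking_level: Optional[str] = None    # categorical level (Gemini 3 models)

    def resolve_thinking(model_id: str, extra_params: Optional[dict] = None) -> ThinkingConfig:
        params = extra_params or {}
        if model_id.startswith("gemini-3"):
            # Gemini 3: "low"/"high" only, defaulting to "high"; thinking cannot be disabled.
            level = str(params.get("thinking_level", "high")).lower()
            if level not in ("low", "high"):
                level = "high"  # mirrors the patch's warn-and-fall-back path
            return ThinkingConfig(thinking_level=level)
        # Older models: numeric budget; -1 lets the model decide, 0 disables it if possible.
        return ThinkingConfig(thinking_budget=int(params.get("thinking_budget", -1)))

    # Matches the two template entries added above:
    assert resolve_thinking("gemini-3-pro-preview", {"thinking_level": "high"}).thinking_level == "high"
    assert resolve_thinking("gemini-2.5-pro", {"thinking_budget": -1}).thinking_budget == -1

Wrapping the real ThinkingConfig construction in try/except, as the patch does, keeps older google-genai releases working: an SDK that predates thinking_level rejects the keyword, and the client falls back to include_thoughts only, which is why requirements.txt also bumps google-genai to >=1.51.0.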