From a0ba9897107d0f9864a643fa472c3eedd77a66af Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 26 Sep 2025 23:20:18 +0800 Subject: [PATCH 1/9] =?UTF-8?q?=E7=A7=BB=E9=99=A4thought=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 30 ++++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 0713a33a..a047c383 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -176,20 +176,25 @@ def _process_delta( delta: GenerateContentResponse, fc_delta_buffer: io.StringIO, tool_calls_buffer: list[tuple[str, str, dict[str, Any]]], + resp: APIResponse | None = None, ): if not hasattr(delta, "candidates") or not delta.candidates: raise RespParseException(delta, "响应解析失败,缺失candidates字段") - if delta.text: - fc_delta_buffer.write(delta.text) - # 处理 thought(Gemini 的特殊字段) for c in getattr(delta, "candidates", []): if c.content and getattr(c.content, "parts", None): for p in c.content.parts: if getattr(p, "thought", False) and getattr(p, "text", None): - # 把 thought 写入 buffer,避免 resp.content 永远为空 + # 保存到 reasoning_content + if resp is not None: + resp.reasoning_content = (resp.reasoning_content or "") + p.text + elif getattr(p, "text", None): + # 正常输出写入 buffer fc_delta_buffer.write(p.text) + + if delta.text: + fc_delta_buffer.write(delta.text) if delta.function_calls: # 为什么不用hasattr呢,是因为这个属性一定有,即使是个空的 for call in delta.function_calls: @@ -213,9 +218,11 @@ def _build_stream_api_resp( _fc_delta_buffer: io.StringIO, _tool_calls_buffer: list[tuple[str, str, dict]], last_resp: GenerateContentResponse | None = None, # 传入 last_resp + resp: APIResponse | None = None, ) -> APIResponse: # sourcery skip: simplify-len-comparison, use-assigned-variable - resp = APIResponse() + if resp is None: + resp = APIResponse() if _fc_delta_buffer.tell() > 0: # 如果正式内容缓冲区不为空,则将其写入APIResponse对象 @@ -244,7 +251,8 @@ def _build_stream_api_resp( reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) if str(reason).endswith("MAX_TOKENS"): - if resp.content and resp.content.strip(): + has_visible_output = bool(resp.content and resp.content.strip()) + if has_visible_output: logger.warning( "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" @@ -254,9 +262,10 @@ def _build_stream_api_resp( "⚠ Gemini 响应因达到 max_tokens 限制被截断,\n" " 请修改模型 max_tokens 配置!" 
) - + if not resp.content and not resp.tool_calls: - raise EmptyResponseException() + if not getattr(resp, "reasoning_content", None): + raise EmptyResponseException() return resp @@ -274,7 +283,8 @@ async def _default_stream_response_handler( _tool_calls_buffer: list[tuple[str, str, dict]] = [] # 工具调用缓冲区,用于存储接收到的工具调用 _usage_record = None # 使用情况记录 last_resp: GenerateContentResponse | None = None # 保存最后一个 chunk - + resp = APIResponse() + def _insure_buffer_closed(): if _fc_delta_buffer and not _fc_delta_buffer.closed: _fc_delta_buffer.close() @@ -290,6 +300,7 @@ async def _default_stream_response_handler( chunk, _fc_delta_buffer, _tool_calls_buffer, + resp=resp, ) if chunk.usage_metadata: @@ -305,6 +316,7 @@ async def _default_stream_response_handler( _fc_delta_buffer, _tool_calls_buffer, last_resp=last_resp, + resp=resp, ), _usage_record except Exception: # 确保缓冲区被关闭 From 7597af64d8de9250c74178c0e061c2804aa1319c Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 26 Sep 2025 23:26:11 +0800 Subject: [PATCH 2/9] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=A4=9A=E4=BD=99?= =?UTF-8?q?=E5=86=99=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index a047c383..2a95f765 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -192,9 +192,6 @@ def _process_delta( elif getattr(p, "text", None): # 正常输出写入 buffer fc_delta_buffer.write(p.text) - - if delta.text: - fc_delta_buffer.write(delta.text) if delta.function_calls: # 为什么不用hasattr呢,是因为这个属性一定有,即使是个空的 for call in delta.function_calls: From 2009ee3b5e9bfd77c3dbb734e6298a557bf9703e Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 28 Sep 2025 07:58:16 +0800 Subject: [PATCH 3/9] =?UTF-8?q?=E5=90=88=E5=B9=B6=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index be47f9be..9656127d 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -255,10 +255,7 @@ def _build_stream_api_resp( " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" ) else: - logger.warning( - "⚠ Gemini 响应因达到 max_tokens 限制被截断,\n" - " 请修改模型 max_tokens 配置!" 
- ) + logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): From 8cad5bb45dfee27add46b925636210470014abc8 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 28 Sep 2025 13:37:06 +0800 Subject: [PATCH 4/9] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 9656127d..89a3c53b 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -244,9 +244,12 @@ def _build_stream_api_resp( # 检查是否因为 max_tokens 截断 reason = None if last_resp and getattr(last_resp, "candidates", None): - c0 = last_resp.candidates[0] - reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) - + for c in last_resp.candidates: + fr = getattr(c, "finish_reason", None) or getattr(c, "finishReason", None) + if fr: + reason = str(fr) + break + if str(reason).endswith("MAX_TOKENS"): has_visible_output = bool(resp.content and resp.content.strip()) if has_visible_output: From d7c093f21c8c6acad0fab2403a09427e2b76f1d2 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 3 Oct 2025 02:31:23 +0800 Subject: [PATCH 5/9] =?UTF-8?q?=E6=96=B0=E5=A2=9E=E6=88=AA=E6=96=AD?= =?UTF-8?q?=E6=A3=80=E6=B5=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/openai_client.py | 33 ++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py index 148ec8cb..dd92b9e8 100644 --- a/src/llm_models/model_client/openai_client.py +++ b/src/llm_models/model_client/openai_client.py @@ -199,6 +199,7 @@ def _build_stream_api_resp( _fc_delta_buffer: io.StringIO, _rc_delta_buffer: io.StringIO, _tool_calls_buffer: list[tuple[str, str, io.StringIO]], + finish_reason: str | None = None, ) -> APIResponse: resp = APIResponse() @@ -236,6 +237,16 @@ def _build_stream_api_resp( resp.tool_calls.append(ToolCall(call_id, function_name, arguments)) + # 检查 max_tokens 截断 + if finish_reason == "length": + if resp.content and resp.content.strip(): + logger.warning( + "⚠ OpenAI 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" 
+ ) + else: + logger.warning("⚠ OpenAI 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + if not resp.content and not resp.tool_calls: raise EmptyResponseException() @@ -258,6 +269,7 @@ async def _default_stream_response_handler( _fc_delta_buffer = io.StringIO() # 正式内容缓冲区,用于存储接收到的正式内容 _tool_calls_buffer: list[tuple[str, str, io.StringIO]] = [] # 工具调用缓冲区,用于存储接收到的工具调用 _usage_record = None # 使用情况记录 + finish_reason: str | None = None # 记录最后的 finish_reason def _insure_buffer_closed(): # 确保缓冲区被关闭 @@ -285,6 +297,9 @@ async def _default_stream_response_handler( continue # 跳过本帧,避免访问 choices[0] delta = event.choices[0].delta # 获取当前块的delta内容 + if hasattr(event.choices[0], "finish_reason") and event.choices[0].finish_reason: + finish_reason = event.choices[0].finish_reason + if hasattr(delta, "reasoning_content") and delta.reasoning_content: # type: ignore # 标记:有独立的推理内容块 _has_rc_attr_flag = True @@ -311,6 +326,7 @@ async def _default_stream_response_handler( _fc_delta_buffer, _rc_delta_buffer, _tool_calls_buffer, + finish_reason=finish_reason, ), _usage_record except Exception: # 确保缓冲区被关闭 @@ -381,6 +397,23 @@ def _default_normal_response_parser( # 将原始响应存储在原始数据中 api_response.raw_data = resp + # 检查 max_tokens 截断 + try: + choice0 = resp.choices[0] + reason = getattr(choice0, "finish_reason", None) + if reason and reason == "length": + has_real_output = bool(api_response.content and api_response.content.strip()) + if has_real_output: + logger.warning( + "⚠ OpenAI 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" + ) + else: + logger.warning("⚠ OpenAI 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + return api_response, _usage_record + except Exception as e: + logger.debug(f"检查 MAX_TOKENS 截断时异常: {e}") + if not api_response.content and not api_response.tool_calls: raise EmptyResponseException() From 3231f4f2f8042e47c79f6714d78651e067bd39d2 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 17 Oct 2025 23:11:34 +0800 Subject: [PATCH 6/9] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=AF=A6=E7=BB=86?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 40 ++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 89a3c53b..02889373 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -249,16 +249,29 @@ def _build_stream_api_resp( if fr: reason = str(fr) break - + if str(reason).endswith("MAX_TOKENS"): + warn_target = "max_tokens" + try: + usage_meta = getattr(last_resp, "usage_metadata", None) + if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): + for detail in usage_meta.prompt_tokens_details: + modality = str(getattr(detail, "modality", "")).upper() + token_count = getattr(detail, "token_count", 0) + if "IMAGE" in modality and token_count > 0: + warn_target = "img_tokens" + break + except Exception as ee: + logger.debug(f"检测 img_tokens 信息失败: {ee}") + has_visible_output = bool(resp.content and resp.content.strip()) if has_visible_output: logger.warning( - "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" - " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" + f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" + f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" 
) else: - logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): @@ -379,6 +392,19 @@ def _default_normal_response_parser( c0 = resp.candidates[0] reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) if reason and "MAX_TOKENS" in str(reason): + warn_target = "max_tokens" + try: + usage_meta = getattr(resp, "usage_metadata", None) + if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): + for detail in usage_meta.prompt_tokens_details: + modality = str(getattr(detail, "modality", "")).upper() + token_count = getattr(detail, "token_count", 0) + if "IMAGE" in modality and token_count > 0: + warn_target = "img_tokens" + break + except Exception as ee: + logger.debug(f"检测 img_tokens 信息失败: {ee}") + # 检查第二个及之后的 parts 是否有内容 has_real_output = False if getattr(c0, "content", None) and getattr(c0.content, "parts", None): @@ -392,11 +418,11 @@ def _default_normal_response_parser( if has_real_output: logger.warning( - "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" - " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" + f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" + f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" ) else: - logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") return api_response, _usage_record except Exception as e: From c0a7cc2102c39f6cbb439cae27fc801496781e67 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Fri, 17 Oct 2025 23:14:33 +0800 Subject: [PATCH 7/9] Update gemini_client.py --- src/llm_models/model_client/gemini_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 02889373..87ca8697 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -271,7 +271,7 @@ def _build_stream_api_resp( f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" ) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): @@ -422,7 +422,7 @@ def _default_normal_response_parser( f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" 
) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n请修改模型 {warn_target} 配置!") + logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") return api_response, _usage_record except Exception as e: From d5696c12d4a6a8c3e1a3d2ba4d05a2b9c8513b83 Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 19 Oct 2025 00:50:32 +0800 Subject: [PATCH 8/9] =?UTF-8?q?=E5=A2=9E=E5=8A=A0Search=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 87ca8697..98f24473 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -13,6 +13,7 @@ from google.genai.types import ( ContentUnion, ThinkingConfig, Tool, + GoogleSearch, GenerateContentConfig, EmbedContentResponse, EmbedContentConfig, @@ -564,6 +565,15 @@ class GeminiClient(BaseClient): tools = _convert_tool_options(tool_options) if tool_options else None # 解析并裁剪 thinking_budget tb = self.clamp_thinking_budget(extra_params, model_info.model_identifier) + # 检测是否为带 -search 的模型 + enable_google_search = False + model_identifier = model_info.model_identifier + if model_identifier.endswith("-search"): + enable_google_search = True + # 去掉后缀并更新模型ID + model_identifier = model_identifier.removesuffix("-search") + model_info.model_identifier = model_identifier + logger.info(f"模型已启用 GoogleSearch 功能:{model_identifier}") # 将response_format转换为Gemini API所需的格式 generation_config_dict = { @@ -586,6 +596,17 @@ class GeminiClient(BaseClient): elif response_format and response_format.format_type in (RespFormatType.JSON_OBJ, RespFormatType.JSON_SCHEMA): generation_config_dict["response_mime_type"] = "application/json" generation_config_dict["response_schema"] = response_format.to_dict() + # 自动启用 GoogleSearch grounding_tool + if enable_google_search: + grounding_tool = Tool(google_search=GoogleSearch()) + if "tools" in generation_config_dict: + existing = generation_config_dict["tools"] + if isinstance(existing, list): + existing.append(grounding_tool) + else: + generation_config_dict["tools"] = [existing, grounding_tool] + else: + generation_config_dict["tools"] = [grounding_tool] generation_config = GenerateContentConfig(**generation_config_dict) From 9662d818a702fe6d70074675b5a8b8ce518ab0cb Mon Sep 17 00:00:00 2001 From: foxplaying <166147707+foxplaying@users.noreply.github.com> Date: Sun, 26 Oct 2025 15:01:18 +0800 Subject: [PATCH 9/9] =?UTF-8?q?=E6=81=A2=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/llm_models/model_client/gemini_client.py | 40 ++++---------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/src/llm_models/model_client/gemini_client.py b/src/llm_models/model_client/gemini_client.py index 98f24473..b83c3b8f 100644 --- a/src/llm_models/model_client/gemini_client.py +++ b/src/llm_models/model_client/gemini_client.py @@ -250,29 +250,16 @@ def _build_stream_api_resp( if fr: reason = str(fr) break - + if str(reason).endswith("MAX_TOKENS"): - warn_target = "max_tokens" - try: - usage_meta = getattr(last_resp, "usage_metadata", None) - if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): - for detail in usage_meta.prompt_tokens_details: - modality = str(getattr(detail, 
"modality", "")).upper() - token_count = getattr(detail, "token_count", 0) - if "IMAGE" in modality and token_count > 0: - warn_target = "img_tokens" - break - except Exception as ee: - logger.debug(f"检测 img_tokens 信息失败: {ee}") - has_visible_output = bool(resp.content and resp.content.strip()) if has_visible_output: logger.warning( - f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" - f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" + "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" ) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") + logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") if not resp.content and not resp.tool_calls: if not getattr(resp, "reasoning_content", None): @@ -393,19 +380,6 @@ def _default_normal_response_parser( c0 = resp.candidates[0] reason = getattr(c0, "finish_reason", None) or getattr(c0, "finishReason", None) if reason and "MAX_TOKENS" in str(reason): - warn_target = "max_tokens" - try: - usage_meta = getattr(resp, "usage_metadata", None) - if usage_meta and getattr(usage_meta, "prompt_tokens_details", None): - for detail in usage_meta.prompt_tokens_details: - modality = str(getattr(detail, "modality", "")).upper() - token_count = getattr(detail, "token_count", 0) - if "IMAGE" in modality and token_count > 0: - warn_target = "img_tokens" - break - except Exception as ee: - logger.debug(f"检测 img_tokens 信息失败: {ee}") - # 检查第二个及之后的 parts 是否有内容 has_real_output = False if getattr(c0, "content", None) and getattr(c0.content, "parts", None): @@ -419,11 +393,11 @@ def _default_normal_response_parser( if has_real_output: logger.warning( - f"⚠ Gemini 响应因达到 {warn_target} 限制被部分截断,\n" - f" 可能会对回复内容造成影响,建议修改模型 {warn_target} 配置!" + "⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n" + " 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!" ) else: - logger.warning(f"⚠ Gemini 响应因达到 {warn_target} 限制被截断,\n 请修改模型 {warn_target} 配置!") + logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!") return api_response, _usage_record except Exception as e: