Merge pull request #1251 from foxplaying/patch-2

Gemini：正式启用HttpOptions对传入参数的处理，优化预算提示，修正文档
2025-09-23 20:10:37 +08:00 · 2025-09-23 20:10:37 +08:00 · 15d6a319d3
parent 22ddaa5b72 f8844fec4f
commit 15d6a319d3
2 changed files with 51 additions and 25 deletions
--- a/docs/model_configuration_guide.md
+++ b/docs/model_configuration_guide.md
@ -28,7 +28,7 @@ version = "1.1.1"
 ```toml
 [[api_providers]]
 name = "DeepSeek"                       # 服务商名称（自定义）
-base_url = "https://api.deepseek.cn/v1" # API服务的基础URL
+base_url = "https://api.deepseek.com/v1" # API服务的基础URL
 api_key = "your-api-key-here"           # API密钥
 client_type = "openai"                  # 客户端类型
 max_retry = 2                           # 最大重试次数
@ -43,19 +43,19 @@ retry_interval = 10                     # 重试间隔（秒）
 | `name` | ✅ | 服务商名称，需要在模型配置中引用 | - |
 | `base_url` | ✅ | API服务的基础URL | - |
 | `api_key` | ✅ | API密钥，请替换为实际密钥 | - |
-| `client_type` | ❌ | 客户端类型：`openai`（OpenAI格式）或 `gemini`（Gemini格式，现在支持不良好） | `openai` |
+| `client_type` | ❌ | 客户端类型：`openai`（OpenAI格式）或 `gemini`（Gemini格式） | `openai` |
 | `max_retry` | ❌ | API调用失败时的最大重试次数 | 2 |
 | `timeout` | ❌ | API请求超时时间（秒） | 30 |
 | `retry_interval` | ❌ | 重试间隔时间（秒） | 10 |
-**请注意，对于`client_type`为`gemini`的模型，`base_url`字段无效。**
+**请注意，对于`client_type`为`gemini`的模型，`retry`字段由`gemini`自己决定。**
 ### 2.3 支持的服务商示例
 #### DeepSeek
 ```toml
 [[api_providers]]
 name = "DeepSeek"
-base_url = "https://api.deepseek.cn/v1"
+base_url = "https://api.deepseek.com/v1"
 api_key = "your-deepseek-api-key"
 client_type = "openai"
 ```
@ -73,7 +73,7 @@ client_type = "openai"
 ```toml
 [[api_providers]]
 name = "Google"
-base_url = "https://api.google.com/v1"
+base_url = "https://generativelanguage.googleapis.com/v1beta"
 api_key = "your-google-api-key"
 client_type = "gemini"  # 注意：Gemini需要使用特殊客户端
 ```
@ -131,9 +131,20 @@ enable_thinking = false # 禁用思考
 [models.extra_params]
 thinking = {type = "disabled"} # 禁用思考
 ```
 而对于`gemini`需要单独进行配置
 ```toml
 [[models]]
 model_identifier = "gemini-2.5-flash"
 name = "gemini-2.5-flash"
 api_provider = "Google"
 [models.extra_params]
 thinking_budget = 0 # 禁用思考
 # thinking_budget = -1 由模型自己决定
 ```
 请注意，`extra_params` 的配置应该构成一个合法的TOML字典结构，具体内容取决于API服务商的要求。
 **请注意，对于`client_type`为`gemini`的模型，此字段无效。**
 ### 3.3 配置参数说明
 | 参数 | 必填 | 说明 |
--- a/src/llm_models/model_client/gemini_client.py
+++ b/src/llm_models/model_client/gemini_client.py
@ -17,6 +17,7 @@ from google.genai.types import (
    EmbedContentResponse,
    EmbedContentConfig,
    SafetySetting,
    HttpOptions,
    HarmCategory,
    HarmBlockThreshold,
 )
@ -345,22 +346,27 @@ class GeminiClient(BaseClient):
    def __init__(self, api_provider: APIProvider):
        """Create the underlying ``genai.Client`` from the provider settings.

        The provider's ``timeout`` (seconds) is converted to the milliseconds
        that ``HttpOptions`` is given, and ``base_url`` is split into the
        ``base_url`` / ``api_version`` pair when it ends in a version path
        segment such as ``/v1`` or ``/v1beta``. Everything is configured once
        through ``HttpOptions``; no private SDK state is patched afterwards.

        Args:
            api_provider: Provider configuration; ``timeout``, ``base_url`` and
                ``api_key`` are read from it.
        """
        super().__init__(api_provider)
        # Function-scope import keeps this block self-contained.
        from urllib.parse import urlsplit

        # Seconds -> milliseconds; a missing timeout is passed through as None.
        timeout = api_provider.timeout
        http_options_kwargs = {
            "timeout": None if timeout is None else int(timeout * 1000),
        }

        # The address must follow the Gemini layout: <root>/<api_version>.
        base_url = getattr(api_provider, "base_url", None)
        if base_url:
            trimmed = base_url.rstrip("/")
            prefix, _, last_segment = trimmed.rpartition("/")
            # Only a trailing *path* segment may be a version. Without this
            # check a host-only URL such as "https://vproxy.example.com" would
            # have its host name mistaken for the API version.
            is_path_segment = urlsplit(trimmed).path.endswith(f"/{last_segment}")
            # Accept "v1", "v1beta", "v1alpha", ... but not "vertex" or "vendor".
            looks_like_version = (
                last_segment[:1] == "v"
                and last_segment[1:2].isdigit()
                and last_segment.isalnum()
            )
            if is_path_segment and looks_like_version:
                http_options_kwargs["base_url"] = f"{prefix}/"
                http_options_kwargs["api_version"] = last_segment
            else:
                http_options_kwargs["base_url"] = base_url

        self.client = genai.Client(
            http_options=HttpOptions(**http_options_kwargs),
            api_key=api_provider.api_key,
        )  # Unlike the OpenAI client, retrying is left to the Gemini SDK itself.

        if base_url:
            # Keep exposing the low-level api client on this instance, as
            # before, for code that reaches for it.
            self._api_client = self.client._api_client
    @staticmethod
    def clamp_thinking_budget(tb: int, model_id: str) -> int:
        """
@ -380,20 +386,29 @@ class GeminiClient(BaseClient):
                    limits = THINKING_BUDGET_LIMITS[key]
                    break
-        # 特殊值处理
+        # 预算值处理
        if tb == THINKING_BUDGET_AUTO:
            return THINKING_BUDGET_AUTO
        if tb == THINKING_BUDGET_DISABLED:
            if limits and limits.get("can_disable", False):
                return THINKING_BUDGET_DISABLED
-            return limits["min"] if limits else THINKING_BUDGET_AUTO
+            if limits:
                logger.warning(f"模型 {model_id} 不支持禁用思考预算，已回退到最小值 {limits['min']}")
                return limits["min"]
            return THINKING_BUDGET_AUTO
-        # 已知模型裁剪到范围
+        # 已知模型范围裁剪 + 提示
        if limits:
-            return max(limits["min"], min(tb, limits["max"]))
+            if tb < limits["min"]:
                logger.warning(f"模型 {model_id} 的 thinking_budget={tb} 过小，已调整为最小值 {limits['min']}")
                return limits["min"]
            if tb > limits["max"]:
                logger.warning(f"模型 {model_id} 的 thinking_budget={tb} 过大，已调整为最大值 {limits['max']}")
                return limits["max"]
            return tb
-        # 未知模型，返回动态模式
+        # 未知模型 → 默认自动模式
-        logger.warning(f"模型 {model_id} 未在 THINKING_BUDGET_LIMITS 中定义，将使用动态模式 tb=-1 兼容。")
+        logger.warning(f"模型 {model_id} 未在 THINKING_BUDGET_LIMITS 中定义，已启用模型自动预算兼容")
        return THINKING_BUDGET_AUTO
    async def get_response(
@ -448,7 +463,7 @@ class GeminiClient(BaseClient):
            try:
                tb = int(extra_params["thinking_budget"])
            except (ValueError, TypeError):
-                logger.warning(f"无效的 thinking_budget 值 {extra_params['thinking_budget']}，将使用默认动态模式 {tb}")
+                logger.warning(f"无效的 thinking_budget 值 {extra_params['thinking_budget']}，将使用模型自动预算模式 {tb}")
        # 裁剪到模型支持的范围
        tb = self.clamp_thinking_budget(tb, model_info.model_identifier)