mirror of https://github.com/Mai-with-u/MaiBot.git
适配Gemini3
parent
fd975d323e
commit
00ff43625b
|
|
@ -3,7 +3,7 @@ aiohttp-cors>=0.8.1
|
||||||
colorama>=0.4.6
|
colorama>=0.4.6
|
||||||
faiss-cpu>=1.11.0
|
faiss-cpu>=1.11.0
|
||||||
fastapi>=0.116.0
|
fastapi>=0.116.0
|
||||||
google-genai>=1.39.1
|
google-genai>=1.51.0
|
||||||
jieba>=0.42.1
|
jieba>=0.42.1
|
||||||
json-repair>=0.47.6
|
json-repair>=0.47.6
|
||||||
maim-message
|
maim-message
|
||||||
|
|
|
||||||
|
|
@ -256,15 +256,16 @@ def _build_stream_api_resp(
|
||||||
reason = str(fr)
|
reason = str(fr)
|
||||||
break
|
break
|
||||||
|
|
||||||
if str(reason).endswith("MAX_TOKENS"):
|
if reason and "MAX_TOKENS" in reason:
|
||||||
|
model_dbg = getattr(last_resp, "model_version", None) or getattr(last_resp, "modelVersion", None)
|
||||||
has_visible_output = bool(resp.content and resp.content.strip())
|
has_visible_output = bool(resp.content and resp.content.strip())
|
||||||
if has_visible_output:
|
if has_visible_output:
|
||||||
logger.warning(
|
logger.info(
|
||||||
"⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n"
|
f"模型 {model_dbg} 因达到 max_tokens 限制被部分截断,\n"
|
||||||
" 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!"
|
"可能会对回复内容造成影响,适当调宽模型 max_tokens 配置!"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!")
|
logger.warning(f"⚠ 模型 {model_dbg} 因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!")
|
||||||
|
|
||||||
if not resp.content and not resp.tool_calls:
|
if not resp.content and not resp.tool_calls:
|
||||||
if not getattr(resp, "reasoning_content", None):
|
if not getattr(resp, "reasoning_content", None):
|
||||||
|
|
@ -396,13 +397,14 @@ def _default_normal_response_parser(
|
||||||
if not has_real_output and getattr(resp, "text", None):
|
if not has_real_output and getattr(resp, "text", None):
|
||||||
has_real_output = True
|
has_real_output = True
|
||||||
|
|
||||||
|
model_dbg = getattr(resp, "model_version", None) or getattr(resp, "modelVersion", None)
|
||||||
if has_real_output:
|
if has_real_output:
|
||||||
logger.warning(
|
logger.info(
|
||||||
"⚠ Gemini 响应因达到 max_tokens 限制被部分截断,\n"
|
f"模型 {model_dbg} 因达到 max_tokens 限制被部分截断,\n"
|
||||||
" 可能会对回复内容造成影响,建议修改模型 max_tokens 配置!"
|
"可能会对回复内容造成影响,适当调宽模型 max_tokens 配置!"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.warning("⚠ Gemini 响应因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!")
|
logger.warning(f"⚠ 模型 {model_dbg} 因达到 max_tokens 限制被截断,\n 请修改模型 max_tokens 配置!")
|
||||||
|
|
||||||
return api_response, _usage_record
|
return api_response, _usage_record
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -448,6 +450,20 @@ class GeminiClient(BaseClient):
|
||||||
"""
|
"""
|
||||||
按模型限制思考预算范围,仅支持指定的模型(支持带数字后缀的新版本)
|
按模型限制思考预算范围,仅支持指定的模型(支持带数字后缀的新版本)
|
||||||
"""
|
"""
|
||||||
|
if model_id.startswith("gemini-3"):
|
||||||
|
# Gemini 3 使用 thinking_level
|
||||||
|
thinking_level = "high" # 默认高思考级别
|
||||||
|
|
||||||
|
if extra_params and "thinking_level" in extra_params:
|
||||||
|
tl = str(extra_params["thinking_level"]).lower()
|
||||||
|
if tl in ("low", "high"):
|
||||||
|
thinking_level = tl
|
||||||
|
else:
|
||||||
|
logger.warning(f"无效的 thinking_level: {tl},已回退为默认值 high")
|
||||||
|
|
||||||
|
# Gemini 3 系列模型不支持禁用思考功能
|
||||||
|
return thinking_level
|
||||||
|
|
||||||
limits = None
|
limits = None
|
||||||
|
|
||||||
# 参数传入处理
|
# 参数传入处理
|
||||||
|
|
@ -543,7 +559,7 @@ class GeminiClient(BaseClient):
|
||||||
# 将tool_options转换为Gemini API所需的格式
|
# 将tool_options转换为Gemini API所需的格式
|
||||||
tools = _convert_tool_options(tool_options) if tool_options else None
|
tools = _convert_tool_options(tool_options) if tool_options else None
|
||||||
# 解析并裁剪 thinking_budget
|
# 解析并裁剪思考参数(Gemini 3 系列为 thinking_level,其余模型为 thinking_budget)
|
||||||
tb = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
|
think = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
|
||||||
# 检测是否为带 -search 的模型
|
# 检测是否为带 -search 的模型
|
||||||
enable_google_search = False
|
enable_google_search = False
|
||||||
model_identifier = model_info.model_identifier
|
model_identifier = model_info.model_identifier
|
||||||
|
|
@ -559,12 +575,20 @@ class GeminiClient(BaseClient):
|
||||||
"max_output_tokens": max_tokens,
|
"max_output_tokens": max_tokens,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"response_modalities": ["TEXT"],
|
"response_modalities": ["TEXT"],
|
||||||
"thinking_config": ThinkingConfig(
|
|
||||||
include_thoughts=True,
|
|
||||||
thinking_budget=tb,
|
|
||||||
),
|
|
||||||
"safety_settings": gemini_safe_settings, # 防止空回复问题
|
"safety_settings": gemini_safe_settings, # 防止空回复问题
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# 根据模型类型选择 thinking_config 参数
|
||||||
|
thinking_kwargs = {"include_thoughts": True}
|
||||||
|
if model_identifier.startswith("gemini-3"):
|
||||||
|
thinking_kwargs["thinking_level"] = think
|
||||||
|
else:
|
||||||
|
thinking_kwargs["thinking_budget"] = think
|
||||||
|
try:
|
||||||
|
generation_config_dict["thinking_config"] = ThinkingConfig(**thinking_kwargs)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("当前SDK不支持 thinking_level,请手动更新google-genai库")
|
||||||
|
generation_config_dict["thinking_config"] = ThinkingConfig(include_thoughts=True)
|
||||||
if tools:
|
if tools:
|
||||||
generation_config_dict["tools"] = Tool(function_declarations=tools)
|
generation_config_dict["tools"] = Tool(function_declarations=tools)
|
||||||
if messages[1]:
|
if messages[1]:
|
||||||
|
|
@ -707,7 +731,7 @@ class GeminiClient(BaseClient):
|
||||||
:return: 转录响应
|
:return: 转录响应
|
||||||
"""
|
"""
|
||||||
# 解析并裁剪 thinking_budget
|
# 解析并裁剪思考参数(Gemini 3 系列为 thinking_level,其余模型为 thinking_budget)
|
||||||
tb = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
|
think = self.clamp_thinking_budget(extra_params, model_info.model_identifier)
|
||||||
|
|
||||||
# 构造 prompt + 音频输入
|
# 构造 prompt + 音频输入
|
||||||
prompt = "Generate a transcript of the speech. The language of the transcript should **match the language of the speech**."
|
prompt = "Generate a transcript of the speech. The language of the transcript should **match the language of the speech**."
|
||||||
|
|
@ -724,12 +748,19 @@ class GeminiClient(BaseClient):
|
||||||
generation_config_dict = {
|
generation_config_dict = {
|
||||||
"max_output_tokens": max_tokens,
|
"max_output_tokens": max_tokens,
|
||||||
"response_modalities": ["TEXT"],
|
"response_modalities": ["TEXT"],
|
||||||
"thinking_config": ThinkingConfig(
|
|
||||||
include_thoughts=True,
|
|
||||||
thinking_budget=tb,
|
|
||||||
),
|
|
||||||
"safety_settings": gemini_safe_settings,
|
"safety_settings": gemini_safe_settings,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
thinking_kwargs = {"include_thoughts": True}
|
||||||
|
if model_info.model_identifier.startswith("gemini-3"):
|
||||||
|
thinking_kwargs["thinking_level"] = think
|
||||||
|
else:
|
||||||
|
thinking_kwargs["thinking_budget"] = think
|
||||||
|
try:
|
||||||
|
generation_config_dict["thinking_config"] = ThinkingConfig(**thinking_kwargs)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("当前SDK不支持 thinking_level,请手动更新google-genai库")
|
||||||
|
generation_config_dict["thinking_config"] = ThinkingConfig(include_thoughts=True)
|
||||||
generate_content_config = GenerateContentConfig(**generation_config_dict)
|
generate_content_config = GenerateContentConfig(**generation_config_dict)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
[inner]
|
[inner]
|
||||||
version = "1.7.8"
|
version = "1.7.9"
|
||||||
|
|
||||||
# 配置文件版本号迭代规则同bot_config.toml
|
# 配置文件版本号迭代规则同bot_config.toml
|
||||||
|
|
||||||
|
|
@ -66,6 +66,26 @@ price_out = 3.0
|
||||||
[models.extra_params] # 可选的额外参数配置
|
[models.extra_params] # 可选的额外参数配置
|
||||||
enable_thinking = true # 启用思考
|
enable_thinking = true # 启用思考
|
||||||
|
|
||||||
|
[[models]]
|
||||||
|
model_identifier = "gemini-3-pro-preview"
|
||||||
|
name = "google-gemini-3-pro-preview"
|
||||||
|
api_provider = "Google"
|
||||||
|
price_in = 0.0
|
||||||
|
price_out = 0.0
|
||||||
|
[models.extra_params]
|
||||||
|
thinking_level = "high" # 默认高思考级别,可选 "low" / "high"
|
||||||
|
|
||||||
|
[[models]]
|
||||||
|
model_identifier = "gemini-2.5-pro"
|
||||||
|
name = "google-gemini-2.5-pro"
|
||||||
|
api_provider = "Google"
|
||||||
|
price_in = 0.0
|
||||||
|
price_out = 0.0
|
||||||
|
[models.extra_params]
|
||||||
|
thinking_budget = -1 # 由模型自己决定
|
||||||
|
# thinking_budget = 0 禁用思考(如果可能)
|
||||||
|
|
||||||
|
|
||||||
[[models]]
|
[[models]]
|
||||||
model_identifier = "deepseek-ai/DeepSeek-R1"
|
model_identifier = "deepseek-ai/DeepSeek-R1"
|
||||||
name = "siliconflow-deepseek-r1"
|
name = "siliconflow-deepseek-r1"
|
||||||
|
|
@ -73,7 +93,6 @@ api_provider = "SiliconFlow"
|
||||||
price_in = 4.0
|
price_in = 4.0
|
||||||
price_out = 16.0
|
price_out = 16.0
|
||||||
|
|
||||||
|
|
||||||
[[models]]
|
[[models]]
|
||||||
model_identifier = "Qwen/Qwen3-30B-A3B-Instruct-2507"
|
model_identifier = "Qwen/Qwen3-30B-A3B-Instruct-2507"
|
||||||
name = "qwen3-30b"
|
name = "qwen3-30b"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue