feat: 添加 fallback(按顺序降级)模型选择策略

新增模型选择策略 fallback,按照模型列表顺序选择模型,只有当前模型不可用时才会使用下一个模型。

- 修改 api_ada_configs.py:更新 selection_strategy 文档说明

- 修改 utils_model.py:在 _select_model 方法中添加 fallback 策略逻辑

- 修改 model_config_template.toml:更新所有 selection_strategy 配置注释
pull/1488/head
Liskarm 2026-01-18 23:40:13 +08:00
parent 24e434eb3a
commit bdb7419226
3 changed files with 21 additions and 12 deletions

View File

@ -98,7 +98,7 @@ class TaskConfig(ConfigBase):
"""慢请求阈值(秒),超过此值会输出警告日志"""
selection_strategy: str = field(default="balance")
"""模型选择策略:balance(负载均衡)或 random(随机选择)"""
"""模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)"""
@dataclass

View File

@ -267,7 +267,7 @@ class LLMRequest:
def _select_model(self, exclude_models: Optional[Set[str]] = None) -> Tuple[ModelInfo, APIProvider, BaseClient]:
"""
根据配置的策略选择模型:balance(负载均衡)、random(随机选择)
根据配置的策略选择模型:balance(负载均衡)、random(随机选择)、fallback(按顺序降级)
"""
available_models = {
model: scores
@ -279,7 +279,16 @@ class LLMRequest:
strategy = self.model_for_task.selection_strategy.lower()
if strategy == "random":
if strategy == "fallback":
# 按顺序降级策略:按照模型列表顺序选择第一个可用的模型
selected_model_name = None
for model_name in self.model_for_task.model_list:
if model_name in available_models:
selected_model_name = model_name
break
if selected_model_name is None:
raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。")
elif strategy == "random":
# 随机选择策略
selected_model_name = random.choice(list(available_models.keys()))
elif strategy == "balance":

View File

@ -140,45 +140,45 @@ model_list = ["siliconflow-deepseek-v3.2"] # 使用的模型列表,每个子
temperature = 0.2 # 模型温度,新V3建议0.1-0.3
max_tokens = 4096 # 最大输出token数
slow_threshold = 15.0 # 慢请求阈值(秒),模型等待回复时间超过此值会输出警告日志
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
[model_task_config.tool_use] #功能模型,需要使用支持工具调用的模型,请使用较快的小模型(调用量较大)
model_list = ["qwen3-30b","qwen3-next-80b"]
temperature = 0.7
max_tokens = 1024
slow_threshold = 10.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
[model_task_config.replyer] # 首要回复模型,还用于表达方式学习
model_list = ["siliconflow-deepseek-v3.2","siliconflow-deepseek-v3.2-think","siliconflow-glm-4.6","siliconflow-glm-4.6-think"]
temperature = 0.3 # 模型温度,新V3建议0.1-0.3
max_tokens = 2048
slow_threshold = 25.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
[model_task_config.planner] #决策:负责决定麦麦该什么时候回复的模型
model_list = ["siliconflow-deepseek-v3.2"]
temperature = 0.3
max_tokens = 800
slow_threshold = 12.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
[model_task_config.vlm] # 图像识别模型
model_list = ["qwen3-vl-30"]
max_tokens = 256
slow_threshold = 15.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
[model_task_config.voice] # 语音识别模型
model_list = ["sensevoice-small"]
slow_threshold = 12.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
# 嵌入模型
[model_task_config.embedding]
model_list = ["bge-m3"]
slow_threshold = 5.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
# ------------LPMM知识库模型------------
@ -187,11 +187,11 @@ model_list = ["siliconflow-deepseek-v3.2"]
temperature = 0.2
max_tokens = 800
slow_threshold = 20.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
[model_task_config.lpmm_rdf_build] # RDF构建模型
model_list = ["siliconflow-deepseek-v3.2"]
temperature = 0.2
max_tokens = 800
slow_threshold = 20.0
selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)