mirror of https://github.com/Mai-with-u/MaiBot.git
feat: add "fallback" (ordered degradation) model selection strategy (frontend and backend changes)
Adds a new model selection strategy, fallback, which walks the model list in order and only moves on to the next model when the current one is unavailable.
- Backend: implement the fallback logic in _select_model and update the configuration documentation.
- Frontend: add a fallback option to the WebUI model selection menu and update its description text.
pull/1489/head
parent 19eee8358f
commit 8c707f4b9c
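The diff below touches the WebUI select component, the TaskConfig dataclass, LLMRequest._select_model, and the bundled config template. For orientation, here is a minimal, self-contained sketch of the ordered-fallback selection described above; the function and model names are hypothetical and independent of the MaiBot codebase, whose real implementation lives in LLMRequest._select_model as shown in the diff.

from typing import Iterable, Optional, Set


def pick_fallback_model(model_list: Iterable[str], exclude_models: Optional[Set[str]] = None) -> str:
    """Return the first model in the configured order that has not been excluded (e.g. after a failure)."""
    excluded = exclude_models or set()
    for model_name in model_list:
        if model_name not in excluded:
            return model_name
    raise RuntimeError("No model available: every model in the list has been excluded.")


# The second model is only used once the first one has been excluded.
assert pick_fallback_model(["model-a", "model-b"]) == "model-a"
assert pick_fallback_model(["model-a", "model-b"], {"model-a"}) == "model-b"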
@@ -143,10 +143,11 @@ export const TaskConfigCard = React.memo(function TaskConfigCard({
             <SelectContent>
               <SelectItem value="balance">负载均衡(balance)</SelectItem>
               <SelectItem value="random">随机选择(random)</SelectItem>
+              <SelectItem value="fallback">按顺序降级(fallback)</SelectItem>
             </SelectContent>
           </Select>
           <p className="text-xs text-muted-foreground">
-            负载均衡:优先选择使用次数少的模型。随机选择:完全随机从模型列表中选择
+            负载均衡:优先选择使用次数少的模型。随机选择:完全随机从模型列表中选择。按顺序降级:按列表顺序选择,第一个不可用时才使用下一个。
           </p>
         </div>
       </div>
@@ -98,7 +98,7 @@ class TaskConfig(ConfigBase):
     """慢请求阈值(秒),超过此值会输出警告日志"""

     selection_strategy: str = field(default="balance")
-    """模型选择策略:balance(负载均衡)或 random(随机选择)"""
+    """模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)"""


 @dataclass
@@ -267,7 +267,7 @@ class LLMRequest:

     def _select_model(self, exclude_models: Optional[Set[str]] = None) -> Tuple[ModelInfo, APIProvider, BaseClient]:
         """
-        根据配置的策略选择模型:balance(负载均衡)或 random(随机选择)
+        根据配置的策略选择模型:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)
         """
         available_models = {
             model: scores
@@ -278,8 +278,17 @@ class LLMRequest:
             raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。")

         strategy = self.model_for_task.selection_strategy.lower()

-        if strategy == "random":
+        if strategy == "fallback":
+            # 按顺序降级策略:按照模型列表顺序选择第一个未被排除的模型
+            selected_model_name = None
+            for model_name in self.model_for_task.model_list:
+                if not exclude_models or model_name not in exclude_models:
+                    selected_model_name = model_name
+                    break
+            if not selected_model_name:
+                raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。")
+        elif strategy == "random":
             # 随机选择策略
             selected_model_name = random.choice(list(available_models.keys()))
         elif strategy == "balance":
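The new branch reuses the existing exclude_models contract: models that have already failed are skipped, so selection degrades strictly in list order across retries. As a hedged illustration of that interaction (not the project's actual retry code; request_with_fallback and do_request are invented names), a caller-side loop could look like this:

from typing import Callable, List, Set


def request_with_fallback(model_list: List[str], do_request: Callable[[str], str]) -> str:
    """Hypothetical retry loop: try models strictly in list order, moving to the
    next one only after the current model raises. Mirrors the interaction of the
    'fallback' strategy with exclude_models, but is not MaiBot's code."""
    failed: Set[str] = set()
    while True:
        candidates = [m for m in model_list if m not in failed]
        if not candidates:
            raise RuntimeError("No model available: every model in the list has failed.")
        model_name = candidates[0]  # fallback: always the first not-yet-failed model
        try:
            return do_request(model_name)
        except Exception:
            failed.add(model_name)  # exclude the failed model and degrade to the next one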
@@ -140,45 +140,45 @@ model_list = ["siliconflow-deepseek-v3.2"] # 使用的模型列表,每个子
 temperature = 0.2 # 模型温度,新V3建议0.1-0.3
 max_tokens = 4096 # 最大输出token数
 slow_threshold = 15.0 # 慢请求阈值(秒),模型等待回复时间超过此值会输出警告日志
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 [model_task_config.tool_use] #功能模型,需要使用支持工具调用的模型,请使用较快的小模型(调用量较大)
 model_list = ["qwen3-30b","qwen3-next-80b"]
 temperature = 0.7
 max_tokens = 1024
 slow_threshold = 10.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 [model_task_config.replyer] # 首要回复模型,还用于表达方式学习
 model_list = ["siliconflow-deepseek-v3.2","siliconflow-deepseek-v3.2-think","siliconflow-glm-4.6","siliconflow-glm-4.6-think"]
 temperature = 0.3 # 模型温度,新V3建议0.1-0.3
 max_tokens = 2048
 slow_threshold = 25.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 [model_task_config.planner] #决策:负责决定麦麦该什么时候回复的模型
 model_list = ["siliconflow-deepseek-v3.2"]
 temperature = 0.3
 max_tokens = 800
 slow_threshold = 12.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 [model_task_config.vlm] # 图像识别模型
 model_list = ["qwen3-vl-30"]
 max_tokens = 256
 slow_threshold = 15.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 [model_task_config.voice] # 语音识别模型
 model_list = ["sensevoice-small"]
 slow_threshold = 12.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 # 嵌入模型
 [model_task_config.embedding]
 model_list = ["bge-m3"]
 slow_threshold = 5.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 # ------------LPMM知识库模型------------

@@ -187,11 +187,11 @@ model_list = ["siliconflow-deepseek-v3.2"]
 temperature = 0.2
 max_tokens = 800
 slow_threshold = 20.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)

 [model_task_config.lpmm_rdf_build] # RDF构建模型
 model_list = ["siliconflow-deepseek-v3.2"]
 temperature = 0.2
 max_tokens = 800
 slow_threshold = 20.0
-selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择)或 fallback(按顺序降级)