From 8c707f4b9cfb60d17c9728314b5cb1d905514eb9 Mon Sep 17 00:00:00 2001 From: Liskarm Date: Mon, 19 Jan 2026 00:06:58 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20fallback=EF=BC=88?= =?UTF-8?q?=E6=8C=89=E9=A1=BA=E5=BA=8F=E9=99=8D=E7=BA=A7=EF=BC=89=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E9=80=89=E6=8B=A9=E7=AD=96=E7=95=A5=EF=BC=88=E5=8C=85?= =?UTF-8?q?=E5=90=AB=E5=89=8D=E5=90=8E=E7=AB=AF=E4=BF=AE=E6=94=B9=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增模型选择策略 fallback,按照模型列表顺序选择模型,只有当前模型不可用时才会使用下一个模型。 - 后端修改:在 _select_model 中实现 fallback 逻辑,更新配置说明。 - 前端修改:在 WebUI 模型选择菜单中添加 fallback 选项并更新文案。 --- .../config/model/components/TaskConfigCard.tsx | 3 ++- src/config/api_ada_configs.py | 2 +- src/llm_models/utils_model.py | 15 ++++++++++++--- template/model_config_template.toml | 18 +++++++++--------- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/dashboard/src/routes/config/model/components/TaskConfigCard.tsx b/dashboard/src/routes/config/model/components/TaskConfigCard.tsx index 51c63b1c..c90e0bfe 100644 --- a/dashboard/src/routes/config/model/components/TaskConfigCard.tsx +++ b/dashboard/src/routes/config/model/components/TaskConfigCard.tsx @@ -143,10 +143,11 @@ export const TaskConfigCard = React.memo(function TaskConfigCard({ 负载均衡(balance) 随机选择(random) + 按顺序降级(fallback)

- 负载均衡:优先选择使用次数少的模型。随机选择:完全随机从模型列表中选择 + 负载均衡:优先选择使用次数少的模型。随机选择:完全随机从模型列表中选择。按顺序降级:按列表顺序选择,第一个不可用时才使用下一个。

diff --git a/src/config/api_ada_configs.py b/src/config/api_ada_configs.py index f450e91f..600c7d68 100644 --- a/src/config/api_ada_configs.py +++ b/src/config/api_ada_configs.py @@ -98,7 +98,7 @@ class TaskConfig(ConfigBase): """慢请求阈值(秒),超过此值会输出警告日志""" selection_strategy: str = field(default="balance") - """模型选择策略:balance(负载均衡)或 random(随机选择)""" + """模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级)""" @dataclass diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index 7fe46fd5..49afbad4 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -267,7 +267,7 @@ class LLMRequest: def _select_model(self, exclude_models: Optional[Set[str]] = None) -> Tuple[ModelInfo, APIProvider, BaseClient]: """ - 根据配置的策略选择模型:balance(负载均衡)或 random(随机选择) + 根据配置的策略选择模型:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) """ available_models = { model: scores @@ -278,8 +278,17 @@ class LLMRequest: raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。") strategy = self.model_for_task.selection_strategy.lower() - - if strategy == "random": + + if strategy == "fallback": + # 按顺序降级策略:按照模型列表顺序选择第一个仍可用的模型 + selected_model_name = None + for model_name in self.model_for_task.model_list: + if model_name in available_models: + selected_model_name = model_name + break + if not selected_model_name: + raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。") + elif strategy == "random": # 随机选择策略 selected_model_name = random.choice(list(available_models.keys())) elif strategy == "balance": diff --git a/template/model_config_template.toml b/template/model_config_template.toml index 4e97cdb3..d8e1f0d2 100644 --- a/template/model_config_template.toml +++ b/template/model_config_template.toml @@ -140,45 +140,45 @@ model_list = ["siliconflow-deepseek-v3.2"] # 使用的模型列表,每个子 temperature = 0.2 # 模型温度,新V3建议0.1-0.3 max_tokens = 4096 # 最大输出token数 slow_threshold = 15.0 # 慢请求阈值(秒),模型等待回复时间超过此值会输出警告日志 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) 
+selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) [model_task_config.tool_use] #功能模型,需要使用支持工具调用的模型,请使用较快的小模型(调用量较大) model_list = ["qwen3-30b","qwen3-next-80b"] temperature = 0.7 max_tokens = 1024 slow_threshold = 10.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) [model_task_config.replyer] # 首要回复模型,还用于表达方式学习 model_list = ["siliconflow-deepseek-v3.2","siliconflow-deepseek-v3.2-think","siliconflow-glm-4.6","siliconflow-glm-4.6-think"] temperature = 0.3 # 模型温度,新V3建议0.1-0.3 max_tokens = 2048 slow_threshold = 25.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) [model_task_config.planner] #决策:负责决定麦麦该什么时候回复的模型 model_list = ["siliconflow-deepseek-v3.2"] temperature = 0.3 max_tokens = 800 slow_threshold = 12.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) [model_task_config.vlm] # 图像识别模型 model_list = ["qwen3-vl-30"] max_tokens = 256 slow_threshold = 15.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) [model_task_config.voice] # 语音识别模型 model_list = ["sensevoice-small"] slow_threshold = 12.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) # 嵌入模型 [model_task_config.embedding] model_list = ["bge-m3"] slow_threshold = 5.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) # ------------LPMM知识库模型------------ @@ -187,11 +187,11 @@ model_list = ["siliconflow-deepseek-v3.2"] temperature = 0.2 max_tokens = 800 slow_threshold = 
20.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) [model_task_config.lpmm_rdf_build] # RDF构建模型 model_list = ["siliconflow-deepseek-v3.2"] temperature = 0.2 max_tokens = 800 slow_threshold = 20.0 -selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) +selection_strategy = "random" # 模型选择策略:balance(负载均衡)、random(随机选择)或 fallback(按顺序降级) \ No newline at end of file