From f92136bffcb7a42dd9910aa923afd1361e2947ae Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sat, 27 Dec 2025 17:33:24 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9B=E6=A8=A1=E5=9E=8B=E9=80=89?= =?UTF-8?q?=E6=8B=A9=E7=8E=B0=E5=9C=A8=E5=8F=AF=E4=BB=A5=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E5=AE=8C=E5=85=A8=E9=9A=8F=E6=9C=BA=E7=9A=84=E7=AD=96=E7=95=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update model_config_template.toml --- src/config/api_ada_configs.py | 3 +++ src/llm_models/utils_model.py | 30 ++++++++++++++++++++++------- template/model_config_template.toml | 13 +++++++++++-- 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/config/api_ada_configs.py b/src/config/api_ada_configs.py index 64477700..f450e91f 100644 --- a/src/config/api_ada_configs.py +++ b/src/config/api_ada_configs.py @@ -97,6 +97,9 @@ class TaskConfig(ConfigBase): slow_threshold: float = 15.0 """慢请求阈值(秒),超过此值会输出警告日志""" + selection_strategy: str = field(default="balance") + """模型选择策略:balance(负载均衡)或 random(随机选择)""" + @dataclass class ModelTaskConfig(ConfigBase): diff --git a/src/llm_models/utils_model.py b/src/llm_models/utils_model.py index df22c9cd..0f04f4e7 100644 --- a/src/llm_models/utils_model.py +++ b/src/llm_models/utils_model.py @@ -1,6 +1,7 @@ import re import asyncio import time +import random from enum import Enum from rich.traceback import install @@ -266,7 +267,7 @@ class LLMRequest: def _select_model(self, exclude_models: Optional[Set[str]] = None) -> Tuple[ModelInfo, APIProvider, BaseClient]: """ - 根据总tokens和惩罚值选择的模型 + 根据配置的策略选择模型:balance(负载均衡)或 random(随机选择) """ available_models = { model: scores @@ -276,15 +277,30 @@ class LLMRequest: if not available_models: raise RuntimeError("没有可用的模型可供选择。所有模型均已尝试失败。") - least_used_model_name = min( - available_models, - key=lambda k: available_models[k][0] + available_models[k][1] * 300 + available_models[k][2] * 1000, - ) - model_info = 
model_config.get_model_info(least_used_model_name) + strategy = self.model_for_task.selection_strategy.lower() + + if strategy == "random": + # 随机选择策略 + selected_model_name = random.choice(list(available_models.keys())) + elif strategy == "balance": + # 负载均衡策略:根据总tokens和惩罚值选择 + selected_model_name = min( + available_models, + key=lambda k: available_models[k][0] + available_models[k][1] * 300 + available_models[k][2] * 1000, + ) + else: + # 默认使用负载均衡策略 + logger.warning(f"未知的选择策略 '{strategy}',使用默认的负载均衡策略") + selected_model_name = min( + available_models, + key=lambda k: available_models[k][0] + available_models[k][1] * 300 + available_models[k][2] * 1000, + ) + + model_info = model_config.get_model_info(selected_model_name) api_provider = model_config.get_provider(model_info.api_provider) force_new_client = self.request_type == "embedding" client = client_registry.get_client_class_instance(api_provider, force_new=force_new_client) - logger.debug(f"选择请求模型: {model_info.name}") + logger.debug(f"选择请求模型: {model_info.name} (策略: {strategy})") total_tokens, penalty, usage_penalty = self.model_usage[model_info.name] self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty + 1) return model_info, api_provider, client diff --git a/template/model_config_template.toml b/template/model_config_template.toml index f9fae003..ed45047e 100644 --- a/template/model_config_template.toml +++ b/template/model_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "1.10.1" +version = "1.11.0" # 配置文件版本号迭代规则同bot_config.toml @@ -140,38 +140,45 @@ model_list = ["siliconflow-deepseek-v3.2"] # 使用的模型列表,每个子 temperature = 0.2 # 模型温度,新V3建议0.1-0.3 max_tokens = 4096 # 最大输出token数 slow_threshold = 15.0 # 慢请求阈值(秒),模型等待回复时间超过此值会输出警告日志 +selection_strategy = "random" # 模型选择策略:balance(负载均衡)或 random(随机选择) [model_task_config.tool_use] #功能模型,需要使用支持工具调用的模型,请使用较快的小模型(调用量较大) model_list = ["qwen3-30b","qwen3-next-80b"] temperature = 0.7 max_tokens = 1024 slow_threshold = 10.0 +selection_strategy = 
"random" # 模型选择策略:random(负载均衡)或 random(随机选择) [model_task_config.replyer] # 首要回复模型,还用于表达方式学习 model_list = ["siliconflow-deepseek-v3.2","siliconflow-deepseek-v3.2-think","siliconflow-glm-4.6","siliconflow-glm-4.6-think"] temperature = 0.3 # 模型温度,新V3建议0.1-0.3 max_tokens = 2048 slow_threshold = 25.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) [model_task_config.planner] #决策:负责决定麦麦该什么时候回复的模型 model_list = ["siliconflow-deepseek-v3.2"] temperature = 0.3 max_tokens = 800 slow_threshold = 12.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) [model_task_config.vlm] # 图像识别模型 model_list = ["qwen3-vl-30"] max_tokens = 256 slow_threshold = 15.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) [model_task_config.voice] # 语音识别模型 model_list = ["sensevoice-small"] slow_threshold = 12.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) # 嵌入模型 [model_task_config.embedding] model_list = ["bge-m3"] slow_threshold = 5.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) # ------------LPMM知识库模型------------ @@ -180,9 +187,11 @@ model_list = ["siliconflow-deepseek-v3.2"] temperature = 0.2 max_tokens = 800 slow_threshold = 20.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) [model_task_config.lpmm_rdf_build] # RDF构建模型 model_list = ["siliconflow-deepseek-v3.2"] temperature = 0.2 max_tokens = 800 -slow_threshold = 20.0 \ No newline at end of file +slow_threshold = 20.0 +selection_strategy = "random" # 模型选择策略:random(负载均衡)或 random(随机选择) \ No newline at end of file