remove: drop the utils_small model, use the tool_use model throughout, and remove the deprecated LLM_JUDGE activation type

pull/1460/head
SengokuCola 2025-12-24 19:28:44 +08:00
parent a3c3fcf518
commit 7cbc2f1462
17 changed files with 309 additions and 255 deletions

View File

@@ -85,7 +85,6 @@ Action uses a **two-tier decision mechanism** to optimize performance and decision quality:
| ----------- | ---------------------------------------- | ---------------------- |
| [`NEVER`](#never-激活) | Never activated; the Action is invisible to 麦麦 | Temporarily disabling an Action |
| [`ALWAYS`](#always-激活) | Always activated; the Action is always in 麦麦's candidate pool | Core features such as reply and no-reply |
| [`LLM_JUDGE`](#llm_judge-激活) | An LLM judges whether the current situation calls for this Action | Complex scenarios that require intelligent judgment |
| `RANDOM` | Activated based on a random probability | Features that add behavioral randomness |
| `KEYWORD` | Activated when specific keywords are detected | Features with explicit trigger conditions |
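
With `LLM_JUDGE` removed by this commit, `KEYWORD` is the main conditional trigger left alongside `RANDOM`. A minimal sketch of keyword activation in the style of the examples below (the class name and keyword list are illustrative; the attribute names match the `TTSAction` migration later in this diff):
```python
class KeywordActivatedAction(BaseAction):
    activation_type = ActionActivationType.KEYWORD  # activate when a keyword appears in chat
    activation_keywords = ["播报", "tts"]  # illustrative keyword list
    keyword_case_sensitive = False  # match keywords case-insensitively

    async def execute(self) -> Tuple[bool, str]:
        return True, "执行了关键词触发的功能"
```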
@@ -117,30 +116,6 @@ class AlwaysActivatedAction(BaseAction):
return True, "执行了核心功能"
```
#### `LLM_JUDGE` activation
`ActionActivationType.LLM_JUDGE` makes an Action's entry into the candidate pool depend on an LLM's judgment.
The LLM judges based on the `llm_judge_prompt` preset in code together with the automatically supplied chat context.
Using this activation type therefore requires implementing the `llm_judge_prompt` attribute.
```python
class LLMJudgedAction(BaseAction):
    activation_type = ActionActivationType.LLM_JUDGE  # activated via LLM judgment

    # Prompt for the LLM judgment
    llm_judge_prompt = (
        "判定是否需要使用这个动作的条件:\n"
        "1. 用户希望调用XXX这个动作\n"
        "...\n"
        "请回答\"是\"或\"否\"。\n"
    )

    async def execute(self) -> Tuple[bool, str]:
        # Whether to execute is based on the LLM's judgment
        return True, "执行了LLM判断功能"
```
#### `RANDOM` activation
`ActionActivationType.RANDOM` makes an Action join the candidate pool based on a random probability.
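
The probability comes from the `random_activation_probability` class attribute (default 0.0 in `BaseAction`, as seen later in this diff); the planner activates the Action when `random.random()` falls below it. A minimal sketch in the same style as the example above, with an illustrative class name and probability:
```python
class RandomlyActivatedAction(BaseAction):
    activation_type = ActionActivationType.RANDOM  # activate by random chance
    random_activation_probability = 0.3  # join the candidate pool roughly 30% of the time

    async def execute(self) -> Tuple[bool, str]:
        return True, "执行了随机触发的功能"
```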

View File

@@ -0,0 +1,295 @@
"""
表达方式评估脚本
功能
1. 随机读取10条表达方式获取其situation和style
2. 使用LLM对表达方式进行评估每个表达方式单独评估
3. 如果合适就通过如果不合适就丢弃
4. 不真正修改数据库只是做评估
"""
import asyncio
import random
import json
import sys
import os
# 添加项目根目录到路径
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, project_root)
from src.common.database.database_model import Expression
from src.common.database.database import db
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
from src.common.logger import get_logger
logger = get_logger("expression_evaluator")
def get_random_expressions(count: int = 10) -> list[Expression]:
"""
随机读取指定数量的表达方式
Args:
count: 要读取的数量默认10条
Returns:
表达方式列表
"""
try:
# 查询所有表达方式
all_expressions = list(Expression.select())
if not all_expressions:
logger.warning("数据库中没有表达方式记录")
return []
# 如果总数少于请求数量,返回所有
if len(all_expressions) <= count:
logger.info(f"数据库中共有 {len(all_expressions)} 条表达方式,全部返回")
return all_expressions
# 随机选择指定数量
selected = random.sample(all_expressions, count)
logger.info(f"{len(all_expressions)} 条表达方式中随机选择了 {len(selected)}")
return selected
except Exception as e:
logger.error(f"随机读取表达方式失败: {e}")
import traceback
logger.error(traceback.format_exc())
return []
def create_evaluation_prompt(situation: str, style: str) -> str:
"""
创建评估提示词
Args:
situation: 情境
style: 风格
Returns:
评估提示词
"""
prompt = f"""请评估以下表达方式是否合适:
情境situation{situation}
风格style{style}
请从以下方面进行评估
1. 情境描述是否清晰准确
2. 风格表达是否合理自然
3. 情境和风格是否匹配
4. 是否存在不当内容或表达
请以JSON格式输出评估结果
{{
"suitable": true/false,
"reason": "评估理由(如果不合适,请说明原因)"
}}
如果合适suitable设为true如果不合适suitable设为false并在reason中说明原因
请严格按照JSON格式输出不要包含其他内容"""
return prompt
async def evaluate_expression(expression: Expression, llm: LLMRequest) -> dict:
"""
使用LLM评估单个表达方式
Args:
expression: 表达方式对象
llm: LLM请求实例
Returns:
评估结果字典包含
- expression_id: 表达方式ID
- situation: 情境
- style: 风格
- suitable: 是否合适
- reason: 评估理由
- error: 错误信息如果有
"""
result = {
"expression_id": expression.id,
"situation": expression.situation,
"style": expression.style,
"suitable": None,
"reason": None,
"error": None
}
try:
# 创建评估提示词
prompt = create_evaluation_prompt(expression.situation, expression.style)
# 调用LLM进行评估
logger.info(f"正在评估表达方式 ID: {expression.id}, Situation: {expression.situation}, Style: {expression.style}")
response, (reasoning, model_name, _) = await llm.generate_response_async(
prompt=prompt,
temperature=0.3,
max_tokens=500
)
logger.debug(f"LLM响应: {response}")
logger.debug(f"使用模型: {model_name}")
# 解析JSON响应
try:
# 尝试直接解析
evaluation = json.loads(response)
except json.JSONDecodeError:
# 如果直接解析失败尝试提取JSON部分
import re
json_match = re.search(r'\{[^{}]*"suitable"[^{}]*\}', response, re.DOTALL)
if json_match:
evaluation = json.loads(json_match.group())
else:
raise ValueError("无法从响应中提取JSON格式的评估结果")
# 提取评估结果
result["suitable"] = evaluation.get("suitable", False)
result["reason"] = evaluation.get("reason", "未提供理由")
logger.info(f"表达方式 ID: {expression.id} 评估结果: {'通过' if result['suitable'] else '不通过'}")
if result["reason"]:
logger.info(f"评估理由: {result['reason']}")
except Exception as e:
logger.error(f"评估表达方式 ID: {expression.id} 时出错: {e}")
import traceback
logger.error(traceback.format_exc())
result["error"] = str(e)
result["suitable"] = False
result["reason"] = f"评估过程出错: {str(e)}"
return result
async def main():
"""主函数"""
logger.info("=" * 60)
logger.info("开始表达方式评估")
logger.info("=" * 60)
# 初始化数据库连接
try:
db.connect(reuse_if_open=True)
logger.info("数据库连接成功")
except Exception as e:
logger.error(f"数据库连接失败: {e}")
return
# 1. 随机读取10条表达方式
logger.info("\n步骤1: 随机读取10条表达方式")
expressions = get_random_expressions(10)
if not expressions:
logger.error("没有可用的表达方式,退出")
return
logger.info(f"成功读取 {len(expressions)} 条表达方式")
for i, expr in enumerate(expressions, 1):
logger.info(f" {i}. ID: {expr.id}, Situation: {expr.situation}, Style: {expr.style}")
# 2. 创建LLM实例
logger.info("\n步骤2: 创建LLM实例")
try:
llm = LLMRequest(
model_set=model_config.model_task_config.tool_use,
request_type="expression_evaluator"
)
logger.info("LLM实例创建成功")
except Exception as e:
logger.error(f"创建LLM实例失败: {e}")
import traceback
logger.error(traceback.format_exc())
return
# 3. 对每个表达方式进行评估
logger.info("\n步骤3: 开始评估表达方式")
results = []
for i, expression in enumerate(expressions, 1):
logger.info(f"\n--- 评估进度: {i}/{len(expressions)} ---")
result = await evaluate_expression(expression, llm)
results.append(result)
# 添加短暂延迟,避免请求过快
if i < len(expressions):
await asyncio.sleep(0.5)
# 4. 汇总结果
logger.info("\n" + "=" * 60)
logger.info("评估结果汇总")
logger.info("=" * 60)
passed = [r for r in results if r["suitable"] is True]
failed = [r for r in results if r["suitable"] is False]
errors = [r for r in results if r["error"] is not None]
logger.info(f"\n总计: {len(results)}")
logger.info(f"通过: {len(passed)}")
logger.info(f"不通过: {len(failed)}")
if errors:
logger.info(f"出错: {len(errors)}")
# 详细结果
logger.info("\n--- 通过的表达方式 ---")
if passed:
for r in passed:
logger.info(f" ID: {r['expression_id']}")
logger.info(f" Situation: {r['situation']}")
logger.info(f" Style: {r['style']}")
if r['reason']:
logger.info(f" 理由: {r['reason']}")
else:
logger.info("")
logger.info("\n--- 不通过的表达方式 ---")
if failed:
for r in failed:
logger.info(f" ID: {r['expression_id']}")
logger.info(f" Situation: {r['situation']}")
logger.info(f" Style: {r['style']}")
if r['reason']:
logger.info(f" 理由: {r['reason']}")
if r['error']:
logger.info(f" 错误: {r['error']}")
else:
logger.info("")
# 保存结果到JSON文件可选
output_file = os.path.join(project_root, "data", "expression_evaluation_results.json")
try:
os.makedirs(os.path.dirname(output_file), exist_ok=True)
with open(output_file, "w", encoding="utf-8") as f:
json.dump({
"total": len(results),
"passed": len(passed),
"failed": len(failed),
"errors": len(errors),
"results": results
}, f, ensure_ascii=False, indent=2)
logger.info(f"\n评估结果已保存到: {output_file}")
except Exception as e:
logger.warning(f"保存结果到文件失败: {e}")
logger.info("\n" + "=" * 60)
logger.info("评估完成")
logger.info("=" * 60)
# 关闭数据库连接
try:
db.close()
logger.info("数据库连接已关闭")
except Exception as e:
logger.warning(f"关闭数据库连接时出错: {e}")
if __name__ == "__main__":
asyncio.run(main())
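
Because the script only evaluates and never writes back to the database, the saved JSON report is its sole artifact. A minimal sketch of inspecting that report offline, assuming the script has been run once from the project root so that `data/expression_evaluation_results.json` exists:
```python
import json

# Load the report written by the evaluation script above
with open("data/expression_evaluation_results.json", encoding="utf-8") as f:
    report = json.load(f)

print(f"passed {report['passed']}/{report['total']}, errors: {report['errors']}")
# Show each failed expression together with the LLM's reasoning
for r in report["results"]:
    if r["suitable"] is False:
        print(f"- id={r['expression_id']}: {r['reason']}")
```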

View File

@@ -226,7 +226,7 @@ async def simulate_merge(
    if use_llm:
        try:
            summary_model = LLMRequest(
                model_set=model_config.model_task_config.utils_small,
                model_set=model_config.model_task_config.tool_use,
                request_type="expression.summary"
            )
            print("✅ LLM 模型已初始化,将进行实际总结")

View File

@@ -89,7 +89,7 @@ class ExpressionLearner:
            model_set=model_config.model_task_config.utils, request_type="expression.learner"
        )
        self.summary_model: LLMRequest = LLMRequest(
            model_set=model_config.model_task_config.utils_small, request_type="expression.summary"
            model_set=model_config.model_task_config.utils, request_type="expression.summary"
        )
        self.chat_id = chat_id
        self.chat_stream = get_chat_manager().get_stream(chat_id)

View File

@@ -45,7 +45,7 @@ def init_prompt():
class ExpressionSelector:
    def __init__(self):
        self.llm_model = LLMRequest(
            model_set=model_config.model_task_config.utils_small, request_type="expression.selector"
            model_set=model_config.model_task_config.tool_use, request_type="expression.selector"
        )

    def can_use_expression_for_chat(self, chat_id: str) -> bool:

View File

@@ -444,7 +444,7 @@ class BrainPlanner:
            if action_info.activation_type == ActionActivationType.NEVER:
                logger.debug(f"{self.log_prefix}动作 {action_name} 设置为 NEVER 激活类型,跳过")
                continue
            elif action_info.activation_type in [ActionActivationType.LLM_JUDGE, ActionActivationType.ALWAYS]:
            elif action_info.activation_type == ActionActivationType.ALWAYS:
                filtered_actions[action_name] = action_info
            elif action_info.activation_type == ActionActivationType.RANDOM:
                if random.random() < action_info.random_activation_probability:

View File

@@ -1,12 +1,9 @@
import random
import asyncio
import hashlib
import time
from typing import List, Dict, TYPE_CHECKING, Tuple
from src.common.logger import get_logger
from src.config.config import global_config, model_config
from src.llm_models.utils_model import LLMRequest
from src.config.config import global_config
from src.chat.message_receive.chat_stream import get_chat_manager, ChatMessageContext
from src.chat.planner_actions.action_manager import ActionManager
from src.chat.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat, build_readable_messages
@@ -35,14 +32,6 @@ class ActionModifier:
        self.action_manager = action_manager

        # Small model used for LLM-based judging
        self.llm_judge = LLMRequest(model_set=model_config.model_task_config.utils_small, request_type="action.judge")

        # Cache-related attributes
        self._llm_judge_cache = {}  # cached LLM judgment results
        self._cache_expiry_time = 30  # cache expiry time in seconds
        self._last_context_hash = None  # hash of the previous context

    async def modify_actions(
        self,
        message_content: str = "",
@@ -159,9 +148,6 @@
        """
        deactivated_actions = []

        # Sort the actions by activation type for separate handling
        llm_judge_actions: Dict[str, ActionInfo] = {}

        actions_to_check = list(actions_with_info.items())
        random.shuffle(actions_to_check)
@@ -185,9 +171,6 @@
                deactivated_actions.append((action_name, reason))
                logger.debug(f"{self.log_prefix}未激活动作: {action_name},原因: {reason}")
            elif activation_type == ActionActivationType.LLM_JUDGE:
                llm_judge_actions[action_name] = action_info
            elif activation_type == ActionActivationType.NEVER:
                reason = "激活类型为never"
                deactivated_actions.append((action_name, reason))
@@ -196,194 +179,8 @@
            else:
                logger.warning(f"{self.log_prefix}未知的激活类型: {activation_type},跳过处理")

        # Process the LLM_JUDGE actions in parallel
        if llm_judge_actions:
            llm_results = await self._process_llm_judge_actions_parallel(
                llm_judge_actions,
                chat_content,
            )
            for action_name, should_activate in llm_results.items():
                if not should_activate:
                    reason = "LLM判定未激活"
                    deactivated_actions.append((action_name, reason))
                    logger.debug(f"{self.log_prefix}未激活动作: {action_name},原因: {reason}")

        return deactivated_actions

    def _generate_context_hash(self, chat_content: str) -> str:
        """Generate a hash of the context, used as a cache key"""
        context_content = f"{chat_content}"
        return hashlib.md5(context_content.encode("utf-8")).hexdigest()

    async def _process_llm_judge_actions_parallel(
        self,
        llm_judge_actions: Dict[str, ActionInfo],
        chat_content: str = "",
    ) -> Dict[str, bool]:
        """
        Run the LLM judgments for these actions in parallel, with caching

        Args:
            llm_judge_actions: actions that need an LLM judgment
            chat_content: the chat content

        Returns:
            Dict[str, bool]: mapping from action name to activation result
        """
        # Hash the current context
        current_context_hash = self._generate_context_hash(chat_content)
        current_time = time.time()
        results = {}
        tasks_to_run: Dict[str, ActionInfo] = {}

        # Check the cache first
        for action_name, action_info in llm_judge_actions.items():
            cache_key = f"{action_name}_{current_context_hash}"
            # Use the cached result if it has not expired
            if (
                cache_key in self._llm_judge_cache
                and current_time - self._llm_judge_cache[cache_key]["timestamp"] < self._cache_expiry_time
            ):
                results[action_name] = self._llm_judge_cache[cache_key]["result"]
                logger.debug(
                    f"{self.log_prefix}使用缓存结果 {action_name}: {'激活' if results[action_name] else '未激活'}"
                )
            else:
                # An LLM judgment is needed
                tasks_to_run[action_name] = action_info

        # Run any remaining judgments in parallel
        if tasks_to_run:
            logger.debug(f"{self.log_prefix}并行执行LLM判定任务数: {len(tasks_to_run)}")

            # Build the parallel tasks
            tasks = []
            task_names = []
            for action_name, action_info in tasks_to_run.items():
                task = self._llm_judge_action(
                    action_name,
                    action_info,
                    chat_content,
                )
                tasks.append(task)
                task_names.append(action_name)

            # Execute all tasks in parallel
            try:
                task_results = await asyncio.gather(*tasks, return_exceptions=True)

                # Collect the results and update the cache
                for action_name, result in zip(task_names, task_results, strict=False):
                    if isinstance(result, Exception):
                        logger.error(f"{self.log_prefix}LLM判定action {action_name} 时出错: {result}")
                        results[action_name] = False
                    else:
                        results[action_name] = result
                        # Update the cache
                        cache_key = f"{action_name}_{current_context_hash}"
                        self._llm_judge_cache[cache_key] = {"result": result, "timestamp": current_time}
                logger.debug(f"{self.log_prefix}并行LLM判定完成,耗时: {time.time() - current_time:.2f}s")
            except Exception as e:
                logger.error(f"{self.log_prefix}并行LLM判定失败: {e}")
                # If the parallel run failed, report False for every task
                for action_name in tasks_to_run:
                    results[action_name] = False

        # Evict expired cache entries
        self._cleanup_expired_cache(current_time)
        return results

    def _cleanup_expired_cache(self, current_time: float):
        """Remove expired cache entries"""
        expired_keys = []
        expired_keys.extend(
            cache_key
            for cache_key, cache_data in self._llm_judge_cache.items()
            if current_time - cache_data["timestamp"] > self._cache_expiry_time
        )
        for key in expired_keys:
            del self._llm_judge_cache[key]
        if expired_keys:
            logger.debug(f"{self.log_prefix}清理了 {len(expired_keys)} 个过期缓存条目")

    async def _llm_judge_action(
        self,
        action_name: str,
        action_info: ActionInfo,
        chat_content: str = "",
    ) -> bool:  # sourcery skip: move-assign-in-block, use-named-expression
        """
        Ask the LLM whether a given action should be activated

        Args:
            action_name: the action name
            action_info: the action info
            chat_content: the observed chat content

        Returns:
            bool: whether this action should be activated
        """
        try:
            # Build the judgment prompt
            action_description = action_info.description
            action_require = action_info.action_require
            custom_prompt = action_info.llm_judge_prompt

            # Base judgment prompt
            base_prompt = f"""
你需要判断在当前聊天情况下,是否应该激活名为"{action_name}"的动作。

动作描述:{action_description}

动作使用场景:
"""
            for req in action_require:
                base_prompt += f"- {req}\n"

            if custom_prompt:
                base_prompt += f"\n额外判定条件:\n{custom_prompt}\n"

            if chat_content:
                base_prompt += f"\n当前聊天记录:\n{chat_content}\n"

            base_prompt += """
请根据以上信息,判断是否应该激活这个动作。
只需要回答"是"或"否",不要有其他内容。
"""

            # Call the LLM for the judgment
            response, _ = await self.llm_judge.generate_response_async(prompt=base_prompt)

            # Parse the response
            response = response.strip().lower()
            # print(base_prompt)
            # print(f"LLM判定动作 {action_name}:响应='{response}'")
            should_activate = "是" in response or "yes" in response or "true" in response

            logger.debug(
                f"{self.log_prefix}LLM判定动作 {action_name}:响应='{response}',结果={'激活' if should_activate else '不激活'}"
            )
            return should_activate
        except Exception as e:
            logger.error(f"{self.log_prefix}LLM判定动作 {action_name} 时出错: {e}")
            # Default to not activating on error
            return False

    def _check_keyword_activation(
        self,
        action_name: str,

View File

@@ -591,7 +591,7 @@ class ActionPlanner:
            if action_info.activation_type == ActionActivationType.NEVER:
                logger.debug(f"{self.log_prefix}动作 {action_name} 设置为 NEVER 激活类型,跳过")
                continue
            elif action_info.activation_type in [ActionActivationType.LLM_JUDGE, ActionActivationType.ALWAYS]:
            elif action_info.activation_type == ActionActivationType.ALWAYS:
                filtered_actions[action_name] = action_info
            elif action_info.activation_type == ActionActivationType.RANDOM:
                if random.random() < action_info.random_activation_probability:

View File

@@ -774,7 +774,7 @@ class PrivateReplyer:
        expression_habits_block, selected_expressions = results_dict["expression_habits"]
        expression_habits_block: str
        selected_expressions: List[int]
        relation_info: str = results_dict["relation_info"]
        relation_info: str = results_dict.get("relation_info") or ""
        tool_info: str = results_dict["tool_info"]
        prompt_info: str = results_dict["prompt_info"]  # use the already formatted result directly
        actions_info: str = results_dict["actions_info"]

View File

@@ -105,9 +105,6 @@ class ModelTaskConfig(ConfigBase):
    utils: TaskConfig
    """Component model configuration"""

    utils_small: TaskConfig
    """Small component model configuration"""

    replyer: TaskConfig
    """Configuration of the primary reply model for normal_chat"""

View File

@@ -57,7 +57,7 @@ TEMPLATE_DIR = os.path.join(PROJECT_ROOT, "template")
# Since the mai_version in the config file is not updated automatically, it is hard-coded here
# Updates to this field must strictly follow Semantic Versioning: https://semver.org/lang/zh-CN/
MMC_VERSION = "0.12.0"
MMC_VERSION = "0.12.1"

def get_key_comment(toml_table, key):

View File

@@ -19,7 +19,7 @@ from src.chat.message_receive.chat_stream import get_chat_manager
logger = get_logger("person_info")

relation_selection_model = LLMRequest(
    model_set=model_config.model_task_config.utils_small, request_type="relation_selection"
    model_set=model_config.model_task_config.tool_use, request_type="relation_selection"
)

View File

@@ -28,7 +28,6 @@ class BaseAction(ABC):
    - keyword_case_sensitive: whether keyword matching is case-sensitive
    - parallel_action: whether parallel execution is allowed
    - random_activation_probability: random activation probability
    - llm_judge_prompt: prompt for the LLM judgment
    """

    def __init__(
@@ -81,8 +80,6 @@
        """Activation type"""
        self.random_activation_probability: float = getattr(self.__class__, "random_activation_probability", 0.0)
        """Probability used when the activation type is RANDOM"""
        self.llm_judge_prompt: str = getattr(self.__class__, "llm_judge_prompt", "")  # deprecated
        """Prompt that assists the LLM judgment"""
        self.activation_keywords: list[str] = getattr(self.__class__, "activation_keywords", []).copy()
        """Keyword list used when the activation type is KEYWORD"""
        self.keyword_case_sensitive: bool = getattr(self.__class__, "keyword_case_sensitive", False)
@@ -504,7 +501,6 @@
            keyword_case_sensitive=getattr(cls, "keyword_case_sensitive", False),
            parallel_action=getattr(cls, "parallel_action", True),
            random_activation_probability=getattr(cls, "random_activation_probability", 0.0),
            llm_judge_prompt=getattr(cls, "llm_judge_prompt", ""),
            # use the correct field names
            action_parameters=getattr(cls, "action_parameters", {}).copy(),
            action_require=getattr(cls, "action_require", []).copy(),

View File

@@ -33,7 +33,6 @@ class ActionActivationType(Enum):
    NEVER = "never"  # never activated (disabled by default)
    ALWAYS = "always"  # always offered to the planner
    LLM_JUDGE = "llm_judge"  # an LLM decides whether to offer this action to the planner
    RANDOM = "random"  # offered to the planner at random
    KEYWORD = "keyword"  # offered to the planner when keywords trigger it
@@ -128,7 +127,6 @@ class ActionInfo(ComponentInfo):
    normal_activation_type: ActionActivationType = ActionActivationType.ALWAYS  # deprecated
    activation_type: ActionActivationType = ActionActivationType.ALWAYS
    random_activation_probability: float = 0.0
    llm_judge_prompt: str = ""
    activation_keywords: List[str] = field(default_factory=list)  # list of activation keywords
    keyword_case_sensitive: bool = False

    # Mode and parallelism settings

View File

@@ -28,7 +28,7 @@
      "type": "action",
      "name": "tts_action",
      "description": "将文本转换为语音进行播放",
      "activation_modes": ["llm_judge", "keyword"],
      "activation_modes": ["keyword"],
      "keywords": ["语音", "tts", "播报", "读出来", "语音播放", "听", "朗读"]
    }
  ],

View File

@@ -13,7 +13,9 @@ class TTSAction(BaseAction):
    """Handler class for the TTS text-to-speech action"""

    # Activation settings
    activation_type = ActionActivationType.LLM_JUDGE
    activation_type = ActionActivationType.KEYWORD
    activation_keywords = ["语音", "tts", "播报", "读出来", "语音播放", "听", "朗读"]
    keyword_case_sensitive = False
    parallel_action = False

    # Basic action info

View File

@@ -1,5 +1,5 @@
[inner]
version = "1.9.1"
version = "1.10.1"
# Version numbering for this config file follows the same rules as bot_config.toml
@@ -141,12 +141,6 @@ temperature = 0.2 # model temperature; 0.1-0.3 recommended for the new V3
max_tokens = 4096 # maximum output tokens
slow_threshold = 15.0 # slow-request threshold in seconds; a warning is logged when a reply takes longer than this

[model_task_config.utils_small] # small model used by several of 麦麦's components; it is called heavily, so a fast small model is recommended
model_list = ["qwen3-30b","qwen3-next-80b"]
temperature = 0.7
max_tokens = 2048
slow_threshold = 10.0

[model_task_config.tool_use] # tool-calling model; must support tool calls
model_list = ["qwen3-30b","qwen3-next-80b"]
temperature = 0.7