From bc2431ce98acb9a5716cc03168b6207e55a34019 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Thu, 18 Dec 2025 21:01:32 +0800 Subject: [PATCH] =?UTF-8?q?=E9=BB=91=E8=AF=9D=E6=8F=90=E5=8F=96=E6=96=B0?= =?UTF-8?q?=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bw_learner/expression_learner.py | 2 +- src/chat/brain_chat/brain_chat.py | 15 +++++ src/chat/heart_flow/heartFC_chat.py | 15 +++++ src/chat/planner_actions/planner.py | 15 ++++- src/chat/replyer/group_generator.py | 76 +++++++++++++++++++------ src/config/official_configs.py | 27 ++++----- src/plugin_system/apis/generator_api.py | 26 +++++++-- template/bot_config_template.toml | 3 +- 8 files changed, 138 insertions(+), 41 deletions(-) diff --git a/src/bw_learner/expression_learner.py b/src/bw_learner/expression_learner.py index 7ff9d789..a9ad7348 100644 --- a/src/bw_learner/expression_learner.py +++ b/src/bw_learner/expression_learner.py @@ -34,7 +34,7 @@ def init_prompt() -> None: 你的名字是{bot_name},现在请你完成两个提取任务 任务1:请从上面这段群聊中用户的语言风格和说话方式 1. 只考虑文字,不要考虑表情包和图片 -2. 不要总结SELF的发言 +2. 不要总结SELF的发言,因为这是你自己的发言,不要重复学习你自己的发言 3. 不要涉及具体的人名,也不要涉及具体名词 4. 思考有没有特殊的梗,一并总结成语言风格 5. 例子仅供参考,请严格根据群聊内容总结!!! diff --git a/src/chat/brain_chat/brain_chat.py b/src/chat/brain_chat/brain_chat.py index 4a22628e..b555bf0f 100644 --- a/src/chat/brain_chat/brain_chat.py +++ b/src/chat/brain_chat/brain_chat.py @@ -588,12 +588,27 @@ class BrainChatting: elif action_planner_info.action_type == "reply": try: + # 从 Planner 的 action_data 中提取未知词语列表(仅在 reply 时使用) + unknown_words = None + if isinstance(action_planner_info.action_data, dict): + uw = action_planner_info.action_data.get("unknown_words") + if isinstance(uw, list): + cleaned_uw: List[str] = [] + for item in uw: + if isinstance(item, str): + s = item.strip() + if s: + cleaned_uw.append(s) + if cleaned_uw: + unknown_words = cleaned_uw + success, llm_response = await generator_api.generate_reply( chat_stream=self.chat_stream, reply_message=action_planner_info.action_message, available_actions=available_actions, chosen_actions=chosen_action_plan_infos, reply_reason=action_planner_info.reasoning or "", + unknown_words=unknown_words, enable_tool=global_config.tool.enable_tool, request_type="replyer", from_plugin=False, diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index eaf6dc8e..13db1783 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -638,12 +638,27 @@ class HeartFChatting: action_reasoning=reason, ) + # 从 Planner 的 action_data 中提取未知词语列表(仅在 reply 时使用) + unknown_words = None + if isinstance(action_planner_info.action_data, dict): + uw = action_planner_info.action_data.get("unknown_words") + if isinstance(uw, list): + cleaned_uw: List[str] = [] + for item in uw: + if isinstance(item, str): + s = item.strip() + if s: + cleaned_uw.append(s) + if cleaned_uw: + unknown_words = cleaned_uw + success, llm_response = await generator_api.generate_reply( chat_stream=self.chat_stream, reply_message=action_planner_info.action_message, available_actions=available_actions, chosen_actions=chosen_action_plan_infos, reply_reason=planner_reasoning, + unknown_words=unknown_words, enable_tool=global_config.tool.enable_tool, request_type="replyer", from_plugin=False, diff --git a/src/chat/planner_actions/planner.py b/src/chat/planner_actions/planner.py index 8ef99f6e..ce316f50 100644 --- a/src/chat/planner_actions/planner.py +++ b/src/chat/planner_actions/planner.py @@ 
-51,6 +51,8 @@ reply 3.最好一次对一个话题进行回复,免得啰嗦或者回复内容太乱。 4.不要选择回复你自己发送的消息 5.不要单独对表情包进行回复 +6.将上下文中所有含义不明的,疑似黑话的,缩写词均写入unknown_words中 +7.用一句简单的话来描述当前回复场景,不超过10个字 {reply_action_example} no_reply @@ -478,10 +480,19 @@ class ActionPlanner: name_block = f"你的名字是{bot_name}{bot_nickname},请注意哪些是你自己的发言。" # 根据 think_mode 配置决定 reply action 的示例 JSON + # 在 JSON 中直接作为 action 参数携带 unknown_words if global_config.chat.think_mode == "classic": - reply_action_example = '{{"action":"reply", "target_messamge_id":"消息id(m+数字)"}}' + reply_action_example = ( + '{{"action":"reply", "target_messamge_id":"消息id(m+数字)", ' + '"unknown_words":["词语1","词语2"]}}' + ) else: - reply_action_example = '5.think_level表示思考深度,0表示该回复不需要思考和回忆,1表示该回复需要进行回忆和思考\n{{"action":"reply", "think_level":数值等级(0或1), "target_messamge_id":"消息id(m+数字)"}}' + reply_action_example = ( + "5.think_level表示思考深度,0表示该回复不需要思考和回忆,1表示该回复需要进行回忆和思考\n" + + '{{"action":"reply", "think_level":数值等级(0或1), ' + '"target_messamge_id":"消息id(m+数字)", ' + '"unknown_words":["词语1","词语2"]}}' + ) planner_prompt_template = await global_prompt_manager.get_prompt_async("planner_prompt") prompt = planner_prompt_template.format( diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index 3a82b555..1ec04556 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -35,7 +35,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt -from src.bw_learner.jargon_explainer import explain_jargon_in_context +from src.bw_learner.jargon_explainer import explain_jargon_in_context, retrieve_concepts_with_jargon init_lpmm_prompt() init_replyer_prompt() @@ -73,6 +73,7 @@ class DefaultReplyer: reply_message: Optional[DatabaseMessages] = None, reply_time_point: Optional[float] = time.time(), think_level: int = 1, + unknown_words: Optional[List[str]] = None, ) -> Tuple[bool, LLMGenerationDataModel]: # sourcery skip: merge-nested-ifs """ @@ -110,6 +111,7 @@ class DefaultReplyer: reply_reason=reply_reason, reply_time_point=reply_time_point, think_level=think_level, + unknown_words=unknown_words, ) llm_response.prompt = prompt llm_response.selected_expressions = selected_expressions @@ -492,6 +494,53 @@ class DefaultReplyer: """当关闭黑话解释时使用的占位协程,避免额外的LLM调用""" return "" + async def _build_unknown_words_jargon(self, unknown_words: Optional[List[str]], chat_id: str) -> str: + """针对 Planner 提供的未知词语列表执行黑话检索""" + if not unknown_words: + return "" + # 清洗未知词语列表,只保留非空字符串 + concepts: List[str] = [] + for item in unknown_words: + if isinstance(item, str): + s = item.strip() + if s: + concepts.append(s) + if not concepts: + return "" + try: + return await retrieve_concepts_with_jargon(concepts, chat_id) + except Exception as e: + logger.error(f"未知词语黑话检索失败: {e}") + return "" + + async def _build_jargon_explanation( + self, + chat_id: str, + messages_short: List[DatabaseMessages], + chat_talking_prompt_short: str, + unknown_words: Optional[List[str]], + ) -> str: + """ + 统一的黑话解释构建函数: + - 根据 enable_jargon_explanation / jargon_mode 决定具体策略 + """ + enable_jargon_explanation = getattr(global_config.expression, "enable_jargon_explanation", True) + if not enable_jargon_explanation: + return "" + + jargon_mode = getattr(global_config.expression, "jargon_mode", "context") + + # planner 模式:仅使用 Planner 的 
unknown_words + if jargon_mode == "planner": + return await self._build_unknown_words_jargon(unknown_words, chat_id) + + # 默认 / context 模式:使用上下文自动匹配黑话 + try: + return await explain_jargon_in_context(chat_id, messages_short, chat_talking_prompt_short) + except Exception as e: + logger.error(f"上下文黑话解释失败: {e}") + return "" + def build_chat_history_prompts( self, message_list_before_now: List[DatabaseMessages], target_user_id: str, sender: str ) -> Tuple[str, str]: @@ -676,16 +725,10 @@ class DefaultReplyer: # 判断是否为群聊 is_group = stream_type == "group" - # 使用与 ChatStream.get_stream_id 相同的逻辑生成 chat_id - import hashlib - - if is_group: - components = [platform, str(id_str)] - else: - components = [platform, str(id_str), "private"] - key = "_".join(components) - chat_id = hashlib.md5(key.encode()).hexdigest() + # 使用 ChatManager 提供的接口生成 chat_id,避免在此重复实现逻辑 + from src.chat.message_receive.chat_stream import get_chat_manager + chat_id = get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group) return chat_id, prompt_content except (ValueError, IndexError): @@ -739,6 +782,7 @@ class DefaultReplyer: enable_tool: bool = True, reply_time_point: Optional[float] = time.time(), think_level: int = 1, + unknown_words: Optional[List[str]] = None, ) -> Tuple[str, List[int], List[str], str]: """ 构建回复器上下文 @@ -823,14 +867,12 @@ class DefaultReplyer: show_actions=True, ) - # 根据配置决定是否启用黑话解释 - enable_jargon_explanation = getattr(global_config.expression, "enable_jargon_explanation", True) - if enable_jargon_explanation: - jargon_coroutine = explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short) - else: - jargon_coroutine = self._build_disabled_jargon_explanation() + # 统一黑话解释构建:根据配置选择上下文或 Planner 模式 + jargon_coroutine = self._build_jargon_explanation( + chat_id, message_list_before_short, chat_talking_prompt_short, unknown_words + ) - # 并行执行八个构建任务(包括黑话解释,可配置关闭) + # 并行执行构建任务(包括黑话解释,可配置关闭) task_results = await asyncio.gather( self._time_and_run_task( self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level), diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 143d5652..de4bc1da 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -135,14 +135,9 @@ class ChatConfig(ConfigBase): is_group = stream_type == "group" - import hashlib + from src.chat.message_receive.chat_stream import get_chat_manager - if is_group: - components = [platform, str(id_str)] - else: - components = [platform, str(id_str), "private"] - key = "_".join(components) - return hashlib.md5(key.encode()).hexdigest() + return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group) except (ValueError, IndexError): return None @@ -328,6 +323,13 @@ class ExpressionConfig(ConfigBase): enable_jargon_explanation: bool = True """是否在回复前尝试对上下文中的黑话进行解释(关闭可减少一次LLM调用,仅影响回复前的黑话匹配与解释,不影响黑话学习)""" + jargon_mode: Literal["context", "planner"] = "context" + """ + 黑话解释来源模式: + - "context": 使用上下文自动匹配黑话并解释(原有模式) + - "planner": 仅使用 Planner 在 reply 动作中给出的 unknown_words 列表进行黑话检索 + """ + def _parse_stream_config_to_chat_id(self, stream_config_str: str) -> Optional[str]: """ 解析流配置字符串并生成对应的 chat_id @@ -350,15 +352,10 @@ class ExpressionConfig(ConfigBase): # 判断是否为群聊 is_group = stream_type == "group" - # 使用与 ChatStream.get_stream_id 相同的逻辑生成 chat_id - import hashlib + # 使用 ChatManager 提供的接口生成 chat_id,避免在此重复实现逻辑 + from src.chat.message_receive.chat_stream import get_chat_manager - if is_group: - components = 
[platform, str(id_str)] - else: - components = [platform, str(id_str), "private"] - key = "_".join(components) - return hashlib.md5(key.encode()).hexdigest() + return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group) except (ValueError, IndexError): return None diff --git a/src/plugin_system/apis/generator_api.py b/src/plugin_system/apis/generator_api.py index cdac2791..4f7b63fc 100644 --- a/src/plugin_system/apis/generator_api.py +++ b/src/plugin_system/apis/generator_api.py @@ -86,6 +86,7 @@ async def generate_reply( reply_reason: str = "", available_actions: Optional[Dict[str, ActionInfo]] = None, chosen_actions: Optional[List["ActionPlannerInfo"]] = None, + unknown_words: Optional[List[str]] = None, enable_tool: bool = False, enable_splitter: bool = True, enable_chinese_typo: bool = True, @@ -104,6 +105,7 @@ async def generate_reply( reply_reason: 回复原因 available_actions: 可用动作 chosen_actions: 已选动作 + unknown_words: Planner 在 reply 动作中给出的未知词语列表,用于黑话检索 enable_tool: 是否启用工具调用 enable_splitter: 是否启用消息分割器 enable_chinese_typo: 是否启用错字生成器 @@ -123,11 +125,24 @@ async def generate_reply( logger.error("[GeneratorAPI] 无法获取回复器") return False, None - if not extra_info and action_data: - extra_info = action_data.get("extra_info", "") - - if not reply_reason and action_data: - reply_reason = action_data.get("reason", "") + if action_data: + if not extra_info: + extra_info = action_data.get("extra_info", "") + if not reply_reason: + reply_reason = action_data.get("reason", "") + # 仅在 reply 场景下使用的未知词语解析(Planner JSON 中下发) + if unknown_words is None: + uw = action_data.get("unknown_words") + if isinstance(uw, list): + # 只保留非空字符串 + cleaned: List[str] = [] + for item in uw: + if isinstance(item, str): + s = item.strip() + if s: + cleaned.append(s) + if cleaned: + unknown_words = cleaned # 调用回复器生成回复 success, llm_response = await replyer.generate_reply_with_context( @@ -137,6 +152,7 @@ async def generate_reply( enable_tool=enable_tool, reply_message=reply_message, reply_reason=reply_reason, + unknown_words=unknown_words, think_level=think_level, from_plugin=from_plugin, stream_id=chat_stream.stream_id if chat_stream else chat_id, diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 2740591a..85e9d1a2 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "7.2.0" +version = "7.2.1" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- # 如果你想要修改配置文件,请递增version的值 @@ -94,6 +94,7 @@ allow_reflect = [] # 允许进行表达反思的聊天流ID列表,格式:["q all_global_jargon = true # 是否开启全局黑话模式,注意,此功能关闭后,已经记录的全局黑话不会改变,需要手动删除 enable_jargon_explanation = true # 是否在回复前尝试对上下文中的黑话进行解释(关闭可减少一次LLM调用,仅影响回复前的黑话匹配与解释,不影响黑话学习) +jargon_mode = "context" # 黑话解释来源模式,可选: "context"(使用上下文自动匹配黑话) 或 "planner"(仅使用Planner在reply动作中给出的unknown_words列表) [chat] # 麦麦的聊天设置
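
Usage sketch: the snippet below is a minimal, self-contained Python illustration of the control flow this patch wires up: the planner puts an unknown_words list into its reply action JSON, the chat handlers and generator_api clean it, and _build_jargon_explanation dispatches on jargon_mode. The two async helpers here are local stand-in stubs; in the real code they come from src.bw_learner.jargon_explainer, so treat this as a sketch of the expected behaviour rather than the actual module.

import asyncio
from typing import List, Optional


async def explain_jargon_in_context(chat_id: str, messages: list, talking_prompt: str) -> str:
    # Stand-in stub: the real helper matches jargon against the recent chat context.
    return f"[context jargon explanation for {chat_id}]"


async def retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> str:
    # Stand-in stub: the real helper looks up each concept in the jargon store.
    return f"[jargon lookup for {concepts} in {chat_id}]"


async def build_jargon_explanation(
    chat_id: str,
    messages_short: list,
    chat_talking_prompt_short: str,
    unknown_words: Optional[List[str]],
    enable_jargon_explanation: bool = True,
    jargon_mode: str = "context",
) -> str:
    """Dispatch between the two jargon-explanation strategies added by the patch."""
    if not enable_jargon_explanation:
        return ""

    if jargon_mode == "planner":
        # Planner mode: rely only on the unknown_words the planner wrote into the
        # reply action JSON, dropping non-string and blank entries first
        # (mirrors the cleaning loops in brain_chat.py / heartFC_chat.py / generator_api.py).
        concepts = [w.strip() for w in (unknown_words or []) if isinstance(w, str) and w.strip()]
        if not concepts:
            return ""
        return await retrieve_concepts_with_jargon(concepts, chat_id)

    # Default "context" mode: match jargon automatically from the chat context,
    # which is the pre-existing behaviour.
    return await explain_jargon_in_context(chat_id, messages_short, chat_talking_prompt_short)


if __name__ == "__main__":
    result = asyncio.run(
        build_jargon_explanation(
            chat_id="demo_chat",
            messages_short=[],
            chat_talking_prompt_short="",
            unknown_words=["yyds", "   ", "awsl"],  # blank entries are filtered out
            jargon_mode="planner",
        )
    )
    print(result)

With jargon_mode = "planner" in bot_config.toml only the planner-supplied words are looked up; with the default "context" mode the behaviour is unchanged from before this patch.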