From 817154a72b1c9f38f2bfcb39a22fe0712dfdff4b Mon Sep 17 00:00:00 2001 From: exynos <3520824673@qq.com> Date: Mon, 13 Oct 2025 16:00:15 +0800 Subject: [PATCH] =?UTF-8?q?=E6=A0=87=E5=87=86=E5=8C=96=E2=80=9C=E8=A2=AB@?= =?UTF-8?q?=E2=80=9D=E8=AF=86=E5=88=AB=E4=B8=8E=E5=B9=B3=E5=8F=B0=E8=A7=A3?= =?UTF-8?q?=E8=80=A6=EF=BC=9A=E6=9B=B4=E6=94=B9=20maim=5Fmessage=20?= =?UTF-8?q?=E4=BB=A5=E9=80=82=E9=85=8Dtg=E7=AD=89=E5=90=8E=E7=BB=AD?= =?UTF-8?q?=E5=B9=B3=E5=8F=B0=E7=9A=84=E8=A7=86=E9=A2=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/brain_chat/brain_planner.py | 2 + .../heart_flow/heartflow_message_processor.py | 3 + src/chat/message_receive/bot.py | 2 + src/chat/message_receive/message.py | 10 ++ src/chat/utils/chat_message_builder.py | 20 ++- src/chat/utils/utils.py | 132 ++++++++++++------ src/express/expression_learner.py | 64 ++------- 7 files changed, 134 insertions(+), 99 deletions(-) diff --git a/src/chat/brain_chat/brain_planner.py b/src/chat/brain_chat/brain_planner.py index 35cdf172..0cbaab36 100644 --- a/src/chat/brain_chat/brain_planner.py +++ b/src/chat/brain_chat/brain_planner.py @@ -249,6 +249,8 @@ class BrainPlanner: # 获取必要信息 is_group_chat, chat_target_info, current_available_actions = self.get_necessary_info() + # 提及/被@ 的处理由心流或统一判定模块驱动;Planner 不再做硬编码强制回复 + # 应用激活类型过滤 filtered_actions = self._filter_actions_by_activation_type(available_actions, chat_content_block_short) diff --git a/src/chat/heart_flow/heartflow_message_processor.py b/src/chat/heart_flow/heartflow_message_processor.py index fdc13e2f..4247d02c 100644 --- a/src/chat/heart_flow/heartflow_message_processor.py +++ b/src/chat/heart_flow/heartflow_message_processor.py @@ -31,6 +31,9 @@ async def _calculate_interest(message: MessageRecv) -> Tuple[float, list[str]]: return 0.0, [] is_mentioned, is_at, reply_probability_boost = is_mentioned_bot_in_message(message) + # 保留适配器/上游直接标记的提及信号,避免被覆盖 + if getattr(message, "is_mentioned", False): + is_mentioned = True # interested_rate = 0.0 keywords = [] diff --git a/src/chat/message_receive/bot.py b/src/chat/message_receive/bot.py index 2eb0b4e3..43d2754a 100644 --- a/src/chat/message_receive/bot.py +++ b/src/chat/message_receive/bot.py @@ -221,6 +221,8 @@ class ChatBot: # 处理消息内容,生成纯文本 await message.process() + # 平台层的 @ 检测由底层 is_mentioned_bot_in_message 统一处理;此处不做用户名硬编码匹配 + # 过滤检查 if _check_ban_words( message.processed_plain_text, diff --git a/src/chat/message_receive/message.py b/src/chat/message_receive/message.py index aa992552..6c1ec1a7 100644 --- a/src/chat/message_receive/message.py +++ b/src/chat/message_receive/message.py @@ -130,6 +130,16 @@ class MessageRecv(Message): self.key_words = [] self.key_words_lite = [] + # 兼容适配器通过 additional_config 传入的 @ 标记 + try: + msg_info_dict = message_dict.get("message_info", {}) + add_cfg = msg_info_dict.get("additional_config") or {} + if isinstance(add_cfg, dict) and add_cfg.get("at_bot"): + # 标记为被提及,提高后续回复优先级 + self.is_mentioned = True # type: ignore + except Exception: + pass + def update_chat_stream(self, chat_stream: "ChatStream"): self.chat_stream = chat_stream diff --git a/src/chat/utils/chat_message_builder.py b/src/chat/utils/chat_message_builder.py index 52559ecb..8915e810 100644 --- a/src/chat/utils/chat_message_builder.py +++ b/src/chat/utils/chat_message_builder.py @@ -43,9 +43,12 @@ def replace_user_references( if name_resolver is None: def default_resolver(platform: str, user_id: str) -> str: - # 检查是否是机器人自己 - if replace_bot_name and user_id == global_config.bot.qq_account: - return f"{global_config.bot.nickname}(你)" + # 检查是否是机器人自己(支持多平台) + if replace_bot_name: + if platform == "qq" and user_id == global_config.bot.qq_account: + return f"{global_config.bot.nickname}(你)" + if platform == "telegram" and user_id == getattr(global_config.bot, "telegram_account", ""): + return f"{global_config.bot.nickname}(你)" person = Person(platform=platform, user_id=user_id) return person.person_name or user_id # type: ignore @@ -92,6 +95,8 @@ def replace_user_references( new_content += content[last_end:] content = new_content + # Telegram 文本 @username 的显示映射交由适配器或平台层处理;此处不做硬编码替换 + return content @@ -432,7 +437,10 @@ def _build_readable_messages_internal( person_name = ( person.person_name or f"{user_nickname}" or (f"昵称:{user_cardname}" if user_cardname else "某人") ) - if replace_bot_name and user_id == global_config.bot.qq_account: + if replace_bot_name and ( + (platform == global_config.bot.platform and user_id == global_config.bot.qq_account) + or (platform == "telegram" and user_id == getattr(global_config.bot, "telegram_account", "")) + ): person_name = f"{global_config.bot.nickname}(你)" # 使用独立函数处理用户引用格式 @@ -866,7 +874,9 @@ async def build_anonymous_messages(messages: List[DatabaseMessages]) -> str: # print(f"get_anon_name: platform:{platform}, user_id:{user_id}") # print(f"global_config.bot.qq_account:{global_config.bot.qq_account}") - if user_id == global_config.bot.qq_account: + if (platform == "qq" and user_id == global_config.bot.qq_account) or ( + platform == "telegram" and user_id == getattr(global_config.bot, "telegram_account", "") + ): # print("SELF11111111111111") return "SELF" try: diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index 240ce609..3011c865 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -44,62 +44,104 @@ def db_message_to_str(message_dict: dict) -> str: def is_mentioned_bot_in_message(message: MessageRecv) -> tuple[bool, bool, float]: - """检查消息是否提到了机器人""" - keywords = [global_config.bot.nickname] + list(global_config.bot.alias_names) + """检查消息是否提到了机器人(多平台实现)""" + text = message.processed_plain_text or "" + platform = getattr(message.message_info, "platform", "") or "" + qq_id = str(getattr(global_config.bot, "qq_account", "") or "") + tg_id = str(getattr(global_config.bot, "telegram_account", "") or "") + tg_uname = str(getattr(global_config.bot, "telegram_username", "") or "") + + nickname = str(global_config.bot.nickname or "") + alias_names = list(getattr(global_config.bot, "alias_names", []) or []) + keywords = [nickname] + alias_names + reply_probability = 0.0 is_at = False is_mentioned = False - # 这部分怎么处理啊啊啊啊 - # 我觉得可以给消息加一个 reply_probability_boost字段 - if ( - message.message_info.additional_config is not None - and message.message_info.additional_config.get("is_mentioned") is not None - ): + # 1) 直接的 additional_config 标记 + add_cfg = getattr(message.message_info, "additional_config", None) or {} + if isinstance(add_cfg, dict): + if add_cfg.get("at_bot") or add_cfg.get("is_mentioned"): + is_mentioned = True + # 当提供数值型 is_mentioned 时,当作概率提升 + try: + if add_cfg.get("is_mentioned") not in (None, ""): + reply_probability = float(add_cfg.get("is_mentioned")) # type: ignore + except Exception: + pass + + # 2) 已经在上游设置过的 message.is_mentioned + if getattr(message, "is_mentioned", False): + is_mentioned = True + + # 3) 扫描分段:是否包含 mention_bot(适配器插入) + def _has_mention_bot(seg) -> bool: try: - reply_probability = float(message.message_info.additional_config.get("is_mentioned")) # type: ignore - is_mentioned = True - return is_mentioned, is_at, reply_probability - except Exception as e: - logger.warning(str(e)) - logger.warning( - f"消息中包含不合理的设置 is_mentioned: {message.message_info.additional_config.get('is_mentioned')}" - ) + if seg is None: + return False + if getattr(seg, "type", None) == "mention_bot": + return True + if getattr(seg, "type", None) == "seglist": + for s in getattr(seg, "data", []) or []: + if _has_mention_bot(s): + return True + return False + except Exception: + return False - for keyword in keywords: - if keyword in message.processed_plain_text: - is_mentioned = True - - # 判断是否被@ - if re.search(rf"@<(.+?):{global_config.bot.qq_account}>", message.processed_plain_text): + if _has_mention_bot(getattr(message, "message_segment", None)): is_at = True is_mentioned = True - if is_at and global_config.chat.at_bot_inevitable_reply: + # 4) 文本层面的 @ 检测(多平台) + # QQ: @ + if qq_id and re.search(rf"@<(.+?):{re.escape(qq_id)}>", text): + is_at = True + is_mentioned = True + # Telegram: @username + if platform == "telegram" and tg_uname: + if re.search(rf"@{re.escape(tg_uname)}(\b|$)", text, flags=re.IGNORECASE): + is_at = True + is_mentioned = True + + # 5) 回复机器人检测: + # a) 通用显示文本:包含 “(你)” 或 “(你)” 的回复格式 + if re.search(r"\[回复 .*?\(你\):", text) or re.search(r"\[回复 .*?(你):", text): + is_mentioned = True + # b) 兼容 ID 形式(QQ与Telegram) + if qq_id and ( + re.search(rf"\[回复 (.+?)\({re.escape(qq_id)}\):(.+?)\],说:", text) + or re.search(rf"\[回复<(.+?)(?=:{re.escape(qq_id)}>)\:{re.escape(qq_id)}>:(.+?)\],说:", text) + ): + is_mentioned = True + if tg_id and ( + re.search(rf"\[回复 (.+?)\({re.escape(tg_id)}\):(.+?)\],说:", text) + or re.search(rf"\[回复<(.+?)(?=:{re.escape(tg_id)}>)\:{re.escape(tg_id)}>:(.+?)\],说:", text) + ): + is_mentioned = True + + # 6) 名称/别名 提及(去除 @/回复标记后再匹配) + if not is_mentioned and keywords: + msg_content = text + # 去除各种 @ 与 回复标记,避免误判 + msg_content = re.sub(r"@(.+?)((\d+))", "", msg_content) + msg_content = re.sub(r"@<(.+?)(?=:(\d+))\:(\d+)>", "", msg_content) + msg_content = re.sub(r"\[回复 (.+?)\(((\d+)|未知id|你)\):(.+?)\],说:", "", msg_content) + msg_content = re.sub(r"\[回复<(.+?)(?=:(\d+))\:(\d+)>:(.+?)\],说:", "", msg_content) + for kw in keywords: + if kw and kw in msg_content: + is_mentioned = True + break + + # 7) 概率设置 + if is_at and getattr(global_config.chat, "at_bot_inevitable_reply", 1): reply_probability = 1.0 logger.debug("被@,回复概率设置为100%") - else: - if not is_mentioned: - # 判断是否被回复 - if re.match( - rf"\[回复 (.+?)\({str(global_config.bot.qq_account)}\):(.+?)\],说:", message.processed_plain_text - ) or re.match( - rf"\[回复<(.+?)(?=:{str(global_config.bot.qq_account)}>)\:{str(global_config.bot.qq_account)}>:(.+?)\],说:", - message.processed_plain_text, - ): - is_mentioned = True - else: - # 判断内容中是否被提及 - message_content = re.sub(r"@(.+?)((\d+))", "", message.processed_plain_text) - message_content = re.sub(r"@<(.+?)(?=:(\d+))\:(\d+)>", "", message_content) - message_content = re.sub(r"\[回复 (.+?)\(((\d+)|未知id)\):(.+?)\],说:", "", message_content) - message_content = re.sub(r"\[回复<(.+?)(?=:(\d+))\:(\d+)>:(.+?)\],说:", "", message_content) - for keyword in keywords: - if keyword in message_content: - is_mentioned = True - if is_mentioned and global_config.chat.mentioned_bot_reply: - reply_probability = 1.0 - logger.debug("被提及,回复概率设置为100%") + elif is_mentioned and getattr(global_config.chat, "mentioned_bot_reply", 1): + reply_probability = max(reply_probability, 1.0) + logger.debug("被提及,回复概率设置为100%") + return is_mentioned, is_at, reply_probability diff --git a/src/express/expression_learner.py b/src/express/expression_learner.py index e0bc6d71..cb56be4a 100644 --- a/src/express/expression_learner.py +++ b/src/express/expression_learner.py @@ -112,8 +112,8 @@ class ExpressionLearner: _, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat( self.chat_id ) - self.min_messages_for_learning = 30 / self.learning_intensity # 触发学习所需的最少消息数 - self.min_learning_interval = 300 / self.learning_intensity + self.min_messages_for_learning = 15 / self.learning_intensity # 触发学习所需的最少消息数 + self.min_learning_interval = 150 / self.learning_intensity def should_trigger_learning(self) -> bool: """ @@ -343,44 +343,26 @@ class ExpressionLearner: logger.error(f"解析匹配响应JSON失败: {e}, 响应内容: \n{response}") return [] - # 确保 match_responses 是一个列表 - if not isinstance(match_responses, list): - if isinstance(match_responses, dict): - match_responses = [match_responses] - else: - logger.error(f"match_responses 不是列表或字典类型: {type(match_responses)}, 内容: {match_responses}") - return [] - matched_expressions = [] used_pair_indices = set() # 用于跟踪已经使用的expression_pair索引 - logger.debug(f"match_responses 类型: {type(match_responses)}, 长度: {len(match_responses)}") - logger.debug(f"match_responses 内容: {match_responses}") + print(f"match_responses: {match_responses}") for match_response in match_responses: try: - # 检查 match_response 的类型 - if not isinstance(match_response, dict): - logger.error(f"match_response 不是字典类型: {type(match_response)}, 内容: {match_response}") - continue - # 获取表达方式序号 - if "expression_pair" not in match_response: - logger.error(f"match_response 缺少 'expression_pair' 字段: {match_response}") - continue - pair_index = int(match_response["expression_pair"]) - 1 # 转换为0-based索引 # 检查索引是否有效且未被使用过 if 0 <= pair_index < len(expression_pairs) and pair_index not in used_pair_indices: situation, style = expression_pairs[pair_index] - context = match_response.get("context", "") + context = match_response["context"] matched_expressions.append((situation, style, context)) used_pair_indices.add(pair_index) # 标记该索引已使用 logger.debug(f"成功匹配表达方式 {pair_index + 1}: {situation} -> {style}") elif pair_index in used_pair_indices: logger.debug(f"跳过重复的表达方式 {pair_index + 1}") - except (ValueError, KeyError, IndexError, TypeError) as e: + except (ValueError, KeyError, IndexError) as e: logger.error(f"解析匹配条目失败: {e}, 条目: {match_response}") continue @@ -457,7 +439,7 @@ class ExpressionLearner: continue prev_original_idx = bare_lines[pos - 1][0] - up_content = self._filter_message_content(random_msg[prev_original_idx].processed_plain_text or "") + up_content = (random_msg[prev_original_idx].processed_plain_text or "").strip() if not up_content: # 上一句为空,跳过该表达 continue @@ -499,30 +481,6 @@ class ExpressionLearner: expressions.append((situation, style)) return expressions - def _filter_message_content(self, content: str) -> str: - """ - 过滤消息内容,移除回复、@、图片等格式 - - Args: - content: 原始消息内容 - - Returns: - str: 过滤后的内容 - """ - if not content: - return "" - - # 移除以[回复开头、]结尾的部分,包括后面的",说:"部分 - content = re.sub(r'\[回复.*?\],说:\s*', '', content) - # 移除@<...>格式的内容 - content = re.sub(r'@<[^>]*>', '', content) - # 移除[picid:...]格式的图片ID - content = re.sub(r'\[picid:[^\]]*\]', '', content) - # 移除[表情包:...]格式的内容 - content = re.sub(r'\[表情包:[^\]]*\]', '', content) - - return content.strip() - def _build_bare_lines(self, messages: List) -> List[Tuple[int, str]]: """ 为每条消息构建精简文本列表,保留到原消息索引的映射 @@ -537,7 +495,15 @@ class ExpressionLearner: for idx, msg in enumerate(messages): content = msg.processed_plain_text or "" - content = self._filter_message_content(content) + + # 移除以[回复开头、]结尾的部分 + content = re.sub(r'\[回复[^\]]*\]', '', content) + # 移除@<...>格式的内容 + content = re.sub(r'@<[^>]*>', '', content) + # 移除[picid:...]格式的图片ID + content = re.sub(r'\[picid:[^\]]*\]', '', content) + + content = content.strip() # 即使content为空也要记录,防止错位 bare_lines.append((idx, content))