mirror of https://github.com/Mai-with-u/MaiBot.git
标准化“被@”识别与平台解耦:更改 maim_message 以适配tg等后续平台的视频
parent
47816e305e
commit
817154a72b
|
|
@ -249,6 +249,8 @@ class BrainPlanner:
|
||||||
# 获取必要信息
|
# 获取必要信息
|
||||||
is_group_chat, chat_target_info, current_available_actions = self.get_necessary_info()
|
is_group_chat, chat_target_info, current_available_actions = self.get_necessary_info()
|
||||||
|
|
||||||
|
# 提及/被@ 的处理由心流或统一判定模块驱动;Planner 不再做硬编码强制回复
|
||||||
|
|
||||||
# 应用激活类型过滤
|
# 应用激活类型过滤
|
||||||
filtered_actions = self._filter_actions_by_activation_type(available_actions, chat_content_block_short)
|
filtered_actions = self._filter_actions_by_activation_type(available_actions, chat_content_block_short)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,9 @@ async def _calculate_interest(message: MessageRecv) -> Tuple[float, list[str]]:
|
||||||
return 0.0, []
|
return 0.0, []
|
||||||
|
|
||||||
is_mentioned, is_at, reply_probability_boost = is_mentioned_bot_in_message(message)
|
is_mentioned, is_at, reply_probability_boost = is_mentioned_bot_in_message(message)
|
||||||
|
# 保留适配器/上游直接标记的提及信号,避免被覆盖
|
||||||
|
if getattr(message, "is_mentioned", False):
|
||||||
|
is_mentioned = True
|
||||||
# interested_rate = 0.0
|
# interested_rate = 0.0
|
||||||
keywords = []
|
keywords = []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -221,6 +221,8 @@ class ChatBot:
|
||||||
# 处理消息内容,生成纯文本
|
# 处理消息内容,生成纯文本
|
||||||
await message.process()
|
await message.process()
|
||||||
|
|
||||||
|
# 平台层的 @ 检测由底层 is_mentioned_bot_in_message 统一处理;此处不做用户名硬编码匹配
|
||||||
|
|
||||||
# 过滤检查
|
# 过滤检查
|
||||||
if _check_ban_words(
|
if _check_ban_words(
|
||||||
message.processed_plain_text,
|
message.processed_plain_text,
|
||||||
|
|
|
||||||
|
|
@ -130,6 +130,16 @@ class MessageRecv(Message):
|
||||||
self.key_words = []
|
self.key_words = []
|
||||||
self.key_words_lite = []
|
self.key_words_lite = []
|
||||||
|
|
||||||
|
# 兼容适配器通过 additional_config 传入的 @ 标记
|
||||||
|
try:
|
||||||
|
msg_info_dict = message_dict.get("message_info", {})
|
||||||
|
add_cfg = msg_info_dict.get("additional_config") or {}
|
||||||
|
if isinstance(add_cfg, dict) and add_cfg.get("at_bot"):
|
||||||
|
# 标记为被提及,提高后续回复优先级
|
||||||
|
self.is_mentioned = True # type: ignore
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
def update_chat_stream(self, chat_stream: "ChatStream"):
|
def update_chat_stream(self, chat_stream: "ChatStream"):
|
||||||
self.chat_stream = chat_stream
|
self.chat_stream = chat_stream
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,9 +43,12 @@ def replace_user_references(
|
||||||
if name_resolver is None:
|
if name_resolver is None:
|
||||||
|
|
||||||
def default_resolver(platform: str, user_id: str) -> str:
|
def default_resolver(platform: str, user_id: str) -> str:
|
||||||
# 检查是否是机器人自己
|
# 检查是否是机器人自己(支持多平台)
|
||||||
if replace_bot_name and user_id == global_config.bot.qq_account:
|
if replace_bot_name:
|
||||||
return f"{global_config.bot.nickname}(你)"
|
if platform == "qq" and user_id == global_config.bot.qq_account:
|
||||||
|
return f"{global_config.bot.nickname}(你)"
|
||||||
|
if platform == "telegram" and user_id == getattr(global_config.bot, "telegram_account", ""):
|
||||||
|
return f"{global_config.bot.nickname}(你)"
|
||||||
person = Person(platform=platform, user_id=user_id)
|
person = Person(platform=platform, user_id=user_id)
|
||||||
return person.person_name or user_id # type: ignore
|
return person.person_name or user_id # type: ignore
|
||||||
|
|
||||||
|
|
@ -92,6 +95,8 @@ def replace_user_references(
|
||||||
new_content += content[last_end:]
|
new_content += content[last_end:]
|
||||||
content = new_content
|
content = new_content
|
||||||
|
|
||||||
|
# Telegram 文本 @username 的显示映射交由适配器或平台层处理;此处不做硬编码替换
|
||||||
|
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -432,7 +437,10 @@ def _build_readable_messages_internal(
|
||||||
person_name = (
|
person_name = (
|
||||||
person.person_name or f"{user_nickname}" or (f"昵称:{user_cardname}" if user_cardname else "某人")
|
person.person_name or f"{user_nickname}" or (f"昵称:{user_cardname}" if user_cardname else "某人")
|
||||||
)
|
)
|
||||||
if replace_bot_name and user_id == global_config.bot.qq_account:
|
if replace_bot_name and (
|
||||||
|
(platform == global_config.bot.platform and user_id == global_config.bot.qq_account)
|
||||||
|
or (platform == "telegram" and user_id == getattr(global_config.bot, "telegram_account", ""))
|
||||||
|
):
|
||||||
person_name = f"{global_config.bot.nickname}(你)"
|
person_name = f"{global_config.bot.nickname}(你)"
|
||||||
|
|
||||||
# 使用独立函数处理用户引用格式
|
# 使用独立函数处理用户引用格式
|
||||||
|
|
@ -866,7 +874,9 @@ async def build_anonymous_messages(messages: List[DatabaseMessages]) -> str:
|
||||||
# print(f"get_anon_name: platform:{platform}, user_id:{user_id}")
|
# print(f"get_anon_name: platform:{platform}, user_id:{user_id}")
|
||||||
# print(f"global_config.bot.qq_account:{global_config.bot.qq_account}")
|
# print(f"global_config.bot.qq_account:{global_config.bot.qq_account}")
|
||||||
|
|
||||||
if user_id == global_config.bot.qq_account:
|
if (platform == "qq" and user_id == global_config.bot.qq_account) or (
|
||||||
|
platform == "telegram" and user_id == getattr(global_config.bot, "telegram_account", "")
|
||||||
|
):
|
||||||
# print("SELF11111111111111")
|
# print("SELF11111111111111")
|
||||||
return "SELF"
|
return "SELF"
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -44,62 +44,104 @@ def db_message_to_str(message_dict: dict) -> str:
|
||||||
|
|
||||||
|
|
||||||
def is_mentioned_bot_in_message(message: MessageRecv) -> tuple[bool, bool, float]:
|
def is_mentioned_bot_in_message(message: MessageRecv) -> tuple[bool, bool, float]:
|
||||||
"""检查消息是否提到了机器人"""
|
"""检查消息是否提到了机器人(多平台实现)"""
|
||||||
keywords = [global_config.bot.nickname] + list(global_config.bot.alias_names)
|
text = message.processed_plain_text or ""
|
||||||
|
platform = getattr(message.message_info, "platform", "") or ""
|
||||||
|
qq_id = str(getattr(global_config.bot, "qq_account", "") or "")
|
||||||
|
tg_id = str(getattr(global_config.bot, "telegram_account", "") or "")
|
||||||
|
tg_uname = str(getattr(global_config.bot, "telegram_username", "") or "")
|
||||||
|
|
||||||
|
nickname = str(global_config.bot.nickname or "")
|
||||||
|
alias_names = list(getattr(global_config.bot, "alias_names", []) or [])
|
||||||
|
keywords = [nickname] + alias_names
|
||||||
|
|
||||||
reply_probability = 0.0
|
reply_probability = 0.0
|
||||||
is_at = False
|
is_at = False
|
||||||
is_mentioned = False
|
is_mentioned = False
|
||||||
|
|
||||||
# 这部分怎么处理啊啊啊啊
|
# 1) 直接的 additional_config 标记
|
||||||
# 我觉得可以给消息加一个 reply_probability_boost字段
|
add_cfg = getattr(message.message_info, "additional_config", None) or {}
|
||||||
if (
|
if isinstance(add_cfg, dict):
|
||||||
message.message_info.additional_config is not None
|
if add_cfg.get("at_bot") or add_cfg.get("is_mentioned"):
|
||||||
and message.message_info.additional_config.get("is_mentioned") is not None
|
is_mentioned = True
|
||||||
):
|
# 当提供数值型 is_mentioned 时,当作概率提升
|
||||||
|
try:
|
||||||
|
if add_cfg.get("is_mentioned") not in (None, ""):
|
||||||
|
reply_probability = float(add_cfg.get("is_mentioned")) # type: ignore
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# 2) 已经在上游设置过的 message.is_mentioned
|
||||||
|
if getattr(message, "is_mentioned", False):
|
||||||
|
is_mentioned = True
|
||||||
|
|
||||||
|
# 3) 扫描分段:是否包含 mention_bot(适配器插入)
|
||||||
|
def _has_mention_bot(seg) -> bool:
|
||||||
try:
|
try:
|
||||||
reply_probability = float(message.message_info.additional_config.get("is_mentioned")) # type: ignore
|
if seg is None:
|
||||||
is_mentioned = True
|
return False
|
||||||
return is_mentioned, is_at, reply_probability
|
if getattr(seg, "type", None) == "mention_bot":
|
||||||
except Exception as e:
|
return True
|
||||||
logger.warning(str(e))
|
if getattr(seg, "type", None) == "seglist":
|
||||||
logger.warning(
|
for s in getattr(seg, "data", []) or []:
|
||||||
f"消息中包含不合理的设置 is_mentioned: {message.message_info.additional_config.get('is_mentioned')}"
|
if _has_mention_bot(s):
|
||||||
)
|
return True
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
for keyword in keywords:
|
if _has_mention_bot(getattr(message, "message_segment", None)):
|
||||||
if keyword in message.processed_plain_text:
|
|
||||||
is_mentioned = True
|
|
||||||
|
|
||||||
# 判断是否被@
|
|
||||||
if re.search(rf"@<(.+?):{global_config.bot.qq_account}>", message.processed_plain_text):
|
|
||||||
is_at = True
|
is_at = True
|
||||||
is_mentioned = True
|
is_mentioned = True
|
||||||
|
|
||||||
if is_at and global_config.chat.at_bot_inevitable_reply:
|
# 4) 文本层面的 @ 检测(多平台)
|
||||||
|
# QQ: @<name:qq_id>
|
||||||
|
if qq_id and re.search(rf"@<(.+?):{re.escape(qq_id)}>", text):
|
||||||
|
is_at = True
|
||||||
|
is_mentioned = True
|
||||||
|
# Telegram: @username
|
||||||
|
if platform == "telegram" and tg_uname:
|
||||||
|
if re.search(rf"@{re.escape(tg_uname)}(\b|$)", text, flags=re.IGNORECASE):
|
||||||
|
is_at = True
|
||||||
|
is_mentioned = True
|
||||||
|
|
||||||
|
# 5) 回复机器人检测:
|
||||||
|
# a) 通用显示文本:包含 “(你)” 或 “(你)” 的回复格式
|
||||||
|
if re.search(r"\[回复 .*?\(你\):", text) or re.search(r"\[回复 .*?(你):", text):
|
||||||
|
is_mentioned = True
|
||||||
|
# b) 兼容 ID 形式(QQ与Telegram)
|
||||||
|
if qq_id and (
|
||||||
|
re.search(rf"\[回复 (.+?)\({re.escape(qq_id)}\):(.+?)\],说:", text)
|
||||||
|
or re.search(rf"\[回复<(.+?)(?=:{re.escape(qq_id)}>)\:{re.escape(qq_id)}>:(.+?)\],说:", text)
|
||||||
|
):
|
||||||
|
is_mentioned = True
|
||||||
|
if tg_id and (
|
||||||
|
re.search(rf"\[回复 (.+?)\({re.escape(tg_id)}\):(.+?)\],说:", text)
|
||||||
|
or re.search(rf"\[回复<(.+?)(?=:{re.escape(tg_id)}>)\:{re.escape(tg_id)}>:(.+?)\],说:", text)
|
||||||
|
):
|
||||||
|
is_mentioned = True
|
||||||
|
|
||||||
|
# 6) 名称/别名 提及(去除 @/回复标记后再匹配)
|
||||||
|
if not is_mentioned and keywords:
|
||||||
|
msg_content = text
|
||||||
|
# 去除各种 @ 与 回复标记,避免误判
|
||||||
|
msg_content = re.sub(r"@(.+?)((\d+))", "", msg_content)
|
||||||
|
msg_content = re.sub(r"@<(.+?)(?=:(\d+))\:(\d+)>", "", msg_content)
|
||||||
|
msg_content = re.sub(r"\[回复 (.+?)\(((\d+)|未知id|你)\):(.+?)\],说:", "", msg_content)
|
||||||
|
msg_content = re.sub(r"\[回复<(.+?)(?=:(\d+))\:(\d+)>:(.+?)\],说:", "", msg_content)
|
||||||
|
for kw in keywords:
|
||||||
|
if kw and kw in msg_content:
|
||||||
|
is_mentioned = True
|
||||||
|
break
|
||||||
|
|
||||||
|
# 7) 概率设置
|
||||||
|
if is_at and getattr(global_config.chat, "at_bot_inevitable_reply", 1):
|
||||||
reply_probability = 1.0
|
reply_probability = 1.0
|
||||||
logger.debug("被@,回复概率设置为100%")
|
logger.debug("被@,回复概率设置为100%")
|
||||||
else:
|
elif is_mentioned and getattr(global_config.chat, "mentioned_bot_reply", 1):
|
||||||
if not is_mentioned:
|
reply_probability = max(reply_probability, 1.0)
|
||||||
# 判断是否被回复
|
logger.debug("被提及,回复概率设置为100%")
|
||||||
if re.match(
|
|
||||||
rf"\[回复 (.+?)\({str(global_config.bot.qq_account)}\):(.+?)\],说:", message.processed_plain_text
|
|
||||||
) or re.match(
|
|
||||||
rf"\[回复<(.+?)(?=:{str(global_config.bot.qq_account)}>)\:{str(global_config.bot.qq_account)}>:(.+?)\],说:",
|
|
||||||
message.processed_plain_text,
|
|
||||||
):
|
|
||||||
is_mentioned = True
|
|
||||||
else:
|
|
||||||
# 判断内容中是否被提及
|
|
||||||
message_content = re.sub(r"@(.+?)((\d+))", "", message.processed_plain_text)
|
|
||||||
message_content = re.sub(r"@<(.+?)(?=:(\d+))\:(\d+)>", "", message_content)
|
|
||||||
message_content = re.sub(r"\[回复 (.+?)\(((\d+)|未知id)\):(.+?)\],说:", "", message_content)
|
|
||||||
message_content = re.sub(r"\[回复<(.+?)(?=:(\d+))\:(\d+)>:(.+?)\],说:", "", message_content)
|
|
||||||
for keyword in keywords:
|
|
||||||
if keyword in message_content:
|
|
||||||
is_mentioned = True
|
|
||||||
if is_mentioned and global_config.chat.mentioned_bot_reply:
|
|
||||||
reply_probability = 1.0
|
|
||||||
logger.debug("被提及,回复概率设置为100%")
|
|
||||||
return is_mentioned, is_at, reply_probability
|
return is_mentioned, is_at, reply_probability
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -112,8 +112,8 @@ class ExpressionLearner:
|
||||||
_, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat(
|
_, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat(
|
||||||
self.chat_id
|
self.chat_id
|
||||||
)
|
)
|
||||||
self.min_messages_for_learning = 30 / self.learning_intensity # 触发学习所需的最少消息数
|
self.min_messages_for_learning = 15 / self.learning_intensity # 触发学习所需的最少消息数
|
||||||
self.min_learning_interval = 300 / self.learning_intensity
|
self.min_learning_interval = 150 / self.learning_intensity
|
||||||
|
|
||||||
def should_trigger_learning(self) -> bool:
|
def should_trigger_learning(self) -> bool:
|
||||||
"""
|
"""
|
||||||
|
|
@ -343,44 +343,26 @@ class ExpressionLearner:
|
||||||
logger.error(f"解析匹配响应JSON失败: {e}, 响应内容: \n{response}")
|
logger.error(f"解析匹配响应JSON失败: {e}, 响应内容: \n{response}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# 确保 match_responses 是一个列表
|
|
||||||
if not isinstance(match_responses, list):
|
|
||||||
if isinstance(match_responses, dict):
|
|
||||||
match_responses = [match_responses]
|
|
||||||
else:
|
|
||||||
logger.error(f"match_responses 不是列表或字典类型: {type(match_responses)}, 内容: {match_responses}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
matched_expressions = []
|
matched_expressions = []
|
||||||
used_pair_indices = set() # 用于跟踪已经使用的expression_pair索引
|
used_pair_indices = set() # 用于跟踪已经使用的expression_pair索引
|
||||||
|
|
||||||
logger.debug(f"match_responses 类型: {type(match_responses)}, 长度: {len(match_responses)}")
|
print(f"match_responses: {match_responses}")
|
||||||
logger.debug(f"match_responses 内容: {match_responses}")
|
|
||||||
|
|
||||||
for match_response in match_responses:
|
for match_response in match_responses:
|
||||||
try:
|
try:
|
||||||
# 检查 match_response 的类型
|
|
||||||
if not isinstance(match_response, dict):
|
|
||||||
logger.error(f"match_response 不是字典类型: {type(match_response)}, 内容: {match_response}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 获取表达方式序号
|
# 获取表达方式序号
|
||||||
if "expression_pair" not in match_response:
|
|
||||||
logger.error(f"match_response 缺少 'expression_pair' 字段: {match_response}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
pair_index = int(match_response["expression_pair"]) - 1 # 转换为0-based索引
|
pair_index = int(match_response["expression_pair"]) - 1 # 转换为0-based索引
|
||||||
|
|
||||||
# 检查索引是否有效且未被使用过
|
# 检查索引是否有效且未被使用过
|
||||||
if 0 <= pair_index < len(expression_pairs) and pair_index not in used_pair_indices:
|
if 0 <= pair_index < len(expression_pairs) and pair_index not in used_pair_indices:
|
||||||
situation, style = expression_pairs[pair_index]
|
situation, style = expression_pairs[pair_index]
|
||||||
context = match_response.get("context", "")
|
context = match_response["context"]
|
||||||
matched_expressions.append((situation, style, context))
|
matched_expressions.append((situation, style, context))
|
||||||
used_pair_indices.add(pair_index) # 标记该索引已使用
|
used_pair_indices.add(pair_index) # 标记该索引已使用
|
||||||
logger.debug(f"成功匹配表达方式 {pair_index + 1}: {situation} -> {style}")
|
logger.debug(f"成功匹配表达方式 {pair_index + 1}: {situation} -> {style}")
|
||||||
elif pair_index in used_pair_indices:
|
elif pair_index in used_pair_indices:
|
||||||
logger.debug(f"跳过重复的表达方式 {pair_index + 1}")
|
logger.debug(f"跳过重复的表达方式 {pair_index + 1}")
|
||||||
except (ValueError, KeyError, IndexError, TypeError) as e:
|
except (ValueError, KeyError, IndexError) as e:
|
||||||
logger.error(f"解析匹配条目失败: {e}, 条目: {match_response}")
|
logger.error(f"解析匹配条目失败: {e}, 条目: {match_response}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -457,7 +439,7 @@ class ExpressionLearner:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
prev_original_idx = bare_lines[pos - 1][0]
|
prev_original_idx = bare_lines[pos - 1][0]
|
||||||
up_content = self._filter_message_content(random_msg[prev_original_idx].processed_plain_text or "")
|
up_content = (random_msg[prev_original_idx].processed_plain_text or "").strip()
|
||||||
if not up_content:
|
if not up_content:
|
||||||
# 上一句为空,跳过该表达
|
# 上一句为空,跳过该表达
|
||||||
continue
|
continue
|
||||||
|
|
@ -499,30 +481,6 @@ class ExpressionLearner:
|
||||||
expressions.append((situation, style))
|
expressions.append((situation, style))
|
||||||
return expressions
|
return expressions
|
||||||
|
|
||||||
def _filter_message_content(self, content: str) -> str:
|
|
||||||
"""
|
|
||||||
过滤消息内容,移除回复、@、图片等格式
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: 原始消息内容
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: 过滤后的内容
|
|
||||||
"""
|
|
||||||
if not content:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
# 移除以[回复开头、]结尾的部分,包括后面的",说:"部分
|
|
||||||
content = re.sub(r'\[回复.*?\],说:\s*', '', content)
|
|
||||||
# 移除@<...>格式的内容
|
|
||||||
content = re.sub(r'@<[^>]*>', '', content)
|
|
||||||
# 移除[picid:...]格式的图片ID
|
|
||||||
content = re.sub(r'\[picid:[^\]]*\]', '', content)
|
|
||||||
# 移除[表情包:...]格式的内容
|
|
||||||
content = re.sub(r'\[表情包:[^\]]*\]', '', content)
|
|
||||||
|
|
||||||
return content.strip()
|
|
||||||
|
|
||||||
def _build_bare_lines(self, messages: List) -> List[Tuple[int, str]]:
|
def _build_bare_lines(self, messages: List) -> List[Tuple[int, str]]:
|
||||||
"""
|
"""
|
||||||
为每条消息构建精简文本列表,保留到原消息索引的映射
|
为每条消息构建精简文本列表,保留到原消息索引的映射
|
||||||
|
|
@ -537,7 +495,15 @@ class ExpressionLearner:
|
||||||
|
|
||||||
for idx, msg in enumerate(messages):
|
for idx, msg in enumerate(messages):
|
||||||
content = msg.processed_plain_text or ""
|
content = msg.processed_plain_text or ""
|
||||||
content = self._filter_message_content(content)
|
|
||||||
|
# 移除以[回复开头、]结尾的部分
|
||||||
|
content = re.sub(r'\[回复[^\]]*\]', '', content)
|
||||||
|
# 移除@<...>格式的内容
|
||||||
|
content = re.sub(r'@<[^>]*>', '', content)
|
||||||
|
# 移除[picid:...]格式的图片ID
|
||||||
|
content = re.sub(r'\[picid:[^\]]*\]', '', content)
|
||||||
|
|
||||||
|
content = content.strip()
|
||||||
# 即使content为空也要记录,防止错位
|
# 即使content为空也要记录,防止错位
|
||||||
bare_lines.append((idx, content))
|
bare_lines.append((idx, content))
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue