From a3390f6cba9db0f9eef0d680c6f599b76f8c6969 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Mon, 29 Sep 2025 20:57:34 +0800 Subject: [PATCH] =?UTF-8?q?better:=E4=BC=98=E5=8C=96=E8=AE=B0=E5=BF=86?= =?UTF-8?q?=E6=9E=84=E5=BB=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/memory_system/Memory_chest.py | 57 +++++++++++++++++----- src/chat/utils/chat_message_builder.py | 67 +++++++++++++++++++++++++- 2 files changed, 110 insertions(+), 14 deletions(-) diff --git a/src/chat/memory_system/Memory_chest.py b/src/chat/memory_system/Memory_chest.py index 216bb432..23ade3af 100644 --- a/src/chat/memory_system/Memory_chest.py +++ b/src/chat/memory_system/Memory_chest.py @@ -35,7 +35,7 @@ class MemoryChest: self.memory_build_threshold = 30 self.memory_size_limit = global_config.memory.max_memory_size - self.running_content_list = {} # {chat_id: {"content": running_content, "last_update_time": timestamp}} + self.running_content_list = {} # {chat_id: {"content": running_content, "last_update_time": timestamp, "create_time": timestamp}} self.fetched_memory_list = [] # [(chat_id, (question, answer, timestamp)), ...] async def build_running_content(self, chat_id: str = None) -> str: @@ -53,7 +53,8 @@ class MemoryChest: if chat_id not in self.running_content_list: self.running_content_list[chat_id] = { "content": "", - "last_update_time": time.time() + "last_update_time": time.time(), + "create_time": time.time() } should_update = True @@ -67,10 +68,18 @@ class MemoryChest: chat_id=chat_id, limit=global_config.chat.max_context_size * 2, ) - + new_messages_count = len(message_list) - should_update = new_messages_count > self.memory_build_threshold - logger.info(f"chat_id {chat_id} 自上次更新后有 {new_messages_count} 条新消息,{'需要' if should_update else '不需要'}更新") + time_diff_minutes = (current_time - last_update_time) / 60 + + # 检查是否满足强制构建条件:超过15分钟且至少有5条新消息 + forced_update = time_diff_minutes > 15 and new_messages_count >= 5 + should_update = new_messages_count > self.memory_build_threshold or forced_update + + if forced_update: + logger.info(f"chat_id {chat_id} 距离上次更新已 {time_diff_minutes:.1f} 分钟,有 {new_messages_count} 条新消息,强制构建") + else: + logger.info(f"chat_id {chat_id} 自上次更新后有 {new_messages_count} 条新消息,{'需要' if should_update else '不需要'}更新") if should_update: @@ -81,6 +90,7 @@ class MemoryChest: timestamp_mode="relative", read_mark=0.0, show_actions=True, + remove_emoji_stickers=True, ) @@ -94,9 +104,12 @@ class MemoryChest: 请将下面的新聊天记录内的有用的信息,添加到你的记忆中 请主要关注概念和知识,而不是聊天的琐事 -如果有表情包,仅在意表情包对上下文的影响,不要在意表情包本身 -如果有图片,尽在意内容,不要在意图片的名称和编号 -记忆为一段纯文本,逻辑清晰,指出事件,概念的含义,并说明关系 +重要!!你要关注的概念和知识必须是较为不常见的信息,或者时效性较强的信息!! +不要!!关注常见的只是,或者已经过时的信息!! +1.不要关注诸如某个用户做了什么,说了什么,不要关注某个用户的行为,而是关注其中的概念性信息 +2.概念要求精确,不啰嗦,像科普读物或教育课本那样 +3.如果有图片,请只关注图片和文本结合的知识和概念性内容 +记忆为一段纯文本,逻辑清晰,指出概念的含义,并说明关系 请输出添加后的记忆内容,不要输出其他内容: {message_str} """ @@ -112,15 +125,28 @@ class MemoryChest: # 如果有chat_id,更新对应的running_content if chat_id and running_content: + current_time = time.time() + + # 保留原有的create_time,如果没有则使用当前时间 + create_time = self.running_content_list[chat_id].get("create_time", current_time) + self.running_content_list[chat_id] = { "content": running_content, - "last_update_time": time.time() + "last_update_time": current_time, + "create_time": create_time } # 检查running_content长度是否大于500 if len(running_content) > self.memory_size_limit: await self._save_to_database_and_clear(chat_id, running_content) + # 检查是否需要强制保存:create_time超过1800秒且内容大小达到max_memory_size的30% + elif (current_time - create_time > 1800 and + len(running_content) >= self.memory_size_limit * 0.3): + logger.info(f"chat_id {chat_id} 内容创建时间已超过 {(current_time - create_time)/60:.1f} 分钟," + f"内容大小 {len(running_content)} 达到限制的 {int(self.memory_size_limit * 0.3)} 字符,强制保存") + await self._save_to_database_and_clear(chat_id, running_content) + return running_content @@ -503,9 +529,16 @@ class MemoryChest: 以下是多段记忆内容,请将它们合并成一段记忆: {content} +请将下面的多段记忆内容,合并成一段记忆 请主要关注概念和知识,而不是聊天的琐事 -记忆为一段纯文本,逻辑清晰,指出事件,概念的含义,并说明关系 -请输出添加后的记忆内容,不要输出其他内容: +重要!!你要关注的概念和知识必须是较为不常见的信息,或者时效性较强的信息!! +不要!!关注常见的只是,或者已经过时的信息!! +1.不要关注诸如某个用户做了什么,说了什么,不要关注某个用户的行为,而是关注其中的概念性信息 +2.概念要求精确,不啰嗦,像科普读物或教育课本那样 +3.如果有图片,请只关注图片和文本结合的知识和概念性内容 +4.如果记忆中有冲突的地方,可以进行整合。如果无法整合,需要在此处标注存在冲突的不同信息 +记忆为一段纯文本,逻辑清晰,指出概念的含义,并说明关系 +请输出合并的记忆内容,不要输出其他内容: """ if global_config.debug.show_prompt: @@ -546,7 +579,7 @@ class MemoryChest: 请为以下内容生成一个描述全面的标题,要求描述内容的主要概念和事件: {merged_content} -标题不要分点,不要换行,不要输出其他内容 +标题不要分点,不要换行,不要输出其他内容,不要浮夸,以白话简洁的风格输出标题 请只输出标题,不要输出其他内容: """ diff --git a/src/chat/utils/chat_message_builder.py b/src/chat/utils/chat_message_builder.py index 6cf0feab..fbaa9bd5 100644 --- a/src/chat/utils/chat_message_builder.py +++ b/src/chat/utils/chat_message_builder.py @@ -622,6 +622,7 @@ def build_readable_messages_with_id( truncate: bool = False, show_actions: bool = False, show_pic: bool = True, + remove_emoji_stickers: bool = False, ) -> Tuple[str, List[Tuple[str, DatabaseMessages]]]: """ 将消息列表转换为可读的文本格式,并返回原始(时间戳, 昵称, 内容)列表。 @@ -638,6 +639,7 @@ def build_readable_messages_with_id( show_pic=show_pic, read_mark=read_mark, message_id_list=message_id_list, + remove_emoji_stickers=remove_emoji_stickers, ) return formatted_string, message_id_list @@ -652,6 +654,7 @@ def build_readable_messages( show_actions: bool = False, show_pic: bool = True, message_id_list: Optional[List[Tuple[str, DatabaseMessages]]] = None, + remove_emoji_stickers: bool = False, ) -> str: # sourcery skip: extract-method """ 将消息列表转换为可读的文本格式。 @@ -666,13 +669,43 @@ def build_readable_messages( read_mark: 已读标记时间戳 truncate: 是否截断长消息 show_actions: 是否显示动作记录 + remove_emoji_stickers: 是否移除表情包并过滤空消息 """ # WIP HERE and BELOW ---------------------------------------------- # 创建messages的深拷贝,避免修改原始列表 if not messages: return "" - copy_messages: List[MessageAndActionModel] = [MessageAndActionModel.from_DatabaseMessages(msg) for msg in messages] + # 如果启用移除表情包,先过滤消息 + if remove_emoji_stickers: + filtered_messages = [] + for msg in messages: + # 获取消息内容 + content = msg.display_message or msg.processed_plain_text or "" + + # 移除表情包 + emoji_pattern = r"\[表情包:[^\]]+\]" + content = re.sub(emoji_pattern, "", content) + + # 如果移除表情包后内容不为空,则保留消息 + if content.strip(): + filtered_messages.append(msg) + + messages = filtered_messages + + copy_messages: List[MessageAndActionModel] = [] + for msg in messages: + if remove_emoji_stickers: + # 创建 MessageAndActionModel 但移除表情包 + model = MessageAndActionModel.from_DatabaseMessages(msg) + # 移除表情包 + if model.display_message: + model.display_message = re.sub(r"\[表情包:[^\]]+\]", "", model.display_message) + if model.processed_plain_text: + model.processed_plain_text = re.sub(r"\[表情包:[^\]]+\]", "", model.processed_plain_text) + copy_messages.append(model) + else: + copy_messages.append(MessageAndActionModel.from_DatabaseMessages(msg)) if show_actions and copy_messages: # 获取所有消息的时间范围 @@ -901,6 +934,7 @@ def build_readable_messages_anonymized( show_actions: bool = False, show_pic: bool = True, replace_bot_name: bool = True, + remove_emoji_stickers: bool = False, ) -> Tuple[str, Dict[str, str]]: """ 仿照 build_readable_messages,构建匿名化的可读消息: @@ -953,8 +987,37 @@ def build_readable_messages_anonymized( name_mapping[original_display] = anon return anon + # 如果启用移除表情包,先过滤消息 + if remove_emoji_stickers: + filtered_messages = [] + for msg in messages: + # 获取消息内容 + content = msg.display_message or msg.processed_plain_text or "" + + # 移除表情包 + emoji_pattern = r"\[表情包:[^\]]+\]" + content = re.sub(emoji_pattern, "", content) + + # 如果移除表情包后内容不为空,则保留消息 + if content.strip(): + filtered_messages.append(msg) + + messages = filtered_messages + # 将 DatabaseMessages 转换为可处理结构,并可选拼入动作 - copy_messages: List[MessageAndActionModel] = [MessageAndActionModel.from_DatabaseMessages(msg) for msg in messages] + copy_messages: List[MessageAndActionModel] = [] + for msg in messages: + if remove_emoji_stickers: + # 创建 MessageAndActionModel 但移除表情包 + model = MessageAndActionModel.from_DatabaseMessages(msg) + # 移除表情包 + if model.display_message: + model.display_message = re.sub(r"\[表情包:[^\]]+\]", "", model.display_message) + if model.processed_plain_text: + model.processed_plain_text = re.sub(r"\[表情包:[^\]]+\]", "", model.processed_plain_text) + copy_messages.append(model) + else: + copy_messages.append(MessageAndActionModel.from_DatabaseMessages(msg)) if show_actions and copy_messages: min_time = min(msg.time or 0 for msg in copy_messages)