diff --git a/changelogs/changelog.md b/changelogs/changelog.md index 99fcd682..da96cb72 100644 --- a/changelogs/changelog.md +++ b/changelogs/changelog.md @@ -1,10 +1,24 @@ # Changelog + +TODO:回复频率动态控制 + +## [0.10.2] - 2025-8-24 +### 🌟 主要功能更改 +- 记忆系统重新启用,更好更优秀 +- 更好的event系统 +- 为空回复添加重试机制 + +### 细节功能更改 +- 修复tts插件可能的复读问题 + + ## [0.10.1] - 2025-8-24 ### 🌟 主要功能更改 - planner现在改为大小核结构,移除激活阶段,提高回复速度和动作调用精准度 - 优化关系的表现的效率 +### 细节功能更改 - 优化识图的表现 - 为planner添加单独控制的提示词 - 修复激活值计算异常的BUG diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index 8680392a..18aec433 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -385,12 +385,6 @@ class HeartFChatting: async with global_prompt_manager.async_message_scope(self.chat_stream.context.get_template_name()): await self.expression_learner.trigger_learning_for_chat() - # # 记忆构建:为当前chat_id构建记忆 - # try: - # await hippocampus_manager.build_memory_for_chat(self.stream_id) - # except Exception as e: - # logger.error(f"{self.log_prefix} 记忆构建失败: {e}") - available_actions: Dict[str, ActionInfo] = {} if random.random() > self.focus_value_control.get_current_focus_value() and mode == ChatMode.FOCUS: # 如果激活度没有激活,并且聊天活跃度低,有可能不进行plan,相当于不在电脑前,不进行认真思考 @@ -445,8 +439,8 @@ class HeartFChatting: available_actions=available_actions, ) - for action in action_to_use_info: - print(action.action_type) + # for action in action_to_use_info: + # print(action.action_type) # 3. 并行执行所有动作 action_tasks = [ diff --git a/src/chat/memory_system/Hippocampus.py b/src/chat/memory_system/Hippocampus.py index 1b15d717..bb8d6c5c 100644 --- a/src/chat/memory_system/Hippocampus.py +++ b/src/chat/memory_system/Hippocampus.py @@ -18,6 +18,7 @@ from src.config.config import global_config, model_config from src.common.data_models.database_data_model import DatabaseMessages from src.common.database.database_model import GraphNodes, GraphEdges # Peewee Models导入 from src.common.logger import get_logger +from src.chat.utils.utils import cut_key_words from src.chat.utils.chat_message_builder import ( build_readable_messages, get_raw_msg_by_timestamp_with_chat_inclusive, @@ -98,19 +99,23 @@ class MemoryGraph: current_weight = self.G.nodes[concept].get("weight", 0.0) self.G.nodes[concept]["weight"] = current_weight + 1.0 logger.debug(f"节点 {concept} 记忆整合成功,权重增加到 {current_weight + 1.0}") + logger.info(f"节点 {concept} 记忆内容已更新:{integrated_memory}") except Exception as e: logger.error(f"LLM整合记忆失败: {e}") # 降级到简单连接 new_memory_str = f"{existing_memory} | {memory}" self.G.nodes[concept]["memory_items"] = new_memory_str + logger.info(f"节点 {concept} 记忆内容已简单拼接并更新:{new_memory_str}") else: new_memory_str = str(memory) self.G.nodes[concept]["memory_items"] = new_memory_str + logger.info(f"节点 {concept} 记忆内容已直接更新:{new_memory_str}") else: self.G.nodes[concept]["memory_items"] = str(memory) # 如果节点存在但没有memory_items,说明是第一次添加memory,设置created_time if "created_time" not in self.G.nodes[concept]: self.G.nodes[concept]["created_time"] = current_time + logger.info(f"节点 {concept} 创建新记忆:{str(memory)}") # 更新最后修改时间 self.G.nodes[concept]["last_modified"] = current_time else: @@ -122,6 +127,7 @@ class MemoryGraph: created_time=current_time, # 添加创建时间 last_modified=current_time, ) # 添加最后修改时间 + logger.info(f"新节点 {concept} 已添加,记忆内容已写入:{str(memory)}") def get_dot(self, concept): # 检查节点是否存在于图中 @@ -402,9 +408,7 @@ class Hippocampus: text_length = len(text) topic_num: int | list[int] = 0 - words = jieba.cut(text) - keywords_lite = [word for word in words if len(word) > 1] - keywords_lite = list(set(keywords_lite)) + keywords_lite = cut_key_words(text) if keywords_lite: logger.debug(f"提取关键词极简版: {keywords_lite}") @@ -1159,6 +1163,131 @@ class ParahippocampalGyrus: return compressed_memory, similar_topics_dict + def get_similar_topics_from_keywords( + self, + keywords: list[str] | str, + top_k: int = 3, + threshold: float = 0.7, + ) -> dict[str, list[tuple[str, float]]]: + """基于输入的关键词,返回每个关键词对应的相似主题列表。 + + Args: + keywords: 关键词列表或以逗号/空格/顿号分隔的字符串。 + top_k: 每个关键词返回的相似主题数量上限。 + threshold: 相似度阈值,低于该值的主题将被过滤。 + + Returns: + dict[str, list[tuple[str, float]]]: {keyword: [(topic, similarity), ...]} + """ + # 规范化输入为列表[str] + if isinstance(keywords, str): + # 支持中英文逗号、顿号、空格分隔 + parts = ( + keywords.replace(",", ",").replace("、", ",").replace(" ", ",").strip(", ") + ) + keyword_list = [p.strip() for p in parts.split(",") if p.strip()] + else: + keyword_list = [k.strip() for k in keywords if isinstance(k, str) and k.strip()] + + if not keyword_list: + return {} + + existing_topics = list(self.memory_graph.G.nodes()) + result: dict[str, list[tuple[str, float]]] = {} + + for kw in keyword_list: + kw_words = set(jieba.cut(kw)) + similar_topics: list[tuple[str, float]] = [] + + for topic in existing_topics: + topic_words = set(jieba.cut(topic)) + all_words = kw_words | topic_words + if not all_words: + continue + v1 = [1 if w in kw_words else 0 for w in all_words] + v2 = [1 if w in topic_words else 0 for w in all_words] + sim = cosine_similarity(v1, v2) + if sim >= threshold: + similar_topics.append((topic, sim)) + + similar_topics.sort(key=lambda x: x[1], reverse=True) + result[kw] = similar_topics[:top_k] + + return result + + async def add_memory_with_similar( + self, + memory_item: str, + similar_topics_dict: dict[str, list[tuple[str, float]]], + ) -> bool: + """将单条记忆内容与相似主题写入记忆网络并同步数据库。 + + 按 build_memory_for_chat 的方式:为 similar_topics_dict 的每个键作为主题添加节点内容, + 并与其相似主题建立连接,连接强度为 int(similarity * 10)。 + + Args: + memory_item: 记忆内容字符串,将作为每个主题节点的 memory_items。 + similar_topics_dict: {topic: [(similar_topic, similarity), ...]} + + Returns: + bool: 是否成功执行添加与同步。 + """ + try: + if not memory_item or not isinstance(memory_item, str): + return False + + if not similar_topics_dict or not isinstance(similar_topics_dict, dict): + return False + + current_time = time.time() + + # 为每个主题写入节点 + for topic, similar_list in similar_topics_dict.items(): + if not topic or not isinstance(topic, str): + continue + + await self.hippocampus.memory_graph.add_dot(topic, memory_item, self.hippocampus) + + # 连接相似主题 + if isinstance(similar_list, list): + for item in similar_list: + try: + similar_topic, similarity = item + except Exception: + continue + if not isinstance(similar_topic, str): + continue + if topic == similar_topic: + continue + # 强度按 build_memory_for_chat 的规则 + strength = int(max(0.0, float(similarity)) * 10) if similarity is not None else 0 + if strength <= 0: + continue + # 确保相似主题节点存在(如果没有,也可以只建立边,networkx会创建节点,但需初始化属性) + if similar_topic not in self.memory_graph.G: + # 创建一个空的相似主题节点,避免悬空边,memory_items 为空字符串 + self.memory_graph.G.add_node( + similar_topic, + memory_items="", + weight=1.0, + created_time=current_time, + last_modified=current_time, + ) + self.memory_graph.G.add_edge( + topic, + similar_topic, + strength=strength, + created_time=current_time, + last_modified=current_time, + ) + + # 同步数据库 + await self.hippocampus.entorhinal_cortex.sync_memory_to_db() + return True + except Exception as e: + logger.error(f"添加记忆节点失败: {e}") + return False + async def operation_forget_topic(self, percentage=0.005): start_time = time.time() logger.info("[遗忘] 开始检查数据库...") @@ -1325,7 +1454,6 @@ class HippocampusManager: logger.info(f""" -------------------------------- 记忆系统参数配置: - 构建频率: {global_config.memory.memory_build_frequency}秒|压缩率: {global_config.memory.memory_compress_rate} 遗忘间隔: {global_config.memory.forget_memory_interval}秒|遗忘比例: {global_config.memory.memory_forget_percentage}|遗忘: {global_config.memory.memory_forget_time}小时之后 记忆图统计信息: 节点数量: {node_count}, 连接数量: {edge_count} --------------------------------""") # noqa: E501 @@ -1343,61 +1471,6 @@ class HippocampusManager: raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法") return await self._hippocampus.parahippocampal_gyrus.operation_forget_topic(percentage) - async def build_memory_for_chat(self, chat_id: str): - """为指定chat_id构建记忆(在heartFC_chat.py中调用)""" - if not self._initialized: - raise RuntimeError("HippocampusManager 尚未初始化,请先调用 initialize 方法") - - try: - # 检查是否需要构建记忆 - logger.info(f"为 {chat_id} 构建记忆") - if memory_segment_manager.check_and_build_memory_for_chat(chat_id): - logger.info(f"为 {chat_id} 构建记忆,需要构建记忆") - messages = memory_segment_manager.get_messages_for_memory_build(chat_id, 50) - - build_probability = 0.3 * global_config.memory.memory_build_frequency - - if messages and random.random() < build_probability: - logger.info(f"为 {chat_id} 构建记忆,消息数量: {len(messages)}") - - # 调用记忆压缩和构建 - ( - compressed_memory, - similar_topics_dict, - ) = await self._hippocampus.parahippocampal_gyrus.memory_compress( - messages, global_config.memory.memory_compress_rate - ) - - # 添加记忆节点 - current_time = time.time() - for topic, memory in compressed_memory: - await self._hippocampus.memory_graph.add_dot(topic, memory, self._hippocampus) - - # 连接相似主题 - if topic in similar_topics_dict: - similar_topics = similar_topics_dict[topic] - for similar_topic, similarity in similar_topics: - if topic != similar_topic: - strength = int(similarity * 10) - self._hippocampus.memory_graph.G.add_edge( - topic, - similar_topic, - strength=strength, - created_time=current_time, - last_modified=current_time, - ) - - # 同步到数据库 - await self._hippocampus.entorhinal_cortex.sync_memory_to_db() - logger.info(f"为 {chat_id} 构建记忆完成") - return True - - except Exception as e: - logger.error(f"为 {chat_id} 构建记忆失败: {e}") - return False - - return False - async def get_memory_from_topic( self, valid_keywords: list[str], max_memory_num: int = 3, max_memory_length: int = 2, max_depth: int = 3 ) -> list: @@ -1441,89 +1514,3 @@ class HippocampusManager: # 创建全局实例 hippocampus_manager = HippocampusManager() - - -# 在Hippocampus类中添加新的记忆构建管理器 -class MemoryBuilder: - """记忆构建器 - - 为每个chat_id维护消息缓存和触发机制,类似ExpressionLearner - """ - - def __init__(self, chat_id: str): - self.chat_id = chat_id - self.last_update_time: float = time.time() - self.last_processed_time: float = 0.0 - - def should_trigger_memory_build(self) -> bool: - # sourcery skip: assign-if-exp, boolean-if-exp-identity, reintroduce-else - """检查是否应该触发记忆构建""" - current_time = time.time() - - # 检查时间间隔 - time_diff = current_time - self.last_update_time - if time_diff < 600 / global_config.memory.memory_build_frequency: - return False - - # 检查消息数量 - - recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive( - chat_id=self.chat_id, - timestamp_start=self.last_update_time, - timestamp_end=current_time, - ) - - logger.info(f"最近消息数量: {len(recent_messages)},间隔时间: {time_diff}") - - if not recent_messages or len(recent_messages) < 30 / global_config.memory.memory_build_frequency: - return False - - return True - - def get_messages_for_memory_build(self, threshold: int = 25) -> List[DatabaseMessages]: - """获取用于记忆构建的消息""" - current_time = time.time() - - messages = get_raw_msg_by_timestamp_with_chat_inclusive( - chat_id=self.chat_id, - timestamp_start=self.last_update_time, - timestamp_end=current_time, - limit=threshold, - ) - if messages: - # 更新最后处理时间 - self.last_processed_time = current_time - self.last_update_time = current_time - - return messages or [] - - -class MemorySegmentManager: - """记忆段管理器 - - 管理所有chat_id的MemoryBuilder实例,自动检查和触发记忆构建 - """ - - def __init__(self): - self.builders: Dict[str, MemoryBuilder] = {} - - def get_or_create_builder(self, chat_id: str) -> MemoryBuilder: - """获取或创建指定chat_id的MemoryBuilder""" - if chat_id not in self.builders: - self.builders[chat_id] = MemoryBuilder(chat_id) - return self.builders[chat_id] - - def check_and_build_memory_for_chat(self, chat_id: str) -> bool: - """检查指定chat_id是否需要构建记忆,如果需要则返回True""" - builder = self.get_or_create_builder(chat_id) - return builder.should_trigger_memory_build() - - def get_messages_for_memory_build(self, chat_id: str, threshold: int = 25) -> List[DatabaseMessages]: - """获取指定chat_id用于记忆构建的消息""" - if chat_id not in self.builders: - return [] - return self.builders[chat_id].get_messages_for_memory_build(threshold) - - -# 创建全局实例 -memory_segment_manager = MemorySegmentManager() diff --git a/src/chat/memory_system/instant_memory.py b/src/chat/memory_system/instant_memory.py deleted file mode 100644 index f8e91b5c..00000000 --- a/src/chat/memory_system/instant_memory.py +++ /dev/null @@ -1,254 +0,0 @@ -# -*- coding: utf-8 -*- -import time -import re -import json -import ast -import traceback - -from json_repair import repair_json -from datetime import datetime, timedelta - -from src.llm_models.utils_model import LLMRequest -from src.common.logger import get_logger -from src.common.database.database_model import Memory # Peewee Models导入 -from src.config.config import model_config, global_config - - -logger = get_logger(__name__) - - -class MemoryItem: - def __init__(self, memory_id: str, chat_id: str, memory_text: str, keywords: list[str]): - self.memory_id = memory_id - self.chat_id = chat_id - self.memory_text: str = memory_text - self.keywords: list[str] = keywords - self.create_time: float = time.time() - self.last_view_time: float = time.time() - - -class MemoryManager: - def __init__(self): - # self.memory_items:list[MemoryItem] = [] - pass - - -class InstantMemory: - def __init__(self, chat_id): - self.chat_id = chat_id - self.last_view_time = time.time() - self.summary_model = LLMRequest( - model_set=model_config.model_task_config.utils, - request_type="memory.summary", - ) - - async def if_need_build(self, text: str): - prompt = f""" -请判断以下内容中是否有值得记忆的信息,如果有,请输出1,否则输出0 -{text} -请只输出1或0就好 - """ - - try: - response, _ = await self.summary_model.generate_response_async(prompt, temperature=0.5) - if global_config.debug.show_prompt: - print(prompt) - print(response) - - return "1" in response - except Exception as e: - logger.error(f"判断是否需要记忆出现错误:{str(e)} {traceback.format_exc()}") - return False - - async def build_memory(self, text): - prompt = f""" - 以下内容中存在值得记忆的信息,请你从中总结出一段值得记忆的信息,并输出 - {text} - 请以json格式输出一段概括的记忆内容和关键词 - {{ - "memory_text": "记忆内容", - "keywords": "关键词,用/划分" - }} - """ - try: - response, _ = await self.summary_model.generate_response_async(prompt, temperature=0.5) - # print(prompt) - # print(response) - if not response: - return None - try: - repaired = repair_json(response) - result = json.loads(repaired) - memory_text = result.get("memory_text", "") - keywords = result.get("keywords", "") - if isinstance(keywords, str): - keywords_list = [k.strip() for k in keywords.split("/") if k.strip()] - elif isinstance(keywords, list): - keywords_list = keywords - else: - keywords_list = [] - return {"memory_text": memory_text, "keywords": keywords_list} - except Exception as parse_e: - logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}") - return None - except Exception as e: - logger.error(f"构建记忆出现错误:{str(e)} {traceback.format_exc()}") - return None - - async def create_and_store_memory(self, text: str): - if_need = await self.if_need_build(text) - if if_need: - logger.info(f"需要记忆:{text}") - memory = await self.build_memory(text) - if memory and memory.get("memory_text"): - memory_id = f"{self.chat_id}_{time.time()}" - memory_item = MemoryItem( - memory_id=memory_id, - chat_id=self.chat_id, - memory_text=memory["memory_text"], - keywords=memory.get("keywords", []), - ) - await self.store_memory(memory_item) - else: - logger.info(f"不需要记忆:{text}") - - async def store_memory(self, memory_item: MemoryItem): - memory = Memory( - memory_id=memory_item.memory_id, - chat_id=memory_item.chat_id, - memory_text=memory_item.memory_text, - keywords=memory_item.keywords, - create_time=memory_item.create_time, - last_view_time=memory_item.last_view_time, - ) - memory.save() - - async def get_memory(self, target: str): - from json_repair import repair_json - - prompt = f""" -请根据以下发言内容,判断是否需要提取记忆 -{target} -请用json格式输出,包含以下字段: -其中,time的要求是: -可以选择具体日期时间,格式为YYYY-MM-DD HH:MM:SS,或者大致时间,格式为YYYY-MM-DD -可以选择相对时间,例如:今天,昨天,前天,5天前,1个月前 -可以选择留空进行模糊搜索 -{{ - "need_memory": 1, - "keywords": "希望获取的记忆关键词,用/划分", - "time": "希望获取的记忆大致时间" -}} -请只输出json格式,不要输出其他多余内容 -""" - try: - response, _ = await self.summary_model.generate_response_async(prompt, temperature=0.5) - if global_config.debug.show_prompt: - print(prompt) - print(response) - if not response: - return None - try: - repaired = repair_json(response) - result = json.loads(repaired) - # 解析keywords - keywords = result.get("keywords", "") - if isinstance(keywords, str): - keywords_list = [k.strip() for k in keywords.split("/") if k.strip()] - elif isinstance(keywords, list): - keywords_list = keywords - else: - keywords_list = [] - # 解析time为时间段 - time_str = result.get("time", "").strip() - start_time, end_time = self._parse_time_range(time_str) - logger.info(f"start_time: {start_time}, end_time: {end_time}") - # 检索包含关键词的记忆 - memories_set = set() - if start_time and end_time: - start_ts = start_time.timestamp() - end_ts = end_time.timestamp() - query = Memory.select().where( - (Memory.chat_id == self.chat_id) - & (Memory.create_time >= start_ts) # type: ignore - & (Memory.create_time < end_ts) # type: ignore - ) - else: - query = Memory.select().where(Memory.chat_id == self.chat_id) - - for mem in query: - # 对每条记忆 - mem_keywords = mem.keywords or "" - parsed = ast.literal_eval(mem_keywords) - if isinstance(parsed, list): - mem_keywords = [str(k).strip() for k in parsed if str(k).strip()] - else: - mem_keywords = [] - # logger.info(f"mem_keywords: {mem_keywords}") - # logger.info(f"keywords_list: {keywords_list}") - for kw in keywords_list: - # logger.info(f"kw: {kw}") - # logger.info(f"kw in mem_keywords: {kw in mem_keywords}") - if kw in mem_keywords: - # logger.info(f"mem.memory_text: {mem.memory_text}") - memories_set.add(mem.memory_text) - break - return list(memories_set) - except Exception as parse_e: - logger.error(f"解析记忆json失败:{str(parse_e)} {traceback.format_exc()}") - return None - except Exception as e: - logger.error(f"获取记忆出现错误:{str(e)} {traceback.format_exc()}") - return None - - def _parse_time_range(self, time_str): - # sourcery skip: extract-duplicate-method, use-contextlib-suppress - """ - 支持解析如下格式: - - 具体日期时间:YYYY-MM-DD HH:MM:SS - - 具体日期:YYYY-MM-DD - - 相对时间:今天,昨天,前天,N天前,N个月前 - - 空字符串:返回(None, None) - """ - now = datetime.now() - if not time_str: - return 0, now - time_str = time_str.strip() - # 具体日期时间 - try: - dt = datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") - return dt, dt + timedelta(hours=1) - except Exception: - pass - # 具体日期 - try: - dt = datetime.strptime(time_str, "%Y-%m-%d") - return dt, dt + timedelta(days=1) - except Exception: - pass - # 相对时间 - if time_str == "今天": - start = now.replace(hour=0, minute=0, second=0, microsecond=0) - end = start + timedelta(days=1) - return start, end - if time_str == "昨天": - start = (now - timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0) - end = start + timedelta(days=1) - return start, end - if time_str == "前天": - start = (now - timedelta(days=2)).replace(hour=0, minute=0, second=0, microsecond=0) - end = start + timedelta(days=1) - return start, end - if m := re.match(r"(\d+)天前", time_str): - days = int(m.group(1)) - start = (now - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0) - end = start + timedelta(days=1) - return start, end - if m := re.match(r"(\d+)个月前", time_str): - months = int(m.group(1)) - # 近似每月30天 - start = (now - timedelta(days=months * 30)).replace(hour=0, minute=0, second=0, microsecond=0) - end = start + timedelta(days=1) - return start, end - # 其他无法解析 - return 0, now diff --git a/src/chat/replyer/default_generator.py b/src/chat/replyer/default_generator.py index 1db4efa6..51477d89 100644 --- a/src/chat/replyer/default_generator.py +++ b/src/chat/replyer/default_generator.py @@ -26,7 +26,6 @@ from src.chat.utils.chat_message_builder import ( ) from src.chat.express.expression_selector import expression_selector from src.chat.memory_system.memory_activator import MemoryActivator -from src.chat.memory_system.instant_memory import InstantMemory from src.mood.mood_manager import mood_manager from src.person_info.person_info import Person, is_person_known from src.plugin_system.base.component_types import ActionInfo, EventType @@ -147,7 +146,6 @@ class DefaultReplyer: self.is_group_chat, self.chat_target_info = get_chat_type_and_target_info(self.chat_stream.stream_id) self.heart_fc_sender = HeartFCSender() self.memory_activator = MemoryActivator() - self.instant_memory = InstantMemory(chat_id=self.chat_stream.stream_id) from src.plugin_system.core.tool_use import ToolExecutor # 延迟导入ToolExecutor,不然会循环依赖 @@ -375,20 +373,11 @@ class DefaultReplyer: instant_memory = None - # running_memories = await self.memory_activator.activate_memory_with_chat_history( - # target_message=target, chat_history=chat_history - # ) + running_memories = await self.memory_activator.activate_memory_with_chat_history( + target_message=target, chat_history=chat_history + ) running_memories = None - if global_config.memory.enable_instant_memory: - chat_history_str = build_readable_messages( - messages=chat_history, replace_bot_name=True, timestamp_mode="normal" - ) - asyncio.create_task(self.instant_memory.create_and_store_memory(chat_history_str)) - - instant_memory = await self.instant_memory.get_memory(target) - logger.info(f"即时记忆:{instant_memory}") - if not running_memories: return "" diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index b489e1e7..88562fff 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -834,3 +834,79 @@ def parse_keywords_string(keywords_input) -> list[str]: # 如果没有分隔符,返回单个关键词 return [keywords_str] if keywords_str else [] + + + + +def cut_key_words(concept_name: str) -> list[str]: + """对概念名称进行jieba分词,并过滤掉关键词列表中的关键词""" + concept_name_tokens = list(jieba.cut(concept_name)) + + # 定义常见连词、停用词与标点 + conjunctions = { + "和", "与", "及", "跟", "以及", "并且", "而且", "或", "或者", "并" + } + stop_words = { + "的", "了", "呢", "吗", "吧", "啊", "哦", "恩", "嗯", "呀", "嘛", "哇", + "在", "是", "很", "也", "又", "就", "都", "还", "更", "最", "被", "把", + "给", "对", "和", "与", "及", "跟", "并", "而且", "或者", "或", "以及" + } + chinese_punctuations = set(",。!?、;:()【】《》“”‘’—…·-——,.!?;:()[]<>'\"/\\") + + # 清理空白并初步过滤纯标点 + cleaned_tokens = [] + for tok in concept_name_tokens: + t = tok.strip() + if not t: + continue + # 去除纯标点 + if all(ch in chinese_punctuations for ch in t): + continue + cleaned_tokens.append(t) + + # 合并连词两侧的词(仅当两侧都存在且不是标点/停用词时) + merged_tokens = [] + i = 0 + n = len(cleaned_tokens) + while i < n: + tok = cleaned_tokens[i] + if tok in conjunctions and merged_tokens and i + 1 < n: + left = merged_tokens[-1] + right = cleaned_tokens[i + 1] + # 左右都需要是有效词 + if left and right \ + and left not in conjunctions and right not in conjunctions \ + and left not in stop_words and right not in stop_words \ + and not all(ch in chinese_punctuations for ch in left) \ + and not all(ch in chinese_punctuations for ch in right): + # 合并为一个新词,并替换掉左侧与跳过右侧 + combined = f"{left}{tok}{right}" + merged_tokens[-1] = combined + i += 2 + continue + # 常规推进 + merged_tokens.append(tok) + i += 1 + + # 二次过滤:去除停用词、单字符纯标点与无意义项 + result_tokens = [] + seen = set() + # ban_words = set(getattr(global_config.memory, "memory_ban_words", []) or []) + for tok in merged_tokens: + if tok in conjunctions: + # 独立连词丢弃 + continue + if tok in stop_words: + continue + # if tok in ban_words: + # continue + if all(ch in chinese_punctuations for ch in tok): + continue + if tok.strip() == "": + continue + if tok not in seen: + seen.add(tok) + result_tokens.append(tok) + + filtered_concept_name_tokens = result_tokens + return filtered_concept_name_tokens \ No newline at end of file diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py index 8a6ea8cb..330bfa7d 100644 --- a/src/common/database/database_model.py +++ b/src/common/database/database_model.py @@ -298,19 +298,6 @@ class GroupInfo(BaseModel): # database = db # 继承自 BaseModel table_name = "group_info" - -class Memory(BaseModel): - memory_id = TextField(index=True) - chat_id = TextField(null=True) - memory_text = TextField(null=True) - keywords = TextField(null=True) - create_time = FloatField(null=True) - last_view_time = FloatField(null=True) - - class Meta: - table_name = "memory" - - class Expression(BaseModel): """ 用于存储表达风格的模型。 @@ -377,7 +364,6 @@ def create_tables(): Expression, GraphNodes, # 添加图节点表 GraphEdges, # 添加图边表 - Memory, ActionRecords, # 添加 ActionRecords 到初始化列表 ] ) @@ -403,7 +389,6 @@ def initialize_database(sync_constraints=False): OnlineTime, PersonInfo, Expression, - Memory, GraphNodes, GraphEdges, ActionRecords, # 添加 ActionRecords 到初始化列表 @@ -501,7 +486,6 @@ def sync_field_constraints(): OnlineTime, PersonInfo, Expression, - Memory, GraphNodes, GraphEdges, ActionRecords, @@ -680,7 +664,6 @@ def check_field_constraints(): OnlineTime, PersonInfo, Expression, - Memory, GraphNodes, GraphEdges, ActionRecords, diff --git a/src/config/official_configs.py b/src/config/official_configs.py index c99c5dad..7d9d950b 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -60,9 +60,6 @@ class RelationshipConfig(ConfigBase): enable_relationship: bool = True """是否启用关系系统""" - relation_frequency: int = 1 - """关系频率,麦麦构建关系的速度""" - @dataclass class ChatConfig(ConfigBase): @@ -336,14 +333,8 @@ class MemoryConfig(ConfigBase): enable_memory: bool = True """是否启用记忆系统""" - - memory_build_frequency: int = 1 - """记忆构建频率(秒)""" - memory_compress_rate: float = 0.1 - """记忆压缩率""" - - forget_memory_interval: int = 1000 + forget_memory_interval: int = 1500 """记忆遗忘间隔(秒)""" memory_forget_time: int = 24 @@ -355,9 +346,6 @@ class MemoryConfig(ConfigBase): memory_ban_words: list[str] = field(default_factory=lambda: ["表情包", "图片", "回复", "聊天记录"]) """不允许记忆的词列表""" - enable_instant_memory: bool = True - """是否启用即时记忆""" - @dataclass class MoodConfig(ConfigBase): diff --git a/src/plugins/built_in/memory/_manifest.json b/src/plugins/built_in/memory/_manifest.json new file mode 100644 index 00000000..08a58540 --- /dev/null +++ b/src/plugins/built_in/memory/_manifest.json @@ -0,0 +1,34 @@ +{ + "manifest_version": 1, + "name": "Memory Build组件", + "version": "1.0.0", + "description": "可以构建和管理记忆", + "author": { + "name": "Mai", + "url": "https://github.com/MaiM-with-u" + }, + "license": "GPL-v3.0-or-later", + + "host_application": { + "min_version": "0.10.1" + }, + "homepage_url": "https://github.com/MaiM-with-u/maibot", + "repository_url": "https://github.com/MaiM-with-u/maibot", + "keywords": ["memory", "build", "built-in"], + "categories": ["Memory"], + + "default_locale": "zh-CN", + "locales_path": "_locales", + + "plugin_info": { + "is_built_in": true, + "plugin_type": "action_provider", + "components": [ + { + "type": "build_memory", + "name": "build_memory", + "description": "构建记忆" + } + ] + } +} diff --git a/src/plugins/built_in/memory/build_memory.py b/src/plugins/built_in/memory/build_memory.py new file mode 100644 index 00000000..939f6c23 --- /dev/null +++ b/src/plugins/built_in/memory/build_memory.py @@ -0,0 +1,134 @@ +from typing import Tuple + +from src.common.logger import get_logger +from src.config.config import global_config +from src.chat.utils.prompt_builder import Prompt +from src.plugin_system import BaseAction, ActionActivationType +from src.chat.memory_system.Hippocampus import hippocampus_manager +from src.chat.utils.utils import cut_key_words + +logger = get_logger("memory") + + +def init_prompt(): + Prompt( + """ +以下是一些记忆条目的分类: +---------------------- +{category_list} +---------------------- +每一个分类条目类型代表了你对用户:"{person_name}"的印象的一个类别 + +现在,你有一条对 {person_name} 的新记忆内容: +{memory_point} + +请判断该记忆内容是否属于上述分类,请给出分类的名称。 +如果不属于上述分类,请输出一个合适的分类名称,对新记忆内容进行概括。要求分类名具有概括性。 +注意分类数一般不超过5个 +请严格用json格式输出,不要输出任何其他内容: +{{ + "category": "分类名称" +}} """, + "relation_category", + ) + + Prompt( + """ +以下是有关{category}的现有记忆: +---------------------- +{memory_list} +---------------------- + +现在,你有一条对 {person_name} 的新记忆内容: +{memory_point} + +请判断该新记忆内容是否已经存在于现有记忆中,你可以对现有进行进行以下修改: +注意,一般来说记忆内容不超过5个,且记忆文本不应太长 + +1.新增:当记忆内容不存在于现有记忆,且不存在矛盾,请用json格式输出: +{{ + "new_memory": "需要新增的记忆内容" +}} +2.加深印象:如果这个新记忆已经存在于现有记忆中,在内容上与现有记忆类似,请用json格式输出: +{{ + "memory_id": 1, #请输出你认为需要加深印象的,与新记忆内容类似的,已经存在的记忆的序号 + "integrate_memory": "加深后的记忆内容,合并内容类似的新记忆和旧记忆" +}} +3.整合:如果这个新记忆与现有记忆产生矛盾,请你结合其他记忆进行整合,用json格式输出: +{{ + "memory_id": 1, #请输出你认为需要整合的,与新记忆存在矛盾的,已经存在的记忆的序号 + "integrate_memory": "整合后的记忆内容,合并内容矛盾的新记忆和旧记忆" +}} + +现在,请你根据情况选出合适的修改方式,并输出json,不要输出其他内容: +""", + "relation_category_update", + ) + + +class BuildMemoryAction(BaseAction): + """关系动作 - 构建关系""" + + activation_type = ActionActivationType.LLM_JUDGE + parallel_action = True + + # 动作基本信息 + action_name = "build_memory" + action_description = "了解对于某个概念或者某件事的记忆,并存储下来,在之后的聊天中,你可以根据这条记忆来获取相关信息" + + # 动作参数定义 + action_parameters = { + "concept_name": "需要了解或记忆的概念或事件的名称", + "concept_description": "需要了解或记忆的概念或事件的描述,需要具体且明确", + } + + # 动作使用场景 + action_require = [ + "了解对于某个概念或者某件事的记忆,并存储下来,在之后的聊天中,你可以根据这条记忆来获取相关信息", + "有你不了解的概念", + "有人要求你记住某个概念或者事件", + "你对某件事或概念有新的理解,或产生了兴趣", + ] + + # 关联类型 + associated_types = ["text"] + + async def execute(self) -> Tuple[bool, str]: + """执行关系动作""" + + try: + # 1. 获取构建关系的原因 + concept_description = self.action_data.get("concept_description", "") + logger.info(f"{self.log_prefix} 添加记忆原因: {self.reasoning}") + concept_name = self.action_data.get("concept_name", "") + # 2. 获取目标用户信息 + + + + # 对 concept_name 进行jieba分词 + concept_name_tokens = cut_key_words(concept_name) + # logger.info(f"{self.log_prefix} 对 concept_name 进行分词结果: {concept_name_tokens}") + + filtered_concept_name_tokens = [ + token for token in concept_name_tokens if all(keyword not in token for keyword in global_config.memory.memory_ban_words) + ] + + if not filtered_concept_name_tokens: + logger.warning(f"{self.log_prefix} 过滤后的概念名称列表为空,跳过添加记忆") + return False, "过滤后的概念名称列表为空,跳过添加记忆" + + similar_topics_dict = hippocampus_manager.get_hippocampus().parahippocampal_gyrus.get_similar_topics_from_keywords(filtered_concept_name_tokens) + await hippocampus_manager.get_hippocampus().parahippocampal_gyrus.add_memory_with_similar(concept_description, similar_topics_dict) + + + + return True, f"成功添加记忆: {concept_name}" + + except Exception as e: + logger.error(f"{self.log_prefix} 构建记忆时出错: {e}") + return False, f"构建记忆时出错: {e}" + + + +# 还缺一个关系的太多遗忘和对应的提取 +init_prompt() diff --git a/src/plugins/built_in/memory/plugin.py b/src/plugins/built_in/memory/plugin.py new file mode 100644 index 00000000..8eaaf900 --- /dev/null +++ b/src/plugins/built_in/memory/plugin.py @@ -0,0 +1,58 @@ +from typing import List, Tuple, Type + +# 导入新插件系统 +from src.plugin_system import BasePlugin, register_plugin, ComponentInfo +from src.plugin_system.base.config_types import ConfigField + +# 导入依赖的系统组件 +from src.common.logger import get_logger + +from src.plugins.built_in.memory.build_memory import BuildMemoryAction + +logger = get_logger("relation_actions") + + +@register_plugin +class MemoryBuildPlugin(BasePlugin): + """关系动作插件 + + 系统内置插件,提供基础的聊天交互功能: + - Reply: 回复动作 + - NoReply: 不回复动作 + - Emoji: 表情动作 + + 注意:插件基本信息优先从_manifest.json文件中读取 + """ + + # 插件基本信息 + plugin_name: str = "memory_build" # 内部标识符 + enable_plugin: bool = True + dependencies: list[str] = [] # 插件依赖列表 + python_dependencies: list[str] = [] # Python包依赖列表 + config_file_name: str = "config.toml" + + # 配置节描述 + config_section_descriptions = { + "plugin": "插件启用配置", + "components": "核心组件启用配置", + } + + # 配置Schema定义 + config_schema: dict = { + "plugin": { + "enabled": ConfigField(type=bool, default=True, description="是否启用插件"), + "config_version": ConfigField(type=str, default="1.1.0", description="配置文件版本"), + }, + "components": { + "memory_max_memory_num": ConfigField(type=int, default=10, description="记忆最大数量"), + }, + } + + def get_plugin_components(self) -> List[Tuple[ComponentInfo, Type]]: + """返回插件包含的组件列表""" + + # --- 根据配置注册组件 --- + components = [] + components.append((BuildMemoryAction.get_action_info(), BuildMemoryAction)) + + return components diff --git a/src/plugins/built_in/plugin_management/_manifest.json b/src/plugins/built_in/plugin_management/_manifest.json index f394b867..a0175d77 100644 --- a/src/plugins/built_in/plugin_management/_manifest.json +++ b/src/plugins/built_in/plugin_management/_manifest.json @@ -9,7 +9,7 @@ }, "license": "GPL-v3.0-or-later", "host_application": { - "min_version": "0.9.1" + "min_version": "0.10.1" }, "homepage_url": "https://github.com/MaiM-with-u/maibot", "repository_url": "https://github.com/MaiM-with-u/maibot", diff --git a/src/plugins/built_in/relation/relation.py b/src/plugins/built_in/relation/relation.py index bab9090d..1f6f0d0f 100644 --- a/src/plugins/built_in/relation/relation.py +++ b/src/plugins/built_in/relation/relation.py @@ -1,6 +1,7 @@ import json from json_repair import repair_json from typing import Tuple +import time from src.common.logger import get_logger from src.config.config import global_config @@ -79,16 +80,6 @@ class BuildRelationAction(BaseAction): action_name = "build_relation" action_description = "了解对于某人的记忆,并添加到你对对方的印象中" - # LLM判断提示词 - llm_judge_prompt = """ - 判定是否需要使用关系动作,添加对于某人的记忆: - 1. 对方与你的交互让你对其有新记忆 - 2. 对方有提到其个人信息,包括喜好,身份,等等 - 3. 对方希望你记住对方的信息 - - 请回答"是"或"否"。 - """ - # 动作参数定义 action_parameters = {"person_name": "需要了解或记忆的人的名称", "impression": "需要了解的对某人的记忆或印象"} @@ -109,13 +100,17 @@ class BuildRelationAction(BaseAction): try: # 1. 获取构建关系的原因 impression = self.action_data.get("impression", "") - logger.info(f"{self.log_prefix} 添加记忆原因: {self.reasoning}") + logger.info(f"{self.log_prefix} 添加关系印象原因: {self.reasoning}") person_name = self.action_data.get("person_name", "") # 2. 获取目标用户信息 person = Person(person_name=person_name) if not person.is_known: logger.warning(f"{self.log_prefix} 用户 {person_name} 不存在,跳过添加记忆") return False, f"用户 {person_name} 不存在,跳过添加记忆" + + person.last_know = time.time() + person.know_times += 1 + person.sync_to_database() category_list = person.get_all_category() if not category_list: @@ -195,6 +190,8 @@ class BuildRelationAction(BaseAction): # 新记忆 person.memory_points.append(f"{category}:{new_memory}:1.0") person.sync_to_database() + + logger.info(f"{self.log_prefix} 为{person.person_name}新增记忆点: {new_memory}") return True, f"为{person.person_name}新增记忆点: {new_memory}" elif memory_id and integrate_memory: @@ -204,12 +201,14 @@ class BuildRelationAction(BaseAction): del_count = person.del_memory(category, memory_content) if del_count > 0: - logger.info(f"{self.log_prefix} 删除记忆点: {memory_content}") + # logger.info(f"{self.log_prefix} 删除记忆点: {memory_content}") memory_weight = get_weight_from_memory(memory) person.memory_points.append(f"{category}:{integrate_memory}:{memory_weight + 1.0}") person.sync_to_database() + logger.info(f"{self.log_prefix} 更新{person.person_name}的记忆点: {memory_content} -> {integrate_memory}") + return True, f"更新{person.person_name}的记忆点: {memory_content} -> {integrate_memory}" else: diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 97eb5b4a..3805051c 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "6.7.1" +version = "6.7.2" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- #如果你想要修改配置文件,请递增version的值 @@ -65,8 +65,6 @@ focus_value = 0.5 max_context_size = 20 # 上下文长度 -interest_rate_mode = "fast" #激活值计算模式,可选fast或者accurate - planner_size = 2.5 # 副规划器大小,越小,麦麦的动作执行能力越精细,但是消耗更多token,调大可以缓解429类错误 mentioned_bot_inevitable_reply = true # 提及 bot 大概率回复 @@ -102,22 +100,8 @@ talk_frequency_adjust = [ # - 后续元素是"时间,频率"格式,表示从该时间开始使用该活跃度,直到下一个时间点 # - 优先级:特定聊天流配置 > 全局配置 > 默认 talk_frequency - [relationship] enable_relationship = true # 是否启用关系系统 -relation_frequency = 1 # 关系频率,麦麦构建关系的频率 - -[message_receive] -# 以下是消息过滤,可以根据规则过滤特定消息,将不会读取这些消息 -ban_words = [ - # "403","张三" - ] - -ban_msgs_regex = [ - # 需要过滤的消息(原始消息)匹配的正则表达式,匹配到的消息将被过滤,若不了解正则表达式请勿修改 - #"https?://[^\\s]+", # 匹配https链接 - #"\\d{4}-\\d{2}-\\d{2}", # 匹配日期 -] [tool] enable_tool = false # 是否在普通聊天中启用工具 @@ -138,21 +122,30 @@ filtration_prompt = "符合公序良俗" # 表情包过滤要求,只有符合 [memory] enable_memory = true # 是否启用记忆系统 -memory_build_frequency = 1 # 记忆构建频率 越高,麦麦学习越多 -memory_compress_rate = 0.1 # 记忆压缩率 控制记忆精简程度 建议保持默认,调高可以获得更多信息,但是冗余信息也会增多 -forget_memory_interval = 3000 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习 +forget_memory_interval = 1500 # 记忆遗忘间隔 单位秒 间隔越低,麦麦遗忘越频繁,记忆更精简,但更难学习 memory_forget_time = 48 #多长时间后的记忆会被遗忘 单位小时 memory_forget_percentage = 0.008 # 记忆遗忘比例 控制记忆遗忘程度 越大遗忘越多 建议保持默认 -enable_instant_memory = false # 是否启用即时记忆,测试功能,可能存在未知问题 - #不希望记忆的词,已经记忆的不会受到影响,需要手动清理 memory_ban_words = [ "表情包", "图片", "回复", "聊天记录" ] [voice] enable_asr = false # 是否启用语音识别,启用后麦麦可以识别语音消息,启用该功能需要配置语音识别模型[model.voice]s +[message_receive] +# 以下是消息过滤,可以根据规则过滤特定消息,将不会读取这些消息 +ban_words = [ + # "403","张三" + ] + +ban_msgs_regex = [ + # 需要过滤的消息(原始消息)匹配的正则表达式,匹配到的消息将被过滤,若不了解正则表达式请勿修改 + #"https?://[^\\s]+", # 匹配https链接 + #"\\d{4}-\\d{2}-\\d{2}", # 匹配日期 +] + + [lpmm_knowledge] # lpmm知识库配置 enable = false # 是否启用lpmm知识库 rag_synonym_search_top_k = 10 # 同义词搜索TopK