From 6a71ea4f2e82d4afa37f51ce55f3f6cb1f4d4814 Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Sun, 9 Mar 2025 22:45:32 +0800
Subject: [PATCH] Fix memory timestamp bug; add memory ban keywords to config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 changelog.md                                  |   6 +
 changelog_config.md                           |   6 +
 src/plugins/chat/config.py                    |   9 +-
 src/plugins/memory_system/memory.py           | 100 ++++--
 .../memory_system/memory_manual_build.py      | 307 +++++++++++++++---
 template/auto_format.py                       |  48 ---
 template/bot_config_template.toml             |  21 +-
 7 files changed, 367 insertions(+), 130 deletions(-)
 create mode 100644 changelog.md
 create mode 100644 changelog_config.md
 delete mode 100644 template/auto_format.py

diff --git a/changelog.md b/changelog.md
new file mode 100644
index 00000000..c68a16ad
--- /dev/null
+++ b/changelog.md
@@ -0,0 +1,6 @@
+# Changelog
+
+## [0.5.12] - 2025-03-09
+### Added
+- Added "我是测试" (placeholder test entry)
+
diff --git a/changelog_config.md b/changelog_config.md
new file mode 100644
index 00000000..7101fe82
--- /dev/null
+++ b/changelog_config.md
@@ -0,0 +1,6 @@
+# Changelog
+
+## [0.0.4] - 2025-03-09
+### Added
+- Added the `memory_ban_words` config option for listing words that should not be memorized.
+
diff --git a/src/plugins/chat/config.py b/src/plugins/chat/config.py
index c027753c..f4c44a72 100644
--- a/src/plugins/chat/config.py
+++ b/src/plugins/chat/config.py
@@ -88,6 +88,8 @@ class BotConfig:
     PERSONALITY_2: float = 0.3 # probability of the second personality
     PERSONALITY_3: float = 0.1 # probability of the third personality
 
+    memory_ban_words: set = field(default_factory=lambda: {'表情包', '图片', '回复', '聊天记录'}) # default for the new option; a set, matching how load_config assigns it below
+
     @staticmethod
     def get_config_dir() -> str:
         """Return the config file directory."""
@@ -277,6 +279,10 @@ class BotConfig:
             memory_config = parent["memory"]
             config.build_memory_interval = memory_config.get("build_memory_interval", config.build_memory_interval)
             config.forget_memory_interval = memory_config.get("forget_memory_interval", config.forget_memory_interval)
+
+            # Only parse the new option when the config version is >= 0.0.4
+            if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
+                config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
 
         def mood(parent: dict):
             mood_config = parent["mood"]
@@ -344,7 +350,8 @@ class BotConfig:
             },
             "memory": {
                 "func": memory,
-                "support": ">=0.0.0"
+                "support": ">=0.0.0",
+                "necessary": False
             },
             "mood": {
                 "func": mood,
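The version gate above uses packaging's SpecifierSet containment test. A minimal standalone sketch of the pattern, for reference (the parse_memory helper and the sample values are invented; only the SpecifierSet/Version usage mirrors the patch):

from packaging.specifiers import SpecifierSet
from packaging.version import Version

def parse_memory(config: dict, memory_config: dict, inner_version: Version):
    # Keys present in every config version.
    config["build_memory_interval"] = memory_config.get("build_memory_interval", 300)
    # memory_ban_words first appeared in config version 0.0.4; older config
    # files simply skip this branch instead of failing on an unknown key.
    if inner_version in SpecifierSet(">=0.0.4"):
        config["memory_ban_words"] = set(memory_config.get("memory_ban_words", []))

cfg = {}
parse_memory(cfg, {"memory_ban_words": ["表情包", "图片"]}, Version("0.0.4"))
print(cfg["memory_ban_words"])  # {'表情包', '图片'} (set order may vary)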
diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py
index f88888aa..08fc6d30 100644
--- a/src/plugins/memory_system/memory.py
+++ b/src/plugins/memory_system/memory.py
@@ -157,46 +157,88 @@ class Hippocampus:
         nodes = sorted([source, target])
         return hash(f"{nodes[0]}:{nodes[1]}")
 
-    def get_memory_sample(self,chat_size=20,time_frequency:dict={'near':2,'mid':4,'far':3}):
+    def get_memory_sample(self, chat_size=20, time_frequency:dict={'near':2,'mid':4,'far':3}):
+        """Fetch memory samples.
+
+        Returns:
+            list: chat samples; each element is a list of message-record dicts
+        """
         current_timestamp = datetime.datetime.now().timestamp()
-        chat_text = []
-        # near: 1h, mid: 4h, far: 24h
-        for _ in range(time_frequency.get('near')): # loop 10 times
-            random_time = current_timestamp - random.randint(1, 3600) # random time
-            chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
-            chat_text.append(chat_)
-        for _ in range(time_frequency.get('mid')): # loop 10 times
-            random_time = current_timestamp - random.randint(3600, 3600*4) # random time
-            chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
-            chat_text.append(chat_)
-        for _ in range(time_frequency.get('far')): # loop 10 times
-            random_time = current_timestamp - random.randint(3600*4, 3600*24) # random time
-            chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
-            chat_text.append(chat_)
-        return [text for text in chat_text if text]
-
-    async def memory_compress(self, input_text, compress_rate=0.1):
+        chat_samples = []
+
+        # near: within 1h; mid: 1h-4h; far: 4h-24h
+        for _ in range(time_frequency.get('near')):
+            random_time = current_timestamp - random.randint(1, 3600)
+            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+            if messages:
+                chat_samples.append(messages)
+
+        for _ in range(time_frequency.get('mid')):
+            random_time = current_timestamp - random.randint(3600, 3600*4)
+            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+            if messages:
+                chat_samples.append(messages)
+
+        for _ in range(time_frequency.get('far')):
+            random_time = current_timestamp - random.randint(3600*4, 3600*24)
+            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+            if messages:
+                chat_samples.append(messages)
+
+        return chat_samples
+
+    async def memory_compress(self, messages: list, compress_rate=0.1):
+        """Compress message records into memories.
+
+        Args:
+            messages: list of message-record dicts, each with text and time fields
+            compress_rate: compression ratio
+
+        Returns:
+            set: a set of (topic, memory) tuples
+        """
+        if not messages:
+            return set()
+
+        # Merge the message texts while keeping the time information
+        input_text = ""
+        time_info = ""
+        # Earliest and latest timestamps in the sample
+        earliest_time = min(msg['time'] for msg in messages)
+        latest_time = max(msg['time'] for msg in messages)
+
+        earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
+        latest_dt = datetime.datetime.fromtimestamp(latest_time)
+
+        # Same year: state the year once and shorten the timestamps
+        if earliest_dt.year == latest_dt.year:
+            earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
+            latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
+            time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
+        else:
+            earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
+            latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
+            time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
+
+        for msg in messages:
+            input_text += f"{msg['text']}\n"
+
         print(input_text)
 
-        # get topics
         topic_num = self.calculate_topic_num(input_text, compress_rate)
         topics_response = await self.llm_topic_judge.generate_response(self.find_topic_llm(input_text, topic_num))
 
-        # Reworked topic handling
-        # Keywords to filter out
-        filter_keywords = ['表情包', '图片', '回复', '聊天记录'] # filter the topics
+        filter_keywords = global_config.memory_ban_words
         topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
         filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
 
-        # print(f"原始话题: {topics}")
         print(f"过滤后话题: {filtered_topics}")
 
-        # Continue with the filtered topics
+        # Create a request task for every remaining topic
         tasks = []
         for topic in filtered_topics:
-            topic_what_prompt = self.topic_what(input_text, topic)
-            # Create the async task
+            topic_what_prompt = self.topic_what(input_text, topic, time_info)
             task = self.llm_summary_by_topic.generate_response_async(topic_what_prompt)
             tasks.append((topic.strip(), task))
@@ -440,7 +482,7 @@ class Hippocampus:
         print(f"选择的记忆:\n{merged_text}")
 
         # Use memory_compress to regenerate a compressed memory
-        compressed_memories = await self.memory_compress(merged_text, 0.1)
+        compressed_memories = await self.memory_compress(selected_memories, 0.1)
 
         # Remove the selected memories from the original memory list
         for memory in selected_memories:
@@ -494,8 +536,8 @@ class Hippocampus:
         prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
         return prompt
 
-    def topic_what(self,text, topic):
-        prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
+    def topic_what(self, text, topic, time_info):
+        prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
         return prompt
 
     async def _identify_topics(self, text: str) -> list:
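The heart of the timestamp fix is that samples now carry their 'time' fields into memory_compress, which turns the sample's time range into a natural-language header for the LLM prompt. A standalone sketch of that header construction (sample messages and timestamps invented; the branching mirrors the hunk above):

import datetime

messages = [
    {'text': 'A: 早上好', 'time': 1741485600.0},                 # hypothetical timestamps
    {'text': 'B: 早上好,今天来测试记忆系统', 'time': 1741489200.0},
]

earliest_dt = datetime.datetime.fromtimestamp(min(m['time'] for m in messages))
latest_dt = datetime.datetime.fromtimestamp(max(m['time'] for m in messages))

if earliest_dt.year == latest_dt.year:
    # Same year: state the year once, keep the endpoints short
    time_info = f"是在{earliest_dt.year}年,{earliest_dt.strftime('%m-%d %H:%M:%S')} 到 {latest_dt.strftime('%m-%d %H:%M:%S')} 的对话:\n"
else:
    # Spans a year boundary: spell out the full dates
    time_info = f"是从 {earliest_dt.strftime('%Y-%m-%d %H:%M:%S')} 到 {latest_dt.strftime('%Y-%m-%d %H:%M:%S')} 的对话:\n"

print(time_info)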
diff --git a/src/plugins/memory_system/memory_manual_build.py b/src/plugins/memory_system/memory_manual_build.py
index 3124bc8e..2e1151f9 100644
--- a/src/plugins/memory_system/memory_manual_build.py
+++ b/src/plugins/memory_system/memory_manual_build.py
@@ -13,6 +13,7 @@ import networkx as nx
 import pymongo
 from dotenv import load_dotenv
 from loguru import logger
+import jieba
 
 # from chat.config import global_config
 sys.path.append("C:/GitHub/MaiMBot") # add the project root to the Python path
@@ -86,23 +87,26 @@ def calculate_information_content(text):
     return entropy
 
 def get_cloest_chat_from_db(db, length: int, timestamp: str):
-    """Fetch the chat records closest to the given timestamp and track read counts"""
-    chat_text = ''
+    """Fetch the chat records closest to the given timestamp and track read counts.
+
+    Returns:
+        list: message-record dicts, each carrying the message content and time info
+    """
+    chat_records = []
    closest_record = db.db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)])
 
     if closest_record and closest_record.get('memorized', 0) < 4:
         closest_time = closest_record['time']
-        group_id = closest_record['group_id'] # grab the group id
+        group_id = closest_record['group_id']
 
         # Fetch the next `length` messages after that timestamp from the same group
-        chat_records = list(db.db.messages.find(
+        records = list(db.db.messages.find(
             {"time": {"$gt": closest_time}, "group_id": group_id}
         ).sort('time', 1).limit(length))
 
         # Update each message's memorized counter
-        for record in chat_records:
-            # check the record's current memorized count
+        for record in records:
             current_memorized = record.get('memorized', 0)
             if current_memorized > 3:
                 print("消息已读取3次,跳过")
-                return ''
+                return []
 
@@ -112,11 +116,14 @@ def get_cloest_chat_from_db(db, length: int, timestamp: str):
                 {"$set": {"memorized": current_memorized + 1}}
             )
 
-        chat_text += record["detailed_plain_text"]
+            # Append to the record list
+            chat_records.append({
+                'text': record["detailed_plain_text"],
+                'time': record["time"],
+                'group_id': record["group_id"]
+            })
 
-    return chat_text
-    print("消息已读取3次,跳过")
-    return ''
+    return chat_records
 
 class Memory_graph:
     def __init__(self):
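For orientation, the sampling trick in get_cloest_chat_from_db reduces to two pymongo queries: anchor on the newest message at or before a random timestamp, then read a forward window from the same group. A standalone sketch (connection string and database name invented):

import pymongo

client = pymongo.MongoClient("mongodb://localhost:27017")  # hypothetical URI
messages = client["maimbot"]["messages"]                   # hypothetical database name

sampled_ts = 1741485600.0
# 1. Anchor: the newest message at or before the sampled timestamp.
anchor = messages.find_one({"time": {"$lte": sampled_ts}}, sort=[("time", -1)])
if anchor:
    # 2. Window: the next 20 messages from the same group, oldest first.
    window = list(
        messages.find({"time": {"$gt": anchor["time"]}, "group_id": anchor["group_id"]})
        .sort("time", 1)
        .limit(20)
    )
    print(len(window))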
@@ -205,22 +212,34 @@ class Hippocampus:
         self.llm_model_summary = LLMModel(model_name="Qwen/Qwen2.5-32B-Instruct")
 
     def get_memory_sample(self, chat_size=20, time_frequency:dict={'near':2,'mid':4,'far':3}):
+        """Fetch memory samples.
+
+        Returns:
+            list: chat samples; each element is a list of message-record dicts
+        """
         current_timestamp = datetime.datetime.now().timestamp()
-        chat_text = []
-        # near: 1h, mid: 4h, far: 24h
-        for _ in range(time_frequency.get('near')): # loop 10 times
-            random_time = current_timestamp - random.randint(1, 3600*4) # random time
-            chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
-            chat_text.append(chat_)
-        for _ in range(time_frequency.get('mid')): # loop 10 times
-            random_time = current_timestamp - random.randint(3600*4, 3600*24) # random time
-            chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
-            chat_text.append(chat_)
-        for _ in range(time_frequency.get('far')): # loop 10 times
-            random_time = current_timestamp - random.randint(3600*24, 3600*24*7) # random time
-            chat_ = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
-            chat_text.append(chat_)
-        return [chat for chat in chat_text if chat]
+        chat_samples = []
+
+        # near: within 4h; mid: 4h-24h; far: 24h-7d
+        for _ in range(time_frequency.get('near')):
+            random_time = current_timestamp - random.randint(1, 3600*4)
+            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+            if messages:
+                chat_samples.append(messages)
+
+        for _ in range(time_frequency.get('mid')):
+            random_time = current_timestamp - random.randint(3600*4, 3600*24)
+            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+            if messages:
+                chat_samples.append(messages)
+
+        for _ in range(time_frequency.get('far')):
+            random_time = current_timestamp - random.randint(3600*24, 3600*24*7)
+            messages = get_cloest_chat_from_db(db=self.memory_graph.db, length=chat_size, timestamp=random_time)
+            if messages:
+                chat_samples.append(messages)
+
+        return chat_samples
 
     def calculate_topic_num(self,text, compress_rate):
         """Calculate the number of topics for a text."""
@@ -231,16 +250,49 @@ class Hippocampus:
         print(f"topic_by_length: {topic_by_length}, topic_by_information_content: {topic_by_information_content}, topic_num: {topic_num}")
         return topic_num
 
-    async def memory_compress(self, input_text, compress_rate=0.1):
+    async def memory_compress(self, messages: list, compress_rate=0.1):
+        """Compress message records into memories.
+
+        Args:
+            messages: list of message-record dicts, each with text and time fields
+            compress_rate: compression ratio
+
+        Returns:
+            set: a set of (topic, memory) tuples
+        """
+        if not messages:
+            return set()
+
+        # Merge the message texts while keeping the time information
+        input_text = ""
+        time_info = ""
+        # Earliest and latest timestamps in the sample
+        earliest_time = min(msg['time'] for msg in messages)
+        latest_time = max(msg['time'] for msg in messages)
+
+        earliest_dt = datetime.datetime.fromtimestamp(earliest_time)
+        latest_dt = datetime.datetime.fromtimestamp(latest_time)
+
+        # Same year: state the year once and shorten the timestamps
+        if earliest_dt.year == latest_dt.year:
+            earliest_str = earliest_dt.strftime("%m-%d %H:%M:%S")
+            latest_str = latest_dt.strftime("%m-%d %H:%M:%S")
+            time_info += f"是在{earliest_dt.year}年,{earliest_str} 到 {latest_str} 的对话:\n"
+        else:
+            earliest_str = earliest_dt.strftime("%Y-%m-%d %H:%M:%S")
+            latest_str = latest_dt.strftime("%Y-%m-%d %H:%M:%S")
+            time_info += f"是从 {earliest_str} 到 {latest_str} 的对话:\n"
+
+        for msg in messages:
+            input_text += f"{msg['text']}\n"
+
         print(input_text)
 
         topic_num = self.calculate_topic_num(input_text, compress_rate)
         topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(input_text, topic_num))
 
-        # Reworked topic handling
-        # Keywords to filter out
-        filter_keywords = ['表情包', '图片', '回复', '聊天记录'] # filter the topics
+        filter_keywords = ['表情包', '图片', '回复', '聊天记录']
         topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
         filtered_topics = [topic for topic in topics if not any(keyword in topic for keyword in filter_keywords)]
@@ -250,7 +302,7 @@ class Hippocampus:
         # Create a request task for every topic
         tasks = []
         for topic in filtered_topics:
-            topic_what_prompt = self.topic_what(input_text, topic)
+            topic_what_prompt = self.topic_what(input_text, topic, time_info)
             # Create the async task
             task = self.llm_model_small.generate_response_async(topic_what_prompt)
             tasks.append((topic.strip(), task))
@@ -267,37 +319,35 @@ class Hippocampus:
     async def operation_build_memory(self, chat_size=12):
         # Sampling frequency for recent messages
         time_frequency = {'near': 3, 'mid': 8, 'far': 5}
-        memory_sample = self.get_memory_sample(chat_size, time_frequency)
+        memory_samples = self.get_memory_sample(chat_size, time_frequency)
 
         all_topics = [] # collect all topics
-        for i, input_text in enumerate(memory_sample, 1):
+        for i, messages in enumerate(memory_samples, 1):
             # Progress visualization
             all_topics = []
-            progress = (i / len(memory_sample)) * 100
+            progress = (i / len(memory_samples)) * 100
             bar_length = 30
-            filled_length = int(bar_length * i // len(memory_sample))
+            filled_length = int(bar_length * i // len(memory_samples))
             bar = '█' * filled_length + '-' * (bar_length - filled_length)
-            print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_sample)})")
+            print(f"\n进度: [{bar}] {progress:.1f}% ({i}/{len(memory_samples)})")
 
-            # Generate the compressed memory, a set of (topic, memory) tuples
-            compressed_memory = set()
+            # Generate the compressed memory
             compress_rate = 0.1
-            compressed_memory = await self.memory_compress(input_text, compress_rate)
+            compressed_memory = await self.memory_compress(messages, compress_rate)
             print(f"\033[1;33m压缩后记忆数量\033[0m: {len(compressed_memory)}")
 
             # Add the memories to the graph
             for topic, memory in compressed_memory:
                 print(f"\033[1;32m添加节点\033[0m: {topic}")
                 self.memory_graph.add_dot(topic, memory)
-                all_topics.append(topic) # collect all topics
+                all_topics.append(topic)
+
+            # Connect this sample's topics pairwise
             for i in range(len(all_topics)):
                 for j in range(i + 1, len(all_topics)):
                     print(f"\033[1;32m连接节点\033[0m: {all_topics[i]} 和 {all_topics[j]}")
                     self.memory_graph.connect_dot(all_topics[i], all_topics[j])
-
-
-
         self.sync_memory_to_db()
@@ -375,7 +425,7 @@ class Hippocampus:
 
             if concept not in db_nodes_dict:
                 # Node missing from the database; add it
-                logger.info(f"添加新节点: {concept}")
+                # logger.info(f"添加新节点: {concept}")
                 node_data = {
                     'concept': concept,
                     'memory_items': memory_items,
@@ -389,7 +439,7 @@ class Hippocampus:
 
                 # Update the node if its hash differs
                 if db_hash != memory_hash:
-                    logger.info(f"更新节点内容: {concept}")
+                    # logger.info(f"更新节点内容: {concept}")
                     self.memory_graph.db.db.graph_data.nodes.update_one(
                         {'concept': concept},
                         {'$set': {
@@ -402,7 +452,7 @@ class Hippocampus:
         memory_concepts = set(node[0] for node in memory_nodes)
         for db_node in db_nodes:
             if db_node['concept'] not in memory_concepts:
-                logger.info(f"删除多余节点: {db_node['concept']}")
+                # logger.info(f"删除多余节点: {db_node['concept']}")
                 self.memory_graph.db.db.graph_data.nodes.delete_one({'concept': db_node['concept']})
 
         # Process the edge information
@@ -460,9 +510,10 @@ class Hippocampus:
         prompt = f'这是一段文字:{text}。请你从这段话中总结出{topic_num}个关键的概念,可以是名词,动词,或者特定人物,帮我列出来,用逗号,隔开,尽可能精简。只需要列举{topic_num}个话题就好,不要有序号,不要告诉我其他内容。'
         return prompt
 
-    def topic_what(self,text, topic):
+    def topic_what(self, text, topic, time_info):
         # prompt = f'这是一段文字:{text}。我想知道这段文字里有什么关于{topic}的话题,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
-        prompt = f'这是一段文字:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
+        prompt = f'这是一段文字,{time_info}:{text}。我想让你基于这段文字来概括"{topic}"这个概念,帮我总结成一句自然的话,可以包含时间和人物,以及具体的观点。只输出这句话就好'
         return prompt
 
     def remove_node_from_db(self, topic):
@@ -597,7 +648,7 @@ class Hippocampus:
         print(f"选择的记忆:\n{merged_text}")
 
         # Use memory_compress to regenerate a compressed memory
-        compressed_memories = await self.memory_compress(merged_text, 0.1)
+        compressed_memories = await self.memory_compress(selected_memories, 0.1)
 
         # Remove the selected memories from the original memory list
         for memory in selected_memories:
@@ -647,6 +698,164 @@ class Hippocampus:
         else:
             print("\n本次检查没有需要合并的节点")
 
+    async def _identify_topics(self, text: str) -> list:
+        """Identify candidate topics in a text."""
+        topics_response = self.llm_model_get_topic.generate_response(self.find_topic_llm(text, 5))
+        topics = [topic.strip() for topic in topics_response[0].replace(",", ",").replace("、", ",").replace(" ", ",").split(",") if topic.strip()]
+        return topics
+
+    def _find_similar_topics(self, topics: list, similarity_threshold: float = 0.4, debug_info: str = "") -> list:
+        """Find memory topics similar to the given topics.
+
+        debug_info is kept for call-site compatibility and is currently unused.
+        """
+        all_memory_topics = list(self.memory_graph.G.nodes())
+        all_similar_topics = []
+
+        for topic in topics:
+            topic_vector = text_to_vector(topic)
+
+            for memory_topic in all_memory_topics:
+                memory_vector = text_to_vector(memory_topic)
+                all_words = set(topic_vector.keys()) | set(memory_vector.keys())
+                v1 = [topic_vector.get(word, 0) for word in all_words]
+                v2 = [memory_vector.get(word, 0) for word in all_words]
+                similarity = cosine_similarity(v1, v2)
+
+                if similarity >= similarity_threshold:
+                    all_similar_topics.append((memory_topic, similarity))
+
+        return all_similar_topics
+
+    def _get_top_topics(self, similar_topics: list, max_topics: int = 5) -> list:
+        """Return the highest-similarity topics, deduplicated."""
+        seen_topics = set()
+        top_topics = []
+
+        for topic, score in sorted(similar_topics, key=lambda x: x[1], reverse=True):
+            if topic not in seen_topics and len(top_topics) < max_topics:
+                seen_topics.add(topic)
+                top_topics.append((topic, score))
+
+        return top_topics
+
+    async def memory_activate_value(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.3) -> int:
+        """Compute how strongly the input text activates the memory graph."""
+        identified_topics = await self._identify_topics(text)
+        print(f"\033[1;32m[记忆激活]\033[0m 识别主题: {identified_topics}")
+        if not identified_topics:
+            return 0
+
+        all_similar_topics = self._find_similar_topics(
+            identified_topics,
+            similarity_threshold=similarity_threshold,
+            debug_info="记忆激活"
+        )
+
+        if not all_similar_topics:
+            return 0
+
+        top_topics = self._get_top_topics(all_similar_topics, max_topics)
+
+        if len(top_topics) == 1:
+            # A single match is scored alone, damped by how much content the node holds
+            topic, score = top_topics[0]
+            memory_items = self.memory_graph.G.nodes[topic].get('memory_items', [])
+            if not isinstance(memory_items, list):
+                memory_items = [memory_items] if memory_items else []
+            content_count = len(memory_items)
+            penalty = 1.0 / (1 + math.log(content_count + 1))
+
+            activation = int(score * 50 * penalty)
+            print(f"\033[1;32m[记忆激活]\033[0m 单主题「{topic}」- 相似度: {score:.3f}, 内容数: {content_count}, 激活值: {activation}")
+            return activation
+
+        matched_topics = set()
+        topic_similarities = {}
+
+        for memory_topic, similarity in top_topics:
+            memory_items = self.memory_graph.G.nodes[memory_topic].get('memory_items', [])
+            if not isinstance(memory_items, list):
+                memory_items = [memory_items] if memory_items else []
+            content_count = len(memory_items)
+            penalty = 1.0 / (1 + math.log(content_count + 1))
+
+            for input_topic in identified_topics:
+                topic_vector = text_to_vector(input_topic)
+                memory_vector = text_to_vector(memory_topic)
+                all_words = set(topic_vector.keys()) | set(memory_vector.keys())
+                v1 = [topic_vector.get(word, 0) for word in all_words]
+                v2 = [memory_vector.get(word, 0) for word in all_words]
+                sim = cosine_similarity(v1, v2)
+                if sim >= similarity_threshold:
+                    matched_topics.add(input_topic)
+                    adjusted_sim = sim * penalty
+                    topic_similarities[input_topic] = max(topic_similarities.get(input_topic, 0), adjusted_sim)
+                    print(f"\033[1;32m[记忆激活]\033[0m 主题「{input_topic}」-> 「{memory_topic}」(内容数: {content_count}, 相似度: {adjusted_sim:.3f})")
+
+        topic_match = len(matched_topics) / len(identified_topics)
+        average_similarities = sum(topic_similarities.values()) / len(topic_similarities) if topic_similarities else 0
+
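+        # Worked example (invented numbers): if 2 of 4 identified topics matched,
+        # topic_match = 0.5; with adjusted similarities 0.52 and 0.38 the mean is
+        # 0.45, giving activation = int((0.5 + 0.45) / 2 * 100) = 47.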
+        activation = int((topic_match + average_similarities) / 2 * 100)
+        print(f"\033[1;32m[记忆激活]\033[0m 匹配率: {topic_match:.3f}, 平均相似度: {average_similarities:.3f}, 激活值: {activation}")
+
+        return activation
+
+    async def get_relevant_memories(self, text: str, max_topics: int = 5, similarity_threshold: float = 0.4, max_memory_num: int = 5) -> list:
+        """Retrieve memory content relevant to the input text."""
+        identified_topics = await self._identify_topics(text)
+
+        all_similar_topics = self._find_similar_topics(
+            identified_topics,
+            similarity_threshold=similarity_threshold,
+            debug_info="记忆检索"
+        )
+
+        relevant_topics = self._get_top_topics(all_similar_topics, max_topics)
+
+        relevant_memories = []
+        for topic, score in relevant_topics:
+            first_layer, _ = self.memory_graph.get_related_item(topic, depth=1)
+            if first_layer:
+                if len(first_layer) > max_memory_num/2:
+                    first_layer = random.sample(first_layer, max_memory_num//2)
+                for memory in first_layer:
+                    relevant_memories.append({
+                        'topic': topic,
+                        'similarity': score,
+                        'content': memory
+                    })
+
+        relevant_memories.sort(key=lambda x: x['similarity'], reverse=True)
+
+        if len(relevant_memories) > max_memory_num:
+            relevant_memories = random.sample(relevant_memories, max_memory_num)
+
+        return relevant_memories
+
+def segment_text(text):
+    """Tokenize text with jieba."""
+    seg_text = list(jieba.cut(text))
+    return seg_text
+
+def text_to_vector(text):
+    """Convert text into a word-frequency vector."""
+    words = segment_text(text)
+    vector = {}
+    for word in words:
+        vector[word] = vector.get(word, 0) + 1
+    return vector
+
+def cosine_similarity(v1, v2):
+    """Compute the cosine similarity of two vectors."""
+    dot_product = sum(a * b for a, b in zip(v1, v2))
+    norm1 = math.sqrt(sum(a * a for a in v1))
+    norm2 = math.sqrt(sum(b * b for b in v2))
+    if norm1 == 0 or norm2 == 0:
+        return 0
+    return dot_product / (norm1 * norm2)
 
 def visualize_graph_lite(memory_graph: Memory_graph, color_by_memory: bool = False):
     # Set a font that can render Chinese labels
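The retrieval layer above rests on one primitive: jieba word-frequency vectors compared by cosine similarity over a shared vocabulary. A standalone usage sketch (example strings invented; the two helpers are restated from this file so the snippet runs on its own, and the exact score depends on jieba's segmentation):

import math
import jieba

def text_to_vector(text):
    vector = {}
    for word in jieba.cut(text):
        vector[word] = vector.get(word, 0) + 1
    return vector

def cosine_similarity(v1, v2):
    dot_product = sum(a * b for a, b in zip(v1, v2))
    norm1 = math.sqrt(sum(a * a for a in v1))
    norm2 = math.sqrt(sum(b * b for b in v2))
    if norm1 == 0 or norm2 == 0:
        return 0
    return dot_product / (norm1 * norm2)

v_a = text_to_vector("记忆系统")
v_b = text_to_vector("记忆时间")
all_words = set(v_a) | set(v_b)
sim = cosine_similarity(
    [v_a.get(w, 0) for w in all_words],
    [v_b.get(w, 0) for w in all_words],
)
print(f"{sim:.3f}")  # typically > 0, since both phrases share the token "记忆"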
diff --git a/template/auto_format.py b/template/auto_format.py
deleted file mode 100644
index d99e29e3..00000000
--- a/template/auto_format.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import os
-import sys
-from pathlib import Path
-
-import tomli
-import tomli_w
-
-
-def sync_configs():
-    # Read both config files
-    try:
-        with open('bot_config_dev.toml', 'rb') as f: # tomli requires binary mode
-            dev_config = tomli.load(f)
-
-        with open('bot_config.toml', 'rb') as f:
-            prod_config = tomli.load(f)
-    except FileNotFoundError as e:
-        print(f"错误:找不到配置文件 - {e}")
-        sys.exit(1)
-    except tomli.TOMLDecodeError as e:
-        print(f"错误:TOML格式解析失败 - {e}")
-        sys.exit(1)
-
-    # Recursively merge the configs
-    def merge_configs(source, target):
-        for key, value in source.items():
-            if key not in target:
-                target[key] = value
-            elif isinstance(value, dict) and isinstance(target[key], dict):
-                merge_configs(value, target[key])
-
-    # Merge new attributes from the dev config into the prod config
-    merge_configs(dev_config, prod_config)
-
-    # Save the updated config
-    try:
-        with open('bot_config.toml', 'wb') as f: # tomli_w requires binary mode
-            tomli_w.dump(prod_config, f)
-        print("配置文件同步完成!")
-    except Exception as e:
-        print(f"错误:保存配置文件失败 - {e}")
-        sys.exit(1)
-
-if __name__ == '__main__':
-    # Run from the script's own directory
-    script_dir = Path(__file__).parent
-    os.chdir(script_dir)
-    sync_configs()

diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index d7c66d3f..bff64d05 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -1,5 +1,16 @@
 [inner]
-version = "0.0.3"
+version = "0.0.4"
+
+# If you change this config file, bump the version value afterwards.
+# If you add a new item, also add a matching attribute to the BotConfig class.
+# 1. If you add a top-level [section], e.g. a new [memory], register it in the
+#    include_configs dict inside config.py's load_config:
+#    "memory": {
+#        "func": memory,        # the section's parser function
+#        "support": ">=0.0.0",  # the new version number
+#        "necessary": False     # whether the section is required
+#    }
+# 2. If you add a key under an existing section, e.g. memory_ban_words under
+#    [memory], add a version check inside that section's parser in load_config:
+#    if config.INNER_VERSION in SpecifierSet(">=0.0.4"):
+#        config.memory_ban_words = set(memory_config.get("memory_ban_words", []))
 
 [bot]
 qq = 123
@@ -49,6 +60,10 @@ max_response_length = 1024 # maximum reply length in tokens
 
 build_memory_interval = 300 # memory build interval, in seconds
 forget_memory_interval = 300 # memory forget interval, in seconds
 
+memory_ban_words = [ # words that should not be memorized
+    # "403","张三"
+]
+
 [mood]
 mood_update_interval = 1.0 # mood update interval, in seconds
 mood_decay_rate = 0.95 # mood decay rate
@@ -69,10 +84,10 @@ reaction = "回答“测试成功”"
 
 [chinese_typo]
 enable = true # enable the Chinese typo generator
-error_rate=0.03 # per-character substitution probability
+error_rate=0.006 # per-character substitution probability
 min_freq=7 # minimum character-frequency threshold
 tone_error_rate=0.2 # tone-error probability
-word_replace_rate=0.02 # whole-word substitution probability
+word_replace_rate=0.006 # whole-word substitution probability
 
 [others]
 enable_advance_output = true # enable advanced output
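For reference, how the new memory_ban_words setting takes effect at runtime: the topic filter in memory_compress is a substring match, so any extracted topic containing a banned word is dropped, not just exact hits. A standalone sketch (the topic list is invented):

memory_ban_words = {'表情包', '图片', '回复', '聊天记录'}

topics = ['天气', '表情包大战', '考试', '聊天记录截图']
filtered = [t for t in topics if not any(w in t for w in memory_ban_words)]
print(filtered)  # ['天气', '考试'] -- '表情包大战' is dropped because it contains '表情包'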