From 0683f56e23999d1e53a1a73dc801f1b8baa5083c Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Mon, 29 Sep 2025 00:22:34 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E4=BF=AE=E5=A4=8D=E8=AE=B0?= =?UTF-8?q?=E5=BF=86=E6=8F=90=E5=8F=96=E7=9A=84=E4=B8=80=E4=BA=9B=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/memory_system/Memory_chest.py | 166 +++--------------- .../hippocampus_to_memory_chest_task.py | 4 +- .../memory_system/memory_management_task.py | 26 +-- src/chat/memory_system/memory_utils.py | 156 ++++++++++++++++ src/chat/replyer/group_generator.py | 2 +- src/plugins/built_in/memory/build_memory.py | 4 +- 6 files changed, 189 insertions(+), 169 deletions(-) create mode 100644 src/chat/memory_system/memory_utils.py diff --git a/src/chat/memory_system/Memory_chest.py b/src/chat/memory_system/Memory_chest.py index 5dc02145..216bb432 100644 --- a/src/chat/memory_system/Memory_chest.py +++ b/src/chat/memory_system/Memory_chest.py @@ -1,7 +1,8 @@ import asyncio import json import re -from difflib import SequenceMatcher +import time +import random from src.llm_models.utils_model import LLMRequest from src.config.config import model_config @@ -9,9 +10,12 @@ from src.common.database.database_model import MemoryChest as MemoryChestModel from src.common.logger import get_logger from src.config.config import global_config from src.plugin_system.apis.message_api import build_readable_messages -import time from src.plugin_system.apis.message_api import get_raw_msg_by_timestamp_with_chat from json_repair import repair_json +from .memory_utils import ( + find_best_matching_memory, + check_title_exists_fuzzy +) logger = get_logger("memory_chest") @@ -146,6 +150,8 @@ class MemoryChest: """ 根据问题获取答案 """ + logger.info(f"正在回忆问题答案: {question}") + title = await self.select_title_by_question(question) if not title: @@ -154,12 +160,18 @@ class MemoryChest: for memory in MemoryChestModel.select(): if memory.title == title: content = memory.content - + + if random.random() < 0.5: + type = "要求原文能够较为全面的回答问题" + else: + type = "要求提取简短的内容" + prompt = f""" {content} 请根据问题:{question} -在上方内容中,提取相关信息的原文并输出,请务必提取上面原文,不要输出其他内容: +在上方内容中,提取相关信息的原文并输出,{type} +请务必提取上面原文,不要输出其他内容: """ if global_config.debug.show_prompt: @@ -170,7 +182,7 @@ class MemoryChest: answer, (reasoning_content, model_name, tool_calls) = await self.LLMRequest.generate_response_async(prompt) - logger.info(f"记忆仓库获取答案: {answer}") + logger.info(f"记忆仓库对问题 “{question}” 获取答案: {answer}") # 将问题和答案存到fetched_memory_list if chat_id and answer: @@ -251,7 +263,7 @@ class MemoryChest: selected_title = None # 使用模糊查找匹配标题 - best_match = self.find_best_matching_memory(title, similarity_threshold=0.8) + best_match = find_best_matching_memory(title, similarity_threshold=0.8) if best_match: selected_title = best_match[0] # 获取匹配的标题 logger.info(f"记忆仓库选择标题: {selected_title} (相似度: {best_match[2]:.3f})") @@ -407,7 +419,7 @@ class MemoryChest: # 使用模糊查找匹配记忆 try: - best_match = self.find_best_matching_memory(title.strip(), similarity_threshold=0.8) + best_match = find_best_matching_memory(title.strip(), similarity_threshold=0.8) if best_match: contents.append(best_match[1]) # best_match[1] 是 content logger.debug(f"找到记忆: {best_match[0]} (相似度: {best_match[2]:.3f})") @@ -550,7 +562,7 @@ class MemoryChest: if title: # 检查是否存在相似标题 - if self.check_title_exists_fuzzy(title, similarity_threshold=0.9): + if check_title_exists_fuzzy(title, similarity_threshold=0.9): logger.warning(f"生成的标题 '{title}' 与现有标题相似,使用时间戳后缀") title = f"{title}_{int(time.time())}" @@ -564,143 +576,5 @@ class MemoryChest: logger.error(f"生成合并记忆标题时出错: {e}") return f"合并记忆_{int(time.time())}" - def fuzzy_find_memory_by_title(self, target_title: str, similarity_threshold: float = 0.9) -> list[tuple[str, str, float]]: - """ - 根据标题模糊查找记忆 - - Args: - target_title: 目标标题 - similarity_threshold: 相似度阈值,默认0.6 - - Returns: - list[tuple[str, str, float]]: 匹配的记忆列表,每个元素为(title, content, similarity_score) - """ - try: - # 获取所有记忆 - all_memories = MemoryChestModel.select() - - matches = [] - for memory in all_memories: - similarity = self._calculate_similarity(target_title, memory.title) - if similarity >= similarity_threshold: - matches.append((memory.title, memory.content, similarity)) - - # 按相似度降序排序 - matches.sort(key=lambda x: x[2], reverse=True) - - logger.info(f"模糊查找标题 '{target_title}' 找到 {len(matches)} 个匹配项") - return matches - - except Exception as e: - logger.error(f"模糊查找记忆时出错: {e}") - return [] - - def _calculate_similarity(self, text1: str, text2: str) -> float: - """ - 计算两个文本的相似度 - - Args: - text1: 第一个文本 - text2: 第二个文本 - - Returns: - float: 相似度分数 (0-1) - """ - try: - # 预处理文本 - text1 = self._preprocess_text(text1) - text2 = self._preprocess_text(text2) - - # 使用SequenceMatcher计算相似度 - similarity = SequenceMatcher(None, text1, text2).ratio() - - # 如果其中一个文本包含另一个,提高相似度 - if text1 in text2 or text2 in text1: - similarity = max(similarity, 0.8) - - return similarity - - except Exception as e: - logger.error(f"计算相似度时出错: {e}") - return 0.0 - - def _preprocess_text(self, text: str) -> str: - """ - 预处理文本,提高匹配准确性 - - Args: - text: 原始文本 - - Returns: - str: 预处理后的文本 - """ - try: - # 转换为小写 - text = text.lower() - - # 移除标点符号和特殊字符 - text = re.sub(r'[^\w\s]', '', text) - - # 移除多余空格 - text = re.sub(r'\s+', ' ', text).strip() - - return text - - except Exception as e: - logger.error(f"预处理文本时出错: {e}") - return text - - def find_best_matching_memory(self, target_title: str, similarity_threshold: float = 0.9) -> tuple[str, str, float] | None: - """ - 查找最佳匹配的记忆 - - Args: - target_title: 目标标题 - similarity_threshold: 相似度阈值 - - Returns: - tuple[str, str, float] | None: 最佳匹配的记忆(title, content, similarity)或None - """ - try: - matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold) - - if matches: - best_match = matches[0] # 已经按相似度排序,第一个是最佳匹配 - logger.info(f"找到最佳匹配: '{best_match[0]}' (相似度: {best_match[2]:.3f})") - return best_match - else: - logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆") - return None - - except Exception as e: - logger.error(f"查找最佳匹配记忆时出错: {e}") - return None - - def check_title_exists_fuzzy(self, target_title: str, similarity_threshold: float = 0.9) -> bool: - """ - 检查标题是否已存在(模糊匹配) - - Args: - target_title: 目标标题 - similarity_threshold: 相似度阈值,默认0.8(较高阈值避免误判) - - Returns: - bool: 是否存在相似标题 - """ - try: - matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold) - exists = len(matches) > 0 - - if exists: - logger.info(f"发现相似标题: '{matches[0][0]}' (相似度: {matches[0][2]:.3f})") - else: - logger.debug("未发现相似标题") - - return exists - - except Exception as e: - logger.error(f"检查标题是否存在时出错: {e}") - return False - global_memory_chest = MemoryChest() \ No newline at end of file diff --git a/src/chat/memory_system/hippocampus_to_memory_chest_task.py b/src/chat/memory_system/hippocampus_to_memory_chest_task.py index b30e6d4d..6349a423 100644 --- a/src/chat/memory_system/hippocampus_to_memory_chest_task.py +++ b/src/chat/memory_system/hippocampus_to_memory_chest_task.py @@ -76,12 +76,12 @@ class HippocampusToMemoryChestTask(AsyncTask): break # 如果剩余节点不足10个,使用所有剩余节点 - if len(remaining_nodes) < 15: + if len(remaining_nodes) < 5: selected_nodes = remaining_nodes logger.info(f"[海马体转换] 第 {batch_num} 批:剩余节点不足10个({len(remaining_nodes)}个),使用所有剩余节点") else: # 随机选择10个节点 - selected_nodes = random.sample(remaining_nodes, 15) + selected_nodes = random.sample(remaining_nodes, 5) logger.info(f"[海马体转换] 第 {batch_num} 批:选择了 {len(selected_nodes)} 个节点") # 拼接节点内容 diff --git a/src/chat/memory_system/memory_management_task.py b/src/chat/memory_system/memory_management_task.py index 3b110826..52ec330e 100644 --- a/src/chat/memory_system/memory_management_task.py +++ b/src/chat/memory_system/memory_management_task.py @@ -60,10 +60,10 @@ class MemoryManagementTask(AsyncTask): return 3600 elif percentage < 0.7: # 大于等于50%,每300秒执行一次 - return 600 + return 1800 elif percentage < 0.9: # 大于等于70%,每120秒执行一次 - return 120 + return 300 elif percentage < 1.2: return 30 else: @@ -107,15 +107,15 @@ class MemoryManagementTask(AsyncTask): logger.info(f"[记忆管理] 随机选择的记忆标题: {selected_title}") # 执行choose_merge_target获取相关记忆内容 - related_contents = await global_memory_chest.choose_merge_target(selected_title) - if not related_contents: + related_contents_titles = await global_memory_chest.choose_merge_target(selected_title) + if not related_contents_titles: logger.warning("[记忆管理] 未找到相关记忆内容,跳过合并") return - logger.info(f"[记忆管理] 找到 {len(related_contents)} 条相关记忆") + logger.info(f"[记忆管理] 找到 {len(related_contents_titles)} 条相关记忆") # 执行merge_memory合并记忆 - merged_title, merged_content = await global_memory_chest.merge_memory(related_contents) + merged_title, merged_content = await global_memory_chest.merge_memory(related_contents_titles) if not merged_title or not merged_content: logger.warning("[记忆管理] 记忆合并失败,跳过删除") return @@ -123,7 +123,7 @@ class MemoryManagementTask(AsyncTask): logger.info(f"[记忆管理] 记忆合并成功,新标题: {merged_title}") # 删除原始记忆(包括选中的标题和相关的记忆) - deleted_count = self._delete_original_memories(selected_title, related_contents) + deleted_count = self._delete_original_memories(related_contents_titles) logger.info(f"[记忆管理] 已删除 {deleted_count} 条原始记忆") logger.info("[记忆管理] 记忆管理任务完成") @@ -147,20 +147,10 @@ class MemoryManagementTask(AsyncTask): logger.error(f"[记忆管理] 获取随机记忆标题时发生错误: {e}") return "" - def _delete_original_memories(self, selected_title: str, related_contents: List[str]) -> int: + def _delete_original_memories(self, related_contents: List[str]) -> int: """删除原始记忆""" try: deleted_count = 0 - - # 删除选中的标题对应的记忆 - try: - deleted = MemoryChestModel.delete().where(MemoryChestModel.title == selected_title).execute() - if deleted > 0: - deleted_count += deleted - logger.debug(f"[记忆管理] 删除选中记忆: {selected_title}") - except Exception as e: - logger.error(f"[记忆管理] 删除选中记忆时出错: {e}") - # 删除相关记忆(通过内容匹配) for content in related_contents: try: diff --git a/src/chat/memory_system/memory_utils.py b/src/chat/memory_system/memory_utils.py new file mode 100644 index 00000000..49060185 --- /dev/null +++ b/src/chat/memory_system/memory_utils.py @@ -0,0 +1,156 @@ +# -*- coding: utf-8 -*- +""" +记忆系统工具函数 +包含模糊查找、相似度计算等工具函数 +""" +import re +from difflib import SequenceMatcher +from typing import List, Tuple, Optional + +from src.common.database.database_model import MemoryChest as MemoryChestModel +from src.common.logger import get_logger + +logger = get_logger("memory_utils") + + +def calculate_similarity(text1: str, text2: str) -> float: + """ + 计算两个文本的相似度 + + Args: + text1: 第一个文本 + text2: 第二个文本 + + Returns: + float: 相似度分数 (0-1) + """ + try: + # 预处理文本 + text1 = preprocess_text(text1) + text2 = preprocess_text(text2) + + # 使用SequenceMatcher计算相似度 + similarity = SequenceMatcher(None, text1, text2).ratio() + + # 如果其中一个文本包含另一个,提高相似度 + if text1 in text2 or text2 in text1: + similarity = max(similarity, 0.8) + + return similarity + + except Exception as e: + logger.error(f"计算相似度时出错: {e}") + return 0.0 + + +def preprocess_text(text: str) -> str: + """ + 预处理文本,提高匹配准确性 + + Args: + text: 原始文本 + + Returns: + str: 预处理后的文本 + """ + try: + # 转换为小写 + text = text.lower() + + # 移除标点符号和特殊字符 + text = re.sub(r'[^\w\s]', '', text) + + # 移除多余空格 + text = re.sub(r'\s+', ' ', text).strip() + + return text + + except Exception as e: + logger.error(f"预处理文本时出错: {e}") + return text + + +def fuzzy_find_memory_by_title(target_title: str, similarity_threshold: float = 0.9) -> List[Tuple[str, str, float]]: + """ + 根据标题模糊查找记忆 + + Args: + target_title: 目标标题 + similarity_threshold: 相似度阈值,默认0.9 + + Returns: + List[Tuple[str, str, float]]: 匹配的记忆列表,每个元素为(title, content, similarity_score) + """ + try: + # 获取所有记忆 + all_memories = MemoryChestModel.select() + + matches = [] + for memory in all_memories: + similarity = calculate_similarity(target_title, memory.title) + if similarity >= similarity_threshold: + matches.append((memory.title, memory.content, similarity)) + + # 按相似度降序排序 + matches.sort(key=lambda x: x[2], reverse=True) + + logger.info(f"模糊查找标题 '{target_title}' 找到 {len(matches)} 个匹配项") + return matches + + except Exception as e: + logger.error(f"模糊查找记忆时出错: {e}") + return [] + + +def find_best_matching_memory(target_title: str, similarity_threshold: float = 0.9) -> Optional[Tuple[str, str, float]]: + """ + 查找最佳匹配的记忆 + + Args: + target_title: 目标标题 + similarity_threshold: 相似度阈值 + + Returns: + Optional[Tuple[str, str, float]]: 最佳匹配的记忆(title, content, similarity)或None + """ + try: + matches = fuzzy_find_memory_by_title(target_title, similarity_threshold) + + if matches: + best_match = matches[0] # 已经按相似度排序,第一个是最佳匹配 + logger.info(f"找到最佳匹配: '{best_match[0]}' (相似度: {best_match[2]:.3f})") + return best_match + else: + logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆") + return None + + except Exception as e: + logger.error(f"查找最佳匹配记忆时出错: {e}") + return None + + +def check_title_exists_fuzzy(target_title: str, similarity_threshold: float = 0.9) -> bool: + """ + 检查标题是否已存在(模糊匹配) + + Args: + target_title: 目标标题 + similarity_threshold: 相似度阈值,默认0.9(较高阈值避免误判) + + Returns: + bool: 是否存在相似标题 + """ + try: + matches = fuzzy_find_memory_by_title(target_title, similarity_threshold) + exists = len(matches) > 0 + + if exists: + logger.info(f"发现相似标题: '{matches[0][0]}' (相似度: {matches[0][2]:.3f})") + else: + logger.debug("未发现相似标题") + + return exists + + except Exception as e: + logger.error(f"检查标题是否存在时出错: {e}") + return False diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index 75b0998c..97bba8ae 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -355,7 +355,7 @@ class DefaultReplyer: content = tool_result.get("content", "") result_type = tool_result.get("type", "tool_result") - tool_info_str += f"- 【{tool_name}】{result_type}: {content}\n" + tool_info_str += f"- 【{tool_name}】: {content}\n" tool_info_str += "以上是你获取到的实时信息,请在回复时参考这些信息。" logger.info(f"获取到 {len(tool_results)} 个工具结果") diff --git a/src/plugins/built_in/memory/build_memory.py b/src/plugins/built_in/memory/build_memory.py index f9ba2c9b..9158fcbc 100644 --- a/src/plugins/built_in/memory/build_memory.py +++ b/src/plugins/built_in/memory/build_memory.py @@ -36,7 +36,7 @@ class GetMemoryTool(BaseTool): answer = await global_memory_chest.get_answer_by_question(question=question) if not answer: - return {"content": f"没有找到相关记忆"} + return {"content": f"问题:{question},没有找到相关记忆"} return {"content": f"问题:{question},答案:{answer}"} @@ -80,7 +80,7 @@ class GetMemoryAction(BaseAction): action_done=True, ) - return False, f"没有找到相关记忆" + return False, f"问题:{question},没有找到相关记忆" await self.store_action_info( action_build_into_prompt=True,