From f3cbc6ed89376382565ccac978eae7686fa44493 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sun, 28 Sep 2025 14:33:12 +0800 Subject: [PATCH] =?UTF-8?q?fuix=EF=BC=9A=E6=A8=A1=E7=B3=8A=E5=8C=B9?= =?UTF-8?q?=E9=85=8D=E8=AE=B0=E5=BF=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/memory_system/Memory_chest.py | 176 ++++++++++++++++-- .../memory_system/memory_management_task.py | 8 +- 2 files changed, 165 insertions(+), 19 deletions(-) diff --git a/src/chat/memory_system/Memory_chest.py b/src/chat/memory_system/Memory_chest.py index d0435a97..5dc02145 100644 --- a/src/chat/memory_system/Memory_chest.py +++ b/src/chat/memory_system/Memory_chest.py @@ -1,6 +1,7 @@ import asyncio import json import re +from difflib import SequenceMatcher from src.llm_models.utils_model import LLMRequest from src.config.config import model_config @@ -249,14 +250,14 @@ class MemoryChest: titles = self.get_all_titles() selected_title = None - # 查找完全匹配的标题 - for t in titles: - if t == title: - selected_title = t - break - - - logger.info(f"记忆仓库选择标题: {selected_title}") + # 使用模糊查找匹配标题 + best_match = self.find_best_matching_memory(title, similarity_threshold=0.8) + if best_match: + selected_title = best_match[0] # 获取匹配的标题 + logger.info(f"记忆仓库选择标题: {selected_title} (相似度: {best_match[2]:.3f})") + else: + logger.warning(f"未找到相似度 >= 0.8 的标题匹配: {title}") + selected_title = None return selected_title @@ -381,7 +382,7 @@ class MemoryChest: # 根据标题查找对应的内容 selected_contents = self._get_memories_by_titles(selected_titles) - logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆") + logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆:{selected_titles}") return selected_contents except Exception as e: @@ -399,21 +400,19 @@ class MemoryChest: list[str]: 记忆内容列表 """ try: - from src.common.database.database_model import MemoryChest as MemoryChestModel - contents = [] for title in titles: if not title or not title.strip(): continue - 
# 在数据库中查找匹配的记忆 + # 使用模糊查找匹配记忆 try: - memory_record = MemoryChestModel.select().where(MemoryChestModel.title == title.strip()).first() - if memory_record: - contents.append(memory_record.content) - logger.debug(f"找到记忆: {memory_record.title}") + best_match = self.find_best_matching_memory(title.strip(), similarity_threshold=0.8) + if best_match: + contents.append(best_match[1]) # best_match[1] 是 content + logger.debug(f"找到记忆: {best_match[0]} (相似度: {best_match[2]:.3f})") else: - logger.warning(f"未找到标题为 '{title}' 的记忆") + logger.warning(f"未找到相似度 >= 0.8 的标题匹配: '{title}'") except Exception as e: logger.error(f"查找标题 '{title}' 的记忆时出错: {e}") continue @@ -550,6 +549,11 @@ class MemoryChest: title = title_response.strip().strip('"').strip("'").strip() if title: + # 检查是否存在相似标题 + if self.check_title_exists_fuzzy(title, similarity_threshold=0.9): + logger.warning(f"生成的标题 '{title}' 与现有标题相似,使用时间戳后缀") + title = f"{title}_{int(time.time())}" + logger.info(f"生成合并记忆标题: {title}") return title else: @@ -560,5 +564,143 @@ class MemoryChest: logger.error(f"生成合并记忆标题时出错: {e}") return f"合并记忆_{int(time.time())}" + def fuzzy_find_memory_by_title(self, target_title: str, similarity_threshold: float = 0.9) -> list[tuple[str, str, float]]: + """ + 根据标题模糊查找记忆 + + Args: + target_title: 目标标题 + similarity_threshold: 相似度阈值,默认0.9 + + Returns: + list[tuple[str, str, float]]: 匹配的记忆列表,每个元素为(title, content, similarity_score) + """ + try: + # 获取所有记忆 + all_memories = MemoryChestModel.select() + + matches = [] + for memory in all_memories: + similarity = self._calculate_similarity(target_title, memory.title) + if similarity >= similarity_threshold: + matches.append((memory.title, memory.content, similarity)) + + # 按相似度降序排序 + matches.sort(key=lambda x: x[2], reverse=True) + + logger.info(f"模糊查找标题 '{target_title}' 找到 {len(matches)} 个匹配项") + return matches + + except Exception as e: + logger.error(f"模糊查找记忆时出错: {e}") + return [] + + def _calculate_similarity(self, text1: str, text2: str) -> float: + """ + 
计算两个文本的相似度 + + Args: + text1: 第一个文本 + text2: 第二个文本 + + Returns: + float: 相似度分数 (0-1) + """ + try: + # 预处理文本 + text1 = self._preprocess_text(text1) + text2 = self._preprocess_text(text2) + + # 使用SequenceMatcher计算相似度 + similarity = SequenceMatcher(None, text1, text2).ratio() + + # 如果其中一个文本包含另一个,提高相似度 + if text1 in text2 or text2 in text1: + similarity = max(similarity, 0.8) + + return similarity + + except Exception as e: + logger.error(f"计算相似度时出错: {e}") + return 0.0 + + def _preprocess_text(self, text: str) -> str: + """ + 预处理文本,提高匹配准确性 + + Args: + text: 原始文本 + + Returns: + str: 预处理后的文本 + """ + try: + # 转换为小写 + text = text.lower() + + # 移除标点符号和特殊字符 + text = re.sub(r'[^\w\s]', '', text) + + # 移除多余空格 + text = re.sub(r'\s+', ' ', text).strip() + + return text + + except Exception as e: + logger.error(f"预处理文本时出错: {e}") + return text + + def find_best_matching_memory(self, target_title: str, similarity_threshold: float = 0.9) -> tuple[str, str, float] | None: + """ + 查找最佳匹配的记忆 + + Args: + target_title: 目标标题 + similarity_threshold: 相似度阈值 + + Returns: + tuple[str, str, float] | None: 最佳匹配的记忆(title, content, similarity)或None + """ + try: + matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold) + + if matches: + best_match = matches[0] # 已经按相似度排序,第一个是最佳匹配 + logger.info(f"找到最佳匹配: '{best_match[0]}' (相似度: {best_match[2]:.3f})") + return best_match + else: + logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆") + return None + + except Exception as e: + logger.error(f"查找最佳匹配记忆时出错: {e}") + return None + + def check_title_exists_fuzzy(self, target_title: str, similarity_threshold: float = 0.9) -> bool: + """ + 检查标题是否已存在(模糊匹配) + + Args: + target_title: 目标标题 + similarity_threshold: 相似度阈值,默认0.9(较高阈值避免误判) + + Returns: + bool: 是否存在相似标题 + """ + try: + matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold) + exists = len(matches) > 0 + + if exists: + logger.info(f"发现相似标题: '{matches[0][0]}' (相似度: {matches[0][2]:.3f})") + else: + 
logger.debug("未发现相似标题") + + return exists + + except Exception as e: + logger.error(f"检查标题是否存在时出错: {e}") + return False + global_memory_chest = MemoryChest() \ No newline at end of file diff --git a/src/chat/memory_system/memory_management_task.py b/src/chat/memory_system/memory_management_task.py index df534f27..3b110826 100644 --- a/src/chat/memory_system/memory_management_task.py +++ b/src/chat/memory_system/memory_management_task.py @@ -61,10 +61,14 @@ class MemoryManagementTask(AsyncTask): elif percentage < 0.7: # 大于等于50%,每300秒执行一次 return 600 - else: + elif percentage < 0.9: # 大于等于70%,每120秒执行一次 return 120 - + elif percentage < 1.2: + return 30 + else: + return 10 + except Exception as e: logger.error(f"[记忆管理] 计算执行间隔时出错: {e}") return 300 # 默认300秒