fix：模糊匹配记忆

2025-09-28 14:33:12 +08:00 · 2025-09-28 14:33:12 +08:00 · f3cbc6ed89
parent f4b977fa20
commit f3cbc6ed89
2 changed files with 165 additions and 19 deletions
--- a/src/chat/memory_system/Memory_chest.py
+++ b/src/chat/memory_system/Memory_chest.py
@ -1,6 +1,7 @@
 import asyncio
 import json
 import re
+from difflib import SequenceMatcher

 from src.llm_models.utils_model import LLMRequest
 from src.config.config import model_config
@ -249,14 +250,14 @@ class MemoryChest:
        titles = self.get_all_titles()
        selected_title = None

-        # 查找完全匹配的标题
-        for t in titles:
-            if t == title:
-                selected_title = t
-                break
-
-                    
-        logger.info(f"记忆仓库选择标题: {selected_title}")
+        # 使用模糊查找匹配标题
+        best_match = self.find_best_matching_memory(title, similarity_threshold=0.8)
+        if best_match:
+            selected_title = best_match[0]  # 获取匹配的标题
+            logger.info(f"记忆仓库选择标题: {selected_title} (相似度: {best_match[2]:.3f})")
+        else:
+            logger.warning(f"未找到相似度 >= 0.7 的标题匹配: {title}")
+            selected_title = None

        return selected_title

@ -381,7 +382,7 @@ class MemoryChest:
            # 根据标题查找对应的内容
            selected_contents = self._get_memories_by_titles(selected_titles)
            
-            logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆")
+            logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆:{selected_titles}")
            return selected_contents
            
        except Exception as e:
@ -399,21 +400,19 @@ class MemoryChest:
            list[str]: 记忆内容列表
        """
        try:
-            from src.common.database.database_model import MemoryChest as MemoryChestModel
-            
            contents = []
            for title in titles:
                if not title or not title.strip():
                    continue
                    
-                # 在数据库中查找匹配的记忆
+                # 使用模糊查找匹配记忆
                try:
-                    memory_record = MemoryChestModel.select().where(MemoryChestModel.title == title.strip()).first()
-                    if memory_record:
-                        contents.append(memory_record.content)
-                        logger.debug(f"找到记忆: {memory_record.title}")
+                    best_match = self.find_best_matching_memory(title.strip(), similarity_threshold=0.8)
+                    if best_match:
+                        contents.append(best_match[1])  # best_match[1] 是 content
+                        logger.debug(f"找到记忆: {best_match[0]} (相似度: {best_match[2]:.3f})")
                    else:
-                        logger.warning(f"未找到标题为 '{title}' 的记忆")
+                        logger.warning(f"未找到相似度 >= 0.8 的标题匹配: '{title}'")
                except Exception as e:
                    logger.error(f"查找标题 '{title}' 的记忆时出错: {e}")
                    continue
@ -550,6 +549,11 @@ class MemoryChest:
            title = title_response.strip().strip('"').strip("'").strip()
            
            if title:
+                # 检查是否存在相似标题
+                if self.check_title_exists_fuzzy(title, similarity_threshold=0.9):
+                    logger.warning(f"生成的标题 '{title}' 与现有标题相似，使用时间戳后缀")
+                    title = f"{title}_{int(time.time())}"
+                
                logger.info(f"生成合并记忆标题: {title}")
                return title
            else:
@ -560,5 +564,143 @@ class MemoryChest:
            logger.error(f"生成合并记忆标题时出错: {e}")
            return f"合并记忆_{int(time.time())}"
    
+    def fuzzy_find_memory_by_title(self, target_title: str, similarity_threshold: float = 0.9) -> list[tuple[str, str, float]]:
+        """
+        根据标题模糊查找记忆
+        
+        Args:
+            target_title: 目标标题
+            similarity_threshold: 相似度阈值，默认0.9
+            
+        Returns:
+            list[tuple[str, str, float]]: 匹配的记忆列表，每个元素为(title, content, similarity_score)
+        """
+        try:
+            # 获取所有记忆
+            all_memories = MemoryChestModel.select()
+            
+            matches = []
+            for memory in all_memories:
+                similarity = self._calculate_similarity(target_title, memory.title)
+                if similarity >= similarity_threshold:
+                    matches.append((memory.title, memory.content, similarity))
+            
+            # 按相似度降序排序
+            matches.sort(key=lambda x: x[2], reverse=True)
+            
+            logger.info(f"模糊查找标题 '{target_title}' 找到 {len(matches)} 个匹配项")
+            return matches
+            
+        except Exception as e:
+            logger.error(f"模糊查找记忆时出错: {e}")
+            return []
+    
+    def _calculate_similarity(self, text1: str, text2: str) -> float:
+        """
+        计算两个文本的相似度
+        
+        Args:
+            text1: 第一个文本
+            text2: 第二个文本
+            
+        Returns:
+            float: 相似度分数 (0-1)
+        """
+        try:
+            # 预处理文本
+            text1 = self._preprocess_text(text1)
+            text2 = self._preprocess_text(text2)
+            
+            # 使用SequenceMatcher计算相似度
+            similarity = SequenceMatcher(None, text1, text2).ratio()
+            
+            # 如果其中一个文本包含另一个，提高相似度
+            if text1 in text2 or text2 in text1:
+                similarity = max(similarity, 0.8)
+            
+            return similarity
+            
+        except Exception as e:
+            logger.error(f"计算相似度时出错: {e}")
+            return 0.0
+    
+    def _preprocess_text(self, text: str) -> str:
+        """
+        预处理文本，提高匹配准确性
+        
+        Args:
+            text: 原始文本
+            
+        Returns:
+            str: 预处理后的文本
+        """
+        try:
+            # 转换为小写
+            text = text.lower()
+            
+            # 移除标点符号和特殊字符
+            text = re.sub(r'[^\w\s]', '', text)
+            
+            # 移除多余空格
+            text = re.sub(r'\s+', ' ', text).strip()
+            
+            return text
+            
+        except Exception as e:
+            logger.error(f"预处理文本时出错: {e}")
+            return text
+    
+    def find_best_matching_memory(self, target_title: str, similarity_threshold: float = 0.9) -> tuple[str, str, float] | None:
+        """
+        查找最佳匹配的记忆
+        
+        Args:
+            target_title: 目标标题
+            similarity_threshold: 相似度阈值
+            
+        Returns:
+            tuple[str, str, float] | None: 最佳匹配的记忆(title, content, similarity)或None
+        """
+        try:
+            matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold)
+            
+            if matches:
+                best_match = matches[0]  # 已经按相似度排序，第一个是最佳匹配
+                logger.info(f"找到最佳匹配: '{best_match[0]}' (相似度: {best_match[2]:.3f})")
+                return best_match
+            else:
+                logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆")
+                return None
+                
+        except Exception as e:
+            logger.error(f"查找最佳匹配记忆时出错: {e}")
+            return None
+    
+    def check_title_exists_fuzzy(self, target_title: str, similarity_threshold: float = 0.9) -> bool:
+        """
+        检查标题是否已存在（模糊匹配）
+        
+        Args:
+            target_title: 目标标题
+            similarity_threshold: 相似度阈值，默认0.9（较高阈值避免误判）
+            
+        Returns:
+            bool: 是否存在相似标题
+        """
+        try:
+            matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold)
+            exists = len(matches) > 0
+            
+            if exists:
+                logger.info(f"发现相似标题: '{matches[0][0]}' (相似度: {matches[0][2]:.3f})")
+            else:
+                logger.debug("未发现相似标题")
+                
+            return exists
+            
+        except Exception as e:
+            logger.error(f"检查标题是否存在时出错: {e}")
+            return False
+    
    
 global_memory_chest = MemoryChest()
--- a/src/chat/memory_system/memory_management_task.py
+++ b/src/chat/memory_system/memory_management_task.py
@ -61,10 +61,14 @@ class MemoryManagementTask(AsyncTask):
            elif percentage < 0.7:
                 # 大于等于50%，每600秒执行一次
                return 600
-            else:
+            elif percentage < 0.9:
                # 大于等于70%，每120秒执行一次
                return 120
-                
+            elif percentage < 1.2:
+                return 30
+            else:
+                return 10
+            
        except Exception as e:
            logger.error(f"[记忆管理] 计算执行间隔时出错: {e}")
            return 300  # 默认300秒