fix:模糊匹配记忆

pull/1273/head
SengokuCola 2025-09-28 14:33:12 +08:00
parent f4b977fa20
commit f3cbc6ed89
2 changed files with 165 additions and 19 deletions

View File

@ -1,6 +1,7 @@
import asyncio
import json
import re
from difflib import SequenceMatcher
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
@ -249,14 +250,14 @@ class MemoryChest:
titles = self.get_all_titles()
selected_title = None
# 查找完全匹配标题
for t in titles:
if t == title:
selected_title = t
break
logger.info(f"记忆仓库选择标题: {selected_title}")
# 使用模糊查找匹配标题
best_match = self.find_best_matching_memory(title, similarity_threshold=0.8)
if best_match:
selected_title = best_match[0] # 获取匹配的标题
logger.info(f"记忆仓库选择标题: {selected_title} (相似度: {best_match[2]:.3f})")
else:
logger.warning(f"未找到相似度 >= 0.7 的标题匹配: {title}")
selected_title = None
return selected_title
@ -381,7 +382,7 @@ class MemoryChest:
# 根据标题查找对应的内容
selected_contents = self._get_memories_by_titles(selected_titles)
logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆")
logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆:{selected_titles}")
return selected_contents
except Exception as e:
@ -399,21 +400,19 @@ class MemoryChest:
list[str]: 记忆内容列表
"""
try:
from src.common.database.database_model import MemoryChest as MemoryChestModel
contents = []
for title in titles:
if not title or not title.strip():
continue
# 在数据库中查找匹配的记忆
# 使用模糊查找匹配记忆
try:
memory_record = MemoryChestModel.select().where(MemoryChestModel.title == title.strip()).first()
if memory_record:
contents.append(memory_record.content)
logger.debug(f"找到记忆: {memory_record.title}")
best_match = self.find_best_matching_memory(title.strip(), similarity_threshold=0.8)
if best_match:
contents.append(best_match[1]) # best_match[1] 是 content
logger.debug(f"找到记忆: {best_match[0]} (相似度: {best_match[2]:.3f})")
else:
logger.warning(f"未找到标题为 '{title}' 的记忆")
logger.warning(f"未找到相似度 >= 0.8 的标题匹配: '{title}'")
except Exception as e:
logger.error(f"查找标题 '{title}' 的记忆时出错: {e}")
continue
@ -550,6 +549,11 @@ class MemoryChest:
title = title_response.strip().strip('"').strip("'").strip()
if title:
# 检查是否存在相似标题
if self.check_title_exists_fuzzy(title, similarity_threshold=0.9):
logger.warning(f"生成的标题 '{title}' 与现有标题相似,使用时间戳后缀")
title = f"{title}_{int(time.time())}"
logger.info(f"生成合并记忆标题: {title}")
return title
else:
@ -560,5 +564,143 @@ class MemoryChest:
logger.error(f"生成合并记忆标题时出错: {e}")
return f"合并记忆_{int(time.time())}"
def fuzzy_find_memory_by_title(self, target_title: str, similarity_threshold: float = 0.9) -> list[tuple[str, str, float]]:
    """
    Find stored memories whose title is similar to ``target_title``.

    Every record yielded by ``MemoryChestModel.select()`` is scored with
    ``self._calculate_similarity`` and kept when its score reaches the threshold.

    Args:
        target_title: Title to look for.
        similarity_threshold: Minimum score (inclusive) a stored title must reach.
            Defaults to 0.9.

    Returns:
        list[tuple[str, str, float]]: ``(title, content, similarity)`` tuples sorted
        by similarity, highest first. An empty list is returned when nothing
        matches or when an exception occurs (the exception is logged, not raised).
    """
    try:
        # Score every stored memory and keep only those at or above the threshold.
        matches = [
            (memory.title, memory.content, score)
            for memory in MemoryChestModel.select()
            if (score := self._calculate_similarity(target_title, memory.title)) >= similarity_threshold
        ]

        # Best match first; the sort is stable, so ties keep their query order.
        matches.sort(key=lambda match: match[2], reverse=True)

        logger.info(f"模糊查找标题 '{target_title}' 找到 {len(matches)} 个匹配项")
        return matches
    except Exception as e:
        logger.error(f"模糊查找记忆时出错: {e}")
        return []
def _calculate_similarity(self, text1: str, text2: str) -> float:
    """
    Compute a similarity score between two texts.

    Both texts are normalized with ``self._preprocess_text`` and compared with
    ``difflib.SequenceMatcher``. When one normalized text contains the other,
    the score is raised to at least 0.8.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        float: Similarity score between 0 and 1. Returns 0.0 if an exception
        occurs while computing the score (the exception is logged).
    """
    try:
        normalized1 = self._preprocess_text(text1)
        normalized2 = self._preprocess_text(text2)

        similarity = SequenceMatcher(None, normalized1, normalized2).ratio()

        # The empty string is a substring of every string, so the containment
        # boost is only meaningful when both normalized texts are non-empty.
        # Without this guard, a blank or punctuation-only title would score 0.8
        # against every other title.
        if normalized1 and normalized2 and (normalized1 in normalized2 or normalized2 in normalized1):
            similarity = max(similarity, 0.8)

        return similarity
    except Exception as e:
        logger.error(f"计算相似度时出错: {e}")
        return 0.0
def _preprocess_text(self, text: str) -> str:
    """
    Normalize a text before it is compared with another one.

    The text is lower-cased, every character that is neither a word character
    nor whitespace is removed, and runs of whitespace are collapsed into a
    single space with leading/trailing whitespace stripped.

    Args:
        text: Raw text.

    Returns:
        str: The normalized text. If normalization raises, the error is logged
        and the input is returned unchanged.
    """
    try:
        lowered = text.lower()
        # Drop punctuation and other special characters.
        without_symbols = re.sub(r'[^\w\s]', '', lowered)
        # Collapse repeated whitespace and trim both ends.
        return re.sub(r'\s+', ' ', without_symbols).strip()
    except Exception as e:
        logger.error(f"预处理文本时出错: {e}")
        return text
def find_best_matching_memory(self, target_title: str, similarity_threshold: float = 0.9) -> tuple[str, str, float] | None:
    """
    Return the stored memory whose title best matches ``target_title``.

    Delegates to ``self.fuzzy_find_memory_by_title`` and takes the first entry
    of its result, which that method returns sorted by descending similarity.

    Args:
        target_title: Title to look for.
        similarity_threshold: Minimum similarity a candidate must reach.

    Returns:
        tuple[str, str, float] | None: ``(title, content, similarity)`` of the
        best match, or ``None`` when there is no match or an exception occurs
        (the exception is logged).
    """
    try:
        candidates = self.fuzzy_find_memory_by_title(target_title, similarity_threshold)

        if not candidates:
            logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆")
            return None

        # Candidates arrive sorted by similarity, so the head is the best one.
        best_match = candidates[0]
        logger.info(f"找到最佳匹配: '{best_match[0]}' (相似度: {best_match[2]:.3f})")
        return best_match
    except Exception as e:
        logger.error(f"查找最佳匹配记忆时出错: {e}")
        return None
def check_title_exists_fuzzy(self, target_title: str, similarity_threshold: float = 0.9) -> bool:
    """
    Check whether a stored memory already has a title similar to ``target_title``.

    Args:
        target_title: Title to check.
        similarity_threshold: Minimum similarity for a stored title to count as
            a match. Defaults to 0.9.

    Returns:
        bool: ``True`` when ``self.fuzzy_find_memory_by_title`` returns at least
        one match, ``False`` otherwise or when an exception occurs (the
        exception is logged).
    """
    try:
        matches = self.fuzzy_find_memory_by_title(target_title, similarity_threshold)

        if not matches:
            logger.debug("未发现相似标题")
            return False

        # Matches come back sorted by similarity; report the closest one.
        logger.info(f"发现相似标题: '{matches[0][0]}' (相似度: {matches[0][2]:.3f})")
        return True
    except Exception as e:
        logger.error(f"检查标题是否存在时出错: {e}")
        return False
# MemoryChest instance bound to the name `global_memory_chest`.
global_memory_chest = MemoryChest()

View File

@ -61,10 +61,14 @@ class MemoryManagementTask(AsyncTask):
elif percentage < 0.7:
# 大于等于50%每300秒执行一次
return 600
else:
elif percentage < 0.9:
# 大于等于70%每120秒执行一次
return 120
elif percentage < 1.2:
return 30
else:
return 10
except Exception as e:
logger.error(f"[记忆管理] 计算执行间隔时出错: {e}")
return 300 # 默认300秒