better:优化记忆构建和合并

pull/1319/head
SengokuCola 2025-10-25 23:58:58 +08:00
parent 00c8144d49
commit c5b9bc4927
4 changed files with 131 additions and 58 deletions

View File

@ -18,7 +18,7 @@ from .memory_utils import (
find_best_matching_memory,
check_title_exists_fuzzy,
get_all_titles,
get_memory_titles_by_chat_id_weighted,
find_most_similar_memory_by_chat_id,
)
@ -430,75 +430,40 @@ class MemoryChest:
except Exception as e:
logger.error(f"保存记忆仓库内容时出错: {e}")
async def choose_merge_target(
    self,
    memory_title: str,
    chat_id: str = None,
    similarity_threshold: float = 0.5,
) -> tuple[list[str], list[str]]:
    """
    Pick the existing memory to merge with, based on title text similarity.

    Args:
        memory_title: Title of the memory that is looking for a merge partner.
        chat_id: Chat ID; only memories stored under the same chat_id are
            considered as candidates.
        similarity_threshold: Minimum similarity forwarded to
            find_most_similar_memory_by_chat_id. Defaults to 0.5.

    Returns:
        tuple[list[str], list[str]]: (selected titles, selected contents).
        Both lists are empty when chat_id is missing, when no candidate
        reaches the threshold, or when an error occurs; otherwise each list
        holds exactly one entry.
    """
    try:
        if not chat_id:
            logger.warning("未提供chat_id无法进行记忆匹配")
            return [], []

        # Delegate the search to the similarity helper, scoped to this chat.
        similar_memory = find_most_similar_memory_by_chat_id(
            target_title=memory_title,
            target_chat_id=chat_id,
            similarity_threshold=similarity_threshold,
        )

        if similar_memory:
            selected_title, selected_content, similarity = similar_memory
            logger.info(f"'{memory_title}' 找到相似记忆: '{selected_title}' (相似度: {similarity:.3f})")
            return [selected_title], [selected_content]

        # Report the threshold that was actually used instead of a
        # hard-coded number that can drift away from the real value.
        logger.info(f"'{memory_title}' 未找到相似度 >= {similarity_threshold} 的记忆")
        return [], []

    except Exception as e:
        logger.error(f"选择合并目标时出错: {e}")
        # Keep the (titles, contents) shape so callers can always unpack.
        return [], []
def _get_memories_by_titles(self, titles: list[str]) -> list[str]:
"""
@ -659,6 +624,11 @@ class MemoryChest:
合并记忆
"""
try:
# 在记忆整合前先清理空chat_id的记忆
cleaned_count = self.cleanup_empty_chat_id_memories()
if cleaned_count > 0:
logger.info(f"记忆整合前清理了 {cleaned_count} 条空chat_id记忆")
content = ""
for memory in memory_list:
content += f"{memory}\n"
@ -793,5 +763,37 @@ MutePlugin 是禁言插件的名称
logger.error(f"生成合并记忆标题时出错: {e}")
return f"合并记忆_{int(time.time())}"
def cleanup_empty_chat_id_memories(self) -> int:
    """
    Delete memory records that have no usable chat_id.

    A record is removed when its chat_id is NULL, an empty string, or the
    literal string "None".

    Returns:
        int: Number of records deleted; 0 when nothing matched or when an
        error occurred.
    """
    try:
        # The three representations of a "missing" chat_id.
        blank_chat_id = (
            (MemoryChestModel.chat_id.is_null())
            | (MemoryChestModel.chat_id == "")
            | (MemoryChestModel.chat_id == "None")
        )

        removed = 0
        for record in MemoryChestModel.select().where(blank_chat_id):
            # Log every row before deleting it so the removal is traceable.
            logger.info(f"清理空chat_id记忆: 标题='{record.title}', ID={record.id}")
            record.delete_instance()
            removed += 1

        if removed == 0:
            logger.debug("未发现需要清理的空chat_id记忆记录")
        else:
            logger.info(f"已清理 {removed} 条chat_id为空的记忆记录")

        return removed

    except Exception as e:
        logger.error(f"清理空chat_id记忆时出错: {e}")
        return 0
# Module-level MemoryChest instance, created once when this module is imported.
global_memory_chest = MemoryChest()

View File

@ -109,7 +109,7 @@ class MemoryManagementTask(AsyncTask):
logger.info("无合适合并内容,跳过本次合并")
return
logger.info(f"为 [{selected_title}] 找到 {len(related_contents)} 条相关记忆:{related_titles}")
logger.info(f"{selected_chat_id} 为 [{selected_title}] 找到 {len(related_contents)} 条相关记忆:{related_titles}")
# 执行merge_memory合并记忆
merged_title, merged_content = await global_memory_chest.merge_memory(related_contents,selected_chat_id)

View File

@ -303,4 +303,55 @@ def get_memory_titles_by_chat_id_weighted(target_chat_id: str, same_chat_weight:
except Exception as e:
logger.error(f"按chat_id加权抽样记忆标题时出错: {e}")
return []
return []
def find_most_similar_memory_by_chat_id(target_title: str, target_chat_id: str, similarity_threshold: float = 0.5) -> Optional[Tuple[str, str, float]]:
    """
    Find the unlocked memory in one chat whose title is most similar to a target title.

    Args:
        target_title: Title to compare against. A memory whose stripped title
            equals the stripped target title is treated as the target itself
            and skipped.
        target_chat_id: Only memories whose chat_id equals this value are
            considered.
        similarity_threshold: Minimum similarity (inclusive) the best candidate
            must reach. Defaults to 0.5.

    Returns:
        Optional[Tuple[str, str, float]]: (title, content, similarity) of the
        best match, or None when the target title is empty, the chat has no
        eligible memories, no candidate reaches the threshold, or an error
        occurs.
    """
    try:
        # Guard early: a None/blank title cannot be matched, and calling
        # .strip() on None would otherwise surface as a generic error below.
        wanted_title = (target_title or "").strip()
        if not wanted_title:
            logger.warning("目标标题为空,无法查找相似记忆")
            return None

        # Let the database filter by chat_id instead of loading every row;
        # title/locked checks stay in Python to keep their truthiness semantics.
        same_chat_memories = [
            (memory.title, memory.content)
            for memory in MemoryChestModel.select().where(MemoryChestModel.chat_id == target_chat_id)
            if memory.title and not memory.locked
        ]

        if not same_chat_memories:
            logger.warning(f"未找到chat_id为 '{target_chat_id}' 的记忆")
            return None

        # Track the best-scoring candidate; on ties the first one seen wins.
        best_match = None
        best_similarity = 0.0

        for title, content in same_chat_memories:
            # Skip the target memory itself.
            if title.strip() == wanted_title:
                continue

            similarity = calculate_similarity(target_title, title)

            if similarity > best_similarity:
                best_similarity = similarity
                best_match = (title, content, similarity)

        # Only accept the best candidate when it reaches the threshold.
        if best_match and best_similarity >= similarity_threshold:
            logger.info(f"找到最相似记忆: '{best_match[0]}' (相似度: {best_similarity:.3f})")
            return best_match

        logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆,最高相似度: {best_similarity:.3f}")
        return None

    except Exception as e:
        logger.error(f"查找最相似记忆时出错: {e}")
        return None

View File

@ -146,6 +146,20 @@ class GetMemoryTool(BaseTool):
"""从聊天记录中获取问题的答案"""
try:
# 确定时间范围
print(f"time_point: {time_point}, time_range: {time_range}")
# 检查time_range的两个时间值是否相同如果相同则按照time_point处理
if time_range and not time_point:
try:
start_timestamp, end_timestamp = parse_time_range(time_range)
if start_timestamp == end_timestamp:
# 两个时间值相同按照time_point处理
time_point = time_range.split(" - ")[0].strip()
time_range = None
print(f"time_range两个值相同按照time_point处理: {time_point}")
except Exception as e:
logger.warning(f"解析time_range失败: {e}")
if time_point:
# 时间点搜索前后25条记录
target_timestamp = parse_datetime_to_timestamp(time_point)
@ -204,12 +218,18 @@ class GetMemoryTool(BaseTool):
答案"""
print(f"analysis_prompt: {analysis_prompt}")
response, (reasoning, model_name, tool_calls) = await llm_request.generate_response_async(
prompt=analysis_prompt,
temperature=0.3,
max_tokens=256
)
print(f"response: {response}")
if "无有效信息" in response:
return ""