mirror of https://github.com/Mai-with-u/MaiBot.git
better:优化记忆构建和合并
parent
00c8144d49
commit
c5b9bc4927
|
|
@ -18,7 +18,7 @@ from .memory_utils import (
|
|||
find_best_matching_memory,
|
||||
check_title_exists_fuzzy,
|
||||
get_all_titles,
|
||||
get_memory_titles_by_chat_id_weighted,
|
||||
find_most_similar_memory_by_chat_id,
|
||||
|
||||
)
|
||||
|
||||
|
|
@ -430,75 +430,40 @@ class MemoryChest:
|
|||
except Exception as e:
|
||||
logger.error(f"保存记忆仓库内容时出错: {e}")
|
||||
|
||||
async def choose_merge_target(self, memory_title: str, chat_id: str = None) -> tuple[list[str], list[str]]:
    """
    Pick the stored memory that should be merged with ``memory_title``.

    The candidate is chosen by text similarity between titles, via
    ``find_most_similar_memory_by_chat_id``, restricted to the given chat.

    Args:
        memory_title: Title of the memory to find a merge partner for.
        chat_id: Chat ID used to restrict candidates to the same chat.
            When it is missing or empty no matching is attempted.

    Returns:
        tuple[list[str], list[str]]: ``(selected_titles, selected_contents)``.
        Both lists hold exactly one element when a match is found and are
        empty when no chat_id was given, nothing reached the threshold,
        or an error occurred.
    """
    # Single source of truth for the threshold so the log message below
    # can never drift from the value that is actually applied.
    similarity_threshold = 0.5

    try:
        if not chat_id:
            logger.warning("未提供chat_id,无法进行记忆匹配")
            return [], []

        # Look up the most similar memory among those of the same chat.
        similar_memory = find_most_similar_memory_by_chat_id(
            target_title=memory_title,
            target_chat_id=chat_id,
            similarity_threshold=similarity_threshold,
        )

        if similar_memory:
            selected_title, selected_content, similarity = similar_memory
            logger.info(f"为 '{memory_title}' 找到相似记忆: '{selected_title}' (相似度: {similarity:.3f})")
            return [selected_title], [selected_content]

        # Report the threshold that was really used (previously hard-coded as 0.7).
        logger.info(f"为 '{memory_title}' 未找到相似度 >= {similarity_threshold} 的记忆")
        return [], []

    except Exception as e:
        # Best-effort: a failed lookup must not break the caller, which
        # unpacks the result as a (titles, contents) pair.
        logger.error(f"选择合并目标时出错: {e}")
        return [], []
|
||||
|
||||
def _get_memories_by_titles(self, titles: list[str]) -> list[str]:
|
||||
"""
|
||||
|
|
@ -659,6 +624,11 @@ class MemoryChest:
|
|||
合并记忆
|
||||
"""
|
||||
try:
|
||||
# 在记忆整合前先清理空chat_id的记忆
|
||||
cleaned_count = self.cleanup_empty_chat_id_memories()
|
||||
if cleaned_count > 0:
|
||||
logger.info(f"记忆整合前清理了 {cleaned_count} 条空chat_id记忆")
|
||||
|
||||
content = ""
|
||||
for memory in memory_list:
|
||||
content += f"{memory}\n"
|
||||
|
|
@ -793,5 +763,37 @@ MutePlugin 是禁言插件的名称
|
|||
logger.error(f"生成合并记忆标题时出错: {e}")
|
||||
return f"合并记忆_{int(time.time())}"
|
||||
|
||||
def cleanup_empty_chat_id_memories(self) -> int:
    """
    Delete memory records that have no usable chat_id.

    A record is treated as having an empty chat_id when the column is
    NULL, the empty string, or the literal string ``"None"``.

    Returns:
        int: Number of records deleted; 0 when nothing matched or when
        an error occurred (the error is logged, not raised).
    """
    try:
        # Filter expression covering the three "empty" spellings of chat_id.
        has_no_chat_id = (
            MemoryChestModel.chat_id.is_null()
            | (MemoryChestModel.chat_id == "")
            | (MemoryChestModel.chat_id == "None")
        )

        removed = 0
        for record in MemoryChestModel.select().where(has_no_chat_id):
            # Log each record before deleting it so the removal is traceable.
            logger.info(f"清理空chat_id记忆: 标题='{record.title}', ID={record.id}")
            record.delete_instance()
            removed += 1

        if not removed:
            logger.debug("未发现需要清理的空chat_id记忆记录")
        else:
            logger.info(f"已清理 {removed} 条chat_id为空的记忆记录")

        return removed

    except Exception as e:
        logger.error(f"清理空chat_id记忆时出错: {e}")
        return 0
|
||||
|
||||
|
||||
global_memory_chest = MemoryChest()
|
||||
|
|
@ -109,7 +109,7 @@ class MemoryManagementTask(AsyncTask):
|
|||
logger.info("无合适合并内容,跳过本次合并")
|
||||
return
|
||||
|
||||
logger.info(f"为 [{selected_title}] 找到 {len(related_contents)} 条相关记忆:{related_titles}")
|
||||
logger.info(f"{selected_chat_id} 为 [{selected_title}] 找到 {len(related_contents)} 条相关记忆:{related_titles}")
|
||||
|
||||
# 执行merge_memory合并记忆
|
||||
merged_title, merged_content = await global_memory_chest.merge_memory(related_contents,selected_chat_id)
|
||||
|
|
|
|||
|
|
@ -303,4 +303,55 @@ def get_memory_titles_by_chat_id_weighted(target_chat_id: str, same_chat_weight:
|
|||
|
||||
except Exception as e:
|
||||
logger.error(f"按chat_id加权抽样记忆标题时出错: {e}")
|
||||
return []
|
||||
return []
|
||||
|
||||
|
||||
def find_most_similar_memory_by_chat_id(target_title: str, target_chat_id: str, similarity_threshold: float = 0.5) -> Optional[Tuple[str, str, float]]:
    """
    Find the memory most similar to ``target_title`` within one chat.

    Only memories that have a title, are not locked, and whose chat_id
    equals ``target_chat_id`` are considered. A memory whose stripped
    title equals the stripped target title is skipped.

    Args:
        target_title: Title to compare against.
        target_chat_id: Chat ID whose memories are searched.
        similarity_threshold: Minimum similarity a match must reach.
            Defaults to 0.5.

    Returns:
        Optional[Tuple[str, str, float]]: ``(title, content, similarity)``
        of the best match, or ``None`` when the chat has no eligible
        memories, no candidate reaches the threshold, or an error occurs.
    """
    try:
        # Collect (title, content) for every eligible memory of this chat.
        same_chat_memories = [
            (memory.title, memory.content)
            for memory in MemoryChestModel.select()
            if memory.title and not memory.locked and memory.chat_id == target_chat_id
        ]

        if not same_chat_memories:
            logger.warning(f"未找到chat_id为 '{target_chat_id}' 的记忆")
            return None

        # Loop-invariant: normalise the target title once, not per candidate.
        normalized_target = target_title.strip()

        best_match = None
        best_similarity = 0.0

        for title, content in same_chat_memories:
            # Never match the target memory against itself.
            if title.strip() == normalized_target:
                continue

            similarity = calculate_similarity(target_title, title)

            # Strict ">" keeps the first candidate on ties and ignores
            # candidates whose similarity is 0.
            if similarity > best_similarity:
                best_similarity = similarity
                best_match = (title, content, similarity)

        # Accept the best candidate only if it reaches the threshold.
        if best_match and best_similarity >= similarity_threshold:
            logger.info(f"找到最相似记忆: '{best_match[0]}' (相似度: {best_similarity:.3f})")
            return best_match

        logger.info(f"未找到相似度 >= {similarity_threshold} 的记忆,最高相似度: {best_similarity:.3f}")
        return None

    except Exception as e:
        logger.error(f"查找最相似记忆时出错: {e}")
        return None
|
||||
|
|
@ -146,6 +146,20 @@ class GetMemoryTool(BaseTool):
|
|||
"""从聊天记录中获取问题的答案"""
|
||||
try:
|
||||
# 确定时间范围
|
||||
print(f"time_point: {time_point}, time_range: {time_range}")
|
||||
|
||||
# 检查time_range的两个时间值是否相同,如果相同则按照time_point处理
|
||||
if time_range and not time_point:
|
||||
try:
|
||||
start_timestamp, end_timestamp = parse_time_range(time_range)
|
||||
if start_timestamp == end_timestamp:
|
||||
# 两个时间值相同,按照time_point处理
|
||||
time_point = time_range.split(" - ")[0].strip()
|
||||
time_range = None
|
||||
print(f"time_range两个值相同,按照time_point处理: {time_point}")
|
||||
except Exception as e:
|
||||
logger.warning(f"解析time_range失败: {e}")
|
||||
|
||||
if time_point:
|
||||
# 时间点:搜索前后25条记录
|
||||
target_timestamp = parse_datetime_to_timestamp(time_point)
|
||||
|
|
@ -204,12 +218,18 @@ class GetMemoryTool(BaseTool):
|
|||
|
||||
答案:"""
|
||||
|
||||
print(f"analysis_prompt: {analysis_prompt}")
|
||||
|
||||
|
||||
response, (reasoning, model_name, tool_calls) = await llm_request.generate_response_async(
|
||||
prompt=analysis_prompt,
|
||||
temperature=0.3,
|
||||
max_tokens=256
|
||||
)
|
||||
|
||||
|
||||
print(f"response: {response}")
|
||||
|
||||
if "无有效信息" in response:
|
||||
return ""
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue