From 1c956d414983ceab0460e81a836f7a96badc7cb0 Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 2 May 2025 01:16:23 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=E5=90=8C=E6=AD=A5hfc=E7=9A=84=E8=AE=B0?= =?UTF-8?q?=E5=BF=86=E7=9F=A5=E8=AF=86=E6=A3=80=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/action_planner.py | 234 +++++++++++++++++++++++-- src/plugins/PFC/reply_generator.py | 265 ++++++++++++++++++++++++++++- 2 files changed, 483 insertions(+), 16 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index 4770c6ce..a86ddf0a 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -1,5 +1,11 @@ import time -from typing import Tuple, Optional # 增加了 Optional +from typing import Tuple, Optional, Union # 增加了 Optional +from src.plugins.memory_system.Hippocampus import HippocampusManager +from src.plugins.knowledge.knowledge_lib import qa_manager +from src.common.database import db +from src.plugins.chat.utils import get_embedding +# import jieba # 如果需要旧版知识库的回退,可能需要 +# import re # 如果需要旧版知识库的回退,可能需要 from src.common.logger_manager import get_logger from ..models.utils_model import LLMRequest from ...config.config import global_config @@ -21,20 +27,21 @@ PROMPT_INITIAL_REPLY = """{persona_text}。现在你在参与一场QQ私聊, 【当前对话目标】 {goals_str} -{knowledge_info_str} - 【最近行动历史概要】 {action_history_summary} +【你想起来的相关知识】 +{retrieved_knowledge_str} 【上一次行动的详细情况和结果】 {last_action_context} 【时间和超时提示】 -{time_since_last_bot_message_info}{timeout_context} +{time_since_last_bot_message_info}{timeout_context} 【最近的对话记录】(包括你已成功发送的消息 和 新收到的消息) {chat_history_text} +【你的的回忆】 +{retrieved_memory_str} ------ 可选行动类型以及解释: -fetch_knowledge: 需要调取知识或记忆,当需要专业知识或特定信息时选择,对方若提到你不太认识的人名或实体也可以尝试选择 listening: 倾听对方发言,当你认为对方话才说到一半,发言明显未结束时选择 direct_reply: 直接回复对方 rethink_goal: 思考一个对话目标,当你觉得目前对话需要目标,或当前目标不再适用,或话题卡住时选择。注意私聊的环境是灵活的,有可能需要经常选择 @@ -54,20 +61,20 @@ PROMPT_FOLLOW_UP = """{persona_text}。现在你在参与一场QQ私聊,刚刚 【当前对话目标】 {goals_str} -{knowledge_info_str} - 【最近行动历史概要】 {action_history_summary} +【你想起来的相关知识】 +{retrieved_knowledge_str} 【上一次行动的详细情况和结果】 {last_action_context} 【时间和超时提示】 {time_since_last_bot_message_info}{timeout_context} 【最近的对话记录】(包括你已成功发送的消息 和 新收到的消息) {chat_history_text} - +【你的的回忆】 +{retrieved_memory_str} ------ 可选行动类型以及解释: -fetch_knowledge: 需要调取知识,当需要专业知识或特定信息时选择,对方若提到你不太认识的人名或实体也可以尝试选择 wait: 暂时不说话,留给对方交互空间,等待对方回复(尤其是在你刚发言后、或上次发言因重复、发言过多被拒时、或不确定做什么时,这是不错的选择) listening: 倾听对方发言(虽然你刚发过言,但如果对方立刻回复且明显话没说完,可以选择这个) send_new_message: 发送一条新消息继续对话,允许适当的追问、补充、深入话题,或开启相关新话题。**但是避免在因重复被拒后立即使用,也不要在对方没有回复的情况下过多的“消息轰炸”或重复发言** @@ -117,7 +124,133 @@ class ActionPlanner: self.name = global_config.BOT_NICKNAME self.private_name = private_name self.chat_observer = ChatObserver.get_instance(stream_id, private_name) - # self.action_planner_info = ActionPlannerInfo() # 移除未使用的变量 + + async def _get_memory_info(self, text: str) -> str: + """根据文本自动检索相关记忆""" + memory_prompt = "" + related_memory_info = "" + try: + related_memory = await HippocampusManager.get_instance().get_memory_from_text( + text=text, + max_memory_num=2, # 最多获取 2 条记忆 + max_memory_length=2, # 每条记忆长度限制(这个参数含义可能需确认) + max_depth=3, # 搜索深度 + fast_retrieval=False # 是否快速检索 + ) + if related_memory: + for memory in related_memory: + # memory[0] 是记忆ID, memory[1] 是记忆内容 + related_memory_info += memory[1] + "\n" # 将记忆内容拼接起来 + if related_memory_info: + memory_prompt = f"你回忆起:\n{related_memory_info.strip()}\n(以上是你的回忆,供参考)\n" + logger.debug(f"[私聊]决策层[{self.private_name}]自动检索到记忆: {related_memory_info.strip()[:100]}...") + else: + logger.debug(f"[私聊]决策层[{self.private_name}]自动检索记忆返回为空。") + else: + logger.debug(f"[私聊]决策层[{self.private_name}]未自动检索到相关记忆。") + except Exception as e: + logger.error(f"[私聊]决策层[{self.private_name}]自动检索记忆时出错: {e}") + # memory_prompt = "检索记忆时出错。\n" # 可以选择是否提示错误 + return memory_prompt + + async def _get_prompt_info_old(self, message: str, threshold: float) -> str: + """ + 旧版的知识检索方法,根据消息文本从旧知识库(knowledges collection)检索。 + (移植并自 heartflow_prompt_builder.py) + """ + related_info = "" + start_time = time.time() + logger.debug(f"[私聊]决策层[{self.private_name}]开始使用旧版知识检索,消息: {message[:30]}...") + + # 简化处理:直接使用整个消息进行查询,不再提取主题 + query_text = message.strip() + if not query_text: + logger.debug(f"[私聊]决策层[{self.private_name}]旧版知识检索:消息为空,跳过。") + return "" + + embedding = None + try: + embedding = await get_embedding(query_text, request_type="pfc_implicit_knowledge") + except Exception as e: + logger.error(f"[私聊]决策层[{self.private_name}]旧版知识检索:获取嵌入向量时出错: {str(e)}") + + if not embedding: + logger.error(f"[私聊]决策层[{self.private_name}]旧版知识检索:获取嵌入向量失败。") + return "" + + # 调用我们之前添加的 get_info_from_db 函数 + results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 + + logger.info(f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果") + + # 去重和格式化 + unique_contents = set() + final_results_content = [] + for result in results: + content = result.get("content", "").strip() + similarity = result.get("similarity", 0.0) + if content and content not in unique_contents: + unique_contents.add(content) + # 可以选择性地加入相似度信息,或者只加内容 + # final_results_content.append(f"[{similarity:.2f}] {content}") + final_results_content.append(content) + + if final_results_content: + related_info = "\n".join(final_results_content) + logger.debug(f"[私聊][{self.private_name}]旧版知识检索格式化后内容: {related_info[:100]}...") + else: + logger.debug(f"[私聊][{self.private_name}]旧版知识检索未找到合适结果或结果为空。") + + logger.info(f"[私聊][{self.private_name}]旧版知识检索总耗时: {time.time() - start_time:.3f}秒") + return related_info + + async def _get_prompt_info(self, message: str, threshold: float = 0.38) -> str: + """ + 自动检索相关知识的主函数。优先使用 LPMM,失败则回退到旧版。 + (移植自 heartflow_prompt_builder.py) + """ + related_info = "" + start_time = time.time() + message = message.strip() + if not message: + logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") + return "" + + logger.debug(f"[私聊][{self.private_name}]开始自动知识检索,消息: {message[:30]}...") + + # 1. 尝试从 LPMM 知识库获取知识 + try: + found_knowledge_from_lpmm = qa_manager.get_knowledge(message) + if found_knowledge_from_lpmm and found_knowledge_from_lpmm.strip(): + related_info = found_knowledge_from_lpmm.strip() + logger.info(f"[私聊][{self.private_name}]从 LPMM 知识库获取到知识,长度: {len(related_info)}") + logger.debug(f"[私聊][{self.private_name}]LPMM 知识内容: {related_info[:100]}...") + # LPMM 成功获取,直接返回 + logger.info(f"[私聊][{self.private_name}]自动知识检索(LPMM)耗时: {time.time() - start_time:.3f}秒") + return related_info + else: + logger.debug(f"[私聊][{self.private_name}]LPMM 知识库未返回有效知识,尝试旧版数据库检索。") + except Exception as e: + logger.error(f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。") + + # 2. 如果 LPMM 失败或无结果,尝试旧版数据库 + try: + knowledge_from_old = await self._get_prompt_info_old(message, threshold=threshold) + if knowledge_from_old and knowledge_from_old.strip(): + related_info = knowledge_from_old.strip() + logger.info(f"[私聊][{self.private_name}]从旧版数据库检索到知识,长度: {len(related_info)}") + # 旧版成功获取,返回 + logger.info(f"[私聊][{self.private_name}]自动知识检索(旧版)耗时: {time.time() - start_time:.3f}秒") + return related_info + else: + logger.debug(f"[私聊][{self.private_name}]旧版数据库也未检索到有效知识。") + + except Exception as e2: + logger.error(f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}") + + # 如果两种方法都失败或无结果 + logger.info(f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。") + return "" # 返回空字符串 # 修改 plan 方法签名,增加 last_successful_reply_action 参数 async def plan( @@ -468,7 +601,6 @@ class ActionPlanner: valid_actions = [ "direct_reply", "send_new_message", - "fetch_knowledge", "wait", "listening", "rethink_goal", @@ -489,3 +621,83 @@ class ActionPlanner: # 外层异常处理保持不变 logger.error(f"[私聊][{self.private_name}]规划行动时调用 LLM 或处理结果出错: {str(e)}") return "wait", f"行动规划处理中发生错误,暂时等待: {str(e)}" + +def get_info_from_db( + query_embedding: list, limit: int = 1, threshold: float = 0.5, return_raw: bool = False +) -> Union[str, list]: + """ + 从旧知识库 (knowledges collection) 中根据嵌入向量相似度检索信息。 + (移植自 heartflow_prompt_builder.py) + """ + if not query_embedding: + return "" if not return_raw else [] + # 使用余弦相似度计算 + pipeline = [ + { + "$addFields": { + "dotProduct": { + "$reduce": { + "input": {"$range": [0, {"$size": "$embedding"}]}, + "initialValue": 0, + "in": { + "$add": [ + "$$value", + { + "$multiply": [ + {"$arrayElemAt": ["$embedding", "$$this"]}, + {"$arrayElemAt": [query_embedding, "$$this"]}, + ] + }, + ] + }, + } + }, + "magnitude1": { + "$sqrt": { + "$reduce": { + "input": "$embedding", + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, + "magnitude2": { + "$sqrt": { + "$reduce": { + "input": query_embedding, + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, + } + }, + # 防止除以零错误,添加一个小的 epsilon + {"$addFields": {"similarity": {"$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}]}}}, + { + "$match": { + "similarity": {"$gte": threshold} # 只保留相似度大于等于阈值的结果 + } + }, + {"$sort": {"similarity": -1}}, + {"$limit": limit}, + {"$project": {"content": 1, "similarity": 1}}, + ] + + try: + results = list(db.knowledges.aggregate(pipeline)) + # 注意:这里的 logger 需要能访问到,或者在这个函数里获取 logger 实例 + # logger.debug(f"旧知识库查询结果数量: {len(results)}") # 暂时注释掉,避免 logger 未定义 + except Exception as e: + # logger.error(f"执行旧知识库聚合查询时出错: {e}") # 暂时注释掉 + results = [] + + if not results: + return "" if not return_raw else [] + + if return_raw: + return results + else: + # 返回所有找到的内容,用换行分隔 + return "\n".join(str(result["content"]) for result in results) + diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 890f807c..1b2362f3 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -1,4 +1,15 @@ -from typing import Tuple, List, Dict, Any +# 用于访问记忆系统 +from src.plugins.memory_system.Hippocampus import HippocampusManager +# 用于访问新的知识库 (LPMM) +from src.plugins.knowledge.knowledge_lib import qa_manager +# 用于访问数据库 (旧知识库需要) +from src.common.database import db +# 用于获取文本的嵌入向量 (旧知识库需要) +from src.plugins.chat.utils import get_embedding +# 可能用于旧知识库提取主题 (如果需要回退到旧方法) +# import jieba # 如果报错说找不到 jieba,可能需要安装: pip install jieba +# import re # 正则表达式库,通常 Python 自带 +from typing import Tuple, List, Dict, Any,Union from src.common.logger import get_module_logger from ..models.utils_model import LLMRequest from ...config.config import global_config @@ -8,6 +19,7 @@ from src.individuality.individuality import Individuality from .observation_info import ObservationInfo from .conversation_info import ConversationInfo from src.plugins.utils.chat_message_builder import build_readable_messages +import time logger = get_module_logger("reply_generator") @@ -18,17 +30,21 @@ PROMPT_DIRECT_REPLY = """{persona_text}。现在你在参与一场QQ私聊,请 当前对话目标:{goals_str} -{knowledge_info_str} +你有以下这些知识: +{retrieved_knowledge_str} +请你**记住上面的知识**,在回复中有可能会用到。 最近的聊天记录: {chat_history_text} +{related_memory_info}。 + 请根据上述信息,结合聊天记录,回复对方。该回复应该: 1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!) 2. 符合你的性格特征和身份细节 3. 通俗易懂,自然流畅,像正常聊天一样,简短(通常20字以内,除非特殊情况) -4. 可以适当利用相关知识,但不要生硬引用 +4. 可以适当利用相关知识和回忆,但**不要生硬引用**,若无必要,也可以不利用 5. 自然、得体,结合聊天记录逻辑合理,且没有重复表达同质内容 请注意把握聊天内容,不要回复的太有条理,可以有个性。请分清"你"和对方说的话,不要把"你"说的话当做对方说的话,这是你自己说的话。 @@ -43,17 +59,20 @@ PROMPT_SEND_NEW_MESSAGE = """{persona_text}。现在你在参与一场QQ私聊 当前对话目标:{goals_str} -{knowledge_info_str} +你有以下这些知识: +{retrieved_knowledge_str} +请你**记住上面的知识**,在发消息时有可能会用到。 最近的聊天记录: {chat_history_text} +{related_memory_info} 请根据上述信息,结合聊天记录,继续发一条新消息(例如对之前消息的补充,深入话题,或追问等等)。该消息应该: 1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!) 2. 符合你的性格特征和身份细节 3. 通俗易懂,自然流畅,像正常聊天一样,简短(通常20字以内,除非特殊情况) -4. 可以适当利用相关知识,但不要生硬引用 +4. 可以适当利用相关知识和回忆,但**不要生硬引用**,若无必要,也可以不利用 5. 跟之前你发的消息自然的衔接,逻辑合理,且没有重复表达同质内容或部分重叠内容 请注意把握聊天内容,不用太有条理,可以有个性。请分清"你"和对方说的话,不要把"你"说的话当做对方说的话,这是你自己说的话。 @@ -97,7 +116,132 @@ class ReplyGenerator: self.private_name = private_name self.chat_observer = ChatObserver.get_instance(stream_id, private_name) self.reply_checker = ReplyChecker(stream_id, private_name) + async def _get_memory_info(self, text: str) -> str: + """根据文本自动检索相关记忆""" + memory_prompt = "" + related_memory_info = "" + try: + related_memory = await HippocampusManager.get_instance().get_memory_from_text( + text=text, + max_memory_num=2, # 最多获取 2 条记忆 + max_memory_length=2, # 每条记忆长度限制(这个参数含义可能需确认) + max_depth=3, # 搜索深度 + fast_retrieval=False # 是否快速检索 + ) + if related_memory: + for memory in related_memory: + # memory[0] 是记忆ID, memory[1] 是记忆内容 + related_memory_info += memory[1] + "\n" # 将记忆内容拼接起来 + if related_memory_info: + memory_prompt = f"你回忆起:\n{related_memory_info.strip()}\n(以上是你的回忆,不一定是目前聊天里的人说的,回忆中别人说的事情也不一定是准确的,请记住)\n" + logger.debug(f"[私聊][{self.private_name}]自动检索到记忆: {related_memory_info.strip()[:100]}...") + else: + logger.debug(f"[私聊][{self.private_name}]自动检索记忆返回为空。") + else: + logger.debug(f"[私聊][{self.private_name}]未自动检索到相关记忆。") + except Exception as e: + logger.error(f"[私聊][{self.private_name}]自动检索记忆时出错: {e}") + # memory_prompt = "检索记忆时出错。\n" # 可以选择是否提示错误 + return memory_prompt + async def _get_prompt_info_old(self, message: str, threshold: float) -> str: + """ + 旧版的知识检索方法,根据消息文本从旧知识库(knowledges collection)检索。 + (移植并简化自 heartflow_prompt_builder.py) + """ + related_info = "" + start_time = time.time() + logger.debug(f"[私聊][{self.private_name}]开始使用旧版知识检索,消息: {message[:30]}...") + + # 简化处理:直接使用整个消息进行查询,不再提取主题 + query_text = message.strip() + if not query_text: + logger.debug(f"[私聊][{self.private_name}]旧版知识检索:消息为空,跳过。") + return "" + + embedding = None + try: + embedding = await get_embedding(query_text, request_type="pfc_implicit_knowledge") + except Exception as e: + logger.error(f"[私聊][{self.private_name}]旧版知识检索:获取嵌入向量时出错: {str(e)}") + + if not embedding: + logger.error(f"[私聊][{self.private_name}]旧版知识检索:获取嵌入向量失败。") + return "" + + # 调用我们之前添加的 get_info_from_db 函数 + results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 + + logger.info(f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果") + + # 去重和格式化 + unique_contents = set() + final_results_content = [] + for result in results: + content = result.get("content", "").strip() + similarity = result.get("similarity", 0.0) + if content and content not in unique_contents: + unique_contents.add(content) + # 可以选择性地加入相似度信息,或者只加内容 + # final_results_content.append(f"[{similarity:.2f}] {content}") + final_results_content.append(content) + + if final_results_content: + related_info = "\n".join(final_results_content) + logger.debug(f"[私聊][{self.private_name}]旧版知识检索格式化后内容: {related_info[:100]}...") + else: + logger.debug(f"[私聊][{self.private_name}]旧版知识检索未找到合适结果或结果为空。") + + logger.info(f"[私聊][{self.private_name}]旧版知识检索总耗时: {time.time() - start_time:.3f}秒") + return related_info + + async def _get_prompt_info(self, message: str, threshold: float = 0.38) -> str: + """ + 自动检索相关知识的主函数。优先使用 LPMM,失败则回退到旧版。 + (移植自 heartflow_prompt_builder.py) + """ + related_info = "" + start_time = time.time() + message = message.strip() + if not message: + logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") + return "" + + logger.debug(f"[私聊][{self.private_name}]开始自动知识检索,消息: {message[:30]}...") + + # 1. 尝试从 LPMM 知识库获取知识 + try: + found_knowledge_from_lpmm = qa_manager.get_knowledge(message) + if found_knowledge_from_lpmm and found_knowledge_from_lpmm.strip(): + related_info = found_knowledge_from_lpmm.strip() + logger.info(f"[私聊][{self.private_name}]从 LPMM 知识库获取到知识,长度: {len(related_info)}") + logger.debug(f"[私聊][{self.private_name}]LPMM 知识内容: {related_info[:100]}...") + # LPMM 成功获取,直接返回 + logger.info(f"[私聊][{self.private_name}]自动知识检索(LPMM)耗时: {time.time() - start_time:.3f}秒") + return related_info + else: + logger.debug(f"[私聊][{self.private_name}]LPMM 知识库未返回有效知识,尝试旧版数据库检索。") + except Exception as e: + logger.error(f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。") + + # 2. 如果 LPMM 失败或无结果,尝试旧版数据库 + try: + knowledge_from_old = await self._get_prompt_info_old(message, threshold=threshold) + if knowledge_from_old and knowledge_from_old.strip(): + related_info = knowledge_from_old.strip() + logger.info(f"[私聊][{self.private_name}]从旧版数据库检索到知识,长度: {len(related_info)}") + # 旧版成功获取,返回 + logger.info(f"[私聊][{self.private_name}]自动知识检索(旧版)耗时: {time.time() - start_time:.3f}秒") + return related_info + else: + logger.debug(f"[私聊][{self.private_name}]旧版数据库也未检索到有效知识。") + + except Exception as e2: + logger.error(f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}") + # 如果两种方法都失败或无结果 + logger.info(f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。") + return "" # 返回空字符串 + # 修改 generate 方法签名,增加 action_type 参数 async def generate( self, observation_info: ObservationInfo, conversation_info: ConversationInfo, action_type: str @@ -186,6 +330,36 @@ class ReplyGenerator: # 构建 Persona 文本 (persona_text) persona_text = f"你的名字是{self.name},{self.personality_info}。" + retrieved_memory_str = "" + retrieved_knowledge_str = "" + # 使用 chat_history_text 作为检索的上下文,因为它包含了最近的对话和新消息 + retrieval_context = chat_history_text + if retrieval_context and retrieval_context != "还没有聊天记录。" and retrieval_context != "[构建聊天记录出错]": + try: + # 提取记忆 + logger.debug(f"[私聊][{self.private_name}]开始自动检索记忆...") + retrieved_memory_str = await self._get_memory_info(text=retrieval_context) + if retrieved_memory_str: + logger.info(f"[私聊][{self.private_name}]自动检索到记忆片段。") + else: + logger.info(f"[私聊][{self.private_name}]未自动检索到相关记忆。") + + # 提取知识 + logger.debug(f"[私聊][{self.private_name}]开始自动检索知识...") + retrieved_knowledge_str = await self._get_prompt_info(message=retrieval_context) + if retrieved_knowledge_str: + logger.info(f"[私聊][{self.private_name}]自动检索到相关知识。") + else: + logger.info(f"[私聊][{self.private_name}]未自动检索到相关知识。") + + except Exception as retrieval_err: + logger.error(f"[私聊][{self.private_name}]在自动检索记忆/知识时发生错误: {retrieval_err}") + retrieved_memory_str = "检索记忆时出错。\n" + retrieved_knowledge_str = "检索知识时出错。\n" + else: + logger.debug(f"[私聊][{self.private_name}]聊天记录为空或无效,跳过自动记忆/知识检索。") + retrieved_memory_str = "无聊天记录,无法自动检索记忆。\n" + retrieved_knowledge_str = "无聊天记录,无法自动检索知识。\n" # --- 选择 Prompt --- if action_type == "send_new_message": @@ -204,6 +378,8 @@ class ReplyGenerator: goals_str=goals_str, chat_history_text=chat_history_text, knowledge_info_str=knowledge_info_str, + retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 + retrieved_knowledge_str=retrieved_knowledge_str if retrieved_knowledge_str else "无相关知识。" # 如果为空则提示无 ) # --- 调用 LLM 生成 --- @@ -226,3 +402,82 @@ class ReplyGenerator: (此方法逻辑保持不变) """ return await self.reply_checker.check(reply, goal, chat_history, chat_history_str, retry_count) + +def get_info_from_db( + query_embedding: list, limit: int = 1, threshold: float = 0.5, return_raw: bool = False +) -> Union[str, list]: + """ + 从旧知识库 (knowledges collection) 中根据嵌入向量相似度检索信息。 + (移植自 heartflow_prompt_builder.py) + """ + if not query_embedding: + return "" if not return_raw else [] + # 使用余弦相似度计算 + pipeline = [ + { + "$addFields": { + "dotProduct": { + "$reduce": { + "input": {"$range": [0, {"$size": "$embedding"}]}, + "initialValue": 0, + "in": { + "$add": [ + "$$value", + { + "$multiply": [ + {"$arrayElemAt": ["$embedding", "$$this"]}, + {"$arrayElemAt": [query_embedding, "$$this"]}, + ] + }, + ] + }, + } + }, + "magnitude1": { + "$sqrt": { + "$reduce": { + "input": "$embedding", + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, + "magnitude2": { + "$sqrt": { + "$reduce": { + "input": query_embedding, + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, + } + }, + # 防止除以零错误,添加一个小的 epsilon + {"$addFields": {"similarity": {"$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}]}}}, + { + "$match": { + "similarity": {"$gte": threshold} # 只保留相似度大于等于阈值的结果 + } + }, + {"$sort": {"similarity": -1}}, + {"$limit": limit}, + {"$project": {"content": 1, "similarity": 1}}, + ] + + try: + results = list(db.knowledges.aggregate(pipeline)) + # 注意:这里的 logger 需要能访问到,或者在这个函数里获取 logger 实例 + # logger.debug(f"旧知识库查询结果数量: {len(results)}") # 暂时注释掉,避免 logger 未定义 + except Exception as e: + # logger.error(f"执行旧知识库聚合查询时出错: {e}") # 暂时注释掉 + results = [] + + if not results: + return "" if not return_raw else [] + + if return_raw: + return results + else: + # 返回所有找到的内容,用换行分隔 + return "\n".join(str(result["content"]) for result in results) From 80747000b2c1c8bf5753c89212bc4a66c3638449 Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 2 May 2025 11:34:24 +0800 Subject: [PATCH 02/10] fix --- src/plugins/PFC/action_planner.py | 75 +++++++++++++++++++----------- src/plugins/PFC/conversation.py | 44 +++++++++--------- src/plugins/PFC/reply_generator.py | 56 +++++++++++----------- 3 files changed, 99 insertions(+), 76 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index a86ddf0a..1f10c973 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -360,38 +360,38 @@ class ActionPlanner: goals_str = "- 构建对话目标时出错。\n" # --- 知识信息字符串构建开始 --- - knowledge_info_str = "【已获取的相关知识和记忆】\n" - try: + # knowledge_info_str = "【已获取的相关知识和记忆】\n" + # try: # 检查 conversation_info 是否有 knowledge_list 并且不为空 - if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: + # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: # 最多只显示最近的 5 条知识,防止 Prompt 过长 - recent_knowledge = conversation_info.knowledge_list[-5:] - for i, knowledge_item in enumerate(recent_knowledge): - if isinstance(knowledge_item, dict): - query = knowledge_item.get("query", "未知查询") - knowledge = knowledge_item.get("knowledge", "无知识内容") - source = knowledge_item.get("source", "未知来源") + # recent_knowledge = conversation_info.knowledge_list[-5:] + # for i, knowledge_item in enumerate(recent_knowledge): + # if isinstance(knowledge_item, dict): + # query = knowledge_item.get("query", "未知查询") + # knowledge = knowledge_item.get("knowledge", "无知识内容") + # source = knowledge_item.get("source", "未知来源") # 只取知识内容的前 2000 个字,避免太长 - knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge - knowledge_info_str += ( - f"{i + 1}. 关于 '{query}' 的知识 (来源: {source}):\n {knowledge_snippet}\n" - ) - else: + # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge + # knowledge_info_str += ( + # f"{i + 1}. 关于 '{query}' 的知识 (来源: {source}):\n {knowledge_snippet}\n" + # ) + # else: # 处理列表里不是字典的异常情况 - knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" + # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" - if not recent_knowledge: # 如果 knowledge_list 存在但为空 - knowledge_info_str += "- 暂无相关知识和记忆。\n" + # if not recent_knowledge: # 如果 knowledge_list 存在但为空 + # knowledge_info_str += "- 暂无相关知识和记忆。\n" - else: + # else: # 如果 conversation_info 没有 knowledge_list 属性,或者列表为空 - knowledge_info_str += "- 暂无相关知识记忆。\n" - except AttributeError: - logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") - knowledge_info_str += "- 获取知识列表时出错。\n" - except Exception as e: - logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") - knowledge_info_str += "- 处理知识列表时出错。\n" + # knowledge_info_str += "- 暂无相关知识记忆。\n" + # except AttributeError: + # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") + # knowledge_info_str += "- 获取知识列表时出错。\n" + # except Exception as e: + # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") + # knowledge_info_str += "- 处理知识列表时出错。\n" # --- 知识信息字符串构建结束 --- # 获取聊天历史记录 (chat_history_text) @@ -501,6 +501,27 @@ class ActionPlanner: last_action_context += f"- 该行动当前状态: {status}\n" # self.last_successful_action_type = None # 非完成状态,清除记录 + retrieved_memory_str_planner = "" + retrieved_knowledge_str_planner = "" + retrieval_context = chat_history_text # 使用聊天记录作为检索上下文 + if retrieval_context and retrieval_context != "还没有聊天记录。" and retrieval_context != "[构建聊天记录出错]": + try: + logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动检索记忆...") + retrieved_memory_str_planner = await self._get_memory_info(text=retrieval_context) + logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索记忆 {'完成' if retrieved_memory_str_planner else '无结果'}。") + + logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动知识检索...") + retrieved_knowledge_str_planner = await self._get_prompt_info(message=retrieval_context) + logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。") + except Exception as retrieval_err: + logger.error(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索时出错: {retrieval_err}") + retrieved_memory_str_planner = "检索记忆时出错。\n" + retrieved_knowledge_str_planner = "检索知识时出错。\n" + else: + logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 无有效聊天记录,跳过自动检索。") + retrieved_memory_str_planner = "无聊天记录无法检索记忆。\n" + retrieved_knowledge_str_planner = "无聊天记录无法检索知识。\n" + # --- 选择 Prompt --- if last_successful_reply_action in ["direct_reply", "send_new_message"]: prompt_template = PROMPT_FOLLOW_UP @@ -518,7 +539,9 @@ class ActionPlanner: time_since_last_bot_message_info=time_since_last_bot_message_info, timeout_context=timeout_context, chat_history_text=chat_history_text if chat_history_text.strip() else "还没有聊天记录。", - knowledge_info_str=knowledge_info_str, + # knowledge_info_str=knowledge_info_str, + retrieved_memory_str=retrieved_memory_str_planner if retrieved_memory_str_planner else "无相关记忆。", + retrieved_knowledge_str=retrieved_knowledge_str_planner if retrieved_knowledge_str_planner else "无相关知识。" ) logger.debug(f"[私聊][{self.private_name}]发送到LLM的最终提示词:\n------\n{prompt}\n------") diff --git a/src/plugins/PFC/conversation.py b/src/plugins/PFC/conversation.py index 9f744c30..2ecd6824 100644 --- a/src/plugins/PFC/conversation.py +++ b/src/plugins/PFC/conversation.py @@ -505,31 +505,31 @@ class Conversation: } conversation_info.done_action.append(wait_action_record) - elif action == "fetch_knowledge": - self.state = ConversationState.FETCHING - knowledge_query = reason - try: + # elif action == "fetch_knowledge": + # self.state = ConversationState.FETCHING + # knowledge_query = reason + # try: # 检查 knowledge_fetcher 是否存在 - if not hasattr(self, "knowledge_fetcher"): - logger.error(f"[私聊][{self.private_name}]KnowledgeFetcher 未初始化,无法获取知识。") - raise AttributeError("KnowledgeFetcher not initialized") + # if not hasattr(self, "knowledge_fetcher"): + # logger.error(f"[私聊][{self.private_name}]KnowledgeFetcher 未初始化,无法获取知识。") + # raise AttributeError("KnowledgeFetcher not initialized") - knowledge, source = await self.knowledge_fetcher.fetch(knowledge_query, observation_info.chat_history) - logger.info(f"[私聊][{self.private_name}]获取到知识: {knowledge[:100]}..., 来源: {source}") - if knowledge: + # knowledge, source = await self.knowledge_fetcher.fetch(knowledge_query, observation_info.chat_history) + # logger.info(f"[私聊][{self.private_name}]获取到知识: {knowledge[:100]}..., 来源: {source}") + # if knowledge: # 确保 knowledge_list 存在 - if not hasattr(conversation_info, "knowledge_list"): - conversation_info.knowledge_list = [] - conversation_info.knowledge_list.append( - {"query": knowledge_query, "knowledge": knowledge, "source": source} - ) - action_successful = True - except Exception as fetch_err: - logger.error(f"[私聊][{self.private_name}]获取知识时出错: {str(fetch_err)}") - conversation_info.done_action[action_index].update( - {"status": "recall", "final_reason": f"获取知识失败: {str(fetch_err)}"} - ) - self.conversation_info.last_successful_reply_action = None # 重置状态 + # if not hasattr(conversation_info, "knowledge_list"): + # conversation_info.knowledge_list = [] + # conversation_info.knowledge_list.append( + # {"query": knowledge_query, "knowledge": knowledge, "source": source} + # ) + # action_successful = True + # except Exception as fetch_err: + # logger.error(f"[私聊][{self.private_name}]获取知识时出错: {str(fetch_err)}") + # conversation_info.done_action[action_index].update( + # {"status": "recall", "final_reason": f"获取知识失败: {str(fetch_err)}"} + # ) + # self.conversation_info.last_successful_reply_action = None # 重置状态 elif action == "rethink_goal": self.state = ConversationState.RETHINKING diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 1b2362f3..a9ed61fd 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -37,7 +37,7 @@ PROMPT_DIRECT_REPLY = """{persona_text}。现在你在参与一场QQ私聊,请 最近的聊天记录: {chat_history_text} -{related_memory_info}。 +{retrieved_memory_str} 请根据上述信息,结合聊天记录,回复对方。该回复应该: @@ -66,7 +66,7 @@ PROMPT_SEND_NEW_MESSAGE = """{persona_text}。现在你在参与一场QQ私聊 最近的聊天记录: {chat_history_text} -{related_memory_info} +{retrieved_memory_str} 请根据上述信息,结合聊天记录,继续发一条新消息(例如对之前消息的补充,深入话题,或追问等等)。该消息应该: 1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!) @@ -282,36 +282,36 @@ class ReplyGenerator: goals_str = "- 目前没有明确对话目标\n" # 简化无目标情况 # --- 新增:构建知识信息字符串 --- - knowledge_info_str = "【供参考的相关知识和记忆】\n" # 稍微改下标题,表明是供参考 - try: + # knowledge_info_str = "【供参考的相关知识和记忆】\n" # 稍微改下标题,表明是供参考 + # try: # 检查 conversation_info 是否有 knowledge_list 并且不为空 - if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: + # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: # 最多只显示最近的 5 条知识 - recent_knowledge = conversation_info.knowledge_list[-5:] - for i, knowledge_item in enumerate(recent_knowledge): - if isinstance(knowledge_item, dict): - query = knowledge_item.get("query", "未知查询") - knowledge = knowledge_item.get("knowledge", "无知识内容") - source = knowledge_item.get("source", "未知来源") + # recent_knowledge = conversation_info.knowledge_list[-5:] + # for i, knowledge_item in enumerate(recent_knowledge): + # if isinstance(knowledge_item, dict): + # query = knowledge_item.get("query", "未知查询") + # knowledge = knowledge_item.get("knowledge", "无知识内容") + # source = knowledge_item.get("source", "未知来源") # 只取知识内容的前 2000 个字 - knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge - knowledge_info_str += ( - f"{i + 1}. 关于 '{query}' (来源: {source}): {knowledge_snippet}\n" # 格式微调,更简洁 - ) - else: - knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" + # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge + # knowledge_info_str += ( + # f"{i + 1}. 关于 '{query}' (来源: {source}): {knowledge_snippet}\n" # 格式微调,更简洁 + # ) + # else: + # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" - if not recent_knowledge: - knowledge_info_str += "- 暂无。\n" # 更简洁的提示 + # if not recent_knowledge: + # knowledge_info_str += "- 暂无。\n" # 更简洁的提示 - else: - knowledge_info_str += "- 暂无。\n" - except AttributeError: - logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") - knowledge_info_str += "- 获取知识列表时出错。\n" - except Exception as e: - logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") - knowledge_info_str += "- 处理知识列表时出错。\n" + # else: + # knowledge_info_str += "- 暂无。\n" + # except AttributeError: + # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") + # knowledge_info_str += "- 获取知识列表时出错。\n" + # except Exception as e: + # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") + # knowledge_info_str += "- 处理知识列表时出错。\n" # 获取聊天历史记录 (chat_history_text) chat_history_text = observation_info.chat_history_str @@ -377,7 +377,7 @@ class ReplyGenerator: persona_text=persona_text, goals_str=goals_str, chat_history_text=chat_history_text, - knowledge_info_str=knowledge_info_str, + # knowledge_info_str=knowledge_info_str, retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 retrieved_knowledge_str=retrieved_knowledge_str if retrieved_knowledge_str else "无相关知识。" # 如果为空则提示无 ) From 931b2c3d2a57e9a3236207c0fa1841dd9a480294 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 18:32:30 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E9=87=8D=E5=A4=8D?= =?UTF-8?q?=E5=AD=98=E5=85=A5=E4=BF=A1=E6=81=AF=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/message_sender.py | 2 +- src/plugins/PFC/observation_info.py | 37 +++++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/plugins/PFC/message_sender.py b/src/plugins/PFC/message_sender.py index 53c20374..a33314e3 100644 --- a/src/plugins/PFC/message_sender.py +++ b/src/plugins/PFC/message_sender.py @@ -70,7 +70,7 @@ class DirectMessageSender: message_set = MessageSet(chat_stream, message_id) message_set.add_message(message) await message_manager.add_message(message_set) - await self.storage.store_message(message, chat_stream) + # await self.storage.store_message(message, chat_stream) logger.info(f"[私聊][{self.private_name}]PFC消息已发送: {content}") except Exception as e: diff --git a/src/plugins/PFC/observation_info.py b/src/plugins/PFC/observation_info.py index c7572955..5e0bdc16 100644 --- a/src/plugins/PFC/observation_info.py +++ b/src/plugins/PFC/observation_info.py @@ -6,6 +6,7 @@ from .chat_observer import ChatObserver from .chat_states import NotificationHandler, NotificationType, Notification from src.plugins.utils.chat_message_builder import build_readable_messages import traceback # 导入 traceback 用于调试 +from src.config.config import global_config logger = get_module_logger("observation_info") @@ -252,6 +253,27 @@ class ObservationInfo: message_time = message.get("time") message_id = message.get("message_id") processed_text = message.get("processed_plain_text", "") + is_bot_message = False + + # 确定发送者和是否为机器人 + if user_info: + sender_id = str(user_info.user_id) + if sender_id == str(global_config.BOT_QQ): + is_bot_message = True + # 更新机器人最后发言时间 + if message_time and message_time > (self.last_bot_speak_time or 0): + self.last_bot_speak_time = message_time + else: + # 更新用户最后发言时间 + if message_time and message_time > (self.last_user_speak_time or 0): + self.last_user_speak_time = message_time + self.active_users.add(sender_id) + self.last_message_sender = sender_id + else: + logger.warning( + f"[私聊][{self.private_name}]处理消息更新时缺少有效的 UserInfo 对象, message_id: {message_id}" + ) + self.last_message_sender = None # 只有在新消息到达时才更新 last_message 相关信息 if message_time and message_time > (self.last_message_time or 0): @@ -278,12 +300,17 @@ class ObservationInfo: ) self.last_message_sender = None # 发送者未知 - # 将原始消息字典添加到未处理列表 - self.unprocessed_messages.append(message) - self.new_messages_count = len(self.unprocessed_messages) # 直接用列表长度 + if not is_bot_message: + # 将原始消息字典添加到未处理列表 + self.unprocessed_messages.append(message) + self.new_messages_count = len(self.unprocessed_messages) # 直接用列表长度 + logger.debug(f"[私聊][{self.private_name}] 用户新消息加入未处理列表. 当前未处理数: {self.new_messages_count}") + # logger.debug(f"[私聊][{self.private_name}]消息更新: last_time={self.last_message_time}, new_count={self.new_messages_count}") + self.update_changed() # 标记状态已改变 + else: + # 是机器人自己的消息,仅记录日志,不加入未处理列表 + logger.debug(f"[私聊][{self.private_name}] 观察到机器人自身消息 (ID: {message_id}),仅更新时间戳,不处理。") - # logger.debug(f"[私聊][{self.private_name}]消息更新: last_time={self.last_message_time}, new_count={self.new_messages_count}") - self.update_changed() # 标记状态已改变 else: # 如果消息时间戳不是最新的,可能不需要处理,或者记录一个警告 pass From bea8ce81505ca64652ba6246aa53ada2942ab6dc Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 18:34:59 +0800 Subject: [PATCH 04/10] ruff --- src/plugins/PFC/message_sender.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/PFC/message_sender.py b/src/plugins/PFC/message_sender.py index a33314e3..0c232ed8 100644 --- a/src/plugins/PFC/message_sender.py +++ b/src/plugins/PFC/message_sender.py @@ -6,7 +6,7 @@ from ..chat.message import Message from maim_message import UserInfo, Seg from src.plugins.chat.message import MessageSending, MessageSet from src.plugins.chat.message_sender import message_manager -from ..storage.storage import MessageStorage +# from ..storage.storage import MessageStorage from ...config.config import global_config @@ -18,7 +18,7 @@ class DirectMessageSender: def __init__(self, private_name: str): self.private_name = private_name - self.storage = MessageStorage() + # self.storage = MessageStorage() async def send_message( self, From 7fe55ddf689e6ba56b93e205905dc652cf62029a Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 19:05:52 +0800 Subject: [PATCH 05/10] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=90=AF=E7=94=A8?= =?UTF-8?q?=E6=96=B0=E7=9A=84=20message=5Fsender=20=E5=90=8E=E5=AF=BC?= =?UTF-8?q?=E8=87=B4=E9=BA=A6=E9=BA=A6=E6=B6=88=E6=81=AF=E8=A2=AB=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E5=AD=98=E5=85=A5=E6=95=B0=E6=8D=AE=E5=BA=93=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/action_planner.py | 12 +++++----- src/plugins/PFC/message_sender.py | 3 --- src/plugins/PFC/observation_info.py | 37 ++++------------------------- 3 files changed, 11 insertions(+), 41 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index 1f10c973..ba16e8b0 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -514,13 +514,13 @@ class ActionPlanner: retrieved_knowledge_str_planner = await self._get_prompt_info(message=retrieval_context) logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。") except Exception as retrieval_err: - logger.error(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索时出错: {retrieval_err}") - retrieved_memory_str_planner = "检索记忆时出错。\n" - retrieved_knowledge_str_planner = "检索知识时出错。\n" + logger.error(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索时出错: {retrieval_err}") + retrieved_memory_str_planner = "检索记忆时出错。\n" + retrieved_knowledge_str_planner = "检索知识时出错。\n" else: - logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 无有效聊天记录,跳过自动检索。") - retrieved_memory_str_planner = "无聊天记录无法检索记忆。\n" - retrieved_knowledge_str_planner = "无聊天记录无法检索知识。\n" + logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 无有效聊天记录,跳过自动检索。") + retrieved_memory_str_planner = "无聊天记录无法检索记忆。\n" + retrieved_knowledge_str_planner = "无聊天记录无法检索知识。\n" # --- 选择 Prompt --- if last_successful_reply_action in ["direct_reply", "send_new_message"]: diff --git a/src/plugins/PFC/message_sender.py b/src/plugins/PFC/message_sender.py index 0c232ed8..abbafa9f 100644 --- a/src/plugins/PFC/message_sender.py +++ b/src/plugins/PFC/message_sender.py @@ -6,7 +6,6 @@ from ..chat.message import Message from maim_message import UserInfo, Seg from src.plugins.chat.message import MessageSending, MessageSet from src.plugins.chat.message_sender import message_manager -# from ..storage.storage import MessageStorage from ...config.config import global_config @@ -18,7 +17,6 @@ class DirectMessageSender: def __init__(self, private_name: str): self.private_name = private_name - # self.storage = MessageStorage() async def send_message( self, @@ -70,7 +68,6 @@ class DirectMessageSender: message_set = MessageSet(chat_stream, message_id) message_set.add_message(message) await message_manager.add_message(message_set) - # await self.storage.store_message(message, chat_stream) logger.info(f"[私聊][{self.private_name}]PFC消息已发送: {content}") except Exception as e: diff --git a/src/plugins/PFC/observation_info.py b/src/plugins/PFC/observation_info.py index 5e0bdc16..c7572955 100644 --- a/src/plugins/PFC/observation_info.py +++ b/src/plugins/PFC/observation_info.py @@ -6,7 +6,6 @@ from .chat_observer import ChatObserver from .chat_states import NotificationHandler, NotificationType, Notification from src.plugins.utils.chat_message_builder import build_readable_messages import traceback # 导入 traceback 用于调试 -from src.config.config import global_config logger = get_module_logger("observation_info") @@ -253,27 +252,6 @@ class ObservationInfo: message_time = message.get("time") message_id = message.get("message_id") processed_text = message.get("processed_plain_text", "") - is_bot_message = False - - # 确定发送者和是否为机器人 - if user_info: - sender_id = str(user_info.user_id) - if sender_id == str(global_config.BOT_QQ): - is_bot_message = True - # 更新机器人最后发言时间 - if message_time and message_time > (self.last_bot_speak_time or 0): - self.last_bot_speak_time = message_time - else: - # 更新用户最后发言时间 - if message_time and message_time > (self.last_user_speak_time or 0): - self.last_user_speak_time = message_time - self.active_users.add(sender_id) - self.last_message_sender = sender_id - else: - logger.warning( - f"[私聊][{self.private_name}]处理消息更新时缺少有效的 UserInfo 对象, message_id: {message_id}" - ) - self.last_message_sender = None # 只有在新消息到达时才更新 last_message 相关信息 if message_time and message_time > (self.last_message_time or 0): @@ -300,17 +278,12 @@ class ObservationInfo: ) self.last_message_sender = None # 发送者未知 - if not is_bot_message: - # 将原始消息字典添加到未处理列表 - self.unprocessed_messages.append(message) - self.new_messages_count = len(self.unprocessed_messages) # 直接用列表长度 - logger.debug(f"[私聊][{self.private_name}] 用户新消息加入未处理列表. 当前未处理数: {self.new_messages_count}") - # logger.debug(f"[私聊][{self.private_name}]消息更新: last_time={self.last_message_time}, new_count={self.new_messages_count}") - self.update_changed() # 标记状态已改变 - else: - # 是机器人自己的消息,仅记录日志,不加入未处理列表 - logger.debug(f"[私聊][{self.private_name}] 观察到机器人自身消息 (ID: {message_id}),仅更新时间戳,不处理。") + # 将原始消息字典添加到未处理列表 + self.unprocessed_messages.append(message) + self.new_messages_count = len(self.unprocessed_messages) # 直接用列表长度 + # logger.debug(f"[私聊][{self.private_name}]消息更新: last_time={self.last_message_time}, new_count={self.new_messages_count}") + self.update_changed() # 标记状态已改变 else: # 如果消息时间戳不是最新的,可能不需要处理,或者记录一个警告 pass From 5a7c54aceffaee1b7aef41970581e3e6f975d57a Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 19:13:40 +0800 Subject: [PATCH 06/10] ruff --- src/plugins/PFC/action_planner.py | 8 ++++---- src/plugins/PFC/reply_generator.py | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index ba16e8b0..40333fc4 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -188,7 +188,7 @@ class ActionPlanner: final_results_content = [] for result in results: content = result.get("content", "").strip() - similarity = result.get("similarity", 0.0) + # similarity = result.get("similarity", 0.0) if content and content not in unique_contents: unique_contents.add(content) # 可以选择性地加入相似度信息,或者只加内容 @@ -213,8 +213,8 @@ class ActionPlanner: start_time = time.time() message = message.strip() if not message: - logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") - return "" + logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") + return "" logger.debug(f"[私聊][{self.private_name}]开始自动知识检索,消息: {message[:30]}...") @@ -712,7 +712,7 @@ def get_info_from_db( # 注意:这里的 logger 需要能访问到,或者在这个函数里获取 logger 实例 # logger.debug(f"旧知识库查询结果数量: {len(results)}") # 暂时注释掉,避免 logger 未定义 except Exception as e: - # logger.error(f"执行旧知识库聚合查询时出错: {e}") # 暂时注释掉 + logger.debug(f"执行旧知识库聚合查询时出错: {e}") results = [] if not results: diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index a9ed61fd..3599e6e1 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -179,7 +179,7 @@ class ReplyGenerator: final_results_content = [] for result in results: content = result.get("content", "").strip() - similarity = result.get("similarity", 0.0) + # similarity = result.get("similarity", 0.0) if content and content not in unique_contents: unique_contents.add(content) # 可以选择性地加入相似度信息,或者只加内容 @@ -204,8 +204,8 @@ class ReplyGenerator: start_time = time.time() message = message.strip() if not message: - logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") - return "" + logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") + return "" logger.debug(f"[私聊][{self.private_name}]开始自动知识检索,消息: {message[:30]}...") @@ -340,17 +340,17 @@ class ReplyGenerator: logger.debug(f"[私聊][{self.private_name}]开始自动检索记忆...") retrieved_memory_str = await self._get_memory_info(text=retrieval_context) if retrieved_memory_str: - logger.info(f"[私聊][{self.private_name}]自动检索到记忆片段。") + logger.info(f"[私聊][{self.private_name}]自动检索到记忆片段。") else: - logger.info(f"[私聊][{self.private_name}]未自动检索到相关记忆。") + logger.info(f"[私聊][{self.private_name}]未自动检索到相关记忆。") # 提取知识 logger.debug(f"[私聊][{self.private_name}]开始自动检索知识...") retrieved_knowledge_str = await self._get_prompt_info(message=retrieval_context) if retrieved_knowledge_str: - logger.info(f"[私聊][{self.private_name}]自动检索到相关知识。") + logger.info(f"[私聊][{self.private_name}]自动检索到相关知识。") else: - logger.info(f"[私聊][{self.private_name}]未自动检索到相关知识。") + logger.info(f"[私聊][{self.private_name}]未自动检索到相关知识。") except Exception as retrieval_err: logger.error(f"[私聊][{self.private_name}]在自动检索记忆/知识时发生错误: {retrieval_err}") @@ -470,7 +470,7 @@ def get_info_from_db( # 注意:这里的 logger 需要能访问到,或者在这个函数里获取 logger 实例 # logger.debug(f"旧知识库查询结果数量: {len(results)}") # 暂时注释掉,避免 logger 未定义 except Exception as e: - # logger.error(f"执行旧知识库聚合查询时出错: {e}") # 暂时注释掉 + logger.debug(f"执行旧知识库聚合查询时出错: {e}") results = [] if not results: From 3304dc6b290ad8d45be8bbc4113a2f7594a94685 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 2 May 2025 11:13:59 +0000 Subject: [PATCH 07/10] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/action_planner.py | 115 +++++++++++++++++------------ src/plugins/PFC/conversation.py | 46 ++++++------ src/plugins/PFC/reply_generator.py | 102 +++++++++++++++---------- 3 files changed, 154 insertions(+), 109 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index 40333fc4..8a45bf13 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -4,6 +4,7 @@ from src.plugins.memory_system.Hippocampus import HippocampusManager from src.plugins.knowledge.knowledge_lib import qa_manager from src.common.database import db from src.plugins.chat.utils import get_embedding + # import jieba # 如果需要旧版知识库的回退,可能需要 # import re # 如果需要旧版知识库的回退,可能需要 from src.common.logger_manager import get_logger @@ -124,7 +125,7 @@ class ActionPlanner: self.name = global_config.BOT_NICKNAME self.private_name = private_name self.chat_observer = ChatObserver.get_instance(stream_id, private_name) - + async def _get_memory_info(self, text: str) -> str: """根据文本自动检索相关记忆""" memory_prompt = "" @@ -132,18 +133,20 @@ class ActionPlanner: try: related_memory = await HippocampusManager.get_instance().get_memory_from_text( text=text, - max_memory_num=2, # 最多获取 2 条记忆 - max_memory_length=2, # 每条记忆长度限制(这个参数含义可能需确认) - max_depth=3, # 搜索深度 - fast_retrieval=False # 是否快速检索 + max_memory_num=2, # 最多获取 2 条记忆 + max_memory_length=2, # 每条记忆长度限制(这个参数含义可能需确认) + max_depth=3, # 搜索深度 + fast_retrieval=False, # 是否快速检索 ) if related_memory: for memory in related_memory: # memory[0] 是记忆ID, memory[1] 是记忆内容 - related_memory_info += memory[1] + "\n" # 将记忆内容拼接起来 + related_memory_info += memory[1] + "\n" # 将记忆内容拼接起来 if related_memory_info: memory_prompt = f"你回忆起:\n{related_memory_info.strip()}\n(以上是你的回忆,供参考)\n" - logger.debug(f"[私聊]决策层[{self.private_name}]自动检索到记忆: {related_memory_info.strip()[:100]}...") + logger.debug( + f"[私聊]决策层[{self.private_name}]自动检索到记忆: {related_memory_info.strip()[:100]}..." + ) else: logger.debug(f"[私聊]决策层[{self.private_name}]自动检索记忆返回为空。") else: @@ -179,9 +182,11 @@ class ActionPlanner: return "" # 调用我们之前添加的 get_info_from_db 函数 - results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 + results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 - logger.info(f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果") + logger.info( + f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果" + ) # 去重和格式化 unique_contents = set() @@ -231,7 +236,9 @@ class ActionPlanner: else: logger.debug(f"[私聊][{self.private_name}]LPMM 知识库未返回有效知识,尝试旧版数据库检索。") except Exception as e: - logger.error(f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。") + logger.error( + f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。" + ) # 2. 如果 LPMM 失败或无结果,尝试旧版数据库 try: @@ -246,11 +253,15 @@ class ActionPlanner: logger.debug(f"[私聊][{self.private_name}]旧版数据库也未检索到有效知识。") except Exception as e2: - logger.error(f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}") + logger.error( + f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}" + ) # 如果两种方法都失败或无结果 - logger.info(f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。") - return "" # 返回空字符串 + logger.info( + f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。" + ) + return "" # 返回空字符串 # 修改 plan 方法签名,增加 last_successful_reply_action 参数 async def plan( @@ -362,36 +373,36 @@ class ActionPlanner: # --- 知识信息字符串构建开始 --- # knowledge_info_str = "【已获取的相关知识和记忆】\n" # try: - # 检查 conversation_info 是否有 knowledge_list 并且不为空 - # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: - # 最多只显示最近的 5 条知识,防止 Prompt 过长 - # recent_knowledge = conversation_info.knowledge_list[-5:] - # for i, knowledge_item in enumerate(recent_knowledge): - # if isinstance(knowledge_item, dict): - # query = knowledge_item.get("query", "未知查询") - # knowledge = knowledge_item.get("knowledge", "无知识内容") - # source = knowledge_item.get("source", "未知来源") - # 只取知识内容的前 2000 个字,避免太长 - # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge - # knowledge_info_str += ( - # f"{i + 1}. 关于 '{query}' 的知识 (来源: {source}):\n {knowledge_snippet}\n" - # ) - # else: - # 处理列表里不是字典的异常情况 - # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" + # 检查 conversation_info 是否有 knowledge_list 并且不为空 + # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: + # 最多只显示最近的 5 条知识,防止 Prompt 过长 + # recent_knowledge = conversation_info.knowledge_list[-5:] + # for i, knowledge_item in enumerate(recent_knowledge): + # if isinstance(knowledge_item, dict): + # query = knowledge_item.get("query", "未知查询") + # knowledge = knowledge_item.get("knowledge", "无知识内容") + # source = knowledge_item.get("source", "未知来源") + # 只取知识内容的前 2000 个字,避免太长 + # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge + # knowledge_info_str += ( + # f"{i + 1}. 关于 '{query}' 的知识 (来源: {source}):\n {knowledge_snippet}\n" + # ) + # else: + # 处理列表里不是字典的异常情况 + # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" - # if not recent_knowledge: # 如果 knowledge_list 存在但为空 - # knowledge_info_str += "- 暂无相关知识和记忆。\n" + # if not recent_knowledge: # 如果 knowledge_list 存在但为空 + # knowledge_info_str += "- 暂无相关知识和记忆。\n" - # else: - # 如果 conversation_info 没有 knowledge_list 属性,或者列表为空 - # knowledge_info_str += "- 暂无相关知识记忆。\n" + # else: + # 如果 conversation_info 没有 knowledge_list 属性,或者列表为空 + # knowledge_info_str += "- 暂无相关知识记忆。\n" # except AttributeError: - # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") - # knowledge_info_str += "- 获取知识列表时出错。\n" + # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") + # knowledge_info_str += "- 获取知识列表时出错。\n" # except Exception as e: - # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") - # knowledge_info_str += "- 处理知识列表时出错。\n" + # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") + # knowledge_info_str += "- 处理知识列表时出错。\n" # --- 知识信息字符串构建结束 --- # 获取聊天历史记录 (chat_history_text) @@ -503,16 +514,20 @@ class ActionPlanner: retrieved_memory_str_planner = "" retrieved_knowledge_str_planner = "" - retrieval_context = chat_history_text # 使用聊天记录作为检索上下文 + retrieval_context = chat_history_text # 使用聊天记录作为检索上下文 if retrieval_context and retrieval_context != "还没有聊天记录。" and retrieval_context != "[构建聊天记录出错]": try: logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动检索记忆...") retrieved_memory_str_planner = await self._get_memory_info(text=retrieval_context) - logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索记忆 {'完成' if retrieved_memory_str_planner else '无结果'}。") + logger.info( + f"[私聊][{self.private_name}] (ActionPlanner) 自动检索记忆 {'完成' if retrieved_memory_str_planner else '无结果'}。" + ) logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动知识检索...") retrieved_knowledge_str_planner = await self._get_prompt_info(message=retrieval_context) - logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。") + logger.info( + f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。" + ) except Exception as retrieval_err: logger.error(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索时出错: {retrieval_err}") retrieved_memory_str_planner = "检索记忆时出错。\n" @@ -541,7 +556,9 @@ class ActionPlanner: chat_history_text=chat_history_text if chat_history_text.strip() else "还没有聊天记录。", # knowledge_info_str=knowledge_info_str, retrieved_memory_str=retrieved_memory_str_planner if retrieved_memory_str_planner else "无相关记忆。", - retrieved_knowledge_str=retrieved_knowledge_str_planner if retrieved_knowledge_str_planner else "无相关知识。" + retrieved_knowledge_str=retrieved_knowledge_str_planner + if retrieved_knowledge_str_planner + else "无相关知识。", ) logger.debug(f"[私聊][{self.private_name}]发送到LLM的最终提示词:\n------\n{prompt}\n------") @@ -644,7 +661,8 @@ class ActionPlanner: # 外层异常处理保持不变 logger.error(f"[私聊][{self.private_name}]规划行动时调用 LLM 或处理结果出错: {str(e)}") return "wait", f"行动规划处理中发生错误,暂时等待: {str(e)}" - + + def get_info_from_db( query_embedding: list, limit: int = 1, threshold: float = 0.5, return_raw: bool = False ) -> Union[str, list]: @@ -696,7 +714,13 @@ def get_info_from_db( } }, # 防止除以零错误,添加一个小的 epsilon - {"$addFields": {"similarity": {"$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}]}}}, + { + "$addFields": { + "similarity": { + "$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}] + } + } + }, { "$match": { "similarity": {"$gte": threshold} # 只保留相似度大于等于阈值的结果 @@ -723,4 +747,3 @@ def get_info_from_db( else: # 返回所有找到的内容,用换行分隔 return "\n".join(str(result["content"]) for result in results) - diff --git a/src/plugins/PFC/conversation.py b/src/plugins/PFC/conversation.py index 2ecd6824..b2541ebb 100644 --- a/src/plugins/PFC/conversation.py +++ b/src/plugins/PFC/conversation.py @@ -506,30 +506,30 @@ class Conversation: conversation_info.done_action.append(wait_action_record) # elif action == "fetch_knowledge": - # self.state = ConversationState.FETCHING - # knowledge_query = reason - # try: - # 检查 knowledge_fetcher 是否存在 - # if not hasattr(self, "knowledge_fetcher"): - # logger.error(f"[私聊][{self.private_name}]KnowledgeFetcher 未初始化,无法获取知识。") - # raise AttributeError("KnowledgeFetcher not initialized") + # self.state = ConversationState.FETCHING + # knowledge_query = reason + # try: + # 检查 knowledge_fetcher 是否存在 + # if not hasattr(self, "knowledge_fetcher"): + # logger.error(f"[私聊][{self.private_name}]KnowledgeFetcher 未初始化,无法获取知识。") + # raise AttributeError("KnowledgeFetcher not initialized") - # knowledge, source = await self.knowledge_fetcher.fetch(knowledge_query, observation_info.chat_history) - # logger.info(f"[私聊][{self.private_name}]获取到知识: {knowledge[:100]}..., 来源: {source}") - # if knowledge: - # 确保 knowledge_list 存在 - # if not hasattr(conversation_info, "knowledge_list"): - # conversation_info.knowledge_list = [] - # conversation_info.knowledge_list.append( - # {"query": knowledge_query, "knowledge": knowledge, "source": source} - # ) - # action_successful = True - # except Exception as fetch_err: - # logger.error(f"[私聊][{self.private_name}]获取知识时出错: {str(fetch_err)}") - # conversation_info.done_action[action_index].update( - # {"status": "recall", "final_reason": f"获取知识失败: {str(fetch_err)}"} - # ) - # self.conversation_info.last_successful_reply_action = None # 重置状态 + # knowledge, source = await self.knowledge_fetcher.fetch(knowledge_query, observation_info.chat_history) + # logger.info(f"[私聊][{self.private_name}]获取到知识: {knowledge[:100]}..., 来源: {source}") + # if knowledge: + # 确保 knowledge_list 存在 + # if not hasattr(conversation_info, "knowledge_list"): + # conversation_info.knowledge_list = [] + # conversation_info.knowledge_list.append( + # {"query": knowledge_query, "knowledge": knowledge, "source": source} + # ) + # action_successful = True + # except Exception as fetch_err: + # logger.error(f"[私聊][{self.private_name}]获取知识时出错: {str(fetch_err)}") + # conversation_info.done_action[action_index].update( + # {"status": "recall", "final_reason": f"获取知识失败: {str(fetch_err)}"} + # ) + # self.conversation_info.last_successful_reply_action = None # 重置状态 elif action == "rethink_goal": self.state = ConversationState.RETHINKING diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 3599e6e1..2c01cc0e 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -1,15 +1,19 @@ # 用于访问记忆系统 from src.plugins.memory_system.Hippocampus import HippocampusManager + # 用于访问新的知识库 (LPMM) from src.plugins.knowledge.knowledge_lib import qa_manager + # 用于访问数据库 (旧知识库需要) from src.common.database import db + # 用于获取文本的嵌入向量 (旧知识库需要) from src.plugins.chat.utils import get_embedding + # 可能用于旧知识库提取主题 (如果需要回退到旧方法) # import jieba # 如果报错说找不到 jieba,可能需要安装: pip install jieba # import re # 正则表达式库,通常 Python 自带 -from typing import Tuple, List, Dict, Any,Union +from typing import Tuple, List, Dict, Any, Union from src.common.logger import get_module_logger from ..models.utils_model import LLMRequest from ...config.config import global_config @@ -116,6 +120,7 @@ class ReplyGenerator: self.private_name = private_name self.chat_observer = ChatObserver.get_instance(stream_id, private_name) self.reply_checker = ReplyChecker(stream_id, private_name) + async def _get_memory_info(self, text: str) -> str: """根据文本自动检索相关记忆""" memory_prompt = "" @@ -123,15 +128,15 @@ class ReplyGenerator: try: related_memory = await HippocampusManager.get_instance().get_memory_from_text( text=text, - max_memory_num=2, # 最多获取 2 条记忆 - max_memory_length=2, # 每条记忆长度限制(这个参数含义可能需确认) - max_depth=3, # 搜索深度 - fast_retrieval=False # 是否快速检索 + max_memory_num=2, # 最多获取 2 条记忆 + max_memory_length=2, # 每条记忆长度限制(这个参数含义可能需确认) + max_depth=3, # 搜索深度 + fast_retrieval=False, # 是否快速检索 ) if related_memory: for memory in related_memory: # memory[0] 是记忆ID, memory[1] 是记忆内容 - related_memory_info += memory[1] + "\n" # 将记忆内容拼接起来 + related_memory_info += memory[1] + "\n" # 将记忆内容拼接起来 if related_memory_info: memory_prompt = f"你回忆起:\n{related_memory_info.strip()}\n(以上是你的回忆,不一定是目前聊天里的人说的,回忆中别人说的事情也不一定是准确的,请记住)\n" logger.debug(f"[私聊][{self.private_name}]自动检索到记忆: {related_memory_info.strip()[:100]}...") @@ -170,9 +175,11 @@ class ReplyGenerator: return "" # 调用我们之前添加的 get_info_from_db 函数 - results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 + results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 - logger.info(f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果") + logger.info( + f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果" + ) # 去重和格式化 unique_contents = set() @@ -222,7 +229,9 @@ class ReplyGenerator: else: logger.debug(f"[私聊][{self.private_name}]LPMM 知识库未返回有效知识,尝试旧版数据库检索。") except Exception as e: - logger.error(f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。") + logger.error( + f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。" + ) # 2. 如果 LPMM 失败或无结果,尝试旧版数据库 try: @@ -237,11 +246,15 @@ class ReplyGenerator: logger.debug(f"[私聊][{self.private_name}]旧版数据库也未检索到有效知识。") except Exception as e2: - logger.error(f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}") + logger.error( + f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}" + ) # 如果两种方法都失败或无结果 - logger.info(f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。") - return "" # 返回空字符串 - + logger.info( + f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。" + ) + return "" # 返回空字符串 + # 修改 generate 方法签名,增加 action_type 参数 async def generate( self, observation_info: ObservationInfo, conversation_info: ConversationInfo, action_type: str @@ -284,34 +297,34 @@ class ReplyGenerator: # --- 新增:构建知识信息字符串 --- # knowledge_info_str = "【供参考的相关知识和记忆】\n" # 稍微改下标题,表明是供参考 # try: - # 检查 conversation_info 是否有 knowledge_list 并且不为空 - # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: - # 最多只显示最近的 5 条知识 - # recent_knowledge = conversation_info.knowledge_list[-5:] - # for i, knowledge_item in enumerate(recent_knowledge): - # if isinstance(knowledge_item, dict): - # query = knowledge_item.get("query", "未知查询") - # knowledge = knowledge_item.get("knowledge", "无知识内容") - # source = knowledge_item.get("source", "未知来源") - # 只取知识内容的前 2000 个字 - # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge - # knowledge_info_str += ( - # f"{i + 1}. 关于 '{query}' (来源: {source}): {knowledge_snippet}\n" # 格式微调,更简洁 - # ) - # else: - # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" + # 检查 conversation_info 是否有 knowledge_list 并且不为空 + # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: + # 最多只显示最近的 5 条知识 + # recent_knowledge = conversation_info.knowledge_list[-5:] + # for i, knowledge_item in enumerate(recent_knowledge): + # if isinstance(knowledge_item, dict): + # query = knowledge_item.get("query", "未知查询") + # knowledge = knowledge_item.get("knowledge", "无知识内容") + # source = knowledge_item.get("source", "未知来源") + # 只取知识内容的前 2000 个字 + # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge + # knowledge_info_str += ( + # f"{i + 1}. 关于 '{query}' (来源: {source}): {knowledge_snippet}\n" # 格式微调,更简洁 + # ) + # else: + # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" - # if not recent_knowledge: - # knowledge_info_str += "- 暂无。\n" # 更简洁的提示 + # if not recent_knowledge: + # knowledge_info_str += "- 暂无。\n" # 更简洁的提示 - # else: - # knowledge_info_str += "- 暂无。\n" + # else: + # knowledge_info_str += "- 暂无。\n" # except AttributeError: - # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") - # knowledge_info_str += "- 获取知识列表时出错。\n" + # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") + # knowledge_info_str += "- 获取知识列表时出错。\n" # except Exception as e: - # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") - # knowledge_info_str += "- 处理知识列表时出错。\n" + # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") + # knowledge_info_str += "- 处理知识列表时出错。\n" # 获取聊天历史记录 (chat_history_text) chat_history_text = observation_info.chat_history_str @@ -378,8 +391,10 @@ class ReplyGenerator: goals_str=goals_str, chat_history_text=chat_history_text, # knowledge_info_str=knowledge_info_str, - retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 - retrieved_knowledge_str=retrieved_knowledge_str if retrieved_knowledge_str else "无相关知识。" # 如果为空则提示无 + retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 + retrieved_knowledge_str=retrieved_knowledge_str + if retrieved_knowledge_str + else "无相关知识。", # 如果为空则提示无 ) # --- 调用 LLM 生成 --- @@ -403,6 +418,7 @@ class ReplyGenerator: """ return await self.reply_checker.check(reply, goal, chat_history, chat_history_str, retry_count) + def get_info_from_db( query_embedding: list, limit: int = 1, threshold: float = 0.5, return_raw: bool = False ) -> Union[str, list]: @@ -454,7 +470,13 @@ def get_info_from_db( } }, # 防止除以零错误,添加一个小的 epsilon - {"$addFields": {"similarity": {"$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}]}}}, + { + "$addFields": { + "similarity": { + "$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}] + } + } + }, { "$match": { "similarity": {"$gte": threshold} # 只保留相似度大于等于阈值的结果 From 00851c3d8f8b373e97c2f26d4b9f3d34baecf72e Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 2 May 2025 23:19:47 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=E7=AE=80=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/action_planner.py | 242 +++-------------------------- src/plugins/PFC/reply_generator.py | 232 +++------------------------ 2 files changed, 41 insertions(+), 433 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index 40333fc4..d9bb672d 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -4,6 +4,10 @@ from src.plugins.memory_system.Hippocampus import HippocampusManager from src.plugins.knowledge.knowledge_lib import qa_manager from src.common.database import db from src.plugins.chat.utils import get_embedding +# --- NEW IMPORT --- +# 从 heartflow 导入知识检索和数据库查询函数/实例 +from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder, get_info_from_db +# --- END NEW IMPORT --- # import jieba # 如果需要旧版知识库的回退,可能需要 # import re # 如果需要旧版知识库的回退,可能需要 from src.common.logger_manager import get_logger @@ -34,7 +38,7 @@ PROMPT_INITIAL_REPLY = """{persona_text}。现在你在参与一场QQ私聊, 【上一次行动的详细情况和结果】 {last_action_context} 【时间和超时提示】 -{time_since_last_bot_message_info}{timeout_context} +{time_since_last_bot_message_info}{timeout_context} 【最近的对话记录】(包括你已成功发送的消息 和 新收到的消息) {chat_history_text} 【你的的回忆】 @@ -57,7 +61,7 @@ block_and_ignore: 更加极端的结束对话方式,直接结束对话并在 注意:请严格按照JSON格式输出,不要包含任何其他内容。""" # Prompt(2): 上一次成功回复后,决定继续发言时的决策 Prompt -PROMPT_FOLLOW_UP = """{persona_text}。现在你在参与一场QQ私聊,刚刚你已经回复了对方,请根据以下【所有信息】审慎且灵活的决策下一步行动,可以继续发送新消息,可以等待,可以倾听,可以调取知识,甚至可以屏蔽对方: +PROMPT_FOLLOW_UP = """{persona_text}。现在你在参与一场QQ私聊,刚刚你已经回复了对方,请根据以下【所有信息】审慎且灵活的决策下一步行动,可以继续发送新消息,可以等待,可以倾听,可以调取知识,甚至可以屏蔽对方: 【当前对话目标】 {goals_str} @@ -68,7 +72,7 @@ PROMPT_FOLLOW_UP = """{persona_text}。现在你在参与一场QQ私聊,刚刚 【上一次行动的详细情况和结果】 {last_action_context} 【时间和超时提示】 -{time_since_last_bot_message_info}{timeout_context} +{time_since_last_bot_message_info}{timeout_context} 【最近的对话记录】(包括你已成功发送的消息 和 新收到的消息) {chat_history_text} 【你的的回忆】 @@ -124,7 +128,8 @@ class ActionPlanner: self.name = global_config.BOT_NICKNAME self.private_name = private_name self.chat_observer = ChatObserver.get_instance(stream_id, private_name) - + + # _get_memory_info 保持不变 async def _get_memory_info(self, text: str) -> str: """根据文本自动检索相关记忆""" memory_prompt = "" @@ -153,104 +158,9 @@ class ActionPlanner: # memory_prompt = "检索记忆时出错。\n" # 可以选择是否提示错误 return memory_prompt - async def _get_prompt_info_old(self, message: str, threshold: float) -> str: - """ - 旧版的知识检索方法,根据消息文本从旧知识库(knowledges collection)检索。 - (移植并自 heartflow_prompt_builder.py) - """ - related_info = "" - start_time = time.time() - logger.debug(f"[私聊]决策层[{self.private_name}]开始使用旧版知识检索,消息: {message[:30]}...") + # --- REMOVED _get_prompt_info_old --- - # 简化处理:直接使用整个消息进行查询,不再提取主题 - query_text = message.strip() - if not query_text: - logger.debug(f"[私聊]决策层[{self.private_name}]旧版知识检索:消息为空,跳过。") - return "" - - embedding = None - try: - embedding = await get_embedding(query_text, request_type="pfc_implicit_knowledge") - except Exception as e: - logger.error(f"[私聊]决策层[{self.private_name}]旧版知识检索:获取嵌入向量时出错: {str(e)}") - - if not embedding: - logger.error(f"[私聊]决策层[{self.private_name}]旧版知识检索:获取嵌入向量失败。") - return "" - - # 调用我们之前添加的 get_info_from_db 函数 - results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 - - logger.info(f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果") - - # 去重和格式化 - unique_contents = set() - final_results_content = [] - for result in results: - content = result.get("content", "").strip() - # similarity = result.get("similarity", 0.0) - if content and content not in unique_contents: - unique_contents.add(content) - # 可以选择性地加入相似度信息,或者只加内容 - # final_results_content.append(f"[{similarity:.2f}] {content}") - final_results_content.append(content) - - if final_results_content: - related_info = "\n".join(final_results_content) - logger.debug(f"[私聊][{self.private_name}]旧版知识检索格式化后内容: {related_info[:100]}...") - else: - logger.debug(f"[私聊][{self.private_name}]旧版知识检索未找到合适结果或结果为空。") - - logger.info(f"[私聊][{self.private_name}]旧版知识检索总耗时: {time.time() - start_time:.3f}秒") - return related_info - - async def _get_prompt_info(self, message: str, threshold: float = 0.38) -> str: - """ - 自动检索相关知识的主函数。优先使用 LPMM,失败则回退到旧版。 - (移植自 heartflow_prompt_builder.py) - """ - related_info = "" - start_time = time.time() - message = message.strip() - if not message: - logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") - return "" - - logger.debug(f"[私聊][{self.private_name}]开始自动知识检索,消息: {message[:30]}...") - - # 1. 尝试从 LPMM 知识库获取知识 - try: - found_knowledge_from_lpmm = qa_manager.get_knowledge(message) - if found_knowledge_from_lpmm and found_knowledge_from_lpmm.strip(): - related_info = found_knowledge_from_lpmm.strip() - logger.info(f"[私聊][{self.private_name}]从 LPMM 知识库获取到知识,长度: {len(related_info)}") - logger.debug(f"[私聊][{self.private_name}]LPMM 知识内容: {related_info[:100]}...") - # LPMM 成功获取,直接返回 - logger.info(f"[私聊][{self.private_name}]自动知识检索(LPMM)耗时: {time.time() - start_time:.3f}秒") - return related_info - else: - logger.debug(f"[私聊][{self.private_name}]LPMM 知识库未返回有效知识,尝试旧版数据库检索。") - except Exception as e: - logger.error(f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。") - - # 2. 如果 LPMM 失败或无结果,尝试旧版数据库 - try: - knowledge_from_old = await self._get_prompt_info_old(message, threshold=threshold) - if knowledge_from_old and knowledge_from_old.strip(): - related_info = knowledge_from_old.strip() - logger.info(f"[私聊][{self.private_name}]从旧版数据库检索到知识,长度: {len(related_info)}") - # 旧版成功获取,返回 - logger.info(f"[私聊][{self.private_name}]自动知识检索(旧版)耗时: {time.time() - start_time:.3f}秒") - return related_info - else: - logger.debug(f"[私聊][{self.private_name}]旧版数据库也未检索到有效知识。") - - except Exception as e2: - logger.error(f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}") - - # 如果两种方法都失败或无结果 - logger.info(f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。") - return "" # 返回空字符串 + # --- REMOVED _get_prompt_info --- # 修改 plan 方法签名,增加 last_successful_reply_action 参数 async def plan( @@ -359,40 +269,6 @@ class ActionPlanner: logger.error(f"[私聊][{self.private_name}]构建对话目标字符串时出错: {e}") goals_str = "- 构建对话目标时出错。\n" - # --- 知识信息字符串构建开始 --- - # knowledge_info_str = "【已获取的相关知识和记忆】\n" - # try: - # 检查 conversation_info 是否有 knowledge_list 并且不为空 - # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: - # 最多只显示最近的 5 条知识,防止 Prompt 过长 - # recent_knowledge = conversation_info.knowledge_list[-5:] - # for i, knowledge_item in enumerate(recent_knowledge): - # if isinstance(knowledge_item, dict): - # query = knowledge_item.get("query", "未知查询") - # knowledge = knowledge_item.get("knowledge", "无知识内容") - # source = knowledge_item.get("source", "未知来源") - # 只取知识内容的前 2000 个字,避免太长 - # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge - # knowledge_info_str += ( - # f"{i + 1}. 关于 '{query}' 的知识 (来源: {source}):\n {knowledge_snippet}\n" - # ) - # else: - # 处理列表里不是字典的异常情况 - # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" - - # if not recent_knowledge: # 如果 knowledge_list 存在但为空 - # knowledge_info_str += "- 暂无相关知识和记忆。\n" - - # else: - # 如果 conversation_info 没有 knowledge_list 属性,或者列表为空 - # knowledge_info_str += "- 暂无相关知识记忆。\n" - # except AttributeError: - # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") - # knowledge_info_str += "- 获取知识列表时出错。\n" - # except Exception as e: - # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") - # knowledge_info_str += "- 处理知识列表时出错。\n" - # --- 知识信息字符串构建结束 --- # 获取聊天历史记录 (chat_history_text) try: @@ -501,18 +377,24 @@ class ActionPlanner: last_action_context += f"- 该行动当前状态: {status}\n" # self.last_successful_action_type = None # 非完成状态,清除记录 - retrieved_memory_str_planner = "" + retrieved_memory_str_planner = "" retrieved_knowledge_str_planner = "" retrieval_context = chat_history_text # 使用聊天记录作为检索上下文 if retrieval_context and retrieval_context != "还没有聊天记录。" and retrieval_context != "[构建聊天记录出错]": try: + # 调用本地的 _get_memory_info logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动检索记忆...") retrieved_memory_str_planner = await self._get_memory_info(text=retrieval_context) logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索记忆 {'完成' if retrieved_memory_str_planner else '无结果'}。") - logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动知识检索...") - retrieved_knowledge_str_planner = await self._get_prompt_info(message=retrieval_context) + # --- MODIFIED KNOWLEDGE RETRIEVAL --- + # 调用导入的 prompt_builder.get_prompt_info + logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动检索知识 (使用导入函数)...") + # 使用导入的 prompt_builder 实例及其方法 + retrieved_knowledge_str_planner = await prompt_builder.get_prompt_info(message=retrieval_context, threshold=0.38) + # --- END MODIFIED KNOWLEDGE RETRIEVAL --- logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。") + except Exception as retrieval_err: logger.error(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索时出错: {retrieval_err}") retrieved_memory_str_planner = "检索记忆时出错。\n" @@ -539,7 +421,7 @@ class ActionPlanner: time_since_last_bot_message_info=time_since_last_bot_message_info, timeout_context=timeout_context, chat_history_text=chat_history_text if chat_history_text.strip() else "还没有聊天记录。", - # knowledge_info_str=knowledge_info_str, + # knowledge_info_str=knowledge_info_str, # 移除了旧知识展示方式 retrieved_memory_str=retrieved_memory_str_planner if retrieved_memory_str_planner else "无相关记忆。", retrieved_knowledge_str=retrieved_knowledge_str_planner if retrieved_knowledge_str_planner else "无相关知识。" ) @@ -643,84 +525,4 @@ class ActionPlanner: except Exception as e: # 外层异常处理保持不变 logger.error(f"[私聊][{self.private_name}]规划行动时调用 LLM 或处理结果出错: {str(e)}") - return "wait", f"行动规划处理中发生错误,暂时等待: {str(e)}" - -def get_info_from_db( - query_embedding: list, limit: int = 1, threshold: float = 0.5, return_raw: bool = False -) -> Union[str, list]: - """ - 从旧知识库 (knowledges collection) 中根据嵌入向量相似度检索信息。 - (移植自 heartflow_prompt_builder.py) - """ - if not query_embedding: - return "" if not return_raw else [] - # 使用余弦相似度计算 - pipeline = [ - { - "$addFields": { - "dotProduct": { - "$reduce": { - "input": {"$range": [0, {"$size": "$embedding"}]}, - "initialValue": 0, - "in": { - "$add": [ - "$$value", - { - "$multiply": [ - {"$arrayElemAt": ["$embedding", "$$this"]}, - {"$arrayElemAt": [query_embedding, "$$this"]}, - ] - }, - ] - }, - } - }, - "magnitude1": { - "$sqrt": { - "$reduce": { - "input": "$embedding", - "initialValue": 0, - "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, - } - } - }, - "magnitude2": { - "$sqrt": { - "$reduce": { - "input": query_embedding, - "initialValue": 0, - "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, - } - } - }, - } - }, - # 防止除以零错误,添加一个小的 epsilon - {"$addFields": {"similarity": {"$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}]}}}, - { - "$match": { - "similarity": {"$gte": threshold} # 只保留相似度大于等于阈值的结果 - } - }, - {"$sort": {"similarity": -1}}, - {"$limit": limit}, - {"$project": {"content": 1, "similarity": 1}}, - ] - - try: - results = list(db.knowledges.aggregate(pipeline)) - # 注意:这里的 logger 需要能访问到,或者在这个函数里获取 logger 实例 - # logger.debug(f"旧知识库查询结果数量: {len(results)}") # 暂时注释掉,避免 logger 未定义 - except Exception as e: - logger.debug(f"执行旧知识库聚合查询时出错: {e}") - results = [] - - if not results: - return "" if not return_raw else [] - - if return_raw: - return results - else: - # 返回所有找到的内容,用换行分隔 - return "\n".join(str(result["content"]) for result in results) - + return "wait", f"行动规划处理中发生错误,暂时等待: {str(e)}" \ No newline at end of file diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 3599e6e1..cf0af43f 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -6,6 +6,10 @@ from src.plugins.knowledge.knowledge_lib import qa_manager from src.common.database import db # 用于获取文本的嵌入向量 (旧知识库需要) from src.plugins.chat.utils import get_embedding +# --- NEW IMPORT --- +# 从 heartflow 导入知识检索和数据库查询函数/实例 +from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder, get_info_from_db +# --- END NEW IMPORT --- # 可能用于旧知识库提取主题 (如果需要回退到旧方法) # import jieba # 如果报错说找不到 jieba,可能需要安装: pip install jieba # import re # 正则表达式库,通常 Python 自带 @@ -55,7 +59,7 @@ PROMPT_DIRECT_REPLY = """{persona_text}。现在你在参与一场QQ私聊,请 请直接输出回复内容,不需要任何额外格式。""" # Prompt for send_new_message (追问/补充) -PROMPT_SEND_NEW_MESSAGE = """{persona_text}。现在你在参与一场QQ私聊,**刚刚你已经发送了一条或多条消息**,现在请根据以下信息再发一条新消息: +PROMPT_SEND_NEW_MESSAGE = """{persona_text}。现在你在参与一场QQ私聊,**刚刚你已经发送了一条或多条消息**,现在请根据以下信息再发一条新消息: 当前对话目标:{goals_str} @@ -68,7 +72,7 @@ PROMPT_SEND_NEW_MESSAGE = """{persona_text}。现在你在参与一场QQ私聊 {retrieved_memory_str} -请根据上述信息,结合聊天记录,继续发一条新消息(例如对之前消息的补充,深入话题,或追问等等)。该消息应该: +请根据上述信息,结合聊天记录,继续发一条新消息(例如对之前消息的补充,深入话题,或追问等等)。该消息应该: 1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!) 2. 符合你的性格特征和身份细节 3. 通俗易懂,自然流畅,像正常聊天一样,简短(通常20字以内,除非特殊情况) @@ -116,6 +120,8 @@ class ReplyGenerator: self.private_name = private_name self.chat_observer = ChatObserver.get_instance(stream_id, private_name) self.reply_checker = ReplyChecker(stream_id, private_name) + + # _get_memory_info 保持不变,因为它不是与 heartflow 重复的部分 async def _get_memory_info(self, text: str) -> str: """根据文本自动检索相关记忆""" memory_prompt = "" @@ -144,104 +150,10 @@ class ReplyGenerator: # memory_prompt = "检索记忆时出错。\n" # 可以选择是否提示错误 return memory_prompt - async def _get_prompt_info_old(self, message: str, threshold: float) -> str: - """ - 旧版的知识检索方法,根据消息文本从旧知识库(knowledges collection)检索。 - (移植并简化自 heartflow_prompt_builder.py) - """ - related_info = "" - start_time = time.time() - logger.debug(f"[私聊][{self.private_name}]开始使用旧版知识检索,消息: {message[:30]}...") + # --- REMOVED _get_prompt_info_old --- - # 简化处理:直接使用整个消息进行查询,不再提取主题 - query_text = message.strip() - if not query_text: - logger.debug(f"[私聊][{self.private_name}]旧版知识检索:消息为空,跳过。") - return "" + # --- REMOVED _get_prompt_info --- - embedding = None - try: - embedding = await get_embedding(query_text, request_type="pfc_implicit_knowledge") - except Exception as e: - logger.error(f"[私聊][{self.private_name}]旧版知识检索:获取嵌入向量时出错: {str(e)}") - - if not embedding: - logger.error(f"[私聊][{self.private_name}]旧版知识检索:获取嵌入向量失败。") - return "" - - # 调用我们之前添加的 get_info_from_db 函数 - results = get_info_from_db(embedding, limit=5, threshold=threshold, return_raw=True) # 最多查 5 条 - - logger.info(f"[私聊][{self.private_name}]旧版知识库查询完成,耗时: {time.time() - start_time:.3f}秒,获取{len(results)}条结果") - - # 去重和格式化 - unique_contents = set() - final_results_content = [] - for result in results: - content = result.get("content", "").strip() - # similarity = result.get("similarity", 0.0) - if content and content not in unique_contents: - unique_contents.add(content) - # 可以选择性地加入相似度信息,或者只加内容 - # final_results_content.append(f"[{similarity:.2f}] {content}") - final_results_content.append(content) - - if final_results_content: - related_info = "\n".join(final_results_content) - logger.debug(f"[私聊][{self.private_name}]旧版知识检索格式化后内容: {related_info[:100]}...") - else: - logger.debug(f"[私聊][{self.private_name}]旧版知识检索未找到合适结果或结果为空。") - - logger.info(f"[私聊][{self.private_name}]旧版知识检索总耗时: {time.time() - start_time:.3f}秒") - return related_info - - async def _get_prompt_info(self, message: str, threshold: float = 0.38) -> str: - """ - 自动检索相关知识的主函数。优先使用 LPMM,失败则回退到旧版。 - (移植自 heartflow_prompt_builder.py) - """ - related_info = "" - start_time = time.time() - message = message.strip() - if not message: - logger.debug(f"[私聊][{self.private_name}]自动知识检索:输入消息为空。") - return "" - - logger.debug(f"[私聊][{self.private_name}]开始自动知识检索,消息: {message[:30]}...") - - # 1. 尝试从 LPMM 知识库获取知识 - try: - found_knowledge_from_lpmm = qa_manager.get_knowledge(message) - if found_knowledge_from_lpmm and found_knowledge_from_lpmm.strip(): - related_info = found_knowledge_from_lpmm.strip() - logger.info(f"[私聊][{self.private_name}]从 LPMM 知识库获取到知识,长度: {len(related_info)}") - logger.debug(f"[私聊][{self.private_name}]LPMM 知识内容: {related_info[:100]}...") - # LPMM 成功获取,直接返回 - logger.info(f"[私聊][{self.private_name}]自动知识检索(LPMM)耗时: {time.time() - start_time:.3f}秒") - return related_info - else: - logger.debug(f"[私聊][{self.private_name}]LPMM 知识库未返回有效知识,尝试旧版数据库检索。") - except Exception as e: - logger.error(f"[私聊][{self.private_name}]调用 LPMM 知识库 (qa_manager.get_knowledge) 时发生异常: {str(e)},尝试旧版数据库检索。") - - # 2. 如果 LPMM 失败或无结果,尝试旧版数据库 - try: - knowledge_from_old = await self._get_prompt_info_old(message, threshold=threshold) - if knowledge_from_old and knowledge_from_old.strip(): - related_info = knowledge_from_old.strip() - logger.info(f"[私聊][{self.private_name}]从旧版数据库检索到知识,长度: {len(related_info)}") - # 旧版成功获取,返回 - logger.info(f"[私聊][{self.private_name}]自动知识检索(旧版)耗时: {time.time() - start_time:.3f}秒") - return related_info - else: - logger.debug(f"[私聊][{self.private_name}]旧版数据库也未检索到有效知识。") - - except Exception as e2: - logger.error(f"[私聊][{self.private_name}]调用旧版知识库检索 (_get_prompt_info_old) 时也发生异常: {str(e2)}") - # 如果两种方法都失败或无结果 - logger.info(f"[私聊][{self.private_name}]自动知识检索总耗时: {time.time() - start_time:.3f}秒,未找到任何相关知识。") - return "" # 返回空字符串 - # 修改 generate 方法签名,增加 action_type 参数 async def generate( self, observation_info: ObservationInfo, conversation_info: ConversationInfo, action_type: str @@ -281,37 +193,6 @@ class ReplyGenerator: else: goals_str = "- 目前没有明确对话目标\n" # 简化无目标情况 - # --- 新增:构建知识信息字符串 --- - # knowledge_info_str = "【供参考的相关知识和记忆】\n" # 稍微改下标题,表明是供参考 - # try: - # 检查 conversation_info 是否有 knowledge_list 并且不为空 - # if hasattr(conversation_info, "knowledge_list") and conversation_info.knowledge_list: - # 最多只显示最近的 5 条知识 - # recent_knowledge = conversation_info.knowledge_list[-5:] - # for i, knowledge_item in enumerate(recent_knowledge): - # if isinstance(knowledge_item, dict): - # query = knowledge_item.get("query", "未知查询") - # knowledge = knowledge_item.get("knowledge", "无知识内容") - # source = knowledge_item.get("source", "未知来源") - # 只取知识内容的前 2000 个字 - # knowledge_snippet = knowledge[:2000] + "..." if len(knowledge) > 2000 else knowledge - # knowledge_info_str += ( - # f"{i + 1}. 关于 '{query}' (来源: {source}): {knowledge_snippet}\n" # 格式微调,更简洁 - # ) - # else: - # knowledge_info_str += f"{i + 1}. 发现一条格式不正确的知识记录。\n" - - # if not recent_knowledge: - # knowledge_info_str += "- 暂无。\n" # 更简洁的提示 - - # else: - # knowledge_info_str += "- 暂无。\n" - # except AttributeError: - # logger.warning(f"[私聊][{self.private_name}]ConversationInfo 对象可能缺少 knowledge_list 属性。") - # knowledge_info_str += "- 获取知识列表时出错。\n" - # except Exception as e: - # logger.error(f"[私聊][{self.private_name}]构建知识信息字符串时出错: {e}") - # knowledge_info_str += "- 处理知识列表时出错。\n" # 获取聊天历史记录 (chat_history_text) chat_history_text = observation_info.chat_history_str @@ -336,7 +217,7 @@ class ReplyGenerator: retrieval_context = chat_history_text if retrieval_context and retrieval_context != "还没有聊天记录。" and retrieval_context != "[构建聊天记录出错]": try: - # 提取记忆 + # 提取记忆 (调用本地的 _get_memory_info) logger.debug(f"[私聊][{self.private_name}]开始自动检索记忆...") retrieved_memory_str = await self._get_memory_info(text=retrieval_context) if retrieved_memory_str: @@ -344,9 +225,13 @@ class ReplyGenerator: else: logger.info(f"[私聊][{self.private_name}]未自动检索到相关记忆。") - # 提取知识 - logger.debug(f"[私聊][{self.private_name}]开始自动检索知识...") - retrieved_knowledge_str = await self._get_prompt_info(message=retrieval_context) + # --- MODIFIED KNOWLEDGE RETRIEVAL --- + # 提取知识 (调用导入的 prompt_builder.get_prompt_info) + logger.debug(f"[私聊][{self.private_name}]开始自动检索知识 (使用导入函数)...") + # 使用导入的 prompt_builder 实例及其方法 + retrieved_knowledge_str = await prompt_builder.get_prompt_info(message=retrieval_context, threshold=0.38) + # --- END MODIFIED KNOWLEDGE RETRIEVAL --- + if retrieved_knowledge_str: logger.info(f"[私聊][{self.private_name}]自动检索到相关知识。") else: @@ -377,7 +262,7 @@ class ReplyGenerator: persona_text=persona_text, goals_str=goals_str, chat_history_text=chat_history_text, - # knowledge_info_str=knowledge_info_str, + # knowledge_info_str=knowledge_info_str, # 移除了这个旧的知识展示方式 retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 retrieved_knowledge_str=retrieved_knowledge_str if retrieved_knowledge_str else "无相关知识。" # 如果为空则提示无 ) @@ -402,82 +287,3 @@ class ReplyGenerator: (此方法逻辑保持不变) """ return await self.reply_checker.check(reply, goal, chat_history, chat_history_str, retry_count) - -def get_info_from_db( - query_embedding: list, limit: int = 1, threshold: float = 0.5, return_raw: bool = False -) -> Union[str, list]: - """ - 从旧知识库 (knowledges collection) 中根据嵌入向量相似度检索信息。 - (移植自 heartflow_prompt_builder.py) - """ - if not query_embedding: - return "" if not return_raw else [] - # 使用余弦相似度计算 - pipeline = [ - { - "$addFields": { - "dotProduct": { - "$reduce": { - "input": {"$range": [0, {"$size": "$embedding"}]}, - "initialValue": 0, - "in": { - "$add": [ - "$$value", - { - "$multiply": [ - {"$arrayElemAt": ["$embedding", "$$this"]}, - {"$arrayElemAt": [query_embedding, "$$this"]}, - ] - }, - ] - }, - } - }, - "magnitude1": { - "$sqrt": { - "$reduce": { - "input": "$embedding", - "initialValue": 0, - "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, - } - } - }, - "magnitude2": { - "$sqrt": { - "$reduce": { - "input": query_embedding, - "initialValue": 0, - "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, - } - } - }, - } - }, - # 防止除以零错误,添加一个小的 epsilon - {"$addFields": {"similarity": {"$divide": ["$dotProduct", {"$max": [{"$multiply": ["$magnitude1", "$magnitude2"]}, 1e-9]}]}}}, - { - "$match": { - "similarity": {"$gte": threshold} # 只保留相似度大于等于阈值的结果 - } - }, - {"$sort": {"similarity": -1}}, - {"$limit": limit}, - {"$project": {"content": 1, "similarity": 1}}, - ] - - try: - results = list(db.knowledges.aggregate(pipeline)) - # 注意:这里的 logger 需要能访问到,或者在这个函数里获取 logger 实例 - # logger.debug(f"旧知识库查询结果数量: {len(results)}") # 暂时注释掉,避免 logger 未定义 - except Exception as e: - logger.debug(f"执行旧知识库聚合查询时出错: {e}") - results = [] - - if not results: - return "" if not return_raw else [] - - if return_raw: - return results - else: - # 返回所有找到的内容,用换行分隔 - return "\n".join(str(result["content"]) for result in results) From a034ea61033acd945861955e095242209960bba9 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 3 May 2025 22:08:54 +0800 Subject: [PATCH 09/10] ruff --- src/plugins/PFC/action_planner.py | 5 +---- src/plugins/PFC/reply_generator.py | 11 +---------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index 8d70f261..582b98c4 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -1,9 +1,6 @@ import time -from typing import Tuple, Optional, Union # 增加了 Optional +from typing import Tuple, Optional from src.plugins.memory_system.Hippocampus import HippocampusManager -from src.plugins.knowledge.knowledge_lib import qa_manager -from src.common.database import db -from src.plugins.chat.utils import get_embedding # --- NEW IMPORT --- # 从 heartflow 导入知识检索和数据库查询函数/实例 from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index c2614b11..88987129 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -1,14 +1,6 @@ # 用于访问记忆系统 from src.plugins.memory_system.Hippocampus import HippocampusManager -# 用于访问新的知识库 (LPMM) -from src.plugins.knowledge.knowledge_lib import qa_manager - -# 用于访问数据库 (旧知识库需要) -from src.common.database import db - -# 用于获取文本的嵌入向量 (旧知识库需要) -from src.plugins.chat.utils import get_embedding # --- NEW IMPORT --- # 从 heartflow 导入知识检索和数据库查询函数/实例 from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder @@ -16,7 +8,7 @@ from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # 可能用于旧知识库提取主题 (如果需要回退到旧方法) # import jieba # 如果报错说找不到 jieba,可能需要安装: pip install jieba # import re # 正则表达式库,通常 Python 自带 -from typing import Tuple, List, Dict, Any, Union +from typing import Tuple, List, Dict, Any from src.common.logger import get_module_logger from ..models.utils_model import LLMRequest from ...config.config import global_config @@ -26,7 +18,6 @@ from src.individuality.individuality import Individuality from .observation_info import ObservationInfo from .conversation_info import ConversationInfo from src.plugins.utils.chat_message_builder import build_readable_messages -import time logger = get_module_logger("reply_generator") From dff8fef12954710f8437497fe182961aab67b632 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 3 May 2025 14:09:18 +0000 Subject: [PATCH 10/10] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/action_planner.py | 13 +++++++++---- src/plugins/PFC/reply_generator.py | 12 ++++++++---- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/plugins/PFC/action_planner.py b/src/plugins/PFC/action_planner.py index 582b98c4..51d9bff2 100644 --- a/src/plugins/PFC/action_planner.py +++ b/src/plugins/PFC/action_planner.py @@ -1,9 +1,11 @@ import time from typing import Tuple, Optional from src.plugins.memory_system.Hippocampus import HippocampusManager + # --- NEW IMPORT --- # 从 heartflow 导入知识检索和数据库查询函数/实例 from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder + # --- END NEW IMPORT --- # import jieba # 如果需要旧版知识库的回退,可能需要 # import re # 如果需要旧版知识库的回退,可能需要 @@ -268,7 +270,6 @@ class ActionPlanner: logger.error(f"[私聊][{self.private_name}]构建对话目标字符串时出错: {e}") goals_str = "- 构建对话目标时出错。\n" - # 获取聊天历史记录 (chat_history_text) try: if hasattr(observation_info, "chat_history") and observation_info.chat_history: @@ -392,9 +393,13 @@ class ActionPlanner: # 调用导入的 prompt_builder.get_prompt_info logger.debug(f"[私聊][{self.private_name}] (ActionPlanner) 开始自动检索知识 (使用导入函数)...") # 使用导入的 prompt_builder 实例及其方法 - retrieved_knowledge_str_planner = await prompt_builder.get_prompt_info(message=retrieval_context, threshold=0.38) + retrieved_knowledge_str_planner = await prompt_builder.get_prompt_info( + message=retrieval_context, threshold=0.38 + ) # --- END MODIFIED KNOWLEDGE RETRIEVAL --- - logger.info(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。") + logger.info( + f"[私聊][{self.private_name}] (ActionPlanner) 自动检索知识 {'完成' if retrieved_knowledge_str_planner else '无结果'}。" + ) except Exception as retrieval_err: logger.error(f"[私聊][{self.private_name}] (ActionPlanner) 自动检索时出错: {retrieval_err}") @@ -528,4 +533,4 @@ class ActionPlanner: except Exception as e: # 外层异常处理保持不变 logger.error(f"[私聊][{self.private_name}]规划行动时调用 LLM 或处理结果出错: {str(e)}") - return "wait", f"行动规划处理中发生错误,暂时等待: {str(e)}" \ No newline at end of file + return "wait", f"行动规划处理中发生错误,暂时等待: {str(e)}" diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 88987129..b9d2c00e 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -4,6 +4,7 @@ from src.plugins.memory_system.Hippocampus import HippocampusManager # --- NEW IMPORT --- # 从 heartflow 导入知识检索和数据库查询函数/实例 from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder + # --- END NEW IMPORT --- # 可能用于旧知识库提取主题 (如果需要回退到旧方法) # import jieba # 如果报错说找不到 jieba,可能需要安装: pip install jieba @@ -187,7 +188,6 @@ class ReplyGenerator: else: goals_str = "- 目前没有明确对话目标\n" # 简化无目标情况 - # 获取聊天历史记录 (chat_history_text) chat_history_text = observation_info.chat_history_str if observation_info.new_messages_count > 0 and observation_info.unprocessed_messages: @@ -223,7 +223,9 @@ class ReplyGenerator: # 提取知识 (调用导入的 prompt_builder.get_prompt_info) logger.debug(f"[私聊][{self.private_name}]开始自动检索知识 (使用导入函数)...") # 使用导入的 prompt_builder 实例及其方法 - retrieved_knowledge_str = await prompt_builder.get_prompt_info(message=retrieval_context, threshold=0.38) + retrieved_knowledge_str = await prompt_builder.get_prompt_info( + message=retrieval_context, threshold=0.38 + ) # --- END MODIFIED KNOWLEDGE RETRIEVAL --- if retrieved_knowledge_str: @@ -257,8 +259,10 @@ class ReplyGenerator: goals_str=goals_str, chat_history_text=chat_history_text, # knowledge_info_str=knowledge_info_str, # 移除了这个旧的知识展示方式 - retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 - retrieved_knowledge_str=retrieved_knowledge_str if retrieved_knowledge_str else "无相关知识。" # 如果为空则提示无 + retrieved_memory_str=retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 如果为空则提示无 + retrieved_knowledge_str=retrieved_knowledge_str + if retrieved_knowledge_str + else "无相关知识。", # 如果为空则提示无 ) # --- 调用 LLM 生成 ---