From 37822fb34762953fb221bcc525c0784865c52cb3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 10 May 2025 06:33:19 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_processor.py | 89 ++++---- src/plugins/PFC/pfc_utils.py | 331 +++++++++++++++++++---------- src/plugins/PFC/reply_generator.py | 70 +++--- 3 files changed, 309 insertions(+), 181 deletions(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index 03bdf641..f706bffa 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -1,12 +1,12 @@ import traceback import re from typing import Any -from datetime import datetime # 确保导入 datetime -from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv -from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py +from datetime import datetime # 确保导入 datetime +from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv +from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py from src.config.config import global_config from src.common.logger_manager import get_logger -from ..chat.chat_stream import ChatStream, chat_manager +from ..chat.chat_stream import ChatStream, chat_manager from src.plugins.chat.utils import get_embedding from src.common.database import db from .pfc_manager import PFCManager @@ -14,18 +14,22 @@ from .pfc_manager import PFCManager logger = get_logger("pfc_processor") -async def _handle_error(error: Exception, context: str, message: MessageRecv | None = None) -> None: # 明确 message 类型 +async def _handle_error( + error: Exception, context: str, message: MessageRecv | None = None +) -> None: # 明确 message 类型 """统一的错误处理函数 # ... (方法注释不变) ... """ logger.error(f"{context}: {error}") logger.error(traceback.format_exc()) # 检查 message 是否 None 以及是否有 raw_message 属性 - if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message - raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取 + if ( + message and hasattr(message, "message_info") and hasattr(message.message_info, "raw_message") + ): # MessageRecv 结构可能没有直接的 raw_message + raw_msg_content = getattr(message.message_info, "raw_message", None) # 安全获取 if raw_msg_content: logger.error(f"相关消息原始内容: {raw_msg_content}") - elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message + elif message and hasattr(message, "raw_message"): # 如果 MessageRecv 直接有 raw_message logger.error(f"相关消息原始内容: {message.raw_message}") @@ -35,21 +39,22 @@ class PFCProcessor: # MessageStorage() 的实例化位置和具体类是什么? # 我们假设它来自 src.plugins.storage.storage # 但由于我们不能修改那个文件,所以这里的 self.storage 将按原样使用 - from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解 + from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解 + self.storage: MessageStorage = MessageStorage() self.pfc_manager = PFCManager.get_instance() - async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict + async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict """处理接收到的原始消息数据 # ... (方法注释不变) ... """ - message_obj: MessageRecv | None = None # 初始化为 None,并明确类型 + message_obj: MessageRecv | None = None # 初始化为 None,并明确类型 try: # 1. 消息解析与初始化 - message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv + message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv - groupinfo = getattr(message_obj.message_info, 'group_info', None) - userinfo = getattr(message_obj.message_info, 'user_info', None) + groupinfo = getattr(message_obj.message_info, "group_info", None) + userinfo = getattr(message_obj.message_info, "user_info", None) logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流") chat = await chat_manager.get_or_create_stream( @@ -57,12 +62,13 @@ class PFCProcessor: user_info=userinfo, group_info=groupinfo, ) - message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法 + message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法 # 2. 过滤检查 - await message_obj.process() # 调用 MessageRecv 的异步 process 方法 - if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \ - self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性 + await message_obj.process() # 调用 MessageRecv 的异步 process 方法 + if self._check_ban_words(message_obj.processed_plain_text, userinfo) or self._check_ban_regex( + message_obj.raw_message, userinfo + ): # MessageRecv 有 raw_message 属性 return # 3. 消息存储 (保持原有调用) @@ -71,7 +77,7 @@ class PFCProcessor: await self.storage.store_message(message_obj, chat) logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}") - await self._update_embedding_vector(message_obj) # 明确传递 message_obj + await self._update_embedding_vector(message_obj) # 明确传递 message_obj # 4. 创建 PFC 聊天流 await self._create_pfc_chat(message_obj, chat) @@ -81,43 +87,41 @@ class PFCProcessor: current_time_display = datetime.fromtimestamp(float(message_obj.message_info.time)).strftime("%H:%M:%S") # 确保 userinfo.user_nickname 存在 - user_nickname_display = getattr(userinfo, 'user_nickname', '未知用户') + user_nickname_display = getattr(userinfo, "user_nickname", "未知用户") - logger.info( - f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}" - ) + logger.info(f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}") except Exception as e: - await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj + await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj - async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型 + async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型 try: chat_id = str(message.chat_stream.stream_id) - private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname + private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname if global_config.enable_pfc_chatting: await self.pfc_manager.get_or_create_conversation(chat_id, private_name) except Exception as e: - logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True + logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True @staticmethod - def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 + def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 """检查消息中是否包含过滤词""" for word in global_config.ban_words: if word in text: - logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname + logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname logger.info(f"[过滤词识别]消息中含有{word},filtered") return True return False @staticmethod - def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 + def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 """检查消息是否匹配过滤正则表达式""" for pattern in global_config.ban_msgs_regex: - if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象 - logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname - logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串 + if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象 + logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname + logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串 return True return False @@ -125,7 +129,7 @@ class PFCProcessor: """更新消息的嵌入向量""" # === 新增:为已存储的消息生成嵌入并更新数据库文档 === embedding_vector = None - text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 + text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 # 在 storage.py 中,会对 processed_plain_text 进行一次过滤 # 为了保持一致,我们也在这里应用相同的过滤逻辑 @@ -148,18 +152,25 @@ class PFCProcessor: # 确保你有权限访问和操作 db 对象 update_result = db.messages.update_one( {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id}, - {"$set": {"embedding_vector": embedding_vector}} + {"$set": {"embedding_vector": embedding_vector}}, ) if update_result.modified_count > 0: logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。") elif update_result.matched_count > 0: logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。") else: - logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。") + logger.error( + f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。" + ) else: - logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。") + logger.warning( + f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。" + ) except Exception as e_embed_update: - logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True) + logger.error( + f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", + exc_info=True, + ) else: logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。") - # === 新增结束 === \ No newline at end of file + # === 新增结束 === diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 19385855..91c04ad5 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -3,35 +3,36 @@ import json import re import time from datetime import datetime -from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 +from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 from src.common.logger_manager import get_logger from src.config.config import global_config -from src.common.database import db # << 确认此路径 +from src.common.database import db # << 确认此路径 # --- 依赖于你项目结构的导入,请务必仔细检查并根据你的实际情况调整 --- -from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径 -from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径 -from src.plugins.chat.utils import get_embedding # << 确认此路径 -from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径 +from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径 +from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径 +from src.plugins.chat.utils import get_embedding # << 确认此路径 +from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径 # --- 依赖导入结束 --- -from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py -from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入) -import math # 来自原始 pfc_utils.py -from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入) +from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py +from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入) +import math # 来自原始 pfc_utils.py +from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入) logger = get_logger("pfc_utils") + # ============================================================================== # 新增:专门用于检索 PFC 私聊历史对话上下文的函数 # ============================================================================== async def find_most_relevant_historical_message( chat_id: str, query_text: str, - similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 - absolute_search_time_limit: Optional[float] = None # 新增参数:排除最近多少秒内的消息(例如5分钟) + similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 + absolute_search_time_limit: Optional[float] = None, # 新增参数:排除最近多少秒内的消息(例如5分钟) ) -> Optional[Dict[str, Any]]: """ 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。 @@ -51,20 +52,22 @@ async def find_most_relevant_historical_message( effective_search_upper_limit: float log_source_of_limit: str = "" - + if absolute_search_time_limit is not None: effective_search_upper_limit = absolute_search_time_limit log_source_of_limit = "传入的绝对时间上限" else: # 如果没有传入绝对时间上限,可以设置一个默认的回退逻辑 - fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时 + fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时 effective_search_upper_limit = time.time() - fallback_exclude_seconds log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)" - - logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: " - f"将使用时间上限 {effective_search_upper_limit} " - f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " - f"进行历史消息锚点搜索。来源: {log_source_of_limit}") + + logger.debug( + f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: " + f"将使用时间上限 {effective_search_upper_limit} " + f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " + f"进行历史消息锚点搜索。来源: {log_source_of_limit}" + ) # --- [新代码结束] --- pipeline = [ @@ -72,14 +75,46 @@ async def find_most_relevant_historical_message( "$match": { "chat_id": chat_id, "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}}, - "time": {"$lt": effective_search_upper_limit} # <--- 使用新的 effective_search_upper_limit + "time": {"$lt": effective_search_upper_limit}, # <--- 使用新的 effective_search_upper_limit } }, { "$addFields": { - "dotProduct": {"$reduce": {"input": {"$range": [0, {"$size": "$embedding_vector"}]}, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": [{"$arrayElemAt": ["$embedding_vector", "$$this"]}, {"$arrayElemAt": [query_embedding, "$$this"]}]}]}}}, - "queryVecMagnitude": {"$sqrt": {"$reduce": {"input": query_embedding, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}}, - "docVecMagnitude": {"$sqrt": {"$reduce": {"input": "$embedding_vector", "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}} + "dotProduct": { + "$reduce": { + "input": {"$range": [0, {"$size": "$embedding_vector"}]}, + "initialValue": 0, + "in": { + "$add": [ + "$$value", + { + "$multiply": [ + {"$arrayElemAt": ["$embedding_vector", "$$this"]}, + {"$arrayElemAt": [query_embedding, "$$this"]}, + ] + }, + ] + }, + } + }, + "queryVecMagnitude": { + "$sqrt": { + "$reduce": { + "input": query_embedding, + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, + "docVecMagnitude": { + "$sqrt": { + "$reduce": { + "input": "$embedding_vector", + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, } }, { @@ -88,7 +123,7 @@ async def find_most_relevant_historical_message( "$cond": [ {"$and": [{"$gt": ["$queryVecMagnitude", 0]}, {"$gt": ["$docVecMagnitude", 0]}]}, {"$divide": ["$dotProduct", {"$multiply": ["$queryVecMagnitude", "$docVecMagnitude"]}]}, - 0 + 0, ] } } @@ -96,26 +131,44 @@ async def find_most_relevant_historical_message( {"$match": {"similarity": {"$gte": similarity_threshold}}}, {"$sort": {"similarity": -1}}, {"$limit": 1}, - {"$project": {"_id": 0, "message_id": 1, "time": 1, "chat_id": 1, "user_info": 1, "processed_plain_text": 1, "similarity": 1}} # 可以不返回 embedding_vector 节省带宽 + { + "$project": { + "_id": 0, + "message_id": 1, + "time": 1, + "chat_id": 1, + "user_info": 1, + "processed_plain_text": 1, + "similarity": 1, + } + }, # 可以不返回 embedding_vector 节省带宽 ] try: # --- 确定性修改:同步执行聚合和结果转换 --- - cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor - results = list(cursor) # 直接将 CommandCursor 转换为列表 + cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor + results = list(cursor) # 直接将 CommandCursor 转换为列表 if not results: - logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。") + logger.info( + f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。" + ) else: - logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:") - for res_msg in results: - msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S') - logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'") + logger.info( + f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:" + ) + for res_msg in results: + msg_time_readable = datetime.fromtimestamp(res_msg.get("time", 0)).strftime("%Y-%m-%d %H:%M:%S") + logger.info( + f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text', '')[:50]}...'" + ) # --- [修改结束] --- # --- 修改结束 --- if results and len(results) > 0: most_similar_message = results[0] - logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}") + logger.info( + f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}" + ) return most_similar_message else: logger.debug(f"[{chat_id}] (私聊历史)未找到相似度超过 {similarity_threshold} 的相关消息。") @@ -124,13 +177,14 @@ async def find_most_relevant_historical_message( logger.error(f"[{chat_id}] (私聊历史)在数据库中检索时出错: {e}", exc_info=True) return None + async def retrieve_chat_context_window( chat_id: str, anchor_message_id: str, anchor_message_time: float, excluded_time_threshold_for_window: float, window_size_before: int = 7, - window_size_after: int = 7 + window_size_after: int = 7, ) -> List[Dict[str, Any]]: """ 以某条消息为锚点,获取其前后的聊天记录形成一个上下文窗口。 @@ -138,33 +192,50 @@ async def retrieve_chat_context_window( if not anchor_message_id or anchor_message_time is None: return [] - context_messages: List[Dict[str, Any]] = [] # 明确类型 - logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...") + context_messages: List[Dict[str, Any]] = [] # 明确类型 + logger.debug( + f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口..." + ) try: # --- 同步执行 find_one 和 find --- anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id}) - messages_before_cursor = db.messages.find( - {"chat_id": chat_id, "time": {"$lt": anchor_message_time}} - ).sort("time", -1).limit(window_size_before) + messages_before_cursor = ( + db.messages.find({"chat_id": chat_id, "time": {"$lt": anchor_message_time}}) + .sort("time", -1) + .limit(window_size_before) + ) messages_before = list(messages_before_cursor) messages_before.reverse() # --- 新增日志 --- - logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}") - logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):") + logger.debug( + f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}" + ) + logger.debug( + f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):" + ) for msg_b in messages_before: - logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'") + logger.debug( + f" - Time: {datetime.fromtimestamp(msg_b.get('time', 0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text', '')[:30]}...'" + ) - messages_after_cursor = db.messages.find( - {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} - ).sort("time", 1).limit(window_size_after) + messages_after_cursor = ( + db.messages.find( + {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} + ) + .sort("time", 1) + .limit(window_size_after) + ) messages_after = list(messages_after_cursor) # --- 新增日志 --- - logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):") + logger.debug( + f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):" + ) for msg_a in messages_after: - logger.debug(f" - Time: {datetime.fromtimestamp(msg_a.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text','')[:30]}...'") - + logger.debug( + f" - Time: {datetime.fromtimestamp(msg_a.get('time', 0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text', '')[:30]}...'" + ) if messages_before: context_messages.extend(messages_before) @@ -173,32 +244,35 @@ async def retrieve_chat_context_window( context_messages.append(anchor_message) if messages_after: context_messages.extend(messages_after) - - final_window: List[Dict[str, Any]] = [] # 明确类型 - seen_ids: set[str] = set() # 明确类型 + + final_window: List[Dict[str, Any]] = [] # 明确类型 + seen_ids: set[str] = set() # 明确类型 for msg in context_messages: msg_id = msg.get("message_id") - if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在 + if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在 final_window.append(msg) seen_ids.add(msg_id) - + final_window.sort(key=lambda m: m.get("time", 0)) - logger.info(f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。") + logger.info( + f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。" + ) return final_window except Exception as e: logger.error(f"[{chat_id}] (私聊历史)获取消息 ID '{anchor_message_id}' 的上下文窗口时出错: {e}", exc_info=True) return [] + # ============================================================================== # 修改后的 retrieve_contextual_info 函数 # ============================================================================== async def retrieve_contextual_info( - text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录) - private_name: str, # 用于日志 - chat_id: str, # 用于特定私聊历史的检索 + text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录) + private_name: str, # 用于日志 + chat_id: str, # 用于特定私聊历史的检索 historical_chat_query_text: Optional[str] = None, - current_short_term_history_earliest_time: Optional[float] = None # <--- 新增参数 -) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 + current_short_term_history_earliest_time: Optional[float] = None, # <--- 新增参数 +) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 """ 检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。 @@ -222,9 +296,9 @@ async def retrieve_contextual_info( related_memory = await HippocampusManager.get_instance().get_memory_from_text( text=text, max_memory_num=2, - max_memory_length=2, + max_memory_length=2, max_depth=3, - fast_retrieval=False, + fast_retrieval=False, ) if related_memory: temp_global_memory_info = "" @@ -233,7 +307,7 @@ async def retrieve_contextual_info( temp_global_memory_info += str(memory_item[1]) + "\n" elif isinstance(memory_item, str): temp_global_memory_info += memory_item + "\n" - + if temp_global_memory_info.strip(): retrieved_global_memory_str = f"你回忆起一些相关的全局记忆:\n{temp_global_memory_info.strip()}\n(以上是你的全局记忆,供参考)\n" global_memory_log_msg = f"自动检索到全局压缩记忆: {temp_global_memory_info.strip()[:100]}..." @@ -250,7 +324,6 @@ async def retrieve_contextual_info( else: logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过全局压缩记忆检索。") - # --- 2. 相关知识检索 (来自 prompt_builder) --- # (保持你原始 pfc_utils.py 中这部分的逻辑基本不变) knowledge_log_msg = f"开始知识检索 (基于文本: '{text[:30]}...')" @@ -260,8 +333,8 @@ async def retrieve_contextual_info( message=text, threshold=0.38, ) - if knowledge_result and knowledge_result.strip(): # 确保结果不为空 - retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装 + if knowledge_result and knowledge_result.strip(): # 确保结果不为空 + retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装 knowledge_log_msg = f"自动检索到相关知识: {knowledge_result[:100]}..." else: knowledge_log_msg = "知识检索返回为空。" @@ -274,9 +347,10 @@ async def retrieve_contextual_info( else: logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。") - # --- 3. 当前私聊的特定历史对话上下文检索 --- - query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None + query_for_historical_chat = ( + historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None + ) # historical_chat_log_msg 的初始化可以移到 try 块之后,根据实际情况赋值 if query_for_historical_chat: @@ -284,13 +358,15 @@ async def retrieve_contextual_info( # ---- [新代码] 计算最终的、严格的搜索时间上限 ---- # 1. 设置一个基础的、较大的时间回溯窗口,例如2小时 (7200秒) # 这个值可以从全局配置读取,如果没配置则使用默认值 - default_search_exclude_seconds = getattr(global_config, "pfc_historical_search_default_exclude_seconds", 7200) # 默认2小时 + default_search_exclude_seconds = getattr( + global_config, "pfc_historical_search_default_exclude_seconds", 7200 + ) # 默认2小时 base_excluded_time_limit = time.time() - default_search_exclude_seconds - + final_search_upper_limit_time = base_excluded_time_limit if current_short_term_history_earliest_time is not None: # 我们希望找到的消息严格早于 short_term_history 的开始,减去一个小量确保不包含边界 - limit_from_short_term = current_short_term_history_earliest_time - 0.001 + limit_from_short_term = current_short_term_history_earliest_time - 0.001 final_search_upper_limit_time = min(base_excluded_time_limit, limit_from_short_term) log_earliest_time_str = "未提供" if current_short_term_history_earliest_time is not None: @@ -298,55 +374,60 @@ async def retrieve_contextual_info( log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})" except Exception: log_earliest_time_str = str(current_short_term_history_earliest_time) - - logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: " - f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " - f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " - f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}") - + + logger.debug( + f"[{private_name}] (私聊历史) retrieve_contextual_info: " + f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " + f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " + f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}" + ) most_relevant_message_doc = await find_most_relevant_historical_message( chat_id=chat_id, query_text=query_for_historical_chat, - similarity_threshold=0.5, # 您可以调整这个 + similarity_threshold=0.5, # 您可以调整这个 # exclude_recent_seconds 不再直接使用,而是传递计算好的绝对时间上限 - absolute_search_time_limit=final_search_upper_limit_time # <--- 传递计算好的绝对时间上限 + absolute_search_time_limit=final_search_upper_limit_time, # <--- 传递计算好的绝对时间上限 ) - + if most_relevant_message_doc: anchor_id = most_relevant_message_doc.get("message_id") - anchor_time = most_relevant_message_doc.get("time") - + anchor_time = most_relevant_message_doc.get("time") + # 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证) if anchor_time is not None and anchor_time >= final_search_upper_limit_time: - logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} " - f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。") + logger.warning( + f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} " + f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。" + ) historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。" # 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值 elif anchor_id and anchor_time is not None: # 构建上下文窗口时,其“未来”消息的上限也应该是 final_search_upper_limit_time # 因为我们不希望历史回忆的上下文窗口延伸到“最近聊天记录”的范围内或更近 - time_limit_for_context_window_after = final_search_upper_limit_time - - logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " - f"with anchor_time: {anchor_time}, " - f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}") + time_limit_for_context_window_after = final_search_upper_limit_time + + logger.debug( + f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " + f"with anchor_time: {anchor_time}, " + f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}" + ) context_window_messages = await retrieve_chat_context_window( chat_id=chat_id, anchor_message_id=anchor_id, - anchor_message_time=anchor_time, + anchor_message_time=anchor_time, excluded_time_threshold_for_window=time_limit_for_context_window_after, window_size_before=7, - window_size_after=7 + window_size_after=7, ) if context_window_messages: formatted_window_str = await build_readable_messages( context_window_messages, - replace_bot_name=False, # 在回忆中,保留原始发送者名称 + replace_bot_name=False, # 在回忆中,保留原始发送者名称 merge_messages=False, - timestamp_mode="relative", # 可以选择 'absolute' 或 'none' - read_mark=0.0 + timestamp_mode="relative", # 可以选择 'absolute' 或 'none' + read_mark=0.0, ) if formatted_window_str and formatted_window_str.strip(): retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n" @@ -359,14 +440,18 @@ async def retrieve_contextual_info( historical_chat_log_msg = "检索到的最相关私聊历史消息文档缺少 message_id 或 time。" else: historical_chat_log_msg = "未找到足够相关的私聊历史对话消息。" - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}") + logger.debug( + f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}" + ) except Exception as e: logger.error( f"[私聊][{private_name}] (retrieve_contextual_info) 检索私聊历史对话时出错: {e}\n{traceback.format_exc()}" ) retrieved_historical_chat_str = "[检索私聊历史对话时出错]\n" else: - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。") + logger.debug( + f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。" + ) return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str @@ -410,13 +495,13 @@ def get_items_from_json( json_array = json.loads(cleaned_content) if isinstance(json_array, list): valid_items_list: List[Dict[str, Any]] = [] - for item_json in json_array: # Renamed item to item_json to avoid conflict + for item_json in json_array: # Renamed item to item_json to avoid conflict if not isinstance(item_json, dict): logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item_json}") continue current_item_result = default_result.copy() valid_item = True - for field in items: # items is args from function signature + for field in items: # items is args from function signature if field in item_json: current_item_result[field] = item_json[field] elif field not in default_result: @@ -427,15 +512,25 @@ def get_items_from_json( continue if required_types: for field, expected_type in required_types.items(): - if field in current_item_result and not isinstance(current_item_result[field], expected_type): - logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}") + if field in current_item_result and not isinstance( + current_item_result[field], expected_type + ): + logger.warning( + f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}" + ) valid_item = False break if not valid_item: continue for field in items: - if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip(): - logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}") + if ( + field in current_item_result + and isinstance(current_item_result[field], str) + and not current_item_result[field].strip() + ): + logger.warning( + f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}" + ) valid_item = False break if valid_item: @@ -472,12 +567,14 @@ def get_items_from_json( logger.error(f"[私聊][{private_name}] 正则提取的部分 '{potential_json_str[:100]}...' 无法解析为JSON。") return False, default_result else: - logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...") + logger.error( + f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}..." + ) return False, default_result if not isinstance(result, dict): result = default_result.copy() valid_single_object = True - for item_field in items: # Renamed item to item_field + for item_field in items: # Renamed item to item_field if item_field in json_data: result[item_field] = json_data[item_field] elif item_field not in default_result: @@ -489,7 +586,9 @@ def get_items_from_json( if required_types: for field, expected_type in required_types.items(): if field in result and not isinstance(result[field], expected_type): - logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})") + logger.error( + f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})" + ) valid_single_object = False break if not valid_single_object: @@ -507,7 +606,7 @@ def get_items_from_json( async def get_person_id(private_name: str, chat_stream: ChatStream): - """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ + """(保持你原始 pfc_utils.py 中的此函数代码不变)""" private_user_id_str: Optional[str] = None private_platform_str: Optional[str] = None # private_nickname_str = private_name # 这行在你提供的代码中没有被使用,可以考虑移除 @@ -516,7 +615,7 @@ async def get_person_id(private_name: str, chat_stream: ChatStream): private_user_id_str = str(chat_stream.user_info.user_id) private_platform_str = chat_stream.user_info.platform logger.debug( - f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name + f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name ) # elif chat_stream.group_info is None and private_name: # 这个 elif 条件体为空,可以移除 # pass @@ -547,7 +646,7 @@ async def get_person_id(private_name: str, chat_stream: ChatStream): async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: float) -> float: - """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ + """(保持你原始 pfc_utils.py 中的此函数代码不变)""" old_value = max(-1000, min(1000, old_value)) value = raw_adjustment if old_value >= 0: @@ -555,7 +654,9 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: value = value * math.cos(math.pi * old_value / 2000) if old_value > 500: # 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整 - rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False) + rdict = await person_info_manager.get_specific_value_list( + "relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False + ) high_value_count = len(rdict) if old_value > 700: value *= 3 / (high_value_count + 2) @@ -564,7 +665,7 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: elif value < 0: value = value * math.exp(old_value / 2000) # else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0 - else: # old_value < 0 + else: # old_value < 0 if value >= 0: value = value * math.exp(old_value / 2000) elif value < 0: @@ -586,12 +687,12 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam ) else: chat_history_text = "还没有聊天记录。\n" - + unread_count = getattr(observation_info, "new_messages_count", 0) unread_messages = getattr(observation_info, "unprocessed_messages", []) if unread_count > 0 and unread_messages: - bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取 - if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤 + bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取 + if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤 other_unread_messages = [ msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str ] @@ -599,12 +700,12 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam if other_unread_count > 0: new_messages_str = await build_readable_messages( other_unread_messages, - replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字 + replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字 merge_messages=False, timestamp_mode="relative", read_mark=0.0, ) - chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的 + chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的 else: logger.warning(f"[私聊][{private_name}] BOT_QQ 未配置,无法准确过滤未读消息中的机器人自身消息。") @@ -614,4 +715,4 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam except Exception as e: logger.error(f"[私聊][{private_name}] 处理聊天记录时发生未知错误: {e}") chat_history_text = "[处理聊天记录时出错]\n" - return chat_history_text \ No newline at end of file + return chat_history_text diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 0a9089aa..7773bc08 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -226,7 +226,7 @@ class ReplyGenerator: chat_history_for_prompt_builder: list = [] recent_history_start_time_for_exclusion: Optional[float] = None - + # 我们需要知道 build_chat_history_text 函数大致会用 observation_info.chat_history 的多少条记录 # 或者 build_chat_history_text 内部的逻辑。 # 假设 build_chat_history_text 主要依赖 observation_info.chat_history_str, @@ -238,7 +238,7 @@ class ReplyGenerator: # 如果 observation_info.chat_history_str 是由 observation_info.py 中的 update_from_message 等方法维护的, # 并且总是代表一个固定长度(比如最后30条)的聊天记录字符串,那么我们就需要从 observation_info.chat_history # 取出这部分原始消息来确定起始时间。 - + # 我们先做一个合理的假设: “最近聊天记录” 字符串 chat_history_text 是基于 # observation_info.chat_history 的一个有限的尾部片段生成的。 # 假设这个片段的长度由 global_config.pfc_recent_history_display_count 控制,默认为20条。 @@ -249,25 +249,29 @@ class ReplyGenerator: # 如果 observation_info.chat_history 长度小于 display_count,则取全部 start_index = max(0, len(observation_info.chat_history) - recent_history_display_count) chat_history_for_prompt_builder = observation_info.chat_history[start_index:] - - if chat_history_for_prompt_builder: # 如果片段不为空 + + if chat_history_for_prompt_builder: # 如果片段不为空 try: first_message_in_display_slice = chat_history_for_prompt_builder[0] - recent_history_start_time_for_exclusion = first_message_in_display_slice.get('time') + recent_history_start_time_for_exclusion = first_message_in_display_slice.get("time") if recent_history_start_time_for_exclusion: # 导入 datetime (如果 reply_generator.py 文件顶部没有的话) # from datetime import datetime # 通常建议放在文件顶部 - logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: " - f"{recent_history_start_time_for_exclusion} " - f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})") + logger.debug( + f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: " + f"{recent_history_start_time_for_exclusion} " + f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})" + ) else: logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。") except (IndexError, KeyError, TypeError) as e: logger.warning(f"[{self.private_name}] (ReplyGenerator) 获取“最近聊天记录”起始时间失败: {e}") - recent_history_start_time_for_exclusion = None + recent_history_start_time_for_exclusion = None else: - logger.debug(f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。") - # --- [新代码结束] --- + logger.debug( + f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。" + ) + # --- [新代码结束] --- chat_history_text = await build_chat_history_text(observation_info, self.private_name) @@ -278,28 +282,32 @@ class ReplyGenerator: persona_text = f"你的名字是{self.name},{self.personality_info}。" historical_chat_query = "" - num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子 + num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子 if observation_info.chat_history and len(observation_info.chat_history) > 0: # 从 chat_history (已处理并存入 ObservationInfo 的历史) 中取最新N条 # 或者,如果 observation_info.unprocessed_messages 更能代表“当前上下文”,也可以考虑用它 # 我们先用 chat_history,因为它包含了双方的对话历史,可能更稳定 recent_messages_for_query_list = observation_info.chat_history[-num_recent_messages_for_query:] - + # 将这些消息的文本内容合并 query_texts_list = [] for msg_dict in recent_messages_for_query_list: text_content = msg_dict.get("processed_plain_text", "") - if text_content.strip(): # 只添加有内容的文本 + if text_content.strip(): # 只添加有内容的文本 # 可以选择是否添加发送者信息到查询文本中,例如: # sender_nickname = msg_dict.get("user_info", {}).get("user_nickname", "用户") # query_texts_list.append(f"{sender_nickname}: {text_content}") - query_texts_list.append(text_content) # 简单合并文本内容 - + query_texts_list.append(text_content) # 简单合并文本内容 + if query_texts_list: historical_chat_query = " ".join(query_texts_list).strip() - logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'") + logger.debug( + f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'" + ) else: - logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。") + logger.debug( + f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。" + ) else: logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 无聊天历史可用于生成私聊历史查询文本。") @@ -316,13 +324,13 @@ class ReplyGenerator: ( retrieved_global_memory_str, retrieved_knowledge_str, - retrieved_historical_chat_str # << 新增接收私聊历史回忆 + retrieved_historical_chat_str, # << 新增接收私聊历史回忆 ) = await retrieve_contextual_info( - text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识 + text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识 private_name=self.private_name, - chat_id=current_chat_id, # << 传递 chat_id - historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本 - current_short_term_history_earliest_time=recent_history_start_time_for_exclusion # <--- 新增传递的参数 + chat_id=current_chat_id, # << 传递 chat_id + historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本 + current_short_term_history_earliest_time=recent_history_start_time_for_exclusion, # <--- 新增传递的参数 ) # === 调用修改结束 === @@ -394,10 +402,18 @@ class ReplyGenerator: base_format_params = { "persona_text": persona_text, "goals_str": goals_str, - "chat_history_text": chat_history_text if chat_history_text.strip() else "还没有聊天记录。", # 当前短期历史 - "retrieved_global_memory_str": retrieved_global_memory_str if retrieved_global_memory_str.strip() else "无相关全局记忆。", - "retrieved_knowledge_str": retrieved_knowledge_str if retrieved_knowledge_str.strip() else "无相关知识。", - "retrieved_historical_chat_str": retrieved_historical_chat_str if retrieved_historical_chat_str.strip() else "无相关私聊历史回忆。", # << 新增 + "chat_history_text": chat_history_text + if chat_history_text.strip() + else "还没有聊天记录。", # 当前短期历史 + "retrieved_global_memory_str": retrieved_global_memory_str + if retrieved_global_memory_str.strip() + else "无相关全局记忆。", + "retrieved_knowledge_str": retrieved_knowledge_str + if retrieved_knowledge_str.strip() + else "无相关知识。", + "retrieved_historical_chat_str": retrieved_historical_chat_str + if retrieved_historical_chat_str.strip() + else "无相关私聊历史回忆。", # << 新增 "last_rejection_info": last_rejection_info_str, "current_time_str": current_time_value, "sender_name": sender_name_str,