From ed7e3399079a275347ce95702a10426be4d2d33d Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 16:58:57 +0800 Subject: [PATCH 01/27] =?UTF-8?q?=E9=95=BF=E6=9C=9F=E8=AE=B0=E5=BF=86=20?= =?UTF-8?q?=E7=AC=AC=E4=B8=80=E7=89=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_processor.py | 168 +++++--- src/plugins/PFC/pfc_utils.py | 593 +++++++++++++++++------------ src/plugins/PFC/reply_generator.py | 76 +++- 3 files changed, 525 insertions(+), 312 deletions(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index 428db544..8aaf800d 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -1,123 +1,171 @@ import traceback - -from maim_message import UserInfo +import re +from typing import Any, Dict +from datetime import datetime # 确保导入 datetime +from maim_message import UserInfo, MessageRecv # 从 maim_message 导入 MessageRecv from src.config.config import global_config from src.common.logger_manager import get_logger -from ..chat.chat_stream import chat_manager -from typing import Optional, Dict, Any +from ..chat.chat_stream import chat_manager +from src.plugins.chat.utils import get_embedding +from src.common.database import db from .pfc_manager import PFCManager -from src.plugins.chat.message import MessageRecv -from src.plugins.storage.storage import MessageStorage -from datetime import datetime - logger = get_logger("pfc_processor") -async def _handle_error(error: Exception, context: str, message: Optional[MessageRecv] = None) -> None: +async def _handle_error(error: Exception, context: str, message: MessageRecv | None = None) -> None: # 明确 message 类型 """统一的错误处理函数 - - Args: - error: 捕获到的异常 - context: 错误发生的上下文描述 - message: 可选的消息对象,用于记录相关消息内容 + # ... (方法注释不变) ... """ logger.error(f"{context}: {error}") logger.error(traceback.format_exc()) - if message and hasattr(message, "raw_message"): + # 检查 message 是否 None 以及是否有 raw_message 属性 + if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message + raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取 + if raw_msg_content: + logger.error(f"相关消息原始内容: {raw_msg_content}") + elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message logger.error(f"相关消息原始内容: {message.raw_message}") class PFCProcessor: - """PFC 处理器,负责处理接收到的信息并计数""" - def __init__(self): """初始化 PFC 处理器,创建消息存储实例""" - self.storage = MessageStorage() + # MessageStorage() 的实例化位置和具体类是什么? + # 我们假设它来自 src.plugins.storage.storage + # 但由于我们不能修改那个文件,所以这里的 self.storage 将按原样使用 + from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解 + self.storage: MessageStorage = MessageStorage() self.pfc_manager = PFCManager.get_instance() - async def process_message(self, message_data: Dict[str, Any]) -> None: + async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict """处理接收到的原始消息数据 - - 主要流程: - 1. 消息解析与初始化 - 2. 过滤检查 - 3. 消息存储 - 4. 创建 PFC 流 - 5. 日志记录 - - Args: - message_data: 原始消息字符串 + # ... (方法注释不变) ... """ - message = None + message_obj: MessageRecv | None = None # 初始化为 None,并明确类型 try: # 1. 消息解析与初始化 - message = MessageRecv(message_data) - groupinfo = message.message_info.group_info - userinfo = message.message_info.user_info - messageinfo = message.message_info + message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv + # 确保 message_obj.message_info 存在 + if not hasattr(message_obj, 'message_info'): + logger.error("MessageRecv 对象缺少 message_info 属性。跳过处理。") + return + + groupinfo = getattr(message_obj.message_info, 'group_info', None) + userinfo = getattr(message_obj.message_info, 'user_info', None) + + if userinfo is None: # 确保 userinfo 存在 + logger.error("message_obj.message_info 中缺少 user_info。跳过处理。") + return + if not hasattr(userinfo, 'user_id'): # 确保 user_id 存在 + logger.error("userinfo 对象中缺少 user_id。跳过处理。") + return logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流") chat = await chat_manager.get_or_create_stream( - platform=messageinfo.platform, + platform=message_obj.message_info.platform, user_info=userinfo, group_info=groupinfo, ) - message.update_chat_stream(chat) + message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法 # 2. 过滤检查 - # 处理消息 - await message.process() - # 过滤词/正则表达式过滤 - if self._check_ban_words(message.processed_plain_text, userinfo) or self._check_ban_regex( - message.raw_message, userinfo - ): + await message_obj.process() # 调用 MessageRecv 的异步 process 方法 + if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \ + self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性 return - # 3. 消息存储 - await self.storage.store_message(message, chat) - logger.trace(f"存储成功: {message.processed_plain_text}") + # 3. 消息存储 (保持原有调用) + # 这里的 self.storage.store_message 来自 src/plugins/storage/storage.py + # 它内部会将 message_obj 转换为字典并存储 + await self.storage.store_message(message_obj, chat) + logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}") + + # === 新增:为已存储的消息生成嵌入并更新数据库文档 === + embedding_vector = None + text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 + + # 在 storage.py 中,会对 processed_plain_text 进行一次过滤 + # 为了保持一致,我们也在这里应用相同的过滤逻辑 + # 当然,更优的做法是 store_message 返回过滤后的文本,或在 message_obj 中增加一个 filtered_processed_plain_text 属性 + # 这里为了简单,我们先重复一次过滤逻辑 + pattern = r".*?|.*?|.*?" + if text_for_embedding: + filtered_text_for_embedding = re.sub(pattern, "", text_for_embedding, flags=re.DOTALL) + else: + filtered_text_for_embedding = "" + + if filtered_text_for_embedding and filtered_text_for_embedding.strip(): + try: + # request_type 参数根据你的 get_embedding 函数实际需求来定 + embedding_vector = await get_embedding(filtered_text_for_embedding, request_type="pfc_private_memory") + if embedding_vector: + logger.debug(f"成功为消息 ID '{message_obj.message_info.message_id}' 生成嵌入向量。") + + # 更新数据库中的对应文档 + # 确保你有权限访问和操作 db 对象 + update_result = await db.messages.update_one( + {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id}, + {"$set": {"embedding_vector": embedding_vector}} + ) + if update_result.modified_count > 0: + logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。") + elif update_result.matched_count > 0: + logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。") + else: + logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。") + else: + logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。") + except Exception as e_embed_update: + logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True) + else: + logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。") + # === 新增结束 === # 4. 创建 PFC 聊天流 - await self._create_pfc_chat(message) + await self._create_pfc_chat(message_obj) # 5. 日志记录 - # 将时间戳转换为datetime对象 - current_time = datetime.fromtimestamp(message.message_info.time).strftime("%H:%M:%S") + # 确保 message_obj.message_info.time 是 float 类型的时间戳 + current_time_display = datetime.fromtimestamp(float(message_obj.message_info.time)).strftime("%H:%M:%S") + + # 确保 userinfo.user_nickname 存在 + user_nickname_display = getattr(userinfo, 'user_nickname', '未知用户') + logger.info( - f"[{current_time}][私聊]{message.message_info.user_info.user_nickname}: {message.processed_plain_text}" + f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}" ) except Exception as e: - await _handle_error(e, "消息处理失败", message) + await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj - async def _create_pfc_chat(self, message: MessageRecv): + async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型 try: chat_id = str(message.chat_stream.stream_id) - private_name = str(message.message_info.user_info.user_nickname) + private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname if global_config.enable_pfc_chatting: await self.pfc_manager.get_or_create_conversation(chat_id, private_name) except Exception as e: - logger.error(f"创建PFC聊天失败: {e}") + logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True @staticmethod - def _check_ban_words(text: str, userinfo: UserInfo) -> bool: + def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 """检查消息中是否包含过滤词""" for word in global_config.ban_words: if word in text: - logger.info(f"[私聊]{userinfo.user_nickname}:{text}") + logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname logger.info(f"[过滤词识别]消息中含有{word},filtered") return True return False @staticmethod - def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: + def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 """检查消息是否匹配过滤正则表达式""" for pattern in global_config.ban_msgs_regex: - if pattern.search(text): - logger.info(f"[私聊]{userinfo.user_nickname}:{text}") - logger.info(f"[正则表达式过滤]消息匹配到{pattern},filtered") + if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象 + logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname + logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串 return True - return False + return False \ No newline at end of file diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index fc5437ab..666fa6e8 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -1,88 +1,285 @@ import traceback import json import re -from typing import Dict, Any, Optional, Tuple, List, Union -from src.common.logger_manager import get_logger # 确认 logger 的导入路径 -from src.plugins.memory_system.Hippocampus import HippocampusManager -from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # 确认 prompt_builder 的导入路径 -from src.plugins.chat.chat_stream import ChatStream -from ..person_info.person_info import person_info_manager -import math -from src.plugins.utils.chat_message_builder import build_readable_messages -from .observation_info import ObservationInfo +import asyncio # 确保导入 asyncio +from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 + +from src.common.logger_manager import get_logger from src.config.config import global_config +from src.common.database import db # << 确认此路径 + +# --- 依赖于你项目结构的导入,请务必仔细检查并根据你的实际情况调整 --- +from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径 +from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径 +from src.plugins.chat.utils import get_embedding # << 确认此路径 +from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径 +# --- 依赖导入结束 --- + +from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py +from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入) +import math # 来自原始 pfc_utils.py +from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入) + logger = get_logger("pfc_utils") - -async def retrieve_contextual_info(text: str, private_name: str) -> Tuple[str, str]: +# ============================================================================== +# 新增:专门用于检索 PFC 私聊历史对话上下文的函数 +# ============================================================================== +async def find_most_relevant_historical_message( + chat_id: str, + query_text: str, + similarity_threshold: float = 0.3 # 相似度阈值,可以根据效果调整 +) -> Optional[Dict[str, Any]]: """ - 根据输入文本检索相关的记忆和知识。 - - Args: - text: 用于检索的上下文文本 (例如聊天记录)。 - private_name: 私聊对象的名称,用于日志记录。 - - Returns: - Tuple[str, str]: (检索到的记忆字符串, 检索到的知识字符串) + 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。 """ - retrieved_memory_str = "无相关记忆。" + if not query_text or not query_text.strip(): + logger.debug(f"[{chat_id}] (私聊历史)查询文本为空,跳过检索。") + return None + + logger.debug(f"[{chat_id}] (私聊历史)开始为查询文本 '{query_text[:50]}...' 检索。") + + # 使用你项目中已有的 get_embedding 函数 + # request_type 参数需要根据 get_embedding 的实际需求调整 + query_embedding = await get_embedding(query_text, request_type="pfc_historical_chat_query") + if not query_embedding: + logger.warning(f"[{chat_id}] (私聊历史)未能为查询文本 '{query_text[:50]}...' 生成嵌入向量。") + return None + + pipeline = [ + { + "$match": { + "chat_id": chat_id, + "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}} + } + }, + { + "$addFields": { + "dotProduct": {"$reduce": {"input": {"$range": [0, {"$size": "$embedding_vector"}]}, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": [{"$arrayElemAt": ["$embedding_vector", "$$this"]}, {"$arrayElemAt": [query_embedding, "$$this"]}]}]}}}, + "queryVecMagnitude": {"$sqrt": {"$reduce": {"input": query_embedding, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}}, + "docVecMagnitude": {"$sqrt": {"$reduce": {"input": "$embedding_vector", "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}} + } + }, + { + "$addFields": { + "similarity": { + "$cond": [ + {"$and": [{"$gt": ["$queryVecMagnitude", 0]}, {"$gt": ["$docVecMagnitude", 0]}]}, + {"$divide": ["$dotProduct", {"$multiply": ["$queryVecMagnitude", "$docVecMagnitude"]}]}, + 0 + ] + } + } + }, + {"$match": {"similarity": {"$gte": similarity_threshold}}}, + {"$sort": {"similarity": -1}}, + {"$limit": 1}, + {"$project": {"_id": 0, "message_id": 1, "time": 1, "chat_id": 1, "user_info": 1, "processed_plain_text": 1, "similarity": 1}} # 可以不返回 embedding_vector 节省带宽 + ] + + try: + # 假设 db.messages 是存储PFC私聊消息并带有embedding_vector的集合 + results = await db.messages.aggregate(pipeline).to_list(length=1) + if results and len(results) > 0: + most_similar_message = results[0] + logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}") + return most_similar_message + else: + logger.debug(f"[{chat_id}] (私聊历史)未找到相似度超过 {similarity_threshold} 的相关消息。") + return None + except Exception as e: + logger.error(f"[{chat_id}] (私聊历史)在数据库中检索时出错: {e}", exc_info=True) + return None + +async def retrieve_chat_context_window( + chat_id: str, + anchor_message_id: str, + anchor_message_time: float, + window_size_before: int = 7, + window_size_after: int = 7 +) -> List[Dict[str, Any]]: + """ + 以某条消息为锚点,获取其前后的聊天记录形成一个上下文窗口。 + """ + if not anchor_message_id or anchor_message_time is None: + return [] + + context_messages: List[Dict[str, Any]] = [] # 明确类型 + logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...") + + try: + # 假设 db.messages 是存储PFC私聊消息的集合 + anchor_message = await db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id}) + + messages_before_cursor = db.messages.find( + {"chat_id": chat_id, "time": {"$lt": anchor_message_time}} + ).sort("time", -1).limit(window_size_before) + messages_before = await messages_before_cursor.to_list(length=window_size_before) + messages_before.reverse() + + messages_after_cursor = db.messages.find( + {"chat_id": chat_id, "time": {"$gt": anchor_message_time}} + ).sort("time", 1).limit(window_size_after) + messages_after = await messages_after_cursor.to_list(length=window_size_after) + + if messages_before: + context_messages.extend(messages_before) + if anchor_message: + anchor_message.pop("_id", None) + context_messages.append(anchor_message) + if messages_after: + context_messages.extend(messages_after) + + final_window: List[Dict[str, Any]] = [] # 明确类型 + seen_ids: set[str] = set() # 明确类型 + for msg in context_messages: + msg_id = msg.get("message_id") + if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在 + final_window.append(msg) + seen_ids.add(msg_id) + + final_window.sort(key=lambda m: m.get("time", 0)) + logger.info(f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。") + return final_window + except Exception as e: + logger.error(f"[{chat_id}] (私聊历史)获取消息 ID '{anchor_message_id}' 的上下文窗口时出错: {e}", exc_info=True) + return [] + +# ============================================================================== +# 修改后的 retrieve_contextual_info 函数 +# ============================================================================== +async def retrieve_contextual_info( + text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录) + private_name: str, # 用于日志 + chat_id: str, # 用于特定私聊历史的检索 + historical_chat_query_text: Optional[str] = None # 专门为私聊历史检索准备的查询文本 (例如最新的N条消息合并) +) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 + """ + 检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。 + """ + # 初始化返回值 + retrieved_global_memory_str = "无相关全局记忆。" retrieved_knowledge_str = "无相关知识。" - memory_log_msg = "未自动检索到相关记忆。" - knowledge_log_msg = "未自动检索到相关知识。" + retrieved_historical_chat_str = "无相关私聊历史回忆。" - if not text or text == "还没有聊天记录。" or text == "[构建聊天记录出错]": - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效上下文,跳过检索。") - return retrieved_memory_str, retrieved_knowledge_str - - # 1. 检索记忆 (逻辑来自原 _get_memory_info) - try: - related_memory = await HippocampusManager.get_instance().get_memory_from_text( - text=text, - max_memory_num=2, - max_memory_length=2, - max_depth=3, - fast_retrieval=False, - ) - if related_memory: - related_memory_info = "" - for memory in related_memory: - related_memory_info += memory[1] + "\n" - if related_memory_info: - # 注意:原版提示信息可以根据需要调整 - retrieved_memory_str = f"你回忆起:\n{related_memory_info.strip()}\n(以上是你的回忆,供参考)\n" - memory_log_msg = f"自动检索到记忆: {related_memory_info.strip()[:100]}..." + # --- 1. 全局压缩记忆检索 (来自 HippocampusManager) --- + # (保持你原始 pfc_utils.py 中这部分的逻辑基本不变) + global_memory_log_msg = f"开始全局压缩记忆检索 (基于文本: '{text[:30]}...')" + if text and text.strip() and text != "还没有聊天记录。" and text != "[构建聊天记录出错]": + try: + related_memory = await HippocampusManager.get_instance().get_memory_from_text( + text=text, + max_memory_num=2, + max_memory_length=2, # 你原始代码中这里是2,不是200 + max_depth=3, + fast_retrieval=False, # 你原始代码中这里是False + ) + if related_memory: + temp_global_memory_info = "" + for memory_item in related_memory: + if isinstance(memory_item, (list, tuple)) and len(memory_item) > 1: + temp_global_memory_info += str(memory_item[1]) + "\n" + elif isinstance(memory_item, str): + temp_global_memory_info += memory_item + "\n" + + if temp_global_memory_info.strip(): + retrieved_global_memory_str = f"你回忆起一些相关的全局记忆:\n{temp_global_memory_info.strip()}\n(以上是你的全局记忆,供参考)\n" + global_memory_log_msg = f"自动检索到全局压缩记忆: {temp_global_memory_info.strip()[:100]}..." + else: + global_memory_log_msg = "全局压缩记忆检索返回为空或格式不符。" else: - memory_log_msg = "自动检索记忆返回为空。" - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 记忆检索: {memory_log_msg}") - - except Exception as e: - logger.error( - f"[私聊][{private_name}] (retrieve_contextual_info) 自动检索记忆时出错: {e}\n{traceback.format_exc()}" - ) - retrieved_memory_str = "检索记忆时出错。\n" - - # 2. 检索知识 (逻辑来自原 action_planner 和 reply_generator) - try: - # 使用导入的 prompt_builder 实例及其方法 - knowledge_result = await prompt_builder.get_prompt_info( - message=text, - threshold=0.38, # threshold 可以根据需要调整 - ) - if knowledge_result: - retrieved_knowledge_str = knowledge_result # 直接使用返回结果 - knowledge_log_msg = "自动检索到相关知识。" - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 知识检索: {knowledge_log_msg}") - - except Exception as e: - logger.error( - f"[私聊][{private_name}] (retrieve_contextual_info) 自动检索知识时出错: {e}\n{traceback.format_exc()}" - ) - retrieved_knowledge_str = "检索知识时出错。\n" - - return retrieved_memory_str, retrieved_knowledge_str + global_memory_log_msg = "全局压缩记忆检索返回为空列表。" + logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 全局压缩记忆检索: {global_memory_log_msg}") + except Exception as e: + logger.error( + f"[私聊][{private_name}] (retrieve_contextual_info) 检索全局压缩记忆时出错: {e}\n{traceback.format_exc()}" + ) + retrieved_global_memory_str = "[检索全局压缩记忆时出错]\n" + else: + logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过全局压缩记忆检索。") + # --- 2. 相关知识检索 (来自 prompt_builder) --- + # (保持你原始 pfc_utils.py 中这部分的逻辑基本不变) + knowledge_log_msg = f"开始知识检索 (基于文本: '{text[:30]}...')" + if text and text.strip() and text != "还没有聊天记录。" and text != "[构建聊天记录出错]": + try: + knowledge_result = await prompt_builder.get_prompt_info( + message=text, + threshold=0.38, + ) + if knowledge_result and knowledge_result.strip(): # 确保结果不为空 + retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装 + knowledge_log_msg = f"自动检索到相关知识: {knowledge_result[:100]}..." + else: + knowledge_log_msg = "知识检索返回为空。" + logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 知识检索: {knowledge_log_msg}") + except Exception as e: + logger.error( + f"[私聊][{private_name}] (retrieve_contextual_info) 自动检索知识时出错: {e}\n{traceback.format_exc()}" + ) + retrieved_knowledge_str = "[检索知识时出错]\n" + else: + logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。") + + + # --- 3. 当前私聊的特定历史对话上下文检索 (新增逻辑) --- + query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None + historical_chat_log_msg = f"开始私聊历史检索 (查询文本: '{str(query_for_historical_chat)[:30]}...')" + + if query_for_historical_chat: + try: + most_relevant_message_doc = await find_most_relevant_historical_message( + chat_id=chat_id, + query_text=query_for_historical_chat, + similarity_threshold=0.5 # 你可以根据需要调整这个阈值 + ) + if most_relevant_message_doc: + anchor_id = most_relevant_message_doc.get("message_id") + anchor_time = most_relevant_message_doc.get("time") + if anchor_id and anchor_time is not None: + context_window_messages = await retrieve_chat_context_window( + chat_id=chat_id, + anchor_message_id=anchor_id, + anchor_message_time=anchor_time, + window_size_before=7, # 我们的目标:上7条 + window_size_after=7 # 我们的目标:下7条 (共15条,包括锚点) + ) + if context_window_messages: + formatted_window_str = await build_readable_messages( + context_window_messages, + replace_bot_name=False, # 在回忆中,保留原始发送者名称 + merge_messages=False, + timestamp_mode="relative", # 可以选择 'absolute' 或 'none' + read_mark=0.0 + ) + if formatted_window_str and formatted_window_str.strip(): + retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n" + historical_chat_log_msg = f"自动检索到相关私聊历史片段 (锚点ID: {anchor_id}, 相似度: {most_relevant_message_doc.get('similarity'):.3f})" + else: + historical_chat_log_msg = "检索到的私聊历史对话窗口格式化后为空。" + else: + historical_chat_log_msg = f"找到了相关锚点消息 (ID: {anchor_id}),但未能构建其上下文窗口。" + else: + historical_chat_log_msg = "检索到的最相关私聊历史消息文档缺少 message_id 或 time。" + else: + historical_chat_log_msg = "未找到足够相关的私聊历史对话消息。" + logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}") + except Exception as e: + logger.error( + f"[私聊][{private_name}] (retrieve_contextual_info) 检索私聊历史对话时出错: {e}\n{traceback.format_exc()}" + ) + retrieved_historical_chat_str = "[检索私聊历史对话时出错]\n" + else: + logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。") + + return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str + + +# ============================================================================== +# 你原始 pfc_utils.py 中的其他函数保持不变 +# ============================================================================== def get_items_from_json( content: str, private_name: str, @@ -92,121 +289,66 @@ def get_items_from_json( allow_array: bool = True, ) -> Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]: """从文本中提取JSON内容并获取指定字段 - - Args: - content: 包含JSON的文本 - private_name: 私聊名称 - *items: 要提取的字段名 - default_values: 字段的默认值,格式为 {字段名: 默认值} - required_types: 字段的必需类型,格式为 {字段名: 类型} - allow_array: 是否允许解析JSON数组 - - Returns: - Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]: (是否成功, 提取的字段字典或字典列表) + (保持你原始 pfc_utils.py 中的此函数代码不变) """ cleaned_content = content.strip() - result: Union[Dict[str, Any], List[Dict[str, Any]]] = {} # 初始化类型 - # 匹配 ```json ... ``` 或 ``` ... ``` + result: Union[Dict[str, Any], List[Dict[str, Any]]] = {} markdown_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", cleaned_content, re.IGNORECASE) if markdown_match: cleaned_content = markdown_match.group(1).strip() logger.debug(f"[私聊][{private_name}] 已去除 Markdown 标记,剩余内容: {cleaned_content[:100]}...") - # --- 新增结束 --- - - # 设置默认值 - default_result: Dict[str, Any] = {} # 用于单对象时的默认值 + default_result: Dict[str, Any] = {} if default_values: default_result.update(default_values) - result = default_result.copy() # 先用默认值初始化 - - # 首先尝试解析为JSON数组 + result = default_result.copy() if allow_array: try: - # 尝试直接解析清理后的内容为列表 json_array = json.loads(cleaned_content) - if isinstance(json_array, list): valid_items_list: List[Dict[str, Any]] = [] - for item in json_array: - if not isinstance(item, dict): - logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item}") + for item_json in json_array: # Renamed item to item_json to avoid conflict + if not isinstance(item_json, dict): + logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item_json}") continue - - current_item_result = default_result.copy() # 每个元素都用默认值初始化 + current_item_result = default_result.copy() valid_item = True - - # 提取并验证字段 - for field in items: - if field in item: - current_item_result[field] = item[field] - elif field not in default_result: # 如果字段不存在且没有默认值 - logger.warning(f"[私聊][{private_name}] JSON数组元素缺少必要字段 '{field}': {item}") - valid_item = False - break # 这个元素无效 - - if not valid_item: - continue - - # 验证类型 + for field in items: # items is args from function signature + if field in item_json: + current_item_result[field] = item_json[field] + elif field not in default_result: + logger.warning(f"[私聊][{private_name}] JSON数组元素缺少必要字段 '{field}': {item_json}") + valid_item = False; break + if not valid_item: continue if required_types: for field, expected_type in required_types.items(): - # 检查 current_item_result 中是否存在该字段 (可能来自 item 或 default_values) - if field in current_item_result and not isinstance( - current_item_result[field], expected_type - ): - logger.warning( - f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item}" - ) - valid_item = False - break - - if not valid_item: - continue - - # 验证字符串不为空 (只检查 items 中要求的字段) + if field in current_item_result and not isinstance(current_item_result[field], expected_type): + logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}") + valid_item = False; break + if not valid_item: continue for field in items: - if ( - field in current_item_result - and isinstance(current_item_result[field], str) - and not current_item_result[field].strip() - ): - logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item}") - valid_item = False - break - - if valid_item: - valid_items_list.append(current_item_result) # 只添加完全有效的项 - - if valid_items_list: # 只有当列表不为空时才认为是成功 + if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip(): + logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}") + valid_item = False; break + if valid_item: valid_items_list.append(current_item_result) + if valid_items_list: logger.debug(f"[私聊][{private_name}] 成功解析JSON数组,包含 {len(valid_items_list)} 个有效项目。") return True, valid_items_list else: - # 如果列表为空(可能所有项都无效),则继续尝试解析为单个对象 logger.debug(f"[私聊][{private_name}] 解析为JSON数组,但未找到有效项目,尝试解析单个JSON对象。") - # result 重置回单个对象的默认值 result = default_result.copy() - except json.JSONDecodeError: logger.debug(f"[私聊][{private_name}] JSON数组直接解析失败,尝试解析单个JSON对象") - # result 重置回单个对象的默认值 result = default_result.copy() except Exception as e: logger.error(f"[私聊][{private_name}] 尝试解析JSON数组时发生未知错误: {str(e)}") - # result 重置回单个对象的默认值 result = default_result.copy() - - # 尝试解析为单个JSON对象 try: - # 尝试直接解析清理后的内容 json_data = json.loads(cleaned_content) if not isinstance(json_data, dict): logger.error(f"[私聊][{private_name}] 解析为单个对象,但结果不是字典类型: {type(json_data)}") - return False, default_result # 返回失败和默认值 - + return False, default_result except json.JSONDecodeError: - # 如果直接解析失败,尝试用正则表达式查找 JSON 对象部分 (作为后备) - # 这个正则比较简单,可能无法处理嵌套或复杂的 JSON - json_pattern = r"\{[\s\S]*?\}" # 使用非贪婪匹配 + json_pattern = r"\{[\s\S]*?\}" json_match = re.search(json_pattern, cleaned_content) if json_match: try: @@ -220,133 +362,97 @@ def get_items_from_json( logger.error(f"[私聊][{private_name}] 正则提取的部分 '{potential_json_str[:100]}...' 无法解析为JSON。") return False, default_result else: - logger.error( - f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}..." - ) + logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...") return False, default_result - - # 提取并验证字段 (适用于单个JSON对象) - # 确保 result 是字典类型用于更新 - if not isinstance(result, dict): - result = default_result.copy() # 如果之前是列表,重置为字典 - + if not isinstance(result, dict): result = default_result.copy() valid_single_object = True - for item in items: - if item in json_data: - result[item] = json_data[item] - elif item not in default_result: # 如果字段不存在且没有默认值 - logger.error(f"[私聊][{private_name}] JSON对象缺少必要字段 '{item}'。JSON内容: {json_data}") - valid_single_object = False - break # 这个对象无效 - - if not valid_single_object: - return False, default_result - - # 验证类型 + for item_field in items: # Renamed item to item_field + if item_field in json_data: result[item_field] = json_data[item_field] + elif item_field not in default_result: + logger.error(f"[私聊][{private_name}] JSON对象缺少必要字段 '{item_field}'。JSON内容: {json_data}") + valid_single_object = False; break + if not valid_single_object: return False, default_result if required_types: for field, expected_type in required_types.items(): if field in result and not isinstance(result[field], expected_type): - logger.error( - f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})" - ) - valid_single_object = False - break - - if not valid_single_object: - return False, default_result - - # 验证字符串不为空 (只检查 items 中要求的字段) + logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})") + valid_single_object = False; break + if not valid_single_object: return False, default_result for field in items: if field in result and isinstance(result[field], str) and not result[field].strip(): logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 不能为空字符串") - valid_single_object = False - break - + valid_single_object = False; break if valid_single_object: logger.debug(f"[私聊][{private_name}] 成功解析并验证了单个JSON对象。") - return True, result # 返回提取并验证后的字典 + return True, result else: - return False, default_result # 验证失败 + return False, default_result async def get_person_id(private_name: str, chat_stream: ChatStream): + """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ private_user_id_str: Optional[str] = None private_platform_str: Optional[str] = None - private_nickname_str = private_name + # private_nickname_str = private_name # 这行在你提供的代码中没有被使用,可以考虑移除 if chat_stream.user_info: private_user_id_str = str(chat_stream.user_info.user_id) private_platform_str = chat_stream.user_info.platform logger.debug( - f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_nickname_str}" + f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name ) - elif chat_stream.group_info is None and private_name: - pass + # elif chat_stream.group_info is None and private_name: # 这个 elif 条件体为空,可以移除 + # pass if private_user_id_str and private_platform_str: try: private_user_id_int = int(private_user_id_str) - # person_id = person_info_manager.get_person_id( # get_person_id 可能只查询,不创建 - # private_platform_str, - # private_user_id_int - # ) - # 使用 get_or_create_person 确保用户存在 person_id = await person_info_manager.get_or_create_person( platform=private_platform_str, user_id=private_user_id_int, - nickname=private_name, # 使用传入的 private_name 作为昵称 + nickname=private_name, ) - if person_id is None: # 如果 get_or_create_person 返回 None,说明创建失败 + if person_id is None: logger.error(f"[私聊][{private_name}] get_or_create_person 未能获取或创建 person_id。") - return None # 返回 None 表示失败 - - return person_id, private_platform_str, private_user_id_str # 返回获取或创建的 person_id + return None + return person_id, private_platform_str, private_user_id_str except ValueError: logger.error(f"[私聊][{private_name}] 无法将 private_user_id_str ('{private_user_id_str}') 转换为整数。") - return None # 返回 None 表示失败 + return None except Exception as e_pid: logger.error(f"[私聊][{private_name}] 获取或创建 person_id 时出错: {e_pid}") - return None # 返回 None 表示失败 + return None else: logger.warning( f"[私聊][{private_name}] 未能确定私聊对象的 user_id 或 platform,无法获取 person_id。将在收到消息后尝试。" ) - return None # 返回 None 表示失败 + return None async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: float) -> float: - # 限制 old_value 范围 + """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ old_value = max(-1000, min(1000, old_value)) value = raw_adjustment - if old_value >= 0: if value >= 0: value = value * math.cos(math.pi * old_value / 2000) if old_value > 500: - rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) + # 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整 + rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False) high_value_count = len(rdict) - if old_value > 700: - value *= 3 / (high_value_count + 2) - else: - value *= 3 / (high_value_count + 3) - elif value < 0: - value = value * math.exp(old_value / 2000) - else: - value = 0 - else: - if value >= 0: - value = value * math.exp(old_value / 2000) - elif value < 0: - value = value * math.cos(math.pi * old_value / 2000) - else: - value = 0 - + if old_value > 700: value *= 3 / (high_value_count + 2) + else: value *= 3 / (high_value_count + 3) + elif value < 0: value = value * math.exp(old_value / 2000) + # else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0 + else: # old_value < 0 + if value >= 0: value = value * math.exp(old_value / 2000) + elif value < 0: value = value * math.cos(math.pi * old_value / 2000) + # else: value = 0 # 你原始代码中没有这句 return value async def build_chat_history_text(observation_info: ObservationInfo, private_name: str) -> str: - """构建聊天历史记录文本 (包含未处理消息)""" - + """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ chat_history_text = "" try: if hasattr(observation_info, "chat_history_str") and observation_info.chat_history_str: @@ -358,27 +464,32 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam ) else: chat_history_text = "还没有聊天记录。\n" + unread_count = getattr(observation_info, "new_messages_count", 0) unread_messages = getattr(observation_info, "unprocessed_messages", []) if unread_count > 0 and unread_messages: - bot_qq_str = str(global_config.BOT_QQ) - other_unread_messages = [ - msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str - ] - other_unread_count = len(other_unread_messages) - if other_unread_count > 0: - new_messages_str = await build_readable_messages( - other_unread_messages, - replace_bot_name=True, - merge_messages=False, - timestamp_mode="relative", - read_mark=0.0, - ) - chat_history_text += f"\n{new_messages_str}\n------\n" + bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取 + if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤 + other_unread_messages = [ + msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str + ] + other_unread_count = len(other_unread_messages) + if other_unread_count > 0: + new_messages_str = await build_readable_messages( + other_unread_messages, + replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字 + merge_messages=False, + timestamp_mode="relative", + read_mark=0.0, + ) + chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的 + else: + logger.warning(f"[私聊][{private_name}] BOT_QQ 未配置,无法准确过滤未读消息中的机器人自身消息。") + except AttributeError as e: logger.warning(f"[私聊][{private_name}] 构建聊天记录文本时属性错误: {e}") chat_history_text = "[获取聊天记录时出错]\n" except Exception as e: logger.error(f"[私聊][{private_name}] 处理聊天记录时发生未知错误: {e}") chat_history_text = "[处理聊天记录时出错]\n" - return chat_history_text + return chat_history_text \ No newline at end of file diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 174e3ba0..f2f925d6 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -1,5 +1,5 @@ import random - +import asyncio from .pfc_utils import retrieve_contextual_info from src.common.logger_manager import get_logger @@ -60,6 +60,9 @@ PROMPT_DIRECT_REPLY = """ {retrieved_knowledge_str} 请你**记住上面的知识**,在回复中有可能会用到。 +你还想到了一些你们之前的聊天记录: +{retrieved_historical_chat_str} + 最近的聊天记录: {chat_history_text} @@ -68,6 +71,8 @@ PROMPT_DIRECT_REPLY = """ {last_rejection_info} + + 请根据上述信息,结合聊天记录,回复对方。该回复应该: 1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!) 2. 符合你的性格特征和身份细节 @@ -97,6 +102,9 @@ PROMPT_SEND_NEW_MESSAGE = """ {retrieved_knowledge_str} 请你**记住上面的知识**,在发消息时有可能会用到。 +你还想到了一些你们之前的聊天记录: +{retrieved_historical_chat_str} + 最近的聊天记录: {chat_history_text} @@ -223,12 +231,59 @@ class ReplyGenerator: current_emotion_text_str = getattr(conversation_info, "current_emotion_text", "心情平静。") persona_text = f"你的名字是{self.name},{self.personality_info}。" - retrieval_context = chat_history_text - retrieved_memory_str, retrieved_knowledge_str = await retrieve_contextual_info( - retrieval_context, self.private_name - ) + historical_chat_query = "" + num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子 + if observation_info.chat_history and len(observation_info.chat_history) > 0: + # 从 chat_history (已处理并存入 ObservationInfo 的历史) 中取最新N条 + # 或者,如果 observation_info.unprocessed_messages 更能代表“当前上下文”,也可以考虑用它 + # 我们先用 chat_history,因为它包含了双方的对话历史,可能更稳定 + recent_messages_for_query_list = observation_info.chat_history[-num_recent_messages_for_query:] + + # 将这些消息的文本内容合并 + query_texts_list = [] + for msg_dict in recent_messages_for_query_list: + text_content = msg_dict.get("processed_plain_text", "") + if text_content.strip(): # 只添加有内容的文本 + # 可以选择是否添加发送者信息到查询文本中,例如: + # sender_nickname = msg_dict.get("user_info", {}).get("user_nickname", "用户") + # query_texts_list.append(f"{sender_nickname}: {text_content}") + query_texts_list.append(text_content) # 简单合并文本内容 + + if query_texts_list: + historical_chat_query = " ".join(query_texts_list).strip() + logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'") + else: + logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。") + else: + logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 无聊天历史可用于生成私聊历史查询文本。") + + current_chat_id = self.chat_observer.stream_id if self.chat_observer else None + if not current_chat_id: + logger.error(f"[私聊][{self.private_name}] (ReplyGenerator) 无法获取 current_chat_id,跳过所有上下文检索!") + retrieved_global_memory_str = "[获取全局记忆出错:chat_id 未知]" + retrieved_knowledge_str = "[获取知识出错:chat_id 未知]" + retrieved_historical_chat_str = "[获取私聊历史回忆出错:chat_id 未知]" + else: + # retrieval_context 之前是用 chat_history_text,现在也用它作为全局记忆和知识的检索上下文 + retrieval_context_for_global_and_knowledge = chat_history_text + + ( + retrieved_global_memory_str, + retrieved_knowledge_str, + retrieved_historical_chat_str # << 新增接收私聊历史回忆 + ) = await retrieve_contextual_info( + text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识 + private_name=self.private_name, + chat_id=current_chat_id, # << 传递 chat_id + historical_chat_query_text=historical_chat_query # << 传递专门的查询文本 + ) + # === 调用修改结束 === + logger.info( - f"[私聊][{self.private_name}] (ReplyGenerator) 统一检索完成。记忆: {'有' if '回忆起' in retrieved_memory_str else '无'} / 知识: {'有' if '出错' not in retrieved_knowledge_str and '无相关知识' not in retrieved_knowledge_str else '无'}" + f"[私聊][{self.private_name}] (ReplyGenerator) 上下文检索完成。\n" + f" 全局记忆: {'有内容' if '回忆起' in retrieved_global_memory_str else '无或出错'}\n" + f" 知识: {'有内容' if '出错' not in retrieved_knowledge_str and '无相关知识' not in retrieved_knowledge_str and retrieved_knowledge_str.strip() else '无或出错'}\n" + f" 私聊历史回忆: {'有内容' if '回忆起一段相关的历史聊天' in retrieved_historical_chat_str else '无或出错'}" ) last_rejection_info_str = "" @@ -292,11 +347,10 @@ class ReplyGenerator: base_format_params = { "persona_text": persona_text, "goals_str": goals_str, - "chat_history_text": chat_history_text, - "retrieved_memory_str": retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 确保已定义 - "retrieved_knowledge_str": retrieved_knowledge_str - if retrieved_knowledge_str - else "无相关知识。", # 确保已定义 + "chat_history_text": chat_history_text if chat_history_text.strip() else "还没有聊天记录。", # 当前短期历史 + "retrieved_global_memory_str": retrieved_global_memory_str if retrieved_global_memory_str.strip() else "无相关全局记忆。", + "retrieved_knowledge_str": retrieved_knowledge_str if retrieved_knowledge_str.strip() else "无相关知识。", + "retrieved_historical_chat_str": retrieved_historical_chat_str if retrieved_historical_chat_str.strip() else "无相关私聊历史回忆。", # << 新增 "last_rejection_info": last_rejection_info_str, "current_time_str": current_time_value, "sender_name": sender_name_str, From fe3ddb3b2eca81d4df2cffd9c03b8d9ac8585b73 Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 17:09:05 +0800 Subject: [PATCH 02/27] fix --- src/plugins/PFC/pfc_processor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index 8aaf800d..e13ba2d0 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -2,7 +2,8 @@ import traceback import re from typing import Any, Dict from datetime import datetime # 确保导入 datetime -from maim_message import UserInfo, MessageRecv # 从 maim_message 导入 MessageRecv +from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv +from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py from src.config.config import global_config from src.common.logger_manager import get_logger from ..chat.chat_stream import chat_manager From 79dd9cc6550ff11d2a6d157d5f6f1670eb98e577 Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 17:29:58 +0800 Subject: [PATCH 03/27] fix --- src/plugins/PFC/pfc_processor.py | 2 +- src/plugins/PFC/pfc_utils.py | 14 ++++++++------ src/plugins/PFC/reply_generator.py | 5 ----- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index e13ba2d0..b6e6b8b3 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -105,7 +105,7 @@ class PFCProcessor: # 更新数据库中的对应文档 # 确保你有权限访问和操作 db 对象 - update_result = await db.messages.update_one( + update_result = db.messages.update_one( {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id}, {"$set": {"embedding_vector": embedding_vector}} ) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 666fa6e8..2441941f 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -79,8 +79,10 @@ async def find_most_relevant_historical_message( ] try: - # 假设 db.messages 是存储PFC私聊消息并带有embedding_vector的集合 - results = await db.messages.aggregate(pipeline).to_list(length=1) + # --- 确定性修改:同步执行聚合和结果转换 --- + cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor + results = list(cursor) # 直接将 CommandCursor 转换为列表 + # --- 修改结束 --- if results and len(results) > 0: most_similar_message = results[0] logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}") @@ -109,19 +111,19 @@ async def retrieve_chat_context_window( logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...") try: - # 假设 db.messages 是存储PFC私聊消息的集合 - anchor_message = await db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id}) + # --- 确定性修改:同步执行 find_one 和 find --- + anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id}) messages_before_cursor = db.messages.find( {"chat_id": chat_id, "time": {"$lt": anchor_message_time}} ).sort("time", -1).limit(window_size_before) - messages_before = await messages_before_cursor.to_list(length=window_size_before) + messages_before = list(messages_before_cursor) messages_before.reverse() messages_after_cursor = db.messages.find( {"chat_id": chat_id, "time": {"$gt": anchor_message_time}} ).sort("time", 1).limit(window_size_after) - messages_after = await messages_after_cursor.to_list(length=window_size_after) + messages_after = list(messages_after_cursor) if messages_before: context_messages.extend(messages_before) diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index f2f925d6..0a82bad4 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -66,13 +66,10 @@ PROMPT_DIRECT_REPLY = """ 最近的聊天记录: {chat_history_text} -{retrieved_memory_str} - {last_rejection_info} - 请根据上述信息,结合聊天记录,回复对方。该回复应该: 1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!) 2. 符合你的性格特征和身份细节 @@ -108,8 +105,6 @@ PROMPT_SEND_NEW_MESSAGE = """ 最近的聊天记录: {chat_history_text} -{retrieved_memory_str} - {last_rejection_info} 请根据上述信息,判断你是否要继续发一条新消息(例如对之前消息的补充,深入话题,或追问等等)。如果你觉得要发送,该消息应该: From b998d7a05b8f5425ec8053e1faf4bf609e42e19a Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 17:52:40 +0800 Subject: [PATCH 04/27] fix --- src/plugins/PFC/pfc_utils.py | 9 +++++++-- src/plugins/PFC/reply_generator.py | 6 ++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 2441941f..5e89ee6e 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -2,6 +2,7 @@ import traceback import json import re import asyncio # 确保导入 asyncio +import time from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 from src.common.logger_manager import get_logger @@ -29,7 +30,8 @@ logger = get_logger("pfc_utils") async def find_most_relevant_historical_message( chat_id: str, query_text: str, - similarity_threshold: float = 0.3 # 相似度阈值,可以根据效果调整 + similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 + exclude_recent_seconds: int = 300 # 新增参数:排除最近多少秒内的消息(例如5分钟) ) -> Optional[Dict[str, Any]]: """ 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。 @@ -47,6 +49,8 @@ async def find_most_relevant_historical_message( logger.warning(f"[{chat_id}] (私聊历史)未能为查询文本 '{query_text[:50]}...' 生成嵌入向量。") return None + current_timestamp = time.time() # 获取当前时间戳 + pipeline = [ { "$match": { @@ -235,7 +239,8 @@ async def retrieve_contextual_info( most_relevant_message_doc = await find_most_relevant_historical_message( chat_id=chat_id, query_text=query_for_historical_chat, - similarity_threshold=0.5 # 你可以根据需要调整这个阈值 + similarity_threshold=0.5, # 你可以根据需要调整这个阈值 + exclude_recent_seconds=300 ) if most_relevant_message_doc: anchor_id = most_relevant_message_doc.get("message_id") diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 0a82bad4..2a0cda6d 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -60,6 +60,9 @@ PROMPT_DIRECT_REPLY = """ {retrieved_knowledge_str} 请你**记住上面的知识**,在回复中有可能会用到。 +你有以下记忆可供参考: +{retrieved_global_memory_str} + 你还想到了一些你们之前的聊天记录: {retrieved_historical_chat_str} @@ -99,6 +102,9 @@ PROMPT_SEND_NEW_MESSAGE = """ {retrieved_knowledge_str} 请你**记住上面的知识**,在发消息时有可能会用到。 +你有以下记忆可供参考: +{retrieved_global_memory_str} + 你还想到了一些你们之前的聊天记录: {retrieved_historical_chat_str} From 35f0c8224e7f54b00b422698c9179ba4ac9665fa Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 18:39:59 +0800 Subject: [PATCH 05/27] fix --- src/plugins/PFC/pfc_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 5e89ee6e..f4627c27 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -50,12 +50,14 @@ async def find_most_relevant_historical_message( return None current_timestamp = time.time() # 获取当前时间戳 + excluded_time_threshold = current_timestamp - exclude_recent_seconds pipeline = [ { "$match": { "chat_id": chat_id, - "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}} + "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}}, + "time": {"$lt": excluded_time_threshold} } }, { From 53ba204e25d901c507a0e8ddc740b66ed4366af8 Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 19:00:40 +0800 Subject: [PATCH 06/27] fix --- src/plugins/PFC/pfc_utils.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index f4627c27..62e20cfe 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -3,6 +3,7 @@ import json import re import asyncio # 确保导入 asyncio import time +import datetime from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 from src.common.logger_manager import get_logger @@ -31,7 +32,7 @@ async def find_most_relevant_historical_message( chat_id: str, query_text: str, similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 - exclude_recent_seconds: int = 300 # 新增参数:排除最近多少秒内的消息(例如5分钟) + exclude_recent_seconds: int = 900 # 新增参数:排除最近多少秒内的消息(例如5分钟) ) -> Optional[Dict[str, Any]]: """ 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。 @@ -88,6 +89,15 @@ async def find_most_relevant_historical_message( # --- 确定性修改:同步执行聚合和结果转换 --- cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor results = list(cursor) # 直接将 CommandCursor 转换为列表 + if not results: + logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。") + else: + logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:") + for res_msg in results: # 最多只打印我们 limit 的那几条 + msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S') + logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'") + # --- 新增日志结束 --- + # --- 修改结束 --- if results and len(results) > 0: most_similar_message = results[0] @@ -104,6 +114,7 @@ async def retrieve_chat_context_window( chat_id: str, anchor_message_id: str, anchor_message_time: float, + excluded_time_threshold_for_window: float, window_size_before: int = 7, window_size_after: int = 7 ) -> List[Dict[str, Any]]: @@ -125,11 +136,21 @@ async def retrieve_chat_context_window( ).sort("time", -1).limit(window_size_before) messages_before = list(messages_before_cursor) messages_before.reverse() + # --- 新增日志 --- + logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}") + logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):") + for msg_b in messages_before: + logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'") messages_after_cursor = db.messages.find( - {"chat_id": chat_id, "time": {"$gt": anchor_message_time}} + {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} # <--- 修改这里 ).sort("time", 1).limit(window_size_after) messages_after = list(messages_after_cursor) + # --- 新增日志 --- + logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):") + for msg_a in messages_after: + logger.debug(f" - Time: {datetime.fromtimestamp(msg_a.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text','')[:30]}...'") + if messages_before: context_messages.extend(messages_before) From ebc3dd53557df77d33eefabbce29df6c18e24e0c Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 19:09:16 +0800 Subject: [PATCH 07/27] fix --- src/plugins/PFC/pfc_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 62e20cfe..9e7d745a 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -3,7 +3,7 @@ import json import re import asyncio # 确保导入 asyncio import time -import datetime +from datetime import datetime from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 from src.common.logger_manager import get_logger From fe4990c73161eab59b3038a7064b413f8be9b7fb Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 19:21:17 +0800 Subject: [PATCH 08/27] fix --- src/plugins/PFC/pfc_utils.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 9e7d745a..012c33f1 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -128,7 +128,7 @@ async def retrieve_chat_context_window( logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...") try: - # --- 确定性修改:同步执行 find_one 和 find --- + # --- 同步执行 find_one 和 find --- anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id}) messages_before_cursor = db.messages.find( @@ -143,7 +143,7 @@ async def retrieve_chat_context_window( logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'") messages_after_cursor = db.messages.find( - {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} # <--- 修改这里 + {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} ).sort("time", 1).limit(window_size_after) messages_after = list(messages_after_cursor) # --- 新增日志 --- @@ -200,9 +200,9 @@ async def retrieve_contextual_info( related_memory = await HippocampusManager.get_instance().get_memory_from_text( text=text, max_memory_num=2, - max_memory_length=2, # 你原始代码中这里是2,不是200 + max_memory_length=2, max_depth=3, - fast_retrieval=False, # 你原始代码中这里是False + fast_retrieval=False, ) if related_memory: temp_global_memory_info = "" @@ -259,22 +259,34 @@ async def retrieve_contextual_info( if query_for_historical_chat: try: + # 获取 find_most_relevant_historical_message 调用时实际使用的 exclude_recent_seconds 值 + actual_exclude_seconds_for_find = 900 # 根据您对 find_most_relevant_historical_message 的调用 + most_relevant_message_doc = await find_most_relevant_historical_message( chat_id=chat_id, query_text=query_for_historical_chat, - similarity_threshold=0.5, # 你可以根据需要调整这个阈值 - exclude_recent_seconds=300 + similarity_threshold=0.5, + exclude_recent_seconds=actual_exclude_seconds_for_find ) if most_relevant_message_doc: anchor_id = most_relevant_message_doc.get("message_id") anchor_time = most_relevant_message_doc.get("time") if anchor_id and anchor_time is not None: + # 计算传递给 retrieve_chat_context_window 的时间上限 + # 这个上限应该与 find_most_relevant_historical_message 的排除点一致 + time_limit_for_window_after = time.time() - actual_exclude_seconds_for_find + + logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " + f"with anchor_time: {anchor_time}, " + f"excluded_time_threshold_for_window: {time_limit_for_window_after}") + context_window_messages = await retrieve_chat_context_window( chat_id=chat_id, anchor_message_id=anchor_id, anchor_message_time=anchor_time, - window_size_before=7, # 我们的目标:上7条 - window_size_after=7 # 我们的目标:下7条 (共15条,包括锚点) + excluded_time_threshold_for_window=time_limit_for_window_after, # <--- 传递这个值 + window_size_before=7, + window_size_after=7 ) if context_window_messages: formatted_window_str = await build_readable_messages( From c9e6cf214076929097a03b0599fafb5d52cb6a47 Mon Sep 17 00:00:00 2001 From: 114514 <2514624910@qq.com> Date: Fri, 9 May 2025 20:50:46 +0800 Subject: [PATCH 09/27] fix --- src/plugins/PFC/pfc_utils.py | 94 ++++++++++++++++++++++-------- src/plugins/PFC/reply_generator.py | 51 +++++++++++++++- 2 files changed, 119 insertions(+), 26 deletions(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 012c33f1..76dd6bc4 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -32,7 +32,7 @@ async def find_most_relevant_historical_message( chat_id: str, query_text: str, similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 - exclude_recent_seconds: int = 900 # 新增参数:排除最近多少秒内的消息(例如5分钟) + absolute_search_time_limit: Optional[float] = None # 新增参数:排除最近多少秒内的消息(例如5分钟) ) -> Optional[Dict[str, Any]]: """ 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。 @@ -50,15 +50,30 @@ async def find_most_relevant_historical_message( logger.warning(f"[{chat_id}] (私聊历史)未能为查询文本 '{query_text[:50]}...' 生成嵌入向量。") return None - current_timestamp = time.time() # 获取当前时间戳 - excluded_time_threshold = current_timestamp - exclude_recent_seconds + effective_search_upper_limit: float + log_source_of_limit: str = "" + + if absolute_search_time_limit is not None: + effective_search_upper_limit = absolute_search_time_limit + log_source_of_limit = "传入的绝对时间上限" + else: + # 如果没有传入绝对时间上限,可以设置一个默认的回退逻辑 + fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时 + effective_search_upper_limit = time.time() - fallback_exclude_seconds + log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)" + + logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: " + f"将使用时间上限 {effective_search_upper_limit} " + f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " + f"进行历史消息锚点搜索。来源: {log_source_of_limit}") + # --- [新代码结束] --- pipeline = [ { "$match": { "chat_id": chat_id, "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}}, - "time": {"$lt": excluded_time_threshold} + "time": {"$lt": effective_search_upper_limit} # <--- 使用新的 effective_search_upper_limit } }, { @@ -90,13 +105,13 @@ async def find_most_relevant_historical_message( cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor results = list(cursor) # 直接将 CommandCursor 转换为列表 if not results: - logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。") + logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。") else: - logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:") - for res_msg in results: # 最多只打印我们 limit 的那几条 + logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:") + for res_msg in results: msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S') logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'") - # --- 新增日志结束 --- + # --- [修改结束] --- # --- 修改结束 --- if results and len(results) > 0: @@ -182,7 +197,8 @@ async def retrieve_contextual_info( text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录) private_name: str, # 用于日志 chat_id: str, # 用于特定私聊历史的检索 - historical_chat_query_text: Optional[str] = None # 专门为私聊历史检索准备的查询文本 (例如最新的N条消息合并) + historical_chat_query_text: Optional[str] = None, + current_short_term_history_earliest_time: Optional[float] = None # <--- 新增参数 ) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 """ 检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。 @@ -253,38 +269,68 @@ async def retrieve_contextual_info( logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。") - # --- 3. 当前私聊的特定历史对话上下文检索 (新增逻辑) --- + # --- 3. 当前私聊的特定历史对话上下文检索 --- query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None - historical_chat_log_msg = f"开始私聊历史检索 (查询文本: '{str(query_for_historical_chat)[:30]}...')" + # historical_chat_log_msg 的初始化可以移到 try 块之后,根据实际情况赋值 if query_for_historical_chat: try: - # 获取 find_most_relevant_historical_message 调用时实际使用的 exclude_recent_seconds 值 - actual_exclude_seconds_for_find = 900 # 根据您对 find_most_relevant_historical_message 的调用 + # ---- [新代码] 计算最终的、严格的搜索时间上限 ---- + # 1. 设置一个基础的、较大的时间回溯窗口,例如2小时 (7200秒) + # 这个值可以从全局配置读取,如果没配置则使用默认值 + default_search_exclude_seconds = getattr(global_config, "pfc_historical_search_default_exclude_seconds", 7200) # 默认2小时 + base_excluded_time_limit = time.time() - default_search_exclude_seconds + + final_search_upper_limit_time = base_excluded_time_limit + if current_short_term_history_earliest_time is not None: + # 我们希望找到的消息严格早于 short_term_history 的开始,减去一个小量确保不包含边界 + limit_from_short_term = current_short_term_history_earliest_time - 0.001 + final_search_upper_limit_time = min(base_excluded_time_limit, limit_from_short_term) + log_earliest_time_str = "未提供" + if current_short_term_history_earliest_time is not None: + try: + log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})" + except: + log_earliest_time_str = str(current_short_term_history_earliest_time) + + logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: " + f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " + f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " + f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}") + most_relevant_message_doc = await find_most_relevant_historical_message( chat_id=chat_id, query_text=query_for_historical_chat, - similarity_threshold=0.5, - exclude_recent_seconds=actual_exclude_seconds_for_find + similarity_threshold=0.5, # 您可以调整这个 + # exclude_recent_seconds 不再直接使用,而是传递计算好的绝对时间上限 + absolute_search_time_limit=final_search_upper_limit_time # <--- 传递计算好的绝对时间上限 ) + if most_relevant_message_doc: anchor_id = most_relevant_message_doc.get("message_id") - anchor_time = most_relevant_message_doc.get("time") - if anchor_id and anchor_time is not None: - # 计算传递给 retrieve_chat_context_window 的时间上限 - # 这个上限应该与 find_most_relevant_historical_message 的排除点一致 - time_limit_for_window_after = time.time() - actual_exclude_seconds_for_find - + anchor_time = most_relevant_message_doc.get("time") + + # 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证) + if anchor_time is not None and anchor_time >= final_search_upper_limit_time: + logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} " + f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。") + historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。" + # 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值 + elif anchor_id and anchor_time is not None: + # 构建上下文窗口时,其“未来”消息的上限也应该是 final_search_upper_limit_time + # 因为我们不希望历史回忆的上下文窗口延伸到“最近聊天记录”的范围内或更近 + time_limit_for_context_window_after = final_search_upper_limit_time + logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " f"with anchor_time: {anchor_time}, " - f"excluded_time_threshold_for_window: {time_limit_for_window_after}") + f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}") context_window_messages = await retrieve_chat_context_window( chat_id=chat_id, anchor_message_id=anchor_id, - anchor_message_time=anchor_time, - excluded_time_threshold_for_window=time_limit_for_window_after, # <--- 传递这个值 + anchor_message_time=anchor_time, + excluded_time_threshold_for_window=time_limit_for_context_window_after, window_size_before=7, window_size_after=7 ) diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 2a0cda6d..9a184e52 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -1,7 +1,8 @@ import random import asyncio +from datetime import datetime from .pfc_utils import retrieve_contextual_info - +from typing import Optional from src.common.logger_manager import get_logger from ..models.utils_model import LLMRequest from ...config.config import global_config @@ -224,6 +225,51 @@ class ReplyGenerator: else: goals_str = "- 目前没有明确对话目标\n" + chat_history_for_prompt_builder: list = [] + recent_history_start_time_for_exclusion: Optional[float] = None + + # 我们需要知道 build_chat_history_text 函数大致会用 observation_info.chat_history 的多少条记录 + # 或者 build_chat_history_text 内部的逻辑。 + # 假设 build_chat_history_text 主要依赖 observation_info.chat_history_str, + # 而 observation_info.chat_history_str 是基于 observation_info.chat_history 的最后一部分(比如20条)生成的。 + # 为了准确,我们应该直接从 observation_info.chat_history 中获取这个片段的起始时间。 + # 请确保这里的 MAX_RECENT_HISTORY_FOR_PROMPT 与 observation_info.py 或 build_chat_history_text 中 + # 用于生成 chat_history_str 的消息数量逻辑大致吻合。 + # 如果 build_chat_history_text 总是用 observation_info.chat_history 的最后 N 条,那么这个 N 就是这里的数字。 + # 如果 observation_info.chat_history_str 是由 observation_info.py 中的 update_from_message 等方法维护的, + # 并且总是代表一个固定长度(比如最后30条)的聊天记录字符串,那么我们就需要从 observation_info.chat_history + # 取出这部分原始消息来确定起始时间。 + + # 我们先做一个合理的假设: “最近聊天记录” 字符串 chat_history_text 是基于 + # observation_info.chat_history 的一个有限的尾部片段生成的。 + # 假设这个片段的长度由 global_config.pfc_recent_history_display_count 控制,默认为20条。 + recent_history_display_count = getattr(global_config, "pfc_recent_history_display_count", 20) + + if observation_info and observation_info.chat_history and len(observation_info.chat_history) > 0: + # 获取用于生成“最近聊天记录”的实际消息片段 + # 如果 observation_info.chat_history 长度小于 display_count,则取全部 + start_index = max(0, len(observation_info.chat_history) - recent_history_display_count) + chat_history_for_prompt_builder = observation_info.chat_history[start_index:] + + if chat_history_for_prompt_builder: # 如果片段不为空 + try: + first_message_in_display_slice = chat_history_for_prompt_builder[0] + recent_history_start_time_for_exclusion = first_message_in_display_slice.get('time') + if recent_history_start_time_for_exclusion: + # 导入 datetime (如果 reply_generator.py 文件顶部没有的话) + # from datetime import datetime # 通常建议放在文件顶部 + logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: " + f"{recent_history_start_time_for_exclusion} " + f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})") + else: + logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。") + except (IndexError, KeyError, TypeError) as e: + logger.warning(f"[{self.private_name}] (ReplyGenerator) 获取“最近聊天记录”起始时间失败: {e}") + recent_history_start_time_for_exclusion = None + else: + logger.debug(f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。") + # --- [新代码结束] --- + chat_history_text = await build_chat_history_text(observation_info, self.private_name) sender_name_str = self.private_name @@ -276,7 +322,8 @@ class ReplyGenerator: text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识 private_name=self.private_name, chat_id=current_chat_id, # << 传递 chat_id - historical_chat_query_text=historical_chat_query # << 传递专门的查询文本 + historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本 + current_short_term_history_earliest_time=recent_history_start_time_for_exclusion # <--- 新增传递的参数 ) # === 调用修改结束 === From 5d31cc8bbe52b121f4bcab0bf75a820969933afb Mon Sep 17 00:00:00 2001 From: infinitycat Date: Fri, 9 May 2025 21:20:31 +0800 Subject: [PATCH 10/27] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0docker=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E7=9A=84tag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 54 +++++++++++++++++------------- docker-compose.yml | 4 +-- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 605d838c..36c7604f 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -6,10 +6,9 @@ on: - main - classical - dev - - new_knowledge tags: - - 'v*' - workflow_dispatch: + - "v*.*.*" + - "v*" jobs: build-and-push: @@ -20,6 +19,11 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 - name: Clone maim_message run: git clone https://github.com/MaiM-with-u/maim_message maim_message @@ -29,6 +33,8 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 + with: + buildkitd-flags: --debug - name: Login to Docker Hub uses: docker/login-action@v3 @@ -36,20 +42,18 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Determine Image Tags - id: tags - run: | - if [[ "${{ github.ref }}" == refs/tags/* ]]; then - echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:${{ github.ref_name }},${{ secrets.DOCKERHUB_USERNAME }}/maimbot:latest" >> $GITHUB_OUTPUT - elif [ "${{ github.ref }}" == "refs/heads/main" ]; then - echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT - elif [ "${{ github.ref }}" == "refs/heads/classical" ]; then - echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:classical,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:classical-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT - elif [ "${{ github.ref }}" == "refs/heads/dev" ]; then - echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:dev,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:dev-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT - elif [ "${{ github.ref }}" == "refs/heads/new_knowledge" ]; then - echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:knowledge,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:knowledge-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT - fi + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot + tags: | + type=ref,event=branch + type=ref,event=tag + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha - name: Build and Push Docker Image uses: docker/build-push-action@v5 @@ -57,10 +61,14 @@ jobs: context: . file: ./Dockerfile platforms: linux/amd64,linux/arm64 - tags: ${{ steps.tags.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }} push: true - cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:buildcache,mode=max - labels: | - org.opencontainers.image.created=${{ steps.tags.outputs.date_tag }} - org.opencontainers.image.revision=${{ github.sha }} \ No newline at end of file + cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache + cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache,mode=max + labels: ${{ steps.meta.outputs.labels }} + provenance: true + sbom: true + build-args: | + BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') + VCS_REF=${{ github.sha }} + outputs: type=image,push=true \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 000d00c3..363fafc2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,8 +16,8 @@ services: - maim_bot core: container_name: maim-bot-core - image: sengokucola/maimbot:main - # image: infinitycat/maimbot:main + image: sengokucola/maibot:main + # image: infinitycat/maibot:main environment: - TZ=Asia/Shanghai # - EULA_AGREE=35362b6ea30f12891d46ef545122e84a # 同意EULA From 3323c8dc498984cb938fc16f531dff05e8c5bc21 Mon Sep 17 00:00:00 2001 From: infinitycat Date: Fri, 9 May 2025 21:42:00 +0800 Subject: [PATCH 11/27] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0docker-compose?= =?UTF-8?q?=E7=9A=84tag?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker-compose.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 363fafc2..2392f707 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,8 +16,11 @@ services: - maim_bot core: container_name: maim-bot-core - image: sengokucola/maibot:main - # image: infinitycat/maibot:main + image: sengokucola/maibot:latest + # image: infinitycat/maibot:latest + # dev + # image: sengokucola/maibot:dev + # image: infinitycat/maibot:dev environment: - TZ=Asia/Shanghai # - EULA_AGREE=35362b6ea30f12891d46ef545122e84a # 同意EULA From 835efd5daae12ef268bb016d9e12f8e9a184fc9c Mon Sep 17 00:00:00 2001 From: infinitycat Date: Sat, 10 May 2025 01:41:56 +0800 Subject: [PATCH 12/27] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84Docker=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E5=A4=9A=E5=B9=B3=E5=8F=B0=E6=94=AF=E6=8C=81=E5=92=8C?= =?UTF-8?q?=E6=91=98=E8=A6=81=E4=B8=8A=E4=BC=A0=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 129 +++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 27 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 36c7604f..3fce193b 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -10,20 +10,58 @@ on: - "v*.*.*" - "v*" +env: + REGISTRY_IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/maibot + jobs: - build-and-push: + prepare: runs-on: ubuntu-latest - env: - DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }} - DATE_TAG: $(date -u +'%Y-%m-%dT%H-%M-%S') + outputs: + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + bake-file: ${{ steps.meta.outputs.bake-file }} steps: - name: Checkout code uses: actions/checkout@v4 with: fetch-depth: 0 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + build: + runs-on: ubuntu-latest + needs: prepare + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + - linux/arm/v7 + - linux/arm/v6 + - linux/386 + - linux/loong64 + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Clone maim_message run: git clone https://github.com/MaiM-with-u/maim_message maim_message @@ -31,6 +69,9 @@ jobs: - name: Clone lpmm run: git clone https://github.com/MaiM-with-u/MaiMBot-LPMM.git MaiMBot-LPMM + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: @@ -42,33 +83,67 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot - tags: | - type=ref,event=branch - type=ref,event=tag - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=semver,pattern={{major}} - type=sha - - - name: Build and Push Docker Image + - name: Build and push by digest + id: build uses: docker/build-push-action@v5 with: context: . file: ./Dockerfile - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta.outputs.tags }} - push: true - cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache,mode=max - labels: ${{ steps.meta.outputs.labels }} + platforms: ${{ matrix.platform }} + labels: ${{ needs.prepare.outputs.labels }} + cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }} + cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max provenance: true sbom: true build-args: | BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') VCS_REF=${{ github.sha }} - outputs: type=image,push=true \ No newline at end of file + outputs: type=image,push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p ${{ runner.temp }}/digests + digest="${{ steps.build.outputs.digest }}" + touch "${{ runner.temp }}/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: ${{ runner.temp }}/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + runs-on: ubuntu-latest + needs: + - prepare + - build + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: ${{ runner.temp }}/digests + pattern: digests-* + merge-multiple: true + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Create manifest list and push + working-directory: ${{ runner.temp }}/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}") \ + $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + + - name: Inspect image + run: | + tags_json='${{ needs.prepare.outputs.tags }}' + first_tag=$(echo $tags_json | jq -r '.tags[0]') + docker buildx imagetools inspect $first_tag \ No newline at end of file From de1c36f8e8ee64dd0ea2abaca8782ab99b56f211 Mon Sep 17 00:00:00 2001 From: infinitycat Date: Sat, 10 May 2025 01:46:17 +0800 Subject: [PATCH 13/27] =?UTF-8?q?feat:=20=E5=9C=A8Docker=E9=95=9C=E5=83=8F?= =?UTF-8?q?=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=E4=B8=AD=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=A0=87=E7=AD=BE=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BB=A5=E4=BE=BF?= =?UTF-8?q?=E4=BA=8E=E7=89=88=E6=9C=AC=E7=AE=A1=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 3fce193b..097fdac3 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -91,6 +91,7 @@ jobs: file: ./Dockerfile platforms: ${{ matrix.platform }} labels: ${{ needs.prepare.outputs.labels }} + tags: ${{ env.REGISTRY_IMAGE }}:${{ github.sha }}-${{ env.PLATFORM_PAIR }} cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }} cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max provenance: true From 4fc33278c98e9cf0d97b5a238f9ed69198de8bbf Mon Sep 17 00:00:00 2001 From: infinitycat Date: Sat, 10 May 2025 01:51:09 +0800 Subject: [PATCH 14/27] =?UTF-8?q?feat:=20=E7=B2=BE=E7=AE=80Docker=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E7=A7=BB?= =?UTF-8?q?=E9=99=A4=E4=B8=8D=E5=BF=85=E8=A6=81=E7=9A=84=E5=B9=B3=E5=8F=B0?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E5=B9=B6=E6=9B=B4=E6=96=B0=E6=A0=87=E7=AD=BE?= =?UTF-8?q?=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 097fdac3..fb3d4938 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -48,10 +48,6 @@ jobs: platform: - linux/amd64 - linux/arm64 - - linux/arm/v7 - - linux/arm/v6 - - linux/386 - - linux/loong64 steps: - name: Prepare run: | @@ -91,7 +87,7 @@ jobs: file: ./Dockerfile platforms: ${{ matrix.platform }} labels: ${{ needs.prepare.outputs.labels }} - tags: ${{ env.REGISTRY_IMAGE }}:${{ github.sha }}-${{ env.PLATFORM_PAIR }} + tags: ${{ env.REGISTRY_IMAGE }} cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }} cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max provenance: true From f96fffe16eb8981f2c7f657effd1b503cdfadc0c Mon Sep 17 00:00:00 2001 From: infinitycat Date: Sat, 10 May 2025 02:00:59 +0800 Subject: [PATCH 15/27] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0Docker=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E7=A1=AE?= =?UTF-8?q?=E4=BF=9D=E4=BD=BF=E7=94=A8=E9=BB=98=E8=AE=A4=E6=A0=87=E7=AD=BE?= =?UTF-8?q?=E5=B9=B6=E4=BC=98=E5=8C=96=E6=A0=87=E7=AD=BE=E5=A4=84=E7=90=86?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index fb3d4938..7ea9d86e 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -136,8 +136,16 @@ jobs: - name: Create manifest list and push working-directory: ${{ runner.temp }}/digests run: | - docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}") \ - $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + # 确保至少有一个默认标签 + TAGS="-t ${{ env.REGISTRY_IMAGE }}:latest" + + # 如果 meta 输出的标签不为空,则使用它们 + if [ -n "${{ needs.prepare.outputs.tags }}" ]; then + TAGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}") + fi + + echo "Using tags: ${TAGS}" + docker buildx imagetools create ${TAGS} $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) - name: Inspect image run: | From 5ad1993fee7d127b1af776e5816497169be24e14 Mon Sep 17 00:00:00 2001 From: infinitycat Date: Sat, 10 May 2025 02:11:26 +0800 Subject: [PATCH 16/27] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96Docker=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E5=A2=9E?= =?UTF-8?q?=E5=BC=BA=E6=A0=87=E7=AD=BE=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E4=BB=A5=E6=94=AF=E6=8C=81=E9=BB=98=E8=AE=A4=E6=A0=87=E7=AD=BE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 7ea9d86e..a2e4cfc8 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -149,6 +149,20 @@ jobs: - name: Inspect image run: | - tags_json='${{ needs.prepare.outputs.tags }}' - first_tag=$(echo $tags_json | jq -r '.tags[0]') - docker buildx imagetools inspect $first_tag \ No newline at end of file + # 使用默认标签 + DEFAULT_TAG="${{ env.REGISTRY_IMAGE }}:latest" + + # 尝试从 prepare 输出中获取标签 + if [ -n "${{ needs.prepare.outputs.tags }}" ]; then + TAGS_JSON='${{ needs.prepare.outputs.tags }}' + FIRST_TAG=$(echo $TAGS_JSON | jq -r '.tags[0]') + if [ -n "$FIRST_TAG" ] && [ "$FIRST_TAG" != "null" ]; then + echo "使用从 metadata 获取的标签: $FIRST_TAG" + docker buildx imagetools inspect $FIRST_TAG + exit 0 + fi + fi + + # 如果没有标签或提取失败,使用默认标签 + echo "使用默认标签: $DEFAULT_TAG" + docker buildx imagetools inspect $DEFAULT_TAG \ No newline at end of file From 606b89c99b233426d90d3289418eec0476e5e27f Mon Sep 17 00:00:00 2001 From: infinitycat Date: Sat, 10 May 2025 02:37:46 +0800 Subject: [PATCH 17/27] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84Docker=E9=95=9C?= =?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E6=96=B0?= =?UTF-8?q?=E5=A2=9EAMD64=E5=92=8CARM64=E6=9E=B6=E6=9E=84=E6=94=AF?= =?UTF-8?q?=E6=8C=81=EF=BC=8C=E5=B9=B6=E4=BC=98=E5=8C=96=E5=A4=9A=E6=9E=B6?= =?UTF-8?q?=E6=9E=84=E6=B8=85=E5=8D=95=E5=88=9B=E5=BB=BA=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker-image.yml | 217 +++++++++++++++-------------- 1 file changed, 109 insertions(+), 108 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index a2e4cfc8..ba56b0c2 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -10,50 +10,13 @@ on: - "v*.*.*" - "v*" -env: - REGISTRY_IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/maibot - jobs: - prepare: + build-amd64: + name: Build AMD64 Image runs-on: ubuntu-latest - outputs: - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - bake-file: ${{ steps.meta.outputs.bake-file }} + env: + DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }} steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Docker meta - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY_IMAGE }} - tags: | - type=ref,event=branch - type=ref,event=tag - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=semver,pattern={{major}} - type=sha - - build: - runs-on: ubuntu-latest - needs: prepare - strategy: - fail-fast: false - matrix: - platform: - - linux/amd64 - - linux/arm64 - steps: - - name: Prepare - run: | - platform=${{ matrix.platform }} - echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV - - name: Checkout code uses: actions/checkout@v4 with: @@ -65,9 +28,6 @@ jobs: - name: Clone lpmm run: git clone https://github.com/MaiM-with-u/MaiMBot-LPMM.git MaiMBot-LPMM - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: @@ -79,50 +39,61 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and push by digest - id: build + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot + tags: | + type=ref,event=branch + type=ref,event=tag + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - name: Build and Push AMD64 Docker Image uses: docker/build-push-action@v5 with: context: . file: ./Dockerfile - platforms: ${{ matrix.platform }} - labels: ${{ needs.prepare.outputs.labels }} - tags: ${{ env.REGISTRY_IMAGE }} - cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }} - cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max + platforms: linux/amd64 + tags: ${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-${{ github.sha }} + push: true + cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-buildcache + cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-buildcache,mode=max + labels: ${{ steps.meta.outputs.labels }} provenance: true sbom: true build-args: | BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') VCS_REF=${{ github.sha }} - outputs: type=image,push-by-digest=true,name-canonical=true,push=true + outputs: type=image,push=true - - name: Export digest - run: | - mkdir -p ${{ runner.temp }}/digests - digest="${{ steps.build.outputs.digest }}" - touch "${{ runner.temp }}/digests/${digest#sha256:}" - - - name: Upload digest - uses: actions/upload-artifact@v4 - with: - name: digests-${{ env.PLATFORM_PAIR }} - path: ${{ runner.temp }}/digests/* - if-no-files-found: error - retention-days: 1 - - merge: + build-arm64: + name: Build ARM64 Image runs-on: ubuntu-latest - needs: - - prepare - - build + env: + DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }} steps: - - name: Download digests - uses: actions/download-artifact@v4 + - name: Checkout code + uses: actions/checkout@v4 with: - path: ${{ runner.temp }}/digests - pattern: digests-* - merge-multiple: true + fetch-depth: 0 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Clone maim_message + run: git clone https://github.com/MaiM-with-u/maim_message maim_message + + - name: Clone lpmm + run: git clone https://github.com/MaiM-with-u/MaiMBot-LPMM.git MaiMBot-LPMM + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + buildkitd-flags: --debug - name: Login to Docker Hub uses: docker/login-action@v3 @@ -130,39 +101,69 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot + tags: | + type=ref,event=branch + type=ref,event=tag + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha - - name: Create manifest list and push - working-directory: ${{ runner.temp }}/digests - run: | - # 确保至少有一个默认标签 - TAGS="-t ${{ env.REGISTRY_IMAGE }}:latest" - - # 如果 meta 输出的标签不为空,则使用它们 - if [ -n "${{ needs.prepare.outputs.tags }}" ]; then - TAGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}") - fi - - echo "Using tags: ${TAGS}" - docker buildx imagetools create ${TAGS} $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + - name: Build and Push ARM64 Docker Image + uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + platforms: linux/arm64 + tags: ${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-${{ github.sha }} + push: true + cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-buildcache + cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-buildcache,mode=max + labels: ${{ steps.meta.outputs.labels }} + provenance: true + sbom: true + build-args: | + BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') + VCS_REF=${{ github.sha }} + outputs: type=image,push=true - - name: Inspect image + create-manifest: + name: Create Multi-Arch Manifest + runs-on: ubuntu-latest + needs: + - build-amd64 + - build-arm64 + steps: + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot + tags: | + type=ref,event=branch + type=ref,event=tag + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha + + - name: Create and Push Manifest run: | - # 使用默认标签 - DEFAULT_TAG="${{ env.REGISTRY_IMAGE }}:latest" - - # 尝试从 prepare 输出中获取标签 - if [ -n "${{ needs.prepare.outputs.tags }}" ]; then - TAGS_JSON='${{ needs.prepare.outputs.tags }}' - FIRST_TAG=$(echo $TAGS_JSON | jq -r '.tags[0]') - if [ -n "$FIRST_TAG" ] && [ "$FIRST_TAG" != "null" ]; then - echo "使用从 metadata 获取的标签: $FIRST_TAG" - docker buildx imagetools inspect $FIRST_TAG - exit 0 - fi - fi - - # 如果没有标签或提取失败,使用默认标签 - echo "使用默认标签: $DEFAULT_TAG" - docker buildx imagetools inspect $DEFAULT_TAG \ No newline at end of file + # 为每个标签创建多架构镜像 + for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr '\n' ' '); do + echo "Creating manifest for $tag" + docker buildx imagetools create -t $tag \ + ${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-${{ github.sha }} \ + ${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-${{ github.sha }} + done \ No newline at end of file From 080c862fcd3aab9aea24235b5bd7998dc9793531 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 13:43:19 +0800 Subject: [PATCH 18/27] =?UTF-8?q?=E4=B8=8D=E8=AE=A9=E5=88=B7=E5=B1=8Flogge?= =?UTF-8?q?r=E6=89=93=E6=89=B0=E5=88=B0=E6=99=BA=E7=B1=B3=E5=A1=94?= =?UTF-8?q?=E5=A4=A7=E4=BA=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/heart_flow/subheartflow_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heart_flow/subheartflow_manager.py b/src/heart_flow/subheartflow_manager.py index c074d29a..9d69a5a4 100644 --- a/src/heart_flow/subheartflow_manager.py +++ b/src/heart_flow/subheartflow_manager.py @@ -284,7 +284,7 @@ class SubHeartflowManager: return # 如果不允许,直接返回 # --- 结束新增 --- - logger.info(f"当前状态 ({current_state.value}) 可以在{focused_limit}个群 专注聊天") + logger.debug(f"当前状态 ({current_state.value}) 可以在{focused_limit}个群 专注聊天") if focused_limit <= 0: # logger.debug(f"{log_prefix} 当前状态 ({current_state.value}) 不允许 FOCUSED 子心流") From 28eb827c5f5a604dd1265c3df2e5369beae7cede Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 13:55:21 +0800 Subject: [PATCH 19/27] =?UTF-8?q?=E5=88=86=E7=A6=BB=E5=B5=8C=E5=85=A5?= =?UTF-8?q?=E5=90=91=E9=87=8F=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_processor.py | 105 +++++++++++++++---------------- 1 file changed, 49 insertions(+), 56 deletions(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index b6e6b8b3..4efc513e 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -6,7 +6,7 @@ from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_ from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py from src.config.config import global_config from src.common.logger_manager import get_logger -from ..chat.chat_stream import chat_manager +from ..chat.chat_stream import ChatStream, chat_manager from src.plugins.chat.utils import get_embedding from src.common.database import db from .pfc_manager import PFCManager @@ -24,7 +24,7 @@ async def _handle_error(error: Exception, context: str, message: MessageRecv | N if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取 if raw_msg_content: - logger.error(f"相关消息原始内容: {raw_msg_content}") + logger.error(f"相关消息原始内容: {raw_msg_content}") elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message logger.error(f"相关消息原始内容: {message.raw_message}") @@ -47,21 +47,10 @@ class PFCProcessor: try: # 1. 消息解析与初始化 message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv - # 确保 message_obj.message_info 存在 - if not hasattr(message_obj, 'message_info'): - logger.error("MessageRecv 对象缺少 message_info 属性。跳过处理。") - return groupinfo = getattr(message_obj.message_info, 'group_info', None) userinfo = getattr(message_obj.message_info, 'user_info', None) - if userinfo is None: # 确保 userinfo 存在 - logger.error("message_obj.message_info 中缺少 user_info。跳过处理。") - return - if not hasattr(userinfo, 'user_id'): # 确保 user_id 存在 - logger.error("userinfo 对象中缺少 user_id。跳过处理。") - return - logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流") chat = await chat_manager.get_or_create_stream( platform=message_obj.message_info.platform, @@ -73,7 +62,7 @@ class PFCProcessor: # 2. 过滤检查 await message_obj.process() # 调用 MessageRecv 的异步 process 方法 if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \ - self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性 + self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性 return # 3. 消息存储 (保持原有调用) @@ -82,49 +71,10 @@ class PFCProcessor: await self.storage.store_message(message_obj, chat) logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}") - # === 新增:为已存储的消息生成嵌入并更新数据库文档 === - embedding_vector = None - text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 - - # 在 storage.py 中,会对 processed_plain_text 进行一次过滤 - # 为了保持一致,我们也在这里应用相同的过滤逻辑 - # 当然,更优的做法是 store_message 返回过滤后的文本,或在 message_obj 中增加一个 filtered_processed_plain_text 属性 - # 这里为了简单,我们先重复一次过滤逻辑 - pattern = r".*?|.*?|.*?" - if text_for_embedding: - filtered_text_for_embedding = re.sub(pattern, "", text_for_embedding, flags=re.DOTALL) - else: - filtered_text_for_embedding = "" - - if filtered_text_for_embedding and filtered_text_for_embedding.strip(): - try: - # request_type 参数根据你的 get_embedding 函数实际需求来定 - embedding_vector = await get_embedding(filtered_text_for_embedding, request_type="pfc_private_memory") - if embedding_vector: - logger.debug(f"成功为消息 ID '{message_obj.message_info.message_id}' 生成嵌入向量。") - - # 更新数据库中的对应文档 - # 确保你有权限访问和操作 db 对象 - update_result = db.messages.update_one( - {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id}, - {"$set": {"embedding_vector": embedding_vector}} - ) - if update_result.modified_count > 0: - logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。") - elif update_result.matched_count > 0: - logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。") - else: - logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。") - else: - logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。") - except Exception as e_embed_update: - logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True) - else: - logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。") - # === 新增结束 === + await self._update_embedding_vector(message_obj) # 明确传递 message_obj # 4. 创建 PFC 聊天流 - await self._create_pfc_chat(message_obj) + await self._create_pfc_chat(message_obj, chat) # 5. 日志记录 # 确保 message_obj.message_info.time 是 float 类型的时间戳 @@ -169,4 +119,47 @@ class PFCProcessor: logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串 return True - return False \ No newline at end of file + return False + + async def _update_embedding_vector(self, message_obj: MessageRecv, chat: ChatStream) -> None: + """更新消息的嵌入向量""" + # === 新增:为已存储的消息生成嵌入并更新数据库文档 === + embedding_vector = None + text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 + + # 在 storage.py 中,会对 processed_plain_text 进行一次过滤 + # 为了保持一致,我们也在这里应用相同的过滤逻辑 + # 当然,更优的做法是 store_message 返回过滤后的文本,或在 message_obj 中增加一个 filtered_processed_plain_text 属性 + # 这里为了简单,我们先重复一次过滤逻辑 + pattern = r".*?|.*?|.*?" + if text_for_embedding: + filtered_text_for_embedding = re.sub(pattern, "", text_for_embedding, flags=re.DOTALL) + else: + filtered_text_for_embedding = "" + + if filtered_text_for_embedding and filtered_text_for_embedding.strip(): + try: + # request_type 参数根据你的 get_embedding 函数实际需求来定 + embedding_vector = await get_embedding(filtered_text_for_embedding, request_type="pfc_private_memory") + if embedding_vector: + logger.debug(f"成功为消息 ID '{message_obj.message_info.message_id}' 生成嵌入向量。") + + # 更新数据库中的对应文档 + # 确保你有权限访问和操作 db 对象 + update_result = db.messages.update_one( + {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id}, + {"$set": {"embedding_vector": embedding_vector}} + ) + if update_result.modified_count > 0: + logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。") + elif update_result.matched_count > 0: + logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。") + else: + logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。") + else: + logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。") + except Exception as e_embed_update: + logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True) + else: + logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。") + # === 新增结束 === \ No newline at end of file From cd0a41dec69287c27e2b7ba8cd087e39a70345c9 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 14:08:44 +0800 Subject: [PATCH 20/27] =?UTF-8?q?utils=20=E6=96=B9=E6=B3=95=E8=A7=A3?= =?UTF-8?q?=E9=87=8A=E8=BF=98=E5=8E=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_utils.py | 39 ++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 76dd6bc4..f57d59b3 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -63,9 +63,9 @@ async def find_most_relevant_historical_message( log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)" logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: " - f"将使用时间上限 {effective_search_upper_limit} " - f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " - f"进行历史消息锚点搜索。来源: {log_source_of_limit}") + f"将使用时间上限 {effective_search_upper_limit} " + f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " + f"进行历史消息锚点搜索。来源: {log_source_of_limit}") # --- [新代码结束] --- pipeline = [ @@ -202,6 +202,13 @@ async def retrieve_contextual_info( ) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 """ 检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。 + + Args: + text: 用于检索的上下文文本 (例如聊天记录)。 + private_name: 私聊对象的名称,用于日志记录。 + + Returns: + Tuple[str, str]: (检索到的记忆字符串, 检索到的知识字符串) """ # 初始化返回值 retrieved_global_memory_str = "无相关全局记忆。" @@ -294,9 +301,9 @@ async def retrieve_contextual_info( log_earliest_time_str = str(current_short_term_history_earliest_time) logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: " - f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " - f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " - f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}") + f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " + f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " + f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}") most_relevant_message_doc = await find_most_relevant_historical_message( @@ -314,7 +321,7 @@ async def retrieve_contextual_info( # 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证) if anchor_time is not None and anchor_time >= final_search_upper_limit_time: logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} " - f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。") + f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。") historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。" # 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值 elif anchor_id and anchor_time is not None: @@ -323,8 +330,8 @@ async def retrieve_contextual_info( time_limit_for_context_window_after = final_search_upper_limit_time logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " - f"with anchor_time: {anchor_time}, " - f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}") + f"with anchor_time: {anchor_time}, " + f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}") context_window_messages = await retrieve_chat_context_window( chat_id=chat_id, @@ -377,7 +384,17 @@ def get_items_from_json( allow_array: bool = True, ) -> Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]: """从文本中提取JSON内容并获取指定字段 - (保持你原始 pfc_utils.py 中的此函数代码不变) + + Args: + content: 包含JSON的文本 + private_name: 私聊名称 + *items: 要提取的字段名 + default_values: 字段的默认值,格式为 {字段名: 默认值} + required_types: 字段的必需类型,格式为 {字段名: 类型} + allow_array: 是否允许解析JSON数组 + + Returns: + Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]: (是否成功, 提取的字段字典或字典列表) """ cleaned_content = content.strip() result: Union[Dict[str, Any], List[Dict[str, Any]]] = {} @@ -540,7 +557,7 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: async def build_chat_history_text(observation_info: ObservationInfo, private_name: str) -> str: - """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ + """构建聊天历史记录文本 (包含未处理消息)""" chat_history_text = "" try: if hasattr(observation_info, "chat_history_str") and observation_info.chat_history_str: From 5aa04fa246e34fa9fd10e4e63f89bf6fd67ce4a3 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 14:14:59 +0800 Subject: [PATCH 21/27] =?UTF-8?q?=E8=A7=84=E8=8C=83=E7=BC=A9=E8=BF=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/reply_generator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 9a184e52..f142e720 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -259,8 +259,8 @@ class ReplyGenerator: # 导入 datetime (如果 reply_generator.py 文件顶部没有的话) # from datetime import datetime # 通常建议放在文件顶部 logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: " - f"{recent_history_start_time_for_exclusion} " - f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})") + f"{recent_history_start_time_for_exclusion} " + f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})") else: logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。") except (IndexError, KeyError, TypeError) as e: From 027760517809a96acc40621978e4716316101318 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 14:31:15 +0800 Subject: [PATCH 22/27] ruff --- src/plugins/PFC/pfc_processor.py | 2 +- src/plugins/PFC/pfc_utils.py | 57 +++++++++++++++++++----------- src/plugins/PFC/reply_generator.py | 1 - 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index 4efc513e..03bdf641 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -1,6 +1,6 @@ import traceback import re -from typing import Any, Dict +from typing import Any from datetime import datetime # 确保导入 datetime from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index f57d59b3..9d85f9f8 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -1,7 +1,6 @@ import traceback import json import re -import asyncio # 确保导入 asyncio import time from datetime import datetime from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 @@ -297,7 +296,7 @@ async def retrieve_contextual_info( if current_short_term_history_earliest_time is not None: try: log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})" - except: + except: log_earliest_time_str = str(current_short_term_history_earliest_time) logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: " @@ -422,19 +421,25 @@ def get_items_from_json( current_item_result[field] = item_json[field] elif field not in default_result: logger.warning(f"[私聊][{private_name}] JSON数组元素缺少必要字段 '{field}': {item_json}") - valid_item = False; break - if not valid_item: continue + valid_item = False + break + if not valid_item: + continue if required_types: for field, expected_type in required_types.items(): if field in current_item_result and not isinstance(current_item_result[field], expected_type): logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}") - valid_item = False; break - if not valid_item: continue + valid_item = False + break + if not valid_item: + continue for field in items: if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip(): logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}") - valid_item = False; break - if valid_item: valid_items_list.append(current_item_result) + valid_item = False + break + if valid_item: + valid_items_list.append(current_item_result) if valid_items_list: logger.debug(f"[私聊][{private_name}] 成功解析JSON数组,包含 {len(valid_items_list)} 个有效项目。") return True, valid_items_list @@ -469,24 +474,31 @@ def get_items_from_json( else: logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...") return False, default_result - if not isinstance(result, dict): result = default_result.copy() + if not isinstance(result, dict): + result = default_result.copy() valid_single_object = True for item_field in items: # Renamed item to item_field - if item_field in json_data: result[item_field] = json_data[item_field] + if item_field in json_data: + result[item_field] = json_data[item_field] elif item_field not in default_result: logger.error(f"[私聊][{private_name}] JSON对象缺少必要字段 '{item_field}'。JSON内容: {json_data}") - valid_single_object = False; break - if not valid_single_object: return False, default_result + valid_single_object = False + break + if not valid_single_object: + return False, default_result if required_types: for field, expected_type in required_types.items(): if field in result and not isinstance(result[field], expected_type): logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})") - valid_single_object = False; break - if not valid_single_object: return False, default_result + valid_single_object = False + break + if not valid_single_object: + return False, default_result for field in items: if field in result and isinstance(result[field], str) and not result[field].strip(): logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 不能为空字符串") - valid_single_object = False; break + valid_single_object = False + break if valid_single_object: logger.debug(f"[私聊][{private_name}] 成功解析并验证了单个JSON对象。") return True, result @@ -545,13 +557,18 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: # 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整 rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False) high_value_count = len(rdict) - if old_value > 700: value *= 3 / (high_value_count + 2) - else: value *= 3 / (high_value_count + 3) - elif value < 0: value = value * math.exp(old_value / 2000) + if old_value > 700: + value *= 3 / (high_value_count + 2) + else: + value *= 3 / (high_value_count + 3) + elif value < 0: + value = value * math.exp(old_value / 2000) # else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0 else: # old_value < 0 - if value >= 0: value = value * math.exp(old_value / 2000) - elif value < 0: value = value * math.cos(math.pi * old_value / 2000) + if value >= 0: + value = value * math.exp(old_value / 2000) + elif value < 0: + value = value * math.cos(math.pi * old_value / 2000) # else: value = 0 # 你原始代码中没有这句 return value diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index f142e720..0a9089aa 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -1,5 +1,4 @@ import random -import asyncio from datetime import datetime from .pfc_utils import retrieve_contextual_info from typing import Optional From facc4bbef0a21760d457be46535b9cc81869e08a Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 14:33:00 +0800 Subject: [PATCH 23/27] ruff --- src/plugins/PFC/pfc_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 9d85f9f8..19385855 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -296,7 +296,7 @@ async def retrieve_contextual_info( if current_short_term_history_earliest_time is not None: try: log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})" - except: + except Exception: log_earliest_time_str = str(current_short_term_history_earliest_time) logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: " From 37822fb34762953fb221bcc525c0784865c52cb3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 10 May 2025 06:33:19 +0000 Subject: [PATCH 24/27] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_processor.py | 89 ++++---- src/plugins/PFC/pfc_utils.py | 331 +++++++++++++++++++---------- src/plugins/PFC/reply_generator.py | 70 +++--- 3 files changed, 309 insertions(+), 181 deletions(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index 03bdf641..f706bffa 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -1,12 +1,12 @@ import traceback import re from typing import Any -from datetime import datetime # 确保导入 datetime -from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv -from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py +from datetime import datetime # 确保导入 datetime +from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv +from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py from src.config.config import global_config from src.common.logger_manager import get_logger -from ..chat.chat_stream import ChatStream, chat_manager +from ..chat.chat_stream import ChatStream, chat_manager from src.plugins.chat.utils import get_embedding from src.common.database import db from .pfc_manager import PFCManager @@ -14,18 +14,22 @@ from .pfc_manager import PFCManager logger = get_logger("pfc_processor") -async def _handle_error(error: Exception, context: str, message: MessageRecv | None = None) -> None: # 明确 message 类型 +async def _handle_error( + error: Exception, context: str, message: MessageRecv | None = None +) -> None: # 明确 message 类型 """统一的错误处理函数 # ... (方法注释不变) ... """ logger.error(f"{context}: {error}") logger.error(traceback.format_exc()) # 检查 message 是否 None 以及是否有 raw_message 属性 - if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message - raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取 + if ( + message and hasattr(message, "message_info") and hasattr(message.message_info, "raw_message") + ): # MessageRecv 结构可能没有直接的 raw_message + raw_msg_content = getattr(message.message_info, "raw_message", None) # 安全获取 if raw_msg_content: logger.error(f"相关消息原始内容: {raw_msg_content}") - elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message + elif message and hasattr(message, "raw_message"): # 如果 MessageRecv 直接有 raw_message logger.error(f"相关消息原始内容: {message.raw_message}") @@ -35,21 +39,22 @@ class PFCProcessor: # MessageStorage() 的实例化位置和具体类是什么? # 我们假设它来自 src.plugins.storage.storage # 但由于我们不能修改那个文件,所以这里的 self.storage 将按原样使用 - from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解 + from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解 + self.storage: MessageStorage = MessageStorage() self.pfc_manager = PFCManager.get_instance() - async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict + async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict """处理接收到的原始消息数据 # ... (方法注释不变) ... """ - message_obj: MessageRecv | None = None # 初始化为 None,并明确类型 + message_obj: MessageRecv | None = None # 初始化为 None,并明确类型 try: # 1. 消息解析与初始化 - message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv + message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv - groupinfo = getattr(message_obj.message_info, 'group_info', None) - userinfo = getattr(message_obj.message_info, 'user_info', None) + groupinfo = getattr(message_obj.message_info, "group_info", None) + userinfo = getattr(message_obj.message_info, "user_info", None) logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流") chat = await chat_manager.get_or_create_stream( @@ -57,12 +62,13 @@ class PFCProcessor: user_info=userinfo, group_info=groupinfo, ) - message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法 + message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法 # 2. 过滤检查 - await message_obj.process() # 调用 MessageRecv 的异步 process 方法 - if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \ - self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性 + await message_obj.process() # 调用 MessageRecv 的异步 process 方法 + if self._check_ban_words(message_obj.processed_plain_text, userinfo) or self._check_ban_regex( + message_obj.raw_message, userinfo + ): # MessageRecv 有 raw_message 属性 return # 3. 消息存储 (保持原有调用) @@ -71,7 +77,7 @@ class PFCProcessor: await self.storage.store_message(message_obj, chat) logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}") - await self._update_embedding_vector(message_obj) # 明确传递 message_obj + await self._update_embedding_vector(message_obj) # 明确传递 message_obj # 4. 创建 PFC 聊天流 await self._create_pfc_chat(message_obj, chat) @@ -81,43 +87,41 @@ class PFCProcessor: current_time_display = datetime.fromtimestamp(float(message_obj.message_info.time)).strftime("%H:%M:%S") # 确保 userinfo.user_nickname 存在 - user_nickname_display = getattr(userinfo, 'user_nickname', '未知用户') + user_nickname_display = getattr(userinfo, "user_nickname", "未知用户") - logger.info( - f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}" - ) + logger.info(f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}") except Exception as e: - await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj + await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj - async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型 + async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型 try: chat_id = str(message.chat_stream.stream_id) - private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname + private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname if global_config.enable_pfc_chatting: await self.pfc_manager.get_or_create_conversation(chat_id, private_name) except Exception as e: - logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True + logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True @staticmethod - def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 + def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 """检查消息中是否包含过滤词""" for word in global_config.ban_words: if word in text: - logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname + logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname logger.info(f"[过滤词识别]消息中含有{word},filtered") return True return False @staticmethod - def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 + def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型 """检查消息是否匹配过滤正则表达式""" for pattern in global_config.ban_msgs_regex: - if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象 - logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname - logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串 + if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象 + logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname + logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串 return True return False @@ -125,7 +129,7 @@ class PFCProcessor: """更新消息的嵌入向量""" # === 新增:为已存储的消息生成嵌入并更新数据库文档 === embedding_vector = None - text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 + text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本 # 在 storage.py 中,会对 processed_plain_text 进行一次过滤 # 为了保持一致,我们也在这里应用相同的过滤逻辑 @@ -148,18 +152,25 @@ class PFCProcessor: # 确保你有权限访问和操作 db 对象 update_result = db.messages.update_one( {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id}, - {"$set": {"embedding_vector": embedding_vector}} + {"$set": {"embedding_vector": embedding_vector}}, ) if update_result.modified_count > 0: logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。") elif update_result.matched_count > 0: logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。") else: - logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。") + logger.error( + f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。" + ) else: - logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。") + logger.warning( + f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。" + ) except Exception as e_embed_update: - logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True) + logger.error( + f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", + exc_info=True, + ) else: logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。") - # === 新增结束 === \ No newline at end of file + # === 新增结束 === diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 19385855..91c04ad5 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -3,35 +3,36 @@ import json import re import time from datetime import datetime -from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 +from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型 from src.common.logger_manager import get_logger from src.config.config import global_config -from src.common.database import db # << 确认此路径 +from src.common.database import db # << 确认此路径 # --- 依赖于你项目结构的导入,请务必仔细检查并根据你的实际情况调整 --- -from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径 -from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径 -from src.plugins.chat.utils import get_embedding # << 确认此路径 -from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径 +from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径 +from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径 +from src.plugins.chat.utils import get_embedding # << 确认此路径 +from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径 # --- 依赖导入结束 --- -from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py -from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入) -import math # 来自原始 pfc_utils.py -from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入) +from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py +from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入) +import math # 来自原始 pfc_utils.py +from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入) logger = get_logger("pfc_utils") + # ============================================================================== # 新增:专门用于检索 PFC 私聊历史对话上下文的函数 # ============================================================================== async def find_most_relevant_historical_message( chat_id: str, query_text: str, - similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 - absolute_search_time_limit: Optional[float] = None # 新增参数:排除最近多少秒内的消息(例如5分钟) + similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整 + absolute_search_time_limit: Optional[float] = None, # 新增参数:排除最近多少秒内的消息(例如5分钟) ) -> Optional[Dict[str, Any]]: """ 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。 @@ -51,20 +52,22 @@ async def find_most_relevant_historical_message( effective_search_upper_limit: float log_source_of_limit: str = "" - + if absolute_search_time_limit is not None: effective_search_upper_limit = absolute_search_time_limit log_source_of_limit = "传入的绝对时间上限" else: # 如果没有传入绝对时间上限,可以设置一个默认的回退逻辑 - fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时 + fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时 effective_search_upper_limit = time.time() - fallback_exclude_seconds log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)" - - logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: " - f"将使用时间上限 {effective_search_upper_limit} " - f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " - f"进行历史消息锚点搜索。来源: {log_source_of_limit}") + + logger.debug( + f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: " + f"将使用时间上限 {effective_search_upper_limit} " + f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) " + f"进行历史消息锚点搜索。来源: {log_source_of_limit}" + ) # --- [新代码结束] --- pipeline = [ @@ -72,14 +75,46 @@ async def find_most_relevant_historical_message( "$match": { "chat_id": chat_id, "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}}, - "time": {"$lt": effective_search_upper_limit} # <--- 使用新的 effective_search_upper_limit + "time": {"$lt": effective_search_upper_limit}, # <--- 使用新的 effective_search_upper_limit } }, { "$addFields": { - "dotProduct": {"$reduce": {"input": {"$range": [0, {"$size": "$embedding_vector"}]}, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": [{"$arrayElemAt": ["$embedding_vector", "$$this"]}, {"$arrayElemAt": [query_embedding, "$$this"]}]}]}}}, - "queryVecMagnitude": {"$sqrt": {"$reduce": {"input": query_embedding, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}}, - "docVecMagnitude": {"$sqrt": {"$reduce": {"input": "$embedding_vector", "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}} + "dotProduct": { + "$reduce": { + "input": {"$range": [0, {"$size": "$embedding_vector"}]}, + "initialValue": 0, + "in": { + "$add": [ + "$$value", + { + "$multiply": [ + {"$arrayElemAt": ["$embedding_vector", "$$this"]}, + {"$arrayElemAt": [query_embedding, "$$this"]}, + ] + }, + ] + }, + } + }, + "queryVecMagnitude": { + "$sqrt": { + "$reduce": { + "input": query_embedding, + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, + "docVecMagnitude": { + "$sqrt": { + "$reduce": { + "input": "$embedding_vector", + "initialValue": 0, + "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}, + } + } + }, } }, { @@ -88,7 +123,7 @@ async def find_most_relevant_historical_message( "$cond": [ {"$and": [{"$gt": ["$queryVecMagnitude", 0]}, {"$gt": ["$docVecMagnitude", 0]}]}, {"$divide": ["$dotProduct", {"$multiply": ["$queryVecMagnitude", "$docVecMagnitude"]}]}, - 0 + 0, ] } } @@ -96,26 +131,44 @@ async def find_most_relevant_historical_message( {"$match": {"similarity": {"$gte": similarity_threshold}}}, {"$sort": {"similarity": -1}}, {"$limit": 1}, - {"$project": {"_id": 0, "message_id": 1, "time": 1, "chat_id": 1, "user_info": 1, "processed_plain_text": 1, "similarity": 1}} # 可以不返回 embedding_vector 节省带宽 + { + "$project": { + "_id": 0, + "message_id": 1, + "time": 1, + "chat_id": 1, + "user_info": 1, + "processed_plain_text": 1, + "similarity": 1, + } + }, # 可以不返回 embedding_vector 节省带宽 ] try: # --- 确定性修改:同步执行聚合和结果转换 --- - cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor - results = list(cursor) # 直接将 CommandCursor 转换为列表 + cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor + results = list(cursor) # 直接将 CommandCursor 转换为列表 if not results: - logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。") + logger.info( + f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。" + ) else: - logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:") - for res_msg in results: - msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S') - logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'") + logger.info( + f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:" + ) + for res_msg in results: + msg_time_readable = datetime.fromtimestamp(res_msg.get("time", 0)).strftime("%Y-%m-%d %H:%M:%S") + logger.info( + f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text', '')[:50]}...'" + ) # --- [修改结束] --- # --- 修改结束 --- if results and len(results) > 0: most_similar_message = results[0] - logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}") + logger.info( + f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}" + ) return most_similar_message else: logger.debug(f"[{chat_id}] (私聊历史)未找到相似度超过 {similarity_threshold} 的相关消息。") @@ -124,13 +177,14 @@ async def find_most_relevant_historical_message( logger.error(f"[{chat_id}] (私聊历史)在数据库中检索时出错: {e}", exc_info=True) return None + async def retrieve_chat_context_window( chat_id: str, anchor_message_id: str, anchor_message_time: float, excluded_time_threshold_for_window: float, window_size_before: int = 7, - window_size_after: int = 7 + window_size_after: int = 7, ) -> List[Dict[str, Any]]: """ 以某条消息为锚点,获取其前后的聊天记录形成一个上下文窗口。 @@ -138,33 +192,50 @@ async def retrieve_chat_context_window( if not anchor_message_id or anchor_message_time is None: return [] - context_messages: List[Dict[str, Any]] = [] # 明确类型 - logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...") + context_messages: List[Dict[str, Any]] = [] # 明确类型 + logger.debug( + f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口..." + ) try: # --- 同步执行 find_one 和 find --- anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id}) - messages_before_cursor = db.messages.find( - {"chat_id": chat_id, "time": {"$lt": anchor_message_time}} - ).sort("time", -1).limit(window_size_before) + messages_before_cursor = ( + db.messages.find({"chat_id": chat_id, "time": {"$lt": anchor_message_time}}) + .sort("time", -1) + .limit(window_size_before) + ) messages_before = list(messages_before_cursor) messages_before.reverse() # --- 新增日志 --- - logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}") - logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):") + logger.debug( + f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}" + ) + logger.debug( + f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):" + ) for msg_b in messages_before: - logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'") + logger.debug( + f" - Time: {datetime.fromtimestamp(msg_b.get('time', 0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text', '')[:30]}...'" + ) - messages_after_cursor = db.messages.find( - {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} - ).sort("time", 1).limit(window_size_after) + messages_after_cursor = ( + db.messages.find( + {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} + ) + .sort("time", 1) + .limit(window_size_after) + ) messages_after = list(messages_after_cursor) # --- 新增日志 --- - logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):") + logger.debug( + f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):" + ) for msg_a in messages_after: - logger.debug(f" - Time: {datetime.fromtimestamp(msg_a.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text','')[:30]}...'") - + logger.debug( + f" - Time: {datetime.fromtimestamp(msg_a.get('time', 0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text', '')[:30]}...'" + ) if messages_before: context_messages.extend(messages_before) @@ -173,32 +244,35 @@ async def retrieve_chat_context_window( context_messages.append(anchor_message) if messages_after: context_messages.extend(messages_after) - - final_window: List[Dict[str, Any]] = [] # 明确类型 - seen_ids: set[str] = set() # 明确类型 + + final_window: List[Dict[str, Any]] = [] # 明确类型 + seen_ids: set[str] = set() # 明确类型 for msg in context_messages: msg_id = msg.get("message_id") - if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在 + if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在 final_window.append(msg) seen_ids.add(msg_id) - + final_window.sort(key=lambda m: m.get("time", 0)) - logger.info(f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。") + logger.info( + f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。" + ) return final_window except Exception as e: logger.error(f"[{chat_id}] (私聊历史)获取消息 ID '{anchor_message_id}' 的上下文窗口时出错: {e}", exc_info=True) return [] + # ============================================================================== # 修改后的 retrieve_contextual_info 函数 # ============================================================================== async def retrieve_contextual_info( - text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录) - private_name: str, # 用于日志 - chat_id: str, # 用于特定私聊历史的检索 + text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录) + private_name: str, # 用于日志 + chat_id: str, # 用于特定私聊历史的检索 historical_chat_query_text: Optional[str] = None, - current_short_term_history_earliest_time: Optional[float] = None # <--- 新增参数 -) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 + current_short_term_history_earliest_time: Optional[float] = None, # <--- 新增参数 +) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆 """ 检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。 @@ -222,9 +296,9 @@ async def retrieve_contextual_info( related_memory = await HippocampusManager.get_instance().get_memory_from_text( text=text, max_memory_num=2, - max_memory_length=2, + max_memory_length=2, max_depth=3, - fast_retrieval=False, + fast_retrieval=False, ) if related_memory: temp_global_memory_info = "" @@ -233,7 +307,7 @@ async def retrieve_contextual_info( temp_global_memory_info += str(memory_item[1]) + "\n" elif isinstance(memory_item, str): temp_global_memory_info += memory_item + "\n" - + if temp_global_memory_info.strip(): retrieved_global_memory_str = f"你回忆起一些相关的全局记忆:\n{temp_global_memory_info.strip()}\n(以上是你的全局记忆,供参考)\n" global_memory_log_msg = f"自动检索到全局压缩记忆: {temp_global_memory_info.strip()[:100]}..." @@ -250,7 +324,6 @@ async def retrieve_contextual_info( else: logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过全局压缩记忆检索。") - # --- 2. 相关知识检索 (来自 prompt_builder) --- # (保持你原始 pfc_utils.py 中这部分的逻辑基本不变) knowledge_log_msg = f"开始知识检索 (基于文本: '{text[:30]}...')" @@ -260,8 +333,8 @@ async def retrieve_contextual_info( message=text, threshold=0.38, ) - if knowledge_result and knowledge_result.strip(): # 确保结果不为空 - retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装 + if knowledge_result and knowledge_result.strip(): # 确保结果不为空 + retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装 knowledge_log_msg = f"自动检索到相关知识: {knowledge_result[:100]}..." else: knowledge_log_msg = "知识检索返回为空。" @@ -274,9 +347,10 @@ async def retrieve_contextual_info( else: logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。") - # --- 3. 当前私聊的特定历史对话上下文检索 --- - query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None + query_for_historical_chat = ( + historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None + ) # historical_chat_log_msg 的初始化可以移到 try 块之后,根据实际情况赋值 if query_for_historical_chat: @@ -284,13 +358,15 @@ async def retrieve_contextual_info( # ---- [新代码] 计算最终的、严格的搜索时间上限 ---- # 1. 设置一个基础的、较大的时间回溯窗口,例如2小时 (7200秒) # 这个值可以从全局配置读取,如果没配置则使用默认值 - default_search_exclude_seconds = getattr(global_config, "pfc_historical_search_default_exclude_seconds", 7200) # 默认2小时 + default_search_exclude_seconds = getattr( + global_config, "pfc_historical_search_default_exclude_seconds", 7200 + ) # 默认2小时 base_excluded_time_limit = time.time() - default_search_exclude_seconds - + final_search_upper_limit_time = base_excluded_time_limit if current_short_term_history_earliest_time is not None: # 我们希望找到的消息严格早于 short_term_history 的开始,减去一个小量确保不包含边界 - limit_from_short_term = current_short_term_history_earliest_time - 0.001 + limit_from_short_term = current_short_term_history_earliest_time - 0.001 final_search_upper_limit_time = min(base_excluded_time_limit, limit_from_short_term) log_earliest_time_str = "未提供" if current_short_term_history_earliest_time is not None: @@ -298,55 +374,60 @@ async def retrieve_contextual_info( log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})" except Exception: log_earliest_time_str = str(current_short_term_history_earliest_time) - - logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: " - f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " - f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " - f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}") - + + logger.debug( + f"[{private_name}] (私聊历史) retrieve_contextual_info: " + f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} " + f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). " + f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}" + ) most_relevant_message_doc = await find_most_relevant_historical_message( chat_id=chat_id, query_text=query_for_historical_chat, - similarity_threshold=0.5, # 您可以调整这个 + similarity_threshold=0.5, # 您可以调整这个 # exclude_recent_seconds 不再直接使用,而是传递计算好的绝对时间上限 - absolute_search_time_limit=final_search_upper_limit_time # <--- 传递计算好的绝对时间上限 + absolute_search_time_limit=final_search_upper_limit_time, # <--- 传递计算好的绝对时间上限 ) - + if most_relevant_message_doc: anchor_id = most_relevant_message_doc.get("message_id") - anchor_time = most_relevant_message_doc.get("time") - + anchor_time = most_relevant_message_doc.get("time") + # 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证) if anchor_time is not None and anchor_time >= final_search_upper_limit_time: - logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} " - f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。") + logger.warning( + f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} " + f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。" + ) historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。" # 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值 elif anchor_id and anchor_time is not None: # 构建上下文窗口时,其“未来”消息的上限也应该是 final_search_upper_limit_time # 因为我们不希望历史回忆的上下文窗口延伸到“最近聊天记录”的范围内或更近 - time_limit_for_context_window_after = final_search_upper_limit_time - - logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " - f"with anchor_time: {anchor_time}, " - f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}") + time_limit_for_context_window_after = final_search_upper_limit_time + + logger.debug( + f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window " + f"with anchor_time: {anchor_time}, " + f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}" + ) context_window_messages = await retrieve_chat_context_window( chat_id=chat_id, anchor_message_id=anchor_id, - anchor_message_time=anchor_time, + anchor_message_time=anchor_time, excluded_time_threshold_for_window=time_limit_for_context_window_after, window_size_before=7, - window_size_after=7 + window_size_after=7, ) if context_window_messages: formatted_window_str = await build_readable_messages( context_window_messages, - replace_bot_name=False, # 在回忆中,保留原始发送者名称 + replace_bot_name=False, # 在回忆中,保留原始发送者名称 merge_messages=False, - timestamp_mode="relative", # 可以选择 'absolute' 或 'none' - read_mark=0.0 + timestamp_mode="relative", # 可以选择 'absolute' 或 'none' + read_mark=0.0, ) if formatted_window_str and formatted_window_str.strip(): retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n" @@ -359,14 +440,18 @@ async def retrieve_contextual_info( historical_chat_log_msg = "检索到的最相关私聊历史消息文档缺少 message_id 或 time。" else: historical_chat_log_msg = "未找到足够相关的私聊历史对话消息。" - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}") + logger.debug( + f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}" + ) except Exception as e: logger.error( f"[私聊][{private_name}] (retrieve_contextual_info) 检索私聊历史对话时出错: {e}\n{traceback.format_exc()}" ) retrieved_historical_chat_str = "[检索私聊历史对话时出错]\n" else: - logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。") + logger.debug( + f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。" + ) return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str @@ -410,13 +495,13 @@ def get_items_from_json( json_array = json.loads(cleaned_content) if isinstance(json_array, list): valid_items_list: List[Dict[str, Any]] = [] - for item_json in json_array: # Renamed item to item_json to avoid conflict + for item_json in json_array: # Renamed item to item_json to avoid conflict if not isinstance(item_json, dict): logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item_json}") continue current_item_result = default_result.copy() valid_item = True - for field in items: # items is args from function signature + for field in items: # items is args from function signature if field in item_json: current_item_result[field] = item_json[field] elif field not in default_result: @@ -427,15 +512,25 @@ def get_items_from_json( continue if required_types: for field, expected_type in required_types.items(): - if field in current_item_result and not isinstance(current_item_result[field], expected_type): - logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}") + if field in current_item_result and not isinstance( + current_item_result[field], expected_type + ): + logger.warning( + f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}" + ) valid_item = False break if not valid_item: continue for field in items: - if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip(): - logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}") + if ( + field in current_item_result + and isinstance(current_item_result[field], str) + and not current_item_result[field].strip() + ): + logger.warning( + f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}" + ) valid_item = False break if valid_item: @@ -472,12 +567,14 @@ def get_items_from_json( logger.error(f"[私聊][{private_name}] 正则提取的部分 '{potential_json_str[:100]}...' 无法解析为JSON。") return False, default_result else: - logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...") + logger.error( + f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}..." + ) return False, default_result if not isinstance(result, dict): result = default_result.copy() valid_single_object = True - for item_field in items: # Renamed item to item_field + for item_field in items: # Renamed item to item_field if item_field in json_data: result[item_field] = json_data[item_field] elif item_field not in default_result: @@ -489,7 +586,9 @@ def get_items_from_json( if required_types: for field, expected_type in required_types.items(): if field in result and not isinstance(result[field], expected_type): - logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})") + logger.error( + f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})" + ) valid_single_object = False break if not valid_single_object: @@ -507,7 +606,7 @@ def get_items_from_json( async def get_person_id(private_name: str, chat_stream: ChatStream): - """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ + """(保持你原始 pfc_utils.py 中的此函数代码不变)""" private_user_id_str: Optional[str] = None private_platform_str: Optional[str] = None # private_nickname_str = private_name # 这行在你提供的代码中没有被使用,可以考虑移除 @@ -516,7 +615,7 @@ async def get_person_id(private_name: str, chat_stream: ChatStream): private_user_id_str = str(chat_stream.user_info.user_id) private_platform_str = chat_stream.user_info.platform logger.debug( - f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name + f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name ) # elif chat_stream.group_info is None and private_name: # 这个 elif 条件体为空,可以移除 # pass @@ -547,7 +646,7 @@ async def get_person_id(private_name: str, chat_stream: ChatStream): async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: float) -> float: - """ (保持你原始 pfc_utils.py 中的此函数代码不变) """ + """(保持你原始 pfc_utils.py 中的此函数代码不变)""" old_value = max(-1000, min(1000, old_value)) value = raw_adjustment if old_value >= 0: @@ -555,7 +654,9 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: value = value * math.cos(math.pi * old_value / 2000) if old_value > 500: # 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整 - rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False) + rdict = await person_info_manager.get_specific_value_list( + "relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False + ) high_value_count = len(rdict) if old_value > 700: value *= 3 / (high_value_count + 2) @@ -564,7 +665,7 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: elif value < 0: value = value * math.exp(old_value / 2000) # else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0 - else: # old_value < 0 + else: # old_value < 0 if value >= 0: value = value * math.exp(old_value / 2000) elif value < 0: @@ -586,12 +687,12 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam ) else: chat_history_text = "还没有聊天记录。\n" - + unread_count = getattr(observation_info, "new_messages_count", 0) unread_messages = getattr(observation_info, "unprocessed_messages", []) if unread_count > 0 and unread_messages: - bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取 - if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤 + bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取 + if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤 other_unread_messages = [ msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str ] @@ -599,12 +700,12 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam if other_unread_count > 0: new_messages_str = await build_readable_messages( other_unread_messages, - replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字 + replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字 merge_messages=False, timestamp_mode="relative", read_mark=0.0, ) - chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的 + chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的 else: logger.warning(f"[私聊][{private_name}] BOT_QQ 未配置,无法准确过滤未读消息中的机器人自身消息。") @@ -614,4 +715,4 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam except Exception as e: logger.error(f"[私聊][{private_name}] 处理聊天记录时发生未知错误: {e}") chat_history_text = "[处理聊天记录时出错]\n" - return chat_history_text \ No newline at end of file + return chat_history_text diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py index 0a9089aa..7773bc08 100644 --- a/src/plugins/PFC/reply_generator.py +++ b/src/plugins/PFC/reply_generator.py @@ -226,7 +226,7 @@ class ReplyGenerator: chat_history_for_prompt_builder: list = [] recent_history_start_time_for_exclusion: Optional[float] = None - + # 我们需要知道 build_chat_history_text 函数大致会用 observation_info.chat_history 的多少条记录 # 或者 build_chat_history_text 内部的逻辑。 # 假设 build_chat_history_text 主要依赖 observation_info.chat_history_str, @@ -238,7 +238,7 @@ class ReplyGenerator: # 如果 observation_info.chat_history_str 是由 observation_info.py 中的 update_from_message 等方法维护的, # 并且总是代表一个固定长度(比如最后30条)的聊天记录字符串,那么我们就需要从 observation_info.chat_history # 取出这部分原始消息来确定起始时间。 - + # 我们先做一个合理的假设: “最近聊天记录” 字符串 chat_history_text 是基于 # observation_info.chat_history 的一个有限的尾部片段生成的。 # 假设这个片段的长度由 global_config.pfc_recent_history_display_count 控制,默认为20条。 @@ -249,25 +249,29 @@ class ReplyGenerator: # 如果 observation_info.chat_history 长度小于 display_count,则取全部 start_index = max(0, len(observation_info.chat_history) - recent_history_display_count) chat_history_for_prompt_builder = observation_info.chat_history[start_index:] - - if chat_history_for_prompt_builder: # 如果片段不为空 + + if chat_history_for_prompt_builder: # 如果片段不为空 try: first_message_in_display_slice = chat_history_for_prompt_builder[0] - recent_history_start_time_for_exclusion = first_message_in_display_slice.get('time') + recent_history_start_time_for_exclusion = first_message_in_display_slice.get("time") if recent_history_start_time_for_exclusion: # 导入 datetime (如果 reply_generator.py 文件顶部没有的话) # from datetime import datetime # 通常建议放在文件顶部 - logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: " - f"{recent_history_start_time_for_exclusion} " - f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})") + logger.debug( + f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: " + f"{recent_history_start_time_for_exclusion} " + f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})" + ) else: logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。") except (IndexError, KeyError, TypeError) as e: logger.warning(f"[{self.private_name}] (ReplyGenerator) 获取“最近聊天记录”起始时间失败: {e}") - recent_history_start_time_for_exclusion = None + recent_history_start_time_for_exclusion = None else: - logger.debug(f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。") - # --- [新代码结束] --- + logger.debug( + f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。" + ) + # --- [新代码结束] --- chat_history_text = await build_chat_history_text(observation_info, self.private_name) @@ -278,28 +282,32 @@ class ReplyGenerator: persona_text = f"你的名字是{self.name},{self.personality_info}。" historical_chat_query = "" - num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子 + num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子 if observation_info.chat_history and len(observation_info.chat_history) > 0: # 从 chat_history (已处理并存入 ObservationInfo 的历史) 中取最新N条 # 或者,如果 observation_info.unprocessed_messages 更能代表“当前上下文”,也可以考虑用它 # 我们先用 chat_history,因为它包含了双方的对话历史,可能更稳定 recent_messages_for_query_list = observation_info.chat_history[-num_recent_messages_for_query:] - + # 将这些消息的文本内容合并 query_texts_list = [] for msg_dict in recent_messages_for_query_list: text_content = msg_dict.get("processed_plain_text", "") - if text_content.strip(): # 只添加有内容的文本 + if text_content.strip(): # 只添加有内容的文本 # 可以选择是否添加发送者信息到查询文本中,例如: # sender_nickname = msg_dict.get("user_info", {}).get("user_nickname", "用户") # query_texts_list.append(f"{sender_nickname}: {text_content}") - query_texts_list.append(text_content) # 简单合并文本内容 - + query_texts_list.append(text_content) # 简单合并文本内容 + if query_texts_list: historical_chat_query = " ".join(query_texts_list).strip() - logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'") + logger.debug( + f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'" + ) else: - logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。") + logger.debug( + f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。" + ) else: logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 无聊天历史可用于生成私聊历史查询文本。") @@ -316,13 +324,13 @@ class ReplyGenerator: ( retrieved_global_memory_str, retrieved_knowledge_str, - retrieved_historical_chat_str # << 新增接收私聊历史回忆 + retrieved_historical_chat_str, # << 新增接收私聊历史回忆 ) = await retrieve_contextual_info( - text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识 + text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识 private_name=self.private_name, - chat_id=current_chat_id, # << 传递 chat_id - historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本 - current_short_term_history_earliest_time=recent_history_start_time_for_exclusion # <--- 新增传递的参数 + chat_id=current_chat_id, # << 传递 chat_id + historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本 + current_short_term_history_earliest_time=recent_history_start_time_for_exclusion, # <--- 新增传递的参数 ) # === 调用修改结束 === @@ -394,10 +402,18 @@ class ReplyGenerator: base_format_params = { "persona_text": persona_text, "goals_str": goals_str, - "chat_history_text": chat_history_text if chat_history_text.strip() else "还没有聊天记录。", # 当前短期历史 - "retrieved_global_memory_str": retrieved_global_memory_str if retrieved_global_memory_str.strip() else "无相关全局记忆。", - "retrieved_knowledge_str": retrieved_knowledge_str if retrieved_knowledge_str.strip() else "无相关知识。", - "retrieved_historical_chat_str": retrieved_historical_chat_str if retrieved_historical_chat_str.strip() else "无相关私聊历史回忆。", # << 新增 + "chat_history_text": chat_history_text + if chat_history_text.strip() + else "还没有聊天记录。", # 当前短期历史 + "retrieved_global_memory_str": retrieved_global_memory_str + if retrieved_global_memory_str.strip() + else "无相关全局记忆。", + "retrieved_knowledge_str": retrieved_knowledge_str + if retrieved_knowledge_str.strip() + else "无相关知识。", + "retrieved_historical_chat_str": retrieved_historical_chat_str + if retrieved_historical_chat_str.strip() + else "无相关私聊历史回忆。", # << 新增 "last_rejection_info": last_rejection_info_str, "current_time_str": current_time_value, "sender_name": sender_name_str, From cb392c1981fbf06ffc5d53a2e57bfe4cf3bba31f Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 14:44:09 +0800 Subject: [PATCH 25/27] =?UTF-8?q?=E5=AE=8C=E6=95=B4=E4=BC=A0=E5=8F=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index f706bffa..926ae821 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -77,7 +77,7 @@ class PFCProcessor: await self.storage.store_message(message_obj, chat) logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}") - await self._update_embedding_vector(message_obj) # 明确传递 message_obj + await self._update_embedding_vector(message_obj, chat) # 明确传递 message_obj # 4. 创建 PFC 聊天流 await self._create_pfc_chat(message_obj, chat) From aa3568ecb2e58469506273e31223da86ef4b7173 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 14:47:16 +0800 Subject: [PATCH 26/27] =?UTF-8?q?=E6=88=91=E6=98=AF=E5=82=BB=E9=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py index 926ae821..ea9ac4df 100644 --- a/src/plugins/PFC/pfc_processor.py +++ b/src/plugins/PFC/pfc_processor.py @@ -80,7 +80,7 @@ class PFCProcessor: await self._update_embedding_vector(message_obj, chat) # 明确传递 message_obj # 4. 创建 PFC 聊天流 - await self._create_pfc_chat(message_obj, chat) + await self._create_pfc_chat(message_obj) # 5. 日志记录 # 确保 message_obj.message_info.time 是 float 类型的时间戳 From 7ec716f0e1036caf6a615b6048f3ca04a2119f8c Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 10 May 2025 17:27:54 +0800 Subject: [PATCH 27/27] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/pfc_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py index 91c04ad5..3710bae0 100644 --- a/src/plugins/PFC/pfc_utils.py +++ b/src/plugins/PFC/pfc_utils.py @@ -432,6 +432,7 @@ async def retrieve_contextual_info( if formatted_window_str and formatted_window_str.strip(): retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n" historical_chat_log_msg = f"自动检索到相关私聊历史片段 (锚点ID: {anchor_id}, 相似度: {most_relevant_message_doc.get('similarity'):.3f})" + return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str else: historical_chat_log_msg = "检索到的私聊历史对话窗口格式化后为空。" else: