From ed7e3399079a275347ce95702a10426be4d2d33d Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 16:58:57 +0800
Subject: [PATCH 01/27] =?UTF-8?q?=E9=95=BF=E6=9C=9F=E8=AE=B0=E5=BF=86=20?=
=?UTF-8?q?=E7=AC=AC=E4=B8=80=E7=89=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_processor.py | 168 +++++---
src/plugins/PFC/pfc_utils.py | 593 +++++++++++++++++------------
src/plugins/PFC/reply_generator.py | 76 +++-
3 files changed, 525 insertions(+), 312 deletions(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index 428db544..8aaf800d 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -1,123 +1,171 @@
import traceback
-
-from maim_message import UserInfo
+import re
+from typing import Any, Dict
+from datetime import datetime # 确保导入 datetime
+from maim_message import UserInfo, MessageRecv # 从 maim_message 导入 MessageRecv
from src.config.config import global_config
from src.common.logger_manager import get_logger
-from ..chat.chat_stream import chat_manager
-from typing import Optional, Dict, Any
+from ..chat.chat_stream import chat_manager
+from src.plugins.chat.utils import get_embedding
+from src.common.database import db
from .pfc_manager import PFCManager
-from src.plugins.chat.message import MessageRecv
-from src.plugins.storage.storage import MessageStorage
-from datetime import datetime
-
logger = get_logger("pfc_processor")
-async def _handle_error(error: Exception, context: str, message: Optional[MessageRecv] = None) -> None:
+async def _handle_error(error: Exception, context: str, message: MessageRecv | None = None) -> None: # 明确 message 类型
"""统一的错误处理函数
-
- Args:
- error: 捕获到的异常
- context: 错误发生的上下文描述
- message: 可选的消息对象,用于记录相关消息内容
+ # ... (方法注释不变) ...
"""
logger.error(f"{context}: {error}")
logger.error(traceback.format_exc())
- if message and hasattr(message, "raw_message"):
+ # 检查 message 是否 None 以及是否有 raw_message 属性
+ if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message
+ raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取
+ if raw_msg_content:
+ logger.error(f"相关消息原始内容: {raw_msg_content}")
+ elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message
logger.error(f"相关消息原始内容: {message.raw_message}")
class PFCProcessor:
- """PFC 处理器,负责处理接收到的信息并计数"""
-
def __init__(self):
"""初始化 PFC 处理器,创建消息存储实例"""
- self.storage = MessageStorage()
+ # MessageStorage() 的实例化位置和具体类是什么?
+ # 我们假设它来自 src.plugins.storage.storage
+ # 但由于我们不能修改那个文件,所以这里的 self.storage 将按原样使用
+ from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解
+ self.storage: MessageStorage = MessageStorage()
self.pfc_manager = PFCManager.get_instance()
- async def process_message(self, message_data: Dict[str, Any]) -> None:
+ async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict
"""处理接收到的原始消息数据
-
- 主要流程:
- 1. 消息解析与初始化
- 2. 过滤检查
- 3. 消息存储
- 4. 创建 PFC 流
- 5. 日志记录
-
- Args:
- message_data: 原始消息字符串
+ # ... (方法注释不变) ...
"""
- message = None
+ message_obj: MessageRecv | None = None # 初始化为 None,并明确类型
try:
# 1. 消息解析与初始化
- message = MessageRecv(message_data)
- groupinfo = message.message_info.group_info
- userinfo = message.message_info.user_info
- messageinfo = message.message_info
+ message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv
+ # 确保 message_obj.message_info 存在
+ if not hasattr(message_obj, 'message_info'):
+ logger.error("MessageRecv 对象缺少 message_info 属性。跳过处理。")
+ return
+
+ groupinfo = getattr(message_obj.message_info, 'group_info', None)
+ userinfo = getattr(message_obj.message_info, 'user_info', None)
+
+ if userinfo is None: # 确保 userinfo 存在
+ logger.error("message_obj.message_info 中缺少 user_info。跳过处理。")
+ return
+ if not hasattr(userinfo, 'user_id'): # 确保 user_id 存在
+ logger.error("userinfo 对象中缺少 user_id。跳过处理。")
+ return
logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流")
chat = await chat_manager.get_or_create_stream(
- platform=messageinfo.platform,
+ platform=message_obj.message_info.platform,
user_info=userinfo,
group_info=groupinfo,
)
- message.update_chat_stream(chat)
+ message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法
# 2. 过滤检查
- # 处理消息
- await message.process()
- # 过滤词/正则表达式过滤
- if self._check_ban_words(message.processed_plain_text, userinfo) or self._check_ban_regex(
- message.raw_message, userinfo
- ):
+ await message_obj.process() # 调用 MessageRecv 的异步 process 方法
+ if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \
+ self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性
return
- # 3. 消息存储
- await self.storage.store_message(message, chat)
- logger.trace(f"存储成功: {message.processed_plain_text}")
+ # 3. 消息存储 (保持原有调用)
+ # 这里的 self.storage.store_message 来自 src/plugins/storage/storage.py
+ # 它内部会将 message_obj 转换为字典并存储
+ await self.storage.store_message(message_obj, chat)
+ logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}")
+
+ # === 新增:为已存储的消息生成嵌入并更新数据库文档 ===
+ embedding_vector = None
+ text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本
+
+ # 在 storage.py 中,会对 processed_plain_text 进行一次过滤
+ # 为了保持一致,我们也在这里应用相同的过滤逻辑
+ # 当然,更优的做法是 store_message 返回过滤后的文本,或在 message_obj 中增加一个 filtered_processed_plain_text 属性
+ # 这里为了简单,我们先重复一次过滤逻辑
+ pattern = r".*?|.*?|.*?"
+ if text_for_embedding:
+ filtered_text_for_embedding = re.sub(pattern, "", text_for_embedding, flags=re.DOTALL)
+ else:
+ filtered_text_for_embedding = ""
+
+ if filtered_text_for_embedding and filtered_text_for_embedding.strip():
+ try:
+ # request_type 参数根据你的 get_embedding 函数实际需求来定
+ embedding_vector = await get_embedding(filtered_text_for_embedding, request_type="pfc_private_memory")
+ if embedding_vector:
+ logger.debug(f"成功为消息 ID '{message_obj.message_info.message_id}' 生成嵌入向量。")
+
+ # 更新数据库中的对应文档
+ # 确保你有权限访问和操作 db 对象
+ update_result = await db.messages.update_one(
+ {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id},
+ {"$set": {"embedding_vector": embedding_vector}}
+ )
+ if update_result.modified_count > 0:
+ logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。")
+ elif update_result.matched_count > 0:
+ logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。")
+ else:
+ logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。")
+ else:
+ logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。")
+ except Exception as e_embed_update:
+ logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True)
+ else:
+ logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。")
+ # === 新增结束 ===
# 4. 创建 PFC 聊天流
- await self._create_pfc_chat(message)
+ await self._create_pfc_chat(message_obj)
# 5. 日志记录
- # 将时间戳转换为datetime对象
- current_time = datetime.fromtimestamp(message.message_info.time).strftime("%H:%M:%S")
+ # 确保 message_obj.message_info.time 是 float 类型的时间戳
+ current_time_display = datetime.fromtimestamp(float(message_obj.message_info.time)).strftime("%H:%M:%S")
+
+ # 确保 userinfo.user_nickname 存在
+ user_nickname_display = getattr(userinfo, 'user_nickname', '未知用户')
+
logger.info(
- f"[{current_time}][私聊]{message.message_info.user_info.user_nickname}: {message.processed_plain_text}"
+ f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}"
)
except Exception as e:
- await _handle_error(e, "消息处理失败", message)
+ await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj
- async def _create_pfc_chat(self, message: MessageRecv):
+ async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型
try:
chat_id = str(message.chat_stream.stream_id)
- private_name = str(message.message_info.user_info.user_nickname)
+ private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname
if global_config.enable_pfc_chatting:
await self.pfc_manager.get_or_create_conversation(chat_id, private_name)
except Exception as e:
- logger.error(f"创建PFC聊天失败: {e}")
+ logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True
@staticmethod
- def _check_ban_words(text: str, userinfo: UserInfo) -> bool:
+ def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型
"""检查消息中是否包含过滤词"""
for word in global_config.ban_words:
if word in text:
- logger.info(f"[私聊]{userinfo.user_nickname}:{text}")
+ logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname
logger.info(f"[过滤词识别]消息中含有{word},filtered")
return True
return False
@staticmethod
- def _check_ban_regex(text: str, userinfo: UserInfo) -> bool:
+ def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型
"""检查消息是否匹配过滤正则表达式"""
for pattern in global_config.ban_msgs_regex:
- if pattern.search(text):
- logger.info(f"[私聊]{userinfo.user_nickname}:{text}")
- logger.info(f"[正则表达式过滤]消息匹配到{pattern},filtered")
+ if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象
+ logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname
+ logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串
return True
- return False
+ return False
\ No newline at end of file
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index fc5437ab..666fa6e8 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -1,88 +1,285 @@
import traceback
import json
import re
-from typing import Dict, Any, Optional, Tuple, List, Union
-from src.common.logger_manager import get_logger # 确认 logger 的导入路径
-from src.plugins.memory_system.Hippocampus import HippocampusManager
-from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # 确认 prompt_builder 的导入路径
-from src.plugins.chat.chat_stream import ChatStream
-from ..person_info.person_info import person_info_manager
-import math
-from src.plugins.utils.chat_message_builder import build_readable_messages
-from .observation_info import ObservationInfo
+import asyncio # 确保导入 asyncio
+from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
+
+from src.common.logger_manager import get_logger
from src.config.config import global_config
+from src.common.database import db # << 确认此路径
+
+# --- 依赖于你项目结构的导入,请务必仔细检查并根据你的实际情况调整 ---
+from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径
+from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径
+from src.plugins.chat.utils import get_embedding # << 确认此路径
+from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径
+# --- 依赖导入结束 ---
+
+from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py
+from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入)
+import math # 来自原始 pfc_utils.py
+from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入)
+
logger = get_logger("pfc_utils")
-
-async def retrieve_contextual_info(text: str, private_name: str) -> Tuple[str, str]:
+# ==============================================================================
+# 新增:专门用于检索 PFC 私聊历史对话上下文的函数
+# ==============================================================================
+async def find_most_relevant_historical_message(
+ chat_id: str,
+ query_text: str,
+ similarity_threshold: float = 0.3 # 相似度阈值,可以根据效果调整
+) -> Optional[Dict[str, Any]]:
"""
- 根据输入文本检索相关的记忆和知识。
-
- Args:
- text: 用于检索的上下文文本 (例如聊天记录)。
- private_name: 私聊对象的名称,用于日志记录。
-
- Returns:
- Tuple[str, str]: (检索到的记忆字符串, 检索到的知识字符串)
+ 根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。
"""
- retrieved_memory_str = "无相关记忆。"
+ if not query_text or not query_text.strip():
+ logger.debug(f"[{chat_id}] (私聊历史)查询文本为空,跳过检索。")
+ return None
+
+ logger.debug(f"[{chat_id}] (私聊历史)开始为查询文本 '{query_text[:50]}...' 检索。")
+
+ # 使用你项目中已有的 get_embedding 函数
+ # request_type 参数需要根据 get_embedding 的实际需求调整
+ query_embedding = await get_embedding(query_text, request_type="pfc_historical_chat_query")
+ if not query_embedding:
+ logger.warning(f"[{chat_id}] (私聊历史)未能为查询文本 '{query_text[:50]}...' 生成嵌入向量。")
+ return None
+
+ pipeline = [
+ {
+ "$match": {
+ "chat_id": chat_id,
+ "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}}
+ }
+ },
+ {
+ "$addFields": {
+ "dotProduct": {"$reduce": {"input": {"$range": [0, {"$size": "$embedding_vector"}]}, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": [{"$arrayElemAt": ["$embedding_vector", "$$this"]}, {"$arrayElemAt": [query_embedding, "$$this"]}]}]}}},
+ "queryVecMagnitude": {"$sqrt": {"$reduce": {"input": query_embedding, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}},
+ "docVecMagnitude": {"$sqrt": {"$reduce": {"input": "$embedding_vector", "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}}
+ }
+ },
+ {
+ "$addFields": {
+ "similarity": {
+ "$cond": [
+ {"$and": [{"$gt": ["$queryVecMagnitude", 0]}, {"$gt": ["$docVecMagnitude", 0]}]},
+ {"$divide": ["$dotProduct", {"$multiply": ["$queryVecMagnitude", "$docVecMagnitude"]}]},
+ 0
+ ]
+ }
+ }
+ },
+ {"$match": {"similarity": {"$gte": similarity_threshold}}},
+ {"$sort": {"similarity": -1}},
+ {"$limit": 1},
+ {"$project": {"_id": 0, "message_id": 1, "time": 1, "chat_id": 1, "user_info": 1, "processed_plain_text": 1, "similarity": 1}} # 可以不返回 embedding_vector 节省带宽
+ ]
+
+ try:
+ # 假设 db.messages 是存储PFC私聊消息并带有embedding_vector的集合
+ results = await db.messages.aggregate(pipeline).to_list(length=1)
+ if results and len(results) > 0:
+ most_similar_message = results[0]
+ logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}")
+ return most_similar_message
+ else:
+ logger.debug(f"[{chat_id}] (私聊历史)未找到相似度超过 {similarity_threshold} 的相关消息。")
+ return None
+ except Exception as e:
+ logger.error(f"[{chat_id}] (私聊历史)在数据库中检索时出错: {e}", exc_info=True)
+ return None
+
+async def retrieve_chat_context_window(
+ chat_id: str,
+ anchor_message_id: str,
+ anchor_message_time: float,
+ window_size_before: int = 7,
+ window_size_after: int = 7
+) -> List[Dict[str, Any]]:
+ """
+ 以某条消息为锚点,获取其前后的聊天记录形成一个上下文窗口。
+ """
+ if not anchor_message_id or anchor_message_time is None:
+ return []
+
+ context_messages: List[Dict[str, Any]] = [] # 明确类型
+ logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...")
+
+ try:
+ # 假设 db.messages 是存储PFC私聊消息的集合
+ anchor_message = await db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id})
+
+ messages_before_cursor = db.messages.find(
+ {"chat_id": chat_id, "time": {"$lt": anchor_message_time}}
+ ).sort("time", -1).limit(window_size_before)
+ messages_before = await messages_before_cursor.to_list(length=window_size_before)
+ messages_before.reverse()
+
+ messages_after_cursor = db.messages.find(
+ {"chat_id": chat_id, "time": {"$gt": anchor_message_time}}
+ ).sort("time", 1).limit(window_size_after)
+ messages_after = await messages_after_cursor.to_list(length=window_size_after)
+
+ if messages_before:
+ context_messages.extend(messages_before)
+ if anchor_message:
+ anchor_message.pop("_id", None)
+ context_messages.append(anchor_message)
+ if messages_after:
+ context_messages.extend(messages_after)
+
+ final_window: List[Dict[str, Any]] = [] # 明确类型
+ seen_ids: set[str] = set() # 明确类型
+ for msg in context_messages:
+ msg_id = msg.get("message_id")
+ if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在
+ final_window.append(msg)
+ seen_ids.add(msg_id)
+
+ final_window.sort(key=lambda m: m.get("time", 0))
+ logger.info(f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。")
+ return final_window
+ except Exception as e:
+ logger.error(f"[{chat_id}] (私聊历史)获取消息 ID '{anchor_message_id}' 的上下文窗口时出错: {e}", exc_info=True)
+ return []
+
+# ==============================================================================
+# 修改后的 retrieve_contextual_info 函数
+# ==============================================================================
+async def retrieve_contextual_info(
+ text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录)
+ private_name: str, # 用于日志
+ chat_id: str, # 用于特定私聊历史的检索
+ historical_chat_query_text: Optional[str] = None # 专门为私聊历史检索准备的查询文本 (例如最新的N条消息合并)
+) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆
+ """
+ 检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。
+ """
+ # 初始化返回值
+ retrieved_global_memory_str = "无相关全局记忆。"
retrieved_knowledge_str = "无相关知识。"
- memory_log_msg = "未自动检索到相关记忆。"
- knowledge_log_msg = "未自动检索到相关知识。"
+ retrieved_historical_chat_str = "无相关私聊历史回忆。"
- if not text or text == "还没有聊天记录。" or text == "[构建聊天记录出错]":
- logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效上下文,跳过检索。")
- return retrieved_memory_str, retrieved_knowledge_str
-
- # 1. 检索记忆 (逻辑来自原 _get_memory_info)
- try:
- related_memory = await HippocampusManager.get_instance().get_memory_from_text(
- text=text,
- max_memory_num=2,
- max_memory_length=2,
- max_depth=3,
- fast_retrieval=False,
- )
- if related_memory:
- related_memory_info = ""
- for memory in related_memory:
- related_memory_info += memory[1] + "\n"
- if related_memory_info:
- # 注意:原版提示信息可以根据需要调整
- retrieved_memory_str = f"你回忆起:\n{related_memory_info.strip()}\n(以上是你的回忆,供参考)\n"
- memory_log_msg = f"自动检索到记忆: {related_memory_info.strip()[:100]}..."
+ # --- 1. 全局压缩记忆检索 (来自 HippocampusManager) ---
+ # (保持你原始 pfc_utils.py 中这部分的逻辑基本不变)
+ global_memory_log_msg = f"开始全局压缩记忆检索 (基于文本: '{text[:30]}...')"
+ if text and text.strip() and text != "还没有聊天记录。" and text != "[构建聊天记录出错]":
+ try:
+ related_memory = await HippocampusManager.get_instance().get_memory_from_text(
+ text=text,
+ max_memory_num=2,
+ max_memory_length=2, # 你原始代码中这里是2,不是200
+ max_depth=3,
+ fast_retrieval=False, # 你原始代码中这里是False
+ )
+ if related_memory:
+ temp_global_memory_info = ""
+ for memory_item in related_memory:
+ if isinstance(memory_item, (list, tuple)) and len(memory_item) > 1:
+ temp_global_memory_info += str(memory_item[1]) + "\n"
+ elif isinstance(memory_item, str):
+ temp_global_memory_info += memory_item + "\n"
+
+ if temp_global_memory_info.strip():
+ retrieved_global_memory_str = f"你回忆起一些相关的全局记忆:\n{temp_global_memory_info.strip()}\n(以上是你的全局记忆,供参考)\n"
+ global_memory_log_msg = f"自动检索到全局压缩记忆: {temp_global_memory_info.strip()[:100]}..."
+ else:
+ global_memory_log_msg = "全局压缩记忆检索返回为空或格式不符。"
else:
- memory_log_msg = "自动检索记忆返回为空。"
- logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 记忆检索: {memory_log_msg}")
-
- except Exception as e:
- logger.error(
- f"[私聊][{private_name}] (retrieve_contextual_info) 自动检索记忆时出错: {e}\n{traceback.format_exc()}"
- )
- retrieved_memory_str = "检索记忆时出错。\n"
-
- # 2. 检索知识 (逻辑来自原 action_planner 和 reply_generator)
- try:
- # 使用导入的 prompt_builder 实例及其方法
- knowledge_result = await prompt_builder.get_prompt_info(
- message=text,
- threshold=0.38, # threshold 可以根据需要调整
- )
- if knowledge_result:
- retrieved_knowledge_str = knowledge_result # 直接使用返回结果
- knowledge_log_msg = "自动检索到相关知识。"
- logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 知识检索: {knowledge_log_msg}")
-
- except Exception as e:
- logger.error(
- f"[私聊][{private_name}] (retrieve_contextual_info) 自动检索知识时出错: {e}\n{traceback.format_exc()}"
- )
- retrieved_knowledge_str = "检索知识时出错。\n"
-
- return retrieved_memory_str, retrieved_knowledge_str
+ global_memory_log_msg = "全局压缩记忆检索返回为空列表。"
+ logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 全局压缩记忆检索: {global_memory_log_msg}")
+ except Exception as e:
+ logger.error(
+ f"[私聊][{private_name}] (retrieve_contextual_info) 检索全局压缩记忆时出错: {e}\n{traceback.format_exc()}"
+ )
+ retrieved_global_memory_str = "[检索全局压缩记忆时出错]\n"
+ else:
+ logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过全局压缩记忆检索。")
+ # --- 2. 相关知识检索 (来自 prompt_builder) ---
+ # (保持你原始 pfc_utils.py 中这部分的逻辑基本不变)
+ knowledge_log_msg = f"开始知识检索 (基于文本: '{text[:30]}...')"
+ if text and text.strip() and text != "还没有聊天记录。" and text != "[构建聊天记录出错]":
+ try:
+ knowledge_result = await prompt_builder.get_prompt_info(
+ message=text,
+ threshold=0.38,
+ )
+ if knowledge_result and knowledge_result.strip(): # 确保结果不为空
+ retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装
+ knowledge_log_msg = f"自动检索到相关知识: {knowledge_result[:100]}..."
+ else:
+ knowledge_log_msg = "知识检索返回为空。"
+ logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 知识检索: {knowledge_log_msg}")
+ except Exception as e:
+ logger.error(
+ f"[私聊][{private_name}] (retrieve_contextual_info) 自动检索知识时出错: {e}\n{traceback.format_exc()}"
+ )
+ retrieved_knowledge_str = "[检索知识时出错]\n"
+ else:
+ logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。")
+
+
+ # --- 3. 当前私聊的特定历史对话上下文检索 (新增逻辑) ---
+ query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None
+ historical_chat_log_msg = f"开始私聊历史检索 (查询文本: '{str(query_for_historical_chat)[:30]}...')"
+
+ if query_for_historical_chat:
+ try:
+ most_relevant_message_doc = await find_most_relevant_historical_message(
+ chat_id=chat_id,
+ query_text=query_for_historical_chat,
+ similarity_threshold=0.5 # 你可以根据需要调整这个阈值
+ )
+ if most_relevant_message_doc:
+ anchor_id = most_relevant_message_doc.get("message_id")
+ anchor_time = most_relevant_message_doc.get("time")
+ if anchor_id and anchor_time is not None:
+ context_window_messages = await retrieve_chat_context_window(
+ chat_id=chat_id,
+ anchor_message_id=anchor_id,
+ anchor_message_time=anchor_time,
+ window_size_before=7, # 我们的目标:上7条
+ window_size_after=7 # 我们的目标:下7条 (共15条,包括锚点)
+ )
+ if context_window_messages:
+ formatted_window_str = await build_readable_messages(
+ context_window_messages,
+ replace_bot_name=False, # 在回忆中,保留原始发送者名称
+ merge_messages=False,
+ timestamp_mode="relative", # 可以选择 'absolute' 或 'none'
+ read_mark=0.0
+ )
+ if formatted_window_str and formatted_window_str.strip():
+ retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n"
+ historical_chat_log_msg = f"自动检索到相关私聊历史片段 (锚点ID: {anchor_id}, 相似度: {most_relevant_message_doc.get('similarity'):.3f})"
+ else:
+ historical_chat_log_msg = "检索到的私聊历史对话窗口格式化后为空。"
+ else:
+ historical_chat_log_msg = f"找到了相关锚点消息 (ID: {anchor_id}),但未能构建其上下文窗口。"
+ else:
+ historical_chat_log_msg = "检索到的最相关私聊历史消息文档缺少 message_id 或 time。"
+ else:
+ historical_chat_log_msg = "未找到足够相关的私聊历史对话消息。"
+ logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}")
+ except Exception as e:
+ logger.error(
+ f"[私聊][{private_name}] (retrieve_contextual_info) 检索私聊历史对话时出错: {e}\n{traceback.format_exc()}"
+ )
+ retrieved_historical_chat_str = "[检索私聊历史对话时出错]\n"
+ else:
+ logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。")
+
+ return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str
+
+
+# ==============================================================================
+# 你原始 pfc_utils.py 中的其他函数保持不变
+# ==============================================================================
def get_items_from_json(
content: str,
private_name: str,
@@ -92,121 +289,66 @@ def get_items_from_json(
allow_array: bool = True,
) -> Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]:
"""从文本中提取JSON内容并获取指定字段
-
- Args:
- content: 包含JSON的文本
- private_name: 私聊名称
- *items: 要提取的字段名
- default_values: 字段的默认值,格式为 {字段名: 默认值}
- required_types: 字段的必需类型,格式为 {字段名: 类型}
- allow_array: 是否允许解析JSON数组
-
- Returns:
- Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]: (是否成功, 提取的字段字典或字典列表)
+ (保持你原始 pfc_utils.py 中的此函数代码不变)
"""
cleaned_content = content.strip()
- result: Union[Dict[str, Any], List[Dict[str, Any]]] = {} # 初始化类型
- # 匹配 ```json ... ``` 或 ``` ... ```
+ result: Union[Dict[str, Any], List[Dict[str, Any]]] = {}
markdown_match = re.search(r"```(?:json)?\s*([\s\S]*?)\s*```", cleaned_content, re.IGNORECASE)
if markdown_match:
cleaned_content = markdown_match.group(1).strip()
logger.debug(f"[私聊][{private_name}] 已去除 Markdown 标记,剩余内容: {cleaned_content[:100]}...")
- # --- 新增结束 ---
-
- # 设置默认值
- default_result: Dict[str, Any] = {} # 用于单对象时的默认值
+ default_result: Dict[str, Any] = {}
if default_values:
default_result.update(default_values)
- result = default_result.copy() # 先用默认值初始化
-
- # 首先尝试解析为JSON数组
+ result = default_result.copy()
if allow_array:
try:
- # 尝试直接解析清理后的内容为列表
json_array = json.loads(cleaned_content)
-
if isinstance(json_array, list):
valid_items_list: List[Dict[str, Any]] = []
- for item in json_array:
- if not isinstance(item, dict):
- logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item}")
+ for item_json in json_array: # Renamed item to item_json to avoid conflict
+ if not isinstance(item_json, dict):
+ logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item_json}")
continue
-
- current_item_result = default_result.copy() # 每个元素都用默认值初始化
+ current_item_result = default_result.copy()
valid_item = True
-
- # 提取并验证字段
- for field in items:
- if field in item:
- current_item_result[field] = item[field]
- elif field not in default_result: # 如果字段不存在且没有默认值
- logger.warning(f"[私聊][{private_name}] JSON数组元素缺少必要字段 '{field}': {item}")
- valid_item = False
- break # 这个元素无效
-
- if not valid_item:
- continue
-
- # 验证类型
+ for field in items: # items is args from function signature
+ if field in item_json:
+ current_item_result[field] = item_json[field]
+ elif field not in default_result:
+ logger.warning(f"[私聊][{private_name}] JSON数组元素缺少必要字段 '{field}': {item_json}")
+ valid_item = False; break
+ if not valid_item: continue
if required_types:
for field, expected_type in required_types.items():
- # 检查 current_item_result 中是否存在该字段 (可能来自 item 或 default_values)
- if field in current_item_result and not isinstance(
- current_item_result[field], expected_type
- ):
- logger.warning(
- f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item}"
- )
- valid_item = False
- break
-
- if not valid_item:
- continue
-
- # 验证字符串不为空 (只检查 items 中要求的字段)
+ if field in current_item_result and not isinstance(current_item_result[field], expected_type):
+ logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}")
+ valid_item = False; break
+ if not valid_item: continue
for field in items:
- if (
- field in current_item_result
- and isinstance(current_item_result[field], str)
- and not current_item_result[field].strip()
- ):
- logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item}")
- valid_item = False
- break
-
- if valid_item:
- valid_items_list.append(current_item_result) # 只添加完全有效的项
-
- if valid_items_list: # 只有当列表不为空时才认为是成功
+ if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip():
+ logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}")
+ valid_item = False; break
+ if valid_item: valid_items_list.append(current_item_result)
+ if valid_items_list:
logger.debug(f"[私聊][{private_name}] 成功解析JSON数组,包含 {len(valid_items_list)} 个有效项目。")
return True, valid_items_list
else:
- # 如果列表为空(可能所有项都无效),则继续尝试解析为单个对象
logger.debug(f"[私聊][{private_name}] 解析为JSON数组,但未找到有效项目,尝试解析单个JSON对象。")
- # result 重置回单个对象的默认值
result = default_result.copy()
-
except json.JSONDecodeError:
logger.debug(f"[私聊][{private_name}] JSON数组直接解析失败,尝试解析单个JSON对象")
- # result 重置回单个对象的默认值
result = default_result.copy()
except Exception as e:
logger.error(f"[私聊][{private_name}] 尝试解析JSON数组时发生未知错误: {str(e)}")
- # result 重置回单个对象的默认值
result = default_result.copy()
-
- # 尝试解析为单个JSON对象
try:
- # 尝试直接解析清理后的内容
json_data = json.loads(cleaned_content)
if not isinstance(json_data, dict):
logger.error(f"[私聊][{private_name}] 解析为单个对象,但结果不是字典类型: {type(json_data)}")
- return False, default_result # 返回失败和默认值
-
+ return False, default_result
except json.JSONDecodeError:
- # 如果直接解析失败,尝试用正则表达式查找 JSON 对象部分 (作为后备)
- # 这个正则比较简单,可能无法处理嵌套或复杂的 JSON
- json_pattern = r"\{[\s\S]*?\}" # 使用非贪婪匹配
+ json_pattern = r"\{[\s\S]*?\}"
json_match = re.search(json_pattern, cleaned_content)
if json_match:
try:
@@ -220,133 +362,97 @@ def get_items_from_json(
logger.error(f"[私聊][{private_name}] 正则提取的部分 '{potential_json_str[:100]}...' 无法解析为JSON。")
return False, default_result
else:
- logger.error(
- f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}..."
- )
+ logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...")
return False, default_result
-
- # 提取并验证字段 (适用于单个JSON对象)
- # 确保 result 是字典类型用于更新
- if not isinstance(result, dict):
- result = default_result.copy() # 如果之前是列表,重置为字典
-
+ if not isinstance(result, dict): result = default_result.copy()
valid_single_object = True
- for item in items:
- if item in json_data:
- result[item] = json_data[item]
- elif item not in default_result: # 如果字段不存在且没有默认值
- logger.error(f"[私聊][{private_name}] JSON对象缺少必要字段 '{item}'。JSON内容: {json_data}")
- valid_single_object = False
- break # 这个对象无效
-
- if not valid_single_object:
- return False, default_result
-
- # 验证类型
+ for item_field in items: # Renamed item to item_field
+ if item_field in json_data: result[item_field] = json_data[item_field]
+ elif item_field not in default_result:
+ logger.error(f"[私聊][{private_name}] JSON对象缺少必要字段 '{item_field}'。JSON内容: {json_data}")
+ valid_single_object = False; break
+ if not valid_single_object: return False, default_result
if required_types:
for field, expected_type in required_types.items():
if field in result and not isinstance(result[field], expected_type):
- logger.error(
- f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})"
- )
- valid_single_object = False
- break
-
- if not valid_single_object:
- return False, default_result
-
- # 验证字符串不为空 (只检查 items 中要求的字段)
+ logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})")
+ valid_single_object = False; break
+ if not valid_single_object: return False, default_result
for field in items:
if field in result and isinstance(result[field], str) and not result[field].strip():
logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 不能为空字符串")
- valid_single_object = False
- break
-
+ valid_single_object = False; break
if valid_single_object:
logger.debug(f"[私聊][{private_name}] 成功解析并验证了单个JSON对象。")
- return True, result # 返回提取并验证后的字典
+ return True, result
else:
- return False, default_result # 验证失败
+ return False, default_result
async def get_person_id(private_name: str, chat_stream: ChatStream):
+ """ (保持你原始 pfc_utils.py 中的此函数代码不变) """
private_user_id_str: Optional[str] = None
private_platform_str: Optional[str] = None
- private_nickname_str = private_name
+ # private_nickname_str = private_name # 这行在你提供的代码中没有被使用,可以考虑移除
if chat_stream.user_info:
private_user_id_str = str(chat_stream.user_info.user_id)
private_platform_str = chat_stream.user_info.platform
logger.debug(
- f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_nickname_str}"
+ f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name
)
- elif chat_stream.group_info is None and private_name:
- pass
+ # elif chat_stream.group_info is None and private_name: # 这个 elif 条件体为空,可以移除
+ # pass
if private_user_id_str and private_platform_str:
try:
private_user_id_int = int(private_user_id_str)
- # person_id = person_info_manager.get_person_id( # get_person_id 可能只查询,不创建
- # private_platform_str,
- # private_user_id_int
- # )
- # 使用 get_or_create_person 确保用户存在
person_id = await person_info_manager.get_or_create_person(
platform=private_platform_str,
user_id=private_user_id_int,
- nickname=private_name, # 使用传入的 private_name 作为昵称
+ nickname=private_name,
)
- if person_id is None: # 如果 get_or_create_person 返回 None,说明创建失败
+ if person_id is None:
logger.error(f"[私聊][{private_name}] get_or_create_person 未能获取或创建 person_id。")
- return None # 返回 None 表示失败
-
- return person_id, private_platform_str, private_user_id_str # 返回获取或创建的 person_id
+ return None
+ return person_id, private_platform_str, private_user_id_str
except ValueError:
logger.error(f"[私聊][{private_name}] 无法将 private_user_id_str ('{private_user_id_str}') 转换为整数。")
- return None # 返回 None 表示失败
+ return None
except Exception as e_pid:
logger.error(f"[私聊][{private_name}] 获取或创建 person_id 时出错: {e_pid}")
- return None # 返回 None 表示失败
+ return None
else:
logger.warning(
f"[私聊][{private_name}] 未能确定私聊对象的 user_id 或 platform,无法获取 person_id。将在收到消息后尝试。"
)
- return None # 返回 None 表示失败
+ return None
async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: float) -> float:
- # 限制 old_value 范围
+ """ (保持你原始 pfc_utils.py 中的此函数代码不变) """
old_value = max(-1000, min(1000, old_value))
value = raw_adjustment
-
if old_value >= 0:
if value >= 0:
value = value * math.cos(math.pi * old_value / 2000)
if old_value > 500:
- rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700)
+ # 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整
+ rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False)
high_value_count = len(rdict)
- if old_value > 700:
- value *= 3 / (high_value_count + 2)
- else:
- value *= 3 / (high_value_count + 3)
- elif value < 0:
- value = value * math.exp(old_value / 2000)
- else:
- value = 0
- else:
- if value >= 0:
- value = value * math.exp(old_value / 2000)
- elif value < 0:
- value = value * math.cos(math.pi * old_value / 2000)
- else:
- value = 0
-
+ if old_value > 700: value *= 3 / (high_value_count + 2)
+ else: value *= 3 / (high_value_count + 3)
+ elif value < 0: value = value * math.exp(old_value / 2000)
+ # else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0
+ else: # old_value < 0
+ if value >= 0: value = value * math.exp(old_value / 2000)
+ elif value < 0: value = value * math.cos(math.pi * old_value / 2000)
+ # else: value = 0 # 你原始代码中没有这句
return value
async def build_chat_history_text(observation_info: ObservationInfo, private_name: str) -> str:
- """构建聊天历史记录文本 (包含未处理消息)"""
-
+ """ (保持你原始 pfc_utils.py 中的此函数代码不变) """
chat_history_text = ""
try:
if hasattr(observation_info, "chat_history_str") and observation_info.chat_history_str:
@@ -358,27 +464,32 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam
)
else:
chat_history_text = "还没有聊天记录。\n"
+
unread_count = getattr(observation_info, "new_messages_count", 0)
unread_messages = getattr(observation_info, "unprocessed_messages", [])
if unread_count > 0 and unread_messages:
- bot_qq_str = str(global_config.BOT_QQ)
- other_unread_messages = [
- msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str
- ]
- other_unread_count = len(other_unread_messages)
- if other_unread_count > 0:
- new_messages_str = await build_readable_messages(
- other_unread_messages,
- replace_bot_name=True,
- merge_messages=False,
- timestamp_mode="relative",
- read_mark=0.0,
- )
- chat_history_text += f"\n{new_messages_str}\n------\n"
+ bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取
+ if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤
+ other_unread_messages = [
+ msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str
+ ]
+ other_unread_count = len(other_unread_messages)
+ if other_unread_count > 0:
+ new_messages_str = await build_readable_messages(
+ other_unread_messages,
+ replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字
+ merge_messages=False,
+ timestamp_mode="relative",
+ read_mark=0.0,
+ )
+ chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的
+ else:
+ logger.warning(f"[私聊][{private_name}] BOT_QQ 未配置,无法准确过滤未读消息中的机器人自身消息。")
+
except AttributeError as e:
logger.warning(f"[私聊][{private_name}] 构建聊天记录文本时属性错误: {e}")
chat_history_text = "[获取聊天记录时出错]\n"
except Exception as e:
logger.error(f"[私聊][{private_name}] 处理聊天记录时发生未知错误: {e}")
chat_history_text = "[处理聊天记录时出错]\n"
- return chat_history_text
+ return chat_history_text
\ No newline at end of file
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index 174e3ba0..f2f925d6 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -1,5 +1,5 @@
import random
-
+import asyncio
from .pfc_utils import retrieve_contextual_info
from src.common.logger_manager import get_logger
@@ -60,6 +60,9 @@ PROMPT_DIRECT_REPLY = """
{retrieved_knowledge_str}
请你**记住上面的知识**,在回复中有可能会用到。
+你还想到了一些你们之前的聊天记录:
+{retrieved_historical_chat_str}
+
最近的聊天记录:
{chat_history_text}
@@ -68,6 +71,8 @@ PROMPT_DIRECT_REPLY = """
{last_rejection_info}
+
+
请根据上述信息,结合聊天记录,回复对方。该回复应该:
1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!)
2. 符合你的性格特征和身份细节
@@ -97,6 +102,9 @@ PROMPT_SEND_NEW_MESSAGE = """
{retrieved_knowledge_str}
请你**记住上面的知识**,在发消息时有可能会用到。
+你还想到了一些你们之前的聊天记录:
+{retrieved_historical_chat_str}
+
最近的聊天记录:
{chat_history_text}
@@ -223,12 +231,59 @@ class ReplyGenerator:
current_emotion_text_str = getattr(conversation_info, "current_emotion_text", "心情平静。")
persona_text = f"你的名字是{self.name},{self.personality_info}。"
- retrieval_context = chat_history_text
- retrieved_memory_str, retrieved_knowledge_str = await retrieve_contextual_info(
- retrieval_context, self.private_name
- )
+ historical_chat_query = ""
+ num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子
+ if observation_info.chat_history and len(observation_info.chat_history) > 0:
+ # 从 chat_history (已处理并存入 ObservationInfo 的历史) 中取最新N条
+ # 或者,如果 observation_info.unprocessed_messages 更能代表“当前上下文”,也可以考虑用它
+ # 我们先用 chat_history,因为它包含了双方的对话历史,可能更稳定
+ recent_messages_for_query_list = observation_info.chat_history[-num_recent_messages_for_query:]
+
+ # 将这些消息的文本内容合并
+ query_texts_list = []
+ for msg_dict in recent_messages_for_query_list:
+ text_content = msg_dict.get("processed_plain_text", "")
+ if text_content.strip(): # 只添加有内容的文本
+ # 可以选择是否添加发送者信息到查询文本中,例如:
+ # sender_nickname = msg_dict.get("user_info", {}).get("user_nickname", "用户")
+ # query_texts_list.append(f"{sender_nickname}: {text_content}")
+ query_texts_list.append(text_content) # 简单合并文本内容
+
+ if query_texts_list:
+ historical_chat_query = " ".join(query_texts_list).strip()
+ logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'")
+ else:
+ logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。")
+ else:
+ logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 无聊天历史可用于生成私聊历史查询文本。")
+
+ current_chat_id = self.chat_observer.stream_id if self.chat_observer else None
+ if not current_chat_id:
+ logger.error(f"[私聊][{self.private_name}] (ReplyGenerator) 无法获取 current_chat_id,跳过所有上下文检索!")
+ retrieved_global_memory_str = "[获取全局记忆出错:chat_id 未知]"
+ retrieved_knowledge_str = "[获取知识出错:chat_id 未知]"
+ retrieved_historical_chat_str = "[获取私聊历史回忆出错:chat_id 未知]"
+ else:
+ # retrieval_context 之前是用 chat_history_text,现在也用它作为全局记忆和知识的检索上下文
+ retrieval_context_for_global_and_knowledge = chat_history_text
+
+ (
+ retrieved_global_memory_str,
+ retrieved_knowledge_str,
+ retrieved_historical_chat_str # << 新增接收私聊历史回忆
+ ) = await retrieve_contextual_info(
+ text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识
+ private_name=self.private_name,
+ chat_id=current_chat_id, # << 传递 chat_id
+ historical_chat_query_text=historical_chat_query # << 传递专门的查询文本
+ )
+ # === 调用修改结束 ===
+
logger.info(
- f"[私聊][{self.private_name}] (ReplyGenerator) 统一检索完成。记忆: {'有' if '回忆起' in retrieved_memory_str else '无'} / 知识: {'有' if '出错' not in retrieved_knowledge_str and '无相关知识' not in retrieved_knowledge_str else '无'}"
+ f"[私聊][{self.private_name}] (ReplyGenerator) 上下文检索完成。\n"
+ f" 全局记忆: {'有内容' if '回忆起' in retrieved_global_memory_str else '无或出错'}\n"
+ f" 知识: {'有内容' if '出错' not in retrieved_knowledge_str and '无相关知识' not in retrieved_knowledge_str and retrieved_knowledge_str.strip() else '无或出错'}\n"
+ f" 私聊历史回忆: {'有内容' if '回忆起一段相关的历史聊天' in retrieved_historical_chat_str else '无或出错'}"
)
last_rejection_info_str = ""
@@ -292,11 +347,10 @@ class ReplyGenerator:
base_format_params = {
"persona_text": persona_text,
"goals_str": goals_str,
- "chat_history_text": chat_history_text,
- "retrieved_memory_str": retrieved_memory_str if retrieved_memory_str else "无相关记忆。", # 确保已定义
- "retrieved_knowledge_str": retrieved_knowledge_str
- if retrieved_knowledge_str
- else "无相关知识。", # 确保已定义
+ "chat_history_text": chat_history_text if chat_history_text.strip() else "还没有聊天记录。", # 当前短期历史
+ "retrieved_global_memory_str": retrieved_global_memory_str if retrieved_global_memory_str.strip() else "无相关全局记忆。",
+ "retrieved_knowledge_str": retrieved_knowledge_str if retrieved_knowledge_str.strip() else "无相关知识。",
+ "retrieved_historical_chat_str": retrieved_historical_chat_str if retrieved_historical_chat_str.strip() else "无相关私聊历史回忆。", # << 新增
"last_rejection_info": last_rejection_info_str,
"current_time_str": current_time_value,
"sender_name": sender_name_str,
From fe3ddb3b2eca81d4df2cffd9c03b8d9ac8585b73 Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 17:09:05 +0800
Subject: [PATCH 02/27] fix
---
src/plugins/PFC/pfc_processor.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index 8aaf800d..e13ba2d0 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -2,7 +2,8 @@ import traceback
import re
from typing import Any, Dict
from datetime import datetime # 确保导入 datetime
-from maim_message import UserInfo, MessageRecv # 从 maim_message 导入 MessageRecv
+from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv
+from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py
from src.config.config import global_config
from src.common.logger_manager import get_logger
from ..chat.chat_stream import chat_manager
From 79dd9cc6550ff11d2a6d157d5f6f1670eb98e577 Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 17:29:58 +0800
Subject: [PATCH 03/27] fix
---
src/plugins/PFC/pfc_processor.py | 2 +-
src/plugins/PFC/pfc_utils.py | 14 ++++++++------
src/plugins/PFC/reply_generator.py | 5 -----
3 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index e13ba2d0..b6e6b8b3 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -105,7 +105,7 @@ class PFCProcessor:
# 更新数据库中的对应文档
# 确保你有权限访问和操作 db 对象
- update_result = await db.messages.update_one(
+ update_result = db.messages.update_one(
{"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id},
{"$set": {"embedding_vector": embedding_vector}}
)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 666fa6e8..2441941f 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -79,8 +79,10 @@ async def find_most_relevant_historical_message(
]
try:
- # 假设 db.messages 是存储PFC私聊消息并带有embedding_vector的集合
- results = await db.messages.aggregate(pipeline).to_list(length=1)
+ # --- 确定性修改:同步执行聚合和结果转换 ---
+ cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor
+ results = list(cursor) # 直接将 CommandCursor 转换为列表
+ # --- 修改结束 ---
if results and len(results) > 0:
most_similar_message = results[0]
logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}")
@@ -109,19 +111,19 @@ async def retrieve_chat_context_window(
logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...")
try:
- # 假设 db.messages 是存储PFC私聊消息的集合
- anchor_message = await db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id})
+ # --- 确定性修改:同步执行 find_one 和 find ---
+ anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id})
messages_before_cursor = db.messages.find(
{"chat_id": chat_id, "time": {"$lt": anchor_message_time}}
).sort("time", -1).limit(window_size_before)
- messages_before = await messages_before_cursor.to_list(length=window_size_before)
+ messages_before = list(messages_before_cursor)
messages_before.reverse()
messages_after_cursor = db.messages.find(
{"chat_id": chat_id, "time": {"$gt": anchor_message_time}}
).sort("time", 1).limit(window_size_after)
- messages_after = await messages_after_cursor.to_list(length=window_size_after)
+ messages_after = list(messages_after_cursor)
if messages_before:
context_messages.extend(messages_before)
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index f2f925d6..0a82bad4 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -66,13 +66,10 @@ PROMPT_DIRECT_REPLY = """
最近的聊天记录:
{chat_history_text}
-{retrieved_memory_str}
-
{last_rejection_info}
-
请根据上述信息,结合聊天记录,回复对方。该回复应该:
1. 符合对话目标,以"你"的角度发言(不要自己与自己对话!)
2. 符合你的性格特征和身份细节
@@ -108,8 +105,6 @@ PROMPT_SEND_NEW_MESSAGE = """
最近的聊天记录:
{chat_history_text}
-{retrieved_memory_str}
-
{last_rejection_info}
请根据上述信息,判断你是否要继续发一条新消息(例如对之前消息的补充,深入话题,或追问等等)。如果你觉得要发送,该消息应该:
From b998d7a05b8f5425ec8053e1faf4bf609e42e19a Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 17:52:40 +0800
Subject: [PATCH 04/27] fix
---
src/plugins/PFC/pfc_utils.py | 9 +++++++--
src/plugins/PFC/reply_generator.py | 6 ++++++
2 files changed, 13 insertions(+), 2 deletions(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 2441941f..5e89ee6e 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -2,6 +2,7 @@ import traceback
import json
import re
import asyncio # 确保导入 asyncio
+import time
from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
from src.common.logger_manager import get_logger
@@ -29,7 +30,8 @@ logger = get_logger("pfc_utils")
async def find_most_relevant_historical_message(
chat_id: str,
query_text: str,
- similarity_threshold: float = 0.3 # 相似度阈值,可以根据效果调整
+ similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整
+ exclude_recent_seconds: int = 300 # 新增参数:排除最近多少秒内的消息(例如5分钟)
) -> Optional[Dict[str, Any]]:
"""
根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。
@@ -47,6 +49,8 @@ async def find_most_relevant_historical_message(
logger.warning(f"[{chat_id}] (私聊历史)未能为查询文本 '{query_text[:50]}...' 生成嵌入向量。")
return None
+ current_timestamp = time.time() # 获取当前时间戳
+
pipeline = [
{
"$match": {
@@ -235,7 +239,8 @@ async def retrieve_contextual_info(
most_relevant_message_doc = await find_most_relevant_historical_message(
chat_id=chat_id,
query_text=query_for_historical_chat,
- similarity_threshold=0.5 # 你可以根据需要调整这个阈值
+ similarity_threshold=0.5, # 你可以根据需要调整这个阈值
+ exclude_recent_seconds=300
)
if most_relevant_message_doc:
anchor_id = most_relevant_message_doc.get("message_id")
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index 0a82bad4..2a0cda6d 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -60,6 +60,9 @@ PROMPT_DIRECT_REPLY = """
{retrieved_knowledge_str}
请你**记住上面的知识**,在回复中有可能会用到。
+你有以下记忆可供参考:
+{retrieved_global_memory_str}
+
你还想到了一些你们之前的聊天记录:
{retrieved_historical_chat_str}
@@ -99,6 +102,9 @@ PROMPT_SEND_NEW_MESSAGE = """
{retrieved_knowledge_str}
请你**记住上面的知识**,在发消息时有可能会用到。
+你有以下记忆可供参考:
+{retrieved_global_memory_str}
+
你还想到了一些你们之前的聊天记录:
{retrieved_historical_chat_str}
From 35f0c8224e7f54b00b422698c9179ba4ac9665fa Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 18:39:59 +0800
Subject: [PATCH 05/27] fix
---
src/plugins/PFC/pfc_utils.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 5e89ee6e..f4627c27 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -50,12 +50,14 @@ async def find_most_relevant_historical_message(
return None
current_timestamp = time.time() # 获取当前时间戳
+ excluded_time_threshold = current_timestamp - exclude_recent_seconds
pipeline = [
{
"$match": {
"chat_id": chat_id,
- "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}}
+ "embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}},
+ "time": {"$lt": excluded_time_threshold}
}
},
{
From 53ba204e25d901c507a0e8ddc740b66ed4366af8 Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 19:00:40 +0800
Subject: [PATCH 06/27] fix
---
src/plugins/PFC/pfc_utils.py | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index f4627c27..62e20cfe 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -3,6 +3,7 @@ import json
import re
import asyncio # 确保导入 asyncio
import time
+import datetime
from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
from src.common.logger_manager import get_logger
@@ -31,7 +32,7 @@ async def find_most_relevant_historical_message(
chat_id: str,
query_text: str,
similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整
- exclude_recent_seconds: int = 300 # 新增参数:排除最近多少秒内的消息(例如5分钟)
+    exclude_recent_seconds: int = 900 # 新增参数:排除最近多少秒内的消息(默认900秒,即15分钟)
) -> Optional[Dict[str, Any]]:
"""
根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。
@@ -88,6 +89,15 @@ async def find_most_relevant_historical_message(
# --- 确定性修改:同步执行聚合和结果转换 ---
cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor
results = list(cursor) # 直接将 CommandCursor 转换为列表
+ if not results:
+ logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。")
+ else:
+ logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:")
+ for res_msg in results: # 最多只打印我们 limit 的那几条
+ msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')
+ logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'")
+ # --- 新增日志结束 ---
+
# --- 修改结束 ---
if results and len(results) > 0:
most_similar_message = results[0]
@@ -104,6 +114,7 @@ async def retrieve_chat_context_window(
chat_id: str,
anchor_message_id: str,
anchor_message_time: float,
+ excluded_time_threshold_for_window: float,
window_size_before: int = 7,
window_size_after: int = 7
) -> List[Dict[str, Any]]:
@@ -125,11 +136,21 @@ async def retrieve_chat_context_window(
).sort("time", -1).limit(window_size_before)
messages_before = list(messages_before_cursor)
messages_before.reverse()
+ # --- 新增日志 ---
+ logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}")
+ logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):")
+ for msg_b in messages_before:
+ logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'")
messages_after_cursor = db.messages.find(
- {"chat_id": chat_id, "time": {"$gt": anchor_message_time}}
+ {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} # <--- 修改这里
).sort("time", 1).limit(window_size_after)
messages_after = list(messages_after_cursor)
+ # --- 新增日志 ---
+ logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):")
+ for msg_a in messages_after:
+ logger.debug(f" - Time: {datetime.fromtimestamp(msg_a.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text','')[:30]}...'")
+
if messages_before:
context_messages.extend(messages_before)
From ebc3dd53557df77d33eefabbce29df6c18e24e0c Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 19:09:16 +0800
Subject: [PATCH 07/27] fix
---
src/plugins/PFC/pfc_utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 62e20cfe..9e7d745a 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -3,7 +3,7 @@ import json
import re
import asyncio # 确保导入 asyncio
import time
-import datetime
+from datetime import datetime
from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
from src.common.logger_manager import get_logger
From fe4990c73161eab59b3038a7064b413f8be9b7fb Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 19:21:17 +0800
Subject: [PATCH 08/27] fix
---
src/plugins/PFC/pfc_utils.py | 28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 9e7d745a..012c33f1 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -128,7 +128,7 @@ async def retrieve_chat_context_window(
logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...")
try:
- # --- 确定性修改:同步执行 find_one 和 find ---
+ # --- 同步执行 find_one 和 find ---
anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id})
messages_before_cursor = db.messages.find(
@@ -143,7 +143,7 @@ async def retrieve_chat_context_window(
logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'")
messages_after_cursor = db.messages.find(
- {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}} # <--- 修改这里
+ {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}}
).sort("time", 1).limit(window_size_after)
messages_after = list(messages_after_cursor)
# --- 新增日志 ---
@@ -200,9 +200,9 @@ async def retrieve_contextual_info(
related_memory = await HippocampusManager.get_instance().get_memory_from_text(
text=text,
max_memory_num=2,
- max_memory_length=2, # 你原始代码中这里是2,不是200
+ max_memory_length=2,
max_depth=3,
- fast_retrieval=False, # 你原始代码中这里是False
+ fast_retrieval=False,
)
if related_memory:
temp_global_memory_info = ""
@@ -259,22 +259,34 @@ async def retrieve_contextual_info(
if query_for_historical_chat:
try:
+ # 获取 find_most_relevant_historical_message 调用时实际使用的 exclude_recent_seconds 值
+ actual_exclude_seconds_for_find = 900 # 根据您对 find_most_relevant_historical_message 的调用
+
most_relevant_message_doc = await find_most_relevant_historical_message(
chat_id=chat_id,
query_text=query_for_historical_chat,
- similarity_threshold=0.5, # 你可以根据需要调整这个阈值
- exclude_recent_seconds=300
+ similarity_threshold=0.5,
+ exclude_recent_seconds=actual_exclude_seconds_for_find
)
if most_relevant_message_doc:
anchor_id = most_relevant_message_doc.get("message_id")
anchor_time = most_relevant_message_doc.get("time")
if anchor_id and anchor_time is not None:
+ # 计算传递给 retrieve_chat_context_window 的时间上限
+ # 这个上限应该与 find_most_relevant_historical_message 的排除点一致
+ time_limit_for_window_after = time.time() - actual_exclude_seconds_for_find
+
+ logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window "
+ f"with anchor_time: {anchor_time}, "
+ f"excluded_time_threshold_for_window: {time_limit_for_window_after}")
+
context_window_messages = await retrieve_chat_context_window(
chat_id=chat_id,
anchor_message_id=anchor_id,
anchor_message_time=anchor_time,
- window_size_before=7, # 我们的目标:上7条
- window_size_after=7 # 我们的目标:下7条 (共15条,包括锚点)
+ excluded_time_threshold_for_window=time_limit_for_window_after, # <--- 传递这个值
+ window_size_before=7,
+ window_size_after=7
)
if context_window_messages:
formatted_window_str = await build_readable_messages(
From c9e6cf214076929097a03b0599fafb5d52cb6a47 Mon Sep 17 00:00:00 2001
From: 114514 <2514624910@qq.com>
Date: Fri, 9 May 2025 20:50:46 +0800
Subject: [PATCH 09/27] fix
---
src/plugins/PFC/pfc_utils.py | 94 ++++++++++++++++++++++--------
src/plugins/PFC/reply_generator.py | 51 +++++++++++++++-
2 files changed, 119 insertions(+), 26 deletions(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 012c33f1..76dd6bc4 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -32,7 +32,7 @@ async def find_most_relevant_historical_message(
chat_id: str,
query_text: str,
similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整
- exclude_recent_seconds: int = 900 # 新增参数:排除最近多少秒内的消息(例如5分钟)
+    absolute_search_time_limit: Optional[float] = None # 新增参数:历史消息锚点搜索的绝对时间上限(Unix 时间戳);为 None 时使用回退逻辑排除最近一段时间的消息
) -> Optional[Dict[str, Any]]:
"""
根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。
@@ -50,15 +50,30 @@ async def find_most_relevant_historical_message(
logger.warning(f"[{chat_id}] (私聊历史)未能为查询文本 '{query_text[:50]}...' 生成嵌入向量。")
return None
- current_timestamp = time.time() # 获取当前时间戳
- excluded_time_threshold = current_timestamp - exclude_recent_seconds
+ effective_search_upper_limit: float
+ log_source_of_limit: str = ""
+
+ if absolute_search_time_limit is not None:
+ effective_search_upper_limit = absolute_search_time_limit
+ log_source_of_limit = "传入的绝对时间上限"
+ else:
+ # 如果没有传入绝对时间上限,可以设置一个默认的回退逻辑
+ fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时
+ effective_search_upper_limit = time.time() - fallback_exclude_seconds
+ log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)"
+
+ logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: "
+ f"将使用时间上限 {effective_search_upper_limit} "
+ f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) "
+ f"进行历史消息锚点搜索。来源: {log_source_of_limit}")
+ # --- [新代码结束] ---
pipeline = [
{
"$match": {
"chat_id": chat_id,
"embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}},
- "time": {"$lt": excluded_time_threshold}
+ "time": {"$lt": effective_search_upper_limit} # <--- 使用新的 effective_search_upper_limit
}
},
{
@@ -90,13 +105,13 @@ async def find_most_relevant_historical_message(
cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor
results = list(cursor) # 直接将 CommandCursor 转换为列表
if not results:
- logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。")
+ logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。")
else:
- logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {excluded_time_threshold} ({exclude_recent_seconds} 秒前) 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:")
- for res_msg in results: # 最多只打印我们 limit 的那几条
+ logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:")
+ for res_msg in results:
msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')
logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'")
- # --- 新增日志结束 ---
+ # --- [修改结束] ---
# --- 修改结束 ---
if results and len(results) > 0:
@@ -182,7 +197,8 @@ async def retrieve_contextual_info(
text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录)
private_name: str, # 用于日志
chat_id: str, # 用于特定私聊历史的检索
- historical_chat_query_text: Optional[str] = None # 专门为私聊历史检索准备的查询文本 (例如最新的N条消息合并)
+ historical_chat_query_text: Optional[str] = None,
+ current_short_term_history_earliest_time: Optional[float] = None # <--- 新增参数
) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆
"""
检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。
@@ -253,38 +269,68 @@ async def retrieve_contextual_info(
logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。")
- # --- 3. 当前私聊的特定历史对话上下文检索 (新增逻辑) ---
+ # --- 3. 当前私聊的特定历史对话上下文检索 ---
query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None
- historical_chat_log_msg = f"开始私聊历史检索 (查询文本: '{str(query_for_historical_chat)[:30]}...')"
+ # historical_chat_log_msg 的初始化可以移到 try 块之后,根据实际情况赋值
if query_for_historical_chat:
try:
- # 获取 find_most_relevant_historical_message 调用时实际使用的 exclude_recent_seconds 值
- actual_exclude_seconds_for_find = 900 # 根据您对 find_most_relevant_historical_message 的调用
+ # ---- [新代码] 计算最终的、严格的搜索时间上限 ----
+ # 1. 设置一个基础的、较大的时间回溯窗口,例如2小时 (7200秒)
+ # 这个值可以从全局配置读取,如果没配置则使用默认值
+ default_search_exclude_seconds = getattr(global_config, "pfc_historical_search_default_exclude_seconds", 7200) # 默认2小时
+ base_excluded_time_limit = time.time() - default_search_exclude_seconds
+
+ final_search_upper_limit_time = base_excluded_time_limit
+ if current_short_term_history_earliest_time is not None:
+ # 我们希望找到的消息严格早于 short_term_history 的开始,减去一个小量确保不包含边界
+ limit_from_short_term = current_short_term_history_earliest_time - 0.001
+ final_search_upper_limit_time = min(base_excluded_time_limit, limit_from_short_term)
+ log_earliest_time_str = "未提供"
+ if current_short_term_history_earliest_time is not None:
+ try:
+ log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})"
+ except:
+ log_earliest_time_str = str(current_short_term_history_earliest_time)
+
+ logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: "
+ f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} "
+ f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). "
+ f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}")
+
most_relevant_message_doc = await find_most_relevant_historical_message(
chat_id=chat_id,
query_text=query_for_historical_chat,
- similarity_threshold=0.5,
- exclude_recent_seconds=actual_exclude_seconds_for_find
+ similarity_threshold=0.5, # 您可以调整这个
+ # exclude_recent_seconds 不再直接使用,而是传递计算好的绝对时间上限
+ absolute_search_time_limit=final_search_upper_limit_time # <--- 传递计算好的绝对时间上限
)
+
if most_relevant_message_doc:
anchor_id = most_relevant_message_doc.get("message_id")
- anchor_time = most_relevant_message_doc.get("time")
- if anchor_id and anchor_time is not None:
- # 计算传递给 retrieve_chat_context_window 的时间上限
- # 这个上限应该与 find_most_relevant_historical_message 的排除点一致
- time_limit_for_window_after = time.time() - actual_exclude_seconds_for_find
-
+ anchor_time = most_relevant_message_doc.get("time")
+
+ # 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证)
+ if anchor_time is not None and anchor_time >= final_search_upper_limit_time:
+ logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} "
+ f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。")
+ historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。"
+ # 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值
+ elif anchor_id and anchor_time is not None:
+ # 构建上下文窗口时,其“未来”消息的上限也应该是 final_search_upper_limit_time
+ # 因为我们不希望历史回忆的上下文窗口延伸到“最近聊天记录”的范围内或更近
+ time_limit_for_context_window_after = final_search_upper_limit_time
+
logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window "
f"with anchor_time: {anchor_time}, "
- f"excluded_time_threshold_for_window: {time_limit_for_window_after}")
+ f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}")
context_window_messages = await retrieve_chat_context_window(
chat_id=chat_id,
anchor_message_id=anchor_id,
- anchor_message_time=anchor_time,
- excluded_time_threshold_for_window=time_limit_for_window_after, # <--- 传递这个值
+ anchor_message_time=anchor_time,
+ excluded_time_threshold_for_window=time_limit_for_context_window_after,
window_size_before=7,
window_size_after=7
)
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index 2a0cda6d..9a184e52 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -1,7 +1,8 @@
import random
import asyncio
+from datetime import datetime
from .pfc_utils import retrieve_contextual_info
-
+from typing import Optional
from src.common.logger_manager import get_logger
from ..models.utils_model import LLMRequest
from ...config.config import global_config
@@ -224,6 +225,51 @@ class ReplyGenerator:
else:
goals_str = "- 目前没有明确对话目标\n"
+ chat_history_for_prompt_builder: list = []
+ recent_history_start_time_for_exclusion: Optional[float] = None
+
+ # 我们需要知道 build_chat_history_text 函数大致会用 observation_info.chat_history 的多少条记录
+ # 或者 build_chat_history_text 内部的逻辑。
+ # 假设 build_chat_history_text 主要依赖 observation_info.chat_history_str,
+ # 而 observation_info.chat_history_str 是基于 observation_info.chat_history 的最后一部分(比如20条)生成的。
+ # 为了准确,我们应该直接从 observation_info.chat_history 中获取这个片段的起始时间。
+ # 请确保这里的 MAX_RECENT_HISTORY_FOR_PROMPT 与 observation_info.py 或 build_chat_history_text 中
+ # 用于生成 chat_history_str 的消息数量逻辑大致吻合。
+ # 如果 build_chat_history_text 总是用 observation_info.chat_history 的最后 N 条,那么这个 N 就是这里的数字。
+ # 如果 observation_info.chat_history_str 是由 observation_info.py 中的 update_from_message 等方法维护的,
+ # 并且总是代表一个固定长度(比如最后30条)的聊天记录字符串,那么我们就需要从 observation_info.chat_history
+ # 取出这部分原始消息来确定起始时间。
+
+ # 我们先做一个合理的假设: “最近聊天记录” 字符串 chat_history_text 是基于
+ # observation_info.chat_history 的一个有限的尾部片段生成的。
+ # 假设这个片段的长度由 global_config.pfc_recent_history_display_count 控制,默认为20条。
+ recent_history_display_count = getattr(global_config, "pfc_recent_history_display_count", 20)
+
+ if observation_info and observation_info.chat_history and len(observation_info.chat_history) > 0:
+ # 获取用于生成“最近聊天记录”的实际消息片段
+ # 如果 observation_info.chat_history 长度小于 display_count,则取全部
+ start_index = max(0, len(observation_info.chat_history) - recent_history_display_count)
+ chat_history_for_prompt_builder = observation_info.chat_history[start_index:]
+
+ if chat_history_for_prompt_builder: # 如果片段不为空
+ try:
+ first_message_in_display_slice = chat_history_for_prompt_builder[0]
+ recent_history_start_time_for_exclusion = first_message_in_display_slice.get('time')
+ if recent_history_start_time_for_exclusion:
+ # 导入 datetime (如果 reply_generator.py 文件顶部没有的话)
+ # from datetime import datetime # 通常建议放在文件顶部
+ logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: "
+ f"{recent_history_start_time_for_exclusion} "
+ f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})")
+ else:
+ logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。")
+ except (IndexError, KeyError, TypeError) as e:
+ logger.warning(f"[{self.private_name}] (ReplyGenerator) 获取“最近聊天记录”起始时间失败: {e}")
+ recent_history_start_time_for_exclusion = None
+ else:
+ logger.debug(f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。")
+ # --- [新代码结束] ---
+
chat_history_text = await build_chat_history_text(observation_info, self.private_name)
sender_name_str = self.private_name
@@ -276,7 +322,8 @@ class ReplyGenerator:
text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识
private_name=self.private_name,
chat_id=current_chat_id, # << 传递 chat_id
- historical_chat_query_text=historical_chat_query # << 传递专门的查询文本
+ historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本
+ current_short_term_history_earliest_time=recent_history_start_time_for_exclusion # <--- 新增传递的参数
)
# === 调用修改结束 ===
From 5d31cc8bbe52b121f4bcab0bf75a820969933afb Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Fri, 9 May 2025 21:20:31 +0800
Subject: [PATCH 10/27] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0docker=E9=95=9C?=
=?UTF-8?q?=E5=83=8F=E7=9A=84tag?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 54 +++++++++++++++++-------------
docker-compose.yml | 4 +--
2 files changed, 33 insertions(+), 25 deletions(-)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 605d838c..36c7604f 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -6,10 +6,9 @@ on:
- main
- classical
- dev
- - new_knowledge
tags:
- - 'v*'
- workflow_dispatch:
+ - "v*.*.*"
+ - "v*"
jobs:
build-and-push:
@@ -20,6 +19,11 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
- name: Clone maim_message
run: git clone https://github.com/MaiM-with-u/maim_message maim_message
@@ -29,6 +33,8 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
+ with:
+ buildkitd-flags: --debug
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -36,20 +42,18 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- - name: Determine Image Tags
- id: tags
- run: |
- if [[ "${{ github.ref }}" == refs/tags/* ]]; then
- echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:${{ github.ref_name }},${{ secrets.DOCKERHUB_USERNAME }}/maimbot:latest" >> $GITHUB_OUTPUT
- elif [ "${{ github.ref }}" == "refs/heads/main" ]; then
- echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:main-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT
- elif [ "${{ github.ref }}" == "refs/heads/classical" ]; then
- echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:classical,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:classical-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT
- elif [ "${{ github.ref }}" == "refs/heads/dev" ]; then
- echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:dev,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:dev-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT
- elif [ "${{ github.ref }}" == "refs/heads/new_knowledge" ]; then
- echo "tags=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:knowledge,${{ secrets.DOCKERHUB_USERNAME }}/maimbot:knowledge-$(date -u +'%Y%m%d%H%M%S')" >> $GITHUB_OUTPUT
- fi
+ - name: Docker meta
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=semver,pattern={{major}}
+ type=sha
- name: Build and Push Docker Image
uses: docker/build-push-action@v5
@@ -57,10 +61,14 @@ jobs:
context: .
file: ./Dockerfile
platforms: linux/amd64,linux/arm64
- tags: ${{ steps.tags.outputs.tags }}
+ tags: ${{ steps.meta.outputs.tags }}
push: true
- cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:buildcache
- cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maimbot:buildcache,mode=max
- labels: |
- org.opencontainers.image.created=${{ steps.tags.outputs.date_tag }}
- org.opencontainers.image.revision=${{ github.sha }}
\ No newline at end of file
+ cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache
+ cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache,mode=max
+ labels: ${{ steps.meta.outputs.labels }}
+ provenance: true
+ sbom: true
+ build-args: |
+ BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
+ VCS_REF=${{ github.sha }}
+ outputs: type=image,push=true
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 000d00c3..363fafc2 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -16,8 +16,8 @@ services:
- maim_bot
core:
container_name: maim-bot-core
- image: sengokucola/maimbot:main
- # image: infinitycat/maimbot:main
+ image: sengokucola/maibot:main
+ # image: infinitycat/maibot:main
environment:
- TZ=Asia/Shanghai
# - EULA_AGREE=35362b6ea30f12891d46ef545122e84a # 同意EULA
From 3323c8dc498984cb938fc16f531dff05e8c5bc21 Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Fri, 9 May 2025 21:42:00 +0800
Subject: [PATCH 11/27] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0docker-compose?=
=?UTF-8?q?=E7=9A=84tag?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
docker-compose.yml | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/docker-compose.yml b/docker-compose.yml
index 363fafc2..2392f707 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -16,8 +16,11 @@ services:
- maim_bot
core:
container_name: maim-bot-core
- image: sengokucola/maibot:main
- # image: infinitycat/maibot:main
+ image: sengokucola/maibot:latest
+ # image: infinitycat/maibot:latest
+ # dev
+ # image: sengokucola/maibot:dev
+ # image: infinitycat/maibot:dev
environment:
- TZ=Asia/Shanghai
# - EULA_AGREE=35362b6ea30f12891d46ef545122e84a # 同意EULA
From 835efd5daae12ef268bb016d9e12f8e9a184fc9c Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Sat, 10 May 2025 01:41:56 +0800
Subject: [PATCH 12/27] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84Docker=E9=95=9C?=
=?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E6=96=B0?=
=?UTF-8?q?=E5=A2=9E=E5=A4=9A=E5=B9=B3=E5=8F=B0=E6=94=AF=E6=8C=81=E5=92=8C?=
=?UTF-8?q?=E6=91=98=E8=A6=81=E4=B8=8A=E4=BC=A0=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 129 +++++++++++++++++++++++------
1 file changed, 102 insertions(+), 27 deletions(-)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 36c7604f..3fce193b 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -10,20 +10,58 @@ on:
- "v*.*.*"
- "v*"
+env:
+ REGISTRY_IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
+
jobs:
- build-and-push:
+ prepare:
runs-on: ubuntu-latest
- env:
- DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }}
- DATE_TAG: $(date -u +'%Y-%m-%dT%H-%M-%S')
+ outputs:
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
+ bake-file: ${{ steps.meta.outputs.bake-file }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
+
+ - name: Docker meta
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.REGISTRY_IMAGE }}
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=semver,pattern={{major}}
+ type=sha
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
+ build:
+ runs-on: ubuntu-latest
+ needs: prepare
+ strategy:
+ fail-fast: false
+ matrix:
+ platform:
+ - linux/amd64
+ - linux/arm64
+ - linux/arm/v7
+ - linux/arm/v6
+ - linux/386
+ - linux/loong64
+ steps:
+ - name: Prepare
+ run: |
+ platform=${{ matrix.platform }}
+ echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
+
+ - name: Checkout code
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
- name: Clone maim_message
run: git clone https://github.com/MaiM-with-u/maim_message maim_message
@@ -31,6 +69,9 @@ jobs:
- name: Clone lpmm
run: git clone https://github.com/MaiM-with-u/MaiMBot-LPMM.git MaiMBot-LPMM
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
@@ -42,33 +83,67 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- - name: Docker meta
- id: meta
- uses: docker/metadata-action@v5
- with:
- images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
- tags: |
- type=ref,event=branch
- type=ref,event=tag
- type=semver,pattern={{version}}
- type=semver,pattern={{major}}.{{minor}}
- type=semver,pattern={{major}}
- type=sha
-
- - name: Build and Push Docker Image
+ - name: Build and push by digest
+ id: build
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile
- platforms: linux/amd64,linux/arm64
- tags: ${{ steps.meta.outputs.tags }}
- push: true
- cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache
- cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:buildcache,mode=max
- labels: ${{ steps.meta.outputs.labels }}
+ platforms: ${{ matrix.platform }}
+ labels: ${{ needs.prepare.outputs.labels }}
+ cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }}
+ cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max
provenance: true
sbom: true
build-args: |
BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
VCS_REF=${{ github.sha }}
- outputs: type=image,push=true
\ No newline at end of file
+ outputs: type=image,push-by-digest=true,name-canonical=true,push=true
+
+ - name: Export digest
+ run: |
+ mkdir -p ${{ runner.temp }}/digests
+ digest="${{ steps.build.outputs.digest }}"
+ touch "${{ runner.temp }}/digests/${digest#sha256:}"
+
+ - name: Upload digest
+ uses: actions/upload-artifact@v4
+ with:
+ name: digests-${{ env.PLATFORM_PAIR }}
+ path: ${{ runner.temp }}/digests/*
+ if-no-files-found: error
+ retention-days: 1
+
+ merge:
+ runs-on: ubuntu-latest
+ needs:
+ - prepare
+ - build
+ steps:
+ - name: Download digests
+ uses: actions/download-artifact@v4
+ with:
+ path: ${{ runner.temp }}/digests
+ pattern: digests-*
+ merge-multiple: true
+
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+
+ - name: Create manifest list and push
+ working-directory: ${{ runner.temp }}/digests
+ run: |
+ docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}") \
+ $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+
+ - name: Inspect image
+ run: |
+ tags_json='${{ needs.prepare.outputs.tags }}'
+ first_tag=$(echo $tags_json | jq -r '.tags[0]')
+ docker buildx imagetools inspect $first_tag
\ No newline at end of file
From de1c36f8e8ee64dd0ea2abaca8782ab99b56f211 Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Sat, 10 May 2025 01:46:17 +0800
Subject: [PATCH 13/27] =?UTF-8?q?feat:=20=E5=9C=A8Docker=E9=95=9C=E5=83=8F?=
=?UTF-8?q?=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=E4=B8=AD=E6=B7=BB=E5=8A=A0?=
=?UTF-8?q?=E6=A0=87=E7=AD=BE=E6=94=AF=E6=8C=81=EF=BC=8C=E4=BB=A5=E4=BE=BF?=
=?UTF-8?q?=E4=BA=8E=E7=89=88=E6=9C=AC=E7=AE=A1=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 3fce193b..097fdac3 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -91,6 +91,7 @@ jobs:
file: ./Dockerfile
platforms: ${{ matrix.platform }}
labels: ${{ needs.prepare.outputs.labels }}
+ tags: ${{ env.REGISTRY_IMAGE }}:${{ github.sha }}-${{ env.PLATFORM_PAIR }}
cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }}
cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max
provenance: true
From 4fc33278c98e9cf0d97b5a238f9ed69198de8bbf Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Sat, 10 May 2025 01:51:09 +0800
Subject: [PATCH 14/27] =?UTF-8?q?feat:=20=E7=B2=BE=E7=AE=80Docker=E9=95=9C?=
=?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E7=A7=BB?=
=?UTF-8?q?=E9=99=A4=E4=B8=8D=E5=BF=85=E8=A6=81=E7=9A=84=E5=B9=B3=E5=8F=B0?=
=?UTF-8?q?=E6=94=AF=E6=8C=81=E5=B9=B6=E6=9B=B4=E6=96=B0=E6=A0=87=E7=AD=BE?=
=?UTF-8?q?=E9=85=8D=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 097fdac3..fb3d4938 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -48,10 +48,6 @@ jobs:
platform:
- linux/amd64
- linux/arm64
- - linux/arm/v7
- - linux/arm/v6
- - linux/386
- - linux/loong64
steps:
- name: Prepare
run: |
@@ -91,7 +87,7 @@ jobs:
file: ./Dockerfile
platforms: ${{ matrix.platform }}
labels: ${{ needs.prepare.outputs.labels }}
- tags: ${{ env.REGISTRY_IMAGE }}:${{ github.sha }}-${{ env.PLATFORM_PAIR }}
+ tags: ${{ env.REGISTRY_IMAGE }}
cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }}
cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max
provenance: true
From f96fffe16eb8981f2c7f657effd1b503cdfadc0c Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Sat, 10 May 2025 02:00:59 +0800
Subject: [PATCH 15/27] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0Docker=E9=95=9C?=
=?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E7=A1=AE?=
=?UTF-8?q?=E4=BF=9D=E4=BD=BF=E7=94=A8=E9=BB=98=E8=AE=A4=E6=A0=87=E7=AD=BE?=
=?UTF-8?q?=E5=B9=B6=E4=BC=98=E5=8C=96=E6=A0=87=E7=AD=BE=E5=A4=84=E7=90=86?=
=?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index fb3d4938..7ea9d86e 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -136,8 +136,16 @@ jobs:
- name: Create manifest list and push
working-directory: ${{ runner.temp }}/digests
run: |
- docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}") \
- $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+ # 确保至少有一个默认标签
+ TAGS="-t ${{ env.REGISTRY_IMAGE }}:latest"
+
+ # 如果 meta 输出的标签不为空,则使用它们
+ if [ -n "${{ needs.prepare.outputs.tags }}" ]; then
+ TAGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}")
+ fi
+
+ echo "Using tags: ${TAGS}"
+ docker buildx imagetools create ${TAGS} $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
- name: Inspect image
run: |
From 5ad1993fee7d127b1af776e5816497169be24e14 Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Sat, 10 May 2025 02:11:26 +0800
Subject: [PATCH 16/27] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96Docker=E9=95=9C?=
=?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E5=A2=9E?=
=?UTF-8?q?=E5=BC=BA=E6=A0=87=E7=AD=BE=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?=
=?UTF-8?q?=E4=BB=A5=E6=94=AF=E6=8C=81=E9=BB=98=E8=AE=A4=E6=A0=87=E7=AD=BE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index 7ea9d86e..a2e4cfc8 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -149,6 +149,20 @@ jobs:
- name: Inspect image
run: |
- tags_json='${{ needs.prepare.outputs.tags }}'
- first_tag=$(echo $tags_json | jq -r '.tags[0]')
- docker buildx imagetools inspect $first_tag
\ No newline at end of file
+ # 使用默认标签
+ DEFAULT_TAG="${{ env.REGISTRY_IMAGE }}:latest"
+
+ # 尝试从 prepare 输出中获取标签
+ if [ -n "${{ needs.prepare.outputs.tags }}" ]; then
+ TAGS_JSON='${{ needs.prepare.outputs.tags }}'
+ FIRST_TAG=$(echo $TAGS_JSON | jq -r '.tags[0]')
+ if [ -n "$FIRST_TAG" ] && [ "$FIRST_TAG" != "null" ]; then
+ echo "使用从 metadata 获取的标签: $FIRST_TAG"
+ docker buildx imagetools inspect $FIRST_TAG
+ exit 0
+ fi
+ fi
+
+ # 如果没有标签或提取失败,使用默认标签
+ echo "使用默认标签: $DEFAULT_TAG"
+ docker buildx imagetools inspect $DEFAULT_TAG
\ No newline at end of file
From 606b89c99b233426d90d3289418eec0476e5e27f Mon Sep 17 00:00:00 2001
From: infinitycat
Date: Sat, 10 May 2025 02:37:46 +0800
Subject: [PATCH 17/27] =?UTF-8?q?feat:=20=E9=87=8D=E6=9E=84Docker=E9=95=9C?=
=?UTF-8?q?=E5=83=8F=E6=9E=84=E5=BB=BA=E6=B5=81=E7=A8=8B=EF=BC=8C=E6=96=B0?=
=?UTF-8?q?=E5=A2=9EAMD64=E5=92=8CARM64=E6=9E=B6=E6=9E=84=E6=94=AF?=
=?UTF-8?q?=E6=8C=81=EF=BC=8C=E5=B9=B6=E4=BC=98=E5=8C=96=E5=A4=9A=E6=9E=B6?=
=?UTF-8?q?=E6=9E=84=E6=B8=85=E5=8D=95=E5=88=9B=E5=BB=BA=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.github/workflows/docker-image.yml | 217 +++++++++++++++--------------
1 file changed, 109 insertions(+), 108 deletions(-)
diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index a2e4cfc8..ba56b0c2 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -10,50 +10,13 @@ on:
- "v*.*.*"
- "v*"
-env:
- REGISTRY_IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
-
jobs:
- prepare:
+ build-amd64:
+ name: Build AMD64 Image
runs-on: ubuntu-latest
- outputs:
- tags: ${{ steps.meta.outputs.tags }}
- labels: ${{ steps.meta.outputs.labels }}
- bake-file: ${{ steps.meta.outputs.bake-file }}
+ env:
+ DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }}
steps:
- - name: Checkout code
- uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - name: Docker meta
- id: meta
- uses: docker/metadata-action@v5
- with:
- images: ${{ env.REGISTRY_IMAGE }}
- tags: |
- type=ref,event=branch
- type=ref,event=tag
- type=semver,pattern={{version}}
- type=semver,pattern={{major}}.{{minor}}
- type=semver,pattern={{major}}
- type=sha
-
- build:
- runs-on: ubuntu-latest
- needs: prepare
- strategy:
- fail-fast: false
- matrix:
- platform:
- - linux/amd64
- - linux/arm64
- steps:
- - name: Prepare
- run: |
- platform=${{ matrix.platform }}
- echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
-
- name: Checkout code
uses: actions/checkout@v4
with:
@@ -65,9 +28,6 @@ jobs:
- name: Clone lpmm
run: git clone https://github.com/MaiM-with-u/MaiMBot-LPMM.git MaiMBot-LPMM
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
with:
@@ -79,50 +39,61 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- - name: Build and push by digest
- id: build
+ - name: Docker meta
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=semver,pattern={{major}}
+ type=sha
+
+ - name: Build and Push AMD64 Docker Image
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile
- platforms: ${{ matrix.platform }}
- labels: ${{ needs.prepare.outputs.labels }}
- tags: ${{ env.REGISTRY_IMAGE }}
- cache-from: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }}
- cache-to: type=registry,ref=${{ env.REGISTRY_IMAGE }}:buildcache-${{ env.PLATFORM_PAIR }},mode=max
+ platforms: linux/amd64
+ tags: ${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-${{ github.sha }}
+ push: true
+ cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-buildcache
+ cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-buildcache,mode=max
+ labels: ${{ steps.meta.outputs.labels }}
provenance: true
sbom: true
build-args: |
BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
VCS_REF=${{ github.sha }}
- outputs: type=image,push-by-digest=true,name-canonical=true,push=true
+ outputs: type=image,push=true
- - name: Export digest
- run: |
- mkdir -p ${{ runner.temp }}/digests
- digest="${{ steps.build.outputs.digest }}"
- touch "${{ runner.temp }}/digests/${digest#sha256:}"
-
- - name: Upload digest
- uses: actions/upload-artifact@v4
- with:
- name: digests-${{ env.PLATFORM_PAIR }}
- path: ${{ runner.temp }}/digests/*
- if-no-files-found: error
- retention-days: 1
-
- merge:
+ build-arm64:
+ name: Build ARM64 Image
runs-on: ubuntu-latest
- needs:
- - prepare
- - build
+ env:
+ DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }}
steps:
- - name: Download digests
- uses: actions/download-artifact@v4
+ - name: Checkout code
+ uses: actions/checkout@v4
with:
- path: ${{ runner.temp }}/digests
- pattern: digests-*
- merge-multiple: true
+ fetch-depth: 0
+
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v3
+
+ - name: Clone maim_message
+ run: git clone https://github.com/MaiM-with-u/maim_message maim_message
+
+ - name: Clone lpmm
+ run: git clone https://github.com/MaiM-with-u/MaiMBot-LPMM.git MaiMBot-LPMM
+
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v3
+ with:
+ buildkitd-flags: --debug
- name: Login to Docker Hub
uses: docker/login-action@v3
@@ -130,39 +101,69 @@ jobs:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v3
+ - name: Docker meta
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=semver,pattern={{major}}
+ type=sha
- - name: Create manifest list and push
- working-directory: ${{ runner.temp }}/digests
- run: |
- # 确保至少有一个默认标签
- TAGS="-t ${{ env.REGISTRY_IMAGE }}:latest"
-
- # 如果 meta 输出的标签不为空,则使用它们
- if [ -n "${{ needs.prepare.outputs.tags }}" ]; then
- TAGS=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "${{ needs.prepare.outputs.tags }}")
- fi
-
- echo "Using tags: ${TAGS}"
- docker buildx imagetools create ${TAGS} $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *)
+ - name: Build and Push ARM64 Docker Image
+ uses: docker/build-push-action@v5
+ with:
+ context: .
+ file: ./Dockerfile
+ platforms: linux/arm64
+ tags: ${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-${{ github.sha }}
+ push: true
+ cache-from: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-buildcache
+ cache-to: type=registry,ref=${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-buildcache,mode=max
+ labels: ${{ steps.meta.outputs.labels }}
+ provenance: true
+ sbom: true
+ build-args: |
+ BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
+ VCS_REF=${{ github.sha }}
+ outputs: type=image,push=true
- - name: Inspect image
+ create-manifest:
+ name: Create Multi-Arch Manifest
+ runs-on: ubuntu-latest
+ needs:
+ - build-amd64
+ - build-arm64
+ steps:
+ - name: Login to Docker Hub
+ uses: docker/login-action@v3
+ with:
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+ - name: Docker meta
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ secrets.DOCKERHUB_USERNAME }}/maibot
+ tags: |
+ type=ref,event=branch
+ type=ref,event=tag
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+ type=semver,pattern={{major}}
+ type=sha
+
+ - name: Create and Push Manifest
run: |
- # 使用默认标签
- DEFAULT_TAG="${{ env.REGISTRY_IMAGE }}:latest"
-
- # 尝试从 prepare 输出中获取标签
- if [ -n "${{ needs.prepare.outputs.tags }}" ]; then
- TAGS_JSON='${{ needs.prepare.outputs.tags }}'
- FIRST_TAG=$(echo $TAGS_JSON | jq -r '.tags[0]')
- if [ -n "$FIRST_TAG" ] && [ "$FIRST_TAG" != "null" ]; then
- echo "使用从 metadata 获取的标签: $FIRST_TAG"
- docker buildx imagetools inspect $FIRST_TAG
- exit 0
- fi
- fi
-
- # 如果没有标签或提取失败,使用默认标签
- echo "使用默认标签: $DEFAULT_TAG"
- docker buildx imagetools inspect $DEFAULT_TAG
\ No newline at end of file
+ # 为每个标签创建多架构镜像
+ for tag in $(echo "${{ steps.meta.outputs.tags }}" | tr '\n' ' '); do
+ echo "Creating manifest for $tag"
+ docker buildx imagetools create -t $tag \
+ ${{ secrets.DOCKERHUB_USERNAME }}/maibot:amd64-${{ github.sha }} \
+ ${{ secrets.DOCKERHUB_USERNAME }}/maibot:arm64-${{ github.sha }}
+ done
\ No newline at end of file
From 080c862fcd3aab9aea24235b5bd7998dc9793531 Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 13:43:19 +0800
Subject: [PATCH 18/27] =?UTF-8?q?=E4=B8=8D=E8=AE=A9=E5=88=B7=E5=B1=8Flogge?=
=?UTF-8?q?r=E6=89=93=E6=89=B0=E5=88=B0=E6=99=BA=E7=B1=B3=E5=A1=94?=
=?UTF-8?q?=E5=A4=A7=E4=BA=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/heart_flow/subheartflow_manager.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/heart_flow/subheartflow_manager.py b/src/heart_flow/subheartflow_manager.py
index c074d29a..9d69a5a4 100644
--- a/src/heart_flow/subheartflow_manager.py
+++ b/src/heart_flow/subheartflow_manager.py
@@ -284,7 +284,7 @@ class SubHeartflowManager:
return # 如果不允许,直接返回
# --- 结束新增 ---
- logger.info(f"当前状态 ({current_state.value}) 可以在{focused_limit}个群 专注聊天")
+ logger.debug(f"当前状态 ({current_state.value}) 可以在{focused_limit}个群 专注聊天")
if focused_limit <= 0:
# logger.debug(f"{log_prefix} 当前状态 ({current_state.value}) 不允许 FOCUSED 子心流")
From 28eb827c5f5a604dd1265c3df2e5369beae7cede Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 13:55:21 +0800
Subject: [PATCH 19/27] =?UTF-8?q?=E5=88=86=E7=A6=BB=E5=B5=8C=E5=85=A5?=
=?UTF-8?q?=E5=90=91=E9=87=8F=E6=96=B9=E6=B3=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_processor.py | 105 +++++++++++++++----------------
1 file changed, 49 insertions(+), 56 deletions(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index b6e6b8b3..4efc513e 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -6,7 +6,7 @@ from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_
from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py
from src.config.config import global_config
from src.common.logger_manager import get_logger
-from ..chat.chat_stream import chat_manager
+from ..chat.chat_stream import ChatStream, chat_manager
from src.plugins.chat.utils import get_embedding
from src.common.database import db
from .pfc_manager import PFCManager
@@ -24,7 +24,7 @@ async def _handle_error(error: Exception, context: str, message: MessageRecv | N
if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message
raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取
if raw_msg_content:
- logger.error(f"相关消息原始内容: {raw_msg_content}")
+ logger.error(f"相关消息原始内容: {raw_msg_content}")
elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message
logger.error(f"相关消息原始内容: {message.raw_message}")
@@ -47,21 +47,10 @@ class PFCProcessor:
try:
# 1. 消息解析与初始化
message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv
- # 确保 message_obj.message_info 存在
- if not hasattr(message_obj, 'message_info'):
- logger.error("MessageRecv 对象缺少 message_info 属性。跳过处理。")
- return
groupinfo = getattr(message_obj.message_info, 'group_info', None)
userinfo = getattr(message_obj.message_info, 'user_info', None)
- if userinfo is None: # 确保 userinfo 存在
- logger.error("message_obj.message_info 中缺少 user_info。跳过处理。")
- return
- if not hasattr(userinfo, 'user_id'): # 确保 user_id 存在
- logger.error("userinfo 对象中缺少 user_id。跳过处理。")
- return
-
logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流")
chat = await chat_manager.get_or_create_stream(
platform=message_obj.message_info.platform,
@@ -73,7 +62,7 @@ class PFCProcessor:
# 2. 过滤检查
await message_obj.process() # 调用 MessageRecv 的异步 process 方法
if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \
- self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性
+ self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性
return
# 3. 消息存储 (保持原有调用)
@@ -82,49 +71,10 @@ class PFCProcessor:
await self.storage.store_message(message_obj, chat)
logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}")
- # === 新增:为已存储的消息生成嵌入并更新数据库文档 ===
- embedding_vector = None
- text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本
-
- # 在 storage.py 中,会对 processed_plain_text 进行一次过滤
- # 为了保持一致,我们也在这里应用相同的过滤逻辑
- # 当然,更优的做法是 store_message 返回过滤后的文本,或在 message_obj 中增加一个 filtered_processed_plain_text 属性
- # 这里为了简单,我们先重复一次过滤逻辑
- pattern = r".*?|.*?|.*?"
- if text_for_embedding:
- filtered_text_for_embedding = re.sub(pattern, "", text_for_embedding, flags=re.DOTALL)
- else:
- filtered_text_for_embedding = ""
-
- if filtered_text_for_embedding and filtered_text_for_embedding.strip():
- try:
- # request_type 参数根据你的 get_embedding 函数实际需求来定
- embedding_vector = await get_embedding(filtered_text_for_embedding, request_type="pfc_private_memory")
- if embedding_vector:
- logger.debug(f"成功为消息 ID '{message_obj.message_info.message_id}' 生成嵌入向量。")
-
- # 更新数据库中的对应文档
- # 确保你有权限访问和操作 db 对象
- update_result = db.messages.update_one(
- {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id},
- {"$set": {"embedding_vector": embedding_vector}}
- )
- if update_result.modified_count > 0:
- logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。")
- elif update_result.matched_count > 0:
- logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。")
- else:
- logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。")
- else:
- logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。")
- except Exception as e_embed_update:
- logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True)
- else:
- logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。")
- # === 新增结束 ===
+ await self._update_embedding_vector(message_obj) # 明确传递 message_obj
# 4. 创建 PFC 聊天流
- await self._create_pfc_chat(message_obj)
+ await self._create_pfc_chat(message_obj, chat)
# 5. 日志记录
# 确保 message_obj.message_info.time 是 float 类型的时间戳
@@ -169,4 +119,47 @@ class PFCProcessor:
logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname
logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串
return True
- return False
\ No newline at end of file
+ return False
+
+ async def _update_embedding_vector(self, message_obj: MessageRecv, chat: ChatStream) -> None:
+ """更新消息的嵌入向量"""
+ # === 新增:为已存储的消息生成嵌入并更新数据库文档 ===
+ embedding_vector = None
+ text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本
+
+ # 在 storage.py 中,会对 processed_plain_text 进行一次过滤
+ # 为了保持一致,我们也在这里应用相同的过滤逻辑
+ # 当然,更优的做法是 store_message 返回过滤后的文本,或在 message_obj 中增加一个 filtered_processed_plain_text 属性
+ # 这里为了简单,我们先重复一次过滤逻辑
+ pattern = r".*?|.*?|.*?"
+ if text_for_embedding:
+ filtered_text_for_embedding = re.sub(pattern, "", text_for_embedding, flags=re.DOTALL)
+ else:
+ filtered_text_for_embedding = ""
+
+ if filtered_text_for_embedding and filtered_text_for_embedding.strip():
+ try:
+ # request_type 参数根据你的 get_embedding 函数实际需求来定
+ embedding_vector = await get_embedding(filtered_text_for_embedding, request_type="pfc_private_memory")
+ if embedding_vector:
+ logger.debug(f"成功为消息 ID '{message_obj.message_info.message_id}' 生成嵌入向量。")
+
+ # 更新数据库中的对应文档
+ # 确保你有权限访问和操作 db 对象
+ update_result = db.messages.update_one(
+ {"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id},
+ {"$set": {"embedding_vector": embedding_vector}}
+ )
+ if update_result.modified_count > 0:
+ logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。")
+ elif update_result.matched_count > 0:
+ logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。")
+ else:
+ logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。")
+ else:
+ logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。")
+ except Exception as e_embed_update:
+ logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True)
+ else:
+ logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。")
+ # === 新增结束 ===
\ No newline at end of file
From cd0a41dec69287c27e2b7ba8cd087e39a70345c9 Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 14:08:44 +0800
Subject: [PATCH 20/27] =?UTF-8?q?utils=20=E6=96=B9=E6=B3=95=E8=A7=A3?=
=?UTF-8?q?=E9=87=8A=E8=BF=98=E5=8E=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_utils.py | 39 ++++++++++++++++++++++++++----------
1 file changed, 28 insertions(+), 11 deletions(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 76dd6bc4..f57d59b3 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -63,9 +63,9 @@ async def find_most_relevant_historical_message(
log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)"
logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: "
- f"将使用时间上限 {effective_search_upper_limit} "
- f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) "
- f"进行历史消息锚点搜索。来源: {log_source_of_limit}")
+ f"将使用时间上限 {effective_search_upper_limit} "
+ f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) "
+ f"进行历史消息锚点搜索。来源: {log_source_of_limit}")
# --- [新代码结束] ---
pipeline = [
@@ -202,6 +202,13 @@ async def retrieve_contextual_info(
) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆
"""
检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。
+
+ Args:
+ text: 用于检索的上下文文本 (例如聊天记录)。
+ private_name: 私聊对象的名称,用于日志记录。
+
+ Returns:
+ Tuple[str, str]: (检索到的记忆字符串, 检索到的知识字符串)
"""
# 初始化返回值
retrieved_global_memory_str = "无相关全局记忆。"
@@ -294,9 +301,9 @@ async def retrieve_contextual_info(
log_earliest_time_str = str(current_short_term_history_earliest_time)
logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: "
- f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} "
- f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). "
- f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}")
+ f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} "
+ f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). "
+ f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}")
most_relevant_message_doc = await find_most_relevant_historical_message(
@@ -314,7 +321,7 @@ async def retrieve_contextual_info(
# 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证)
if anchor_time is not None and anchor_time >= final_search_upper_limit_time:
logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} "
- f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。")
+ f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。")
historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。"
# 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值
elif anchor_id and anchor_time is not None:
@@ -323,8 +330,8 @@ async def retrieve_contextual_info(
time_limit_for_context_window_after = final_search_upper_limit_time
logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window "
- f"with anchor_time: {anchor_time}, "
- f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}")
+ f"with anchor_time: {anchor_time}, "
+ f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}")
context_window_messages = await retrieve_chat_context_window(
chat_id=chat_id,
@@ -377,7 +384,17 @@ def get_items_from_json(
allow_array: bool = True,
) -> Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]:
"""从文本中提取JSON内容并获取指定字段
- (保持你原始 pfc_utils.py 中的此函数代码不变)
+
+ Args:
+ content: 包含JSON的文本
+ private_name: 私聊名称
+ *items: 要提取的字段名
+ default_values: 字段的默认值,格式为 {字段名: 默认值}
+ required_types: 字段的必需类型,格式为 {字段名: 类型}
+ allow_array: 是否允许解析JSON数组
+
+ Returns:
+ Tuple[bool, Union[Dict[str, Any], List[Dict[str, Any]]]]: (是否成功, 提取的字段字典或字典列表)
"""
cleaned_content = content.strip()
result: Union[Dict[str, Any], List[Dict[str, Any]]] = {}
@@ -540,7 +557,7 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment:
async def build_chat_history_text(observation_info: ObservationInfo, private_name: str) -> str:
- """ (保持你原始 pfc_utils.py 中的此函数代码不变) """
+ """构建聊天历史记录文本 (包含未处理消息)"""
chat_history_text = ""
try:
if hasattr(observation_info, "chat_history_str") and observation_info.chat_history_str:
From 5aa04fa246e34fa9fd10e4e63f89bf6fd67ce4a3 Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 14:14:59 +0800
Subject: [PATCH 21/27] =?UTF-8?q?=E8=A7=84=E8=8C=83=E7=BC=A9=E8=BF=9B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/reply_generator.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index 9a184e52..f142e720 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -259,8 +259,8 @@ class ReplyGenerator:
# 导入 datetime (如果 reply_generator.py 文件顶部没有的话)
# from datetime import datetime # 通常建议放在文件顶部
logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: "
- f"{recent_history_start_time_for_exclusion} "
- f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})")
+ f"{recent_history_start_time_for_exclusion} "
+ f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})")
else:
logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。")
except (IndexError, KeyError, TypeError) as e:
From 027760517809a96acc40621978e4716316101318 Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 14:31:15 +0800
Subject: [PATCH 22/27] ruff
---
src/plugins/PFC/pfc_processor.py | 2 +-
src/plugins/PFC/pfc_utils.py | 57 +++++++++++++++++++-----------
src/plugins/PFC/reply_generator.py | 1 -
3 files changed, 38 insertions(+), 22 deletions(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index 4efc513e..03bdf641 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -1,6 +1,6 @@
import traceback
import re
-from typing import Any, Dict
+from typing import Any
from datetime import datetime # 确保导入 datetime
from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv
from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index f57d59b3..9d85f9f8 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -1,7 +1,6 @@
import traceback
import json
import re
-import asyncio # 确保导入 asyncio
import time
from datetime import datetime
from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
@@ -297,7 +296,7 @@ async def retrieve_contextual_info(
if current_short_term_history_earliest_time is not None:
try:
log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})"
- except:
+ except:
log_earliest_time_str = str(current_short_term_history_earliest_time)
logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: "
@@ -422,19 +421,25 @@ def get_items_from_json(
current_item_result[field] = item_json[field]
elif field not in default_result:
logger.warning(f"[私聊][{private_name}] JSON数组元素缺少必要字段 '{field}': {item_json}")
- valid_item = False; break
- if not valid_item: continue
+ valid_item = False
+ break
+ if not valid_item:
+ continue
if required_types:
for field, expected_type in required_types.items():
if field in current_item_result and not isinstance(current_item_result[field], expected_type):
logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}")
- valid_item = False; break
- if not valid_item: continue
+ valid_item = False
+ break
+ if not valid_item:
+ continue
for field in items:
if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip():
logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}")
- valid_item = False; break
- if valid_item: valid_items_list.append(current_item_result)
+ valid_item = False
+ break
+ if valid_item:
+ valid_items_list.append(current_item_result)
if valid_items_list:
logger.debug(f"[私聊][{private_name}] 成功解析JSON数组,包含 {len(valid_items_list)} 个有效项目。")
return True, valid_items_list
@@ -469,24 +474,31 @@ def get_items_from_json(
else:
logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...")
return False, default_result
- if not isinstance(result, dict): result = default_result.copy()
+ if not isinstance(result, dict):
+ result = default_result.copy()
valid_single_object = True
for item_field in items: # Renamed item to item_field
- if item_field in json_data: result[item_field] = json_data[item_field]
+ if item_field in json_data:
+ result[item_field] = json_data[item_field]
elif item_field not in default_result:
logger.error(f"[私聊][{private_name}] JSON对象缺少必要字段 '{item_field}'。JSON内容: {json_data}")
- valid_single_object = False; break
- if not valid_single_object: return False, default_result
+ valid_single_object = False
+ break
+ if not valid_single_object:
+ return False, default_result
if required_types:
for field, expected_type in required_types.items():
if field in result and not isinstance(result[field], expected_type):
logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})")
- valid_single_object = False; break
- if not valid_single_object: return False, default_result
+ valid_single_object = False
+ break
+ if not valid_single_object:
+ return False, default_result
for field in items:
if field in result and isinstance(result[field], str) and not result[field].strip():
logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 不能为空字符串")
- valid_single_object = False; break
+ valid_single_object = False
+ break
if valid_single_object:
logger.debug(f"[私聊][{private_name}] 成功解析并验证了单个JSON对象。")
return True, result
@@ -545,13 +557,18 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment:
# 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整
rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False)
high_value_count = len(rdict)
- if old_value > 700: value *= 3 / (high_value_count + 2)
- else: value *= 3 / (high_value_count + 3)
- elif value < 0: value = value * math.exp(old_value / 2000)
+ if old_value > 700:
+ value *= 3 / (high_value_count + 2)
+ else:
+ value *= 3 / (high_value_count + 3)
+ elif value < 0:
+ value = value * math.exp(old_value / 2000)
# else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0
else: # old_value < 0
- if value >= 0: value = value * math.exp(old_value / 2000)
- elif value < 0: value = value * math.cos(math.pi * old_value / 2000)
+ if value >= 0:
+ value = value * math.exp(old_value / 2000)
+ elif value < 0:
+ value = value * math.cos(math.pi * old_value / 2000)
# else: value = 0 # 你原始代码中没有这句
return value
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index f142e720..0a9089aa 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -1,5 +1,4 @@
import random
-import asyncio
from datetime import datetime
from .pfc_utils import retrieve_contextual_info
from typing import Optional
From facc4bbef0a21760d457be46535b9cc81869e08a Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 14:33:00 +0800
Subject: [PATCH 23/27] ruff
---
src/plugins/PFC/pfc_utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 9d85f9f8..19385855 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -296,7 +296,7 @@ async def retrieve_contextual_info(
if current_short_term_history_earliest_time is not None:
try:
log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})"
- except:
+ except Exception:
log_earliest_time_str = str(current_short_term_history_earliest_time)
logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: "
From 37822fb34762953fb221bcc525c0784865c52cb3 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Sat, 10 May 2025 06:33:19 +0000
Subject: [PATCH 24/27] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?=
=?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_processor.py | 89 ++++----
src/plugins/PFC/pfc_utils.py | 331 +++++++++++++++++++----------
src/plugins/PFC/reply_generator.py | 70 +++---
3 files changed, 309 insertions(+), 181 deletions(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index 03bdf641..f706bffa 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -1,12 +1,12 @@
import traceback
import re
from typing import Any
-from datetime import datetime # 确保导入 datetime
-from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv
-from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py
+from datetime import datetime # 确保导入 datetime
+from maim_message import UserInfo # UserInfo 来自 maim_message 包 # 从 maim_message 导入 MessageRecv
+from src.plugins.chat.message import MessageRecv # MessageRecv 来自message.py
from src.config.config import global_config
from src.common.logger_manager import get_logger
-from ..chat.chat_stream import ChatStream, chat_manager
+from ..chat.chat_stream import ChatStream, chat_manager
from src.plugins.chat.utils import get_embedding
from src.common.database import db
from .pfc_manager import PFCManager
@@ -14,18 +14,22 @@ from .pfc_manager import PFCManager
logger = get_logger("pfc_processor")
-async def _handle_error(error: Exception, context: str, message: MessageRecv | None = None) -> None: # 明确 message 类型
+async def _handle_error(
+ error: Exception, context: str, message: MessageRecv | None = None
+) -> None: # 明确 message 类型
"""统一的错误处理函数
# ... (方法注释不变) ...
"""
logger.error(f"{context}: {error}")
logger.error(traceback.format_exc())
# 检查 message 是否 None 以及是否有 raw_message 属性
- if message and hasattr(message, 'message_info') and hasattr(message.message_info, 'raw_message'): # MessageRecv 结构可能没有直接的 raw_message
- raw_msg_content = getattr(message.message_info, 'raw_message', None) # 安全获取
+ if (
+ message and hasattr(message, "message_info") and hasattr(message.message_info, "raw_message")
+ ): # MessageRecv 结构可能没有直接的 raw_message
+ raw_msg_content = getattr(message.message_info, "raw_message", None) # 安全获取
if raw_msg_content:
logger.error(f"相关消息原始内容: {raw_msg_content}")
- elif message and hasattr(message, 'raw_message'): # 如果 MessageRecv 直接有 raw_message
+ elif message and hasattr(message, "raw_message"): # 如果 MessageRecv 直接有 raw_message
logger.error(f"相关消息原始内容: {message.raw_message}")
@@ -35,21 +39,22 @@ class PFCProcessor:
# MessageStorage() 的实例化位置和具体类是什么?
# 我们假设它来自 src.plugins.storage.storage
# 但由于我们不能修改那个文件,所以这里的 self.storage 将按原样使用
- from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解
+ from src.plugins.storage.storage import MessageStorage # 明确导入,以便类型提示和理解
+
self.storage: MessageStorage = MessageStorage()
self.pfc_manager = PFCManager.get_instance()
- async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict
+ async def process_message(self, message_data: dict[str, Any]) -> None: # 使用 dict[str, Any] 替代 Dict
"""处理接收到的原始消息数据
# ... (方法注释不变) ...
"""
- message_obj: MessageRecv | None = None # 初始化为 None,并明确类型
+ message_obj: MessageRecv | None = None # 初始化为 None,并明确类型
try:
# 1. 消息解析与初始化
- message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv
+ message_obj = MessageRecv(message_data) # 使用你提供的 message.py 中的 MessageRecv
- groupinfo = getattr(message_obj.message_info, 'group_info', None)
- userinfo = getattr(message_obj.message_info, 'user_info', None)
+ groupinfo = getattr(message_obj.message_info, "group_info", None)
+ userinfo = getattr(message_obj.message_info, "user_info", None)
logger.trace(f"准备为{userinfo.user_id}创建/获取聊天流")
chat = await chat_manager.get_or_create_stream(
@@ -57,12 +62,13 @@ class PFCProcessor:
user_info=userinfo,
group_info=groupinfo,
)
- message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法
+ message_obj.update_chat_stream(chat) # message.py 中 MessageRecv 有此方法
# 2. 过滤检查
- await message_obj.process() # 调用 MessageRecv 的异步 process 方法
- if self._check_ban_words(message_obj.processed_plain_text, userinfo) or \
- self._check_ban_regex(message_obj.raw_message, userinfo): # MessageRecv 有 raw_message 属性
+ await message_obj.process() # 调用 MessageRecv 的异步 process 方法
+ if self._check_ban_words(message_obj.processed_plain_text, userinfo) or self._check_ban_regex(
+ message_obj.raw_message, userinfo
+ ): # MessageRecv 有 raw_message 属性
return
# 3. 消息存储 (保持原有调用)
@@ -71,7 +77,7 @@ class PFCProcessor:
await self.storage.store_message(message_obj, chat)
logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}")
- await self._update_embedding_vector(message_obj) # 明确传递 message_obj
+ await self._update_embedding_vector(message_obj) # 明确传递 message_obj
# 4. 创建 PFC 聊天流
await self._create_pfc_chat(message_obj, chat)
@@ -81,43 +87,41 @@ class PFCProcessor:
current_time_display = datetime.fromtimestamp(float(message_obj.message_info.time)).strftime("%H:%M:%S")
# 确保 userinfo.user_nickname 存在
- user_nickname_display = getattr(userinfo, 'user_nickname', '未知用户')
+ user_nickname_display = getattr(userinfo, "user_nickname", "未知用户")
- logger.info(
- f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}"
- )
+ logger.info(f"[{current_time_display}][私聊]{user_nickname_display}: {message_obj.processed_plain_text}")
except Exception as e:
- await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj
+ await _handle_error(e, "消息处理失败", message_obj) # 传递 message_obj
- async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型
+ async def _create_pfc_chat(self, message: MessageRecv): # 明确 message 类型
try:
chat_id = str(message.chat_stream.stream_id)
- private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname
+ private_name = str(message.message_info.user_info.user_nickname) # 假设 UserInfo 有 user_nickname
if global_config.enable_pfc_chatting:
await self.pfc_manager.get_or_create_conversation(chat_id, private_name)
except Exception as e:
- logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True
+ logger.error(f"创建PFC聊天失败: {e}", exc_info=True) # 添加 exc_info=True
@staticmethod
- def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型
+ def _check_ban_words(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型
"""检查消息中是否包含过滤词"""
for word in global_config.ban_words:
if word in text:
- logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname
+ logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # 假设 UserInfo 有 user_nickname
logger.info(f"[过滤词识别]消息中含有{word},filtered")
return True
return False
@staticmethod
- def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型
+ def _check_ban_regex(text: str, userinfo: UserInfo) -> bool: # 明确 userinfo 类型
"""检查消息是否匹配过滤正则表达式"""
for pattern in global_config.ban_msgs_regex:
- if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象
- logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname
- logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串
+ if pattern.search(text): # 假设 ban_msgs_regex 中的元素是已编译的正则对象
+ logger.info(f"[私聊]{userinfo.user_nickname}:{text}") # _nickname
+ logger.info(f"[正则表达式过滤]消息匹配到{pattern.pattern},filtered") # .pattern 获取原始表达式字符串
return True
return False
@@ -125,7 +129,7 @@ class PFCProcessor:
"""更新消息的嵌入向量"""
# === 新增:为已存储的消息生成嵌入并更新数据库文档 ===
embedding_vector = None
- text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本
+ text_for_embedding = message_obj.processed_plain_text # 使用处理后的纯文本
# 在 storage.py 中,会对 processed_plain_text 进行一次过滤
# 为了保持一致,我们也在这里应用相同的过滤逻辑
@@ -148,18 +152,25 @@ class PFCProcessor:
# 确保你有权限访问和操作 db 对象
update_result = db.messages.update_one(
{"message_id": message_obj.message_info.message_id, "chat_id": chat.stream_id},
- {"$set": {"embedding_vector": embedding_vector}}
+ {"$set": {"embedding_vector": embedding_vector}},
)
if update_result.modified_count > 0:
logger.info(f"成功为消息 ID '{message_obj.message_info.message_id}' 更新嵌入向量到数据库。")
elif update_result.matched_count > 0:
logger.warning(f"消息 ID '{message_obj.message_info.message_id}' 已存在嵌入向量或未作修改。")
else:
- logger.error(f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。")
+ logger.error(
+ f"未能找到消息 ID '{message_obj.message_info.message_id}' (chat_id: {chat.stream_id}) 来更新嵌入向量。可能是存储和更新之间存在延迟或问题。"
+ )
else:
- logger.warning(f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。")
+ logger.warning(
+ f"未能为消息 ID '{message_obj.message_info.message_id}' 的文本 '{filtered_text_for_embedding[:30]}...' 生成嵌入向量。"
+ )
except Exception as e_embed_update:
- logger.error(f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}", exc_info=True)
+ logger.error(
+ f"为消息 ID '{message_obj.message_info.message_id}' 生成嵌入或更新数据库时发生异常: {e_embed_update}",
+ exc_info=True,
+ )
else:
logger.debug(f"消息 ID '{message_obj.message_info.message_id}' 的过滤后纯文本为空,不生成或更新嵌入。")
- # === 新增结束 ===
\ No newline at end of file
+ # === 新增结束 ===
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 19385855..91c04ad5 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -3,35 +3,36 @@ import json
import re
import time
from datetime import datetime
-from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
+from typing import Dict, Any, Optional, Tuple, List, Union # 确保导入这些类型
from src.common.logger_manager import get_logger
from src.config.config import global_config
-from src.common.database import db # << 确认此路径
+from src.common.database import db # << 确认此路径
# --- 依赖于你项目结构的导入,请务必仔细检查并根据你的实际情况调整 ---
-from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径
-from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径
-from src.plugins.chat.utils import get_embedding # << 确认此路径
-from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径
+from src.plugins.memory_system.Hippocampus import HippocampusManager # << 确认此路径
+from src.plugins.heartFC_chat.heartflow_prompt_builder import prompt_builder # << 确认此路径
+from src.plugins.chat.utils import get_embedding # << 确认此路径
+from src.plugins.utils.chat_message_builder import build_readable_messages # << 确认此路径
# --- 依赖导入结束 ---
-from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py
-from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入)
-import math # 来自原始 pfc_utils.py
-from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入)
+from src.plugins.chat.chat_stream import ChatStream # 来自原始 pfc_utils.py
+from ..person_info.person_info import person_info_manager # 来自原始 pfc_utils.py (相对导入)
+import math # 来自原始 pfc_utils.py
+from .observation_info import ObservationInfo # 来自原始 pfc_utils.py (相对导入)
logger = get_logger("pfc_utils")
+
# ==============================================================================
# 新增:专门用于检索 PFC 私聊历史对话上下文的函数
# ==============================================================================
async def find_most_relevant_historical_message(
chat_id: str,
query_text: str,
- similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整
- absolute_search_time_limit: Optional[float] = None # 新增参数:排除最近多少秒内的消息(例如5分钟)
+ similarity_threshold: float = 0.3, # 相似度阈值,可以根据效果调整
+ absolute_search_time_limit: Optional[float] = None, # 新增参数:排除最近多少秒内的消息(例如5分钟)
) -> Optional[Dict[str, Any]]:
"""
根据查询文本,在指定 chat_id 的历史消息中查找最相关的消息。
@@ -51,20 +52,22 @@ async def find_most_relevant_historical_message(
effective_search_upper_limit: float
log_source_of_limit: str = ""
-
+
if absolute_search_time_limit is not None:
effective_search_upper_limit = absolute_search_time_limit
log_source_of_limit = "传入的绝对时间上限"
else:
# 如果没有传入绝对时间上限,可以设置一个默认的回退逻辑
- fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时
+ fallback_exclude_seconds = getattr(global_config, "pfc_historical_fallback_exclude_seconds", 7200) # 默认2小时
effective_search_upper_limit = time.time() - fallback_exclude_seconds
log_source_of_limit = f"回退逻辑 (排除最近 {fallback_exclude_seconds} 秒)"
-
- logger.debug(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: "
- f"将使用时间上限 {effective_search_upper_limit} "
- f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) "
- f"进行历史消息锚点搜索。来源: {log_source_of_limit}")
+
+ logger.debug(
+ f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: "
+ f"将使用时间上限 {effective_search_upper_limit} "
+ f"(可读: {datetime.fromtimestamp(effective_search_upper_limit).strftime('%Y-%m-%d %H:%M:%S')}) "
+ f"进行历史消息锚点搜索。来源: {log_source_of_limit}"
+ )
# --- [新代码结束] ---
pipeline = [
@@ -72,14 +75,46 @@ async def find_most_relevant_historical_message(
"$match": {
"chat_id": chat_id,
"embedding_vector": {"$exists": True, "$ne": None, "$not": {"$size": 0}},
- "time": {"$lt": effective_search_upper_limit} # <--- 使用新的 effective_search_upper_limit
+ "time": {"$lt": effective_search_upper_limit}, # <--- 使用新的 effective_search_upper_limit
}
},
{
"$addFields": {
- "dotProduct": {"$reduce": {"input": {"$range": [0, {"$size": "$embedding_vector"}]}, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": [{"$arrayElemAt": ["$embedding_vector", "$$this"]}, {"$arrayElemAt": [query_embedding, "$$this"]}]}]}}},
- "queryVecMagnitude": {"$sqrt": {"$reduce": {"input": query_embedding, "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}},
- "docVecMagnitude": {"$sqrt": {"$reduce": {"input": "$embedding_vector", "initialValue": 0, "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]}}}}
+ "dotProduct": {
+ "$reduce": {
+ "input": {"$range": [0, {"$size": "$embedding_vector"}]},
+ "initialValue": 0,
+ "in": {
+ "$add": [
+ "$$value",
+ {
+ "$multiply": [
+ {"$arrayElemAt": ["$embedding_vector", "$$this"]},
+ {"$arrayElemAt": [query_embedding, "$$this"]},
+ ]
+ },
+ ]
+ },
+ }
+ },
+ "queryVecMagnitude": {
+ "$sqrt": {
+ "$reduce": {
+ "input": query_embedding,
+ "initialValue": 0,
+ "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]},
+ }
+ }
+ },
+ "docVecMagnitude": {
+ "$sqrt": {
+ "$reduce": {
+ "input": "$embedding_vector",
+ "initialValue": 0,
+ "in": {"$add": ["$$value", {"$multiply": ["$$this", "$$this"]}]},
+ }
+ }
+ },
}
},
{
@@ -88,7 +123,7 @@ async def find_most_relevant_historical_message(
"$cond": [
{"$and": [{"$gt": ["$queryVecMagnitude", 0]}, {"$gt": ["$docVecMagnitude", 0]}]},
{"$divide": ["$dotProduct", {"$multiply": ["$queryVecMagnitude", "$docVecMagnitude"]}]},
- 0
+ 0,
]
}
}
@@ -96,26 +131,44 @@ async def find_most_relevant_historical_message(
{"$match": {"similarity": {"$gte": similarity_threshold}}},
{"$sort": {"similarity": -1}},
{"$limit": 1},
- {"$project": {"_id": 0, "message_id": 1, "time": 1, "chat_id": 1, "user_info": 1, "processed_plain_text": 1, "similarity": 1}} # 可以不返回 embedding_vector 节省带宽
+ {
+ "$project": {
+ "_id": 0,
+ "message_id": 1,
+ "time": 1,
+ "chat_id": 1,
+ "user_info": 1,
+ "processed_plain_text": 1,
+ "similarity": 1,
+ }
+ }, # 可以不返回 embedding_vector 节省带宽
]
try:
# --- 确定性修改:同步执行聚合和结果转换 ---
- cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor
- results = list(cursor) # 直接将 CommandCursor 转换为列表
+ cursor = db.messages.aggregate(pipeline) # PyMongo 的 aggregate 返回一个 CommandCursor
+ results = list(cursor) # 直接将 CommandCursor 转换为列表
if not results:
- logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。")
+ logger.info(
+ f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,未能找到任何与 '{query_text[:30]}...' 相关的历史消息。"
+ )
else:
- logger.info(f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:")
- for res_msg in results:
- msg_time_readable = datetime.fromtimestamp(res_msg.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')
- logger.info(f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text','')[:50]}...'")
+ logger.info(
+ f"[{chat_id}] (私聊历史) find_most_relevant_historical_message: 在时间点 {effective_search_upper_limit} 之前,找到了 {len(results)} 条候选历史消息。最相关的一条是:"
+ )
+ for res_msg in results:
+ msg_time_readable = datetime.fromtimestamp(res_msg.get("time", 0)).strftime("%Y-%m-%d %H:%M:%S")
+ logger.info(
+ f" - MsgID: {res_msg.get('message_id')}, Time: {msg_time_readable} (原始: {res_msg.get('time')}), Sim: {res_msg.get('similarity'):.4f}, Text: '{res_msg.get('processed_plain_text', '')[:50]}...'"
+ )
# --- [修改结束] ---
# --- 修改结束 ---
if results and len(results) > 0:
most_similar_message = results[0]
- logger.info(f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}")
+ logger.info(
+ f"[{chat_id}] (私聊历史)找到最相关消息 ID: {most_similar_message.get('message_id')}, 相似度: {most_similar_message.get('similarity'):.4f}"
+ )
return most_similar_message
else:
logger.debug(f"[{chat_id}] (私聊历史)未找到相似度超过 {similarity_threshold} 的相关消息。")
@@ -124,13 +177,14 @@ async def find_most_relevant_historical_message(
logger.error(f"[{chat_id}] (私聊历史)在数据库中检索时出错: {e}", exc_info=True)
return None
+
async def retrieve_chat_context_window(
chat_id: str,
anchor_message_id: str,
anchor_message_time: float,
excluded_time_threshold_for_window: float,
window_size_before: int = 7,
- window_size_after: int = 7
+ window_size_after: int = 7,
) -> List[Dict[str, Any]]:
"""
以某条消息为锚点,获取其前后的聊天记录形成一个上下文窗口。
@@ -138,33 +192,50 @@ async def retrieve_chat_context_window(
if not anchor_message_id or anchor_message_time is None:
return []
- context_messages: List[Dict[str, Any]] = [] # 明确类型
- logger.debug(f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口...")
+ context_messages: List[Dict[str, Any]] = [] # 明确类型
+ logger.debug(
+ f"[{chat_id}] (私聊历史)准备以消息 ID '{anchor_message_id}' (时间: {anchor_message_time}) 为锚点,获取上下文窗口..."
+ )
try:
# --- 同步执行 find_one 和 find ---
anchor_message = db.messages.find_one({"message_id": anchor_message_id, "chat_id": chat_id})
- messages_before_cursor = db.messages.find(
- {"chat_id": chat_id, "time": {"$lt": anchor_message_time}}
- ).sort("time", -1).limit(window_size_before)
+ messages_before_cursor = (
+ db.messages.find({"chat_id": chat_id, "time": {"$lt": anchor_message_time}})
+ .sort("time", -1)
+ .limit(window_size_before)
+ )
messages_before = list(messages_before_cursor)
messages_before.reverse()
# --- 新增日志 ---
- logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}")
- logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):")
+ logger.debug(
+ f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Anchor Time: {anchor_message_time}, Excluded Window End Time: {excluded_time_threshold_for_window}"
+ )
+ logger.debug(
+ f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages BEFORE anchor ({len(messages_before)}):"
+ )
for msg_b in messages_before:
- logger.debug(f" - Time: {datetime.fromtimestamp(msg_b.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text','')[:30]}...'")
+ logger.debug(
+ f" - Time: {datetime.fromtimestamp(msg_b.get('time', 0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_b.get('processed_plain_text', '')[:30]}...'"
+ )
- messages_after_cursor = db.messages.find(
- {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}}
- ).sort("time", 1).limit(window_size_after)
+ messages_after_cursor = (
+ db.messages.find(
+ {"chat_id": chat_id, "time": {"$gt": anchor_message_time, "$lt": excluded_time_threshold_for_window}}
+ )
+ .sort("time", 1)
+ .limit(window_size_after)
+ )
messages_after = list(messages_after_cursor)
# --- 新增日志 ---
- logger.debug(f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):")
+ logger.debug(
+ f"[{chat_id}] (私聊历史) retrieve_chat_context_window: Messages AFTER anchor ({len(messages_after)}):"
+ )
for msg_a in messages_after:
- logger.debug(f" - Time: {datetime.fromtimestamp(msg_a.get('time',0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text','')[:30]}...'")
-
+ logger.debug(
+ f" - Time: {datetime.fromtimestamp(msg_a.get('time', 0)).strftime('%Y-%m-%d %H:%M:%S')}, Text: '{msg_a.get('processed_plain_text', '')[:30]}...'"
+ )
if messages_before:
context_messages.extend(messages_before)
@@ -173,32 +244,35 @@ async def retrieve_chat_context_window(
context_messages.append(anchor_message)
if messages_after:
context_messages.extend(messages_after)
-
- final_window: List[Dict[str, Any]] = [] # 明确类型
- seen_ids: set[str] = set() # 明确类型
+
+ final_window: List[Dict[str, Any]] = [] # 明确类型
+ seen_ids: set[str] = set() # 明确类型
for msg in context_messages:
msg_id = msg.get("message_id")
- if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在
+ if msg_id and msg_id not in seen_ids: # 确保 msg_id 存在
final_window.append(msg)
seen_ids.add(msg_id)
-
+
final_window.sort(key=lambda m: m.get("time", 0))
- logger.info(f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。")
+ logger.info(
+ f"[{chat_id}] (私聊历史)为锚点 '{anchor_message_id}' 构建了包含 {len(final_window)} 条消息的上下文窗口。"
+ )
return final_window
except Exception as e:
logger.error(f"[{chat_id}] (私聊历史)获取消息 ID '{anchor_message_id}' 的上下文窗口时出错: {e}", exc_info=True)
return []
+
# ==============================================================================
# 修改后的 retrieve_contextual_info 函数
# ==============================================================================
async def retrieve_contextual_info(
- text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录)
- private_name: str, # 用于日志
- chat_id: str, # 用于特定私聊历史的检索
+ text: str, # 用于全局记忆和知识检索的主查询文本 (通常是短期聊天记录)
+ private_name: str, # 用于日志
+ chat_id: str, # 用于特定私聊历史的检索
historical_chat_query_text: Optional[str] = None,
- current_short_term_history_earliest_time: Optional[float] = None # <--- 新增参数
-) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆
+ current_short_term_history_earliest_time: Optional[float] = None, # <--- 新增参数
+) -> Tuple[str, str, str]: # 返回: 全局记忆, 知识, 私聊历史回忆
"""
检索三种类型的上下文信息:全局压缩记忆、知识库知识、当前私聊的特定历史对话。
@@ -222,9 +296,9 @@ async def retrieve_contextual_info(
related_memory = await HippocampusManager.get_instance().get_memory_from_text(
text=text,
max_memory_num=2,
- max_memory_length=2,
+ max_memory_length=2,
max_depth=3,
- fast_retrieval=False,
+ fast_retrieval=False,
)
if related_memory:
temp_global_memory_info = ""
@@ -233,7 +307,7 @@ async def retrieve_contextual_info(
temp_global_memory_info += str(memory_item[1]) + "\n"
elif isinstance(memory_item, str):
temp_global_memory_info += memory_item + "\n"
-
+
if temp_global_memory_info.strip():
retrieved_global_memory_str = f"你回忆起一些相关的全局记忆:\n{temp_global_memory_info.strip()}\n(以上是你的全局记忆,供参考)\n"
global_memory_log_msg = f"自动检索到全局压缩记忆: {temp_global_memory_info.strip()[:100]}..."
@@ -250,7 +324,6 @@ async def retrieve_contextual_info(
else:
logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过全局压缩记忆检索。")
-
# --- 2. 相关知识检索 (来自 prompt_builder) ---
# (保持你原始 pfc_utils.py 中这部分的逻辑基本不变)
knowledge_log_msg = f"开始知识检索 (基于文本: '{text[:30]}...')"
@@ -260,8 +333,8 @@ async def retrieve_contextual_info(
message=text,
threshold=0.38,
)
- if knowledge_result and knowledge_result.strip(): # 确保结果不为空
- retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装
+ if knowledge_result and knowledge_result.strip(): # 确保结果不为空
+ retrieved_knowledge_str = knowledge_result # 直接使用返回结果,如果需要也可以包装
knowledge_log_msg = f"自动检索到相关知识: {knowledge_result[:100]}..."
else:
knowledge_log_msg = "知识检索返回为空。"
@@ -274,9 +347,10 @@ async def retrieve_contextual_info(
else:
logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无有效主查询文本,跳过知识检索。")
-
# --- 3. 当前私聊的特定历史对话上下文检索 ---
- query_for_historical_chat = historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None
+ query_for_historical_chat = (
+ historical_chat_query_text if historical_chat_query_text and historical_chat_query_text.strip() else None
+ )
# historical_chat_log_msg 的初始化可以移到 try 块之后,根据实际情况赋值
if query_for_historical_chat:
@@ -284,13 +358,15 @@ async def retrieve_contextual_info(
# ---- [新代码] 计算最终的、严格的搜索时间上限 ----
# 1. 设置一个基础的、较大的时间回溯窗口,例如2小时 (7200秒)
# 这个值可以从全局配置读取,如果没配置则使用默认值
- default_search_exclude_seconds = getattr(global_config, "pfc_historical_search_default_exclude_seconds", 7200) # 默认2小时
+ default_search_exclude_seconds = getattr(
+ global_config, "pfc_historical_search_default_exclude_seconds", 7200
+ ) # 默认2小时
base_excluded_time_limit = time.time() - default_search_exclude_seconds
-
+
final_search_upper_limit_time = base_excluded_time_limit
if current_short_term_history_earliest_time is not None:
# 我们希望找到的消息严格早于 short_term_history 的开始,减去一个小量确保不包含边界
- limit_from_short_term = current_short_term_history_earliest_time - 0.001
+ limit_from_short_term = current_short_term_history_earliest_time - 0.001
final_search_upper_limit_time = min(base_excluded_time_limit, limit_from_short_term)
log_earliest_time_str = "未提供"
if current_short_term_history_earliest_time is not None:
@@ -298,55 +374,60 @@ async def retrieve_contextual_info(
log_earliest_time_str = f"{current_short_term_history_earliest_time} (即 {datetime.fromtimestamp(current_short_term_history_earliest_time).strftime('%Y-%m-%d %H:%M:%S')})"
except Exception:
log_earliest_time_str = str(current_short_term_history_earliest_time)
-
- logger.debug(f"[{private_name}] (私聊历史) retrieve_contextual_info: "
- f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} "
- f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). "
- f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}")
-
+
+ logger.debug(
+ f"[{private_name}] (私聊历史) retrieve_contextual_info: "
+ f"最终用于历史搜索的时间上限: {final_search_upper_limit_time} "
+ f"(可读: {datetime.fromtimestamp(final_search_upper_limit_time).strftime('%Y-%m-%d %H:%M:%S')}). "
+ f"基于默认排除 {default_search_exclude_seconds}s 和 '最近记录'片段开始时间: {log_earliest_time_str}"
+ )
most_relevant_message_doc = await find_most_relevant_historical_message(
chat_id=chat_id,
query_text=query_for_historical_chat,
- similarity_threshold=0.5, # 您可以调整这个
+ similarity_threshold=0.5, # 您可以调整这个
# exclude_recent_seconds 不再直接使用,而是传递计算好的绝对时间上限
- absolute_search_time_limit=final_search_upper_limit_time # <--- 传递计算好的绝对时间上限
+ absolute_search_time_limit=final_search_upper_limit_time, # <--- 传递计算好的绝对时间上限
)
-
+
if most_relevant_message_doc:
anchor_id = most_relevant_message_doc.get("message_id")
- anchor_time = most_relevant_message_doc.get("time")
-
+ anchor_time = most_relevant_message_doc.get("time")
+
# 校验锚点时间是否真的符合我们的硬性上限 (理论上 find_most_relevant_historical_message 内部已保证)
if anchor_time is not None and anchor_time >= final_search_upper_limit_time:
- logger.warning(f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} "
- f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。")
+ logger.warning(
+ f"[{private_name}] (私聊历史) find_most_relevant_historical_message 返回的锚点时间 {anchor_time} "
+ f"并未严格小于最终搜索上限 {final_search_upper_limit_time}。可能导致重叠。跳过构建上下文。"
+ )
historical_chat_log_msg = "检索到的锚点不符合最终时间要求,可能导致重叠。"
# 直接进入下一个分支 (else),使得 retrieved_historical_chat_str 保持默认值
elif anchor_id and anchor_time is not None:
# 构建上下文窗口时,其“未来”消息的上限也应该是 final_search_upper_limit_time
# 因为我们不希望历史回忆的上下文窗口延伸到“最近聊天记录”的范围内或更近
- time_limit_for_context_window_after = final_search_upper_limit_time
-
- logger.debug(f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window "
- f"with anchor_time: {anchor_time}, "
- f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}")
+ time_limit_for_context_window_after = final_search_upper_limit_time
+
+ logger.debug(
+ f"[{private_name}] (私聊历史) 调用 retrieve_chat_context_window "
+ f"with anchor_time: {anchor_time}, "
+ f"excluded_time_threshold_for_window: {time_limit_for_context_window_after}"
+ )
context_window_messages = await retrieve_chat_context_window(
chat_id=chat_id,
anchor_message_id=anchor_id,
- anchor_message_time=anchor_time,
+ anchor_message_time=anchor_time,
excluded_time_threshold_for_window=time_limit_for_context_window_after,
window_size_before=7,
- window_size_after=7
+ window_size_after=7,
)
if context_window_messages:
formatted_window_str = await build_readable_messages(
context_window_messages,
- replace_bot_name=False, # 在回忆中,保留原始发送者名称
+ replace_bot_name=False, # 在回忆中,保留原始发送者名称
merge_messages=False,
- timestamp_mode="relative", # 可以选择 'absolute' 或 'none'
- read_mark=0.0
+ timestamp_mode="relative", # 可以选择 'absolute' 或 'none'
+ read_mark=0.0,
)
if formatted_window_str and formatted_window_str.strip():
retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n"
@@ -359,14 +440,18 @@ async def retrieve_contextual_info(
historical_chat_log_msg = "检索到的最相关私聊历史消息文档缺少 message_id 或 time。"
else:
historical_chat_log_msg = "未找到足够相关的私聊历史对话消息。"
- logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}")
+ logger.debug(
+ f"[私聊][{private_name}] (retrieve_contextual_info) 私聊历史对话检索: {historical_chat_log_msg}"
+ )
except Exception as e:
logger.error(
f"[私聊][{private_name}] (retrieve_contextual_info) 检索私聊历史对话时出错: {e}\n{traceback.format_exc()}"
)
retrieved_historical_chat_str = "[检索私聊历史对话时出错]\n"
else:
- logger.debug(f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。")
+ logger.debug(
+ f"[私聊][{private_name}] (retrieve_contextual_info) 无专门的私聊历史查询文本,跳过私聊历史对话检索。"
+ )
return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str
@@ -410,13 +495,13 @@ def get_items_from_json(
json_array = json.loads(cleaned_content)
if isinstance(json_array, list):
valid_items_list: List[Dict[str, Any]] = []
- for item_json in json_array: # Renamed item to item_json to avoid conflict
+ for item_json in json_array: # Renamed item to item_json to avoid conflict
if not isinstance(item_json, dict):
logger.warning(f"[私聊][{private_name}] JSON数组中的元素不是字典: {item_json}")
continue
current_item_result = default_result.copy()
valid_item = True
- for field in items: # items is args from function signature
+ for field in items: # items is args from function signature
if field in item_json:
current_item_result[field] = item_json[field]
elif field not in default_result:
@@ -427,15 +512,25 @@ def get_items_from_json(
continue
if required_types:
for field, expected_type in required_types.items():
- if field in current_item_result and not isinstance(current_item_result[field], expected_type):
- logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}")
+ if field in current_item_result and not isinstance(
+ current_item_result[field], expected_type
+ ):
+ logger.warning(
+ f"[私聊][{private_name}] JSON数组元素字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(current_item_result[field]).__name__}): {item_json}"
+ )
valid_item = False
break
if not valid_item:
continue
for field in items:
- if field in current_item_result and isinstance(current_item_result[field], str) and not current_item_result[field].strip():
- logger.warning(f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}")
+ if (
+ field in current_item_result
+ and isinstance(current_item_result[field], str)
+ and not current_item_result[field].strip()
+ ):
+ logger.warning(
+ f"[私聊][{private_name}] JSON数组元素字段 '{field}' 不能为空字符串: {item_json}"
+ )
valid_item = False
break
if valid_item:
@@ -472,12 +567,14 @@ def get_items_from_json(
logger.error(f"[私聊][{private_name}] 正则提取的部分 '{potential_json_str[:100]}...' 无法解析为JSON。")
return False, default_result
else:
- logger.error(f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}...")
+ logger.error(
+ f"[私聊][{private_name}] 无法在返回内容中找到有效的JSON对象部分。原始内容: {cleaned_content[:100]}..."
+ )
return False, default_result
if not isinstance(result, dict):
result = default_result.copy()
valid_single_object = True
- for item_field in items: # Renamed item to item_field
+ for item_field in items: # Renamed item to item_field
if item_field in json_data:
result[item_field] = json_data[item_field]
elif item_field not in default_result:
@@ -489,7 +586,9 @@ def get_items_from_json(
if required_types:
for field, expected_type in required_types.items():
if field in result and not isinstance(result[field], expected_type):
- logger.error(f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})")
+ logger.error(
+ f"[私聊][{private_name}] JSON对象字段 '{field}' 类型错误 (应为 {expected_type.__name__}, 实际为 {type(result[field]).__name__})"
+ )
valid_single_object = False
break
if not valid_single_object:
@@ -507,7 +606,7 @@ def get_items_from_json(
async def get_person_id(private_name: str, chat_stream: ChatStream):
- """ (保持你原始 pfc_utils.py 中的此函数代码不变) """
+ """(保持你原始 pfc_utils.py 中的此函数代码不变)"""
private_user_id_str: Optional[str] = None
private_platform_str: Optional[str] = None
# private_nickname_str = private_name # 这行在你提供的代码中没有被使用,可以考虑移除
@@ -516,7 +615,7 @@ async def get_person_id(private_name: str, chat_stream: ChatStream):
private_user_id_str = str(chat_stream.user_info.user_id)
private_platform_str = chat_stream.user_info.platform
logger.debug(
- f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name
+ f"[私聊][{private_name}] 从 ChatStream 获取到私聊对象信息: ID={private_user_id_str}, Platform={private_platform_str}, Name={private_name}" # 使用 private_name
)
# elif chat_stream.group_info is None and private_name: # 这个 elif 条件体为空,可以移除
# pass
@@ -547,7 +646,7 @@ async def get_person_id(private_name: str, chat_stream: ChatStream):
async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment: float) -> float:
- """ (保持你原始 pfc_utils.py 中的此函数代码不变) """
+ """(保持你原始 pfc_utils.py 中的此函数代码不变)"""
old_value = max(-1000, min(1000, old_value))
value = raw_adjustment
if old_value >= 0:
@@ -555,7 +654,9 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment:
value = value * math.cos(math.pi * old_value / 2000)
if old_value > 500:
# 确保 person_info_manager.get_specific_value_list 是异步的,如果是同步则需要调整
- rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False)
+ rdict = await person_info_manager.get_specific_value_list(
+ "relationship_value", lambda x: x > 700 if isinstance(x, (int, float)) else False
+ )
high_value_count = len(rdict)
if old_value > 700:
value *= 3 / (high_value_count + 2)
@@ -564,7 +665,7 @@ async def adjust_relationship_value_nonlinear(old_value: float, raw_adjustment:
elif value < 0:
value = value * math.exp(old_value / 2000)
# else: value = 0 # 你原始代码中没有这句,如果value为0,保持为0
- else: # old_value < 0
+ else: # old_value < 0
if value >= 0:
value = value * math.exp(old_value / 2000)
elif value < 0:
@@ -586,12 +687,12 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam
)
else:
chat_history_text = "还没有聊天记录。\n"
-
+
unread_count = getattr(observation_info, "new_messages_count", 0)
unread_messages = getattr(observation_info, "unprocessed_messages", [])
if unread_count > 0 and unread_messages:
- bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取
- if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤
+ bot_qq_str = str(global_config.BOT_QQ) if global_config.BOT_QQ else None # 安全获取
+ if bot_qq_str: # 仅当 bot_qq_str 有效时进行过滤
other_unread_messages = [
msg for msg in unread_messages if msg.get("user_info", {}).get("user_id") != bot_qq_str
]
@@ -599,12 +700,12 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam
if other_unread_count > 0:
new_messages_str = await build_readable_messages(
other_unread_messages,
- replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字
+ replace_bot_name=True, # 这里是未处理消息,可能不需要替换机器人名字
merge_messages=False,
timestamp_mode="relative",
read_mark=0.0,
)
- chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的
+ chat_history_text += f"\n{new_messages_str}\n------\n" # 原始代码是加在末尾的
else:
logger.warning(f"[私聊][{private_name}] BOT_QQ 未配置,无法准确过滤未读消息中的机器人自身消息。")
@@ -614,4 +715,4 @@ async def build_chat_history_text(observation_info: ObservationInfo, private_nam
except Exception as e:
logger.error(f"[私聊][{private_name}] 处理聊天记录时发生未知错误: {e}")
chat_history_text = "[处理聊天记录时出错]\n"
- return chat_history_text
\ No newline at end of file
+ return chat_history_text
diff --git a/src/plugins/PFC/reply_generator.py b/src/plugins/PFC/reply_generator.py
index 0a9089aa..7773bc08 100644
--- a/src/plugins/PFC/reply_generator.py
+++ b/src/plugins/PFC/reply_generator.py
@@ -226,7 +226,7 @@ class ReplyGenerator:
chat_history_for_prompt_builder: list = []
recent_history_start_time_for_exclusion: Optional[float] = None
-
+
# 我们需要知道 build_chat_history_text 函数大致会用 observation_info.chat_history 的多少条记录
# 或者 build_chat_history_text 内部的逻辑。
# 假设 build_chat_history_text 主要依赖 observation_info.chat_history_str,
@@ -238,7 +238,7 @@ class ReplyGenerator:
# 如果 observation_info.chat_history_str 是由 observation_info.py 中的 update_from_message 等方法维护的,
# 并且总是代表一个固定长度(比如最后30条)的聊天记录字符串,那么我们就需要从 observation_info.chat_history
# 取出这部分原始消息来确定起始时间。
-
+
# 我们先做一个合理的假设: “最近聊天记录” 字符串 chat_history_text 是基于
# observation_info.chat_history 的一个有限的尾部片段生成的。
# 假设这个片段的长度由 global_config.pfc_recent_history_display_count 控制,默认为20条。
@@ -249,25 +249,29 @@ class ReplyGenerator:
# 如果 observation_info.chat_history 长度小于 display_count,则取全部
start_index = max(0, len(observation_info.chat_history) - recent_history_display_count)
chat_history_for_prompt_builder = observation_info.chat_history[start_index:]
-
- if chat_history_for_prompt_builder: # 如果片段不为空
+
+ if chat_history_for_prompt_builder: # 如果片段不为空
try:
first_message_in_display_slice = chat_history_for_prompt_builder[0]
- recent_history_start_time_for_exclusion = first_message_in_display_slice.get('time')
+ recent_history_start_time_for_exclusion = first_message_in_display_slice.get("time")
if recent_history_start_time_for_exclusion:
# 导入 datetime (如果 reply_generator.py 文件顶部没有的话)
# from datetime import datetime # 通常建议放在文件顶部
- logger.debug(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: "
- f"{recent_history_start_time_for_exclusion} "
- f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})")
+ logger.debug(
+ f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段(共{len(chat_history_for_prompt_builder)}条)的最早时间戳: "
+ f"{recent_history_start_time_for_exclusion} "
+ f"(即 {datetime.fromtimestamp(recent_history_start_time_for_exclusion).strftime('%Y-%m-%d %H:%M:%S')})"
+ )
else:
logger.warning(f"[{self.private_name}] (ReplyGenerator) “最近聊天记录”片段的首条消息无时间戳。")
except (IndexError, KeyError, TypeError) as e:
logger.warning(f"[{self.private_name}] (ReplyGenerator) 获取“最近聊天记录”起始时间失败: {e}")
- recent_history_start_time_for_exclusion = None
+ recent_history_start_time_for_exclusion = None
else:
- logger.debug(f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。")
- # --- [新代码结束] ---
+ logger.debug(
+ f"[{self.private_name}] (ReplyGenerator) observation_info.chat_history 为空,无法确定“最近聊天记录”起始时间。"
+ )
+ # --- [新代码结束] ---
chat_history_text = await build_chat_history_text(observation_info, self.private_name)
@@ -278,28 +282,32 @@ class ReplyGenerator:
persona_text = f"你的名字是{self.name},{self.personality_info}。"
historical_chat_query = ""
- num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子
+ num_recent_messages_for_query = 3 # 例如,取最近3条作为查询引子
if observation_info.chat_history and len(observation_info.chat_history) > 0:
# 从 chat_history (已处理并存入 ObservationInfo 的历史) 中取最新N条
# 或者,如果 observation_info.unprocessed_messages 更能代表“当前上下文”,也可以考虑用它
# 我们先用 chat_history,因为它包含了双方的对话历史,可能更稳定
recent_messages_for_query_list = observation_info.chat_history[-num_recent_messages_for_query:]
-
+
# 将这些消息的文本内容合并
query_texts_list = []
for msg_dict in recent_messages_for_query_list:
text_content = msg_dict.get("processed_plain_text", "")
- if text_content.strip(): # 只添加有内容的文本
+ if text_content.strip(): # 只添加有内容的文本
# 可以选择是否添加发送者信息到查询文本中,例如:
# sender_nickname = msg_dict.get("user_info", {}).get("user_nickname", "用户")
# query_texts_list.append(f"{sender_nickname}: {text_content}")
- query_texts_list.append(text_content) # 简单合并文本内容
-
+ query_texts_list.append(text_content) # 简单合并文本内容
+
if query_texts_list:
historical_chat_query = " ".join(query_texts_list).strip()
- logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'")
+ logger.debug(
+ f"[私聊][{self.private_name}] (ReplyGenerator) 生成的私聊历史查询文本 (最近{num_recent_messages_for_query}条): '{historical_chat_query[:100]}...'"
+ )
else:
- logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。")
+ logger.debug(
+ f"[私聊][{self.private_name}] (ReplyGenerator) 最近{num_recent_messages_for_query}条消息无有效文本内容,不进行私聊历史查询。"
+ )
else:
logger.debug(f"[私聊][{self.private_name}] (ReplyGenerator) 无聊天历史可用于生成私聊历史查询文本。")
@@ -316,13 +324,13 @@ class ReplyGenerator:
(
retrieved_global_memory_str,
retrieved_knowledge_str,
- retrieved_historical_chat_str # << 新增接收私聊历史回忆
+ retrieved_historical_chat_str, # << 新增接收私聊历史回忆
) = await retrieve_contextual_info(
- text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识
+ text=retrieval_context_for_global_and_knowledge, # 用于全局记忆和知识
private_name=self.private_name,
- chat_id=current_chat_id, # << 传递 chat_id
- historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本
- current_short_term_history_earliest_time=recent_history_start_time_for_exclusion # <--- 新增传递的参数
+ chat_id=current_chat_id, # << 传递 chat_id
+ historical_chat_query_text=historical_chat_query, # << 传递专门的查询文本
+ current_short_term_history_earliest_time=recent_history_start_time_for_exclusion, # <--- 新增传递的参数
)
# === 调用修改结束 ===
@@ -394,10 +402,18 @@ class ReplyGenerator:
base_format_params = {
"persona_text": persona_text,
"goals_str": goals_str,
- "chat_history_text": chat_history_text if chat_history_text.strip() else "还没有聊天记录。", # 当前短期历史
- "retrieved_global_memory_str": retrieved_global_memory_str if retrieved_global_memory_str.strip() else "无相关全局记忆。",
- "retrieved_knowledge_str": retrieved_knowledge_str if retrieved_knowledge_str.strip() else "无相关知识。",
- "retrieved_historical_chat_str": retrieved_historical_chat_str if retrieved_historical_chat_str.strip() else "无相关私聊历史回忆。", # << 新增
+ "chat_history_text": chat_history_text
+ if chat_history_text.strip()
+ else "还没有聊天记录。", # 当前短期历史
+ "retrieved_global_memory_str": retrieved_global_memory_str
+ if retrieved_global_memory_str.strip()
+ else "无相关全局记忆。",
+ "retrieved_knowledge_str": retrieved_knowledge_str
+ if retrieved_knowledge_str.strip()
+ else "无相关知识。",
+ "retrieved_historical_chat_str": retrieved_historical_chat_str
+ if retrieved_historical_chat_str.strip()
+ else "无相关私聊历史回忆。", # << 新增
"last_rejection_info": last_rejection_info_str,
"current_time_str": current_time_value,
"sender_name": sender_name_str,
From cb392c1981fbf06ffc5d53a2e57bfe4cf3bba31f Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 14:44:09 +0800
Subject: [PATCH 25/27] =?UTF-8?q?=E5=AE=8C=E6=95=B4=E4=BC=A0=E5=8F=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_processor.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index f706bffa..926ae821 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -77,7 +77,7 @@ class PFCProcessor:
await self.storage.store_message(message_obj, chat)
logger.trace(f"存储成功 (初步): {message_obj.processed_plain_text}")
- await self._update_embedding_vector(message_obj) # 明确传递 message_obj
+ await self._update_embedding_vector(message_obj, chat) # 明确传递 message_obj
# 4. 创建 PFC 聊天流
await self._create_pfc_chat(message_obj, chat)
From aa3568ecb2e58469506273e31223da86ef4b7173 Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 14:47:16 +0800
Subject: [PATCH 26/27] =?UTF-8?q?=E6=88=91=E6=98=AF=E5=82=BB=E9=80=BC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_processor.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/plugins/PFC/pfc_processor.py b/src/plugins/PFC/pfc_processor.py
index 926ae821..ea9ac4df 100644
--- a/src/plugins/PFC/pfc_processor.py
+++ b/src/plugins/PFC/pfc_processor.py
@@ -80,7 +80,7 @@ class PFCProcessor:
await self._update_embedding_vector(message_obj, chat) # 明确传递 message_obj
# 4. 创建 PFC 聊天流
- await self._create_pfc_chat(message_obj, chat)
+ await self._create_pfc_chat(message_obj)
# 5. 日志记录
# 确保 message_obj.message_info.time 是 float 类型的时间戳
From 7ec716f0e1036caf6a615b6048f3ca04a2119f8c Mon Sep 17 00:00:00 2001
From: Bakadax
Date: Sat, 10 May 2025 17:27:54 +0800
Subject: [PATCH 27/27] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8A=A5=E9=94=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
src/plugins/PFC/pfc_utils.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/plugins/PFC/pfc_utils.py b/src/plugins/PFC/pfc_utils.py
index 91c04ad5..3710bae0 100644
--- a/src/plugins/PFC/pfc_utils.py
+++ b/src/plugins/PFC/pfc_utils.py
@@ -432,6 +432,7 @@ async def retrieve_contextual_info(
if formatted_window_str and formatted_window_str.strip():
retrieved_historical_chat_str = f"你回忆起一段与当前对话相关的历史聊天:\n------\n{formatted_window_str.strip()}\n------\n(以上是针对本次私聊的回忆,供参考)\n"
historical_chat_log_msg = f"自动检索到相关私聊历史片段 (锚点ID: {anchor_id}, 相似度: {most_relevant_message_doc.get('similarity'):.3f})"
+ return retrieved_global_memory_str, retrieved_knowledge_str, retrieved_historical_chat_str
else:
historical_chat_log_msg = "检索到的私聊历史对话窗口格式化后为空。"
else: