From 3f5f41a58b715c273130875b9898eb55c0e89bf6 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 14:48:02 +0800 Subject: [PATCH 01/58] =?UTF-8?q?=E6=9A=82=E6=97=B6=E6=8F=90=E4=BA=A4?= =?UTF-8?q?=E4=B8=80=E4=B8=8B=EF=BC=8C=E5=85=88=E5=8E=BB=E5=93=81=E9=89=B4?= =?UTF-8?q?pfc=E7=9F=A5=E8=AF=86=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/config.py | 40 ++ src/plugins/group_nickname/nickname_mapper.py | 163 +++++++ .../group_nickname/nickname_processor.py | 301 +++++++++++++ src/plugins/group_nickname/nickname_utils.py | 126 ++++++ src/plugins/heartFC_chat/heartFC_chat.py | 124 +++++- .../person_info/relationship_manager.py | 413 ++++++++++-------- 6 files changed, 976 insertions(+), 191 deletions(-) create mode 100644 src/plugins/group_nickname/config.py create mode 100644 src/plugins/group_nickname/nickname_mapper.py create mode 100644 src/plugins/group_nickname/nickname_processor.py create mode 100644 src/plugins/group_nickname/nickname_utils.py diff --git a/src/plugins/group_nickname/config.py b/src/plugins/group_nickname/config.py new file mode 100644 index 00000000..35fff354 --- /dev/null +++ b/src/plugins/group_nickname/config.py @@ -0,0 +1,40 @@ +# GroupNickname/config.py +import threading + +# 功能总开关 +ENABLE_NICKNAME_MAPPING = True # 设置为 False 可完全禁用此功能 + +# --- LLM 相关配置 (示例,你需要根据实际情况修改) --- +# 用于绰号映射分析的 LLM 模型配置 +LLM_MODEL_NICKNAME_MAPPING = { + "model_name": "your_llm_model_for_mapping", # 替换成你用于分析的模型名称 + "api_key": "YOUR_API_KEY", # 如果需要 + "base_url": "YOUR_API_BASE", # 如果需要 + "temperature": 0.5, + "max_tokens": 200, +} + +# --- 数据库相关配置 (如果需要独立配置) --- +# 例如,如果数据库连接信息不同或需要特定集合名称 +DB_COLLECTION_PERSON_INFO = "person_info" # 你的用户信息集合名称 + +# --- Prompt 注入配置 --- +MAX_NICKNAMES_IN_PROMPT = 10 # Prompt 中最多注入的绰号数量 +NICKNAME_PROBABILITY_SMOOTHING = 1 # 用于加权随机选择的平滑因子 (防止概率为0) + +# --- 进程控制 --- +NICKNAME_QUEUE_MAX_SIZE = 100 # 进程间通信队列的最大容量 +NICKNAME_PROCESS_SLEEP_INTERVAL = 0.5 # 映射进程在队列为空时的休眠时间(秒) + + +# --- 运行时状态 (用于安全停止进程) --- +_stop_event = threading.Event() + +def get_stop_event(): + """获取全局停止事件""" + return _stop_event + +def set_stop_event(): + """设置全局停止事件,通知子进程退出""" + _stop_event.set() + diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py new file mode 100644 index 00000000..ae6cf1a0 --- /dev/null +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -0,0 +1,163 @@ +# GroupNickname/nickname_mapper.py +import json +from typing import Dict, Any, Tuple, List, Optional +from src.common.logger_manager import get_logger # 假设你的日志管理器路径 +from src.plugins.models.utils_model import LLMRequest # 假设你的 LLM 请求工具路径 +from .config import LLM_MODEL_NICKNAME_MAPPING, ENABLE_NICKNAME_MAPPING + +logger = get_logger("nickname_mapper") + +# 初始化用于绰号映射的 LLM 实例 +# 注意:这里的初始化方式可能需要根据你的 LLMRequest 实现进行调整 +try: + # 尝试使用字典解包来传递参数 + llm_mapper = LLMRequest( + model=LLM_MODEL_NICKNAME_MAPPING.get("model_name", "default_model"), + temperature=LLM_MODEL_NICKNAME_MAPPING.get("temperature", 0.5), + max_tokens=LLM_MODEL_NICKNAME_MAPPING.get("max_tokens", 200), + api_key=LLM_MODEL_NICKNAME_MAPPING.get("api_key"), + base_url=LLM_MODEL_NICKNAME_MAPPING.get("base_url"), + request_type="nickname_mapping" # 定义一个请求类型用于区分 + ) + logger.info("Nickname mapping LLM initialized successfully.") +except Exception as e: + logger.error(f"Failed to initialize nickname mapping LLM: {e}", exc_info=True) + llm_mapper = None # 初始化失败则置为 None + +def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: + """ + 构建用于 LLM 分析绰号映射的 Prompt。 + + Args: + chat_history_str: 格式化后的聊天记录字符串。 + bot_reply: Bot 的回复内容。 + user_name_map: 用户 ID 到已知名称(如 person_name 或 nickname)的映射。 + + Returns: + str: 构建好的 Prompt。 + """ + user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) + + prompt = f""" +任务:分析以下聊天记录和 Bot 的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 + +已知用户信息: +{user_list_str} + +聊天记录: +--- +{chat_history_str} +--- + +Bot 最新回复: +{bot_reply} + +分析要求: +1. 识别聊天记录和 Bot 回复中出现的可能是用户绰号的词语。 +2. 判断这些绰号是否能明确地指向某个特定的用户 ID。一个绰号必须在上下文中清晰地与某个发言人或被提及的人关联起来。 +3. 如果能建立可靠的一一映射关系,请输出一个 JSON 对象,格式如下: + {{ + "is_exist": true, + "data": {{ + "用户ID_A": "绰号_A", + "用户ID_B": "绰号_B" + }} + }} + 其中 "data" 字段的键是用户的 ID,值是对应的绰号。只包含你能确认映射关系的绰号。 +4. 如果无法建立任何可靠的一一映射关系(例如,绰号指代不明、没有出现绰号、或无法确认绰号与用户的关联),请输出 JSON 对象: + {{ + "is_exist": false + }} +5. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 + +输出: +""" + return prompt + +async def analyze_chat_for_nicknames( + chat_history_str: str, + bot_reply: str, + user_name_map: Dict[str, str] +) -> Dict[str, Any]: + """ + 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射。 + + Args: + chat_history_str: 格式化后的聊天记录字符串。 + bot_reply: Bot 的回复内容。 + user_name_map: 用户 ID 到已知名称(如 person_name 或 nickname)的映射。 + + Returns: + Dict[str, Any]: 分析结果,格式为 { "is_exist": bool, "data": Optional[Dict[str, str]] }。 + 如果出错,返回 {"is_exist": False}。 + """ + if not ENABLE_NICKNAME_MAPPING: + logger.debug("Nickname mapping feature is disabled.") + return {"is_exist": False} + + if llm_mapper is None: + logger.error("Nickname mapping LLM is not initialized. Cannot perform analysis.") + return {"is_exist": False} + + prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) + logger.debug(f"Nickname mapping prompt built:\n{prompt}") # 调试日志 + + try: + # --- 调用 LLM --- + # 注意:这里的调用方式需要根据你的 LLMRequest 实现进行调整 + # 可能需要使用 generate_response_sync 或其他同步方法,因为这将在独立进程中运行 + # 或者如果 LLMRequest 支持异步,确保在异步环境中调用 + # response_content, _, _ = await llm_mapper.generate_response(prompt) + + # 假设 llm_mapper 有一个同步的 generate 方法或在异步环境中调用 + # 这里暂时使用 await,如果你的 LLMRequest 不支持,需要修改 + response_content, _, _ = await llm_mapper.generate_response(prompt) + + + logger.debug(f"LLM raw response for nickname mapping: {response_content}") + + # --- 解析 LLM 响应 --- + if not response_content: + logger.warning("LLM returned empty content for nickname mapping.") + return {"is_exist": False} + + # 尝试去除可能的代码块标记 + response_content = response_content.strip() + if response_content.startswith("```json"): + response_content = response_content[7:] + if response_content.endswith("```"): + response_content = response_content[:-3] + response_content = response_content.strip() + + try: + result = json.loads(response_content) + # 基本验证 + if isinstance(result, dict) and "is_exist" in result: + if result["is_exist"] is True: + if "data" in result and isinstance(result["data"], dict): + # 过滤掉 data 为空的情况 + if not result["data"]: + logger.debug("LLM indicated is_exist=True but data is empty. Treating as False.") + return {"is_exist": False} + logger.info(f"Nickname mapping found: {result['data']}") + return {"is_exist": True, "data": result["data"]} + else: + logger.warning("LLM response format error: is_exist is True but 'data' is missing or not a dict.") + return {"is_exist": False} + elif result["is_exist"] is False: + logger.info("No reliable nickname mapping found by LLM.") + return {"is_exist": False} + else: + logger.warning("LLM response format error: 'is_exist' is not a boolean.") + return {"is_exist": False} + else: + logger.warning("LLM response format error: Missing 'is_exist' key or not a dict.") + return {"is_exist": False} + except json.JSONDecodeError as json_err: + logger.error(f"Failed to parse LLM response as JSON: {json_err}\nRaw response: {response_content}") + return {"is_exist": False} + + except Exception as e: + logger.error(f"Error during nickname mapping LLM call or processing: {e}", exc_info=True) + return {"is_exist": False} + diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py new file mode 100644 index 00000000..76c4abb5 --- /dev/null +++ b/src/plugins/group_nickname/nickname_processor.py @@ -0,0 +1,301 @@ +# GroupNickname/nickname_processor.py +import asyncio +import time +import traceback +# 明确导入 Event 和 Queue +from multiprocessing import Process, Queue as mpQueue +# 尝试从 synchronize 导入 Event +from multiprocessing.synchronize import Event as mpEvent +from typing import Dict, Any, Tuple, Optional, List + +from pymongo import MongoClient, UpdateOne +from pymongo.errors import ConnectionFailure, OperationFailure + +# 假设你的项目结构允许这样导入 +try: + from src.common.logger_manager import get_logger + from src.config.config import global_config + from .config import ( + ENABLE_NICKNAME_MAPPING, DB_COLLECTION_PERSON_INFO, + NICKNAME_QUEUE_MAX_SIZE, NICKNAME_PROCESS_SLEEP_INTERVAL, + get_stop_event, set_stop_event + ) + from .nickname_mapper import analyze_chat_for_nicknames +except ImportError: + # 提供备选导入路径或记录错误,以便调试 + print("Error: Failed to import necessary modules. Please check your project structure and PYTHONPATH.") + # 在无法导入时,定义临时的 get_logger 以避免 NameError,但这只是权宜之计 + import logging + def get_logger(name): + return logging.getLogger(name) + # 定义临时的全局配置,这同样是权宜之计 + class MockGlobalConfig: + mongodb_uri = "mongodb://localhost:27017/" # 示例 URI + mongodb_database = "your_db_name" # 示例数据库名 + global_config = MockGlobalConfig() + # 定义临时的配置变量 + ENABLE_NICKNAME_MAPPING = True + DB_COLLECTION_PERSON_INFO = "person_info" + NICKNAME_QUEUE_MAX_SIZE = 100 + NICKNAME_PROCESS_SLEEP_INTERVAL = 0.5 + # 使用导入的 mpEvent + _stop_event_internal = mpEvent() + def get_stop_event(): return _stop_event_internal + def set_stop_event(): _stop_event_internal.set() + # 定义临时的 analyze_chat_for_nicknames + async def analyze_chat_for_nicknames(*args, **kwargs): return {"is_exist": False} + + +logger = get_logger("nickname_processor") + +# --- 数据库连接 --- +mongo_client: Optional[MongoClient] = None +person_info_collection = None + +def _initialize_db(): + """初始化数据库连接(在子进程中调用)""" + global mongo_client, person_info_collection + if mongo_client is None: + try: + mongo_uri = global_config.mongodb_uri + if not mongo_uri: + raise ValueError("MongoDB URI not found in global config.") + + mongo_client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000) + mongo_client.admin.command('ping') + db = mongo_client[global_config.mongodb_database] + person_info_collection = db[DB_COLLECTION_PERSON_INFO] + logger.info("Nickname processor: Database connection initialized successfully.") + except (ConnectionFailure, ValueError, OperationFailure) as e: + logger.error(f"Nickname processor: Failed to initialize database connection: {e}", exc_info=True) + mongo_client = None + person_info_collection = None + except Exception as e: + logger.error(f"Nickname processor: An unexpected error occurred during DB initialization: {e}", exc_info=True) + mongo_client = None + person_info_collection = None + + +def _close_db(): + """关闭数据库连接""" + global mongo_client + if mongo_client: + try: + mongo_client.close() + logger.info("Nickname processor: Database connection closed.") + except Exception as e: + logger.error(f"Nickname processor: Error closing database connection: {e}", exc_info=True) + finally: + mongo_client = None + + +# --- 数据库更新逻辑 --- +async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): + """ + 更新数据库中用户的群组绰号计数。 + + Args: + group_id (str): 群组 ID。 + nickname_map (Dict[str, str]): 需要更新的 {用户ID: 绰号} 映射。 + """ + if not person_info_collection: + logger.error("Database collection is not initialized. Cannot update nickname counts.") + return + if not nickname_map: + logger.debug("Empty nickname map provided for update.") + return + + logger.info(f"Attempting to update nickname counts for group '{group_id}' with map: {nickname_map}") + + for user_id, nickname in nickname_map.items(): + if not user_id or not nickname: + logger.warning(f"Skipping invalid entry in nickname map: user_id='{user_id}', nickname='{nickname}'") + continue + + group_id_str = str(group_id) # 确保是字符串 + + try: + # a. 确保用户文档存在 group_nickname 字段且为 list + person_info_collection.update_one( + {"person_id": user_id}, + {"$setOnInsert": {"group_nickname": []}}, # 如果字段不存在则创建为空列表 + upsert=True + ) + + # b. 确保特定 group_id 的条目存在 + update_result = person_info_collection.update_one( + {"person_id": user_id, f"group_nickname.{group_id_str}": {"$exists": False}}, + {"$push": {"group_nickname": {group_id_str: []}}} # 如果不存在则添加 + ) + if update_result.modified_count > 0: + logger.debug(f"Added group entry for group '{group_id_str}' for user '{user_id}'.") + + # c. 确保特定 nickname 存在于 group_id 的数组中,并增加计数 + update_result = person_info_collection.update_one( + { + "person_id": user_id, + "group_nickname": { + "$elemMatch": { + group_id_str: {"$elemMatch": {nickname: {"$exists": True}}} + } + } + }, + {"$inc": {f"group_nickname.$[group].$[nick].{nickname}": 1}}, + array_filters=[ + {f"group.{group_id_str}": {"$exists": True}}, + {f"nick.{nickname}": {"$exists": True}} + ] + ) + + if update_result.matched_count == 0: + # nickname 不存在,添加 nickname 并设置次数为 1 + add_nick_result = person_info_collection.update_one( + {"person_id": user_id, f"group_nickname.{group_id_str}": {"$exists": True}}, + {"$push": {f"group_nickname.$[group].{group_id_str}": {nickname: 1}}}, + array_filters=[{f"group.{group_id_str}": {"$exists": True}}] + ) + if add_nick_result.modified_count > 0: + logger.debug(f"Added nickname '{nickname}' with count 1 for user '{user_id}' in group '{group_id_str}'.") + else: + logger.warning(f"Failed to add nickname '{nickname}' for user '{user_id}' in group '{group_id_str}'. Update result: {add_nick_result.raw_result}") + + elif update_result.modified_count > 0: + logger.debug(f"Incremented count for nickname '{nickname}' for user '{user_id}' in group '{group_id_str}'.") + else: + logger.warning(f"Nickname increment operation matched but did not modify for user '{user_id}', nickname '{nickname}'. Update result: {update_result.raw_result}") + + except OperationFailure as op_err: + logger.error(f"Database operation failed for user {user_id}, group {group_id_str}, nickname {nickname}: {op_err}", exc_info=True) + except Exception as e: + logger.error(f"Unexpected error updating nickname for user {user_id}, group {group_id_str}, nickname {nickname}: {e}", exc_info=True) + + +# --- 队列和进程 --- +# 使用明确导入的类型 +nickname_queue: mpQueue[Tuple[str, str, str, Dict[str, str]]] = mpQueue(maxsize=NICKNAME_QUEUE_MAX_SIZE) +_nickname_process: Optional[Process] = None + +async def add_to_nickname_queue( + chat_history_str: str, + bot_reply: str, + group_id: Optional[str], # 群聊时需要 + user_name_map: Dict[str, str] # 用户ID到名字的映射 +): + """将需要分析的数据放入队列。""" + if not ENABLE_NICKNAME_MAPPING: + return + + if group_id is None: + logger.debug("Skipping nickname mapping for private chat.") + return # 私聊暂时不处理绰号映射 + + try: + item = (chat_history_str, bot_reply, str(group_id), user_name_map) # 确保 group_id 是字符串 + # 使用 put_nowait,如果队列满则会抛出 Full 异常 + nickname_queue.put_nowait(item) + logger.debug(f"Added item to nickname queue for group {group_id}.") + # 捕获 queue.Full 异常 + except Exception as e: + # 检查异常类型是否为队列满(需要导入 queue 模块或处理 Full 异常) + # from queue import Full # 如果 nickname_queue 是 asyncio.Queue + # if isinstance(e, Full): + # logger.warning("Nickname processing queue is full. Discarding new item.") + # else: + # logger.error(f"Error adding item to nickname queue: {e}", exc_info=True) + # 由于 multiprocessing.Queue 的 Full 异常在不同环境下可能不同,这里暂时捕获通用异常 + logger.warning(f"Failed to add item to nickname queue (possibly full): {e}", exc_info=True) + + +# 使用从 synchronize 导入的 mpEvent +async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): # 使用 mpEvent + """独立进程中的主循环,处理队列任务。""" + _initialize_db() # 初始化数据库连接 + logger.info("Nickname processing loop started.") + + while not stop_event.is_set(): + try: + if not queue.empty(): + # 从队列获取任务 + chat_history_str, bot_reply, group_id, user_name_map = queue.get() + logger.debug(f"Processing nickname mapping task for group {group_id}...") + + # 调用 LLM 分析 + analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) + + # 如果找到映射,更新数据库 + if analysis_result.get("is_exist") and analysis_result.get("data"): + await update_nickname_counts(group_id, analysis_result["data"]) + + # 短暂 sleep 避免 CPU 占用过高 + await asyncio.sleep(0.05) # 稍微减少 sleep 时间 + + else: + # 队列为空时休眠 + await asyncio.sleep(NICKNAME_PROCESS_SLEEP_INTERVAL) + + except asyncio.CancelledError: + logger.info("Nickname processing loop cancelled.") + break # 响应取消请求 + except Exception as e: + logger.error(f"Error in nickname processing loop: {e}\n{traceback.format_exc()}") + # 发生错误时也休眠一下,防止快速连续出错 + await asyncio.sleep(5) + + _close_db() # 关闭数据库连接 + logger.info("Nickname processing loop finished.") + + +# 使用从 synchronize 导入的 mpEvent +def _run_processor_process(queue: mpQueue, stop_event: mpEvent): # 使用 mpEvent + """进程启动函数,运行异步循环。""" + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(_nickname_processing_loop(queue, stop_event)) + loop.close() + except Exception as e: + logger.error(f"Error running nickname processor process: {e}", exc_info=True) + +def start_nickname_processor(): + """启动绰号映射处理进程。""" + global _nickname_process + if not ENABLE_NICKNAME_MAPPING: + logger.info("Nickname mapping feature is disabled. Processor not started.") + return + + if _nickname_process is None or not _nickname_process.is_alive(): + logger.info("Starting nickname processor process...") + stop_event = get_stop_event() + stop_event.clear() + # 传递明确导入的类型 + _nickname_process = Process(target=_run_processor_process, args=(nickname_queue, stop_event), daemon=True) + _nickname_process.start() + logger.info(f"Nickname processor process started with PID: {_nickname_process.pid}") + else: + logger.warning("Nickname processor process is already running.") + +def stop_nickname_processor(): + """停止绰号映射处理进程。""" + global _nickname_process + if _nickname_process and _nickname_process.is_alive(): + logger.info("Stopping nickname processor process...") + set_stop_event() + try: + _nickname_process.join(timeout=10) + if _nickname_process.is_alive(): + logger.warning("Nickname processor process did not stop gracefully after 10 seconds. Terminating...") + _nickname_process.terminate() + _nickname_process.join(timeout=5) + except Exception as e: + logger.error(f"Error stopping nickname processor process: {e}", exc_info=True) + finally: + if _nickname_process and not _nickname_process.is_alive(): + logger.info("Nickname processor process stopped successfully.") + else: + logger.error("Failed to stop nickname processor process.") + _nickname_process = None + else: + logger.info("Nickname processor process is not running.") + +# 可以在应用启动时调用 start_nickname_processor() +# 在应用关闭时调用 stop_nickname_processor() diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py new file mode 100644 index 00000000..47373124 --- /dev/null +++ b/src/plugins/group_nickname/nickname_utils.py @@ -0,0 +1,126 @@ +# GroupNickname/nickname_utils.py +import random +from typing import List, Dict, Tuple, Optional +from src.common.logger_manager import get_logger +from .config import MAX_NICKNAMES_IN_PROMPT, NICKNAME_PROBABILITY_SMOOTHING + +logger = get_logger("nickname_utils") + +def select_nicknames_for_prompt( + all_nicknames_info: Dict[str, List[Dict[str, int]]] +) -> List[Tuple[str, str, int]]: + """ + 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号。 + + Args: + all_nicknames_info: 包含用户及其绰号信息的字典,格式为 + { "用户名1": [{"绰号A": 次数}, {"绰号B": 次数}], ... } + + Returns: + List[Tuple[str, str, int]]: 选中的绰号列表,每个元素为 (用户名, 绰号, 次数)。 + 按次数降序排序。 + """ + if not all_nicknames_info: + return [] + + candidates = [] + for user_name, nicknames in all_nicknames_info.items(): + if nicknames: + for nickname_entry in nicknames: + # nickname_entry 应该是 {"绰号": 次数} 格式 + if isinstance(nickname_entry, dict) and len(nickname_entry) == 1: + nickname, count = list(nickname_entry.items())[0] + # 确保次数是正整数 + if isinstance(count, int) and count > 0: + # 添加平滑因子,避免概率为0,并让低频词也有机会 + weight = count + NICKNAME_PROBABILITY_SMOOTHING + candidates.append((user_name, nickname, count, weight)) + else: + logger.warning(f"Invalid count for nickname '{nickname}' of user '{user_name}': {count}. Skipping.") + else: + logger.warning(f"Invalid nickname entry format for user '{user_name}': {nickname_entry}. Skipping.") + + + if not candidates: + return [] + + # 计算总权重 + total_weight = sum(c[3] for c in candidates) + + if total_weight <= 0: + # 如果所有权重都无效或为0,则随机选择(或按次数选择) + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + selected = candidates[:MAX_NICKNAMES_IN_PROMPT] + else: + # 计算归一化概率 + probabilities = [c[3] / total_weight for c in candidates] + + # 使用概率分布进行加权随机选择(不重复) + num_to_select = min(MAX_NICKNAMES_IN_PROMPT, len(candidates)) + try: + # random.choices 允许重复,我们需要不重复的选择 + # 可以使用 numpy.random.choice 或手动实现不重复加权抽样 + # 这里用一个简化的方法:多次 choices 然后去重,直到达到数量或无法再选 + selected_indices = set() + selected = [] + attempts = 0 + max_attempts = num_to_select * 5 # 防止无限循环 + + while len(selected) < num_to_select and attempts < max_attempts: + # 每次只选一个,避免一次选多个时概率分布变化导致的问题 + chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] + if chosen_index not in selected_indices: + selected_indices.add(chosen_index) + selected.append(candidates[chosen_index]) + attempts += 1 + + # 如果尝试多次后仍未选够,补充出现次数最多的 + if len(selected) < num_to_select: + remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + needed = num_to_select - len(selected) + selected.extend(remaining_candidates[:needed]) + + except Exception as e: + logger.error(f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True) + # 出错时回退到选择次数最多的 N 个 + candidates.sort(key=lambda x: x[2], reverse=True) + selected = candidates[:MAX_NICKNAMES_IN_PROMPT] + + + # 格式化输出并按次数排序 + result = [(user, nick, count) for user, nick, count, _weight in selected] + result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 + + logger.debug(f"Selected nicknames for prompt: {result}") + return result + + +def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, int]]) -> str: + """ + 将选中的绰号信息格式化为注入 Prompt 的字符串。 + + Args: + selected_nicknames: 选中的绰号列表 (用户名, 绰号, 次数)。 + + Returns: + str: 格式化后的字符串,如果列表为空则返回空字符串。 + """ + if not selected_nicknames: + return "" + + prompt_lines = ["以下是聊天记录中一些成员在本群的绰号信息(按常用度排序):"] + grouped_by_user: Dict[str, List[str]] = {} + + for user_name, nickname, _count in selected_nicknames: + if user_name not in grouped_by_user: + grouped_by_user[user_name] = [] + # 添加引号以区分绰号 + grouped_by_user[user_name].append(f'“{nickname}”') + + for user_name, nicknames in grouped_by_user.items(): + nicknames_str = "、".join(nicknames) + prompt_lines.append(f"{user_name},在本群有时被称为:{nicknames_str}") + + return "\n".join(prompt_lines) + "\n" # 末尾加换行符 + diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index c15c4f83..f5e1ddb7 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -21,13 +21,19 @@ from src.heart_flow.sub_mind import SubMind from src.heart_flow.observation import Observation from src.plugins.heartFC_chat.heartflow_prompt_builder import global_prompt_manager, prompt_builder import contextlib -from src.plugins.utils.chat_message_builder import num_new_messages_since +from src.plugins.utils.chat_message_builder import num_new_messages_since, get_raw_msg_before_timestamp_with_chat, build_readable_messages from src.plugins.heartFC_chat.heartFC_Cycleinfo import CycleInfo from .heartFC_sender import HeartFCSender from src.plugins.chat.utils import process_llm_response from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.moods.moods import MoodManager from src.individuality.individuality import Individuality +from src.plugins.person_info.relationship_manager import relationship_manager + +# --- 导入 GroupNickname 相关 --- +from src.plugins.group_nickname.config import ENABLE_NICKNAME_MAPPING # <--- 导入开关 +from src.plugins.group_nickname.nickname_processor import add_to_nickname_queue # <--- 导入队列添加函数 +# --- 结束导入 GroupNickname --- INITIAL_DURATION = 60.0 @@ -469,7 +475,6 @@ class HeartFChatting: return False, "" # execute:执行 - return await self._handle_action( action, reasoning, planner_result.get("emoji_query", ""), cycle_timers, planner_start_db_time ) @@ -509,11 +514,17 @@ class HeartFChatting: try: if action == "text_reply": - return await handler(reasoning, emoji_query, cycle_timers) + # 调用文本回复处理,它会返回 (bool, thinking_id) + success, thinking_id = await handler(reasoning, emoji_query, cycle_timers) + return success, thinking_id # 直接返回结果 elif action == "emoji_reply": - return await handler(reasoning, emoji_query), "" + # 调用表情回复处理,它只返回 bool + success = await handler(reasoning, emoji_query) + return success, "" # thinking_id 为空字符串 else: # no_reply - return await handler(reasoning, planner_start_db_time, cycle_timers), "" + # 调用不回复处理,它只返回 bool + success = await handler(reasoning, planner_start_db_time, cycle_timers) + return success, "" # thinking_id 为空字符串 except HeartFCError as e: logger.error(f"{self.log_prefix} 处理{action}时出错: {e}") # 出错时也重置计数器 @@ -530,6 +541,7 @@ class HeartFChatting: 2. 创建思考消息 3. 生成回复 4. 发送消息 + 5. [新增] 触发绰号分析 参数: reasoning: 回复原因 @@ -553,6 +565,7 @@ class HeartFChatting: if not thinking_id: raise PlannerError("无法创建思考消息") + reply = None # 初始化 reply try: # 生成回复 with Timer("生成回复", cycle_timers): @@ -566,7 +579,6 @@ class HeartFChatting: raise ReplierError("回复生成失败") # 发送消息 - with Timer("发送消息", cycle_timers): await self._sender( thinking_id=thinking_id, @@ -575,6 +587,11 @@ class HeartFChatting: send_emoji=emoji_query, ) + # --- [新增] 触发绰号分析 --- + # 在发送成功后(或至少尝试发送后)触发 + await self._trigger_nickname_analysis(anchor_message, reply) + # --- 结束触发 --- + return True, thinking_id except (ReplierError, SenderError) as e: @@ -682,6 +699,101 @@ class HeartFChatting: # 发生意外错误时,可以选择是否重置计数器,这里选择不重置 return False # 表示动作未成功 + # --- [修改] 触发绰号分析的函数 --- + async def _trigger_nickname_analysis(self, anchor_message: MessageRecv, reply: List[str]): + """ + 触发绰号分析任务,将相关数据放入处理队列。 + + Args: + anchor_message: 锚点消息对象。 + reply: Bot 生成的回复内容列表。 + """ + if not ENABLE_NICKNAME_MAPPING: + return # 如果功能未开启,则直接返回 + + if not anchor_message or not anchor_message.chat_stream or not anchor_message.chat_stream.group_info: + logger.debug(f"{self.log_prefix} Skipping nickname analysis: Not a group chat or invalid anchor.") + return # 仅在群聊中进行分析 + + try: + # 1. 获取原始消息列表 + history_limit = 30 # 例如,获取最近 30 条消息 + history_messages = get_raw_msg_before_timestamp_with_chat( + chat_id=anchor_message.chat_stream.stream_id, + timestamp=time.time(), # 获取当前时间点的历史 + limit=history_limit + ) + + # --- 使用 build_readable_messages 格式化历史记录 --- + chat_history_str = await build_readable_messages( + messages=history_messages, + replace_bot_name=True, # 在分析时也替换机器人名字,使其与 LLM 交互一致 + merge_messages=False, # 不合并,保留原始对话流 + timestamp_mode="relative", # 使用相对时间戳 + read_mark=0.0, # 不需要已读标记 + truncate=False # 获取完整内容进行分析 + ) + # --- 结束使用 build_readable_messages --- + + # 2. 获取 Bot 回复字符串 + bot_reply_str = " ".join(reply) + + # 3. 获取群号 + group_id = str(anchor_message.chat_stream.group_info.group_id) # 确保是字符串 + + # 4. 获取当前上下文中涉及的用户 ID 及其已知名称 + user_ids_in_history = set() + for msg in history_messages: + sender_id = msg.get('sender_id') + if sender_id: + user_ids_in_history.add(str(sender_id)) # 确保是字符串 + + user_name_map = {} + if user_ids_in_history: + platform = anchor_message.chat_stream.platform + # 尝试批量获取 person_name + # 假设 relationship_manager 有 get_person_names_batch(platform, user_ids) + try: + # 注意:你需要实现 get_person_names_batch 方法 + # names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) + # 这里暂时用单次获取代替,如果你的 relationship_manager 没有批量方法 + names_data = {} + for user_id in user_ids_in_history: + name = await relationship_manager.get_person_name(platform, user_id) + if name: + names_data[user_id] = name + + except AttributeError: + logger.warning("relationship_manager does not have get_person_names_batch method. Falling back to single lookups.") + names_data = {} + for user_id in user_ids_in_history: + name = await relationship_manager.get_person_name(platform, user_id) + if name: + names_data[user_id] = name + except Exception as e: + logger.error(f"Error getting person names: {e}", exc_info=True) + names_data = {} # 出错时置空 + + + for user_id in user_ids_in_history: + if user_id in names_data: + user_name_map[user_id] = names_data[user_id] + else: + # 回退查找 nickname + latest_nickname = next((m.get('sender_nickname') for m in reversed(history_messages) if str(m.get('sender_id')) == user_id), None) + if latest_nickname: + user_name_map[user_id] = latest_nickname + else: + user_name_map[user_id] = f"未知({user_id})" + + # 5. 添加到队列 + await add_to_nickname_queue(chat_history_str, bot_reply_str, group_id, user_name_map) + logger.debug(f"{self.log_prefix} Triggered nickname analysis for group {group_id}.") + + except Exception as e: + logger.error(f"{self.log_prefix} Error triggering nickname analysis: {e}", exc_info=True) + # --- 结束触发函数 --- + async def _wait_for_new_message(self, observation, planner_start_db_time: float, log_prefix: str) -> bool: """ 等待新消息 或 检测到关闭信号 diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index a25dd731..adf293ea 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -5,9 +5,7 @@ from bson.decimal128 import Decimal128 from .person_info import person_info_manager import time import random -# import re -# import traceback - +from typing import List, Dict, Any, Optional, Tuple # 确保导入了 List, Dict, Optional, Tuple logger = get_logger("relation") @@ -82,26 +80,155 @@ class RelationshipManager: @staticmethod async def is_qved_name(platform, user_id): - """判断是否认识某人""" + """判断是否已经命名""" person_id = person_info_manager.get_person_id(platform, user_id) - is_qved = await person_info_manager.has_one_field(person_id, "person_name") - old_name = await person_info_manager.get_value(person_id, "person_name") - # print(f"old_name: {old_name}") - # print(f"is_qved: {is_qved}") - if is_qved and old_name is not None: - return True - else: - return False + # 优化:直接检查 person_name 字段是否存在且不为 None 或空字符串 + person_name = await person_info_manager.get_value(person_id, "person_name") + return bool(person_name) # 如果 person_name 非空则返回 True + + @staticmethod + async def get_person_name(platform: str, user_id: str) -> Optional[str]: + """获取单个用户的 person_name""" + person_id = person_info_manager.get_person_id(platform, str(user_id)) # 确保 user_id 是字符串 + return await person_info_manager.get_value(person_id, "person_name") + + # --- [新增] 批量获取用户名称 --- + @staticmethod + async def get_person_names_batch(platform: str, user_ids: List[str]) -> Dict[str, str]: + """ + 批量获取多个用户的 person_name。 + + Args: + platform (str): 平台名称。 + user_ids (List[str]): 用户 ID 列表。 + + Returns: + Dict[str, str]: 映射 {user_id: person_name},只包含成功获取到名称的用户。 + """ + if not user_ids: + return {} + + person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] # 确保 uid 是字符串 + names_map = {} + try: + # 使用 $in 操作符批量查询 + cursor = person_info_manager.collection.find( + {"person_id": {"$in": person_ids}}, + {"_id": 0, "person_id": 1, "person_name": 1} # 只查询需要的字段 + ) + async for doc in cursor: + # 从 person_id 反向推导出原始 user_id + # 注意:这依赖于 get_person_id 的实现方式,假设它是 platform_userid 格式 + original_user_id = doc.get("person_id", "").split("_", 1)[-1] + person_name = doc.get("person_name") + if original_user_id and person_name: + names_map[original_user_id] = person_name + logger.debug(f"Batch get person names for {len(user_ids)} users, found {len(names_map)} names.") + except Exception as e: + logger.error(f"Error during batch get person names: {e}", exc_info=True) + return names_map + # --- 结束新增 --- + + # --- [新增] 批量获取用户群组绰号 --- + @staticmethod + async def get_users_group_nicknames(platform: str, user_ids: List[str], group_id: str) -> Dict[str, List[Dict[str, int]]]: + """ + 批量获取多个用户在指定群组的绰号信息。 + + Args: + platform (str): 平台名称。 + user_ids (List[str]): 用户 ID 列表。 + group_id (str): 群组 ID。 + + Returns: + Dict[str, List[Dict[str, int]]]: 映射 {person_name: [{"绰号A": 次数}, ...]} + 只包含成功获取到绰号信息的用户。 + 键是用户的 person_name。 + """ + if not user_ids or not group_id: + return {} + + person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] + nicknames_data = {} + group_id_str = str(group_id) # 确保 group_id 是字符串 + + try: + # 查询包含目标 person_id 且 group_nickname 字段存在的文档 + cursor = person_info_manager.collection.find( + { + "person_id": {"$in": person_ids}, + "group_nickname": {"$elemMatch": {group_id_str: {"$exists": True}}} # 确保该群组的条目存在 + }, + {"_id": 0, "person_id": 1, "person_name": 1, "group_nickname": 1} # 查询所需字段 + ) + + async for doc in cursor: + person_name = doc.get("person_name") + if not person_name: # 如果没有 person_name,则跳过此用户 + continue + + group_nicknames_list = doc.get("group_nickname", []) + user_group_nicknames = [] + # 遍历 group_nickname 列表,找到对应 group_id 的条目 + for group_entry in group_nicknames_list: + if group_id_str in group_entry and isinstance(group_entry[group_id_str], list): + # 提取该群组的绰号列表 [{"绰号": 次数}, ...] + user_group_nicknames = group_entry[group_id_str] + break # 找到后即可退出内层循环 + + if user_group_nicknames: # 确保列表非空 + # 过滤掉格式不正确的条目 + valid_nicknames = [] + for item in user_group_nicknames: + if isinstance(item, dict) and len(item) == 1: + key, value = list(item.items())[0] + if isinstance(key, str) and isinstance(value, int): + valid_nicknames.append(item) + else: + logger.warning(f"Invalid nickname format in DB for user {person_name}, group {group_id_str}: {item}") + else: + logger.warning(f"Invalid nickname entry format in DB for user {person_name}, group {group_id_str}: {item}") + + if valid_nicknames: + nicknames_data[person_name] = valid_nicknames # 使用 person_name 作为 key + + logger.debug(f"Batch get group nicknames for {len(user_ids)} users in group {group_id_str}, found data for {len(nicknames_data)} users.") + + except Exception as e: + logger.error(f"Error during batch get group nicknames: {e}", exc_info=True) + + return nicknames_data + # --- 结束新增 --- + @staticmethod async def first_knowing_some_one(platform, user_id, user_nickname, user_cardname, user_avatar): """判断是否认识某人""" person_id = person_info_manager.get_person_id(platform, user_id) - await person_info_manager.update_one_field(person_id, "nickname", user_nickname) - # await person_info_manager.update_one_field(person_id, "user_cardname", user_cardname) - # await person_info_manager.update_one_field(person_id, "user_avatar", user_avatar) + # 首次认识时,除了更新 nickname,也应该设置初始关系值等 + initial_data = { + "platform": platform, + "user_id": user_id, + "nickname": user_nickname, + "konw_time": int(time.time()), + "relationship_value": 0.0, # 设置初始关系值为 0 + "msg_interval": -1, # 初始消息间隔设为 -1 或其他标记 + "msg_interval_list": [], + "group_nickname": [] # 初始化为空列表 + } + # 使用 update_one 并结合 $setOnInsert 来避免覆盖已有数据 + await person_info_manager.collection.update_one( + {"person_id": person_id}, + { + "$set": {"nickname": user_nickname}, # 总是更新 nickname + "$setOnInsert": initial_data # 仅在插入新文档时设置这些初始值 + }, + upsert=True + ) + # 尝试获取或生成 person_name await person_info_manager.qv_person_name(person_id, user_nickname, user_cardname, user_avatar) + async def calculate_update_relationship_value(self, chat_stream: ChatStream, label: str, stance: str) -> tuple: """计算并变更关系值 新的关系值变更计算方式: @@ -135,216 +262,132 @@ class RelationshipManager: } person_id = person_info_manager.get_person_id(chat_stream.user_info.platform, chat_stream.user_info.user_id) - data = { + data = { # 这个 data 似乎是用于 setOnInsert 的,应该在 first_knowing 时处理 "platform": chat_stream.user_info.platform, "user_id": chat_stream.user_info.user_id, "nickname": chat_stream.user_info.user_nickname, "konw_time": int(time.time()), } old_value = await person_info_manager.get_value(person_id, "relationship_value") - old_value = self.ensure_float(old_value, person_id) + old_value = self.ensure_float(old_value, person_id) # 确保是 float - if old_value > 1000: - old_value = 1000 - elif old_value < -1000: - old_value = -1000 + # 限制旧值范围 + old_value = max(min(old_value, 1000), -1000) - value = valuedict[label] - if old_value >= 0: - if valuedict[label] >= 0 and stancedict[stance] != 2: - value = value * math.cos(math.pi * old_value / 2000) - if old_value > 500: - rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) - high_value_count = len(rdict) - if old_value > 700: - value *= 3 / (high_value_count + 2) # 排除自己 - else: - value *= 3 / (high_value_count + 3) - elif valuedict[label] < 0 and stancedict[stance] != 0: - value = value * math.exp(old_value / 2000) - else: - value = 0 - elif old_value < 0: - if valuedict[label] >= 0 and stancedict[stance] != 2: - value = value * math.exp(old_value / 2000) - elif valuedict[label] < 0 and stancedict[stance] != 0: - value = value * math.cos(math.pi * old_value / 2000) - else: - value = 0 + value_change = 0.0 # 初始化变化量 + base_value = valuedict.get(label, 0.0) # 获取基础情绪值 + # 应用立场影响和关系值衰减/增强逻辑 + if base_value > 0 and stancedict.get(stance, 1) != 2: # 正面情绪且非反对 + value_change = base_value * math.cos(math.pi * old_value / 2000) + if old_value > 500: # 高关系值增长减缓 + rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) + high_value_count = len(rdict) + # 注意:这里的减缓因子可能需要调整 + value_change *= 3 / (high_value_count + (2 if old_value > 700 else 3)) + elif base_value < 0 and stancedict.get(stance, 1) != 0: # 负面情绪且非支持 + # 关系好时负面影响更大,关系差时负面影响减弱 + value_change = base_value * math.exp(old_value / 2000) if old_value >= 0 else base_value * math.cos(math.pi * old_value / 2000) + # else: 立场冲突或情绪平静,基础变化为 0 + + # 应用正反馈系统和情绪反馈 self.positive_feedback_sys(label, stance) - value = self.mood_feedback(value) + value_change = self.mood_feedback(value_change) # 应用当前情绪对关系变化的影响 + value_change = self.feedback_to_mood(value_change) # 应用连续反馈对关系变化的影响 - level_num = self.calculate_level_num(old_value + value) + new_value = old_value + value_change + # 再次限制新值范围 + new_value = max(min(new_value, 1000), -1000) + actual_change = new_value - old_value # 记录实际变化量 + + level_num = self.calculate_level_num(new_value) relationship_level = ["厌恶", "冷漠", "一般", "友好", "喜欢", "暧昧"] logger.info( - f"用户: {chat_stream.user_info.user_nickname}" + f"用户: {chat_stream.user_info.user_nickname} " f"当前关系: {relationship_level[level_num]}, " f"关系值: {old_value:.2f}, " - f"当前立场情感: {stance}-{label}, " - f"变更: {value:+.5f}" + f"立场情感: {stance}-{label}, " + f"变更: {actual_change:+.5f}, " + f"新值: {new_value:.2f}" ) - await person_info_manager.update_one_field(person_id, "relationship_value", old_value + value, data) + # 更新数据库,只更新 relationship_value + await person_info_manager.update_one_field(person_id, "relationship_value", new_value) - return chat_stream.user_info.user_nickname, value, relationship_level[level_num] + return chat_stream.user_info.user_nickname, actual_change, relationship_level[level_num] - async def calculate_update_relationship_value_with_reason( - self, chat_stream: ChatStream, label: str, stance: str, reason: str - ) -> tuple: - """计算并变更关系值 - 新的关系值变更计算方式: - 将关系值限定在-1000到1000 - 对于关系值的变更,期望: - 1.向两端逼近时会逐渐减缓 - 2.关系越差,改善越难,关系越好,恶化越容易 - 3.人维护关系的精力往往有限,所以当高关系值用户越多,对于中高关系值用户增长越慢 - 4.连续正面或负面情感会正反馈 - - 返回: - 用户昵称,变更值,变更后关系等级 - - """ - stancedict = { - "支持": 0, - "中立": 1, - "反对": 2, - } - - valuedict = { - "开心": 1.5, - "愤怒": -2.0, - "悲伤": -0.5, - "惊讶": 0.6, - "害羞": 2.0, - "平静": 0.3, - "恐惧": -1.5, - "厌恶": -1.0, - "困惑": 0.5, - } - - person_id = person_info_manager.get_person_id(chat_stream.user_info.platform, chat_stream.user_info.user_id) - data = { - "platform": chat_stream.user_info.platform, - "user_id": chat_stream.user_info.user_id, - "nickname": chat_stream.user_info.user_nickname, - "konw_time": int(time.time()), - } - old_value = await person_info_manager.get_value(person_id, "relationship_value") - old_value = self.ensure_float(old_value, person_id) - - if old_value > 1000: - old_value = 1000 - elif old_value < -1000: - old_value = -1000 - - value = valuedict[label] - if old_value >= 0: - if valuedict[label] >= 0 and stancedict[stance] != 2: - value = value * math.cos(math.pi * old_value / 2000) - if old_value > 500: - rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) - high_value_count = len(rdict) - if old_value > 700: - value *= 3 / (high_value_count + 2) # 排除自己 - else: - value *= 3 / (high_value_count + 3) - elif valuedict[label] < 0 and stancedict[stance] != 0: - value = value * math.exp(old_value / 2000) - else: - value = 0 - elif old_value < 0: - if valuedict[label] >= 0 and stancedict[stance] != 2: - value = value * math.exp(old_value / 2000) - elif valuedict[label] < 0 and stancedict[stance] != 0: - value = value * math.cos(math.pi * old_value / 2000) - else: - value = 0 - - self.positive_feedback_sys(label, stance) - value = self.mood_feedback(value) - - level_num = self.calculate_level_num(old_value + value) - relationship_level = ["厌恶", "冷漠", "一般", "友好", "喜欢", "暧昧"] - logger.info( - f"用户: {chat_stream.user_info.user_nickname}" - f"当前关系: {relationship_level[level_num]}, " - f"关系值: {old_value:.2f}, " - f"当前立场情感: {stance}-{label}, " - f"变更: {value:+.5f}" - ) - - await person_info_manager.update_one_field(person_id, "relationship_value", old_value + value, data) - - return chat_stream.user_info.user_nickname, value, relationship_level[level_num] async def build_relationship_info(self, person, is_id: bool = False) -> str: + """构建用于 Prompt 的关系信息字符串""" if is_id: person_id = person + # 如果只有 person_id,需要反查 platform 和 user_id 来获取 person_name + # 这依赖于 person_id 的格式,假设是 platform_userid + try: + platform, user_id_str = person_id.split("_", 1) + person_name = await self.get_person_name(platform, user_id_str) + except ValueError: + logger.warning(f"Invalid person_id format for prompt building: {person_id}") + person_name = None else: - print(f"person: {person}") - person_id = person_info_manager.get_person_id(person[0], person[1]) - person_name = await person_info_manager.get_value(person_id, "person_name") - print(f"person_name: {person_name}") + platform, user_id, _ = person # 解包元组 + person_id = person_info_manager.get_person_id(platform, user_id) + person_name = await self.get_person_name(platform, user_id) + + if not person_name: + person_name = f"用户({person_id})" # 回退显示 ID + relationship_value = await person_info_manager.get_value(person_id, "relationship_value") + relationship_value = self.ensure_float(relationship_value, person_id) # 确保是 float level_num = self.calculate_level_num(relationship_value) - if level_num == 0 or level_num == 5: - relationship_level = ["厌恶", "冷漠以对", "认识", "友好对待", "喜欢", "暧昧"] - relation_prompt2_list = [ - "忽视的回应", - "冷淡回复", - "保持理性", - "愿意回复", - "积极回复", - "友善和包容的回复", - ] - return f"你{relationship_level[level_num]}{person_name},打算{relation_prompt2_list[level_num]}。\n" - elif level_num == 2: + # 定义关系等级和对应的行为描述 + relationship_levels = ["厌恶", "冷漠以对", "认识", "友好对待", "喜欢", "暧昧"] + relation_prompt_list = ["忽视的回应", "冷淡回复", "保持理性", "愿意回复", "积极回复", "友善和包容的回复"] + + # 根据等级和随机性决定是否输出及输出内容 + if level_num == 2: # "一般"关系不特别提示 return "" + elif level_num in [0, 5] or random.random() < 0.6: # 极好/极差 或 60% 概率 + # 修正索引,确保在列表范围内 + level_idx = max(0, min(level_num, len(relationship_levels) - 1)) + prompt_idx = max(0, min(level_num, len(relation_prompt_list) - 1)) + return f"你{relationship_levels[level_idx]}{person_name},打算{relation_prompt_list[prompt_idx]}。\n" else: - if random.random() < 0.6: - relationship_level = ["厌恶", "冷漠以对", "认识", "友好对待", "喜欢", "暧昧"] - relation_prompt2_list = [ - "忽视的回应", - "冷淡回复", - "保持理性", - "愿意回复", - "积极回复", - "友善和包容的回复", - ] - return f"你{relationship_level[level_num]}{person_name},打算{relation_prompt2_list[level_num]}。\n" - else: - return "" + return "" @staticmethod def calculate_level_num(relationship_value) -> int: """关系等级计算""" - if -1000 <= relationship_value < -227: - level_num = 0 - elif -227 <= relationship_value < -73: - level_num = 1 - elif -73 <= relationship_value < 227: - level_num = 2 - elif 227 <= relationship_value < 587: - level_num = 3 - elif 587 <= relationship_value < 900: - level_num = 4 - elif 900 <= relationship_value <= 1000: - level_num = 5 - else: - level_num = 5 if relationship_value > 1000 else 0 - return level_num + # 确保 value 是 float + try: + value = float(relationship_value.to_decimal() if isinstance(relationship_value, Decimal128) else relationship_value) + except (ValueError, TypeError, AttributeError): + value = 0.0 # 转换失败默认为 0 + + # 阈值判断 + if value < -227: return 0 + elif value < -73: return 1 + elif value < 227: return 2 + elif value < 587: return 3 + elif value < 900: return 4 + else: return 5 # >= 900 @staticmethod def ensure_float(value, person_id): """确保返回浮点数,转换失败返回0.0""" - if isinstance(value, float): - return value + if isinstance(value, (float, int)): # 直接处理 float 和 int + return float(value) try: + # 尝试处理 Decimal128 或其他可转换为 float 的类型 return float(value.to_decimal() if isinstance(value, Decimal128) else value) except (ValueError, TypeError, AttributeError): - logger.warning(f"[关系管理] {person_id}值转换失败(原始值:{value}),已重置为0") + logger.warning(f"[关系管理] {person_id} 值转换失败(原始值:{value}),已重置为0") + # 在转换失败时,尝试在数据库中将该字段重置为 0.0 + try: + person_info_manager.update_one_field(person_id, "relationship_value", 0.0) + except Exception as db_err: + logger.error(f"Failed to reset relationship_value for {person_id} in DB: {db_err}") return 0.0 From 561f0c81f8d28e11985ba923bee71fa285558b72 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 15:24:56 +0800 Subject: [PATCH 02/58] modified: src/plugins/person_info/relationship_manager.py --- .../person_info/relationship_manager.py | 50 +++---------------- 1 file changed, 7 insertions(+), 43 deletions(-) diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index adf293ea..e19b33c3 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -20,25 +20,13 @@ class RelationshipManager: def mood_manager(self): if self._mood_manager is None: from ..moods.moods import MoodManager # 延迟导入 - self._mood_manager = MoodManager.get_instance() return self._mood_manager def positive_feedback_sys(self, label: str, stance: str): """正反馈系统,通过正反馈系数增益情绪变化,根据情绪再影响关系变更""" - - positive_list = [ - "开心", - "惊讶", - "害羞", - ] - - negative_list = [ - "愤怒", - "悲伤", - "恐惧", - "厌恶", - ] + positive_list = ["开心", "惊讶", "害羞"] + negative_list = ["愤怒", "悲伤", "恐惧", "厌恶"] if label in positive_list: if 7 > self.positive_feedback_value >= 0: @@ -203,7 +191,7 @@ class RelationshipManager: @staticmethod async def first_knowing_some_one(platform, user_id, user_nickname, user_cardname, user_avatar): - """判断是否认识某人""" + """初次认识某人或更新信息""" person_id = person_info_manager.get_person_id(platform, user_id) # 首次认识时,除了更新 nickname,也应该设置初始关系值等 initial_data = { @@ -230,35 +218,11 @@ class RelationshipManager: async def calculate_update_relationship_value(self, chat_stream: ChatStream, label: str, stance: str) -> tuple: - """计算并变更关系值 - 新的关系值变更计算方式: - 将关系值限定在-1000到1000 - 对于关系值的变更,期望: - 1.向两端逼近时会逐渐减缓 - 2.关系越差,改善越难,关系越好,恶化越容易 - 3.人维护关系的精力往往有限,所以当高关系值用户越多,对于中高关系值用户增长越慢 - 4.连续正面或负面情感会正反馈 - - 返回: - 用户昵称,变更值,变更后关系等级 - - """ - stancedict = { - "支持": 0, - "中立": 1, - "反对": 2, - } - + """计算并变更关系值""" + stancedict = {"支持": 0, "中立": 1, "反对": 2} valuedict = { - "开心": 1.5, - "愤怒": -2.0, - "悲伤": -0.5, - "惊讶": 0.6, - "害羞": 2.0, - "平静": 0.3, - "恐惧": -1.5, - "厌恶": -1.0, - "困惑": 0.5, + "开心": 1.5, "愤怒": -2.0, "悲伤": -0.5, "惊讶": 0.6, "害羞": 2.0, + "平静": 0.3, "恐惧": -1.5, "厌恶": -1.0, "困惑": 0.5, } person_id = person_info_manager.get_person_id(chat_stream.user_info.platform, chat_stream.user_info.user_id) From 14b63958e4978fb244e5fa6b3d9478c195e5f4e2 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 15:33:32 +0800 Subject: [PATCH 03/58] =?UTF-8?q?=E9=BB=98=E8=AE=A4=E5=85=B3=E9=97=AD?= =?UTF-8?q?=E7=BB=B0=E5=8F=B7=E7=B3=BB=E7=BB=9F=EF=BC=8Cprompt=E6=B3=A8?= =?UTF-8?q?=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/config.py | 3 +- src/plugins/heartFC_chat/heartFC_chat.py | 11 +---- .../heartFC_chat/heartflow_prompt_builder.py | 41 +++++++++++++++++++ 3 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/plugins/group_nickname/config.py b/src/plugins/group_nickname/config.py index 35fff354..eb001419 100644 --- a/src/plugins/group_nickname/config.py +++ b/src/plugins/group_nickname/config.py @@ -1,8 +1,7 @@ -# GroupNickname/config.py import threading # 功能总开关 -ENABLE_NICKNAME_MAPPING = True # 设置为 False 可完全禁用此功能 +ENABLE_NICKNAME_MAPPING = False # 设置为 False 可完全禁用此功能 # --- LLM 相关配置 (示例,你需要根据实际情况修改) --- # 用于绰号映射分析的 LLM 模型配置 diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index f5e1ddb7..ab8f9605 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -751,17 +751,8 @@ class HeartFChatting: user_name_map = {} if user_ids_in_history: platform = anchor_message.chat_stream.platform - # 尝试批量获取 person_name - # 假设 relationship_manager 有 get_person_names_batch(platform, user_ids) try: - # 注意:你需要实现 get_person_names_batch 方法 - # names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) - # 这里暂时用单次获取代替,如果你的 relationship_manager 没有批量方法 - names_data = {} - for user_id in user_ids_in_history: - name = await relationship_manager.get_person_name(platform, user_id) - if name: - names_data[user_id] = name + names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) except AttributeError: logger.warning("relationship_manager does not have get_person_names_batch method. Falling back to single lookups.") diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index a0f266d6..4ea9f3e7 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -14,6 +14,9 @@ from ..moods.moods import MoodManager from ..memory_system.Hippocampus import HippocampusManager from ..schedule.schedule_generator import bot_schedule from ..knowledge.knowledge_lib import qa_manager +from src.plugins.group_nickname.config import ENABLE_NICKNAME_MAPPING +from src.plugins.group_nickname.nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_injection +from src.plugins.person_info.relationship_manager import relationship_manager logger = get_logger("prompt") @@ -222,6 +225,44 @@ class PromptBuilder: logger.debug("开始构建prompt") + # --- [修改] 注入绰号信息 --- + nickname_injection_str = "" + if ENABLE_NICKNAME_MAPPING and chat_stream.group_info: + try: + group_id = str(chat_stream.group_info.group_id) + # 提取上下文中的用户 ID (需要 message_list_before_now 变量在此可用) + # 假设 message_list_before_now 在此函数作用域内可用 + user_ids_in_context = set() + # !!! 注意: 确保 message_list_before_now 在这里是可访问的 !!! + # 如果不是,你需要从 chat_stream 或其他地方重新获取或传递它 + # 假设 message_list_before_now 存在 + if 'message_list_before_now' in locals() or 'message_list_before_now' in globals(): + for msg in message_list_before_now: + sender_id = msg.get('sender_id') + if sender_id: + user_ids_in_context.add(str(sender_id)) + else: + logger.warning("Variable 'message_list_before_now' not found for nickname injection in focus prompt.") + + + if user_ids_in_context: + platform = chat_stream.platform + # --- 调用批量获取群组绰号的方法 --- + all_nicknames_data = await relationship_manager.get_users_group_nicknames( + platform, list(user_ids_in_context), group_id + ) + # --- 结束调用 --- + + if all_nicknames_data: + selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) + nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) + if nickname_injection_str: + logger.debug(f"Injecting nickname info into focus prompt:\n{nickname_injection_str}") + + except Exception as e: + logger.error(f"Error getting or formatting nickname info for focus prompt: {e}", exc_info=True) + # --- [结束修改] --- + prompt = await global_prompt_manager.format_prompt( "heart_flow_prompt", info_from_tools=structured_info_prompt, From 5c2dd25ba4719c83454f2ad9827b57b6a2d76023 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 16:57:31 +0800 Subject: [PATCH 04/58] =?UTF-8?q?false=E7=8A=B6=E6=80=81=E4=B8=8B=E4=B8=8D?= =?UTF-8?q?=E6=8A=A5=E9=94=99=E4=BA=86=EF=BC=8C=E5=85=88=E5=AD=98=E4=B8=AA?= =?UTF-8?q?=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/config.py | 20 +- src/plugins/group_nickname/config.py | 39 --- src/plugins/group_nickname/nickname_mapper.py | 121 ++++---- .../group_nickname/nickname_processor.py | 269 ++++++++---------- src/plugins/group_nickname/nickname_utils.py | 44 +-- src/plugins/heartFC_chat/heartFC_chat.py | 6 +- .../heartFC_chat/heartflow_prompt_builder.py | 3 +- template/bot_config_template.toml | 14 + 8 files changed, 221 insertions(+), 295 deletions(-) delete mode 100644 src/plugins/group_nickname/config.py diff --git a/src/config/config.py b/src/config/config.py index f09da9a7..0cce17a2 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -1,7 +1,7 @@ import os import re from dataclasses import dataclass, field -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple, Any from dateutil import tz import tomli @@ -272,6 +272,13 @@ class BotConfig: # enable_think_flow: bool = False # 是否启用思考流程 enable_pfc_chatting: bool = False # 是否启用PFC聊天 + # Group Nickname + ENABLE_NICKNAME_MAPPING: bool = False # 绰号映射功能总开关 + MAX_NICKNAMES_IN_PROMPT: int = 10 # Prompt 中最多注入的绰号数量 + NICKNAME_PROBABILITY_SMOOTHING: int = 1 # 绰号加权随机选择的平滑因子 + NICKNAME_QUEUE_MAX_SIZE: int = 100 # 绰号处理队列最大容量 + NICKNAME_PROCESS_SLEEP_INTERVAL: float = 0.5 # 绰号处理进程休眠间隔(秒) + # 模型配置 llm_reasoning: Dict[str, str] = field(default_factory=lambda: {}) # llm_reasoning_minor: Dict[str, str] = field(default_factory=lambda: {}) @@ -288,6 +295,7 @@ class BotConfig: llm_heartflow: Dict[str, str] = field(default_factory=lambda: {}) llm_tool_use: Dict[str, str] = field(default_factory=lambda: {}) llm_plan: Dict[str, str] = field(default_factory=lambda: {}) + llm_nickname_mapping: Dict[str, Any] = field(default_factory=dict) api_urls: Dict[str, str] = field(default_factory=lambda: {}) @@ -401,6 +409,14 @@ class BotConfig: config.save_emoji = emoji_config.get("save_emoji", config.save_emoji) config.steal_emoji = emoji_config.get("steal_emoji", config.steal_emoji) + def group_nickname(parent: dict): + gn_config = parent.get("group_nickname", {}) + config.ENABLE_NICKNAME_MAPPING = gn_config.get("enable_nickname_mapping", config.ENABLE_NICKNAME_MAPPING) + config.MAX_NICKNAMES_IN_PROMPT = gn_config.get("max_nicknames_in_prompt", config.MAX_NICKNAMES_IN_PROMPT) + config.NICKNAME_PROBABILITY_SMOOTHING = gn_config.get("nickname_probability_smoothing", config.NICKNAME_PROBABILITY_SMOOTHING) + config.NICKNAME_QUEUE_MAX_SIZE = gn_config.get("nickname_queue_max_size", config.NICKNAME_QUEUE_MAX_SIZE) + config.NICKNAME_PROCESS_SLEEP_INTERVAL = gn_config.get("nickname_process_sleep_interval", config.NICKNAME_PROCESS_SLEEP_INTERVAL) + def bot(parent: dict): # 机器人基础配置 bot_config = parent["bot"] @@ -488,6 +504,7 @@ class BotConfig: "llm_PFC_action_planner", "llm_PFC_chat", "llm_PFC_reply_checker", + "llm_nickname_mapping", ] for item in config_list: @@ -709,6 +726,7 @@ class BotConfig: "response_splitter": {"func": response_splitter, "support": ">=0.0.11", "necessary": False}, "experimental": {"func": experimental, "support": ">=0.0.11", "necessary": False}, "heartflow": {"func": heartflow, "support": ">=1.0.2", "necessary": False}, + "group_nickname": {"func": group_nickname, "support": ">=0.6.3", "necessary": False}, } # 原地修改,将 字符串版本表达式 转换成 版本对象 diff --git a/src/plugins/group_nickname/config.py b/src/plugins/group_nickname/config.py deleted file mode 100644 index eb001419..00000000 --- a/src/plugins/group_nickname/config.py +++ /dev/null @@ -1,39 +0,0 @@ -import threading - -# 功能总开关 -ENABLE_NICKNAME_MAPPING = False # 设置为 False 可完全禁用此功能 - -# --- LLM 相关配置 (示例,你需要根据实际情况修改) --- -# 用于绰号映射分析的 LLM 模型配置 -LLM_MODEL_NICKNAME_MAPPING = { - "model_name": "your_llm_model_for_mapping", # 替换成你用于分析的模型名称 - "api_key": "YOUR_API_KEY", # 如果需要 - "base_url": "YOUR_API_BASE", # 如果需要 - "temperature": 0.5, - "max_tokens": 200, -} - -# --- 数据库相关配置 (如果需要独立配置) --- -# 例如,如果数据库连接信息不同或需要特定集合名称 -DB_COLLECTION_PERSON_INFO = "person_info" # 你的用户信息集合名称 - -# --- Prompt 注入配置 --- -MAX_NICKNAMES_IN_PROMPT = 10 # Prompt 中最多注入的绰号数量 -NICKNAME_PROBABILITY_SMOOTHING = 1 # 用于加权随机选择的平滑因子 (防止概率为0) - -# --- 进程控制 --- -NICKNAME_QUEUE_MAX_SIZE = 100 # 进程间通信队列的最大容量 -NICKNAME_PROCESS_SLEEP_INTERVAL = 0.5 # 映射进程在队列为空时的休眠时间(秒) - - -# --- 运行时状态 (用于安全停止进程) --- -_stop_event = threading.Event() - -def get_stop_event(): - """获取全局停止事件""" - return _stop_event - -def set_stop_event(): - """设置全局停止事件,通知子进程退出""" - _stop_event.set() - diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index ae6cf1a0..45e60d19 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -1,41 +1,42 @@ -# GroupNickname/nickname_mapper.py import json -from typing import Dict, Any, Tuple, List, Optional -from src.common.logger_manager import get_logger # 假设你的日志管理器路径 -from src.plugins.models.utils_model import LLMRequest # 假设你的 LLM 请求工具路径 -from .config import LLM_MODEL_NICKNAME_MAPPING, ENABLE_NICKNAME_MAPPING +from typing import Dict, Any, Optional +from src.common.logger_manager import get_logger +from src.plugins.models.utils_model import LLMRequest +# 从全局配置导入 +from src.config.config import global_config + logger = get_logger("nickname_mapper") -# 初始化用于绰号映射的 LLM 实例 -# 注意:这里的初始化方式可能需要根据你的 LLMRequest 实现进行调整 -try: - # 尝试使用字典解包来传递参数 - llm_mapper = LLMRequest( - model=LLM_MODEL_NICKNAME_MAPPING.get("model_name", "default_model"), - temperature=LLM_MODEL_NICKNAME_MAPPING.get("temperature", 0.5), - max_tokens=LLM_MODEL_NICKNAME_MAPPING.get("max_tokens", 200), - api_key=LLM_MODEL_NICKNAME_MAPPING.get("api_key"), - base_url=LLM_MODEL_NICKNAME_MAPPING.get("base_url"), - request_type="nickname_mapping" # 定义一个请求类型用于区分 - ) - logger.info("Nickname mapping LLM initialized successfully.") -except Exception as e: - logger.error(f"Failed to initialize nickname mapping LLM: {e}", exc_info=True) - llm_mapper = None # 初始化失败则置为 None +llm_mapper: Optional[LLMRequest] = None +if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 + try: + # 从全局配置获取模型设置 + model_config = global_config.llm_nickname_mapping + if not model_config or not model_config.get("name"): + logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") + else: + llm_args = { + "model": model_config.get("name"), # 必须有 name + "temperature": model_config.get("temp", 0.5), # 使用 temp 字段 + "max_tokens": model_config.get("max_tokens", 200), # max_tokens 是可选的,取决于 LLMRequest 实现 + "api_key": model_config.get("key"), # 使用 key 字段 + "base_url": model_config.get("base_url"), # 使用 base_url 字段 + "request_type": "nickname_mapping" + } + # 清理 None 值参数 + llm_args = {k: v for k, v in llm_args.items() if v is not None} + + llm_mapper = LLMRequest(**llm_args) + logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") + + except Exception as e: + logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) + llm_mapper = None +# --- 结束修改 --- def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: - """ - 构建用于 LLM 分析绰号映射的 Prompt。 - - Args: - chat_history_str: 格式化后的聊天记录字符串。 - bot_reply: Bot 的回复内容。 - user_name_map: 用户 ID 到已知名称(如 person_name 或 nickname)的映射。 - - Returns: - str: 构建好的 Prompt。 - """ + # ... (函数内容不变) ... user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) prompt = f""" @@ -74,6 +75,7 @@ Bot 最新回复: """ return prompt + async def analyze_chat_for_nicknames( chat_history_str: str, bot_reply: str, @@ -81,47 +83,30 @@ async def analyze_chat_for_nicknames( ) -> Dict[str, Any]: """ 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射。 - - Args: - chat_history_str: 格式化后的聊天记录字符串。 - bot_reply: Bot 的回复内容。 - user_name_map: 用户 ID 到已知名称(如 person_name 或 nickname)的映射。 - - Returns: - Dict[str, Any]: 分析结果,格式为 { "is_exist": bool, "data": Optional[Dict[str, str]] }。 - 如果出错,返回 {"is_exist": False}。 """ - if not ENABLE_NICKNAME_MAPPING: - logger.debug("Nickname mapping feature is disabled.") + # --- [修改] 使用全局配置开关 --- + if not global_config.ENABLE_NICKNAME_MAPPING: + # --- 结束修改 --- + logger.debug("绰号映射功能已禁用。") return {"is_exist": False} if llm_mapper is None: - logger.error("Nickname mapping LLM is not initialized. Cannot perform analysis.") + logger.error("绰号映射 LLM 未初始化。无法执行分析。") return {"is_exist": False} prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) - logger.debug(f"Nickname mapping prompt built:\n{prompt}") # 调试日志 + logger.debug(f"构建的绰号映射 Prompt:\n{prompt}") try: - # --- 调用 LLM --- - # 注意:这里的调用方式需要根据你的 LLMRequest 实现进行调整 - # 可能需要使用 generate_response_sync 或其他同步方法,因为这将在独立进程中运行 - # 或者如果 LLMRequest 支持异步,确保在异步环境中调用 - # response_content, _, _ = await llm_mapper.generate_response(prompt) - - # 假设 llm_mapper 有一个同步的 generate 方法或在异步环境中调用 - # 这里暂时使用 await,如果你的 LLMRequest 不支持,需要修改 + # 调用 LLM response_content, _, _ = await llm_mapper.generate_response(prompt) + logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") - - logger.debug(f"LLM raw response for nickname mapping: {response_content}") - - # --- 解析 LLM 响应 --- + # ... (解析 LLM 响应的逻辑不变) ... if not response_content: - logger.warning("LLM returned empty content for nickname mapping.") + logger.warning("LLM 返回了空的绰号映射内容。") return {"is_exist": False} - # 尝试去除可能的代码块标记 response_content = response_content.strip() if response_content.startswith("```json"): response_content = response_content[7:] @@ -131,33 +116,31 @@ async def analyze_chat_for_nicknames( try: result = json.loads(response_content) - # 基本验证 if isinstance(result, dict) and "is_exist" in result: if result["is_exist"] is True: if "data" in result and isinstance(result["data"], dict): - # 过滤掉 data 为空的情况 if not result["data"]: - logger.debug("LLM indicated is_exist=True but data is empty. Treating as False.") + logger.debug("LLM 指示 is_exist=True 但 data 为空。视为 False 处理。") return {"is_exist": False} - logger.info(f"Nickname mapping found: {result['data']}") + logger.info(f"找到绰号映射: {result['data']}") return {"is_exist": True, "data": result["data"]} else: - logger.warning("LLM response format error: is_exist is True but 'data' is missing or not a dict.") + logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 缺失或不是字典。") return {"is_exist": False} elif result["is_exist"] is False: - logger.info("No reliable nickname mapping found by LLM.") + logger.info("LLM 未找到可靠的绰号映射。") return {"is_exist": False} else: - logger.warning("LLM response format error: 'is_exist' is not a boolean.") + logger.warning("LLM 响应格式错误: 'is_exist' 不是布尔值。") return {"is_exist": False} else: - logger.warning("LLM response format error: Missing 'is_exist' key or not a dict.") + logger.warning("LLM 响应格式错误: 缺少 'is_exist' 键或不是字典。") return {"is_exist": False} except json.JSONDecodeError as json_err: - logger.error(f"Failed to parse LLM response as JSON: {json_err}\nRaw response: {response_content}") + logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") return {"is_exist": False} except Exception as e: - logger.error(f"Error during nickname mapping LLM call or processing: {e}", exc_info=True) + logger.error(f"绰号映射 LLM 调用或处理过程中出错: {e}", exc_info=True) return {"is_exist": False} diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index 76c4abb5..e52e4e28 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,134 +1,70 @@ # GroupNickname/nickname_processor.py import asyncio -import time +import threading import traceback # 明确导入 Event 和 Queue from multiprocessing import Process, Queue as mpQueue -# 尝试从 synchronize 导入 Event -from multiprocessing.synchronize import Event as mpEvent -from typing import Dict, Any, Tuple, Optional, List +from multiprocessing.synchronize import Event as mpEvent # 从 synchronize 导入 Event +from typing import Dict, Optional -from pymongo import MongoClient, UpdateOne +from pymongo import MongoClient from pymongo.errors import ConnectionFailure, OperationFailure -# 假设你的项目结构允许这样导入 -try: - from src.common.logger_manager import get_logger - from src.config.config import global_config - from .config import ( - ENABLE_NICKNAME_MAPPING, DB_COLLECTION_PERSON_INFO, - NICKNAME_QUEUE_MAX_SIZE, NICKNAME_PROCESS_SLEEP_INTERVAL, - get_stop_event, set_stop_event - ) - from .nickname_mapper import analyze_chat_for_nicknames -except ImportError: - # 提供备选导入路径或记录错误,以便调试 - print("Error: Failed to import necessary modules. Please check your project structure and PYTHONPATH.") - # 在无法导入时,定义临时的 get_logger 以避免 NameError,但这只是权宜之计 - import logging - def get_logger(name): - return logging.getLogger(name) - # 定义临时的全局配置,这同样是权宜之计 - class MockGlobalConfig: - mongodb_uri = "mongodb://localhost:27017/" # 示例 URI - mongodb_database = "your_db_name" # 示例数据库名 - global_config = MockGlobalConfig() - # 定义临时的配置变量 - ENABLE_NICKNAME_MAPPING = True - DB_COLLECTION_PERSON_INFO = "person_info" - NICKNAME_QUEUE_MAX_SIZE = 100 - NICKNAME_PROCESS_SLEEP_INTERVAL = 0.5 - # 使用导入的 mpEvent - _stop_event_internal = mpEvent() - def get_stop_event(): return _stop_event_internal - def set_stop_event(): _stop_event_internal.set() - # 定义临时的 analyze_chat_for_nicknames - async def analyze_chat_for_nicknames(*args, **kwargs): return {"is_exist": False} +from src.common.logger_manager import get_logger # 导入日志管理器 +from src.config.config import global_config # 导入全局配置 +from .nickname_mapper import analyze_chat_for_nicknames # 导入绰号分析函数 +from src.common.database import db # 导入数据库初始化和关闭函数 - -logger = get_logger("nickname_processor") +logger = get_logger("nickname_processor") # 获取日志记录器实例 +# --- 运行时状态 (用于安全停止进程) --- +_stop_event = threading.Event() # --- 数据库连接 --- -mongo_client: Optional[MongoClient] = None -person_info_collection = None - -def _initialize_db(): - """初始化数据库连接(在子进程中调用)""" - global mongo_client, person_info_collection - if mongo_client is None: - try: - mongo_uri = global_config.mongodb_uri - if not mongo_uri: - raise ValueError("MongoDB URI not found in global config.") - - mongo_client = MongoClient(mongo_uri, serverSelectionTimeoutMS=5000) - mongo_client.admin.command('ping') - db = mongo_client[global_config.mongodb_database] - person_info_collection = db[DB_COLLECTION_PERSON_INFO] - logger.info("Nickname processor: Database connection initialized successfully.") - except (ConnectionFailure, ValueError, OperationFailure) as e: - logger.error(f"Nickname processor: Failed to initialize database connection: {e}", exc_info=True) - mongo_client = None - person_info_collection = None - except Exception as e: - logger.error(f"Nickname processor: An unexpected error occurred during DB initialization: {e}", exc_info=True) - mongo_client = None - person_info_collection = None - - -def _close_db(): - """关闭数据库连接""" - global mongo_client - if mongo_client: - try: - mongo_client.close() - logger.info("Nickname processor: Database connection closed.") - except Exception as e: - logger.error(f"Nickname processor: Error closing database connection: {e}", exc_info=True) - finally: - mongo_client = None +mongo_client: Optional[MongoClient] = None # MongoDB 客户端实例 +person_info_collection = None # 用户信息集合对象 # --- 数据库更新逻辑 --- async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数。 - - Args: - group_id (str): 群组 ID。 - nickname_map (Dict[str, str]): 需要更新的 {用户ID: 绰号} 映射。 """ - if not person_info_collection: - logger.error("Database collection is not initialized. Cannot update nickname counts.") + # --- [修改] 使用导入的 db 对象访问集合 --- + # !!! 重要:请确保 'person_info' 是你实际存储用户信息的集合名称 !!! + person_info_collection = db.person_info + # --- 结束修改 --- + + if not person_info_collection: # 理论上 db 对象总是可用,但保留检查 + logger.error("无法访问数据库集合 'person_info'。无法更新绰号计数。") return if not nickname_map: - logger.debug("Empty nickname map provided for update.") + logger.debug("提供的用于更新的绰号映射为空。") return - logger.info(f"Attempting to update nickname counts for group '{group_id}' with map: {nickname_map}") + logger.info(f"尝试更新群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") for user_id, nickname in nickname_map.items(): if not user_id or not nickname: - logger.warning(f"Skipping invalid entry in nickname map: user_id='{user_id}', nickname='{nickname}'") + logger.warning(f"跳过绰号映射中的无效条目: user_id='{user_id}', nickname='{nickname}'") continue - group_id_str = str(group_id) # 确保是字符串 + group_id_str = str(group_id) try: # a. 确保用户文档存在 group_nickname 字段且为 list person_info_collection.update_one( {"person_id": user_id}, - {"$setOnInsert": {"group_nickname": []}}, # 如果字段不存在则创建为空列表 + {"$setOnInsert": {"group_nickname": []}}, upsert=True ) # b. 确保特定 group_id 的条目存在 update_result = person_info_collection.update_one( {"person_id": user_id, f"group_nickname.{group_id_str}": {"$exists": False}}, - {"$push": {"group_nickname": {group_id_str: []}}} # 如果不存在则添加 + {"$push": {"group_nickname": {group_id_str: []}}} ) if update_result.modified_count > 0: - logger.debug(f"Added group entry for group '{group_id_str}' for user '{user_id}'.") + logger.debug(f"为用户 '{user_id}' 添加了群组 '{group_id_str}' 的条目。") # c. 确保特定 nickname 存在于 group_id 的数组中,并增加计数 update_result = person_info_collection.update_one( @@ -136,7 +72,7 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): "person_id": user_id, "group_nickname": { "$elemMatch": { - group_id_str: {"$elemMatch": {nickname: {"$exists": True}}} + group_id_str: {"$elemMatch": {nickname: {"$exists": True}}} } } }, @@ -155,98 +91,93 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): array_filters=[{f"group.{group_id_str}": {"$exists": True}}] ) if add_nick_result.modified_count > 0: - logger.debug(f"Added nickname '{nickname}' with count 1 for user '{user_id}' in group '{group_id_str}'.") + logger.debug(f"为用户 '{user_id}' 在群组 '{group_id_str}' 中添加了绰号 '{nickname}',计数为 1。") else: - logger.warning(f"Failed to add nickname '{nickname}' for user '{user_id}' in group '{group_id_str}'. Update result: {add_nick_result.raw_result}") + logger.warning(f"未能为用户 '{user_id}' 在群组 '{group_id_str}' 中添加绰号 '{nickname}'。更新结果: {add_nick_result.raw_result}") elif update_result.modified_count > 0: - logger.debug(f"Incremented count for nickname '{nickname}' for user '{user_id}' in group '{group_id_str}'.") + logger.debug(f"用户 '{user_id}' 在群组 '{group_id_str}' 中的绰号 '{nickname}' 计数已增加。") else: - logger.warning(f"Nickname increment operation matched but did not modify for user '{user_id}', nickname '{nickname}'. Update result: {update_result.raw_result}") + logger.warning(f"绰号增加操作匹配但未修改用户 '{user_id}' 的绰号 '{nickname}'。更新结果: {update_result.raw_result}") except OperationFailure as op_err: - logger.error(f"Database operation failed for user {user_id}, group {group_id_str}, nickname {nickname}: {op_err}", exc_info=True) + logger.error(f"数据库操作失败: 用户 {user_id}, 群组 {group_id_str}, 绰号 {nickname}: {op_err}", exc_info=True) except Exception as e: - logger.error(f"Unexpected error updating nickname for user {user_id}, group {group_id_str}, nickname {nickname}: {e}", exc_info=True) + logger.error(f"更新用户 {user_id} 的绰号 {nickname} 时发生意外错误: {e}", exc_info=True) # --- 队列和进程 --- -# 使用明确导入的类型 -nickname_queue: mpQueue[Tuple[str, str, str, Dict[str, str]]] = mpQueue(maxsize=NICKNAME_QUEUE_MAX_SIZE) +# --- [修改] 使用全局配置 --- +nickname_queue: mpQueue = mpQueue(maxsize=global_config.NICKNAME_QUEUE_MAX_SIZE) +# --- 结束修改 --- _nickname_process: Optional[Process] = None async def add_to_nickname_queue( chat_history_str: str, bot_reply: str, - group_id: Optional[str], # 群聊时需要 - user_name_map: Dict[str, str] # 用户ID到名字的映射 + group_id: Optional[str], + user_name_map: Dict[str, str] ): """将需要分析的数据放入队列。""" - if not ENABLE_NICKNAME_MAPPING: + # --- [修改] 使用全局配置 --- + if not global_config.ENABLE_NICKNAME_MAPPING: + # --- 结束修改 --- return if group_id is None: - logger.debug("Skipping nickname mapping for private chat.") - return # 私聊暂时不处理绰号映射 + logger.debug("私聊跳过绰号映射。") + return try: - item = (chat_history_str, bot_reply, str(group_id), user_name_map) # 确保 group_id 是字符串 - # 使用 put_nowait,如果队列满则会抛出 Full 异常 + item = (chat_history_str, bot_reply, str(group_id), user_name_map) nickname_queue.put_nowait(item) - logger.debug(f"Added item to nickname queue for group {group_id}.") - # 捕获 queue.Full 异常 + logger.debug(f"已将项目添加到群组 {group_id} 的绰号队列。") except Exception as e: - # 检查异常类型是否为队列满(需要导入 queue 模块或处理 Full 异常) - # from queue import Full # 如果 nickname_queue 是 asyncio.Queue - # if isinstance(e, Full): - # logger.warning("Nickname processing queue is full. Discarding new item.") - # else: - # logger.error(f"Error adding item to nickname queue: {e}", exc_info=True) - # 由于 multiprocessing.Queue 的 Full 异常在不同环境下可能不同,这里暂时捕获通用异常 - logger.warning(f"Failed to add item to nickname queue (possibly full): {e}", exc_info=True) + logger.warning(f"无法将项目添加到绰号队列(可能已满): {e}", exc_info=True) -# 使用从 synchronize 导入的 mpEvent -async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): # 使用 mpEvent +async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): """独立进程中的主循环,处理队列任务。""" - _initialize_db() # 初始化数据库连接 - logger.info("Nickname processing loop started.") + # --- [移除] 不再需要本地数据库初始化 --- + # _initialize_db() + # --- 结束移除 --- + logger.info("绰号处理循环已启动。") while not stop_event.is_set(): try: if not queue.empty(): - # 从队列获取任务 - chat_history_str, bot_reply, group_id, user_name_map = queue.get() - logger.debug(f"Processing nickname mapping task for group {group_id}...") + item = queue.get() + if isinstance(item, tuple) and len(item) == 4: + chat_history_str, bot_reply, group_id, user_name_map = item + logger.debug(f"正在处理群组 {group_id} 的绰号映射任务...") - # 调用 LLM 分析 - analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) + analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) - # 如果找到映射,更新数据库 - if analysis_result.get("is_exist") and analysis_result.get("data"): - await update_nickname_counts(group_id, analysis_result["data"]) - - # 短暂 sleep 避免 CPU 占用过高 - await asyncio.sleep(0.05) # 稍微减少 sleep 时间 + if analysis_result.get("is_exist") and analysis_result.get("data"): + await update_nickname_counts(group_id, analysis_result["data"]) + else: + logger.warning(f"从队列接收到意外的项目类型: {type(item)}") + await asyncio.sleep(0.05) else: - # 队列为空时休眠 - await asyncio.sleep(NICKNAME_PROCESS_SLEEP_INTERVAL) + # --- [修改] 使用全局配置 --- + await asyncio.sleep(global_config.NICKNAME_PROCESS_SLEEP_INTERVAL) + # --- 结束修改 --- except asyncio.CancelledError: - logger.info("Nickname processing loop cancelled.") - break # 响应取消请求 + logger.info("绰号处理循环已取消。") + break except Exception as e: - logger.error(f"Error in nickname processing loop: {e}\n{traceback.format_exc()}") - # 发生错误时也休眠一下,防止快速连续出错 + logger.error(f"绰号处理循环出错: {e}\n{traceback.format_exc()}") await asyncio.sleep(5) - _close_db() # 关闭数据库连接 - logger.info("Nickname processing loop finished.") + # --- [移除] 不再需要本地数据库关闭 --- + # _close_db() + # --- 结束移除 --- + logger.info("绰号处理循环已结束。") -# 使用从 synchronize 导入的 mpEvent -def _run_processor_process(queue: mpQueue, stop_event: mpEvent): # 使用 mpEvent +def _run_processor_process(queue: mpQueue, stop_event: mpEvent): """进程启动函数,运行异步循环。""" try: loop = asyncio.new_event_loop() @@ -254,48 +185,72 @@ def _run_processor_process(queue: mpQueue, stop_event: mpEvent): # 使用 mpEven loop.run_until_complete(_nickname_processing_loop(queue, stop_event)) loop.close() except Exception as e: - logger.error(f"Error running nickname processor process: {e}", exc_info=True) + logger.error(f"运行绰号处理器进程时出错: {e}", exc_info=True) def start_nickname_processor(): """启动绰号映射处理进程。""" global _nickname_process - if not ENABLE_NICKNAME_MAPPING: - logger.info("Nickname mapping feature is disabled. Processor not started.") + # --- [修改] 使用全局配置 --- + if not global_config.ENABLE_NICKNAME_MAPPING: + # --- 结束修改 --- + logger.info("绰号映射功能已禁用。处理器未启动。") return if _nickname_process is None or not _nickname_process.is_alive(): - logger.info("Starting nickname processor process...") + logger.info("正在启动绰号处理器进程...") + # --- [修改] 从全局配置导入停止事件控制函数 --- + try: + from src.config.config import get_stop_event, set_stop_event # 再次确认导入路径 + except ImportError: + logger.error("无法从 src.config.config 导入 get_stop_event/set_stop_event") + # 提供备选方案或退出 + return # 或者 raise ImportError + stop_event = get_stop_event() + # --- 结束修改 --- stop_event.clear() - # 传递明确导入的类型 _nickname_process = Process(target=_run_processor_process, args=(nickname_queue, stop_event), daemon=True) _nickname_process.start() - logger.info(f"Nickname processor process started with PID: {_nickname_process.pid}") + logger.info(f"绰号处理器进程已启动,PID: {_nickname_process.pid}") else: - logger.warning("Nickname processor process is already running.") + logger.warning("绰号处理器进程已在运行中。") def stop_nickname_processor(): """停止绰号映射处理进程。""" global _nickname_process if _nickname_process and _nickname_process.is_alive(): - logger.info("Stopping nickname processor process...") - set_stop_event() + logger.info("正在停止绰号处理器进程...") + # --- [修改] 从全局配置导入停止事件控制函数 --- + try: + from src.config.config import set_stop_event # 再次确认导入路径 + except ImportError: + logger.error("无法从 src.config.config 导入 set_stop_event") + return # 或者 raise ImportError + set_stop_event() # 发送停止信号 + # --- 结束修改 --- try: _nickname_process.join(timeout=10) if _nickname_process.is_alive(): - logger.warning("Nickname processor process did not stop gracefully after 10 seconds. Terminating...") + logger.warning("绰号处理器进程在 10 秒后未优雅停止。正在终止...") _nickname_process.terminate() _nickname_process.join(timeout=5) except Exception as e: - logger.error(f"Error stopping nickname processor process: {e}", exc_info=True) + logger.error(f"停止绰号处理器进程时出错: {e}", exc_info=True) finally: if _nickname_process and not _nickname_process.is_alive(): - logger.info("Nickname processor process stopped successfully.") + logger.info("绰号处理器进程已成功停止。") else: - logger.error("Failed to stop nickname processor process.") + logger.error("未能停止绰号处理器进程。") _nickname_process = None else: - logger.info("Nickname processor process is not running.") + logger.info("绰号处理器进程未在运行。") # 可以在应用启动时调用 start_nickname_processor() -# 在应用关闭时调用 stop_nickname_processor() +# 可以在应用关闭时调用 stop_nickname_processor() +def get_stop_event(): + """获取全局停止事件""" + return _stop_event + +def set_stop_event(): + """设置全局停止事件,通知子进程退出""" + _stop_event.set() \ No newline at end of file diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 47373124..ced2f6ca 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,8 +1,8 @@ # GroupNickname/nickname_utils.py import random -from typing import List, Dict, Tuple, Optional +from typing import List, Dict, Tuple from src.common.logger_manager import get_logger -from .config import MAX_NICKNAMES_IN_PROMPT, NICKNAME_PROBABILITY_SMOOTHING +from src.config.config import global_config logger = get_logger("nickname_utils") @@ -14,7 +14,7 @@ def select_nicknames_for_prompt( Args: all_nicknames_info: 包含用户及其绰号信息的字典,格式为 - { "用户名1": [{"绰号A": 次数}, {"绰号B": 次数}], ... } + { "用户名1": [{"绰号A": 次数}, {"绰号B": 次数}], ... } Returns: List[Tuple[str, str, int]]: 选中的绰号列表,每个元素为 (用户名, 绰号, 次数)。 @@ -32,11 +32,11 @@ def select_nicknames_for_prompt( nickname, count = list(nickname_entry.items())[0] # 确保次数是正整数 if isinstance(count, int) and count > 0: - # 添加平滑因子,避免概率为0,并让低频词也有机会 - weight = count + NICKNAME_PROBABILITY_SMOOTHING + # 添加平滑因子,避免概率为0,并让低频词也有机会 + weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING candidates.append((user_name, nickname, count, weight)) else: - logger.warning(f"Invalid count for nickname '{nickname}' of user '{user_name}': {count}. Skipping.") + logger.warning(f"Invalid count for nickname '{nickname}' of user '{user_name}': {count}. Skipping.") else: logger.warning(f"Invalid nickname entry format for user '{user_name}': {nickname_entry}. Skipping.") @@ -50,13 +50,13 @@ def select_nicknames_for_prompt( if total_weight <= 0: # 如果所有权重都无效或为0,则随机选择(或按次数选择) candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 - selected = candidates[:MAX_NICKNAMES_IN_PROMPT] + selected = candidates[:global_config.MAX_NICKNAMES_IN_PROMPT] else: # 计算归一化概率 probabilities = [c[3] / total_weight for c in candidates] # 使用概率分布进行加权随机选择(不重复) - num_to_select = min(MAX_NICKNAMES_IN_PROMPT, len(candidates)) + num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) try: # random.choices 允许重复,我们需要不重复的选择 # 可以使用 numpy.random.choice 或手动实现不重复加权抽样 @@ -67,25 +67,25 @@ def select_nicknames_for_prompt( max_attempts = num_to_select * 5 # 防止无限循环 while len(selected) < num_to_select and attempts < max_attempts: - # 每次只选一个,避免一次选多个时概率分布变化导致的问题 - chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] - if chosen_index not in selected_indices: - selected_indices.add(chosen_index) - selected.append(candidates[chosen_index]) - attempts += 1 + # 每次只选一个,避免一次选多个时概率分布变化导致的问题 + chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] + if chosen_index not in selected_indices: + selected_indices.add(chosen_index) + selected.append(candidates[chosen_index]) + attempts += 1 # 如果尝试多次后仍未选够,补充出现次数最多的 if len(selected) < num_to_select: - remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 - needed = num_to_select - len(selected) - selected.extend(remaining_candidates[:needed]) + remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + needed = num_to_select - len(selected) + selected.extend(remaining_candidates[:needed]) except Exception as e: - logger.error(f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True) - # 出错时回退到选择次数最多的 N 个 - candidates.sort(key=lambda x: x[2], reverse=True) - selected = candidates[:MAX_NICKNAMES_IN_PROMPT] + logger.error(f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True) + # 出错时回退到选择次数最多的 N 个 + candidates.sort(key=lambda x: x[2], reverse=True) + selected = candidates[:global_config.MAX_NICKNAMES_IN_PROMPT] # 格式化输出并按次数排序 diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index ab8f9605..401e28b8 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -29,11 +29,7 @@ from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.moods.moods import MoodManager from src.individuality.individuality import Individuality from src.plugins.person_info.relationship_manager import relationship_manager - -# --- 导入 GroupNickname 相关 --- -from src.plugins.group_nickname.config import ENABLE_NICKNAME_MAPPING # <--- 导入开关 from src.plugins.group_nickname.nickname_processor import add_to_nickname_queue # <--- 导入队列添加函数 -# --- 结束导入 GroupNickname --- INITIAL_DURATION = 60.0 @@ -708,7 +704,7 @@ class HeartFChatting: anchor_message: 锚点消息对象。 reply: Bot 生成的回复内容列表。 """ - if not ENABLE_NICKNAME_MAPPING: + if not global_config.ENABLE_NICKNAME_MAPPING: return # 如果功能未开启,则直接返回 if not anchor_message or not anchor_message.chat_stream or not anchor_message.chat_stream.group_info: diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 4ea9f3e7..d5ccf482 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -14,7 +14,6 @@ from ..moods.moods import MoodManager from ..memory_system.Hippocampus import HippocampusManager from ..schedule.schedule_generator import bot_schedule from ..knowledge.knowledge_lib import qa_manager -from src.plugins.group_nickname.config import ENABLE_NICKNAME_MAPPING from src.plugins.group_nickname.nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_injection from src.plugins.person_info.relationship_manager import relationship_manager @@ -227,7 +226,7 @@ class PromptBuilder: # --- [修改] 注入绰号信息 --- nickname_injection_str = "" - if ENABLE_NICKNAME_MAPPING and chat_stream.group_info: + if global_config.ENABLE_NICKNAME_MAPPING and chat_stream.group_info: try: group_id = str(chat_stream.group_info.group_id) # 提取上下文中的用户 ID (需要 message_list_before_now 变量在此可用) diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 8ca6aaa3..371ed97d 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -125,6 +125,13 @@ steal_emoji = true # 是否偷取表情包,让麦麦可以发送她保存的 enable_check = false # 是否启用表情包过滤,只有符合该要求的表情包才会被保存 check_prompt = "符合公序良俗" # 表情包过滤要求,只有符合该要求的表情包才会被保存 +[group_nickname] +enable_nickname_mapping = false # 绰号映射功能总开关(默认关闭,建议关闭) +max_nicknames_in_prompt = 10 # Prompt 中最多注入的绰号数量(防止token数量爆炸) +nickname_probability_smoothing = 1 # 绰号加权随机选择的平滑因子 +nickname_queue_max_size = 100 # 绰号处理队列最大容量 +nickname_process_sleep_interval = 0.5 # 绰号处理进程休眠间隔(秒) + [memory] build_memory_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多 build_memory_distribution = [6.0,3.0,0.6,32.0,12.0,0.4] # 记忆构建分布,参数:分布1均值,标准差,权重,分布2均值,标准差,权重 @@ -302,6 +309,13 @@ provider = "SILICONFLOW" pri_in = 2 pri_out = 8 +#绰号映射生成模型 +[model.llm_nickname_mapping] +name = "deepseek-ai/DeepSeek-V3" +provider = "SILICONFLOW" +temp = 0.3 +pri_in = 2 +pri_out = 8 #此模型暂时没有使用!! #此模型暂时没有使用!! From 2fd00f44466b323d13da384ee6cddcf9ca9203da Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 18:01:48 +0800 Subject: [PATCH 05/58] =?UTF-8?q?=E6=88=90=E5=8A=9F=E5=8A=A0=E5=85=A5?= =?UTF-8?q?=E9=98=9F=E5=88=97=EF=BC=8C=E5=87=86=E5=A4=87=E5=B0=9D=E8=AF=95?= =?UTF-8?q?=E5=BC=80=E5=A7=8B=E8=BF=9B=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 24 +++++-------- .../group_nickname/nickname_processor.py | 34 +------------------ src/plugins/heartFC_chat/heartFC_chat.py | 6 ++-- .../heartFC_chat/heartflow_prompt_builder.py | 4 +-- 4 files changed, 12 insertions(+), 56 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 45e60d19..95803f9d 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -16,27 +16,19 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 if not model_config or not model_config.get("name"): logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") else: - llm_args = { - "model": model_config.get("name"), # 必须有 name - "temperature": model_config.get("temp", 0.5), # 使用 temp 字段 - "max_tokens": model_config.get("max_tokens", 200), # max_tokens 是可选的,取决于 LLMRequest 实现 - "api_key": model_config.get("key"), # 使用 key 字段 - "base_url": model_config.get("base_url"), # 使用 base_url 字段 - "request_type": "nickname_mapping" - } - # 清理 None 值参数 - llm_args = {k: v for k, v in llm_args.items() if v is not None} - - llm_mapper = LLMRequest(**llm_args) + llm_mapper = LLMRequest( # <-- LLM 初始化 + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", + ) logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") except Exception as e: logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) llm_mapper = None -# --- 结束修改 --- def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: - # ... (函数内容不变) ... user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) prompt = f""" @@ -69,7 +61,8 @@ Bot 最新回复: {{ "is_exist": false }} -5. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 +5. 不需要输出 Bot 自身的绰号。 +6. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 输出: """ @@ -102,7 +95,6 @@ async def analyze_chat_for_nicknames( response_content, _, _ = await llm_mapper.generate_response(prompt) logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") - # ... (解析 LLM 响应的逻辑不变) ... if not response_content: logger.warning("LLM 返回了空的绰号映射内容。") return {"is_exist": False} diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index e52e4e28..c4c39cf7 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -29,10 +29,7 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数。 """ - # --- [修改] 使用导入的 db 对象访问集合 --- - # !!! 重要:请确保 'person_info' 是你实际存储用户信息的集合名称 !!! person_info_collection = db.person_info - # --- 结束修改 --- if not person_info_collection: # 理论上 db 对象总是可用,但保留检查 logger.error("无法访问数据库集合 'person_info'。无法更新绰号计数。") @@ -107,9 +104,7 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): # --- 队列和进程 --- -# --- [修改] 使用全局配置 --- nickname_queue: mpQueue = mpQueue(maxsize=global_config.NICKNAME_QUEUE_MAX_SIZE) -# --- 结束修改 --- _nickname_process: Optional[Process] = None async def add_to_nickname_queue( @@ -119,9 +114,7 @@ async def add_to_nickname_queue( user_name_map: Dict[str, str] ): """将需要分析的数据放入队列。""" - # --- [修改] 使用全局配置 --- if not global_config.ENABLE_NICKNAME_MAPPING: - # --- 结束修改 --- return if group_id is None: @@ -138,9 +131,7 @@ async def add_to_nickname_queue( async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): """独立进程中的主循环,处理队列任务。""" - # --- [移除] 不再需要本地数据库初始化 --- - # _initialize_db() - # --- 结束移除 --- + logger.info("绰号处理循环已启动。") while not stop_event.is_set(): @@ -160,9 +151,7 @@ async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): await asyncio.sleep(0.05) else: - # --- [修改] 使用全局配置 --- await asyncio.sleep(global_config.NICKNAME_PROCESS_SLEEP_INTERVAL) - # --- 结束修改 --- except asyncio.CancelledError: logger.info("绰号处理循环已取消。") @@ -171,9 +160,6 @@ async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): logger.error(f"绰号处理循环出错: {e}\n{traceback.format_exc()}") await asyncio.sleep(5) - # --- [移除] 不再需要本地数据库关闭 --- - # _close_db() - # --- 结束移除 --- logger.info("绰号处理循环已结束。") @@ -190,24 +176,13 @@ def _run_processor_process(queue: mpQueue, stop_event: mpEvent): def start_nickname_processor(): """启动绰号映射处理进程。""" global _nickname_process - # --- [修改] 使用全局配置 --- if not global_config.ENABLE_NICKNAME_MAPPING: - # --- 结束修改 --- logger.info("绰号映射功能已禁用。处理器未启动。") return if _nickname_process is None or not _nickname_process.is_alive(): logger.info("正在启动绰号处理器进程...") - # --- [修改] 从全局配置导入停止事件控制函数 --- - try: - from src.config.config import get_stop_event, set_stop_event # 再次确认导入路径 - except ImportError: - logger.error("无法从 src.config.config 导入 get_stop_event/set_stop_event") - # 提供备选方案或退出 - return # 或者 raise ImportError - stop_event = get_stop_event() - # --- 结束修改 --- stop_event.clear() _nickname_process = Process(target=_run_processor_process, args=(nickname_queue, stop_event), daemon=True) _nickname_process.start() @@ -220,14 +195,7 @@ def stop_nickname_processor(): global _nickname_process if _nickname_process and _nickname_process.is_alive(): logger.info("正在停止绰号处理器进程...") - # --- [修改] 从全局配置导入停止事件控制函数 --- - try: - from src.config.config import set_stop_event # 再次确认导入路径 - except ImportError: - logger.error("无法从 src.config.config 导入 set_stop_event") - return # 或者 raise ImportError set_stop_event() # 发送停止信号 - # --- 结束修改 --- try: _nickname_process.join(timeout=10) if _nickname_process.is_alive(): diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 401e28b8..a291d2b2 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -695,7 +695,7 @@ class HeartFChatting: # 发生意外错误时,可以选择是否重置计数器,这里选择不重置 return False # 表示动作未成功 - # --- [修改] 触发绰号分析的函数 --- + # 触发绰号分析的函数 async def _trigger_nickname_analysis(self, anchor_message: MessageRecv, reply: List[str]): """ 触发绰号分析任务,将相关数据放入处理队列。 @@ -720,7 +720,7 @@ class HeartFChatting: limit=history_limit ) - # --- 使用 build_readable_messages 格式化历史记录 --- + # 格式化历史记录 chat_history_str = await build_readable_messages( messages=history_messages, replace_bot_name=True, # 在分析时也替换机器人名字,使其与 LLM 交互一致 @@ -729,7 +729,6 @@ class HeartFChatting: read_mark=0.0, # 不需要已读标记 truncate=False # 获取完整内容进行分析 ) - # --- 结束使用 build_readable_messages --- # 2. 获取 Bot 回复字符串 bot_reply_str = " ".join(reply) @@ -779,7 +778,6 @@ class HeartFChatting: except Exception as e: logger.error(f"{self.log_prefix} Error triggering nickname analysis: {e}", exc_info=True) - # --- 结束触发函数 --- async def _wait_for_new_message(self, observation, planner_start_db_time: float, log_prefix: str) -> bool: """ diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index d5ccf482..e775d07d 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -224,7 +224,7 @@ class PromptBuilder: logger.debug("开始构建prompt") - # --- [修改] 注入绰号信息 --- + # 注入绰号信息 nickname_injection_str = "" if global_config.ENABLE_NICKNAME_MAPPING and chat_stream.group_info: try: @@ -250,7 +250,6 @@ class PromptBuilder: all_nicknames_data = await relationship_manager.get_users_group_nicknames( platform, list(user_ids_in_context), group_id ) - # --- 结束调用 --- if all_nicknames_data: selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) @@ -260,7 +259,6 @@ class PromptBuilder: except Exception as e: logger.error(f"Error getting or formatting nickname info for focus prompt: {e}", exc_info=True) - # --- [结束修改] --- prompt = await global_prompt_manager.format_prompt( "heart_flow_prompt", From a12f9fae7fc5a113f86970d6a65e9089ca07f393 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 18:35:32 +0800 Subject: [PATCH 06/58] =?UTF-8?q?=E5=A6=82=E5=BC=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/main.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/main.py b/src/main.py index c0e743d6..859a4df4 100644 --- a/src/main.py +++ b/src/main.py @@ -1,4 +1,5 @@ import asyncio +import atexit # 导入atexit import time from .plugins.utils.statistic import LLMStatistics from .plugins.moods.moods import MoodManager @@ -17,6 +18,7 @@ from .common.logger_manager import get_logger from .plugins.remote import heartbeat_thread # noqa: F401 from .individuality.individuality import Individuality from .common.server import global_server +from src.plugins.group_nickname.nickname_processor import start_nickname_processor, stop_nickname_processor logger = get_logger("main") @@ -177,11 +179,23 @@ class MainSystem: async def main(): """主函数""" system = MainSystem() - await asyncio.gather( - system.initialize(), - system.schedule_tasks(), - ) + # 注册退出处理函数 + atexit.register(stop_nickname_processor) # <--- 添加退出处理 + try: + # 启动绰号处理进程 + start_nickname_processor() # <--- 启动 + + await asyncio.gather( + system.initialize(), + system.schedule_tasks(), + ) + except KeyboardInterrupt: + print("程序被中断") + # finally 块不再需要手动调用 stop,atexit 会处理 + # finally: + # print("正在关闭...") + # stop_nickname_processor() # <--- 关闭 if __name__ == "__main__": - asyncio.run(main()) + asyncio.run(main()) \ No newline at end of file From 01b2618f83abed858c3017b20a71b281533b7e30 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 19:16:22 +0800 Subject: [PATCH 07/58] =?UTF-8?q?=E8=BF=9B=E7=A8=8B=E9=99=84=E5=8A=A0?= =?UTF-8?q?=E6=88=90=E5=8A=9F=EF=BC=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 12 ++++++++++ src/main.py | 24 ++++--------------- .../group_nickname/nickname_processor.py | 14 +++++------ 3 files changed, 23 insertions(+), 27 deletions(-) diff --git a/bot.py b/bot.py index d547c360..32cc52d6 100644 --- a/bot.py +++ b/bot.py @@ -14,6 +14,9 @@ from src.common.logger_manager import get_logger from src.common.crash_logger import install_crash_handler from src.main import MainSystem +from src.main import MainSystem +from src.plugins.group_nickname.nickname_processor import start_nickname_processor, stop_nickname_processor # <--- 添加这行导入 +import atexit logger = get_logger("main") confirm_logger = get_logger("confirm") @@ -221,6 +224,15 @@ def raw_main(): env_config = {key: os.getenv(key) for key in os.environ} scan_provider(env_config) + # 在这里启动绰号处理进程 + logger.info("准备启动绰号处理进程...") + start_nickname_processor() # <--- 添加启动调用 + logger.info("已调用启动绰号处理进程。") + + # 注册退出处理函数 (确保进程能被关闭) + atexit.register(stop_nickname_processor) # <--- 在这里注册停止函数 + logger.info("已注册绰号处理进程的退出处理程序。") + # 返回MainSystem实例 return MainSystem() diff --git a/src/main.py b/src/main.py index 859a4df4..c0e743d6 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,4 @@ import asyncio -import atexit # 导入atexit import time from .plugins.utils.statistic import LLMStatistics from .plugins.moods.moods import MoodManager @@ -18,7 +17,6 @@ from .common.logger_manager import get_logger from .plugins.remote import heartbeat_thread # noqa: F401 from .individuality.individuality import Individuality from .common.server import global_server -from src.plugins.group_nickname.nickname_processor import start_nickname_processor, stop_nickname_processor logger = get_logger("main") @@ -179,23 +177,11 @@ class MainSystem: async def main(): """主函数""" system = MainSystem() - # 注册退出处理函数 - atexit.register(stop_nickname_processor) # <--- 添加退出处理 + await asyncio.gather( + system.initialize(), + system.schedule_tasks(), + ) - try: - # 启动绰号处理进程 - start_nickname_processor() # <--- 启动 - - await asyncio.gather( - system.initialize(), - system.schedule_tasks(), - ) - except KeyboardInterrupt: - print("程序被中断") - # finally 块不再需要手动调用 stop,atexit 会处理 - # finally: - # print("正在关闭...") - # stop_nickname_processor() # <--- 关闭 if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index c4c39cf7..a0b7d9e2 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,10 +1,7 @@ # GroupNickname/nickname_processor.py import asyncio -import threading import traceback -# 明确导入 Event 和 Queue -from multiprocessing import Process, Queue as mpQueue -from multiprocessing.synchronize import Event as mpEvent # 从 synchronize 导入 Event +from multiprocessing import Process, Queue as mpQueue, Event from typing import Dict, Optional from pymongo import MongoClient @@ -17,7 +14,7 @@ from src.common.database import db # 导入数据库初始化和关闭函数 logger = get_logger("nickname_processor") # 获取日志记录器实例 # --- 运行时状态 (用于安全停止进程) --- -_stop_event = threading.Event() +_stop_event = Event() # --- 数据库连接 --- mongo_client: Optional[MongoClient] = None # MongoDB 客户端实例 @@ -129,12 +126,13 @@ async def add_to_nickname_queue( logger.warning(f"无法将项目添加到绰号队列(可能已满): {e}", exc_info=True) -async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): +async def _nickname_processing_loop(queue: mpQueue, stop_event): """独立进程中的主循环,处理队列任务。""" logger.info("绰号处理循环已启动。") while not stop_event.is_set(): + logger.info("绰号处理循环正在运行...") try: if not queue.empty(): item = queue.get() @@ -149,7 +147,7 @@ async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): else: logger.warning(f"从队列接收到意外的项目类型: {type(item)}") - await asyncio.sleep(0.05) + await asyncio.sleep(5) else: await asyncio.sleep(global_config.NICKNAME_PROCESS_SLEEP_INTERVAL) @@ -163,7 +161,7 @@ async def _nickname_processing_loop(queue: mpQueue, stop_event: mpEvent): logger.info("绰号处理循环已结束。") -def _run_processor_process(queue: mpQueue, stop_event: mpEvent): +def _run_processor_process(queue: mpQueue, stop_event): """进程启动函数,运行异步循环。""" try: loop = asyncio.new_event_loop() From d76bd95159cf4fc9760e104521915ec837bd0332 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 21:30:30 +0800 Subject: [PATCH 08/58] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E4=BA=86=E5=90=97?= =?UTF-8?q?=EF=BC=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/heart_flow/mai_state_manager.py | 2 +- src/plugins/group_nickname/nickname_mapper.py | 14 +- .../group_nickname/nickname_processor.py | 141 ++++--- src/plugins/heartFC_chat/heartFC_chat.py | 2 +- src/plugins/person_info/person_info.py | 1 + .../person_info/relationship_manager.py | 353 +++++++++++------- 6 files changed, 332 insertions(+), 181 deletions(-) diff --git a/src/heart_flow/mai_state_manager.py b/src/heart_flow/mai_state_manager.py index cd739344..dfba2cc2 100644 --- a/src/heart_flow/mai_state_manager.py +++ b/src/heart_flow/mai_state_manager.py @@ -11,7 +11,7 @@ logger = get_logger("mai_state") # -- 状态相关的可配置参数 (可以从 glocal_config 加载) -- # enable_unlimited_hfc_chat = True # 调试用:无限专注聊天 -enable_unlimited_hfc_chat = False +enable_unlimited_hfc_chat = True prevent_offline_state = True # 目前默认不启用OFFLINE状态 diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 95803f9d..b5e258a4 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -30,9 +30,9 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) - + print(f"\n\n\n{user_list_str}\n\n\n\n") prompt = f""" -任务:分析以下聊天记录和 Bot 的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 +任务:分析以下聊天记录和你的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 已知用户信息: {user_list_str} @@ -42,18 +42,18 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: {chat_history_str} --- -Bot 最新回复: +你的最新回复: {bot_reply} 分析要求: -1. 识别聊天记录和 Bot 回复中出现的可能是用户绰号的词语。 +1. 识别聊天记录和你发言中出现的可能是用户绰号的词语。 2. 判断这些绰号是否能明确地指向某个特定的用户 ID。一个绰号必须在上下文中清晰地与某个发言人或被提及的人关联起来。 3. 如果能建立可靠的一一映射关系,请输出一个 JSON 对象,格式如下: {{ "is_exist": true, "data": {{ - "用户ID_A": "绰号_A", - "用户ID_B": "绰号_B" + "用户A数字id": "绰号_A", + "用户B数字id": "绰号_B" }} }} 其中 "data" 字段的键是用户的 ID,值是对应的绰号。只包含你能确认映射关系的绰号。 @@ -61,7 +61,7 @@ Bot 最新回复: {{ "is_exist": false }} -5. 不需要输出 Bot 自身的绰号。 +5. 你的昵称后面包含"(你)",不需要输出你自身的绰号。 6. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 输出: diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index a0b7d9e2..ae75851c 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -20,85 +20,132 @@ _stop_event = Event() mongo_client: Optional[MongoClient] = None # MongoDB 客户端实例 person_info_collection = None # 用户信息集合对象 - -# --- 数据库更新逻辑 --- +# --- 数据库更新逻辑 (使用推荐的新结构) --- async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数。 + 使用新的数据结构: + { + "user_id": 12345, + "group_nicknames": [ # <--- 字段名统一为 group_nicknames + { + "group_id": "群号1", + "nicknames": [ { "name": "绰号A", "count": 5 }, ... ] + }, ... + ] + } """ - person_info_collection = db.person_info + person_info_collection = db.person_info # 获取集合对象 - if not person_info_collection: # 理论上 db 对象总是可用,但保留检查 - logger.error("无法访问数据库集合 'person_info'。无法更新绰号计数。") - return if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。") return - logger.info(f"尝试更新群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") + logger.info(f"尝试更新群组 '{group_id}' 的绰号计数 (新结构),映射为: {nickname_map}") - for user_id, nickname in nickname_map.items(): - if not user_id or not nickname: - logger.warning(f"跳过绰号映射中的无效条目: user_id='{user_id}', nickname='{nickname}'") + for user_id_str, nickname in nickname_map.items(): # user_id 从 map 中取出是 str + if not user_id_str or not nickname: + logger.warning(f"跳过绰号映射中的无效条目: user_id='{user_id_str}', nickname='{nickname}'") continue - group_id_str = str(group_id) + group_id_str = str(group_id) # 确保 group_id 是字符串 + try: + # 假设数据库中存储的用户ID是整数类型,如果不是请移除 int() + user_id_int = int(user_id_str) + except ValueError: + logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。") + continue try: - # a. 确保用户文档存在 group_nickname 字段且为 list + # 步骤 1: 确保用户文档存在,且有 group_nicknames 字段 (如果不存在则添加空数组) + # 注意:这里不再使用 $setOnInsert 添加 group_nicknames,因为 $addToSet 或 $push 在字段不存在时会自动创建。 + # upsert=True 确保用户文档存在。 person_info_collection.update_one( - {"person_id": user_id}, - {"$setOnInsert": {"group_nickname": []}}, + {"user_id": user_id_int}, + {"$setOnInsert": {"user_id": user_id_int}}, # 确保 upsert 时 user_id 被正确设置 upsert=True ) - - # b. 确保特定 group_id 的条目存在 - update_result = person_info_collection.update_one( - {"person_id": user_id, f"group_nickname.{group_id_str}": {"$exists": False}}, - {"$push": {"group_nickname": {group_id_str: []}}} + # 确保 group_nicknames 字段存在且为数组 (如果不存在则创建) + person_info_collection.update_one( + {"user_id": user_id_int, "group_nicknames": {"$exists": False}}, + {"$set": {"group_nicknames": []}} ) - if update_result.modified_count > 0: - logger.debug(f"为用户 '{user_id}' 添加了群组 '{group_id_str}' 的条目。") - # c. 确保特定 nickname 存在于 group_id 的数组中,并增加计数 + + # 步骤 2: 尝试直接增加现有绰号的计数 + # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配,且该元素的 nicknames 数组中存在一个元素的 name 匹配 update_result = person_info_collection.update_one( { - "person_id": user_id, - "group_nickname": { - "$elemMatch": { - group_id_str: {"$elemMatch": {nickname: {"$exists": True}}} - } + "user_id": user_id_int, + "group_nicknames": { # <--- 确保使用 group_nicknames + "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} } }, - {"$inc": {f"group_nickname.$[group].$[nick].{nickname}": 1}}, + { # <--- 确保使用 group_nicknames + "$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1} + }, array_filters=[ - {f"group.{group_id_str}": {"$exists": True}}, - {f"nick.{nickname}": {"$exists": True}} + {"group.group_id": group_id_str}, + {"nick.name": nickname} ] ) - if update_result.matched_count == 0: - # nickname 不存在,添加 nickname 并设置次数为 1 - add_nick_result = person_info_collection.update_one( - {"person_id": user_id, f"group_nickname.{group_id_str}": {"$exists": True}}, - {"$push": {f"group_nickname.$[group].{group_id_str}": {nickname: 1}}}, - array_filters=[{f"group.{group_id_str}": {"$exists": True}}] - ) - if add_nick_result.modified_count > 0: - logger.debug(f"为用户 '{user_id}' 在群组 '{group_id_str}' 中添加了绰号 '{nickname}',计数为 1。") - else: - logger.warning(f"未能为用户 '{user_id}' 在群组 '{group_id_str}' 中添加绰号 '{nickname}'。更新结果: {add_nick_result.raw_result}") + if update_result.modified_count > 0: + logger.debug(f"用户 '{user_id_str}' 在群组 '{group_id_str}' 中的绰号 '{nickname}' 计数已增加。") + continue # 处理完成,进行下一次循环 - elif update_result.modified_count > 0: - logger.debug(f"用户 '{user_id}' 在群组 '{group_id_str}' 中的绰号 '{nickname}' 计数已增加。") + # 步骤 3: 如果步骤 2 未修改任何内容,尝试将新绰号添加到现有群组的 nicknames 数组中 + # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配 + update_result = person_info_collection.update_one( + { + "user_id": user_id_int, + "group_nicknames.group_id": group_id_str # <--- 确保使用 group_nicknames + }, + { # <--- 确保使用 group_nicknames + "$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}} + }, + array_filters=[ + {"group.group_id": group_id_str} + ] + ) + + if update_result.modified_count > 0: + logger.debug(f"为用户 '{user_id_str}' 在群组 '{group_id_str}' 中添加了新绰号 '{nickname}',计数为 1。") + continue # 处理完成,进行下一次循环 + + # 步骤 4: 如果步骤 2 和 3 都未修改任何内容,说明群组条目本身可能不存在于 group_nicknames 数组中,尝试添加新的群组条目 + # 条件:用户存在,且 group_nicknames 数组中 *不包含* 指定 group_id 的元素 + update_result = person_info_collection.update_one( + { + "user_id": user_id_int, + "group_nicknames.group_id": {"$ne": group_id_str} # <--- 检查 group_id 是否不存在 + }, + { + "$push": { # <--- 确保使用 group_nicknames + "group_nicknames": { + "group_id": group_id_str, + "nicknames": [{"name": nickname, "count": 1}] + } + } + } + # 注意:这里不需要 upsert=True,因为步骤1已确保用户存在。 + # 如果字段 group_nicknames 不存在,$push 会自动创建它。 + ) + + # 记录日志(无论修改与否,因为可能是因为组已存在但无匹配导致没修改) + if update_result.modified_count > 0: + logger.debug(f"为用户 '{user_id_str}' 添加了新群组 '{group_id_str}' 条目和绰号 '{nickname}'。") else: - logger.warning(f"绰号增加操作匹配但未修改用户 '{user_id}' 的绰号 '{nickname}'。更新结果: {update_result.raw_result}") + # 到这里还没成功,可能意味着群组已存在但之前的步骤意外失败,或者有并发问题 + logger.warning(f"未能为用户 '{user_id_str}' 更新或添加群组 '{group_id_str}' 的绰号 '{nickname}'。可能群组已存在但前面的步骤未成功修改。UpdateResult: {update_result.raw_result}") + except OperationFailure as op_err: - logger.error(f"数据库操作失败: 用户 {user_id}, 群组 {group_id_str}, 绰号 {nickname}: {op_err}", exc_info=True) + # 使用 logger.exception 来记录数据库操作错误,自动包含 traceback + logger.exception(f"数据库操作失败: 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}") # <--- 修改了日志记录方式 except Exception as e: - logger.error(f"更新用户 {user_id} 的绰号 {nickname} 时发生意外错误: {e}", exc_info=True) - + # 记录其他意外错误 + logger.exception(f"更新用户 {user_id_str} 的绰号 {nickname} 时发生意外错误") # <--- 修改了日志记录方式 # --- 队列和进程 --- nickname_queue: mpQueue = mpQueue(maxsize=global_config.NICKNAME_QUEUE_MAX_SIZE) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index ca56601e..58586c21 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -582,7 +582,7 @@ class HeartFChatting: response_set=reply, send_emoji=emoji_query, ) - + print("消息发送成功,准备进入绰号分析") # --- [新增] 触发绰号分析 --- # 在发送成功后(或至少尝试发送后)触发 await self._trigger_nickname_analysis(anchor_message, reply) diff --git a/src/plugins/person_info/person_info.py b/src/plugins/person_info/person_info.py index 8bafe5eb..6114f181 100644 --- a/src/plugins/person_info/person_info.py +++ b/src/plugins/person_info/person_info.py @@ -51,6 +51,7 @@ person_info_default = { "konw_time": 0, "msg_interval": 2000, "msg_interval_list": [], + "group_nicknames": [], } # 个人信息的各项与默认值在此定义,以下处理会自动创建/补全每一项 diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index 7bdc02be..c8bda8f3 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -5,7 +5,10 @@ from bson.decimal128 import Decimal128 from .person_info import person_info_manager import time import random -from typing import List, Dict, Any, Optional, Tuple # 确保导入了 List, Dict, Optional, Tuple +from typing import List, Dict +# import re +# import traceback + logger = get_logger("relation") @@ -20,13 +23,25 @@ class RelationshipManager: def mood_manager(self): if self._mood_manager is None: from ..moods.moods import MoodManager # 延迟导入 + self._mood_manager = MoodManager.get_instance() return self._mood_manager def positive_feedback_sys(self, label: str, stance: str): """正反馈系统,通过正反馈系数增益情绪变化,根据情绪再影响关系变更""" - positive_list = ["开心", "惊讶", "害羞"] - negative_list = ["愤怒", "悲伤", "恐惧", "厌恶"] + + positive_list = [ + "开心", + "惊讶", + "害羞", + ] + + negative_list = [ + "愤怒", + "悲伤", + "恐惧", + "厌恶", + ] if label in positive_list: if 7 > self.positive_feedback_value >= 0: @@ -66,21 +81,6 @@ class RelationshipManager: is_known = person_info_manager.is_person_known(platform, user_id) return is_known - @staticmethod - async def is_qved_name(platform, user_id): - """判断是否已经命名""" - person_id = person_info_manager.get_person_id(platform, user_id) - # 优化:直接检查 person_name 字段是否存在且不为 None 或空字符串 - person_name = await person_info_manager.get_value(person_id, "person_name") - return bool(person_name) # 如果 person_name 非空则返回 True - - @staticmethod - async def get_person_name(platform: str, user_id: str) -> Optional[str]: - """获取单个用户的 person_name""" - person_id = person_info_manager.get_person_id(platform, str(user_id)) # 确保 user_id 是字符串 - return await person_info_manager.get_value(person_id, "person_name") - - # --- [新增] 批量获取用户名称 --- @staticmethod async def get_person_names_batch(platform: str, user_ids: List[str]) -> Dict[str, str]: """ @@ -186,169 +186,272 @@ class RelationshipManager: logger.error(f"Error during batch get group nicknames: {e}", exc_info=True) return nicknames_data - # --- 结束新增 --- + @staticmethod + async def is_qved_name(platform, user_id): + """判断是否认识某人""" + person_id = person_info_manager.get_person_id(platform, user_id) + is_qved = await person_info_manager.has_one_field(person_id, "person_name") + old_name = await person_info_manager.get_value(person_id, "person_name") + # print(f"old_name: {old_name}") + # print(f"is_qved: {is_qved}") + if is_qved and old_name is not None: + return True + else: + return False @staticmethod async def first_knowing_some_one(platform, user_id, user_nickname, user_cardname, user_avatar): - """初次认识某人或更新信息""" + """判断是否认识某人""" person_id = person_info_manager.get_person_id(platform, user_id) - # 首次认识时,除了更新 nickname,也应该设置初始关系值等 - initial_data = { - "platform": platform, - "user_id": user_id, - "nickname": user_nickname, - "konw_time": int(time.time()), - "relationship_value": 0.0, # 设置初始关系值为 0 - "msg_interval": -1, # 初始消息间隔设为 -1 或其他标记 - "msg_interval_list": [], - "group_nickname": [] # 初始化为空列表 - } - # 使用 update_one 并结合 $setOnInsert 来避免覆盖已有数据 - await person_info_manager.collection.update_one( - {"person_id": person_id}, - { - "$set": {"nickname": user_nickname}, # 总是更新 nickname - "$setOnInsert": initial_data # 仅在插入新文档时设置这些初始值 - }, - upsert=True - ) - # 尝试获取或生成 person_name + await person_info_manager.update_one_field(person_id, "nickname", user_nickname) + # await person_info_manager.update_one_field(person_id, "user_cardname", user_cardname) + # await person_info_manager.update_one_field(person_id, "user_avatar", user_avatar) await person_info_manager.qv_person_name(person_id, user_nickname, user_cardname, user_avatar) - async def calculate_update_relationship_value(self, chat_stream: ChatStream, label: str, stance: str) -> tuple: - """计算并变更关系值""" - stancedict = {"支持": 0, "中立": 1, "反对": 2} + """计算并变更关系值 + 新的关系值变更计算方式: + 将关系值限定在-1000到1000 + 对于关系值的变更,期望: + 1.向两端逼近时会逐渐减缓 + 2.关系越差,改善越难,关系越好,恶化越容易 + 3.人维护关系的精力往往有限,所以当高关系值用户越多,对于中高关系值用户增长越慢 + 4.连续正面或负面情感会正反馈 + + 返回: + 用户昵称,变更值,变更后关系等级 + + """ + stancedict = { + "支持": 0, + "中立": 1, + "反对": 2, + } + valuedict = { - "开心": 1.5, "愤怒": -2.0, "悲伤": -0.5, "惊讶": 0.6, "害羞": 2.0, - "平静": 0.3, "恐惧": -1.5, "厌恶": -1.0, "困惑": 0.5, + "开心": 1.5, + "愤怒": -2.0, + "悲伤": -0.5, + "惊讶": 0.6, + "害羞": 2.0, + "平静": 0.3, + "恐惧": -1.5, + "厌恶": -1.0, + "困惑": 0.5, } person_id = person_info_manager.get_person_id(chat_stream.user_info.platform, chat_stream.user_info.user_id) - data = { # 这个 data 似乎是用于 setOnInsert 的,应该在 first_knowing 时处理 + data = { "platform": chat_stream.user_info.platform, "user_id": chat_stream.user_info.user_id, "nickname": chat_stream.user_info.user_nickname, "konw_time": int(time.time()), } old_value = await person_info_manager.get_value(person_id, "relationship_value") - old_value = self.ensure_float(old_value, person_id) # 确保是 float + old_value = self.ensure_float(old_value, person_id) - # 限制旧值范围 - old_value = max(min(old_value, 1000), -1000) + if old_value > 1000: + old_value = 1000 + elif old_value < -1000: + old_value = -1000 - value_change = 0.0 # 初始化变化量 - base_value = valuedict.get(label, 0.0) # 获取基础情绪值 + value = valuedict[label] + if old_value >= 0: + if valuedict[label] >= 0 and stancedict[stance] != 2: + value = value * math.cos(math.pi * old_value / 2000) + if old_value > 500: + rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) + high_value_count = len(rdict) + if old_value > 700: + value *= 3 / (high_value_count + 2) # 排除自己 + else: + value *= 3 / (high_value_count + 3) + elif valuedict[label] < 0 and stancedict[stance] != 0: + value = value * math.exp(old_value / 2000) + else: + value = 0 + elif old_value < 0: + if valuedict[label] >= 0 and stancedict[stance] != 2: + value = value * math.exp(old_value / 2000) + elif valuedict[label] < 0 and stancedict[stance] != 0: + value = value * math.cos(math.pi * old_value / 2000) + else: + value = 0 - # 应用立场影响和关系值衰减/增强逻辑 - if base_value > 0 and stancedict.get(stance, 1) != 2: # 正面情绪且非反对 - value_change = base_value * math.cos(math.pi * old_value / 2000) - if old_value > 500: # 高关系值增长减缓 - rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) - high_value_count = len(rdict) - # 注意:这里的减缓因子可能需要调整 - value_change *= 3 / (high_value_count + (2 if old_value > 700 else 3)) - elif base_value < 0 and stancedict.get(stance, 1) != 0: # 负面情绪且非支持 - # 关系好时负面影响更大,关系差时负面影响减弱 - value_change = base_value * math.exp(old_value / 2000) if old_value >= 0 else base_value * math.cos(math.pi * old_value / 2000) - # else: 立场冲突或情绪平静,基础变化为 0 - - # 应用正反馈系统和情绪反馈 self.positive_feedback_sys(label, stance) - value_change = self.mood_feedback(value_change) # 应用当前情绪对关系变化的影响 - value_change = self.feedback_to_mood(value_change) # 应用连续反馈对关系变化的影响 + value = self.mood_feedback(value) - new_value = old_value + value_change - # 再次限制新值范围 - new_value = max(min(new_value, 1000), -1000) - actual_change = new_value - old_value # 记录实际变化量 - - level_num = self.calculate_level_num(new_value) + level_num = self.calculate_level_num(old_value + value) relationship_level = ["厌恶", "冷漠", "一般", "友好", "喜欢", "暧昧"] logger.info( - f"用户: {chat_stream.user_info.user_nickname} " + f"用户: {chat_stream.user_info.user_nickname}" f"当前关系: {relationship_level[level_num]}, " f"关系值: {old_value:.2f}, " - f"立场情感: {stance}-{label}, " - f"变更: {actual_change:+.5f}, " - f"新值: {new_value:.2f}" + f"当前立场情感: {stance}-{label}, " + f"变更: {value:+.5f}" ) - # 更新数据库,只更新 relationship_value - await person_info_manager.update_one_field(person_id, "relationship_value", new_value) + await person_info_manager.update_one_field(person_id, "relationship_value", old_value + value, data) - return chat_stream.user_info.user_nickname, actual_change, relationship_level[level_num] + return chat_stream.user_info.user_nickname, value, relationship_level[level_num] + async def calculate_update_relationship_value_with_reason( + self, chat_stream: ChatStream, label: str, stance: str, reason: str + ) -> tuple: + """计算并变更关系值 + 新的关系值变更计算方式: + 将关系值限定在-1000到1000 + 对于关系值的变更,期望: + 1.向两端逼近时会逐渐减缓 + 2.关系越差,改善越难,关系越好,恶化越容易 + 3.人维护关系的精力往往有限,所以当高关系值用户越多,对于中高关系值用户增长越慢 + 4.连续正面或负面情感会正反馈 + + 返回: + 用户昵称,变更值,变更后关系等级 + + """ + stancedict = { + "支持": 0, + "中立": 1, + "反对": 2, + } + + valuedict = { + "开心": 1.5, + "愤怒": -2.0, + "悲伤": -0.5, + "惊讶": 0.6, + "害羞": 2.0, + "平静": 0.3, + "恐惧": -1.5, + "厌恶": -1.0, + "困惑": 0.5, + } + + person_id = person_info_manager.get_person_id(chat_stream.user_info.platform, chat_stream.user_info.user_id) + data = { + "platform": chat_stream.user_info.platform, + "user_id": chat_stream.user_info.user_id, + "nickname": chat_stream.user_info.user_nickname, + "konw_time": int(time.time()), + } + old_value = await person_info_manager.get_value(person_id, "relationship_value") + old_value = self.ensure_float(old_value, person_id) + + if old_value > 1000: + old_value = 1000 + elif old_value < -1000: + old_value = -1000 + + value = valuedict[label] + if old_value >= 0: + if valuedict[label] >= 0 and stancedict[stance] != 2: + value = value * math.cos(math.pi * old_value / 2000) + if old_value > 500: + rdict = await person_info_manager.get_specific_value_list("relationship_value", lambda x: x > 700) + high_value_count = len(rdict) + if old_value > 700: + value *= 3 / (high_value_count + 2) # 排除自己 + else: + value *= 3 / (high_value_count + 3) + elif valuedict[label] < 0 and stancedict[stance] != 0: + value = value * math.exp(old_value / 2000) + else: + value = 0 + elif old_value < 0: + if valuedict[label] >= 0 and stancedict[stance] != 2: + value = value * math.exp(old_value / 2000) + elif valuedict[label] < 0 and stancedict[stance] != 0: + value = value * math.cos(math.pi * old_value / 2000) + else: + value = 0 + + self.positive_feedback_sys(label, stance) + value = self.mood_feedback(value) + + level_num = self.calculate_level_num(old_value + value) + relationship_level = ["厌恶", "冷漠", "一般", "友好", "喜欢", "暧昧"] + logger.info( + f"用户: {chat_stream.user_info.user_nickname}" + f"当前关系: {relationship_level[level_num]}, " + f"关系值: {old_value:.2f}, " + f"当前立场情感: {stance}-{label}, " + f"变更: {value:+.5f}" + ) + + await person_info_manager.update_one_field(person_id, "relationship_value", old_value + value, data) + + return chat_stream.user_info.user_nickname, value, relationship_level[level_num] async def build_relationship_info(self, person, is_id: bool = False) -> str: - """构建用于 Prompt 的关系信息字符串""" if is_id: person_id = person - # 如果只有 person_id,需要反查 platform 和 user_id 来获取 person_name - # 这依赖于 person_id 的格式,假设是 platform_userid - try: - platform, user_id_str = person_id.split("_", 1) - person_name = await self.get_person_name(platform, user_id_str) - except ValueError: - logger.warning(f"Invalid person_id format for prompt building: {person_id}") - person_name = None else: # print(f"person: {person}") person_id = person_info_manager.get_person_id(person[0], person[1]) person_name = await person_info_manager.get_value(person_id, "person_name") # print(f"person_name: {person_name}") relationship_value = await person_info_manager.get_value(person_id, "relationship_value") - relationship_value = self.ensure_float(relationship_value, person_id) # 确保是 float level_num = self.calculate_level_num(relationship_value) - # 定义关系等级和对应的行为描述 - relationship_levels = ["厌恶", "冷漠以对", "认识", "友好对待", "喜欢", "暧昧"] - relation_prompt_list = ["忽视的回应", "冷淡回复", "保持理性", "愿意回复", "积极回复", "友善和包容的回复"] - - # 根据等级和随机性决定是否输出及输出内容 - if level_num == 2: # "一般"关系不特别提示 + if level_num == 0 or level_num == 5: + relationship_level = ["厌恶", "冷漠以对", "认识", "友好对待", "喜欢", "暧昧"] + relation_prompt2_list = [ + "忽视的回应", + "冷淡回复", + "保持理性", + "愿意回复", + "积极回复", + "友善和包容的回复", + ] + return f"你{relationship_level[level_num]}{person_name},打算{relation_prompt2_list[level_num]}。\n" + elif level_num == 2: return "" - elif level_num in [0, 5] or random.random() < 0.6: # 极好/极差 或 60% 概率 - # 修正索引,确保在列表范围内 - level_idx = max(0, min(level_num, len(relationship_levels) - 1)) - prompt_idx = max(0, min(level_num, len(relation_prompt_list) - 1)) - return f"你{relationship_levels[level_idx]}{person_name},打算{relation_prompt_list[prompt_idx]}。\n" else: - return "" + if random.random() < 0.6: + relationship_level = ["厌恶", "冷漠以对", "认识", "友好对待", "喜欢", "暧昧"] + relation_prompt2_list = [ + "忽视的回应", + "冷淡回复", + "保持理性", + "愿意回复", + "积极回复", + "友善和包容的回复", + ] + return f"你{relationship_level[level_num]}{person_name},打算{relation_prompt2_list[level_num]}。\n" + else: + return "" @staticmethod def calculate_level_num(relationship_value) -> int: """关系等级计算""" - # 确保 value 是 float - try: - value = float(relationship_value.to_decimal() if isinstance(relationship_value, Decimal128) else relationship_value) - except (ValueError, TypeError, AttributeError): - value = 0.0 # 转换失败默认为 0 - - # 阈值判断 - if value < -227: return 0 - elif value < -73: return 1 - elif value < 227: return 2 - elif value < 587: return 3 - elif value < 900: return 4 - else: return 5 # >= 900 + if -1000 <= relationship_value < -227: + level_num = 0 + elif -227 <= relationship_value < -73: + level_num = 1 + elif -73 <= relationship_value < 227: + level_num = 2 + elif 227 <= relationship_value < 587: + level_num = 3 + elif 587 <= relationship_value < 900: + level_num = 4 + elif 900 <= relationship_value <= 1000: + level_num = 5 + else: + level_num = 5 if relationship_value > 1000 else 0 + return level_num @staticmethod def ensure_float(value, person_id): """确保返回浮点数,转换失败返回0.0""" - if isinstance(value, (float, int)): # 直接处理 float 和 int - return float(value) + if isinstance(value, float): + return value try: - # 尝试处理 Decimal128 或其他可转换为 float 的类型 return float(value.to_decimal() if isinstance(value, Decimal128) else value) except (ValueError, TypeError, AttributeError): - logger.warning(f"[关系管理] {person_id} 值转换失败(原始值:{value}),已重置为0") - # 在转换失败时,尝试在数据库中将该字段重置为 0.0 - try: - person_info_manager.update_one_field(person_id, "relationship_value", 0.0) - except Exception as db_err: - logger.error(f"Failed to reset relationship_value for {person_id} in DB: {db_err}") + logger.warning(f"[关系管理] {person_id}值转换失败(原始值:{value}),已重置为0") return 0.0 From 9d28c3660d6f0659c744f02020e328e03eb59fc0 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 22:03:19 +0800 Subject: [PATCH 09/58] =?UTF-8?q?=E6=B5=8B=E8=AF=95prompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../group_nickname/nickname_processor.py | 4 +- .../heartFC_chat/heartflow_prompt_builder.py | 40 ++++++++++++++++--- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index ae75851c..669b3094 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,11 +1,10 @@ -# GroupNickname/nickname_processor.py import asyncio import traceback from multiprocessing import Process, Queue as mpQueue, Event from typing import Dict, Optional from pymongo import MongoClient -from pymongo.errors import ConnectionFailure, OperationFailure +from pymongo.errors import OperationFailure from src.common.logger_manager import get_logger # 导入日志管理器 from src.config.config import global_config # 导入全局配置 @@ -179,7 +178,6 @@ async def _nickname_processing_loop(queue: mpQueue, stop_event): logger.info("绰号处理循环已启动。") while not stop_event.is_set(): - logger.info("绰号处理循环正在运行...") try: if not queue.empty(): item = queue.get() diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index eafb2b67..216955c3 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -229,13 +229,8 @@ class PromptBuilder: if global_config.ENABLE_NICKNAME_MAPPING and chat_stream.group_info: try: group_id = str(chat_stream.group_info.group_id) - # 提取上下文中的用户 ID (需要 message_list_before_now 变量在此可用) - # 假设 message_list_before_now 在此函数作用域内可用 user_ids_in_context = set() - # !!! 注意: 确保 message_list_before_now 在这里是可访问的 !!! - # 如果不是,你需要从 chat_stream 或其他地方重新获取或传递它 - # 假设 message_list_before_now 存在 - if 'message_list_before_now' in locals() or 'message_list_before_now' in globals(): + if message_list_before_now: for msg in message_list_before_now: sender_id = msg.get('sender_id') if sender_id: @@ -259,6 +254,7 @@ class PromptBuilder: except Exception as e: logger.error(f"Error getting or formatting nickname info for focus prompt: {e}", exc_info=True) + logger.debug(f"-------------------nickname_injection_str_______________________\n{nickname_injection_str}\n\n") prompt = await global_prompt_manager.format_prompt( "heart_flow_prompt", @@ -415,6 +411,38 @@ class PromptBuilder: else: schedule_prompt = "" + # 注入绰号信息 + nickname_injection_str = "" + if global_config.ENABLE_NICKNAME_MAPPING and chat_stream.group_info: + try: + group_id = str(chat_stream.group_info.group_id) + user_ids_in_context = set() + if message_list_before_now: + for msg in message_list_before_now: + sender_id = msg.get('sender_id') + if sender_id: + user_ids_in_context.add(str(sender_id)) + else: + logger.warning("Variable 'message_list_before_now' not found for nickname injection in focus prompt.") + + + if user_ids_in_context: + platform = chat_stream.platform + # --- 调用批量获取群组绰号的方法 --- + all_nicknames_data = await relationship_manager.get_users_group_nicknames( + platform, list(user_ids_in_context), group_id + ) + + if all_nicknames_data: + selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) + nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) + if nickname_injection_str: + logger.debug(f"Injecting nickname info into focus prompt:\n{nickname_injection_str}") + + except Exception as e: + logger.error(f"Error getting or formatting nickname info for focus prompt: {e}", exc_info=True) + logger.debug(f"-------------------nickname_injection_str_______________________\n{nickname_injection_str}\n\n") + prompt = await global_prompt_manager.format_prompt( "reasoning_prompt_main", relation_prompt=relation_prompt, From 925a5310583dc6dc175aa5458879f63c47e360d5 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Wed, 30 Apr 2025 22:44:39 +0800 Subject: [PATCH 10/58] =?UTF-8?q?=E8=A2=AB=E5=82=AC=E7=A6=BB=E6=95=99?= =?UTF-8?q?=E5=AE=A4=E4=BA=86QAQ=EF=BC=8C=E4=BB=8A=E5=A4=A9=E5=B0=B1?= =?UTF-8?q?=E5=85=88=E5=88=B0=E6=AD=A4=E4=B8=BA=E6=AD=A2=E4=BA=86=EF=BC=8C?= =?UTF-8?q?=E7=8E=B0=E5=9C=A8=E6=98=AF=E8=A7=A3=E5=86=B3prompt=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../heartFC_chat/heartflow_prompt_builder.py | 5 +- .../person_info/relationship_manager.py | 83 +++++++++---------- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 216955c3..afef18c8 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -232,7 +232,7 @@ class PromptBuilder: user_ids_in_context = set() if message_list_before_now: for msg in message_list_before_now: - sender_id = msg.get('sender_id') + sender_id = msg["user_info"].get('user_id') if sender_id: user_ids_in_context.add(str(sender_id)) else: @@ -419,7 +419,8 @@ class PromptBuilder: user_ids_in_context = set() if message_list_before_now: for msg in message_list_before_now: - sender_id = msg.get('sender_id') + print(msg) + sender_id = msg["user_info"].get('user_id') if sender_id: user_ids_in_context.add(str(sender_id)) else: diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index c8bda8f3..846f3408 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -6,6 +6,7 @@ from .person_info import person_info_manager import time import random from typing import List, Dict +from ...common.database import db # import re # import traceback @@ -81,91 +82,81 @@ class RelationshipManager: is_known = person_info_manager.is_person_known(platform, user_id) return is_known + # --- [修改] 使用全局 db 对象进行查询 --- @staticmethod async def get_person_names_batch(platform: str, user_ids: List[str]) -> Dict[str, str]: """ 批量获取多个用户的 person_name。 - - Args: - platform (str): 平台名称。 - user_ids (List[str]): 用户 ID 列表。 - - Returns: - Dict[str, str]: 映射 {user_id: person_name},只包含成功获取到名称的用户。 """ if not user_ids: return {} - person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] # 确保 uid 是字符串 + person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] names_map = {} try: - # 使用 $in 操作符批量查询 - cursor = person_info_manager.collection.find( + # --- 修改点:直接使用 db.person_info.find --- + # !!! 确保 'person_info' 是正确的集合名称 !!! + cursor = db.person_info.find( {"person_id": {"$in": person_ids}}, {"_id": 0, "person_id": 1, "person_name": 1} # 只查询需要的字段 ) - async for doc in cursor: - # 从 person_id 反向推导出原始 user_id - # 注意:这依赖于 get_person_id 的实现方式,假设它是 platform_userid 格式 + # --- 结束修改点 --- + + # 注意:pymongo 的 find 返回的是同步游标,如果你的 db 对象是 motor 客户端,需要使用 await cursor.to_list(length=None) + # 假设这里 db 是 pymongo 同步客户端,或者你的环境允许在异步函数中迭代同步游标 + for doc in cursor: # 如果 db 是 motor,这里会报错,需要改为 async for original_user_id = doc.get("person_id", "").split("_", 1)[-1] person_name = doc.get("person_name") if original_user_id and person_name: names_map[original_user_id] = person_name - logger.debug(f"Batch get person names for {len(user_ids)} users, found {len(names_map)} names.") + logger.debug(f"批量获取 {len(user_ids)} 个用户的 person_name,找到 {len(names_map)} 个。") + except AttributeError as e: + # 如果 db 对象没有 person_info 属性,或者 find 方法不存在 + logger.error(f"访问数据库时出错: {e}。请检查 common/database.py 和集合名称。") except Exception as e: - logger.error(f"Error during batch get person names: {e}", exc_info=True) + logger.error(f"批量获取 person_name 时出错: {e}", exc_info=True) return names_map - # --- 结束新增 --- + # --- 结束修改 --- - # --- [新增] 批量获取用户群组绰号 --- + # --- [修改] 使用全局 db 对象进行查询 --- @staticmethod async def get_users_group_nicknames(platform: str, user_ids: List[str], group_id: str) -> Dict[str, List[Dict[str, int]]]: """ 批量获取多个用户在指定群组的绰号信息。 - - Args: - platform (str): 平台名称。 - user_ids (List[str]): 用户 ID 列表。 - group_id (str): 群组 ID。 - - Returns: - Dict[str, List[Dict[str, int]]]: 映射 {person_name: [{"绰号A": 次数}, ...]} - 只包含成功获取到绰号信息的用户。 - 键是用户的 person_name。 """ if not user_ids or not group_id: return {} person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] nicknames_data = {} - group_id_str = str(group_id) # 确保 group_id 是字符串 + group_id_str = str(group_id) try: - # 查询包含目标 person_id 且 group_nickname 字段存在的文档 - cursor = person_info_manager.collection.find( + # --- 修改点:直接使用 db.person_info.find --- + # !!! 确保 'person_info' 是正确的集合名称 !!! + cursor = db.person_info.find( { "person_id": {"$in": person_ids}, - "group_nickname": {"$elemMatch": {group_id_str: {"$exists": True}}} # 确保该群组的条目存在 + "group_nickname": {"$elemMatch": {group_id_str: {"$exists": True}}} }, - {"_id": 0, "person_id": 1, "person_name": 1, "group_nickname": 1} # 查询所需字段 + {"_id": 0, "person_id": 1, "person_name": 1, "group_nickname": 1} ) + # --- 结束修改点 --- - async for doc in cursor: + # 同样,假设同步迭代可行 + for doc in cursor: # 如果 db 是 motor,这里需要改为 async for person_name = doc.get("person_name") - if not person_name: # 如果没有 person_name,则跳过此用户 + if not person_name: continue group_nicknames_list = doc.get("group_nickname", []) user_group_nicknames = [] - # 遍历 group_nickname 列表,找到对应 group_id 的条目 for group_entry in group_nicknames_list: if group_id_str in group_entry and isinstance(group_entry[group_id_str], list): - # 提取该群组的绰号列表 [{"绰号": 次数}, ...] user_group_nicknames = group_entry[group_id_str] - break # 找到后即可退出内层循环 + break - if user_group_nicknames: # 确保列表非空 - # 过滤掉格式不正确的条目 + if user_group_nicknames: valid_nicknames = [] for item in user_group_nicknames: if isinstance(item, dict) and len(item) == 1: @@ -173,19 +164,21 @@ class RelationshipManager: if isinstance(key, str) and isinstance(value, int): valid_nicknames.append(item) else: - logger.warning(f"Invalid nickname format in DB for user {person_name}, group {group_id_str}: {item}") + logger.warning(f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号格式无效: {item}") else: - logger.warning(f"Invalid nickname entry format in DB for user {person_name}, group {group_id_str}: {item}") - + logger.warning(f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号条目格式无效: {item}") if valid_nicknames: - nicknames_data[person_name] = valid_nicknames # 使用 person_name 作为 key + nicknames_data[person_name] = valid_nicknames - logger.debug(f"Batch get group nicknames for {len(user_ids)} users in group {group_id_str}, found data for {len(nicknames_data)} users.") + logger.debug(f"批量获取群组 {group_id_str} 中 {len(user_ids)} 个用户的绰号,找到 {len(nicknames_data)} 个用户的数据。") + except AttributeError as e: + logger.error(f"访问数据库时出错: {e}。请检查 common/database.py 和集合名称 'person_info'。") except Exception as e: - logger.error(f"Error during batch get group nicknames: {e}", exc_info=True) + logger.error(f"批量获取群组绰号时出错: {e}", exc_info=True) return nicknames_data + # --- 结束修改 --- @staticmethod async def is_qved_name(platform, user_id): From 9c7d655be646cebfa58016524ed9f4275cd35b74 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 15:53:22 +0800 Subject: [PATCH 11/58] =?UTF-8?q?=E5=91=9C=E5=91=9C=E5=91=9C~=E9=9A=BE?= =?UTF-8?q?=E9=9A=BE=E9=9A=BE~=E5=A4=A7=E6=9B=B4=E6=94=B9=E5=89=8D?= =?UTF-8?q?=E5=85=88=E5=AD=98=E4=B8=AA=E6=A1=A3=EF=BC=8C=E5=87=86=E5=A4=87?= =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=AD=90=E8=BF=9B=E7=A8=8B=E5=88=9D=E5=A7=8B?= =?UTF-8?q?=E5=8C=96=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/database.py | 14 +++ src/plugins/group_nickname/nickname_mapper.py | 62 ++++++---- .../group_nickname/nickname_processor.py | 115 +++++++++++------- src/plugins/heartFC_chat/heartFC_chat.py | 9 +- .../person_info/relationship_manager.py | 73 ++++++----- 5 files changed, 166 insertions(+), 107 deletions(-) diff --git a/src/common/database.py b/src/common/database.py index ee0ead0b..17a71709 100644 --- a/src/common/database.py +++ b/src/common/database.py @@ -52,6 +52,20 @@ class DBWrapper: def __getitem__(self, key): return get_db()[key] +def close_db(): + """关闭全局 MongoDB 客户端连接。""" + global _client, _db + if _client: + try: + _client.close() + # print(f"数据库连接已由进程 {os.getpid()} 关闭。") # 可选:添加日志 + except Exception as e: + # print(f"关闭数据库连接时出错: {e}") # 可选:记录关闭错误 + pass # 关闭期间避免程序崩溃 + finally: + # 重置全局变量,以便下次 get_db 能重新连接(如果需要) + _client = None + _db = None # 全局数据库访问点 db: Database = DBWrapper() diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index b5e258a4..3cf2687f 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -1,32 +1,17 @@ import json from typing import Dict, Any, Optional +import asyncio # 可能需要用于锁 + from src.common.logger_manager import get_logger from src.plugins.models.utils_model import LLMRequest # 从全局配置导入 from src.config.config import global_config - logger = get_logger("nickname_mapper") -llm_mapper: Optional[LLMRequest] = None -if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 - try: - # 从全局配置获取模型设置 - model_config = global_config.llm_nickname_mapping - if not model_config or not model_config.get("name"): - logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") - else: - llm_mapper = LLMRequest( # <-- LLM 初始化 - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", - ) - logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") - - except Exception as e: - logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) - llm_mapper = None +if global_config.ENABLE_NICKNAME_MAPPING: + _llm_mapper_instance: Optional[LLMRequest] = None + _llm_mapper_init_lock = asyncio.Lock() # 使用异步锁,因为下面的函数是 async def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) @@ -68,6 +53,39 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: """ return prompt +async def _get_or_initialize_llm_mapper() -> Optional[LLMRequest]: + """获取或在需要时初始化绰号映射 LLM 的单例。""" + global _llm_mapper_instance + # 双重检查锁定模式(适用于 asyncio) + if _llm_mapper_instance is None: + async with _llm_mapper_init_lock: + # 再次检查,防止在等待锁时其他协程已完成初始化 + if _llm_mapper_instance is None: + logger.info("首次调用,尝试初始化绰号映射 LLM...") + if not global_config.ENABLE_NICKNAME_MAPPING: + logger.info("绰号映射功能已禁用,LLM 初始化跳过。") + # 可以选择返回 None 或者设置一个特殊标记 + # 这里我们假设如果禁用,就不应该尝试使用,所以保持 None + # _llm_mapper_instance = None # 已经是 None + else: + try: + model_config = global_config.llm_nickname_mapping + if not model_config or not model_config.get("name"): + logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") + # 初始化失败,保持 None + else: + _llm_mapper_instance = LLMRequest( + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", + ) + logger.info("绰号映射 LLM 初始化成功。") + except Exception as e: + logger.error(f"初始化绰号映射 LLM 失败: {e}", exc_info=True) + # 初始化失败,保持 None + _llm_mapper_instance = None # 确保显式设置为 None + return _llm_mapper_instance async def analyze_chat_for_nicknames( chat_history_str: str, @@ -83,9 +101,7 @@ async def analyze_chat_for_nicknames( logger.debug("绰号映射功能已禁用。") return {"is_exist": False} - if llm_mapper is None: - logger.error("绰号映射 LLM 未初始化。无法执行分析。") - return {"is_exist": False} + llm_mapper = await _get_or_initialize_llm_mapper() prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) logger.debug(f"构建的绰号映射 Prompt:\n{prompt}") diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index 669b3094..2f01492e 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,3 +1,4 @@ +import os import asyncio import traceback from multiprocessing import Process, Queue as mpQueue, Event @@ -9,7 +10,7 @@ from pymongo.errors import OperationFailure from src.common.logger_manager import get_logger # 导入日志管理器 from src.config.config import global_config # 导入全局配置 from .nickname_mapper import analyze_chat_for_nicknames # 导入绰号分析函数 -from src.common.database import db # 导入数据库初始化和关闭函数 +from src.common.database import get_db, close_db logger = get_logger("nickname_processor") # 获取日志记录器实例 # --- 运行时状态 (用于安全停止进程) --- @@ -20,48 +21,37 @@ mongo_client: Optional[MongoClient] = None # MongoDB 客户端实例 person_info_collection = None # 用户信息集合对象 # --- 数据库更新逻辑 (使用推荐的新结构) --- -async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): +async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str], current_db): """ 更新数据库中用户的群组绰号计数。 - 使用新的数据结构: - { - "user_id": 12345, - "group_nicknames": [ # <--- 字段名统一为 group_nicknames - { - "group_id": "群号1", - "nicknames": [ { "name": "绰号A", "count": 5 }, ... ] - }, ... - ] - } + 使用传入的数据库实例。 """ - person_info_collection = db.person_info # 获取集合对象 + # 从传入的 db 实例获取 collection + person_info_collection = current_db.person_info # <--- 使用 current_db if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。") return - logger.info(f"尝试更新群组 '{group_id}' 的绰号计数 (新结构),映射为: {nickname_map}") + logger.info(f"尝试更新群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") - for user_id_str, nickname in nickname_map.items(): # user_id 从 map 中取出是 str + for user_id_str, nickname in nickname_map.items(): if not user_id_str or not nickname: logger.warning(f"跳过绰号映射中的无效条目: user_id='{user_id_str}', nickname='{nickname}'") continue - group_id_str = str(group_id) # 确保 group_id 是字符串 + group_id_str = str(group_id) try: - # 假设数据库中存储的用户ID是整数类型,如果不是请移除 int() user_id_int = int(user_id_str) except ValueError: logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。") continue try: - # 步骤 1: 确保用户文档存在,且有 group_nicknames 字段 (如果不存在则添加空数组) - # 注意:这里不再使用 $setOnInsert 添加 group_nicknames,因为 $addToSet 或 $push 在字段不存在时会自动创建。 - # upsert=True 确保用户文档存在。 + # 确保后续所有的数据库操作都使用从 current_db 获取的 person_info_collection person_info_collection.update_one( {"user_id": user_id_int}, - {"$setOnInsert": {"user_id": user_id_int}}, # 确保 upsert 时 user_id 被正确设置 + {"$setOnInsert": {"user_id": user_id_int}}, upsert=True ) # 确保 group_nicknames 字段存在且为数组 (如果不存在则创建) @@ -71,7 +61,7 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): ) - # 步骤 2: 尝试直接增加现有绰号的计数 + # 尝试直接增加现有绰号的计数 # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配,且该元素的 nicknames 数组中存在一个元素的 name 匹配 update_result = person_info_collection.update_one( { @@ -93,14 +83,14 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): logger.debug(f"用户 '{user_id_str}' 在群组 '{group_id_str}' 中的绰号 '{nickname}' 计数已增加。") continue # 处理完成,进行下一次循环 - # 步骤 3: 如果步骤 2 未修改任何内容,尝试将新绰号添加到现有群组的 nicknames 数组中 + # 如果未修改任何内容,尝试将新绰号添加到现有群组的 nicknames 数组中 # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配 update_result = person_info_collection.update_one( { "user_id": user_id_int, - "group_nicknames.group_id": group_id_str # <--- 确保使用 group_nicknames + "group_nicknames.group_id": group_id_str }, - { # <--- 确保使用 group_nicknames + { "$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}} }, array_filters=[ @@ -112,15 +102,15 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): logger.debug(f"为用户 '{user_id_str}' 在群组 '{group_id_str}' 中添加了新绰号 '{nickname}',计数为 1。") continue # 处理完成,进行下一次循环 - # 步骤 4: 如果步骤 2 和 3 都未修改任何内容,说明群组条目本身可能不存在于 group_nicknames 数组中,尝试添加新的群组条目 + # 如果未修改任何内容,说明群组条目本身可能不存在于 group_nicknames 数组中,尝试添加新的群组条目 # 条件:用户存在,且 group_nicknames 数组中 *不包含* 指定 group_id 的元素 update_result = person_info_collection.update_one( { "user_id": user_id_int, - "group_nicknames.group_id": {"$ne": group_id_str} # <--- 检查 group_id 是否不存在 + "group_nicknames.group_id": {"$ne": group_id_str} }, { - "$push": { # <--- 确保使用 group_nicknames + "$push": { "group_nicknames": { "group_id": group_id_str, "nicknames": [{"name": nickname, "count": 1}] @@ -172,49 +162,84 @@ async def add_to_nickname_queue( logger.warning(f"无法将项目添加到绰号队列(可能已满): {e}", exc_info=True) -async def _nickname_processing_loop(queue: mpQueue, stop_event): - """独立进程中的主循环,处理队列任务。""" - - logger.info("绰号处理循环已启动。") +async def _nickname_processing_loop(queue: mpQueue, stop_event, current_db): + """独立进程中的主循环,处理队列任务,使用传入的数据库连接。""" + pid = os.getpid() # 获取进程ID用于日志 + logger.info(f"绰号处理循环已启动 (PID: {pid})。 使用数据库: {current_db.name}") while not stop_event.is_set(): try: if not queue.empty(): + # 或者使用 queue.get(timeout=...) 来避免忙等待,并处理 Empty 异常 item = queue.get() if isinstance(item, tuple) and len(item) == 4: chat_history_str, bot_reply, group_id, user_name_map = item - logger.debug(f"正在处理群组 {group_id} 的绰号映射任务...") + logger.debug(f"(PID: {pid}) 正在处理群组 {group_id} 的绰号映射任务...") analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) if analysis_result.get("is_exist") and analysis_result.get("data"): - await update_nickname_counts(group_id, analysis_result["data"]) + # 将数据库实例传递下去 + await update_nickname_counts(group_id, analysis_result["data"], current_db) else: - logger.warning(f"从队列接收到意外的项目类型: {type(item)}") - - await asyncio.sleep(5) + logger.warning(f"(PID: {pid}) 从队列接收到意外的项目类型: {type(item)}") + # 处理完一个任务后短暂休眠,避免CPU空转 + await asyncio.sleep(0.1) else: + # 队列为空时,休眠更长时间 await asyncio.sleep(global_config.NICKNAME_PROCESS_SLEEP_INTERVAL) except asyncio.CancelledError: - logger.info("绰号处理循环已取消。") + logger.info(f"绰号处理循环已取消 (PID: {pid})。") break except Exception as e: - logger.error(f"绰号处理循环出错: {e}\n{traceback.format_exc()}") - await asyncio.sleep(5) + logger.error(f"(PID: {pid}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") + await asyncio.sleep(5) # 出错后等待一段时间 - logger.info("绰号处理循环已结束。") + logger.info(f"绰号处理循环已结束 (PID: {pid})。") def _run_processor_process(queue: mpQueue, stop_event): - """进程启动函数,运行异步循环。""" + """进程启动函数,管理自己的数据库连接并运行异步循环。""" + db_instance = None # 初始化数据库实例变量 + loop = None + pid = os.getpid() + logger.info(f"绰号处理器进程启动中 (PID: {pid})...") + try: + # 调用 get_db() 会触发此进程的懒加载逻辑 + logger.info(f"子进程 (PID: {pid}) - 即将调用 get_db()") + db_instance = get_db() + logger.info(f"子进程 (PID: {pid}) - 完成 get_db(), 连接到数据库: {db_instance.name}") + logger.info(f"绰号处理器进程 (PID: {pid}) 已获取数据库连接: {db_instance.name}") + loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) - loop.run_until_complete(_nickname_processing_loop(queue, stop_event)) - loop.close() + # 将获取到的数据库实例传递给异步循环 + logger.info(f"子进程 (PID: {pid}) - 即将运行 _nickname_processing_loop") + loop.run_until_complete(_nickname_processing_loop(queue, stop_event, db_instance)) + logger.info(f"子进程 (PID: {pid}) - 完成 _nickname_processing_loop") + except Exception as e: - logger.error(f"运行绰号处理器进程时出错: {e}", exc_info=True) + logger.error(f"(PID: {pid}) 运行绰号处理器进程时出错: {e}", exc_info=True) + finally: + # --- 清理工作 --- + if loop: + try: + # 关闭事件循环 + if loop.is_running(): + loop.stop() # 先停止 + loop.close() + logger.info(f"(PID: {pid}) asyncio 事件循环已关闭。") + except Exception as loop_close_err: + logger.error(f"(PID: {pid}) 关闭 asyncio 事件循环时出错: {loop_close_err}", exc_info=True) + + try: + close_db() + logger.info(f"(PID: {pid}) 数据库连接已通过 close_db() 关闭。") + except Exception as db_close_err: + logger.error(f"(PID: {pid}) 关闭数据库连接时出错: {db_close_err}", exc_info=True) + logger.info(f"绰号处理器进程已结束 (PID: {pid})。") def start_nickname_processor(): """启动绰号映射处理进程。""" diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 0ad2e061..a707b3a7 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -737,7 +737,7 @@ class HeartFChatting: # 4. 获取当前上下文中涉及的用户 ID 及其已知名称 user_ids_in_history = set() for msg in history_messages: - sender_id = msg.get('sender_id') + sender_id = msg["user_info"].get('user_id') if sender_id: user_ids_in_history.add(str(sender_id)) # 确保是字符串 @@ -747,13 +747,6 @@ class HeartFChatting: try: names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) - except AttributeError: - logger.warning("relationship_manager does not have get_person_names_batch method. Falling back to single lookups.") - names_data = {} - for user_id in user_ids_in_history: - name = await relationship_manager.get_person_name(platform, user_id) - if name: - names_data[user_id] = name except Exception as e: logger.error(f"Error getting person names: {e}", exc_info=True) names_data = {} # 出错时置空 diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index 846f3408..fe229b2c 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -118,57 +118,69 @@ class RelationshipManager: return names_map # --- 结束修改 --- - # --- [修改] 使用全局 db 对象进行查询 --- @staticmethod async def get_users_group_nicknames(platform: str, user_ids: List[str], group_id: str) -> Dict[str, List[Dict[str, int]]]: """ 批量获取多个用户在指定群组的绰号信息。 + + Args: + platform (str): 平台名称。 + user_ids (List[str]): 用户 ID 列表。 + group_id (str): 群组 ID。 + + Returns: + Dict[str, List[Dict[str, int]]]: 映射 {person_name: [{"绰号A": 次数}, ...]} """ if not user_ids or not group_id: return {} person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] nicknames_data = {} - group_id_str = str(group_id) + group_id_str = str(group_id) # 确保 group_id 是字符串 try: - # --- 修改点:直接使用 db.person_info.find --- - # !!! 确保 'person_info' 是正确的集合名称 !!! + # 查询包含目标 person_id 的文档 cursor = db.person_info.find( - { - "person_id": {"$in": person_ids}, - "group_nickname": {"$elemMatch": {group_id_str: {"$exists": True}}} - }, - {"_id": 0, "person_id": 1, "person_name": 1, "group_nickname": 1} + {"person_id": {"$in": person_ids}}, + {"_id": 0, "person_id": 1, "person_name": 1, "group_nicknames": 1} # 查询所需字段 ) - # --- 结束修改点 --- - # 同样,假设同步迭代可行 - for doc in cursor: # 如果 db 是 motor,这里需要改为 async for + # 假设同步迭代可行 + for doc in cursor: person_name = doc.get("person_name") if not person_name: - continue + continue # 跳过没有 person_name 的用户 - group_nicknames_list = doc.get("group_nickname", []) - user_group_nicknames = [] + group_nicknames_list = doc.get("group_nicknames", []) # 获取 group_nicknames 数组 + target_group_nicknames = [] # 存储目标群组的绰号列表 + + # 遍历 group_nicknames 数组,查找匹配的 group_id for group_entry in group_nicknames_list: - if group_id_str in group_entry and isinstance(group_entry[group_id_str], list): - user_group_nicknames = group_entry[group_id_str] - break + # 确保 group_entry 是字典且包含 group_id 键 + if isinstance(group_entry, dict) and group_entry.get("group_id") == group_id_str: + # 提取 nicknames 列表 + nicknames_raw = group_entry.get("nicknames", []) + if isinstance(nicknames_raw, list): + target_group_nicknames = nicknames_raw + break # 找到匹配的 group_id 后即可退出内层循环 - if user_group_nicknames: - valid_nicknames = [] - for item in user_group_nicknames: - if isinstance(item, dict) and len(item) == 1: - key, value = list(item.items())[0] - if isinstance(key, str) and isinstance(value, int): - valid_nicknames.append(item) - else: - logger.warning(f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号格式无效: {item}") + # 如果找到了目标群组的绰号列表 + if target_group_nicknames: + valid_nicknames_formatted = [] # 存储格式化后的绰号 + for item in target_group_nicknames: + # 校验每个绰号条目的格式 { "name": str, "count": int } + if isinstance(item, dict) and \ + isinstance(item.get("name"), str) and \ + isinstance(item.get("count"), int) and \ + item["count"] > 0: # 确保 count 是正整数 + # --- 格式转换:从 { "name": "xxx", "count": y } 转为 { "xxx": y } --- + valid_nicknames_formatted.append({item["name"]: item["count"]}) + # --- 结束格式转换 --- else: - logger.warning(f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号条目格式无效: {item}") - if valid_nicknames: - nicknames_data[person_name] = valid_nicknames + logger.warning(f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号格式无效或 count <= 0: {item}") + + if valid_nicknames_formatted: # 如果存在有效的、格式化后的绰号 + nicknames_data[person_name] = valid_nicknames_formatted # 使用 person_name 作为 key logger.debug(f"批量获取群组 {group_id_str} 中 {len(user_ids)} 个用户的绰号,找到 {len(nicknames_data)} 个用户的数据。") @@ -178,7 +190,6 @@ class RelationshipManager: logger.error(f"批量获取群组绰号时出错: {e}", exc_info=True) return nicknames_data - # --- 结束修改 --- @staticmethod async def is_qved_name(platform, user_id): From 290f76c152306a66e1c3586d24fbb7fc4716ad23 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 16:47:23 +0800 Subject: [PATCH 12/58] =?UTF-8?q?=E4=BF=AE=E4=B8=AA=E5=8F=91=E4=B8=8D?= =?UTF-8?q?=E5=87=BA=E4=BF=A1=E6=81=AF=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index a707b3a7..769a35b6 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -1462,9 +1462,9 @@ class HeartFChatting: if not mark_head: mark_head = True first_bot_msg = bot_message # 保存第一个成功发送的消息对象 - await self.heart_fc_sender.type_and_send_message(bot_message, type=False) + await self.heart_fc_sender.type_and_send_message(bot_message, typing=False) else: - await self.heart_fc_sender.type_and_send_message(bot_message, type=True) + await self.heart_fc_sender.type_and_send_message(bot_message, typing=True) reply_message_ids.append(part_message_id) # 记录我们生成的ID From 4de32f2b8678a40dfb805282337daa6de6086173 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 17:42:23 +0800 Subject: [PATCH 13/58] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E8=BE=93=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 2 +- src/plugins/heartFC_chat/heartflow_prompt_builder.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 769a35b6..ebfed17d 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -750,7 +750,7 @@ class HeartFChatting: except Exception as e: logger.error(f"Error getting person names: {e}", exc_info=True) names_data = {} # 出错时置空 - + print(f"\n\nnames_data:\n{names_data}\n\n") for user_id in user_ids_in_history: if user_id in names_data: diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 3ba78063..290a5e35 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -440,7 +440,6 @@ class PromptBuilder: user_ids_in_context = set() if message_list_before_now: for msg in message_list_before_now: - print(msg) sender_id = msg["user_info"].get('user_id') if sender_id: user_ids_in_context.add(str(sender_id)) From 0a21281a4ef620ef6cf4da3e8e2b6c8be13b595a Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 18:09:07 +0800 Subject: [PATCH 14/58] =?UTF-8?q?=E5=91=9C=E5=91=9C=E5=91=9C~=E6=98=AF?= =?UTF-8?q?=E7=BA=BF=E7=A8=8B=E4=B8=8D=E6=98=AF=E8=BF=9B=E7=A8=8B=EF=BC=8C?= =?UTF-8?q?=E7=90=86=E8=A7=A3=E9=94=99=E4=BA=86=E5=96=B5=EF=BC=8C=E4=BA=8B?= =?UTF-8?q?=E5=88=B0=E5=A6=82=E4=BB=8A=EF=BC=8C=E5=85=88=E8=BF=98=E5=8E=9F?= =?UTF-8?q?=E9=83=A8=E5=88=86=E6=96=87=E4=BB=B6=E8=87=B3=E4=B9=8B=E5=89=8D?= =?UTF-8?q?=E7=9A=84=E6=A0=B7=E5=AD=90=E5=90=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/database.py | 14 --- src/plugins/group_nickname/nickname_mapper.py | 62 ++++------ .../group_nickname/nickname_processor.py | 115 +++++++----------- 3 files changed, 68 insertions(+), 123 deletions(-) diff --git a/src/common/database.py b/src/common/database.py index 17a71709..ee0ead0b 100644 --- a/src/common/database.py +++ b/src/common/database.py @@ -52,20 +52,6 @@ class DBWrapper: def __getitem__(self, key): return get_db()[key] -def close_db(): - """关闭全局 MongoDB 客户端连接。""" - global _client, _db - if _client: - try: - _client.close() - # print(f"数据库连接已由进程 {os.getpid()} 关闭。") # 可选:添加日志 - except Exception as e: - # print(f"关闭数据库连接时出错: {e}") # 可选:记录关闭错误 - pass # 关闭期间避免程序崩溃 - finally: - # 重置全局变量,以便下次 get_db 能重新连接(如果需要) - _client = None - _db = None # 全局数据库访问点 db: Database = DBWrapper() diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 3cf2687f..b5e258a4 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -1,17 +1,32 @@ import json from typing import Dict, Any, Optional -import asyncio # 可能需要用于锁 - from src.common.logger_manager import get_logger from src.plugins.models.utils_model import LLMRequest # 从全局配置导入 from src.config.config import global_config + logger = get_logger("nickname_mapper") -if global_config.ENABLE_NICKNAME_MAPPING: - _llm_mapper_instance: Optional[LLMRequest] = None - _llm_mapper_init_lock = asyncio.Lock() # 使用异步锁,因为下面的函数是 async +llm_mapper: Optional[LLMRequest] = None +if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 + try: + # 从全局配置获取模型设置 + model_config = global_config.llm_nickname_mapping + if not model_config or not model_config.get("name"): + logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") + else: + llm_mapper = LLMRequest( # <-- LLM 初始化 + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", + ) + logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") + + except Exception as e: + logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) + llm_mapper = None def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) @@ -53,39 +68,6 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: """ return prompt -async def _get_or_initialize_llm_mapper() -> Optional[LLMRequest]: - """获取或在需要时初始化绰号映射 LLM 的单例。""" - global _llm_mapper_instance - # 双重检查锁定模式(适用于 asyncio) - if _llm_mapper_instance is None: - async with _llm_mapper_init_lock: - # 再次检查,防止在等待锁时其他协程已完成初始化 - if _llm_mapper_instance is None: - logger.info("首次调用,尝试初始化绰号映射 LLM...") - if not global_config.ENABLE_NICKNAME_MAPPING: - logger.info("绰号映射功能已禁用,LLM 初始化跳过。") - # 可以选择返回 None 或者设置一个特殊标记 - # 这里我们假设如果禁用,就不应该尝试使用,所以保持 None - # _llm_mapper_instance = None # 已经是 None - else: - try: - model_config = global_config.llm_nickname_mapping - if not model_config or not model_config.get("name"): - logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") - # 初始化失败,保持 None - else: - _llm_mapper_instance = LLMRequest( - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", - ) - logger.info("绰号映射 LLM 初始化成功。") - except Exception as e: - logger.error(f"初始化绰号映射 LLM 失败: {e}", exc_info=True) - # 初始化失败,保持 None - _llm_mapper_instance = None # 确保显式设置为 None - return _llm_mapper_instance async def analyze_chat_for_nicknames( chat_history_str: str, @@ -101,7 +83,9 @@ async def analyze_chat_for_nicknames( logger.debug("绰号映射功能已禁用。") return {"is_exist": False} - llm_mapper = await _get_or_initialize_llm_mapper() + if llm_mapper is None: + logger.error("绰号映射 LLM 未初始化。无法执行分析。") + return {"is_exist": False} prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) logger.debug(f"构建的绰号映射 Prompt:\n{prompt}") diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index 2f01492e..669b3094 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,4 +1,3 @@ -import os import asyncio import traceback from multiprocessing import Process, Queue as mpQueue, Event @@ -10,7 +9,7 @@ from pymongo.errors import OperationFailure from src.common.logger_manager import get_logger # 导入日志管理器 from src.config.config import global_config # 导入全局配置 from .nickname_mapper import analyze_chat_for_nicknames # 导入绰号分析函数 -from src.common.database import get_db, close_db +from src.common.database import db # 导入数据库初始化和关闭函数 logger = get_logger("nickname_processor") # 获取日志记录器实例 # --- 运行时状态 (用于安全停止进程) --- @@ -21,37 +20,48 @@ mongo_client: Optional[MongoClient] = None # MongoDB 客户端实例 person_info_collection = None # 用户信息集合对象 # --- 数据库更新逻辑 (使用推荐的新结构) --- -async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str], current_db): +async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数。 - 使用传入的数据库实例。 + 使用新的数据结构: + { + "user_id": 12345, + "group_nicknames": [ # <--- 字段名统一为 group_nicknames + { + "group_id": "群号1", + "nicknames": [ { "name": "绰号A", "count": 5 }, ... ] + }, ... + ] + } """ - # 从传入的 db 实例获取 collection - person_info_collection = current_db.person_info # <--- 使用 current_db + person_info_collection = db.person_info # 获取集合对象 if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。") return - logger.info(f"尝试更新群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") + logger.info(f"尝试更新群组 '{group_id}' 的绰号计数 (新结构),映射为: {nickname_map}") - for user_id_str, nickname in nickname_map.items(): + for user_id_str, nickname in nickname_map.items(): # user_id 从 map 中取出是 str if not user_id_str or not nickname: logger.warning(f"跳过绰号映射中的无效条目: user_id='{user_id_str}', nickname='{nickname}'") continue - group_id_str = str(group_id) + group_id_str = str(group_id) # 确保 group_id 是字符串 try: + # 假设数据库中存储的用户ID是整数类型,如果不是请移除 int() user_id_int = int(user_id_str) except ValueError: logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。") continue try: - # 确保后续所有的数据库操作都使用从 current_db 获取的 person_info_collection + # 步骤 1: 确保用户文档存在,且有 group_nicknames 字段 (如果不存在则添加空数组) + # 注意:这里不再使用 $setOnInsert 添加 group_nicknames,因为 $addToSet 或 $push 在字段不存在时会自动创建。 + # upsert=True 确保用户文档存在。 person_info_collection.update_one( {"user_id": user_id_int}, - {"$setOnInsert": {"user_id": user_id_int}}, + {"$setOnInsert": {"user_id": user_id_int}}, # 确保 upsert 时 user_id 被正确设置 upsert=True ) # 确保 group_nicknames 字段存在且为数组 (如果不存在则创建) @@ -61,7 +71,7 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str], cu ) - # 尝试直接增加现有绰号的计数 + # 步骤 2: 尝试直接增加现有绰号的计数 # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配,且该元素的 nicknames 数组中存在一个元素的 name 匹配 update_result = person_info_collection.update_one( { @@ -83,14 +93,14 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str], cu logger.debug(f"用户 '{user_id_str}' 在群组 '{group_id_str}' 中的绰号 '{nickname}' 计数已增加。") continue # 处理完成,进行下一次循环 - # 如果未修改任何内容,尝试将新绰号添加到现有群组的 nicknames 数组中 + # 步骤 3: 如果步骤 2 未修改任何内容,尝试将新绰号添加到现有群组的 nicknames 数组中 # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配 update_result = person_info_collection.update_one( { "user_id": user_id_int, - "group_nicknames.group_id": group_id_str + "group_nicknames.group_id": group_id_str # <--- 确保使用 group_nicknames }, - { + { # <--- 确保使用 group_nicknames "$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}} }, array_filters=[ @@ -102,15 +112,15 @@ async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str], cu logger.debug(f"为用户 '{user_id_str}' 在群组 '{group_id_str}' 中添加了新绰号 '{nickname}',计数为 1。") continue # 处理完成,进行下一次循环 - # 如果未修改任何内容,说明群组条目本身可能不存在于 group_nicknames 数组中,尝试添加新的群组条目 + # 步骤 4: 如果步骤 2 和 3 都未修改任何内容,说明群组条目本身可能不存在于 group_nicknames 数组中,尝试添加新的群组条目 # 条件:用户存在,且 group_nicknames 数组中 *不包含* 指定 group_id 的元素 update_result = person_info_collection.update_one( { "user_id": user_id_int, - "group_nicknames.group_id": {"$ne": group_id_str} + "group_nicknames.group_id": {"$ne": group_id_str} # <--- 检查 group_id 是否不存在 }, { - "$push": { + "$push": { # <--- 确保使用 group_nicknames "group_nicknames": { "group_id": group_id_str, "nicknames": [{"name": nickname, "count": 1}] @@ -162,84 +172,49 @@ async def add_to_nickname_queue( logger.warning(f"无法将项目添加到绰号队列(可能已满): {e}", exc_info=True) -async def _nickname_processing_loop(queue: mpQueue, stop_event, current_db): - """独立进程中的主循环,处理队列任务,使用传入的数据库连接。""" - pid = os.getpid() # 获取进程ID用于日志 - logger.info(f"绰号处理循环已启动 (PID: {pid})。 使用数据库: {current_db.name}") +async def _nickname_processing_loop(queue: mpQueue, stop_event): + """独立进程中的主循环,处理队列任务。""" + + logger.info("绰号处理循环已启动。") while not stop_event.is_set(): try: if not queue.empty(): - # 或者使用 queue.get(timeout=...) 来避免忙等待,并处理 Empty 异常 item = queue.get() if isinstance(item, tuple) and len(item) == 4: chat_history_str, bot_reply, group_id, user_name_map = item - logger.debug(f"(PID: {pid}) 正在处理群组 {group_id} 的绰号映射任务...") + logger.debug(f"正在处理群组 {group_id} 的绰号映射任务...") analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) if analysis_result.get("is_exist") and analysis_result.get("data"): - # 将数据库实例传递下去 - await update_nickname_counts(group_id, analysis_result["data"], current_db) + await update_nickname_counts(group_id, analysis_result["data"]) else: - logger.warning(f"(PID: {pid}) 从队列接收到意外的项目类型: {type(item)}") - # 处理完一个任务后短暂休眠,避免CPU空转 - await asyncio.sleep(0.1) + logger.warning(f"从队列接收到意外的项目类型: {type(item)}") + + await asyncio.sleep(5) else: - # 队列为空时,休眠更长时间 await asyncio.sleep(global_config.NICKNAME_PROCESS_SLEEP_INTERVAL) except asyncio.CancelledError: - logger.info(f"绰号处理循环已取消 (PID: {pid})。") + logger.info("绰号处理循环已取消。") break except Exception as e: - logger.error(f"(PID: {pid}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") - await asyncio.sleep(5) # 出错后等待一段时间 + logger.error(f"绰号处理循环出错: {e}\n{traceback.format_exc()}") + await asyncio.sleep(5) - logger.info(f"绰号处理循环已结束 (PID: {pid})。") + logger.info("绰号处理循环已结束。") def _run_processor_process(queue: mpQueue, stop_event): - """进程启动函数,管理自己的数据库连接并运行异步循环。""" - db_instance = None # 初始化数据库实例变量 - loop = None - pid = os.getpid() - logger.info(f"绰号处理器进程启动中 (PID: {pid})...") - + """进程启动函数,运行异步循环。""" try: - # 调用 get_db() 会触发此进程的懒加载逻辑 - logger.info(f"子进程 (PID: {pid}) - 即将调用 get_db()") - db_instance = get_db() - logger.info(f"子进程 (PID: {pid}) - 完成 get_db(), 连接到数据库: {db_instance.name}") - logger.info(f"绰号处理器进程 (PID: {pid}) 已获取数据库连接: {db_instance.name}") - loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) - # 将获取到的数据库实例传递给异步循环 - logger.info(f"子进程 (PID: {pid}) - 即将运行 _nickname_processing_loop") - loop.run_until_complete(_nickname_processing_loop(queue, stop_event, db_instance)) - logger.info(f"子进程 (PID: {pid}) - 完成 _nickname_processing_loop") - + loop.run_until_complete(_nickname_processing_loop(queue, stop_event)) + loop.close() except Exception as e: - logger.error(f"(PID: {pid}) 运行绰号处理器进程时出错: {e}", exc_info=True) - finally: - # --- 清理工作 --- - if loop: - try: - # 关闭事件循环 - if loop.is_running(): - loop.stop() # 先停止 - loop.close() - logger.info(f"(PID: {pid}) asyncio 事件循环已关闭。") - except Exception as loop_close_err: - logger.error(f"(PID: {pid}) 关闭 asyncio 事件循环时出错: {loop_close_err}", exc_info=True) - - try: - close_db() - logger.info(f"(PID: {pid}) 数据库连接已通过 close_db() 关闭。") - except Exception as db_close_err: - logger.error(f"(PID: {pid}) 关闭数据库连接时出错: {db_close_err}", exc_info=True) - logger.info(f"绰号处理器进程已结束 (PID: {pid})。") + logger.error(f"运行绰号处理器进程时出错: {e}", exc_info=True) def start_nickname_processor(): """启动绰号映射处理进程。""" From f042f7b689f6a75655d429beecfe83fe806cc638 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 19:09:47 +0800 Subject: [PATCH 15/58] =?UTF-8?q?=E7=BA=BF=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 18 +- .../group_nickname/nickname_processor.py | 350 +++++++----------- 2 files changed, 146 insertions(+), 222 deletions(-) diff --git a/bot.py b/bot.py index 32a33e33..c50f939c 100644 --- a/bot.py +++ b/bot.py @@ -221,15 +221,6 @@ def raw_main(): env_config = {key: os.getenv(key) for key in os.environ} scan_provider(env_config) - # 在这里启动绰号处理进程 - logger.info("准备启动绰号处理进程...") - start_nickname_processor() # <--- 添加启动调用 - logger.info("已调用启动绰号处理进程。") - - # 注册退出处理函数 (确保进程能被关闭) - atexit.register(stop_nickname_processor) # <--- 在这里注册停止函数 - logger.info("已注册绰号处理进程的退出处理程序。") - # 返回MainSystem实例 return MainSystem() @@ -239,6 +230,15 @@ if __name__ == "__main__": # 获取MainSystem实例 main_system = raw_main() + # 在这里启动绰号处理进程 + logger.info("准备启动绰号处理线程...") + start_nickname_processor() # <--- 添加启动调用 + logger.info("已调用启动绰号处理线程。") + + # 注册退出处理函数 (确保进程能被关闭) + atexit.register(stop_nickname_processor) # <--- 在这里注册停止函数 + logger.info("已注册绰号处理线程的退出处理程序。") + # 创建事件循环 loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index 669b3094..1d25b874 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,155 +1,68 @@ +# nickname_processor.py (多线程版本 - 使用全局 config) + import asyncio import traceback -from multiprocessing import Process, Queue as mpQueue, Event -from typing import Dict, Optional +import threading +import queue +from typing import Dict, Optional, Any +# 数据库和日志导入 from pymongo import MongoClient from pymongo.errors import OperationFailure +from src.common.logger_manager import get_logger +from src.common.database import db # 使用全局 db -from src.common.logger_manager import get_logger # 导入日志管理器 -from src.config.config import global_config # 导入全局配置 -from .nickname_mapper import analyze_chat_for_nicknames # 导入绰号分析函数 -from src.common.database import db # 导入数据库初始化和关闭函数 +logger = get_logger("nickname_processor") -logger = get_logger("nickname_processor") # 获取日志记录器实例 -# --- 运行时状态 (用于安全停止进程) --- -_stop_event = Event() +# --- 恢复导入全局 config --- +try: + from src.config.config import global_config # <--- 直接导入全局配置 +except ImportError: + logger.critical("无法导入 global_config!") + global_config = None # 设置为 None +# --------------------------- -# --- 数据库连接 --- -mongo_client: Optional[MongoClient] = None # MongoDB 客户端实例 -person_info_collection = None # 用户信息集合对象 +# 绰号分析函数导入 +from .nickname_mapper import analyze_chat_for_nicknames -# --- 数据库更新逻辑 (使用推荐的新结构) --- +# --- 使用 threading.Event --- +_stop_event = threading.Event() +# -------------------------- + +# --- 数据库更新逻辑 (使用全局 db) --- async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): - """ - 更新数据库中用户的群组绰号计数。 - 使用新的数据结构: - { - "user_id": 12345, - "group_nicknames": [ # <--- 字段名统一为 group_nicknames - { - "group_id": "群号1", - "nicknames": [ { "name": "绰号A", "count": 5 }, ... ] - }, ... - ] - } - """ - person_info_collection = db.person_info # 获取集合对象 - - if not nickname_map: - logger.debug("提供的用于更新的绰号映射为空。") - return - - logger.info(f"尝试更新群组 '{group_id}' 的绰号计数 (新结构),映射为: {nickname_map}") - - for user_id_str, nickname in nickname_map.items(): # user_id 从 map 中取出是 str - if not user_id_str or not nickname: - logger.warning(f"跳过绰号映射中的无效条目: user_id='{user_id_str}', nickname='{nickname}'") - continue - - group_id_str = str(group_id) # 确保 group_id 是字符串 + """更新数据库中用户的群组绰号计数 (使用全局 db)""" + person_info_collection = db.person_info + # ... (函数体保持不变, 参考之前的版本) ... + if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。"); return + logger.info(f"尝试更新群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") + for user_id_str, nickname in nickname_map.items(): + if not user_id_str or not nickname: logger.warning(f"跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'"); continue + group_id_str = str(group_id) + try: user_id_int = int(user_id_str) + except ValueError: logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。"); continue try: - # 假设数据库中存储的用户ID是整数类型,如果不是请移除 int() - user_id_int = int(user_id_str) - except ValueError: - logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。") - continue - - try: - # 步骤 1: 确保用户文档存在,且有 group_nicknames 字段 (如果不存在则添加空数组) - # 注意:这里不再使用 $setOnInsert 添加 group_nicknames,因为 $addToSet 或 $push 在字段不存在时会自动创建。 - # upsert=True 确保用户文档存在。 - person_info_collection.update_one( - {"user_id": user_id_int}, - {"$setOnInsert": {"user_id": user_id_int}}, # 确保 upsert 时 user_id 被正确设置 - upsert=True - ) - # 确保 group_nicknames 字段存在且为数组 (如果不存在则创建) - person_info_collection.update_one( - {"user_id": user_id_int, "group_nicknames": {"$exists": False}}, - {"$set": {"group_nicknames": []}} - ) + person_info_collection.update_one({"user_id": user_id_int},{"$setOnInsert": {"user_id": user_id_int}}, upsert=True) + person_info_collection.update_one({"user_id": user_id_int, "group_nicknames": {"$exists": False}}, {"$set": {"group_nicknames": []}}) + update_result = person_info_collection.update_one({"user_id": user_id_int, "group_nicknames": {"$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname}}}, {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, array_filters=[{"group.group_id": group_id_str}, {"nick.name": nickname}]) + if update_result.modified_count > 0: continue + update_result = person_info_collection.update_one({"user_id": user_id_int, "group_nicknames.group_id": group_id_str}, {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, array_filters=[{"group.group_id": group_id_str}]) + if update_result.modified_count > 0: continue + update_result = person_info_collection.update_one({"user_id": user_id_int, "group_nicknames.group_id": {"$ne": group_id_str}}, {"$push": {"group_nicknames": {"group_id": group_id_str, "nicknames": [{"name": nickname, "count": 1}]}}}) + except OperationFailure as op_err: logger.exception(f"数据库操作失败: 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}") + except Exception as e: logger.exception(f"更新用户 {user_id_str} 的绰号 {nickname} 时发生意外错误") - # 步骤 2: 尝试直接增加现有绰号的计数 - # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配,且该元素的 nicknames 数组中存在一个元素的 name 匹配 - update_result = person_info_collection.update_one( - { - "user_id": user_id_int, - "group_nicknames": { # <--- 确保使用 group_nicknames - "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} - } - }, - { # <--- 确保使用 group_nicknames - "$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1} - }, - array_filters=[ - {"group.group_id": group_id_str}, - {"nick.name": nickname} - ] - ) +# --- 使用 queue.Queue --- +# --- 修改:直接使用 global_config --- +queue_max_size = getattr(global_config, 'NICKNAME_QUEUE_MAX_SIZE', 100) if global_config else 100 +# -------------------------------- +nickname_queue: queue.Queue = queue.Queue(maxsize=queue_max_size) +# ---------------------- - if update_result.modified_count > 0: - logger.debug(f"用户 '{user_id_str}' 在群组 '{group_id_str}' 中的绰号 '{nickname}' 计数已增加。") - continue # 处理完成,进行下一次循环 - - # 步骤 3: 如果步骤 2 未修改任何内容,尝试将新绰号添加到现有群组的 nicknames 数组中 - # 条件:用户存在,且 group_nicknames 数组中存在一个元素其 group_id 匹配 - update_result = person_info_collection.update_one( - { - "user_id": user_id_int, - "group_nicknames.group_id": group_id_str # <--- 确保使用 group_nicknames - }, - { # <--- 确保使用 group_nicknames - "$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}} - }, - array_filters=[ - {"group.group_id": group_id_str} - ] - ) - - if update_result.modified_count > 0: - logger.debug(f"为用户 '{user_id_str}' 在群组 '{group_id_str}' 中添加了新绰号 '{nickname}',计数为 1。") - continue # 处理完成,进行下一次循环 - - # 步骤 4: 如果步骤 2 和 3 都未修改任何内容,说明群组条目本身可能不存在于 group_nicknames 数组中,尝试添加新的群组条目 - # 条件:用户存在,且 group_nicknames 数组中 *不包含* 指定 group_id 的元素 - update_result = person_info_collection.update_one( - { - "user_id": user_id_int, - "group_nicknames.group_id": {"$ne": group_id_str} # <--- 检查 group_id 是否不存在 - }, - { - "$push": { # <--- 确保使用 group_nicknames - "group_nicknames": { - "group_id": group_id_str, - "nicknames": [{"name": nickname, "count": 1}] - } - } - } - # 注意:这里不需要 upsert=True,因为步骤1已确保用户存在。 - # 如果字段 group_nicknames 不存在,$push 会自动创建它。 - ) - - # 记录日志(无论修改与否,因为可能是因为组已存在但无匹配导致没修改) - if update_result.modified_count > 0: - logger.debug(f"为用户 '{user_id_str}' 添加了新群组 '{group_id_str}' 条目和绰号 '{nickname}'。") - else: - # 到这里还没成功,可能意味着群组已存在但之前的步骤意外失败,或者有并发问题 - logger.warning(f"未能为用户 '{user_id_str}' 更新或添加群组 '{group_id_str}' 的绰号 '{nickname}'。可能群组已存在但前面的步骤未成功修改。UpdateResult: {update_result.raw_result}") - - - except OperationFailure as op_err: - # 使用 logger.exception 来记录数据库操作错误,自动包含 traceback - logger.exception(f"数据库操作失败: 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}") # <--- 修改了日志记录方式 - except Exception as e: - # 记录其他意外错误 - logger.exception(f"更新用户 {user_id_str} 的绰号 {nickname} 时发生意外错误") # <--- 修改了日志记录方式 - -# --- 队列和进程 --- -nickname_queue: mpQueue = mpQueue(maxsize=global_config.NICKNAME_QUEUE_MAX_SIZE) -_nickname_process: Optional[Process] = None +_nickname_thread: Optional[threading.Thread] = None +# --- add_to_nickname_queue (使用全局 config) --- async def add_to_nickname_queue( chat_history_str: str, bot_reply: str, @@ -157,111 +70,122 @@ async def add_to_nickname_queue( user_name_map: Dict[str, str] ): """将需要分析的数据放入队列。""" - if not global_config.ENABLE_NICKNAME_MAPPING: + # --- 修改:使用全局 config --- + if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: + # --------------------------- return - - if group_id is None: - logger.debug("私聊跳过绰号映射。") - return - + if group_id is None: logger.debug("私聊跳过绰号映射。"); return try: item = (chat_history_str, bot_reply, str(group_id), user_name_map) nickname_queue.put_nowait(item) - logger.debug(f"已将项目添加到群组 {group_id} 的绰号队列。") - except Exception as e: - logger.warning(f"无法将项目添加到绰号队列(可能已满): {e}", exc_info=True) + logger.debug(f"已将项目添加到群组 {group_id} 的绰号队列。当前大小: {nickname_queue.qsize()}") + except queue.Full: logger.warning(f"无法将项目添加到绰号队列:队列已满 (maxsize={nickname_queue.maxsize})。") + except Exception as e: logger.warning(f"无法将项目添加到绰号队列: {e}", exc_info=True) -async def _nickname_processing_loop(queue: mpQueue, stop_event): - """独立进程中的主循环,处理队列任务。""" - - logger.info("绰号处理循环已启动。") +# --- _nickname_processing_loop (使用全局 config) --- +async def _nickname_processing_loop(q: queue.Queue, stop_event: threading.Event): + """独立线程中的主循环,处理队列任务 (使用全局 db 和 config)。""" + thread_id = threading.get_ident() + logger.info(f"绰号处理循环已启动 (线程 ID: {thread_id})。") + # --- 修改:使用全局 config --- + sleep_interval = getattr(global_config, 'NICKNAME_PROCESS_SLEEP_INTERVAL', 0.5) if global_config else 0.5 + # --------------------------- while not stop_event.is_set(): try: - if not queue.empty(): - item = queue.get() - if isinstance(item, tuple) and len(item) == 4: - chat_history_str, bot_reply, group_id, user_name_map = item - logger.debug(f"正在处理群组 {group_id} 的绰号映射任务...") - - analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) - - if analysis_result.get("is_exist") and analysis_result.get("data"): - await update_nickname_counts(group_id, analysis_result["data"]) - else: - logger.warning(f"从队列接收到意外的项目类型: {type(item)}") - - await asyncio.sleep(5) + item = q.get(block=True, timeout=sleep_interval) + if isinstance(item, tuple) and len(item) == 4: + chat_history_str, bot_reply, group_id, user_name_map = item + logger.debug(f"(线程 ID: {thread_id}) 正在处理群组 {group_id} 的绰号映射任务...") + # analyze_chat_for_nicknames 内部也应使用 global_config + analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) + if analysis_result.get("is_exist") and analysis_result.get("data"): + await update_nickname_counts(group_id, analysis_result["data"]) else: - await asyncio.sleep(global_config.NICKNAME_PROCESS_SLEEP_INTERVAL) - - except asyncio.CancelledError: - logger.info("绰号处理循环已取消。") - break - except Exception as e: - logger.error(f"绰号处理循环出错: {e}\n{traceback.format_exc()}") - await asyncio.sleep(5) - - logger.info("绰号处理循环已结束。") + logger.warning(f"(线程 ID: {thread_id}) 从队列接收到意外的项目类型: {type(item)}") + q.task_done() + except queue.Empty: continue + except asyncio.CancelledError: logger.info(f"绰号处理循环已取消 (线程 ID: {thread_id})。"); break + except Exception as e: logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}"); await asyncio.sleep(5) + logger.info(f"绰号处理循环已结束 (线程 ID: {thread_id})。") -def _run_processor_process(queue: mpQueue, stop_event): - """进程启动函数,运行异步循环。""" +# --- _run_processor_thread (保持不变,不处理 db 或 config) --- +def _run_processor_thread(q: queue.Queue, stop_event: threading.Event): + """线程启动函数,运行异步循环。""" + loop = None + thread_id = threading.get_ident() + logger.info(f"Nickname processor thread starting (Thread ID: {thread_id})...") try: loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) - loop.run_until_complete(_nickname_processing_loop(queue, stop_event)) - loop.close() - except Exception as e: - logger.error(f"运行绰号处理器进程时出错: {e}", exc_info=True) + logger.info(f"(Thread ID: {thread_id}) Asyncio event loop created and set.") + loop.run_until_complete(_nickname_processing_loop(q, stop_event)) + except Exception as e: logger.error(f"(Thread ID: {thread_id}) Error running nickname processor thread: {e}", exc_info=True) + finally: + if loop: + try: + if loop.is_running(): + all_tasks = asyncio.all_tasks(loop) + if all_tasks: + logger.info(f"(Thread ID: {thread_id}) Cancelling {len(all_tasks)} tasks...") + for task in all_tasks: task.cancel() + loop.run_until_complete(asyncio.gather(*all_tasks, return_exceptions=True)) + loop.stop() + loop.close() + logger.info(f"(Thread ID: {thread_id}) Asyncio loop closed.") + except Exception as loop_close_err: logger.error(f"(Thread ID: {thread_id}) Error closing loop: {loop_close_err}", exc_info=True) + logger.info(f"Nickname processor thread finished (Thread ID: {thread_id}).") + +# --- start_nickname_processor (使用全局 config) --- def start_nickname_processor(): - """启动绰号映射处理进程。""" - global _nickname_process - if not global_config.ENABLE_NICKNAME_MAPPING: - logger.info("绰号映射功能已禁用。处理器未启动。") + """启动绰号映射处理线程。""" + global _nickname_thread + # --- 修改:使用全局 config --- + if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: + # --------------------------- + logger.info("绰号映射功能已禁用或无法获取配置。处理器未启动。") return - if _nickname_process is None or not _nickname_process.is_alive(): - logger.info("正在启动绰号处理器进程...") + if _nickname_thread is None or not _nickname_thread.is_alive(): + logger.info("正在启动绰号处理器线程...") stop_event = get_stop_event() stop_event.clear() - _nickname_process = Process(target=_run_processor_process, args=(nickname_queue, stop_event), daemon=True) - _nickname_process.start() - logger.info(f"绰号处理器进程已启动,PID: {_nickname_process.pid}") + _nickname_thread = threading.Thread( + target=_run_processor_thread, + args=(nickname_queue, stop_event), + daemon=True + ) + _nickname_thread.start() + logger.info(f"绰号处理器线程已启动 (Thread ID: {_nickname_thread.ident})") else: - logger.warning("绰号处理器进程已在运行中。") + logger.warning("绰号处理器线程已在运行中。") +# --- stop_nickname_processor (保持不变) --- def stop_nickname_processor(): - """停止绰号映射处理进程。""" - global _nickname_process - if _nickname_process and _nickname_process.is_alive(): - logger.info("正在停止绰号处理器进程...") - set_stop_event() # 发送停止信号 + """停止绰号映射处理线程。""" + global _nickname_thread + if _nickname_thread and _nickname_thread.is_alive(): + logger.info("正在停止绰号处理器线程...") + set_stop_event() try: - _nickname_process.join(timeout=10) - if _nickname_process.is_alive(): - logger.warning("绰号处理器进程在 10 秒后未优雅停止。正在终止...") - _nickname_process.terminate() - _nickname_process.join(timeout=5) - except Exception as e: - logger.error(f"停止绰号处理器进程时出错: {e}", exc_info=True) + _nickname_thread.join(timeout=10) + if _nickname_thread.is_alive(): logger.warning("绰号处理器线程在 10 秒后未结束。") + except Exception as e: logger.error(f"停止绰号处理器线程时出错: {e}", exc_info=True) finally: - if _nickname_process and not _nickname_process.is_alive(): - logger.info("绰号处理器进程已成功停止。") - else: - logger.error("未能停止绰号处理器进程。") - _nickname_process = None + if _nickname_thread and not _nickname_thread.is_alive(): logger.info("绰号处理器线程已成功停止。") + else: logger.warning("停止绰号处理器线程:线程可能仍在运行。") + _nickname_thread = None else: - logger.info("绰号处理器进程未在运行。") + logger.info("绰号处理器线程未在运行或已被清理。") -# 可以在应用启动时调用 start_nickname_processor() -# 可以在应用关闭时调用 stop_nickname_processor() -def get_stop_event(): +# --- Event 控制函数 (保持不变) --- +def get_stop_event() -> threading.Event: """获取全局停止事件""" return _stop_event def set_stop_event(): - """设置全局停止事件,通知子进程退出""" - _stop_event.set() \ No newline at end of file + """设置全局停止事件,通知子线程退出""" + _stop_event.set() From 420aecb7be8495c8534637bfd8c5885a91f26805 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 20:19:21 +0800 Subject: [PATCH 16/58] =?UTF-8?q?pfc=E4=BF=AE=E5=A4=8D=EF=BC=8C=E6=9F=A5?= =?UTF-8?q?=E8=AF=A2user=5Fid=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/chat_observer.py | 1 - src/plugins/PFC/observation_info.py | 3 --- src/plugins/person_info/relationship_manager.py | 11 +++-------- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/src/plugins/PFC/chat_observer.py b/src/plugins/PFC/chat_observer.py index e66824a2..1a17db90 100644 --- a/src/plugins/PFC/chat_observer.py +++ b/src/plugins/PFC/chat_observer.py @@ -39,7 +39,6 @@ class ChatObserver: stream_id: 聊天流ID """ self.last_check_time = None - self.last_check_time = None self.last_bot_speak_time = None self.last_user_speak_time = None if stream_id in self._instances: diff --git a/src/plugins/PFC/observation_info.py b/src/plugins/PFC/observation_info.py index af7f537b..2fc5e4c5 100644 --- a/src/plugins/PFC/observation_info.py +++ b/src/plugins/PFC/observation_info.py @@ -158,9 +158,6 @@ class ObservationInfo: # meta_plan_trigger: bool = False # --- 修改:移除 __post_init__ 的参数 --- - def __init__(self): - self.chat_observer = None - self.chat_observer = None def __post_init__(self): """初始化后创建handler并进行必要的设置""" diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index fe229b2c..57976176 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -94,18 +94,13 @@ class RelationshipManager: person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] names_map = {} try: - # --- 修改点:直接使用 db.person_info.find --- - # !!! 确保 'person_info' 是正确的集合名称 !!! cursor = db.person_info.find( {"person_id": {"$in": person_ids}}, - {"_id": 0, "person_id": 1, "person_name": 1} # 只查询需要的字段 + {"_id": 0, "person_id": 1, "user_id": 1, "person_name": 1} # 只查询需要的字段 ) - # --- 结束修改点 --- - # 注意:pymongo 的 find 返回的是同步游标,如果你的 db 对象是 motor 客户端,需要使用 await cursor.to_list(length=None) - # 假设这里 db 是 pymongo 同步客户端,或者你的环境允许在异步函数中迭代同步游标 - for doc in cursor: # 如果 db 是 motor,这里会报错,需要改为 async for - original_user_id = doc.get("person_id", "").split("_", 1)[-1] + for doc in cursor: + original_user_id = doc.get("user_id", "").split("_", 1)[-1] person_name = doc.get("person_name") if original_user_id and person_name: names_map[original_user_id] = person_name From cc53904ea2f55e4664806571151c820c240fcf6e Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 20:52:36 +0800 Subject: [PATCH 17/58] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dpfc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/PFC/observation_info.py | 99 ++++++++++++++++++----------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/src/plugins/PFC/observation_info.py b/src/plugins/PFC/observation_info.py index 2fc5e4c5..b684367e 100644 --- a/src/plugins/PFC/observation_info.py +++ b/src/plugins/PFC/observation_info.py @@ -121,48 +121,70 @@ class ObservationInfoHandler(NotificationHandler): logger.error(traceback.format_exc()) # 打印详细堆栈信息 -@dataclass +# @dataclass <-- 这个,不需要了(递黄瓜) class ObservationInfo: - """决策信息类,用于收集和管理来自chat_observer的通知信息""" + """决策信息类,用于收集和管理来自chat_observer的通知信息 (手动实现 __init__)""" - # --- 修改:添加 private_name 字段 --- - private_name: str = field(init=True) # 让 dataclass 的 __init__ 接收 private_name + # 类型提示保留,可用于文档和静态分析 + private_name: str + chat_history: List[Dict[str, Any]] + chat_history_str: str + unprocessed_messages: List[Dict[str, Any]] + active_users: Set[str] + last_bot_speak_time: Optional[float] + last_user_speak_time: Optional[float] + last_message_time: Optional[float] + last_message_id: Optional[str] + last_message_content: str + last_message_sender: Optional[str] + bot_id: Optional[str] + chat_history_count: int + new_messages_count: int + cold_chat_start_time: Optional[float] + cold_chat_duration: float + is_typing: bool + is_cold_chat: bool + changed: bool + chat_observer: Optional[ChatObserver] + handler: Optional[ObservationInfoHandler] - # data_list - chat_history: List[Dict[str, Any]] = field(default_factory=list) # 修改:明确类型为 Dict - chat_history_str: str = "" - unprocessed_messages: List[Dict[str, Any]] = field(default_factory=list) # 修改:明确类型为 Dict - active_users: Set[str] = field(default_factory=set) - # data - last_bot_speak_time: Optional[float] = None - last_user_speak_time: Optional[float] = None - last_message_time: Optional[float] = None - # 添加 last_message_id - last_message_id: Optional[str] = None - last_message_content: str = "" - last_message_sender: Optional[str] = None - bot_id: Optional[str] = None - chat_history_count: int = 0 - new_messages_count: int = 0 - cold_chat_start_time: Optional[float] = None # 用于计算冷场持续时间 - cold_chat_duration: float = 0.0 # 缓存计算结果 + def __init__(self, private_name: str): + """ + 手动初始化 ObservationInfo 的所有实例变量。 + """ - # state - is_typing: bool = False # 可能表示对方正在输入 - # has_unread_messages: bool = False # 这个状态可以通过 new_messages_count > 0 判断 - is_cold_chat: bool = False - changed: bool = False # 用于标记状态是否有变化,以便外部模块决定是否重新规划 + # 接收的参数 + self.private_name: str = private_name - # #spec (暂时注释掉,如果不需要) - # meta_plan_trigger: bool = False + # data_list + self.chat_history: List[Dict[str, Any]] = [] + self.chat_history_str: str = "" + self.unprocessed_messages: List[Dict[str, Any]] = [] + self.active_users: Set[str] = set() - # --- 修改:移除 __post_init__ 的参数 --- + # data + self.last_bot_speak_time: Optional[float] = None + self.last_user_speak_time: Optional[float] = None + self.last_message_time: Optional[float] = None + self.last_message_id: Optional[str] = None + self.last_message_content: str = "" + self.last_message_sender: Optional[str] = None + self.bot_id: Optional[str] = None + self.chat_history_count: int = 0 + self.new_messages_count: int = 0 + self.cold_chat_start_time: Optional[float] = None + self.cold_chat_duration: float = 0.0 - def __post_init__(self): - """初始化后创建handler并进行必要的设置""" - self.chat_observer: Optional[ChatObserver] = None # 添加类型提示 - self.handler = ObservationInfoHandler(self, self.private_name) + # state + self.is_typing: bool = False + self.is_cold_chat: bool = False + self.changed: bool = False + + # 关联对象 + self.chat_observer: Optional[ChatObserver] = None + + self.handler: ObservationInfoHandler = ObservationInfoHandler(self, self.private_name) def bind_to_chat_observer(self, chat_observer: ChatObserver): """绑定到指定的chat_observer @@ -176,6 +198,11 @@ class ObservationInfo: self.chat_observer = chat_observer try: + if not self.handler: # 确保 handler 已经被创建 + logger.error(f"[私聊][{self.private_name}] 尝试绑定时 handler 未初始化!") + self.chat_observer = None # 重置,防止后续错误 + return + # 注册关心的通知类型 self.chat_observer.notification_manager.register_handler( target="observation_info", notification_type=NotificationType.NEW_MESSAGE, handler=self.handler @@ -194,7 +221,7 @@ class ObservationInfo: def unbind_from_chat_observer(self): """解除与chat_observer的绑定""" - if self.chat_observer and hasattr(self.chat_observer, "notification_manager"): # 增加检查 + if self.chat_observer and hasattr(self.chat_observer, "notification_manager") and self.handler: # 增加 handler 检查 try: self.chat_observer.notification_manager.unregister_handler( target="observation_info", notification_type=NotificationType.NEW_MESSAGE, handler=self.handler @@ -212,7 +239,7 @@ class ObservationInfo: finally: # 确保 chat_observer 被重置 self.chat_observer = None else: - logger.warning(f"[私聊][{self.private_name}]尝试解绑时 ChatObserver 不存在或无效") + logger.warning(f"[私聊][{self.private_name}]尝试解绑时 ChatObserver 不存在、无效或 handler 未设置") # 修改:update_from_message 接收 UserInfo 对象 async def update_from_message(self, message: Dict[str, Any], user_info: Optional[UserInfo]): From 8bf709580078dedafdd52fd9737c51a63fd53352 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 22:53:21 +0800 Subject: [PATCH 18/58] =?UTF-8?q?=E8=BF=87=E6=BB=A4=E8=87=AA=E8=BA=AB?= =?UTF-8?q?=E4=B8=8E=E6=97=A0=E5=8F=98=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 67 +++-- .../group_nickname/nickname_processor.py | 248 +++++++++++++----- src/plugins/heartFC_chat/heartFC_chat.py | 2 +- .../person_info/relationship_manager.py | 19 +- 4 files changed, 258 insertions(+), 78 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index b5e258a4..b4cfe072 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -29,12 +29,14 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 llm_mapper = None def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: + """构建用于 LLM 绰号映射的 Prompt""" + # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) - print(f"\n\n\n{user_list_str}\n\n\n\n") + # print(f"\n\n\nKnown User Info for LLM:\n{user_list_str}\n\n\n\n") # Debugging print prompt = f""" 任务:分析以下聊天记录和你的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 -已知用户信息: +已知用户信息(ID: 名称): {user_list_str} 聊天记录: @@ -56,12 +58,12 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: "用户B数字id": "绰号_B" }} }} - 其中 "data" 字段的键是用户的 ID,值是对应的绰号。只包含你能确认映射关系的绰号。 + 其中 "data" 字段的键是用户的 ID (字符串形式),值是对应的绰号。只包含你能确认映射关系的绰号。 4. 如果无法建立任何可靠的一一映射关系(例如,绰号指代不明、没有出现绰号、或无法确认绰号与用户的关联),请输出 JSON 对象: {{ "is_exist": false }} -5. 你的昵称后面包含"(你)",不需要输出你自身的绰号。 +5. 在“已知用户信息”列表中,你的昵称后面可能包含"(你)",这表示是你自己,不需要输出你自身的绰号映射。请确保不要将你自己的ID和任何词语映射为绰号。 6. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 输出: @@ -72,14 +74,12 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: async def analyze_chat_for_nicknames( chat_history_str: str, bot_reply: str, - user_name_map: Dict[str, str] + user_name_map: Dict[str, str] # 这个 map 包含了 user_id -> person_name 的信息 ) -> Dict[str, Any]: """ - 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射。 + 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 """ - # --- [修改] 使用全局配置开关 --- if not global_config.ENABLE_NICKNAME_MAPPING: - # --- 结束修改 --- logger.debug("绰号映射功能已禁用。") return {"is_exist": False} @@ -99,6 +99,7 @@ async def analyze_chat_for_nicknames( logger.warning("LLM 返回了空的绰号映射内容。") return {"is_exist": False} + # 清理可能的 Markdown 代码块标记 response_content = response_content.strip() if response_content.startswith("```json"): response_content = response_content[7:] @@ -110,14 +111,51 @@ async def analyze_chat_for_nicknames( result = json.loads(response_content) if isinstance(result, dict) and "is_exist" in result: if result["is_exist"] is True: - if "data" in result and isinstance(result["data"], dict): - if not result["data"]: - logger.debug("LLM 指示 is_exist=True 但 data 为空。视为 False 处理。") + original_data = result.get("data") # 使用 .get() 更安全 + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + logger.info(f"LLM 找到的原始绰号映射: {original_data}") + + # --- 开始过滤 --- + filtered_data = {} + bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 + + for user_id, nickname in original_data.items(): + # 检查 user_id 是否是字符串,以防万一 + if not isinstance(user_id, str): + logger.warning(f"LLM 返回的 user_id '{user_id}' 不是字符串,跳过。") + continue + + # 条件 1: 排除机器人自身 + if user_id == bot_qq_str: + logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") + continue + + # 条件 2: 排除 nickname 与 person_name 相同的情况 + person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name + if person_name and person_name == nickname: + logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") + continue + + # 如果通过所有过滤条件,则保留 + filtered_data[user_id] = nickname + # --- 结束过滤 --- + + # 检查过滤后是否还有数据 + if not filtered_data: + logger.info("所有找到的绰号映射都被过滤掉了。") return {"is_exist": False} - logger.info(f"找到绰号映射: {result['data']}") - return {"is_exist": True, "data": result["data"]} + else: + logger.info(f"过滤后的绰号映射: {filtered_data}") + return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 + else: - logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 缺失或不是字典。") + # is_exist 为 True 但 data 缺失、不是字典或为空 + if "data" not in result: + logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") + elif not isinstance(result.get("data"), dict): + logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") + else: # data 为空字典 + logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") return {"is_exist": False} elif result["is_exist"] is False: logger.info("LLM 未找到可靠的绰号映射。") @@ -135,4 +173,3 @@ async def analyze_chat_for_nicknames( except Exception as e: logger.error(f"绰号映射 LLM 调用或处理过程中出错: {e}", exc_info=True) return {"is_exist": False} - diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index 1d25b874..fc5944ab 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,4 +1,4 @@ -# nickname_processor.py (多线程版本 - 使用全局 config) +# nickname_processor.py (多线程版本 - 使用全局 config - 修复 Race Condition on person_id) import asyncio import traceback @@ -8,7 +8,7 @@ from typing import Dict, Optional, Any # 数据库和日志导入 from pymongo import MongoClient -from pymongo.errors import OperationFailure +from pymongo.errors import OperationFailure, DuplicateKeyError # 引入 DuplicateKeyError from src.common.logger_manager import get_logger from src.common.database import db # 使用全局 db @@ -19,7 +19,12 @@ try: from src.config.config import global_config # <--- 直接导入全局配置 except ImportError: logger.critical("无法导入 global_config!") - global_config = None # 设置为 None + # 提供一个默认的回退配置对象,如果 global_config 导入失败 + class FallbackConfig: + ENABLE_NICKNAME_MAPPING = False + NICKNAME_QUEUE_MAX_SIZE = 100 + NICKNAME_PROCESS_SLEEP_INTERVAL = 0.5 + global_config = FallbackConfig() # --------------------------- # 绰号分析函数导入 @@ -29,89 +34,206 @@ from .nickname_mapper import analyze_chat_for_nicknames _stop_event = threading.Event() # -------------------------- -# --- 数据库更新逻辑 (使用全局 db) --- -async def update_nickname_counts(group_id: str, nickname_map: Dict[str, str]): - """更新数据库中用户的群组绰号计数 (使用全局 db)""" +# --- 数据库更新逻辑 (使用全局 db) - 修复 Race Condition 版 --- +async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dict[str, str]): + """ + 更新数据库中用户的群组绰号计数 (使用全局 db)。 + 通过首先基于 person_id 进行 upsert 来处理潜在的 race condition。 + + Args: + platform (str): 平台名称 (e.g., 'qq')。 + group_id (str): 群组 ID。 + nickname_map (Dict[str, str]): 用户 ID (字符串) 到绰号的映射。 + """ + # 尝试导入 person_info_manager (放在函数内部以减少潜在的导入问题) + try: + # 假设 person_info 在 group_nickname 的上一级目录 + from ..person_info.person_info import person_info_manager + except ImportError: + logger.error("无法导入 person_info_manager,无法生成 person_id!") + return # 无法继续,因为需要 person_id + person_info_collection = db.person_info - # ... (函数体保持不变, 参考之前的版本) ... - if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。"); return - logger.info(f"尝试更新群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") + + if not nickname_map: + logger.debug("提供的用于更新的绰号映射为空。") + return + + logger.info(f"尝试更新平台 '{platform}' 群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") + for user_id_str, nickname in nickname_map.items(): - if not user_id_str or not nickname: logger.warning(f"跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'"); continue + # --- 基本验证 --- + if not user_id_str or not nickname: + logger.warning(f"跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'") + continue group_id_str = str(group_id) - try: user_id_int = int(user_id_str) - except ValueError: logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。"); continue try: - person_info_collection.update_one({"user_id": user_id_int},{"$setOnInsert": {"user_id": user_id_int}}, upsert=True) - person_info_collection.update_one({"user_id": user_id_int, "group_nicknames": {"$exists": False}}, {"$set": {"group_nicknames": []}}) - update_result = person_info_collection.update_one({"user_id": user_id_int, "group_nicknames": {"$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname}}}, {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, array_filters=[{"group.group_id": group_id_str}, {"nick.name": nickname}]) - if update_result.modified_count > 0: continue - update_result = person_info_collection.update_one({"user_id": user_id_int, "group_nicknames.group_id": group_id_str}, {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, array_filters=[{"group.group_id": group_id_str}]) - if update_result.modified_count > 0: continue - update_result = person_info_collection.update_one({"user_id": user_id_int, "group_nicknames.group_id": {"$ne": group_id_str}}, {"$push": {"group_nicknames": {"group_id": group_id_str, "nicknames": [{"name": nickname, "count": 1}]}}}) - except OperationFailure as op_err: logger.exception(f"数据库操作失败: 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}") - except Exception as e: logger.exception(f"更新用户 {user_id_str} 的绰号 {nickname} 时发生意外错误") + user_id_int = int(user_id_str) + except ValueError: + logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。") + continue + # --- 结束验证 --- + + try: + # --- 步骤 1: 生成 person_id --- + person_id = person_info_manager.get_person_id(platform, user_id_str) + if not person_id: + logger.error(f"无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") + continue + + # --- 步骤 2: 基于 person_id 执行 Upsert --- + # 这是关键步骤,用于原子性地确保文档存在,避免 person_id 冲突。 + # 如果文档不存在,它会被创建,并设置 $setOnInsert 中的字段。 + # 如果文档已存在,此操作不会修改任何内容(因为没有 $set 操作符)。 + upsert_result = person_info_collection.update_one( + {"person_id": person_id}, # Filter by the unique key + { + "$setOnInsert": { + "person_id": person_id, + "user_id": user_id_int, + "platform": platform, + "group_nicknames": [] # 初始化 group_nicknames 数组 + } + }, + upsert=True + ) + + # 可选日志:记录是否创建了新文档 + if upsert_result.upserted_id: + logger.debug(f"Upsert on person_id created new document: {person_id}") + # else: + # logger.debug(f"Upsert on person_id found existing document: {person_id}") + + # --- 步骤 3: 更新群组绰号 --- + # 现在我们确信具有此 person_id 的文档存在,可以安全地更新其 group_nicknames。 + + # 3a. 尝试增加现有群组中现有绰号的计数 + update_result_inc = person_info_collection.update_one( + { + "person_id": person_id, # 明确目标文档 + "group_nicknames": { # 检查数组中是否有匹配项 + "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} + } + }, + {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, # 增加计数 + array_filters=[ # 指定要更新的数组元素 + {"group.group_id": group_id_str}, + {"nick.name": nickname} + ] + ) + + # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 + if update_result_inc.modified_count == 0: + update_result_push_nick = person_info_collection.update_one( + { + "person_id": person_id, # 明确目标文档 + "group_nicknames.group_id": group_id_str # 检查群组是否存在 + }, + # 将新绰号添加到匹配群组的 nicknames 数组中 + {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, + array_filters=[{"group.group_id": group_id_str}] # 指定要推送到的群组 + ) + + # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 + if update_result_push_nick.modified_count == 0: + # 确保 group_nicknames 数组存在 (如果 $setOnInsert 失败或数据不一致时的保险措施) + person_info_collection.update_one( + {"person_id": person_id, "group_nicknames": {"$exists": False}}, + {"$set": {"group_nicknames": []}} + ) + # 推送新的群组对象到 group_nicknames 数组 + update_result_push_group = person_info_collection.update_one( + { + "person_id": person_id, # 明确目标文档 + "group_nicknames.group_id": {"$ne": group_id_str} # 确保该群组 ID 尚未存在 + }, + { + "$push": { # 添加新的群组条目 + "group_nicknames": { + "group_id": group_id_str, + "nicknames": [{"name": nickname, "count": 1}] # 初始化绰号列表 + } + } + } + ) + if update_result_push_group.modified_count > 0: + logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'") + + except DuplicateKeyError as dk_err: + # 这个错误理论上不应该再由步骤 2 的 upsert 触发。 + # 如果仍然出现,可能指示 person_id 生成逻辑问题或非常罕见的 MongoDB 内部情况。 + logger.error(f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。") + except OperationFailure as op_err: + logger.exception(f"数据库操作失败 (OperationFailure): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}") + except Exception as e: + logger.exception(f"更新用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误") # --- 使用 queue.Queue --- -# --- 修改:直接使用 global_config --- -queue_max_size = getattr(global_config, 'NICKNAME_QUEUE_MAX_SIZE', 100) if global_config else 100 -# -------------------------------- +queue_max_size = getattr(global_config, 'NICKNAME_QUEUE_MAX_SIZE', 100) nickname_queue: queue.Queue = queue.Queue(maxsize=queue_max_size) -# ---------------------- _nickname_thread: Optional[threading.Thread] = None -# --- add_to_nickname_queue (使用全局 config) --- +# --- add_to_nickname_queue (保持不变,已包含 platform) --- async def add_to_nickname_queue( chat_history_str: str, bot_reply: str, + platform: str, group_id: Optional[str], user_name_map: Dict[str, str] ): """将需要分析的数据放入队列。""" - # --- 修改:使用全局 config --- if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: - # --------------------------- return - if group_id is None: logger.debug("私聊跳过绰号映射。"); return + if group_id is None: + logger.debug("私聊跳过绰号映射。") + return try: - item = (chat_history_str, bot_reply, str(group_id), user_name_map) + item = (chat_history_str, bot_reply, platform, str(group_id), user_name_map) nickname_queue.put_nowait(item) - logger.debug(f"已将项目添加到群组 {group_id} 的绰号队列。当前大小: {nickname_queue.qsize()}") - except queue.Full: logger.warning(f"无法将项目添加到绰号队列:队列已满 (maxsize={nickname_queue.maxsize})。") - except Exception as e: logger.warning(f"无法将项目添加到绰号队列: {e}", exc_info=True) + logger.debug(f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {nickname_queue.qsize()}") + except queue.Full: + logger.warning(f"无法将项目添加到绰号队列:队列已满 (maxsize={nickname_queue.maxsize})。") + except Exception as e: + logger.warning(f"无法将项目添加到绰号队列: {e}", exc_info=True) -# --- _nickname_processing_loop (使用全局 config) --- +# --- _nickname_processing_loop (保持不变,已包含 platform) --- async def _nickname_processing_loop(q: queue.Queue, stop_event: threading.Event): """独立线程中的主循环,处理队列任务 (使用全局 db 和 config)。""" thread_id = threading.get_ident() logger.info(f"绰号处理循环已启动 (线程 ID: {thread_id})。") - # --- 修改:使用全局 config --- - sleep_interval = getattr(global_config, 'NICKNAME_PROCESS_SLEEP_INTERVAL', 0.5) if global_config else 0.5 - # --------------------------- + sleep_interval = getattr(global_config, 'NICKNAME_PROCESS_SLEEP_INTERVAL', 0.5) while not stop_event.is_set(): try: item = q.get(block=True, timeout=sleep_interval) - if isinstance(item, tuple) and len(item) == 4: - chat_history_str, bot_reply, group_id, user_name_map = item - logger.debug(f"(线程 ID: {thread_id}) 正在处理群组 {group_id} 的绰号映射任务...") - # analyze_chat_for_nicknames 内部也应使用 global_config + + if isinstance(item, tuple) and len(item) == 5: + chat_history_str, bot_reply, platform, group_id, user_name_map = item + logger.debug(f"(线程 ID: {thread_id}) 正在处理平台 '{platform}' 群组 '{group_id}' 的绰号映射任务...") analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) if analysis_result.get("is_exist") and analysis_result.get("data"): - await update_nickname_counts(group_id, analysis_result["data"]) + await update_nickname_counts(platform, group_id, analysis_result["data"]) else: - logger.warning(f"(线程 ID: {thread_id}) 从队列接收到意外的项目类型: {type(item)}") + logger.warning(f"(线程 ID: {thread_id}) 从队列接收到意外的项目类型或长度: {type(item)}, 内容: {item}") + q.task_done() - except queue.Empty: continue - except asyncio.CancelledError: logger.info(f"绰号处理循环已取消 (线程 ID: {thread_id})。"); break - except Exception as e: logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}"); await asyncio.sleep(5) + + except queue.Empty: + continue + except asyncio.CancelledError: + logger.info(f"绰号处理循环已取消 (线程 ID: {thread_id})。") + break + except Exception as e: + logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") + await asyncio.sleep(5) + logger.info(f"绰号处理循环已结束 (线程 ID: {thread_id})。") -# --- _run_processor_thread (保持不变,不处理 db 或 config) --- +# --- _run_processor_thread (保持不变) --- def _run_processor_thread(q: queue.Queue, stop_event: threading.Event): """线程启动函数,运行异步循环。""" loop = None @@ -122,30 +244,34 @@ def _run_processor_thread(q: queue.Queue, stop_event: threading.Event): asyncio.set_event_loop(loop) logger.info(f"(Thread ID: {thread_id}) Asyncio event loop created and set.") loop.run_until_complete(_nickname_processing_loop(q, stop_event)) - except Exception as e: logger.error(f"(Thread ID: {thread_id}) Error running nickname processor thread: {e}", exc_info=True) + except Exception as e: + logger.error(f"(Thread ID: {thread_id}) Error running nickname processor thread: {e}", exc_info=True) finally: if loop: try: if loop.is_running(): + logger.info(f"(Thread ID: {thread_id}) Stopping the asyncio loop...") all_tasks = asyncio.all_tasks(loop) if all_tasks: - logger.info(f"(Thread ID: {thread_id}) Cancelling {len(all_tasks)} tasks...") + logger.info(f"(Thread ID: {thread_id}) Cancelling {len(all_tasks)} running tasks...") for task in all_tasks: task.cancel() loop.run_until_complete(asyncio.gather(*all_tasks, return_exceptions=True)) + logger.info(f"(Thread ID: {thread_id}) All tasks cancelled.") loop.stop() - loop.close() - logger.info(f"(Thread ID: {thread_id}) Asyncio loop closed.") - except Exception as loop_close_err: logger.error(f"(Thread ID: {thread_id}) Error closing loop: {loop_close_err}", exc_info=True) + logger.info(f"(Thread ID: {thread_id}) Loop stopped.") + if not loop.is_closed(): + loop.close() + logger.info(f"(Thread ID: {thread_id}) Asyncio loop closed.") + except Exception as loop_close_err: + logger.error(f"(Thread ID: {thread_id}) Error closing loop: {loop_close_err}", exc_info=True) logger.info(f"Nickname processor thread finished (Thread ID: {thread_id}).") -# --- start_nickname_processor (使用全局 config) --- +# --- start_nickname_processor (保持不变) --- def start_nickname_processor(): """启动绰号映射处理线程。""" global _nickname_thread - # --- 修改:使用全局 config --- if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: - # --------------------------- logger.info("绰号映射功能已禁用或无法获取配置。处理器未启动。") return @@ -172,11 +298,15 @@ def stop_nickname_processor(): set_stop_event() try: _nickname_thread.join(timeout=10) - if _nickname_thread.is_alive(): logger.warning("绰号处理器线程在 10 秒后未结束。") - except Exception as e: logger.error(f"停止绰号处理器线程时出错: {e}", exc_info=True) + if _nickname_thread.is_alive(): + logger.warning("绰号处理器线程在 10 秒后未结束。") + except Exception as e: + logger.error(f"停止绰号处理器线程时出错: {e}", exc_info=True) finally: - if _nickname_thread and not _nickname_thread.is_alive(): logger.info("绰号处理器线程已成功停止。") - else: logger.warning("停止绰号处理器线程:线程可能仍在运行。") + if _nickname_thread and not _nickname_thread.is_alive(): + logger.info("绰号处理器线程已成功停止。") + else: + logger.warning("停止绰号处理器线程:线程可能仍在运行或未正确清理。") _nickname_thread = None else: logger.info("绰号处理器线程未在运行或已被清理。") diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index ebfed17d..faa0ffd5 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -764,7 +764,7 @@ class HeartFChatting: user_name_map[user_id] = f"未知({user_id})" # 5. 添加到队列 - await add_to_nickname_queue(chat_history_str, bot_reply_str, group_id, user_name_map) + await add_to_nickname_queue(chat_history_str, bot_reply_str,platform, group_id, user_name_map) logger.debug(f"{self.log_prefix} Triggered nickname analysis for group {group_id}.") except Exception as e: diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index 57976176..78bcb7c7 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -99,11 +99,25 @@ class RelationshipManager: {"_id": 0, "person_id": 1, "user_id": 1, "person_name": 1} # 只查询需要的字段 ) - for doc in cursor: - original_user_id = doc.get("user_id", "").split("_", 1)[-1] + for doc in cursor: + user_id_val = doc.get("user_id") # 获取原始值 + original_user_id = None # 初始化 + + if isinstance(user_id_val, (int, float)): # 检查是否是数字类型 + original_user_id = str(user_id_val) # 直接转换为字符串 + elif isinstance(user_id_val, str): # 检查是否是字符串 + if "_" in user_id_val: # 如果包含下划线,则分割 + original_user_id = user_id_val.split("_", 1)[-1] + else: # 如果不包含下划线,则直接使用该字符串 + original_user_id = user_id_val + # else: # 其他类型或 None,original_user_id 保持为 None + person_name = doc.get("person_name") + + # 确保 original_user_id 和 person_name 都有效 if original_user_id and person_name: names_map[original_user_id] = person_name + logger.debug(f"批量获取 {len(user_ids)} 个用户的 person_name,找到 {len(names_map)} 个。") except AttributeError as e: # 如果 db 对象没有 person_info 属性,或者 find 方法不存在 @@ -111,7 +125,6 @@ class RelationshipManager: except Exception as e: logger.error(f"批量获取 person_name 时出错: {e}", exc_info=True) return names_map - # --- 结束修改 --- @staticmethod async def get_users_group_nicknames(platform: str, user_ids: List[str], group_id: str) -> Dict[str, List[Dict[str, int]]]: From 0ca3cada9aa27bdf55a0f1a5331b73069c46ca21 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 23:03:43 +0800 Subject: [PATCH 19/58] ruff --- bot.py | 2 -- src/config/config.py | 2 +- .../group_nickname/nickname_processor.py | 36 +++++-------------- 3 files changed, 10 insertions(+), 30 deletions(-) diff --git a/bot.py b/bot.py index c50f939c..dcd2a3e1 100644 --- a/bot.py +++ b/bot.py @@ -12,8 +12,6 @@ from src.common.logger_manager import get_logger # from src.common.logger import LogConfig, CONFIRM_STYLE_CONFIG from src.common.crash_logger import install_crash_handler -from src.main import MainSystem - from src.main import MainSystem from src.plugins.group_nickname.nickname_processor import start_nickname_processor, stop_nickname_processor # <--- 添加这行导入 import atexit diff --git a/src/config/config.py b/src/config/config.py index 6c4e3bb4..c3cae240 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -1,7 +1,7 @@ import os import re from dataclasses import dataclass, field -from typing import Dict, List, Optional, Tuple, Any +from typing import Dict, List, Optional, Any from dateutil import tz import tomli diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index fc5944ab..df68fb78 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -4,37 +4,18 @@ import asyncio import traceback import threading import queue -from typing import Dict, Optional, Any +from typing import Dict, Optional -# 数据库和日志导入 -from pymongo import MongoClient from pymongo.errors import OperationFailure, DuplicateKeyError # 引入 DuplicateKeyError from src.common.logger_manager import get_logger from src.common.database import db # 使用全局 db +from .nickname_mapper import analyze_chat_for_nicknames +from src.config.config import global_config logger = get_logger("nickname_processor") -# --- 恢复导入全局 config --- -try: - from src.config.config import global_config # <--- 直接导入全局配置 -except ImportError: - logger.critical("无法导入 global_config!") - # 提供一个默认的回退配置对象,如果 global_config 导入失败 - class FallbackConfig: - ENABLE_NICKNAME_MAPPING = False - NICKNAME_QUEUE_MAX_SIZE = 100 - NICKNAME_PROCESS_SLEEP_INTERVAL = 0.5 - global_config = FallbackConfig() -# --------------------------- - -# 绰号分析函数导入 -from .nickname_mapper import analyze_chat_for_nicknames - -# --- 使用 threading.Event --- _stop_event = threading.Event() -# -------------------------- -# --- 数据库更新逻辑 (使用全局 db) - 修复 Race Condition 版 --- async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数 (使用全局 db)。 @@ -100,7 +81,7 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic # 可选日志:记录是否创建了新文档 if upsert_result.upserted_id: - logger.debug(f"Upsert on person_id created new document: {person_id}") + logger.debug(f"Upsert on person_id created new document: {person_id}") # else: # logger.debug(f"Upsert on person_id found existing document: {person_id}") @@ -157,16 +138,16 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic } ) if update_result_push_group.modified_count > 0: - logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'") + logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'") except DuplicateKeyError as dk_err: # 这个错误理论上不应该再由步骤 2 的 upsert 触发。 # 如果仍然出现,可能指示 person_id 生成逻辑问题或非常罕见的 MongoDB 内部情况。 logger.error(f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。") except OperationFailure as op_err: - logger.exception(f"数据库操作失败 (OperationFailure): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}") + logger.exception(f"数据库操作失败 (OperationFailure): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}({op_err})") except Exception as e: - logger.exception(f"更新用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误") + logger.exception(f"更新用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") # --- 使用 queue.Queue --- @@ -254,7 +235,8 @@ def _run_processor_thread(q: queue.Queue, stop_event: threading.Event): all_tasks = asyncio.all_tasks(loop) if all_tasks: logger.info(f"(Thread ID: {thread_id}) Cancelling {len(all_tasks)} running tasks...") - for task in all_tasks: task.cancel() + for task in all_tasks: + task.cancel() loop.run_until_complete(asyncio.gather(*all_tasks, return_exceptions=True)) logger.info(f"(Thread ID: {thread_id}) All tasks cancelled.") loop.stop() From 6e66d66b1f73652d20ae7c1af6df061ad569028d Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 1 May 2025 23:09:35 +0800 Subject: [PATCH 20/58] ruff --- src/plugins/heartFC_chat/heartflow_prompt_builder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 290a5e35..db764385 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -15,7 +15,6 @@ from ..memory_system.Hippocampus import HippocampusManager from ..schedule.schedule_generator import bot_schedule from ..knowledge.knowledge_lib import qa_manager from src.plugins.group_nickname.nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_injection -from src.plugins.person_info.relationship_manager import relationship_manager logger = get_logger("prompt") From 635ead2b6a4af4d6b55dccaca34ba4cdacae833e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 15:09:56 +0000 Subject: [PATCH 21/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 9 ++- src/config/config.py | 18 +++-- src/plugins/group_nickname/nickname_mapper.py | 30 ++++---- .../group_nickname/nickname_processor.py | 75 ++++++++++--------- src/plugins/group_nickname/nickname_utils.py | 32 ++++---- src/plugins/heartFC_chat/heartFC_chat.py | 57 ++++++++------ .../heartFC_chat/heartflow_prompt_builder.py | 10 +-- .../person_info/relationship_manager.py | 56 ++++++++------ 8 files changed, 161 insertions(+), 126 deletions(-) diff --git a/bot.py b/bot.py index dcd2a3e1..0a3c46e8 100644 --- a/bot.py +++ b/bot.py @@ -13,7 +13,10 @@ from src.common.logger_manager import get_logger # from src.common.logger import LogConfig, CONFIRM_STYLE_CONFIG from src.common.crash_logger import install_crash_handler from src.main import MainSystem -from src.plugins.group_nickname.nickname_processor import start_nickname_processor, stop_nickname_processor # <--- 添加这行导入 +from src.plugins.group_nickname.nickname_processor import ( + start_nickname_processor, + stop_nickname_processor, +) # <--- 添加这行导入 import atexit logger = get_logger("main") @@ -230,11 +233,11 @@ if __name__ == "__main__": # 在这里启动绰号处理进程 logger.info("准备启动绰号处理线程...") - start_nickname_processor() # <--- 添加启动调用 + start_nickname_processor() # <--- 添加启动调用 logger.info("已调用启动绰号处理线程。") # 注册退出处理函数 (确保进程能被关闭) - atexit.register(stop_nickname_processor) # <--- 在这里注册停止函数 + atexit.register(stop_nickname_processor) # <--- 在这里注册停止函数 logger.info("已注册绰号处理线程的退出处理程序。") # 创建事件循环 diff --git a/src/config/config.py b/src/config/config.py index c3cae240..b440f01d 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -271,11 +271,11 @@ class BotConfig: enable_pfc_chatting: bool = False # 是否启用PFC聊天 # Group Nickname - ENABLE_NICKNAME_MAPPING: bool = False # 绰号映射功能总开关 - MAX_NICKNAMES_IN_PROMPT: int = 10 # Prompt 中最多注入的绰号数量 - NICKNAME_PROBABILITY_SMOOTHING: int = 1 # 绰号加权随机选择的平滑因子 - NICKNAME_QUEUE_MAX_SIZE: int = 100 # 绰号处理队列最大容量 - NICKNAME_PROCESS_SLEEP_INTERVAL: float = 0.5 # 绰号处理进程休眠间隔(秒) + ENABLE_NICKNAME_MAPPING: bool = False # 绰号映射功能总开关 + MAX_NICKNAMES_IN_PROMPT: int = 10 # Prompt 中最多注入的绰号数量 + NICKNAME_PROBABILITY_SMOOTHING: int = 1 # 绰号加权随机选择的平滑因子 + NICKNAME_QUEUE_MAX_SIZE: int = 100 # 绰号处理队列最大容量 + NICKNAME_PROCESS_SLEEP_INTERVAL: float = 0.5 # 绰号处理进程休眠间隔(秒) # 模型配置 llm_reasoning: dict[str, str] = field(default_factory=lambda: {}) @@ -410,9 +410,13 @@ class BotConfig: gn_config = parent.get("group_nickname", {}) config.ENABLE_NICKNAME_MAPPING = gn_config.get("enable_nickname_mapping", config.ENABLE_NICKNAME_MAPPING) config.MAX_NICKNAMES_IN_PROMPT = gn_config.get("max_nicknames_in_prompt", config.MAX_NICKNAMES_IN_PROMPT) - config.NICKNAME_PROBABILITY_SMOOTHING = gn_config.get("nickname_probability_smoothing", config.NICKNAME_PROBABILITY_SMOOTHING) + config.NICKNAME_PROBABILITY_SMOOTHING = gn_config.get( + "nickname_probability_smoothing", config.NICKNAME_PROBABILITY_SMOOTHING + ) config.NICKNAME_QUEUE_MAX_SIZE = gn_config.get("nickname_queue_max_size", config.NICKNAME_QUEUE_MAX_SIZE) - config.NICKNAME_PROCESS_SLEEP_INTERVAL = gn_config.get("nickname_process_sleep_interval", config.NICKNAME_PROCESS_SLEEP_INTERVAL) + config.NICKNAME_PROCESS_SLEEP_INTERVAL = gn_config.get( + "nickname_process_sleep_interval", config.NICKNAME_PROCESS_SLEEP_INTERVAL + ) def bot(parent: dict): # 机器人基础配置 diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index b4cfe072..283f8090 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -2,6 +2,7 @@ import json from typing import Dict, Any, Optional from src.common.logger_manager import get_logger from src.plugins.models.utils_model import LLMRequest + # 从全局配置导入 from src.config.config import global_config @@ -9,7 +10,7 @@ from src.config.config import global_config logger = get_logger("nickname_mapper") llm_mapper: Optional[LLMRequest] = None -if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 +if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 try: # 从全局配置获取模型设置 model_config = global_config.llm_nickname_mapping @@ -17,10 +18,10 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") else: llm_mapper = LLMRequest( # <-- LLM 初始化 - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", ) logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") @@ -28,6 +29,7 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) llm_mapper = None + def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: """构建用于 LLM 绰号映射的 Prompt""" # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 @@ -74,7 +76,7 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: async def analyze_chat_for_nicknames( chat_history_str: str, bot_reply: str, - user_name_map: Dict[str, str] # 这个 map 包含了 user_id -> person_name 的信息 + user_name_map: Dict[str, str], # 这个 map 包含了 user_id -> person_name 的信息 ) -> Dict[str, Any]: """ 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 @@ -111,13 +113,13 @@ async def analyze_chat_for_nicknames( result = json.loads(response_content) if isinstance(result, dict) and "is_exist" in result: if result["is_exist"] is True: - original_data = result.get("data") # 使用 .get() 更安全 - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + original_data = result.get("data") # 使用 .get() 更安全 + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 logger.info(f"LLM 找到的原始绰号映射: {original_data}") # --- 开始过滤 --- filtered_data = {} - bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 + bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 for user_id, nickname in original_data.items(): # 检查 user_id 是否是字符串,以防万一 @@ -131,9 +133,11 @@ async def analyze_chat_for_nicknames( continue # 条件 2: 排除 nickname 与 person_name 相同的情况 - person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name + person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name if person_name and person_name == nickname: - logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") + logger.debug( + f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。" + ) continue # 如果通过所有过滤条件,则保留 @@ -146,7 +150,7 @@ async def analyze_chat_for_nicknames( return {"is_exist": False} else: logger.info(f"过滤后的绰号映射: {filtered_data}") - return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 + return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 else: # is_exist 为 True 但 data 缺失、不是字典或为空 @@ -154,7 +158,7 @@ async def analyze_chat_for_nicknames( logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") elif not isinstance(result.get("data"), dict): logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") - else: # data 为空字典 + else: # data 为空字典 logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") return {"is_exist": False} elif result["is_exist"] is False: diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index df68fb78..264654af 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -6,9 +6,9 @@ import threading import queue from typing import Dict, Optional -from pymongo.errors import OperationFailure, DuplicateKeyError # 引入 DuplicateKeyError +from pymongo.errors import OperationFailure, DuplicateKeyError # 引入 DuplicateKeyError from src.common.logger_manager import get_logger -from src.common.database import db # 使用全局 db +from src.common.database import db # 使用全局 db from .nickname_mapper import analyze_chat_for_nicknames from src.config.config import global_config @@ -16,6 +16,7 @@ logger = get_logger("nickname_processor") _stop_event = threading.Event() + async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数 (使用全局 db)。 @@ -32,7 +33,7 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic from ..person_info.person_info import person_info_manager except ImportError: logger.error("无法导入 person_info_manager,无法生成 person_id!") - return # 无法继续,因为需要 person_id + return # 无法继续,因为需要 person_id person_info_collection = db.person_info @@ -67,16 +68,16 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic # 如果文档不存在,它会被创建,并设置 $setOnInsert 中的字段。 # 如果文档已存在,此操作不会修改任何内容(因为没有 $set 操作符)。 upsert_result = person_info_collection.update_one( - {"person_id": person_id}, # Filter by the unique key + {"person_id": person_id}, # Filter by the unique key { "$setOnInsert": { "person_id": person_id, "user_id": user_id_int, "platform": platform, - "group_nicknames": [] # 初始化 group_nicknames 数组 + "group_nicknames": [], # 初始化 group_nicknames 数组 } }, - upsert=True + upsert=True, ) # 可选日志:记录是否创建了新文档 @@ -91,28 +92,28 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic # 3a. 尝试增加现有群组中现有绰号的计数 update_result_inc = person_info_collection.update_one( { - "person_id": person_id, # 明确目标文档 - "group_nicknames": { # 检查数组中是否有匹配项 + "person_id": person_id, # 明确目标文档 + "group_nicknames": { # 检查数组中是否有匹配项 "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} - } + }, }, - {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, # 增加计数 - array_filters=[ # 指定要更新的数组元素 + {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, # 增加计数 + array_filters=[ # 指定要更新的数组元素 {"group.group_id": group_id_str}, - {"nick.name": nickname} - ] + {"nick.name": nickname}, + ], ) # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 if update_result_inc.modified_count == 0: update_result_push_nick = person_info_collection.update_one( { - "person_id": person_id, # 明确目标文档 - "group_nicknames.group_id": group_id_str # 检查群组是否存在 + "person_id": person_id, # 明确目标文档 + "group_nicknames.group_id": group_id_str, # 检查群组是否存在 }, # 将新绰号添加到匹配群组的 nicknames 数组中 {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, - array_filters=[{"group.group_id": group_id_str}] # 指定要推送到的群组 + array_filters=[{"group.group_id": group_id_str}], # 指定要推送到的群组 ) # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 @@ -120,22 +121,22 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic # 确保 group_nicknames 数组存在 (如果 $setOnInsert 失败或数据不一致时的保险措施) person_info_collection.update_one( {"person_id": person_id, "group_nicknames": {"$exists": False}}, - {"$set": {"group_nicknames": []}} + {"$set": {"group_nicknames": []}}, ) # 推送新的群组对象到 group_nicknames 数组 update_result_push_group = person_info_collection.update_one( { - "person_id": person_id, # 明确目标文档 - "group_nicknames.group_id": {"$ne": group_id_str} # 确保该群组 ID 尚未存在 + "person_id": person_id, # 明确目标文档 + "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 }, { - "$push": { # 添加新的群组条目 + "$push": { # 添加新的群组条目 "group_nicknames": { "group_id": group_id_str, - "nicknames": [{"name": nickname, "count": 1}] # 初始化绰号列表 + "nicknames": [{"name": nickname, "count": 1}], # 初始化绰号列表 } } - } + }, ) if update_result_push_group.modified_count > 0: logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'") @@ -143,26 +144,27 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic except DuplicateKeyError as dk_err: # 这个错误理论上不应该再由步骤 2 的 upsert 触发。 # 如果仍然出现,可能指示 person_id 生成逻辑问题或非常罕见的 MongoDB 内部情况。 - logger.error(f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。") + logger.error( + f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" + ) except OperationFailure as op_err: - logger.exception(f"数据库操作失败 (OperationFailure): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}({op_err})") + logger.exception( + f"数据库操作失败 (OperationFailure): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}({op_err})" + ) except Exception as e: logger.exception(f"更新用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") # --- 使用 queue.Queue --- -queue_max_size = getattr(global_config, 'NICKNAME_QUEUE_MAX_SIZE', 100) +queue_max_size = getattr(global_config, "NICKNAME_QUEUE_MAX_SIZE", 100) nickname_queue: queue.Queue = queue.Queue(maxsize=queue_max_size) _nickname_thread: Optional[threading.Thread] = None + # --- add_to_nickname_queue (保持不变,已包含 platform) --- async def add_to_nickname_queue( - chat_history_str: str, - bot_reply: str, - platform: str, - group_id: Optional[str], - user_name_map: Dict[str, str] + chat_history_str: str, bot_reply: str, platform: str, group_id: Optional[str], user_name_map: Dict[str, str] ): """将需要分析的数据放入队列。""" if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: @@ -173,7 +175,9 @@ async def add_to_nickname_queue( try: item = (chat_history_str, bot_reply, platform, str(group_id), user_name_map) nickname_queue.put_nowait(item) - logger.debug(f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {nickname_queue.qsize()}") + logger.debug( + f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {nickname_queue.qsize()}" + ) except queue.Full: logger.warning(f"无法将项目添加到绰号队列:队列已满 (maxsize={nickname_queue.maxsize})。") except Exception as e: @@ -185,7 +189,7 @@ async def _nickname_processing_loop(q: queue.Queue, stop_event: threading.Event) """独立线程中的主循环,处理队列任务 (使用全局 db 和 config)。""" thread_id = threading.get_ident() logger.info(f"绰号处理循环已启动 (线程 ID: {thread_id})。") - sleep_interval = getattr(global_config, 'NICKNAME_PROCESS_SLEEP_INTERVAL', 0.5) + sleep_interval = getattr(global_config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) while not stop_event.is_set(): try: @@ -262,15 +266,14 @@ def start_nickname_processor(): stop_event = get_stop_event() stop_event.clear() _nickname_thread = threading.Thread( - target=_run_processor_thread, - args=(nickname_queue, stop_event), - daemon=True + target=_run_processor_thread, args=(nickname_queue, stop_event), daemon=True ) _nickname_thread.start() logger.info(f"绰号处理器线程已启动 (Thread ID: {_nickname_thread.ident})") else: logger.warning("绰号处理器线程已在运行中。") + # --- stop_nickname_processor (保持不变) --- def stop_nickname_processor(): """停止绰号映射处理线程。""" @@ -293,11 +296,13 @@ def stop_nickname_processor(): else: logger.info("绰号处理器线程未在运行或已被清理。") + # --- Event 控制函数 (保持不变) --- def get_stop_event() -> threading.Event: """获取全局停止事件""" return _stop_event + def set_stop_event(): """设置全局停止事件,通知子线程退出""" _stop_event.set() diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index ced2f6ca..230ce8c4 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -6,9 +6,8 @@ from src.config.config import global_config logger = get_logger("nickname_utils") -def select_nicknames_for_prompt( - all_nicknames_info: Dict[str, List[Dict[str, int]]] -) -> List[Tuple[str, str, int]]: + +def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int]]]) -> List[Tuple[str, str, int]]: """ 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号。 @@ -36,11 +35,12 @@ def select_nicknames_for_prompt( weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING candidates.append((user_name, nickname, count, weight)) else: - logger.warning(f"Invalid count for nickname '{nickname}' of user '{user_name}': {count}. Skipping.") + logger.warning( + f"Invalid count for nickname '{nickname}' of user '{user_name}': {count}. Skipping." + ) else: logger.warning(f"Invalid nickname entry format for user '{user_name}': {nickname_entry}. Skipping.") - if not candidates: return [] @@ -49,8 +49,8 @@ def select_nicknames_for_prompt( if total_weight <= 0: # 如果所有权重都无效或为0,则随机选择(或按次数选择) - candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 - selected = candidates[:global_config.MAX_NICKNAMES_IN_PROMPT] + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] else: # 计算归一化概率 probabilities = [c[3] / total_weight for c in candidates] @@ -64,7 +64,7 @@ def select_nicknames_for_prompt( selected_indices = set() selected = [] attempts = 0 - max_attempts = num_to_select * 5 # 防止无限循环 + max_attempts = num_to_select * 5 # 防止无限循环 while len(selected) < num_to_select and attempts < max_attempts: # 每次只选一个,避免一次选多个时概率分布变化导致的问题 @@ -77,20 +77,21 @@ def select_nicknames_for_prompt( # 如果尝试多次后仍未选够,补充出现次数最多的 if len(selected) < num_to_select: remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 needed = num_to_select - len(selected) selected.extend(remaining_candidates[:needed]) except Exception as e: - logger.error(f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True) + logger.error( + f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True + ) # 出错时回退到选择次数最多的 N 个 candidates.sort(key=lambda x: x[2], reverse=True) - selected = candidates[:global_config.MAX_NICKNAMES_IN_PROMPT] - + selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] # 格式化输出并按次数排序 result = [(user, nick, count) for user, nick, count, _weight in selected] - result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 + result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 logger.debug(f"Selected nicknames for prompt: {result}") return result @@ -116,11 +117,10 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in if user_name not in grouped_by_user: grouped_by_user[user_name] = [] # 添加引号以区分绰号 - grouped_by_user[user_name].append(f'“{nickname}”') + grouped_by_user[user_name].append(f"“{nickname}”") for user_name, nicknames in grouped_by_user.items(): nicknames_str = "、".join(nicknames) prompt_lines.append(f"{user_name},在本群有时被称为:{nicknames_str}") - return "\n".join(prompt_lines) + "\n" # 末尾加换行符 - + return "\n".join(prompt_lines) + "\n" # 末尾加换行符 diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index faa0ffd5..3d536edf 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -20,7 +20,11 @@ from src.heart_flow.sub_mind import SubMind from src.heart_flow.observation import Observation from src.plugins.heartFC_chat.heartflow_prompt_builder import global_prompt_manager, prompt_builder import contextlib -from src.plugins.utils.chat_message_builder import num_new_messages_since, get_raw_msg_before_timestamp_with_chat, build_readable_messages +from src.plugins.utils.chat_message_builder import ( + num_new_messages_since, + get_raw_msg_before_timestamp_with_chat, + build_readable_messages, +) from src.plugins.heartFC_chat.heartFC_Cycleinfo import CycleInfo from .heartFC_sender import HeartFCSender from src.plugins.chat.utils import process_llm_response @@ -28,7 +32,7 @@ from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.moods.moods import MoodManager from src.individuality.individuality import Individuality from src.plugins.person_info.relationship_manager import relationship_manager -from src.plugins.group_nickname.nickname_processor import add_to_nickname_queue # <--- 导入队列添加函数 +from src.plugins.group_nickname.nickname_processor import add_to_nickname_queue # <--- 导入队列添加函数 WAITING_TIME_THRESHOLD = 300 # 等待新消息时间阈值,单位秒 @@ -511,15 +515,15 @@ class HeartFChatting: if action == "text_reply": # 调用文本回复处理,它会返回 (bool, thinking_id) success, thinking_id = await handler(reasoning, emoji_query, cycle_timers) - return success, thinking_id # 直接返回结果 + return success, thinking_id # 直接返回结果 elif action == "emoji_reply": # 调用表情回复处理,它只返回 bool success = await handler(reasoning, emoji_query) - return success, "" # thinking_id 为空字符串 + return success, "" # thinking_id 为空字符串 else: # no_reply # 调用不回复处理,它只返回 bool success = await handler(reasoning, planner_start_db_time, cycle_timers) - return success, "" # thinking_id 为空字符串 + return success, "" # thinking_id 为空字符串 except HeartFCError as e: logger.error(f"{self.log_prefix} 处理{action}时出错: {e}") # 出错时也重置计数器 @@ -560,7 +564,7 @@ class HeartFChatting: if not thinking_id: raise PlannerError("无法创建思考消息") - reply = None # 初始化 reply + reply = None # 初始化 reply try: # 生成回复 with Timer("生成回复", cycle_timers): @@ -703,43 +707,43 @@ class HeartFChatting: reply: Bot 生成的回复内容列表。 """ if not global_config.ENABLE_NICKNAME_MAPPING: - return # 如果功能未开启,则直接返回 + return # 如果功能未开启,则直接返回 if not anchor_message or not anchor_message.chat_stream or not anchor_message.chat_stream.group_info: logger.debug(f"{self.log_prefix} Skipping nickname analysis: Not a group chat or invalid anchor.") - return # 仅在群聊中进行分析 + return # 仅在群聊中进行分析 try: # 1. 获取原始消息列表 - history_limit = 30 # 例如,获取最近 30 条消息 + history_limit = 30 # 例如,获取最近 30 条消息 history_messages = get_raw_msg_before_timestamp_with_chat( chat_id=anchor_message.chat_stream.stream_id, - timestamp=time.time(), # 获取当前时间点的历史 - limit=history_limit + timestamp=time.time(), # 获取当前时间点的历史 + limit=history_limit, ) # 格式化历史记录 chat_history_str = await build_readable_messages( messages=history_messages, replace_bot_name=True, # 在分析时也替换机器人名字,使其与 LLM 交互一致 - merge_messages=False, # 不合并,保留原始对话流 - timestamp_mode="relative", # 使用相对时间戳 - read_mark=0.0, # 不需要已读标记 - truncate=False # 获取完整内容进行分析 + merge_messages=False, # 不合并,保留原始对话流 + timestamp_mode="relative", # 使用相对时间戳 + read_mark=0.0, # 不需要已读标记 + truncate=False, # 获取完整内容进行分析 ) # 2. 获取 Bot 回复字符串 bot_reply_str = " ".join(reply) # 3. 获取群号 - group_id = str(anchor_message.chat_stream.group_info.group_id) # 确保是字符串 + group_id = str(anchor_message.chat_stream.group_info.group_id) # 确保是字符串 # 4. 获取当前上下文中涉及的用户 ID 及其已知名称 user_ids_in_history = set() for msg in history_messages: - sender_id = msg["user_info"].get('user_id') + sender_id = msg["user_info"].get("user_id") if sender_id: - user_ids_in_history.add(str(sender_id)) # 确保是字符串 + user_ids_in_history.add(str(sender_id)) # 确保是字符串 user_name_map = {} if user_ids_in_history: @@ -749,7 +753,7 @@ class HeartFChatting: except Exception as e: logger.error(f"Error getting person names: {e}", exc_info=True) - names_data = {} # 出错时置空 + names_data = {} # 出错时置空 print(f"\n\nnames_data:\n{names_data}\n\n") for user_id in user_ids_in_history: @@ -757,14 +761,21 @@ class HeartFChatting: user_name_map[user_id] = names_data[user_id] else: # 回退查找 nickname - latest_nickname = next((m.get('sender_nickname') for m in reversed(history_messages) if str(m.get('sender_id')) == user_id), None) + latest_nickname = next( + ( + m.get("sender_nickname") + for m in reversed(history_messages) + if str(m.get("sender_id")) == user_id + ), + None, + ) if latest_nickname: - user_name_map[user_id] = latest_nickname + user_name_map[user_id] = latest_nickname else: - user_name_map[user_id] = f"未知({user_id})" + user_name_map[user_id] = f"未知({user_id})" # 5. 添加到队列 - await add_to_nickname_queue(chat_history_str, bot_reply_str,platform, group_id, user_name_map) + await add_to_nickname_queue(chat_history_str, bot_reply_str, platform, group_id, user_name_map) logger.debug(f"{self.log_prefix} Triggered nickname analysis for group {group_id}.") except Exception as e: diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index db764385..3980b044 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -222,13 +222,12 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s user_ids_in_context = set() if message_list_before_now: for msg in message_list_before_now: - sender_id = msg["user_info"].get('user_id') + sender_id = msg["user_info"].get("user_id") if sender_id: user_ids_in_context.add(str(sender_id)) else: logger.warning("Variable 'message_list_before_now' not found for nickname injection in focus prompt.") - if user_ids_in_context: platform = chat_stream.platform # --- 调用批量获取群组绰号的方法 --- @@ -439,12 +438,13 @@ class PromptBuilder: user_ids_in_context = set() if message_list_before_now: for msg in message_list_before_now: - sender_id = msg["user_info"].get('user_id') + sender_id = msg["user_info"].get("user_id") if sender_id: user_ids_in_context.add(str(sender_id)) else: - logger.warning("Variable 'message_list_before_now' not found for nickname injection in focus prompt.") - + logger.warning( + "Variable 'message_list_before_now' not found for nickname injection in focus prompt." + ) if user_ids_in_context: platform = chat_stream.platform diff --git a/src/plugins/person_info/relationship_manager.py b/src/plugins/person_info/relationship_manager.py index 78bcb7c7..c7837dc5 100644 --- a/src/plugins/person_info/relationship_manager.py +++ b/src/plugins/person_info/relationship_manager.py @@ -96,19 +96,19 @@ class RelationshipManager: try: cursor = db.person_info.find( {"person_id": {"$in": person_ids}}, - {"_id": 0, "person_id": 1, "user_id": 1, "person_name": 1} # 只查询需要的字段 + {"_id": 0, "person_id": 1, "user_id": 1, "person_name": 1}, # 只查询需要的字段 ) for doc in cursor: - user_id_val = doc.get("user_id") # 获取原始值 - original_user_id = None # 初始化 + user_id_val = doc.get("user_id") # 获取原始值 + original_user_id = None # 初始化 - if isinstance(user_id_val, (int, float)): # 检查是否是数字类型 - original_user_id = str(user_id_val) # 直接转换为字符串 - elif isinstance(user_id_val, str): # 检查是否是字符串 - if "_" in user_id_val: # 如果包含下划线,则分割 + if isinstance(user_id_val, (int, float)): # 检查是否是数字类型 + original_user_id = str(user_id_val) # 直接转换为字符串 + elif isinstance(user_id_val, str): # 检查是否是字符串 + if "_" in user_id_val: # 如果包含下划线,则分割 original_user_id = user_id_val.split("_", 1)[-1] - else: # 如果不包含下划线,则直接使用该字符串 + else: # 如果不包含下划线,则直接使用该字符串 original_user_id = user_id_val # else: # 其他类型或 None,original_user_id 保持为 None @@ -127,7 +127,9 @@ class RelationshipManager: return names_map @staticmethod - async def get_users_group_nicknames(platform: str, user_ids: List[str], group_id: str) -> Dict[str, List[Dict[str, int]]]: + async def get_users_group_nicknames( + platform: str, user_ids: List[str], group_id: str + ) -> Dict[str, List[Dict[str, int]]]: """ 批量获取多个用户在指定群组的绰号信息。 @@ -144,23 +146,23 @@ class RelationshipManager: person_ids = [person_info_manager.get_person_id(platform, str(uid)) for uid in user_ids] nicknames_data = {} - group_id_str = str(group_id) # 确保 group_id 是字符串 + group_id_str = str(group_id) # 确保 group_id 是字符串 try: # 查询包含目标 person_id 的文档 cursor = db.person_info.find( {"person_id": {"$in": person_ids}}, - {"_id": 0, "person_id": 1, "person_name": 1, "group_nicknames": 1} # 查询所需字段 + {"_id": 0, "person_id": 1, "person_name": 1, "group_nicknames": 1}, # 查询所需字段 ) # 假设同步迭代可行 for doc in cursor: person_name = doc.get("person_name") if not person_name: - continue # 跳过没有 person_name 的用户 + continue # 跳过没有 person_name 的用户 - group_nicknames_list = doc.get("group_nicknames", []) # 获取 group_nicknames 数组 - target_group_nicknames = [] # 存储目标群组的绰号列表 + group_nicknames_list = doc.get("group_nicknames", []) # 获取 group_nicknames 数组 + target_group_nicknames = [] # 存储目标群组的绰号列表 # 遍历 group_nicknames 数组,查找匹配的 group_id for group_entry in group_nicknames_list: @@ -170,27 +172,33 @@ class RelationshipManager: nicknames_raw = group_entry.get("nicknames", []) if isinstance(nicknames_raw, list): target_group_nicknames = nicknames_raw - break # 找到匹配的 group_id 后即可退出内层循环 + break # 找到匹配的 group_id 后即可退出内层循环 # 如果找到了目标群组的绰号列表 if target_group_nicknames: - valid_nicknames_formatted = [] # 存储格式化后的绰号 + valid_nicknames_formatted = [] # 存储格式化后的绰号 for item in target_group_nicknames: # 校验每个绰号条目的格式 { "name": str, "count": int } - if isinstance(item, dict) and \ - isinstance(item.get("name"), str) and \ - isinstance(item.get("count"), int) and \ - item["count"] > 0: # 确保 count 是正整数 + if ( + isinstance(item, dict) + and isinstance(item.get("name"), str) + and isinstance(item.get("count"), int) + and item["count"] > 0 + ): # 确保 count 是正整数 # --- 格式转换:从 { "name": "xxx", "count": y } 转为 { "xxx": y } --- valid_nicknames_formatted.append({item["name"]: item["count"]}) # --- 结束格式转换 --- else: - logger.warning(f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号格式无效或 count <= 0: {item}") + logger.warning( + f"数据库中用户 {person_name} 群组 {group_id_str} 的绰号格式无效或 count <= 0: {item}" + ) - if valid_nicknames_formatted: # 如果存在有效的、格式化后的绰号 - nicknames_data[person_name] = valid_nicknames_formatted # 使用 person_name 作为 key + if valid_nicknames_formatted: # 如果存在有效的、格式化后的绰号 + nicknames_data[person_name] = valid_nicknames_formatted # 使用 person_name 作为 key - logger.debug(f"批量获取群组 {group_id_str} 中 {len(user_ids)} 个用户的绰号,找到 {len(nicknames_data)} 个用户的数据。") + logger.debug( + f"批量获取群组 {group_id_str} 中 {len(user_ids)} 个用户的绰号,找到 {len(nicknames_data)} 个用户的数据。" + ) except AttributeError as e: logger.error(f"访问数据库时出错: {e}。请检查 common/database.py 和集合名称 'person_info'。") From 7c95166e0aa39c51ef46cd2b7c427dccf36c1609 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 00:43:52 +0800 Subject: [PATCH 22/58] =?UTF-8?q?=E6=8F=90=E5=8F=96=E9=87=8D=E5=A4=8D?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E5=88=B0=E5=B7=A5=E5=85=B7=E9=87=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_utils.py | 192 ++++++++++++++---- src/plugins/heartFC_chat/heartFC_chat.py | 110 +--------- .../heartFC_chat/heartflow_prompt_builder.py | 78 ++----- src/plugins/heartFC_chat/normal_chat.py | 2 + 4 files changed, 173 insertions(+), 209 deletions(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 230ce8c4..2cff5a01 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,8 +1,14 @@ -# GroupNickname/nickname_utils.py import random -from typing import List, Dict, Tuple +import time +from typing import List, Dict, Tuple, Optional from src.common.logger_manager import get_logger from src.config.config import global_config +from src.plugins.person_info.relationship_manager import relationship_manager +from src.plugins.chat.chat_stream import ChatStream +from src.plugins.chat.message import MessageRecv +from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat +from .nickname_processor import add_to_nickname_queue + logger = get_logger("nickname_utils") @@ -10,14 +16,6 @@ logger = get_logger("nickname_utils") def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int]]]) -> List[Tuple[str, str, int]]: """ 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号。 - - Args: - all_nicknames_info: 包含用户及其绰号信息的字典,格式为 - { "用户名1": [{"绰号A": 次数}, {"绰号B": 次数}], ... } - - Returns: - List[Tuple[str, str, int]]: 选中的绰号列表,每个元素为 (用户名, 绰号, 次数)。 - 按次数降序排序。 """ if not all_nicknames_info: return [] @@ -26,12 +24,9 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int for user_name, nicknames in all_nicknames_info.items(): if nicknames: for nickname_entry in nicknames: - # nickname_entry 应该是 {"绰号": 次数} 格式 if isinstance(nickname_entry, dict) and len(nickname_entry) == 1: nickname, count = list(nickname_entry.items())[0] - # 确保次数是正整数 if isinstance(count, int) and count > 0: - # 添加平滑因子,避免概率为0,并让低频词也有机会 weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING candidates.append((user_name, nickname, count, weight)) else: @@ -44,55 +39,39 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int if not candidates: return [] - # 计算总权重 total_weight = sum(c[3] for c in candidates) if total_weight <= 0: - # 如果所有权重都无效或为0,则随机选择(或按次数选择) - candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + candidates.sort(key=lambda x: x[2], reverse=True) selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] else: - # 计算归一化概率 probabilities = [c[3] / total_weight for c in candidates] - - # 使用概率分布进行加权随机选择(不重复) num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) try: - # random.choices 允许重复,我们需要不重复的选择 - # 可以使用 numpy.random.choice 或手动实现不重复加权抽样 - # 这里用一个简化的方法:多次 choices 然后去重,直到达到数量或无法再选 selected_indices = set() selected = [] attempts = 0 - max_attempts = num_to_select * 5 # 防止无限循环 - + max_attempts = num_to_select * 5 while len(selected) < num_to_select and attempts < max_attempts: - # 每次只选一个,避免一次选多个时概率分布变化导致的问题 chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] if chosen_index not in selected_indices: selected_indices.add(chosen_index) selected.append(candidates[chosen_index]) attempts += 1 - - # 如果尝试多次后仍未选够,补充出现次数最多的 if len(selected) < num_to_select: remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + remaining_candidates.sort(key=lambda x: x[2], reverse=True) needed = num_to_select - len(selected) selected.extend(remaining_candidates[:needed]) - except Exception as e: logger.error( f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True ) - # 出错时回退到选择次数最多的 N 个 candidates.sort(key=lambda x: x[2], reverse=True) selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] - # 格式化输出并按次数排序 result = [(user, nick, count) for user, nick, count, _weight in selected] - result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 - + result.sort(key=lambda x: x[2], reverse=True) logger.debug(f"Selected nicknames for prompt: {result}") return result @@ -100,27 +79,154 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, int]]) -> str: """ 将选中的绰号信息格式化为注入 Prompt 的字符串。 - - Args: - selected_nicknames: 选中的绰号列表 (用户名, 绰号, 次数)。 - - Returns: - str: 格式化后的字符串,如果列表为空则返回空字符串。 + (代码保持不变) """ if not selected_nicknames: return "" - prompt_lines = ["以下是聊天记录中一些成员在本群的绰号信息(按常用度排序):"] + prompt_lines = ["【群成员绰号信息】"] grouped_by_user: Dict[str, List[str]] = {} for user_name, nickname, _count in selected_nicknames: if user_name not in grouped_by_user: grouped_by_user[user_name] = [] - # 添加引号以区分绰号 grouped_by_user[user_name].append(f"“{nickname}”") for user_name, nicknames in grouped_by_user.items(): nicknames_str = "、".join(nicknames) - prompt_lines.append(f"{user_name},在本群有时被称为:{nicknames_str}") + prompt_lines.append(f"- {user_name},有时被称为:{nicknames_str}") - return "\n".join(prompt_lines) + "\n" # 末尾加换行符 + if len(prompt_lines) > 1: + return "\n".join(prompt_lines) + "\n" + else: + return "" + + +async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: + """ + 获取并格式化用于 Prompt 注入的绰号信息字符串。 + """ + nickname_injection_str = "" + if global_config.ENABLE_NICKNAME_MAPPING and chat_stream and chat_stream.group_info: + try: + group_id = str(chat_stream.group_info.group_id) + user_ids_in_context = set() + if message_list_before_now: + for msg in message_list_before_now: + sender_id = msg["user_info"].get("user_id") + if sender_id: + user_ids_in_context.add(str(sender_id)) + else: + recent_speakers = chat_stream.get_recent_speakers(limit=5) + for speaker in recent_speakers: + user_ids_in_context.add(str(speaker['user_id'])) + if not user_ids_in_context: + logger.warning(f"[{chat_stream.stream_id}] No messages or recent speakers found for nickname injection.") + + if user_ids_in_context: + platform = chat_stream.platform + all_nicknames_data = await relationship_manager.get_users_group_nicknames( + platform, list(user_ids_in_context), group_id + ) + if all_nicknames_data: + selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) + nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) + if nickname_injection_str: + logger.debug(f"[{chat_stream.stream_id}] Generated nickname info for prompt:\n{nickname_injection_str}") + except Exception as e: + logger.error(f"[{chat_stream.stream_id}] Error getting or formatting nickname info for prompt: {e}", exc_info=True) + nickname_injection_str = "" + return nickname_injection_str + + +# --- 新增:触发绰号分析的工具函数 --- +async def trigger_nickname_analysis_if_needed( + anchor_message: MessageRecv, + bot_reply: List[str], + chat_stream: Optional[ChatStream] = None # 允许传入 chat_stream 或从 anchor_message 获取 +): + """ + 如果满足条件(群聊、功能开启),则准备数据并触发绰号分析任务。 + + Args: + anchor_message: 触发回复的原始消息对象。 + bot_reply: Bot 生成的回复内容列表。 + chat_stream: 可选的 ChatStream 对象。 + """ + # 检查功能是否开启 + if not global_config.ENABLE_NICKNAME_MAPPING: + return + + # 确定使用的 chat_stream + current_chat_stream = chat_stream or anchor_message.chat_stream + + # 检查是否是群聊且 chat_stream 有效 + if not current_chat_stream or not current_chat_stream.group_info: + logger.debug(f"[{current_chat_stream.stream_id if current_chat_stream else 'Unknown'}] Skipping nickname analysis: Not a group chat or invalid chat stream.") + return + + log_prefix = f"[{current_chat_stream.stream_id}]" # 日志前缀 + + try: + # 1. 获取历史记录 + history_limit = 30 # 可配置的历史记录条数 + history_messages = get_raw_msg_before_timestamp_with_chat( + chat_id=current_chat_stream.stream_id, + timestamp=time.time(), + limit=history_limit, + ) + + # 格式化历史记录 + chat_history_str = await build_readable_messages( + messages=history_messages, + replace_bot_name=True, + merge_messages=False, + timestamp_mode="relative", + read_mark=0.0, + truncate=False, + ) + + # 2. 获取 Bot 回复字符串 + bot_reply_str = " ".join(bot_reply) if bot_reply else "" # 处理空回复列表 + + # 3. 获取群号和平台 + group_id = str(current_chat_stream.group_info.group_id) + platform = current_chat_stream.platform + + # 4. 构建用户 ID 到名称的映射 + user_ids_in_history = set() + for msg in history_messages: + sender_id = msg["user_info"].get("user_id") + if sender_id: + user_ids_in_history.add(str(sender_id)) + + user_name_map = {} + if user_ids_in_history: + try: + # 批量获取 person_name + names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) + except Exception as e: + logger.error(f"{log_prefix} Error getting person names batch: {e}", exc_info=True) + names_data = {} + + for user_id in user_ids_in_history: + if user_id in names_data: + user_name_map[user_id] = names_data[user_id] + else: + # 回退查找 nickname (从后往前找最新的) + latest_nickname = next( + ( + m["user_info"].get("user_nickname") # 从 user_info 获取 + for m in reversed(history_messages) + if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") # 确保 nickname 存在 + ), + None, + ) + user_name_map[user_id] = latest_nickname or f"未知({user_id})" # 提供回退 + + # 5. 添加到处理队列 + await add_to_nickname_queue(chat_history_str, bot_reply_str, platform, group_id, user_name_map) + logger.debug(f"{log_prefix} Triggered nickname analysis for group {group_id}.") + + except Exception as e: + logger.error(f"{log_prefix} Error triggering nickname analysis: {e}", exc_info=True) \ No newline at end of file diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 3d536edf..fa180673 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -1,38 +1,33 @@ import asyncio import time import traceback -import random # <--- 添加导入 -import json # <--- 确保导入 json +import random +import json from typing import List, Optional, Dict, Any, Deque, Callable, Coroutine from collections import deque from src.plugins.chat.message import MessageRecv, BaseMessageInfo, MessageThinking, MessageSending -from src.plugins.chat.message import Seg # Local import needed after move +from src.plugins.chat.message import Seg from src.plugins.chat.chat_stream import ChatStream from src.plugins.chat.message import UserInfo from src.plugins.chat.chat_stream import chat_manager from src.common.logger_manager import get_logger from src.plugins.models.utils_model import LLMRequest from src.config.config import global_config -from src.plugins.chat.utils_image import image_path_to_base64 # Local import needed after move -from src.plugins.utils.timer_calculator import Timer # <--- Import Timer +from src.plugins.chat.utils_image import image_path_to_base64 +from src.plugins.utils.timer_calculator import Timer from src.plugins.emoji_system.emoji_manager import emoji_manager from src.heart_flow.sub_mind import SubMind from src.heart_flow.observation import Observation from src.plugins.heartFC_chat.heartflow_prompt_builder import global_prompt_manager, prompt_builder import contextlib -from src.plugins.utils.chat_message_builder import ( - num_new_messages_since, - get_raw_msg_before_timestamp_with_chat, - build_readable_messages, -) +from src.plugins.utils.chat_message_builder import num_new_messages_since from src.plugins.heartFC_chat.heartFC_Cycleinfo import CycleInfo from .heartFC_sender import HeartFCSender from src.plugins.chat.utils import process_llm_response from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.moods.moods import MoodManager from src.individuality.individuality import Individuality -from src.plugins.person_info.relationship_manager import relationship_manager -from src.plugins.group_nickname.nickname_processor import add_to_nickname_queue # <--- 导入队列添加函数 +from src.plugins.group_nickname.nickname_utils import trigger_nickname_analysis_if_needed WAITING_TIME_THRESHOLD = 300 # 等待新消息时间阈值,单位秒 @@ -586,10 +581,9 @@ class HeartFChatting: send_emoji=emoji_query, ) print("消息发送成功,准备进入绰号分析") - # --- [新增] 触发绰号分析 --- - # 在发送成功后(或至少尝试发送后)触发 - await self._trigger_nickname_analysis(anchor_message, reply) - # --- 结束触发 --- + + # 调用工具函数触发绰号分析 + await trigger_nickname_analysis_if_needed(anchor_message, reply, self.chat_stream) return True, thinking_id @@ -697,90 +691,6 @@ class HeartFChatting: # 发生意外错误时,可以选择是否重置计数器,这里选择不重置 return False # 表示动作未成功 - # 触发绰号分析的函数 - async def _trigger_nickname_analysis(self, anchor_message: MessageRecv, reply: List[str]): - """ - 触发绰号分析任务,将相关数据放入处理队列。 - - Args: - anchor_message: 锚点消息对象。 - reply: Bot 生成的回复内容列表。 - """ - if not global_config.ENABLE_NICKNAME_MAPPING: - return # 如果功能未开启,则直接返回 - - if not anchor_message or not anchor_message.chat_stream or not anchor_message.chat_stream.group_info: - logger.debug(f"{self.log_prefix} Skipping nickname analysis: Not a group chat or invalid anchor.") - return # 仅在群聊中进行分析 - - try: - # 1. 获取原始消息列表 - history_limit = 30 # 例如,获取最近 30 条消息 - history_messages = get_raw_msg_before_timestamp_with_chat( - chat_id=anchor_message.chat_stream.stream_id, - timestamp=time.time(), # 获取当前时间点的历史 - limit=history_limit, - ) - - # 格式化历史记录 - chat_history_str = await build_readable_messages( - messages=history_messages, - replace_bot_name=True, # 在分析时也替换机器人名字,使其与 LLM 交互一致 - merge_messages=False, # 不合并,保留原始对话流 - timestamp_mode="relative", # 使用相对时间戳 - read_mark=0.0, # 不需要已读标记 - truncate=False, # 获取完整内容进行分析 - ) - - # 2. 获取 Bot 回复字符串 - bot_reply_str = " ".join(reply) - - # 3. 获取群号 - group_id = str(anchor_message.chat_stream.group_info.group_id) # 确保是字符串 - - # 4. 获取当前上下文中涉及的用户 ID 及其已知名称 - user_ids_in_history = set() - for msg in history_messages: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_history.add(str(sender_id)) # 确保是字符串 - - user_name_map = {} - if user_ids_in_history: - platform = anchor_message.chat_stream.platform - try: - names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) - - except Exception as e: - logger.error(f"Error getting person names: {e}", exc_info=True) - names_data = {} # 出错时置空 - print(f"\n\nnames_data:\n{names_data}\n\n") - - for user_id in user_ids_in_history: - if user_id in names_data: - user_name_map[user_id] = names_data[user_id] - else: - # 回退查找 nickname - latest_nickname = next( - ( - m.get("sender_nickname") - for m in reversed(history_messages) - if str(m.get("sender_id")) == user_id - ), - None, - ) - if latest_nickname: - user_name_map[user_id] = latest_nickname - else: - user_name_map[user_id] = f"未知({user_id})" - - # 5. 添加到队列 - await add_to_nickname_queue(chat_history_str, bot_reply_str, platform, group_id, user_name_map) - logger.debug(f"{self.log_prefix} Triggered nickname analysis for group {group_id}.") - - except Exception as e: - logger.error(f"{self.log_prefix} Error triggering nickname analysis: {e}", exc_info=True) - async def _wait_for_new_message(self, observation, planner_start_db_time: float, log_prefix: str) -> bool: """ 等待新消息 或 检测到关闭信号 diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 3980b044..43075ef8 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -1,4 +1,6 @@ import random +import time +from typing import Union, Optional, List, Dict, Any # 引入 List, Dict, Any from ...config.config import global_config from src.common.logger_manager import get_logger from ...individuality.individuality import Individuality @@ -6,15 +8,13 @@ from src.plugins.utils.prompt_builder import Prompt, global_prompt_manager from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat from src.plugins.person_info.relationship_manager import relationship_manager from src.plugins.chat.utils import get_embedding -import time -from typing import Union, Optional from ...common.database import db from ..chat.utils import get_recent_group_speaker from ..moods.moods import MoodManager from ..memory_system.Hippocampus import HippocampusManager from ..schedule.schedule_generator import bot_schedule from ..knowledge.knowledge_lib import qa_manager -from src.plugins.group_nickname.nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_injection +from src.plugins.group_nickname.nickname_utils import get_nickname_injection_for_prompt logger = get_logger("prompt") @@ -23,6 +23,7 @@ def init_prompt(): Prompt( """ {info_from_tools} +{nickname_info} {chat_target} {chat_talking_prompt} 现在你想要在群里发言或者回复。\n @@ -131,6 +132,7 @@ JSON 结构如下,包含三个字段 "action", "reasoning", "emoji_query": {relation_prompt} {prompt_info} {schedule_prompt} +{nickname_info} {chat_target} {chat_talking_prompt} 现在"{sender_name}"说的:{message_txt}。引起了你的注意,你想要在群里发言或者回复这条消息。\n @@ -214,40 +216,13 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s logger.debug("开始构建prompt") - # 注入绰号信息 - nickname_injection_str = "" - if global_config.ENABLE_NICKNAME_MAPPING and chat_stream.group_info: - try: - group_id = str(chat_stream.group_info.group_id) - user_ids_in_context = set() - if message_list_before_now: - for msg in message_list_before_now: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_context.add(str(sender_id)) - else: - logger.warning("Variable 'message_list_before_now' not found for nickname injection in focus prompt.") - - if user_ids_in_context: - platform = chat_stream.platform - # --- 调用批量获取群组绰号的方法 --- - all_nicknames_data = await relationship_manager.get_users_group_nicknames( - platform, list(user_ids_in_context), group_id - ) - - if all_nicknames_data: - selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) - nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) - if nickname_injection_str: - logger.debug(f"Injecting nickname info into focus prompt:\n{nickname_injection_str}") - - except Exception as e: - logger.error(f"Error getting or formatting nickname info for focus prompt: {e}", exc_info=True) - logger.debug(f"-------------------nickname_injection_str_______________________\n{nickname_injection_str}\n\n") + # 调用新的工具函数获取绰号信息 + nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) prompt = await global_prompt_manager.format_prompt( "heart_flow_prompt", info_from_tools=structured_info_prompt, + nickname_info=nickname_injection_str, chat_target=await global_prompt_manager.get_prompt_async("chat_target_group1") if chat_in_group else await global_prompt_manager.get_prompt_async("chat_target_private1"), @@ -299,7 +274,7 @@ class PromptBuilder: ) return None - async def _build_prompt_normal(self, chat_stream, message_txt: str, sender_name: str = "某人") -> tuple[str, str]: + async def _build_prompt_normal(self, chat_stream, message_txt: str, sender_name: str = "某人") -> str: # 返回值改为 str individuality = Individuality.get_instance() prompt_personality = individuality.get_prompt(x_person=2, level=2) @@ -430,38 +405,8 @@ class PromptBuilder: else: schedule_prompt = "" - # 注入绰号信息 - nickname_injection_str = "" - if global_config.ENABLE_NICKNAME_MAPPING and chat_stream.group_info: - try: - group_id = str(chat_stream.group_info.group_id) - user_ids_in_context = set() - if message_list_before_now: - for msg in message_list_before_now: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_context.add(str(sender_id)) - else: - logger.warning( - "Variable 'message_list_before_now' not found for nickname injection in focus prompt." - ) - - if user_ids_in_context: - platform = chat_stream.platform - # --- 调用批量获取群组绰号的方法 --- - all_nicknames_data = await relationship_manager.get_users_group_nicknames( - platform, list(user_ids_in_context), group_id - ) - - if all_nicknames_data: - selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) - nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) - if nickname_injection_str: - logger.debug(f"Injecting nickname info into focus prompt:\n{nickname_injection_str}") - - except Exception as e: - logger.error(f"Error getting or formatting nickname info for focus prompt: {e}", exc_info=True) - logger.debug(f"-------------------nickname_injection_str_______________________\n{nickname_injection_str}\n\n") + # 调用新的工具函数获取绰号信息 + nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) prompt = await global_prompt_manager.format_prompt( "reasoning_prompt_main", @@ -470,6 +415,7 @@ class PromptBuilder: memory_prompt=memory_prompt, prompt_info=prompt_info, schedule_prompt=schedule_prompt, + nickname_info=nickname_injection_str, # <--- 注入绰号信息 chat_target=await global_prompt_manager.get_prompt_async("chat_target_group1") if chat_in_group else await global_prompt_manager.get_prompt_async("chat_target_private1"), diff --git a/src/plugins/heartFC_chat/normal_chat.py b/src/plugins/heartFC_chat/normal_chat.py index 9ed63c2d..76fb1746 100644 --- a/src/plugins/heartFC_chat/normal_chat.py +++ b/src/plugins/heartFC_chat/normal_chat.py @@ -19,6 +19,7 @@ from src.plugins.chat.chat_stream import ChatStream, chat_manager from src.plugins.person_info.relationship_manager import relationship_manager from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.utils.timer_calculator import Timer +from src.plugins.group_nickname.nickname_utils import trigger_nickname_analysis_if_needed logger = get_logger("chat") @@ -286,6 +287,7 @@ class NormalChat: # 检查 first_bot_msg 是否为 None (例如思考消息已被移除的情况) if first_bot_msg: info_catcher.catch_after_response(timing_results["消息发送"], response_set, first_bot_msg) + await trigger_nickname_analysis_if_needed(message, response_set, self.chat_stream) else: logger.warning(f"[{self.stream_name}] 思考消息 {thinking_id} 在发送前丢失,无法记录 info_catcher") From c7cf0b102e0327449010bb5ca50252376bfd09fb Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 00:50:13 +0800 Subject: [PATCH 23/58] =?UTF-8?q?=E5=8A=A0=E7=82=B9=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_utils.py | 390 ++++++++----------- 1 file changed, 166 insertions(+), 224 deletions(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 2cff5a01..641c2999 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,232 +1,174 @@ -import random -import time -from typing import List, Dict, Tuple, Optional +import json +from typing import Dict, Any, Optional from src.common.logger_manager import get_logger +from src.plugins.models.utils_model import LLMRequest from src.config.config import global_config -from src.plugins.person_info.relationship_manager import relationship_manager -from src.plugins.chat.chat_stream import ChatStream -from src.plugins.chat.message import MessageRecv -from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat -from .nickname_processor import add_to_nickname_queue -logger = get_logger("nickname_utils") - - -def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int]]]) -> List[Tuple[str, str, int]]: - """ - 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号。 - """ - if not all_nicknames_info: - return [] - - candidates = [] - for user_name, nicknames in all_nicknames_info.items(): - if nicknames: - for nickname_entry in nicknames: - if isinstance(nickname_entry, dict) and len(nickname_entry) == 1: - nickname, count = list(nickname_entry.items())[0] - if isinstance(count, int) and count > 0: - weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING - candidates.append((user_name, nickname, count, weight)) - else: - logger.warning( - f"Invalid count for nickname '{nickname}' of user '{user_name}': {count}. Skipping." - ) - else: - logger.warning(f"Invalid nickname entry format for user '{user_name}': {nickname_entry}. Skipping.") - - if not candidates: - return [] - - total_weight = sum(c[3] for c in candidates) - - if total_weight <= 0: - candidates.sort(key=lambda x: x[2], reverse=True) - selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] - else: - probabilities = [c[3] / total_weight for c in candidates] - num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) - try: - selected_indices = set() - selected = [] - attempts = 0 - max_attempts = num_to_select * 5 - while len(selected) < num_to_select and attempts < max_attempts: - chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] - if chosen_index not in selected_indices: - selected_indices.add(chosen_index) - selected.append(candidates[chosen_index]) - attempts += 1 - if len(selected) < num_to_select: - remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) - needed = num_to_select - len(selected) - selected.extend(remaining_candidates[:needed]) - except Exception as e: - logger.error( - f"Error during weighted random choice for nicknames: {e}. Falling back to top N.", exc_info=True - ) - candidates.sort(key=lambda x: x[2], reverse=True) - selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] - - result = [(user, nick, count) for user, nick, count, _weight in selected] - result.sort(key=lambda x: x[2], reverse=True) - logger.debug(f"Selected nicknames for prompt: {result}") - return result - - -def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, int]]) -> str: - """ - 将选中的绰号信息格式化为注入 Prompt 的字符串。 - (代码保持不变) - """ - if not selected_nicknames: - return "" - - prompt_lines = ["【群成员绰号信息】"] - grouped_by_user: Dict[str, List[str]] = {} - - for user_name, nickname, _count in selected_nicknames: - if user_name not in grouped_by_user: - grouped_by_user[user_name] = [] - grouped_by_user[user_name].append(f"“{nickname}”") - - for user_name, nicknames in grouped_by_user.items(): - nicknames_str = "、".join(nicknames) - prompt_lines.append(f"- {user_name},有时被称为:{nicknames_str}") - - if len(prompt_lines) > 1: - return "\n".join(prompt_lines) + "\n" - else: - return "" - - -async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: - """ - 获取并格式化用于 Prompt 注入的绰号信息字符串。 - """ - nickname_injection_str = "" - if global_config.ENABLE_NICKNAME_MAPPING and chat_stream and chat_stream.group_info: - try: - group_id = str(chat_stream.group_info.group_id) - user_ids_in_context = set() - if message_list_before_now: - for msg in message_list_before_now: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_context.add(str(sender_id)) - else: - recent_speakers = chat_stream.get_recent_speakers(limit=5) - for speaker in recent_speakers: - user_ids_in_context.add(str(speaker['user_id'])) - if not user_ids_in_context: - logger.warning(f"[{chat_stream.stream_id}] No messages or recent speakers found for nickname injection.") - - if user_ids_in_context: - platform = chat_stream.platform - all_nicknames_data = await relationship_manager.get_users_group_nicknames( - platform, list(user_ids_in_context), group_id - ) - if all_nicknames_data: - selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) - nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) - if nickname_injection_str: - logger.debug(f"[{chat_stream.stream_id}] Generated nickname info for prompt:\n{nickname_injection_str}") - except Exception as e: - logger.error(f"[{chat_stream.stream_id}] Error getting or formatting nickname info for prompt: {e}", exc_info=True) - nickname_injection_str = "" - return nickname_injection_str - - -# --- 新增:触发绰号分析的工具函数 --- -async def trigger_nickname_analysis_if_needed( - anchor_message: MessageRecv, - bot_reply: List[str], - chat_stream: Optional[ChatStream] = None # 允许传入 chat_stream 或从 anchor_message 获取 -): - """ - 如果满足条件(群聊、功能开启),则准备数据并触发绰号分析任务。 - - Args: - anchor_message: 触发回复的原始消息对象。 - bot_reply: Bot 生成的回复内容列表。 - chat_stream: 可选的 ChatStream 对象。 - """ - # 检查功能是否开启 - if not global_config.ENABLE_NICKNAME_MAPPING: - return - - # 确定使用的 chat_stream - current_chat_stream = chat_stream or anchor_message.chat_stream - - # 检查是否是群聊且 chat_stream 有效 - if not current_chat_stream or not current_chat_stream.group_info: - logger.debug(f"[{current_chat_stream.stream_id if current_chat_stream else 'Unknown'}] Skipping nickname analysis: Not a group chat or invalid chat stream.") - return - - log_prefix = f"[{current_chat_stream.stream_id}]" # 日志前缀 +logger = get_logger("nickname_mapper") +llm_mapper: Optional[LLMRequest] = None +if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 try: - # 1. 获取历史记录 - history_limit = 30 # 可配置的历史记录条数 - history_messages = get_raw_msg_before_timestamp_with_chat( - chat_id=current_chat_stream.stream_id, - timestamp=time.time(), - limit=history_limit, - ) - - # 格式化历史记录 - chat_history_str = await build_readable_messages( - messages=history_messages, - replace_bot_name=True, - merge_messages=False, - timestamp_mode="relative", - read_mark=0.0, - truncate=False, - ) - - # 2. 获取 Bot 回复字符串 - bot_reply_str = " ".join(bot_reply) if bot_reply else "" # 处理空回复列表 - - # 3. 获取群号和平台 - group_id = str(current_chat_stream.group_info.group_id) - platform = current_chat_stream.platform - - # 4. 构建用户 ID 到名称的映射 - user_ids_in_history = set() - for msg in history_messages: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_history.add(str(sender_id)) - - user_name_map = {} - if user_ids_in_history: - try: - # 批量获取 person_name - names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) - except Exception as e: - logger.error(f"{log_prefix} Error getting person names batch: {e}", exc_info=True) - names_data = {} - - for user_id in user_ids_in_history: - if user_id in names_data: - user_name_map[user_id] = names_data[user_id] - else: - # 回退查找 nickname (从后往前找最新的) - latest_nickname = next( - ( - m["user_info"].get("user_nickname") # 从 user_info 获取 - for m in reversed(history_messages) - if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") # 确保 nickname 存在 - ), - None, - ) - user_name_map[user_id] = latest_nickname or f"未知({user_id})" # 提供回退 - - # 5. 添加到处理队列 - await add_to_nickname_queue(chat_history_str, bot_reply_str, platform, group_id, user_name_map) - logger.debug(f"{log_prefix} Triggered nickname analysis for group {group_id}.") + # 从全局配置获取模型设置 + model_config = global_config.llm_nickname_mapping + if not model_config or not model_config.get("name"): + logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") + else: + llm_mapper = LLMRequest( # <-- LLM 初始化 + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", + ) + logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") except Exception as e: - logger.error(f"{log_prefix} Error triggering nickname analysis: {e}", exc_info=True) \ No newline at end of file + logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) + llm_mapper = None + +def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: + """构建用于 LLM 绰号映射的 Prompt""" + # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 + user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) + # print(f"\n\n\nKnown User Info for LLM:\n{user_list_str}\n\n\n\n") # Debugging print + prompt = f""" +任务:分析以下聊天记录和你的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 + +已知用户信息(ID: 名称): +{user_list_str} + +聊天记录: +--- +{chat_history_str} +--- + +你的最新回复: +{bot_reply} + +分析要求: +1. 识别聊天记录和你发言中出现的可能是用户绰号的词语。 +2. 判断这些绰号是否能明确地指向某个特定的用户 ID。一个绰号必须在上下文中清晰地与某个发言人或被提及的人关联起来。 +3. 如果能建立可靠的一一映射关系,请输出一个 JSON 对象,格式如下: + {{ + "is_exist": true, + "data": {{ + "用户A数字id": "绰号_A", + "用户B数字id": "绰号_B" + }} + }} + 其中 "data" 字段的键是用户的 ID (字符串形式),值是对应的绰号。只包含你能确认映射关系的绰号。 +4. 如果无法建立任何可靠的一一映射关系(例如,绰号指代不明、没有出现绰号、或无法确认绰号与用户的关联),请输出 JSON 对象: + {{ + "is_exist": false + }} +5. 在“已知用户信息”列表中,你的昵称后面可能包含"(你)",这表示是你自己,不需要输出你自身的绰号映射。请确保不要将你自己的ID和任何词语映射为绰号。 +6. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 + +输出: +""" + return prompt + + +async def analyze_chat_for_nicknames( + chat_history_str: str, + bot_reply: str, + user_name_map: Dict[str, str] # 这个 map 包含了 user_id -> person_name 的信息 +) -> Dict[str, Any]: + """ + 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 + """ + if not global_config.ENABLE_NICKNAME_MAPPING: + logger.debug("绰号映射功能已禁用。") + return {"is_exist": False} + + if llm_mapper is None: + logger.error("绰号映射 LLM 未初始化。无法执行分析。") + return {"is_exist": False} + + prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) + logger.debug(f"构建的绰号映射 Prompt:\n{prompt}") + + try: + # 调用 LLM + response_content, _, _ = await llm_mapper.generate_response(prompt) + logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") + + if not response_content: + logger.warning("LLM 返回了空的绰号映射内容。") + return {"is_exist": False} + + # 清理可能的 Markdown 代码块标记 + response_content = response_content.strip() + if response_content.startswith("```json"): + response_content = response_content[7:] + if response_content.endswith("```"): + response_content = response_content[:-3] + response_content = response_content.strip() + + try: + result = json.loads(response_content) + if isinstance(result, dict) and "is_exist" in result: + if result["is_exist"] is True: + original_data = result.get("data") # 使用 .get() 更安全 + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + logger.info(f"LLM 找到的原始绰号映射: {original_data}") + + # --- 开始过滤 --- + filtered_data = {} + bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 + + for user_id, nickname in original_data.items(): + # 检查 user_id 是否是字符串,以防万一 + if not isinstance(user_id, str): + logger.warning(f"LLM 返回的 user_id '{user_id}' 不是字符串,跳过。") + continue + + # 条件 1: 排除机器人自身 + if user_id == bot_qq_str: + logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") + continue + + # 条件 2: 排除 nickname 与 person_name 相同的情况 + person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name + if person_name and person_name == nickname: + logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") + continue + + # 如果通过所有过滤条件,则保留 + filtered_data[user_id] = nickname + # --- 结束过滤 --- + + # 检查过滤后是否还有数据 + if not filtered_data: + logger.info("所有找到的绰号映射都被过滤掉了。") + return {"is_exist": False} + else: + logger.info(f"过滤后的绰号映射: {filtered_data}") + return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 + + else: + # is_exist 为 True 但 data 缺失、不是字典或为空 + if "data" not in result: + logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") + elif not isinstance(result.get("data"), dict): + logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") + else: # data 为空字典 + logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") + return {"is_exist": False} + elif result["is_exist"] is False: + logger.info("LLM 未找到可靠的绰号映射。") + return {"is_exist": False} + else: + logger.warning("LLM 响应格式错误: 'is_exist' 不是布尔值。") + return {"is_exist": False} + else: + logger.warning("LLM 响应格式错误: 缺少 'is_exist' 键或不是字典。") + return {"is_exist": False} + except json.JSONDecodeError as json_err: + logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") + return {"is_exist": False} + + except Exception as e: + logger.error(f"绰号映射 LLM 调用或处理过程中出错: {e}", exc_info=True) + return {"is_exist": False} From 6b6945fd77d7433b30e3d327f57c3b82d06ccb1e Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 01:04:41 +0800 Subject: [PATCH 24/58] =?UTF-8?q?prompt=E4=BC=98=E5=8C=96=EF=BC=8C?= =?UTF-8?q?=E9=80=82=E9=85=8D=E5=8F=AF=E4=B9=90=E8=80=81=E5=A4=A7=E6=96=B0?= =?UTF-8?q?=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 41 +- src/plugins/group_nickname/nickname_utils.py | 456 ++++++++++++------ 2 files changed, 315 insertions(+), 182 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 283f8090..0696961a 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -2,15 +2,13 @@ import json from typing import Dict, Any, Optional from src.common.logger_manager import get_logger from src.plugins.models.utils_model import LLMRequest - -# 从全局配置导入 from src.config.config import global_config logger = get_logger("nickname_mapper") llm_mapper: Optional[LLMRequest] = None -if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 +if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 try: # 从全局配置获取模型设置 model_config = global_config.llm_nickname_mapping @@ -18,10 +16,10 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") else: llm_mapper = LLMRequest( # <-- LLM 初始化 - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", ) logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") @@ -29,7 +27,6 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) llm_mapper = None - def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: """构建用于 LLM 绰号映射的 Prompt""" # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 @@ -66,7 +63,8 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: "is_exist": false }} 5. 在“已知用户信息”列表中,你的昵称后面可能包含"(你)",这表示是你自己,不需要输出你自身的绰号映射。请确保不要将你自己的ID和任何词语映射为绰号。 -6. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 +6. 不要输出与用户名称相同的绰号。 +7. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 输出: """ @@ -76,7 +74,7 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: async def analyze_chat_for_nicknames( chat_history_str: str, bot_reply: str, - user_name_map: Dict[str, str], # 这个 map 包含了 user_id -> person_name 的信息 + user_name_map: Dict[str, str] # 这个 map 包含了 user_id -> person_name 的信息 ) -> Dict[str, Any]: """ 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 @@ -113,13 +111,13 @@ async def analyze_chat_for_nicknames( result = json.loads(response_content) if isinstance(result, dict) and "is_exist" in result: if result["is_exist"] is True: - original_data = result.get("data") # 使用 .get() 更安全 - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + original_data = result.get("data") # 使用 .get() 更安全 + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 logger.info(f"LLM 找到的原始绰号映射: {original_data}") # --- 开始过滤 --- filtered_data = {} - bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 + bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 for user_id, nickname in original_data.items(): # 检查 user_id 是否是字符串,以防万一 @@ -132,13 +130,12 @@ async def analyze_chat_for_nicknames( logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") continue - # 条件 2: 排除 nickname 与 person_name 相同的情况 - person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name - if person_name and person_name == nickname: - logger.debug( - f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。" - ) - continue + # 有了改名工具后,该过滤器已不适合了,尝试通过修改 prompt 获得更好的结果 + # # 条件 2: 排除 nickname 与 person_name 相同的情况 + # person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name + # if person_name and person_name == nickname: + # logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") + # continue # 如果通过所有过滤条件,则保留 filtered_data[user_id] = nickname @@ -150,7 +147,7 @@ async def analyze_chat_for_nicknames( return {"is_exist": False} else: logger.info(f"过滤后的绰号映射: {filtered_data}") - return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 + return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 else: # is_exist 为 True 但 data 缺失、不是字典或为空 @@ -158,7 +155,7 @@ async def analyze_chat_for_nicknames( logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") elif not isinstance(result.get("data"), dict): logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") - else: # data 为空字典 + else: # data 为空字典 logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") return {"is_exist": False} elif result["is_exist"] is False: diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 641c2999..2ad23cac 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,174 +1,310 @@ -import json -from typing import Dict, Any, Optional +# GroupNickname/nickname_utils.py +import random +import time +from typing import List, Dict, Tuple, Optional, Any from src.common.logger_manager import get_logger -from src.plugins.models.utils_model import LLMRequest from src.config.config import global_config +from src.plugins.person_info.relationship_manager import relationship_manager +from src.plugins.chat.chat_stream import ChatStream +from src.plugins.chat.message import MessageRecv +from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat +from .nickname_processor import add_to_nickname_queue -logger = get_logger("nickname_mapper") - -llm_mapper: Optional[LLMRequest] = None -if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 - try: - # 从全局配置获取模型设置 - model_config = global_config.llm_nickname_mapping - if not model_config or not model_config.get("name"): - logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") - else: - llm_mapper = LLMRequest( # <-- LLM 初始化 - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", - ) - logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") - - except Exception as e: - logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) - llm_mapper = None - -def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: - """构建用于 LLM 绰号映射的 Prompt""" - # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 - user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) - # print(f"\n\n\nKnown User Info for LLM:\n{user_list_str}\n\n\n\n") # Debugging print - prompt = f""" -任务:分析以下聊天记录和你的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 - -已知用户信息(ID: 名称): -{user_list_str} - -聊天记录: ---- -{chat_history_str} ---- - -你的最新回复: -{bot_reply} - -分析要求: -1. 识别聊天记录和你发言中出现的可能是用户绰号的词语。 -2. 判断这些绰号是否能明确地指向某个特定的用户 ID。一个绰号必须在上下文中清晰地与某个发言人或被提及的人关联起来。 -3. 如果能建立可靠的一一映射关系,请输出一个 JSON 对象,格式如下: - {{ - "is_exist": true, - "data": {{ - "用户A数字id": "绰号_A", - "用户B数字id": "绰号_B" - }} - }} - 其中 "data" 字段的键是用户的 ID (字符串形式),值是对应的绰号。只包含你能确认映射关系的绰号。 -4. 如果无法建立任何可靠的一一映射关系(例如,绰号指代不明、没有出现绰号、或无法确认绰号与用户的关联),请输出 JSON 对象: - {{ - "is_exist": false - }} -5. 在“已知用户信息”列表中,你的昵称后面可能包含"(你)",这表示是你自己,不需要输出你自身的绰号映射。请确保不要将你自己的ID和任何词语映射为绰号。 -6. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 - -输出: -""" - return prompt +# 获取日志记录器,命名为 "绰号工具" +logger = get_logger("nickname_utils") -async def analyze_chat_for_nicknames( - chat_history_str: str, - bot_reply: str, - user_name_map: Dict[str, str] # 这个 map 包含了 user_id -> person_name 的信息 -) -> Dict[str, Any]: +def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int]]]) -> List[Tuple[str, str, int]]: """ - 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 + 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号。 + + Args: + all_nicknames_info: 包含用户及其绰号信息的字典,格式为 + { "用户名1": [{"绰号A": 次数}, {"绰号B": 次数}], ... } + + Returns: + List[Tuple[str, str, int]]: 选中的绰号列表,每个元素为 (用户名, 绰号, 次数)。 + 按次数降序排序。 """ - if not global_config.ENABLE_NICKNAME_MAPPING: - logger.debug("绰号映射功能已禁用。") - return {"is_exist": False} - - if llm_mapper is None: - logger.error("绰号映射 LLM 未初始化。无法执行分析。") - return {"is_exist": False} - - prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) - logger.debug(f"构建的绰号映射 Prompt:\n{prompt}") - - try: - # 调用 LLM - response_content, _, _ = await llm_mapper.generate_response(prompt) - logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") - - if not response_content: - logger.warning("LLM 返回了空的绰号映射内容。") - return {"is_exist": False} - - # 清理可能的 Markdown 代码块标记 - response_content = response_content.strip() - if response_content.startswith("```json"): - response_content = response_content[7:] - if response_content.endswith("```"): - response_content = response_content[:-3] - response_content = response_content.strip() - - try: - result = json.loads(response_content) - if isinstance(result, dict) and "is_exist" in result: - if result["is_exist"] is True: - original_data = result.get("data") # 使用 .get() 更安全 - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 - logger.info(f"LLM 找到的原始绰号映射: {original_data}") - - # --- 开始过滤 --- - filtered_data = {} - bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 - - for user_id, nickname in original_data.items(): - # 检查 user_id 是否是字符串,以防万一 - if not isinstance(user_id, str): - logger.warning(f"LLM 返回的 user_id '{user_id}' 不是字符串,跳过。") - continue - - # 条件 1: 排除机器人自身 - if user_id == bot_qq_str: - logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") - continue - - # 条件 2: 排除 nickname 与 person_name 相同的情况 - person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name - if person_name and person_name == nickname: - logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") - continue - - # 如果通过所有过滤条件,则保留 - filtered_data[user_id] = nickname - # --- 结束过滤 --- - - # 检查过滤后是否还有数据 - if not filtered_data: - logger.info("所有找到的绰号映射都被过滤掉了。") - return {"is_exist": False} - else: - logger.info(f"过滤后的绰号映射: {filtered_data}") - return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 + if not all_nicknames_info: + # 如果输入为空,直接返回空列表 + return [] + candidates = [] # 候选绰号列表,包含 (用户名, 绰号, 次数, 权重) + for user_name, nicknames in all_nicknames_info.items(): + if nicknames: + for nickname_entry in nicknames: + # nickname_entry 应该是 {"绰号": 次数} 格式 + if isinstance(nickname_entry, dict) and len(nickname_entry) == 1: + nickname, count = list(nickname_entry.items())[0] + # 确保次数是正整数 + if isinstance(count, int) and count > 0: + # 添加平滑因子,避免概率为0,并让低频词也有机会 + weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING + candidates.append((user_name, nickname, count, weight)) else: - # is_exist 为 True 但 data 缺失、不是字典或为空 - if "data" not in result: - logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") - elif not isinstance(result.get("data"), dict): - logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") - else: # data 为空字典 - logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") - return {"is_exist": False} - elif result["is_exist"] is False: - logger.info("LLM 未找到可靠的绰号映射。") - return {"is_exist": False} + # 日志:记录无效的绰号次数 + logger.warning( + f"用户 '{user_name}' 的绰号 '{nickname}' 次数无效: {count}。已跳过。" + ) else: - logger.warning("LLM 响应格式错误: 'is_exist' 不是布尔值。") - return {"is_exist": False} + # 日志:记录无效的绰号条目格式 + logger.warning(f"用户 '{user_name}' 的绰号条目格式无效: {nickname_entry}。已跳过。") + + if not candidates: + # 如果没有有效的候选绰号,返回空列表 + return [] + + # 计算总权重 + total_weight = sum(c[3] for c in candidates) + + if total_weight <= 0: + # 如果所有权重都无效或为0,则按原始次数排序选择前 N 个 + logger.warning("所有候选绰号的总权重为0或负数,将按原始次数选择 Top N。") + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] + else: + # 计算归一化概率 + probabilities = [c[3] / total_weight for c in candidates] + + # 使用概率分布进行加权随机选择(不重复) + num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) + try: + # 实现不重复加权抽样 + selected_indices = set() + selected = [] + attempts = 0 + max_attempts = num_to_select * 5 # 设置最大尝试次数,防止无限循环 + + while len(selected) < num_to_select and attempts < max_attempts: + # 每次只选一个 + chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] + if chosen_index not in selected_indices: + selected_indices.add(chosen_index) + selected.append(candidates[chosen_index]) + attempts += 1 + + # 如果尝试多次后仍未选够,补充出现次数最多的 + if len(selected) < num_to_select: + logger.debug(f"加权随机选择后数量不足 ({len(selected)}/{num_to_select}),补充选择次数最多的。") + remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + needed = num_to_select - len(selected) + selected.extend(remaining_candidates[:needed]) + + except Exception as e: + # 日志:记录加权随机选择时发生的错误,并回退到简单选择 + logger.error( + f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True + ) + # 出错时回退到选择次数最多的 N 个 + candidates.sort(key=lambda x: x[2], reverse=True) + selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] + + # 格式化输出结果为 (用户名, 绰号, 次数) + result = [(user, nick, count) for user, nick, count, _weight in selected] + result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 + + # 日志:记录最终选中的用于 Prompt 的绰号 + logger.debug(f"为 Prompt 选择的绰号: {result}") + return result + + +def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, int]]) -> str: + """ + 将选中的绰号信息格式化为注入 Prompt 的字符串。 + + Args: + selected_nicknames: 选中的绰号列表 (用户名, 绰号, 次数)。 + + Returns: + str: 格式化后的字符串,如果列表为空则返回空字符串。 + """ + if not selected_nicknames: + # 如果没有选中的绰号,返回空字符串 + return "" + + prompt_lines = ["以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),如果有需要提及对方,用你认为合适的方式提及:"] # 注入部分的标题 + grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 + + # 按用户分组绰号 + for user_name, nickname, _count in selected_nicknames: + if user_name not in grouped_by_user: + grouped_by_user[user_name] = [] + # 添加中文引号以区分绰号 + grouped_by_user[user_name].append(f"“{nickname}”") + + # 构建每个用户的绰号字符串 + for user_name, nicknames in grouped_by_user.items(): + nicknames_str = "、".join(nicknames) # 使用中文顿号连接 + prompt_lines.append(f"- 你私下称呼ta为{user_name},ta被有时被群友称为:{nicknames_str}") # 格式化输出 + + # 如果只有标题行,返回空字符串,避免注入无意义的标题 + if len(prompt_lines) > 1: + # 末尾加换行符,以便在 Prompt 中正确分隔 + return "\n".join(prompt_lines) + "\n" + else: + return "" + + +async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: + """ + 获取并格式化用于 Prompt 注入的绰号信息字符串。 + 这是一个封装函数,整合了获取、选择和格式化的逻辑。 + + Args: + chat_stream: 当前的 ChatStream 对象。 + message_list_before_now: 用于确定上下文中用户的消息列表。 + + Returns: + str: 格式化后的绰号信息字符串,如果无法获取或格式化则返回空字符串。 + """ + nickname_injection_str = "" + # 仅在群聊且功能开启时执行 + if global_config.ENABLE_NICKNAME_MAPPING and chat_stream and chat_stream.group_info: + try: + group_id = str(chat_stream.group_info.group_id) + user_ids_in_context = set() # 存储上下文中出现的用户ID + + # 从消息列表中提取用户ID + if message_list_before_now: + for msg in message_list_before_now: + sender_id = msg["user_info"].get("user_id") + if sender_id: + user_ids_in_context.add(str(sender_id)) else: - logger.warning("LLM 响应格式错误: 缺少 'is_exist' 键或不是字典。") - return {"is_exist": False} - except json.JSONDecodeError as json_err: - logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") - return {"is_exist": False} + # 如果消息列表为空,尝试获取最近发言者作为上下文用户 + recent_speakers = chat_stream.get_recent_speakers(limit=5) # 获取最近5个发言者 + for speaker in recent_speakers: + user_ids_in_context.add(str(speaker['user_id'])) + if not user_ids_in_context: + # 日志:记录未找到上下文用户 + logger.warning(f"[{chat_stream.stream_id}] 未找到消息或最近发言者用于绰号注入。") + + # 如果找到了上下文用户 + if user_ids_in_context: + platform = chat_stream.platform + # --- 调用批量获取群组绰号的方法 --- + # 使用 relationship_manager 从数据库获取数据 + all_nicknames_data = await relationship_manager.get_users_group_nicknames( + platform, list(user_ids_in_context), group_id + ) + + # 如果获取到了绰号数据 + if all_nicknames_data: + # 调用选择和格式化函数 + selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) + nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) + if nickname_injection_str: + # 日志:记录生成的用于 Prompt 的绰号信息 + logger.debug(f"[{chat_stream.stream_id}] 已生成用于 Prompt 的绰号信息:\n{nickname_injection_str}") + + except Exception as e: + # 日志:记录获取或格式化绰号信息时发生的错误 + logger.error(f"[{chat_stream.stream_id}] 获取或格式化 Prompt 绰号信息时出错: {e}", exc_info=True) + nickname_injection_str = "" # 出错时确保返回空字符串 + + # 返回最终生成的字符串(可能为空) + return nickname_injection_str + + +async def trigger_nickname_analysis_if_needed( + anchor_message: MessageRecv, + bot_reply: List[str], + chat_stream: Optional[ChatStream] = None # 允许传入 chat_stream 或从 anchor_message 获取 +): + """ + 如果满足条件(群聊、功能开启),则准备数据并触发绰号分析任务。 + 将相关信息放入处理队列,由 nickname_processor 处理。 + + Args: + anchor_message: 触发回复的原始消息对象。 + bot_reply: Bot 生成的回复内容列表。 + chat_stream: 可选的 ChatStream 对象。 + """ + # 检查功能是否开启 + if not global_config.ENABLE_NICKNAME_MAPPING: + return # 如果功能禁用,直接返回 + + # 确定使用的 chat_stream + current_chat_stream = chat_stream or anchor_message.chat_stream + + # 检查是否是群聊且 chat_stream 有效 + if not current_chat_stream or not current_chat_stream.group_info: + # 日志:记录跳过分析的原因(非群聊或无效流) + logger.debug(f"[{current_chat_stream.stream_id if current_chat_stream else '未知流'}] 跳过绰号分析:非群聊或无效聊天流。") + return + + log_prefix = f"[{current_chat_stream.stream_id}]" # 用于日志的前缀 + + try: + # 1. 获取历史记录 + history_limit = 30 # 定义获取历史记录的数量限制 + history_messages = get_raw_msg_before_timestamp_with_chat( + chat_id=current_chat_stream.stream_id, + timestamp=time.time(), # 获取当前时间之前的记录 + limit=history_limit, + ) + + # 格式化历史记录为可读字符串 + chat_history_str = await build_readable_messages( + messages=history_messages, + replace_bot_name=True, # 替换机器人名称,以便 LLM 分析 + merge_messages=False, # 不合并消息,保留原始对话结构 + timestamp_mode="relative", # 使用相对时间戳 + read_mark=0.0, # 不需要已读标记 + truncate=False, # 获取完整内容进行分析 + ) + + # 2. 获取 Bot 回复字符串 + bot_reply_str = " ".join(bot_reply) if bot_reply else "" # 处理空回复列表 + + # 3. 获取群号和平台信息 + group_id = str(current_chat_stream.group_info.group_id) + platform = current_chat_stream.platform + + # 4. 构建用户 ID 到名称的映射 (user_name_map) + user_ids_in_history = set() # 存储历史记录中出现的用户ID + for msg in history_messages: + sender_id = msg["user_info"].get("user_id") + if sender_id: + user_ids_in_history.add(str(sender_id)) + + user_name_map = {} # 初始化映射字典 + if user_ids_in_history: + try: + # 批量从数据库获取这些用户的 person_name + names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) + except Exception as e: + # 日志:记录获取 person_name 时发生的错误 + logger.error(f"{log_prefix} 批量获取 person_name 时出错: {e}", exc_info=True) + names_data = {} # 出错时使用空字典 + + # 填充 user_name_map + for user_id in user_ids_in_history: + if user_id in names_data: + # 如果数据库中有 person_name,则使用它 + user_name_map[user_id] = names_data[user_id] + else: + # 如果数据库中没有,则回退查找用户在历史记录中最近使用的 nickname + latest_nickname = next( + ( + m["user_info"].get("user_nickname") # 从 user_info 获取 nickname + for m in reversed(history_messages) # 从后往前找 + # 确保消息的用户ID匹配且 nickname 存在 + if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") + ), + None, # 如果找不到,返回 None + ) + # 如果找到了 nickname 则使用,否则使用 "未知(ID)" + user_name_map[user_id] = latest_nickname or f"未知({user_id})" + + # 5. 将准备好的数据添加到绰号处理队列 + await add_to_nickname_queue(chat_history_str, bot_reply_str, platform, group_id, user_name_map) + # 日志:记录已成功触发分析任务 + logger.debug(f"{log_prefix} 已为群组 {group_id} 触发绰号分析任务。") except Exception as e: - logger.error(f"绰号映射 LLM 调用或处理过程中出错: {e}", exc_info=True) - return {"is_exist": False} + # 日志:记录触发分析过程中发生的任何其他错误 + logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) \ No newline at end of file From 7986d2d3bacad0648aecf664b0d048348223deae Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 01:06:15 +0800 Subject: [PATCH 25/58] =?UTF-8?q?prompt=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 0696961a..1af61ba8 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -63,7 +63,7 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: "is_exist": false }} 5. 在“已知用户信息”列表中,你的昵称后面可能包含"(你)",这表示是你自己,不需要输出你自身的绰号映射。请确保不要将你自己的ID和任何词语映射为绰号。 -6. 不要输出与用户名称相同的绰号。 +6. 不要输出与用户名称相同的绰号,不要输出你发言中对他人的绰号映射。 7. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 输出: From 9794182545cb4e89fc358697a1413836c19d8a22 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 01:07:40 +0800 Subject: [PATCH 26/58] ruff --- src/plugins/group_nickname/nickname_utils.py | 2 +- src/plugins/heartFC_chat/heartflow_prompt_builder.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 2ad23cac..3a784e70 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,7 +1,7 @@ # GroupNickname/nickname_utils.py import random import time -from typing import List, Dict, Tuple, Optional, Any +from typing import List, Dict, Tuple, Optional from src.common.logger_manager import get_logger from src.config.config import global_config from src.plugins.person_info.relationship_manager import relationship_manager diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 43075ef8..bee95a56 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -1,6 +1,6 @@ import random import time -from typing import Union, Optional, List, Dict, Any # 引入 List, Dict, Any +from typing import Union, Optional from ...config.config import global_config from src.common.logger_manager import get_logger from ...individuality.individuality import Individuality From 5345eb725ae714ffad958e70d566b7560115904f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 17:08:00 +0000 Subject: [PATCH 27/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 23 +++--- src/plugins/group_nickname/nickname_utils.py | 72 ++++++++++--------- .../heartFC_chat/heartflow_prompt_builder.py | 6 +- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 1af61ba8..0723f8ee 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -8,7 +8,7 @@ from src.config.config import global_config logger = get_logger("nickname_mapper") llm_mapper: Optional[LLMRequest] = None -if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 +if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 try: # 从全局配置获取模型设置 model_config = global_config.llm_nickname_mapping @@ -16,10 +16,10 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") else: llm_mapper = LLMRequest( # <-- LLM 初始化 - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", + model=global_config.llm_nickname_mapping, + temperature=global_config.llm_nickname_mapping["temp"], + max_tokens=256, + request_type="nickname_mapping", ) logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") @@ -27,6 +27,7 @@ if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) llm_mapper = None + def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: """构建用于 LLM 绰号映射的 Prompt""" # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 @@ -74,7 +75,7 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: async def analyze_chat_for_nicknames( chat_history_str: str, bot_reply: str, - user_name_map: Dict[str, str] # 这个 map 包含了 user_id -> person_name 的信息 + user_name_map: Dict[str, str], # 这个 map 包含了 user_id -> person_name 的信息 ) -> Dict[str, Any]: """ 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 @@ -111,13 +112,13 @@ async def analyze_chat_for_nicknames( result = json.loads(response_content) if isinstance(result, dict) and "is_exist" in result: if result["is_exist"] is True: - original_data = result.get("data") # 使用 .get() 更安全 - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + original_data = result.get("data") # 使用 .get() 更安全 + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 logger.info(f"LLM 找到的原始绰号映射: {original_data}") # --- 开始过滤 --- filtered_data = {} - bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 + bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 for user_id, nickname in original_data.items(): # 检查 user_id 是否是字符串,以防万一 @@ -147,7 +148,7 @@ async def analyze_chat_for_nicknames( return {"is_exist": False} else: logger.info(f"过滤后的绰号映射: {filtered_data}") - return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 + return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 else: # is_exist 为 True 但 data 缺失、不是字典或为空 @@ -155,7 +156,7 @@ async def analyze_chat_for_nicknames( logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") elif not isinstance(result.get("data"), dict): logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") - else: # data 为空字典 + else: # data 为空字典 logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") return {"is_exist": False} elif result["is_exist"] is False: diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 3a784e70..bbd124a7 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -31,7 +31,7 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int # 如果输入为空,直接返回空列表 return [] - candidates = [] # 候选绰号列表,包含 (用户名, 绰号, 次数, 权重) + candidates = [] # 候选绰号列表,包含 (用户名, 绰号, 次数, 权重) for user_name, nicknames in all_nicknames_info.items(): if nicknames: for nickname_entry in nicknames: @@ -45,9 +45,7 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int candidates.append((user_name, nickname, count, weight)) else: # 日志:记录无效的绰号次数 - logger.warning( - f"用户 '{user_name}' 的绰号 '{nickname}' 次数无效: {count}。已跳过。" - ) + logger.warning(f"用户 '{user_name}' 的绰号 '{nickname}' 次数无效: {count}。已跳过。") else: # 日志:记录无效的绰号条目格式 logger.warning(f"用户 '{user_name}' 的绰号条目格式无效: {nickname_entry}。已跳过。") @@ -89,15 +87,13 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int if len(selected) < num_to_select: logger.debug(f"加权随机选择后数量不足 ({len(selected)}/{num_to_select}),补充选择次数最多的。") remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 needed = num_to_select - len(selected) selected.extend(remaining_candidates[:needed]) except Exception as e: # 日志:记录加权随机选择时发生的错误,并回退到简单选择 - logger.error( - f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True - ) + logger.error(f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True) # 出错时回退到选择次数最多的 N 个 candidates.sort(key=lambda x: x[2], reverse=True) selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] @@ -125,8 +121,10 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in # 如果没有选中的绰号,返回空字符串 return "" - prompt_lines = ["以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),如果有需要提及对方,用你认为合适的方式提及:"] # 注入部分的标题 - grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 + prompt_lines = [ + "以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),如果有需要提及对方,用你认为合适的方式提及:" + ] # 注入部分的标题 + grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 # 按用户分组绰号 for user_name, nickname, _count in selected_nicknames: @@ -137,8 +135,8 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in # 构建每个用户的绰号字符串 for user_name, nicknames in grouped_by_user.items(): - nicknames_str = "、".join(nicknames) # 使用中文顿号连接 - prompt_lines.append(f"- 你私下称呼ta为{user_name},ta被有时被群友称为:{nicknames_str}") # 格式化输出 + nicknames_str = "、".join(nicknames) # 使用中文顿号连接 + prompt_lines.append(f"- 你私下称呼ta为{user_name},ta被有时被群友称为:{nicknames_str}") # 格式化输出 # 如果只有标题行,返回空字符串,避免注入无意义的标题 if len(prompt_lines) > 1: @@ -165,7 +163,7 @@ async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_lis if global_config.ENABLE_NICKNAME_MAPPING and chat_stream and chat_stream.group_info: try: group_id = str(chat_stream.group_info.group_id) - user_ids_in_context = set() # 存储上下文中出现的用户ID + user_ids_in_context = set() # 存储上下文中出现的用户ID # 从消息列表中提取用户ID if message_list_before_now: @@ -175,9 +173,9 @@ async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_lis user_ids_in_context.add(str(sender_id)) else: # 如果消息列表为空,尝试获取最近发言者作为上下文用户 - recent_speakers = chat_stream.get_recent_speakers(limit=5) # 获取最近5个发言者 + recent_speakers = chat_stream.get_recent_speakers(limit=5) # 获取最近5个发言者 for speaker in recent_speakers: - user_ids_in_context.add(str(speaker['user_id'])) + user_ids_in_context.add(str(speaker["user_id"])) if not user_ids_in_context: # 日志:记录未找到上下文用户 logger.warning(f"[{chat_stream.stream_id}] 未找到消息或最近发言者用于绰号注入。") @@ -198,12 +196,14 @@ async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_lis nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) if nickname_injection_str: # 日志:记录生成的用于 Prompt 的绰号信息 - logger.debug(f"[{chat_stream.stream_id}] 已生成用于 Prompt 的绰号信息:\n{nickname_injection_str}") + logger.debug( + f"[{chat_stream.stream_id}] 已生成用于 Prompt 的绰号信息:\n{nickname_injection_str}" + ) except Exception as e: # 日志:记录获取或格式化绰号信息时发生的错误 logger.error(f"[{chat_stream.stream_id}] 获取或格式化 Prompt 绰号信息时出错: {e}", exc_info=True) - nickname_injection_str = "" # 出错时确保返回空字符串 + nickname_injection_str = "" # 出错时确保返回空字符串 # 返回最终生成的字符串(可能为空) return nickname_injection_str @@ -212,7 +212,7 @@ async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_lis async def trigger_nickname_analysis_if_needed( anchor_message: MessageRecv, bot_reply: List[str], - chat_stream: Optional[ChatStream] = None # 允许传入 chat_stream 或从 anchor_message 获取 + chat_stream: Optional[ChatStream] = None, # 允许传入 chat_stream 或从 anchor_message 获取 ): """ 如果满足条件(群聊、功能开启),则准备数据并触发绰号分析任务。 @@ -225,7 +225,7 @@ async def trigger_nickname_analysis_if_needed( """ # 检查功能是否开启 if not global_config.ENABLE_NICKNAME_MAPPING: - return # 如果功能禁用,直接返回 + return # 如果功能禁用,直接返回 # 确定使用的 chat_stream current_chat_stream = chat_stream or anchor_message.chat_stream @@ -233,45 +233,47 @@ async def trigger_nickname_analysis_if_needed( # 检查是否是群聊且 chat_stream 有效 if not current_chat_stream or not current_chat_stream.group_info: # 日志:记录跳过分析的原因(非群聊或无效流) - logger.debug(f"[{current_chat_stream.stream_id if current_chat_stream else '未知流'}] 跳过绰号分析:非群聊或无效聊天流。") + logger.debug( + f"[{current_chat_stream.stream_id if current_chat_stream else '未知流'}] 跳过绰号分析:非群聊或无效聊天流。" + ) return - log_prefix = f"[{current_chat_stream.stream_id}]" # 用于日志的前缀 + log_prefix = f"[{current_chat_stream.stream_id}]" # 用于日志的前缀 try: # 1. 获取历史记录 history_limit = 30 # 定义获取历史记录的数量限制 history_messages = get_raw_msg_before_timestamp_with_chat( chat_id=current_chat_stream.stream_id, - timestamp=time.time(), # 获取当前时间之前的记录 + timestamp=time.time(), # 获取当前时间之前的记录 limit=history_limit, ) # 格式化历史记录为可读字符串 chat_history_str = await build_readable_messages( messages=history_messages, - replace_bot_name=True, # 替换机器人名称,以便 LLM 分析 - merge_messages=False, # 不合并消息,保留原始对话结构 - timestamp_mode="relative", # 使用相对时间戳 - read_mark=0.0, # 不需要已读标记 - truncate=False, # 获取完整内容进行分析 + replace_bot_name=True, # 替换机器人名称,以便 LLM 分析 + merge_messages=False, # 不合并消息,保留原始对话结构 + timestamp_mode="relative", # 使用相对时间戳 + read_mark=0.0, # 不需要已读标记 + truncate=False, # 获取完整内容进行分析 ) # 2. 获取 Bot 回复字符串 - bot_reply_str = " ".join(bot_reply) if bot_reply else "" # 处理空回复列表 + bot_reply_str = " ".join(bot_reply) if bot_reply else "" # 处理空回复列表 # 3. 获取群号和平台信息 group_id = str(current_chat_stream.group_info.group_id) platform = current_chat_stream.platform # 4. 构建用户 ID 到名称的映射 (user_name_map) - user_ids_in_history = set() # 存储历史记录中出现的用户ID + user_ids_in_history = set() # 存储历史记录中出现的用户ID for msg in history_messages: sender_id = msg["user_info"].get("user_id") if sender_id: user_ids_in_history.add(str(sender_id)) - user_name_map = {} # 初始化映射字典 + user_name_map = {} # 初始化映射字典 if user_ids_in_history: try: # 批量从数据库获取这些用户的 person_name @@ -279,7 +281,7 @@ async def trigger_nickname_analysis_if_needed( except Exception as e: # 日志:记录获取 person_name 时发生的错误 logger.error(f"{log_prefix} 批量获取 person_name 时出错: {e}", exc_info=True) - names_data = {} # 出错时使用空字典 + names_data = {} # 出错时使用空字典 # 填充 user_name_map for user_id in user_ids_in_history: @@ -290,12 +292,12 @@ async def trigger_nickname_analysis_if_needed( # 如果数据库中没有,则回退查找用户在历史记录中最近使用的 nickname latest_nickname = next( ( - m["user_info"].get("user_nickname") # 从 user_info 获取 nickname - for m in reversed(history_messages) # 从后往前找 + m["user_info"].get("user_nickname") # 从 user_info 获取 nickname + for m in reversed(history_messages) # 从后往前找 # 确保消息的用户ID匹配且 nickname 存在 if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") ), - None, # 如果找不到,返回 None + None, # 如果找不到,返回 None ) # 如果找到了 nickname 则使用,否则使用 "未知(ID)" user_name_map[user_id] = latest_nickname or f"未知({user_id})" @@ -307,4 +309,4 @@ async def trigger_nickname_analysis_if_needed( except Exception as e: # 日志:记录触发分析过程中发生的任何其他错误 - logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) \ No newline at end of file + logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index bee95a56..777dc1dc 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -274,7 +274,9 @@ class PromptBuilder: ) return None - async def _build_prompt_normal(self, chat_stream, message_txt: str, sender_name: str = "某人") -> str: # 返回值改为 str + async def _build_prompt_normal( + self, chat_stream, message_txt: str, sender_name: str = "某人" + ) -> str: # 返回值改为 str individuality = Individuality.get_instance() prompt_personality = individuality.get_prompt(x_person=2, level=2) @@ -415,7 +417,7 @@ class PromptBuilder: memory_prompt=memory_prompt, prompt_info=prompt_info, schedule_prompt=schedule_prompt, - nickname_info=nickname_injection_str, # <--- 注入绰号信息 + nickname_info=nickname_injection_str, # <--- 注入绰号信息 chat_target=await global_prompt_manager.get_prompt_async("chat_target_group1") if chat_in_group else await global_prompt_manager.get_prompt_async("chat_target_private1"), From 8c78cf59baa6a4ac8478eed5670268ea0a55e92f Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 01:16:35 +0800 Subject: [PATCH 28/58] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_processor.py | 2 -- src/plugins/group_nickname/nickname_utils.py | 1 - src/plugins/heartFC_chat/heartflow_prompt_builder.py | 4 +--- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index 264654af..ceb282da 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -1,5 +1,3 @@ -# nickname_processor.py (多线程版本 - 使用全局 config - 修复 Race Condition on person_id) - import asyncio import traceback import threading diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index bbd124a7..812e14f6 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,4 +1,3 @@ -# GroupNickname/nickname_utils.py import random import time from typing import List, Dict, Tuple, Optional diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 777dc1dc..8edace17 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -274,9 +274,7 @@ class PromptBuilder: ) return None - async def _build_prompt_normal( - self, chat_stream, message_txt: str, sender_name: str = "某人" - ) -> str: # 返回值改为 str + async def _build_prompt_normal(self, chat_stream, message_txt: str, sender_name: str = "某人") -> tuple[str, str]: individuality = Individuality.get_instance() prompt_personality = individuality.get_prompt(x_person=2, level=2) From 14d21c8b08face359ceb48d326a87f95c89b30a9 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 01:26:16 +0800 Subject: [PATCH 29/58] =?UTF-8?q?=E5=88=A0=E6=8E=89=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E8=AF=AD=E5=8F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index fa180673..747b5726 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -580,8 +580,7 @@ class HeartFChatting: response_set=reply, send_emoji=emoji_query, ) - print("消息发送成功,准备进入绰号分析") - + # 调用工具函数触发绰号分析 await trigger_nickname_analysis_if_needed(anchor_message, reply, self.chat_stream) From 67bf0e26debf9d50688d310a6addacec9132c649 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 17:26:36 +0000 Subject: [PATCH 30/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 747b5726..476ca868 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -580,7 +580,7 @@ class HeartFChatting: response_set=reply, send_emoji=emoji_query, ) - + # 调用工具函数触发绰号分析 await trigger_nickname_analysis_if_needed(anchor_message, reply, self.chat_stream) From df38e46f1d8bf8ee12341b0b895cba85a9c43875 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 01:45:17 +0800 Subject: [PATCH 31/58] =?UTF-8?q?=E5=85=B3=E6=8E=89=E7=A9=BA=E8=B0=83?= =?UTF-8?q?=E8=B5=B7=E7=88=86=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/heart_flow/mai_state_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/heart_flow/mai_state_manager.py b/src/heart_flow/mai_state_manager.py index d87e0a11..d289a94a 100644 --- a/src/heart_flow/mai_state_manager.py +++ b/src/heart_flow/mai_state_manager.py @@ -14,7 +14,7 @@ logger = get_logger("mai_state") # whether a specific debugging feature is enabled or not. When `enable_unlimited_hfc_chat` is set to # `False`, it means that the debugging feature for unlimited focused chatting is disabled. # enable_unlimited_hfc_chat = True # 调试用:无限专注聊天 -enable_unlimited_hfc_chat = True +enable_unlimited_hfc_chat = False prevent_offline_state = True # 目前默认不启用OFFLINE状态 From f27871c15bcc1d5d758b830f9f3d6cbba8291834 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 18:10:03 +0000 Subject: [PATCH 32/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../heartFC_chat/heartflow_prompt_builder.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 08c88d44..ff4e5062 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -277,7 +277,7 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s prompt = await global_prompt_manager.format_prompt( template_name, info_from_tools=structured_info_prompt, - nickname_info=nickname_injection_str, + nickname_info=nickname_injection_str, sender_name=effective_sender_name, # Used in private template chat_talking_prompt=chat_talking_prompt, bot_name=global_config.BOT_NICKNAME, @@ -478,25 +478,25 @@ class PromptBuilder: nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) prompt = await global_prompt_manager.format_prompt( - template_name, - relation_prompt=relation_prompt, - sender_name=effective_sender_name, - memory_prompt=memory_prompt, - prompt_info=prompt_info, - schedule_prompt=schedule_prompt, + template_name, + relation_prompt=relation_prompt, + sender_name=effective_sender_name, + memory_prompt=memory_prompt, + prompt_info=prompt_info, + schedule_prompt=schedule_prompt, nickname_info=nickname_injection_str, # <--- 注入绰号信息 - chat_talking_prompt=chat_talking_prompt, - message_txt=message_txt, - bot_name=global_config.BOT_NICKNAME, - bot_other_names="/".join(global_config.BOT_ALIAS_NAMES), - prompt_personality=prompt_personality, - mood_prompt=mood_prompt, - reply_style1=reply_style1_chosen, - reply_style2=reply_style2_chosen, - keywords_reaction_prompt=keywords_reaction_prompt, - prompt_ger=prompt_ger, - moderation_prompt=await global_prompt_manager.get_prompt_async("moderation_prompt"), - ) + chat_talking_prompt=chat_talking_prompt, + message_txt=message_txt, + bot_name=global_config.BOT_NICKNAME, + bot_other_names="/".join(global_config.BOT_ALIAS_NAMES), + prompt_personality=prompt_personality, + mood_prompt=mood_prompt, + reply_style1=reply_style1_chosen, + reply_style2=reply_style2_chosen, + keywords_reaction_prompt=keywords_reaction_prompt, + prompt_ger=prompt_ger, + moderation_prompt=await global_prompt_manager.get_prompt_async("moderation_prompt"), + ) # --- End choosing template --- return prompt From e150bfc9f43d5eb5a1846fcd394a39e2b039919d Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 02:23:01 +0800 Subject: [PATCH 33/58] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E7=BC=A9=E8=BF=9B?= =?UTF-8?q?=E5=90=88=E5=B9=B6=E9=94=99=E8=AF=AF=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../heartFC_chat/heartflow_prompt_builder.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index ff4e5062..e9f9b3b6 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -475,29 +475,29 @@ class PromptBuilder: effective_sender_name = sender_name # 调用新的工具函数获取绰号信息 - nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) + nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) - prompt = await global_prompt_manager.format_prompt( - template_name, - relation_prompt=relation_prompt, - sender_name=effective_sender_name, - memory_prompt=memory_prompt, - prompt_info=prompt_info, - schedule_prompt=schedule_prompt, - nickname_info=nickname_injection_str, # <--- 注入绰号信息 - chat_talking_prompt=chat_talking_prompt, - message_txt=message_txt, - bot_name=global_config.BOT_NICKNAME, - bot_other_names="/".join(global_config.BOT_ALIAS_NAMES), - prompt_personality=prompt_personality, - mood_prompt=mood_prompt, - reply_style1=reply_style1_chosen, - reply_style2=reply_style2_chosen, - keywords_reaction_prompt=keywords_reaction_prompt, - prompt_ger=prompt_ger, - moderation_prompt=await global_prompt_manager.get_prompt_async("moderation_prompt"), - ) - # --- End choosing template --- + prompt = await global_prompt_manager.format_prompt( + template_name, + relation_prompt=relation_prompt, + sender_name=effective_sender_name, + memory_prompt=memory_prompt, + prompt_info=prompt_info, + schedule_prompt=schedule_prompt, + nickname_info=nickname_injection_str, # <--- 注入绰号信息 + chat_talking_prompt=chat_talking_prompt, + message_txt=message_txt, + bot_name=global_config.BOT_NICKNAME, + bot_other_names="/".join(global_config.BOT_ALIAS_NAMES), + prompt_personality=prompt_personality, + mood_prompt=mood_prompt, + reply_style1=reply_style1_chosen, + reply_style2=reply_style2_chosen, + keywords_reaction_prompt=keywords_reaction_prompt, + prompt_ger=prompt_ger, + moderation_prompt=await global_prompt_manager.get_prompt_async("moderation_prompt"), + ) + # --- End choosing template --- return prompt From 380a9098da0569a3af9b0109f6684c7c2385293c Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 19:47:54 +0000 Subject: [PATCH 34/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index ab4d30e6..dcc92ebe 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -33,7 +33,6 @@ from src.plugins.group_nickname.nickname_utils import trigger_nickname_analysis_ install(show_locals=True, extra_lines=3) - WAITING_TIME_THRESHOLD = 300 # 等待新消息时间阈值,单位秒 EMOJI_SEND_PRO = 0.3 # 设置一个概率,比如 30% 才真的发 From 63ff2fc7f672582a337f5fe062a1c7016b5144da Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 04:18:24 +0800 Subject: [PATCH 35/58] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8A=A5=E9=94=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartflow_prompt_builder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index e9f9b3b6..2313b632 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -259,6 +259,7 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s prompt = await global_prompt_manager.format_prompt( template_name, info_from_tools=structured_info_prompt, + nickname_info=nickname_injection_str, chat_target=chat_target_1, # Used in group template chat_talking_prompt=chat_talking_prompt, bot_name=global_config.BOT_NICKNAME, @@ -277,7 +278,6 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s prompt = await global_prompt_manager.format_prompt( template_name, info_from_tools=structured_info_prompt, - nickname_info=nickname_injection_str, sender_name=effective_sender_name, # Used in private template chat_talking_prompt=chat_talking_prompt, bot_name=global_config.BOT_NICKNAME, @@ -449,6 +449,9 @@ class PromptBuilder: chat_target_1 = await global_prompt_manager.get_prompt_async("chat_target_group1") chat_target_2 = await global_prompt_manager.get_prompt_async("chat_target_group2") + # 调用新的工具函数获取绰号信息 + nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) + prompt = await global_prompt_manager.format_prompt( template_name, relation_prompt=relation_prompt, @@ -456,6 +459,7 @@ class PromptBuilder: memory_prompt=memory_prompt, prompt_info=prompt_info, schedule_prompt=schedule_prompt, + nickname_info=nickname_injection_str, # <--- 注入绰号信息 chat_target=chat_target_1, chat_target_2=chat_target_2, chat_talking_prompt=chat_talking_prompt, @@ -474,9 +478,6 @@ class PromptBuilder: template_name = "reasoning_prompt_private_main" effective_sender_name = sender_name - # 调用新的工具函数获取绰号信息 - nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) - prompt = await global_prompt_manager.format_prompt( template_name, relation_prompt=relation_prompt, @@ -484,7 +485,6 @@ class PromptBuilder: memory_prompt=memory_prompt, prompt_info=prompt_info, schedule_prompt=schedule_prompt, - nickname_info=nickname_injection_str, # <--- 注入绰号信息 chat_talking_prompt=chat_talking_prompt, message_txt=message_txt, bot_name=global_config.BOT_NICKNAME, From abc6613f94eac20087d43e2e533dbacfcbf22c59 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 20:18:46 +0000 Subject: [PATCH 36/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartflow_prompt_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 2313b632..44c3a1d0 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -451,7 +451,7 @@ class PromptBuilder: # 调用新的工具函数获取绰号信息 nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) - + prompt = await global_prompt_manager.format_prompt( template_name, relation_prompt=relation_prompt, From d62e9f0a87bcb4f7e78be1369e62efb895319af6 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 06:26:19 +0800 Subject: [PATCH 37/58] =?UTF-8?q?=E4=B8=BA=20planner=20=E7=9A=84=20prompt?= =?UTF-8?q?=20=E5=A2=9E=E5=8A=A0=E7=BB=B0=E5=8F=B7=E6=B3=A8=E5=85=A5?= =?UTF-8?q?=EF=BC=8C=E4=BF=AE=E6=94=B9=20config=20=E9=BB=98=E8=AE=A4?= =?UTF-8?q?=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/config.py | 2 +- src/plugins/heartFC_chat/heartFC_chat.py | 12 ++++++++++++ src/plugins/heartFC_chat/heartflow_prompt_builder.py | 3 +++ template/bot_config_template.toml | 2 +- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/config/config.py b/src/config/config.py index 88898050..0dbc931a 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -279,7 +279,7 @@ class BotConfig: MAX_NICKNAMES_IN_PROMPT: int = 10 # Prompt 中最多注入的绰号数量 NICKNAME_PROBABILITY_SMOOTHING: int = 1 # 绰号加权随机选择的平滑因子 NICKNAME_QUEUE_MAX_SIZE: int = 100 # 绰号处理队列最大容量 - NICKNAME_PROCESS_SLEEP_INTERVAL: float = 0.5 # 绰号处理进程休眠间隔(秒) + NICKNAME_PROCESS_SLEEP_INTERVAL: float = 5 # 绰号处理进程休眠间隔(秒) # 模型配置 llm_reasoning: dict[str, str] = field(default_factory=lambda: {}) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index dcc92ebe..faf34896 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -29,6 +29,8 @@ from src.plugins.moods.moods import MoodManager from src.heart_flow.utils_chat import get_chat_type_and_target_info from rich.traceback import install from src.plugins.group_nickname.nickname_utils import trigger_nickname_analysis_if_needed +from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat +from src.plugins.group_nickname.nickname_utils import get_nickname_injection_for_prompt install(show_locals=True, extra_lines=3) @@ -865,6 +867,15 @@ class HeartFChatting: f"{self.log_prefix}[Planner] 临时移除的动作: {actions_to_remove_temporarily}, 当前可用: {list(current_available_actions.keys())}" ) + # 需要获取用于上下文的历史消息 + message_list_before_now = get_raw_msg_before_timestamp_with_chat( + chat_id=self.stream_id, + timestamp=time.time(), # 使用当前时间作为参考点 + limit=global_config.observation_context_size, # 使用与 prompt 构建一致的 limit + ) + # 调用工具函数获取格式化后的绰号字符串 + nickname_injection_str = await get_nickname_injection_for_prompt(self.chat_stream, message_list_before_now) + print(nickname_injection_str) # --- 构建提示词 (调用修改后的 PromptBuilder 方法) --- prompt = await prompt_builder.build_planner_prompt( is_group_chat=self.is_group_chat, # <-- Pass HFC state @@ -874,6 +885,7 @@ class HeartFChatting: current_mind=current_mind, # <-- Pass argument structured_info=self.sub_mind.structured_info_str, # <-- Pass SubMind info current_available_actions=current_available_actions, # <-- Pass determined actions + nickname_info=nickname_injection_str, ) # --- 调用 LLM (普通文本生成) --- diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index 44c3a1d0..dc9e43ee 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -55,6 +55,7 @@ def init_prompt(): Prompt( """你的名字是{bot_name},{prompt_personality},{chat_context_description}。需要基于以下信息决定如何参与对话: {structured_info_block} +{nickname_info} {chat_content_block} {current_mind_block} {cycle_info_block} @@ -766,6 +767,7 @@ class PromptBuilder: current_mind: Optional[str], structured_info: Dict[str, Any], current_available_actions: Dict[str, str], + nickname_info: str, # replan_prompt: str, # Replan logic still simplified ) -> str: """构建 Planner LLM 的提示词 (获取模板并填充数据)""" @@ -847,6 +849,7 @@ class PromptBuilder: prompt = planner_prompt_template.format( bot_name=global_config.BOT_NICKNAME, + nickname_info=nickname_info, prompt_personality=prompt_personality, chat_context_description=chat_context_description, structured_info_block=structured_info_block, diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index a74e121c..439b6a46 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -128,7 +128,7 @@ enable_nickname_mapping = false # 绰号映射功能总开关(默认关闭, max_nicknames_in_prompt = 10 # Prompt 中最多注入的绰号数量(防止token数量爆炸) nickname_probability_smoothing = 1 # 绰号加权随机选择的平滑因子 nickname_queue_max_size = 100 # 绰号处理队列最大容量 -nickname_process_sleep_interval = 0.5 # 绰号处理进程休眠间隔(秒) +nickname_process_sleep_interval = 5 # 绰号处理进程休眠间隔(秒) [memory] build_memory_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多 From ae70adb10dfe1ab2c97f86351ee07eb9676f838b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 22:26:41 +0000 Subject: [PATCH 38/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index faf34896..cb637081 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -870,8 +870,8 @@ class HeartFChatting: # 需要获取用于上下文的历史消息 message_list_before_now = get_raw_msg_before_timestamp_with_chat( chat_id=self.stream_id, - timestamp=time.time(), # 使用当前时间作为参考点 - limit=global_config.observation_context_size, # 使用与 prompt 构建一致的 limit + timestamp=time.time(), # 使用当前时间作为参考点 + limit=global_config.observation_context_size, # 使用与 prompt 构建一致的 limit ) # 调用工具函数获取格式化后的绰号字符串 nickname_injection_str = await get_nickname_injection_for_prompt(self.chat_stream, message_list_before_now) From 68cef9a7257529044265e44f0ea6a1cca8f6f2ea Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 06:34:33 +0800 Subject: [PATCH 39/58] =?UTF-8?q?=E6=9B=B4=E5=A5=BD=E7=9A=84=E9=9A=8F?= =?UTF-8?q?=E6=9C=BA=E9=80=89=E6=8B=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_utils.py | 118 +++++++++++-------- 1 file changed, 68 insertions(+), 50 deletions(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 812e14f6..c2168016 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -27,81 +27,56 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int 按次数降序排序。 """ if not all_nicknames_info: - # 如果输入为空,直接返回空列表 return [] - candidates = [] # 候选绰号列表,包含 (用户名, 绰号, 次数, 权重) + candidates = [] for user_name, nicknames in all_nicknames_info.items(): if nicknames: for nickname_entry in nicknames: - # nickname_entry 应该是 {"绰号": 次数} 格式 if isinstance(nickname_entry, dict) and len(nickname_entry) == 1: nickname, count = list(nickname_entry.items())[0] - # 确保次数是正整数 if isinstance(count, int) and count > 0: - # 添加平滑因子,避免概率为0,并让低频词也有机会 weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING candidates.append((user_name, nickname, count, weight)) else: - # 日志:记录无效的绰号次数 logger.warning(f"用户 '{user_name}' 的绰号 '{nickname}' 次数无效: {count}。已跳过。") else: - # 日志:记录无效的绰号条目格式 logger.warning(f"用户 '{user_name}' 的绰号条目格式无效: {nickname_entry}。已跳过。") if not candidates: - # 如果没有有效的候选绰号,返回空列表 return [] - # 计算总权重 - total_weight = sum(c[3] for c in candidates) + # 确定需要选择的数量 + num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) - if total_weight <= 0: - # 如果所有权重都无效或为0,则按原始次数排序选择前 N 个 - logger.warning("所有候选绰号的总权重为0或负数,将按原始次数选择 Top N。") - candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 - selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] - else: - # 计算归一化概率 - probabilities = [c[3] / total_weight for c in candidates] + try: + # 调用新的辅助函数进行不重复加权抽样 + selected_candidates_with_weight = weighted_sample_without_replacement(candidates, num_to_select) - # 使用概率分布进行加权随机选择(不重复) - num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) - try: - # 实现不重复加权抽样 - selected_indices = set() - selected = [] - attempts = 0 - max_attempts = num_to_select * 5 # 设置最大尝试次数,防止无限循环 + # 如果抽样结果数量不足(例如权重问题导致提前退出),可以考虑是否需要补充 + if len(selected_candidates_with_weight) < num_to_select: + logger.debug(f"加权随机选择后数量不足 ({len(selected_candidates_with_weight)}/{num_to_select}),补充选择次数最多的。") + # 筛选出未被选中的候选 + selected_ids = set((c[0], c[1]) for c in selected_candidates_with_weight) # 使用 (用户名, 绰号) 作为唯一标识 + remaining_candidates = [c for c in candidates if (c[0], c[1]) not in selected_ids] + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + needed = num_to_select - len(selected_candidates_with_weight) + selected_candidates_with_weight.extend(remaining_candidates[:needed]) - while len(selected) < num_to_select and attempts < max_attempts: - # 每次只选一个 - chosen_index = random.choices(range(len(candidates)), weights=probabilities, k=1)[0] - if chosen_index not in selected_indices: - selected_indices.add(chosen_index) - selected.append(candidates[chosen_index]) - attempts += 1 + except Exception as e: + # 日志:记录加权随机选择时发生的错误,并回退到简单选择 + logger.error(f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True) + # 出错时回退到选择次数最多的 N 个 + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + # 注意:这里需要选择包含权重的元组,或者调整后续处理 + selected_candidates_with_weight = candidates[:num_to_select] - # 如果尝试多次后仍未选够,补充出现次数最多的 - if len(selected) < num_to_select: - logger.debug(f"加权随机选择后数量不足 ({len(selected)}/{num_to_select}),补充选择次数最多的。") - remaining_candidates = [c for i, c in enumerate(candidates) if i not in selected_indices] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 - needed = num_to_select - len(selected) - selected.extend(remaining_candidates[:needed]) - except Exception as e: - # 日志:记录加权随机选择时发生的错误,并回退到简单选择 - logger.error(f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True) - # 出错时回退到选择次数最多的 N 个 - candidates.sort(key=lambda x: x[2], reverse=True) - selected = candidates[: global_config.MAX_NICKNAMES_IN_PROMPT] + # 格式化输出结果为 (用户名, 绰号, 次数),移除权重 + result = [(user, nick, count) for user, nick, count, _weight in selected_candidates_with_weight] - # 格式化输出结果为 (用户名, 绰号, 次数) - result = [(user, nick, count) for user, nick, count, _weight in selected] - result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 + result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 - # 日志:记录最终选中的用于 Prompt 的绰号 logger.debug(f"为 Prompt 选择的绰号: {result}") return result @@ -309,3 +284,46 @@ async def trigger_nickname_analysis_if_needed( except Exception as e: # 日志:记录触发分析过程中发生的任何其他错误 logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) + +def weighted_sample_without_replacement(candidates: List[Tuple[str, str, int, float]], k: int) -> List[Tuple[str, str, int, float]]: + """ + 执行不重复的加权随机抽样。 + + Args: + candidates: 候选列表,每个元素为 (用户名, 绰号, 次数, 权重)。 + k: 需要选择的数量。 + + Returns: + List[Tuple[str, str, int, float]]: 选中的元素列表。 + """ + if k <= 0: + return [] + if k >= len(candidates): + # 如果需要选择的数量大于或等于候选数量,直接返回所有候选 + return candidates[:] # 返回副本以避免修改原始列表 + + pool = candidates[:] # 创建候选列表的副本进行操作 + selected = [] + # 注意:原评论代码中计算 total_weight 但未使用,这里也省略。 + # random.choices 内部会处理权重的归一化。 + + for _ in range(min(k, len(pool))): # 确保迭代次数不超过池中剩余元素 + if not pool: # 如果池已空,提前结束 + break + + weights = [c[3] for c in pool] # 获取当前池中所有元素的权重 + # 检查权重是否有效 + if sum(weights) <= 0: + # 如果所有剩余权重无效,随机选择一个(或根据需要采取其他策略) + logger.warning("加权抽样池中剩余权重总和为0或负数,随机选择一个。") + chosen_index = random.randrange(len(pool)) + chosen = pool.pop(chosen_index) + else: + # 使用 random.choices 进行加权抽样,选择 1 个 + # random.choices 返回一个列表,所以取第一个元素 [0] + chosen = random.choices(pool, weights=weights, k=1)[0] + pool.remove(chosen) # 从池中移除选中的元素,实现不重复抽样 + + selected.append(chosen) + + return selected \ No newline at end of file From 7e3b2d5a4b870452161932b1ea6abc4d94a4a91d Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 06:38:52 +0800 Subject: [PATCH 40/58] =?UTF-8?q?=E5=87=BD=E6=95=B0=E6=8B=86=E5=88=86?= =?UTF-8?q?=EF=BC=8C=E6=8F=90=E9=AB=98=E4=BB=A3=E7=A0=81=E7=9A=84=E5=8F=AF?= =?UTF-8?q?=E8=AF=BB=E6=80=A7=E5=92=8C=E5=8F=AF=E7=BB=B4=E6=8A=A4=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../group_nickname/nickname_processor.py | 224 +++++++++++------- 1 file changed, 132 insertions(+), 92 deletions(-) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index ceb282da..f36244a4 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -3,7 +3,7 @@ import traceback import threading import queue from typing import Dict, Optional - +from pymongo.collection import Collection from pymongo.errors import OperationFailure, DuplicateKeyError # 引入 DuplicateKeyError from src.common.logger_manager import get_logger from src.common.database import db # 使用全局 db @@ -14,27 +14,144 @@ logger = get_logger("nickname_processor") _stop_event = threading.Event() +def _upsert_person(collection: Collection, person_id: str, user_id_int: int, platform: str): + """ + 确保数据库中存在指定 person_id 的文档 (Upsert)。 + 如果文档不存在,则使用提供的用户信息创建它。 + + Args: + collection: MongoDB 集合对象 (person_info)。 + person_id: 要查找或创建的 person_id。 + user_id_int: 用户的整数 ID。 + platform: 平台名称。 + + Returns: + UpdateResult: MongoDB 更新操作的结果。 + + Raises: + DuplicateKeyError: 如果发生重复键错误 (理论上不应由 upsert 触发)。 + Exception: 其他数据库操作错误。 + """ + try: + # 关键步骤:基于 person_id 执行 Upsert + # 如果文档不存在,它会被创建,并设置 $setOnInsert 中的字段。 + # 如果文档已存在,此操作不会修改任何内容。 + result = collection.update_one( + {"person_id": person_id}, + { + "$setOnInsert": { + "person_id": person_id, + "user_id": user_id_int, # 确保这里使用传入的 user_id_int + "platform": platform, + "group_nicknames": [], # 初始化 group_nicknames 数组 + } + }, + upsert=True, + ) + if result.upserted_id: + logger.debug(f"Upsert on person_id created new document: {person_id}") + # else: + # logger.debug(f"Upsert on person_id found existing document: {person_id}") + return result + except DuplicateKeyError as dk_err: + # 这个错误理论上不应该再由 upsert 触发。 + # 如果仍然出现,可能指示 person_id 生成逻辑问题或非常罕见的 MongoDB 内部情况。 + logger.error( + f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" + ) + raise # 将异常向上抛出,让调用者处理 + except Exception as e: + logger.exception(f"对 person_id {person_id} 执行 Upsert 时失败: {e}") + raise # 将异常向上抛出 + + +def _update_group_nickname(collection: Collection, person_id: str, group_id_str: str, nickname: str): + """ + 尝试更新 person_id 文档中特定群组的绰号计数,或添加新条目。 + 按顺序尝试:增加计数 -> 添加绰号 -> 添加群组。 + + Args: + collection: MongoDB 集合对象 (person_info)。 + person_id: 目标文档的 person_id。 + group_id_str: 目标群组的 ID (字符串)。 + nickname: 要更新或添加的绰号。 + """ + # 3a. 尝试增加现有群组中现有绰号的计数 + result_inc = collection.update_one( + { + "person_id": person_id, + "group_nicknames": { + "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} + }, + }, + {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, + array_filters=[ + {"group.group_id": group_id_str}, + {"nick.name": nickname}, + ], + ) + if result_inc.modified_count > 0: + # logger.debug(f"成功增加 person_id {person_id} 在群组 {group_id_str} 中绰号 '{nickname}' 的计数。") + return # 成功增加计数,操作完成 + + # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 + result_push_nick = collection.update_one( + { + "person_id": person_id, + "group_nicknames.group_id": group_id_str, # 检查群组是否存在 + }, + {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, + array_filters=[{"group.group_id": group_id_str}], + ) + if result_push_nick.modified_count > 0: + logger.debug(f"成功为 person_id {person_id} 在现有群组 {group_id_str} 中添加新绰号 '{nickname}'。") + return # 成功添加绰号,操作完成 + + # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 + # 确保 group_nicknames 数组存在 (作为保险措施) + collection.update_one( + {"person_id": person_id, "group_nicknames": {"$exists": False}}, + {"$set": {"group_nicknames": []}}, + ) + # 推送新的群组对象到 group_nicknames 数组 + result_push_group = collection.update_one( + { + "person_id": person_id, + "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 + }, + { + "$push": { + "group_nicknames": { + "group_id": group_id_str, + "nicknames": [{"name": nickname, "count": 1}], + } + } + }, + ) + if result_push_group.modified_count > 0: + logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'。") + # else: + # 如果连添加群组也失败 (例如 group_id 已存在但之前的步骤都未匹配,理论上不太可能), + # 可能需要进一步的日志或错误处理,但这通常意味着数据状态异常。 + # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dict[str, str]): """ 更新数据库中用户的群组绰号计数 (使用全局 db)。 - 通过首先基于 person_id 进行 upsert 来处理潜在的 race condition。 + 通过调用辅助函数来处理 person 文档的 upsert 和绰号更新。 Args: platform (str): 平台名称 (e.g., 'qq')。 group_id (str): 群组 ID。 nickname_map (Dict[str, str]): 用户 ID (字符串) 到绰号的映射。 """ - # 尝试导入 person_info_manager (放在函数内部以减少潜在的导入问题) try: - # 假设 person_info 在 group_nickname 的上一级目录 from ..person_info.person_info import person_info_manager except ImportError: logger.error("无法导入 person_info_manager,无法生成 person_id!") - return # 无法继续,因为需要 person_id + return person_info_collection = db.person_info - if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。") return @@ -61,97 +178,20 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic logger.error(f"无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") continue - # --- 步骤 2: 基于 person_id 执行 Upsert --- - # 这是关键步骤,用于原子性地确保文档存在,避免 person_id 冲突。 - # 如果文档不存在,它会被创建,并设置 $setOnInsert 中的字段。 - # 如果文档已存在,此操作不会修改任何内容(因为没有 $set 操作符)。 - upsert_result = person_info_collection.update_one( - {"person_id": person_id}, # Filter by the unique key - { - "$setOnInsert": { - "person_id": person_id, - "user_id": user_id_int, - "platform": platform, - "group_nicknames": [], # 初始化 group_nicknames 数组 - } - }, - upsert=True, - ) + # --- 步骤 2: 确保 Person 文档存在 (调用辅助函数) --- + _upsert_person(person_info_collection, person_id, user_id_int, platform) - # 可选日志:记录是否创建了新文档 - if upsert_result.upserted_id: - logger.debug(f"Upsert on person_id created new document: {person_id}") - # else: - # logger.debug(f"Upsert on person_id found existing document: {person_id}") + # --- 步骤 3: 更新群组绰号 (调用辅助函数) --- + _update_group_nickname(person_info_collection, person_id, group_id_str, nickname) - # --- 步骤 3: 更新群组绰号 --- - # 现在我们确信具有此 person_id 的文档存在,可以安全地更新其 group_nicknames。 - - # 3a. 尝试增加现有群组中现有绰号的计数 - update_result_inc = person_info_collection.update_one( - { - "person_id": person_id, # 明确目标文档 - "group_nicknames": { # 检查数组中是否有匹配项 - "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} - }, - }, - {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, # 增加计数 - array_filters=[ # 指定要更新的数组元素 - {"group.group_id": group_id_str}, - {"nick.name": nickname}, - ], - ) - - # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 - if update_result_inc.modified_count == 0: - update_result_push_nick = person_info_collection.update_one( - { - "person_id": person_id, # 明确目标文档 - "group_nicknames.group_id": group_id_str, # 检查群组是否存在 - }, - # 将新绰号添加到匹配群组的 nicknames 数组中 - {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, - array_filters=[{"group.group_id": group_id_str}], # 指定要推送到的群组 - ) - - # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 - if update_result_push_nick.modified_count == 0: - # 确保 group_nicknames 数组存在 (如果 $setOnInsert 失败或数据不一致时的保险措施) - person_info_collection.update_one( - {"person_id": person_id, "group_nicknames": {"$exists": False}}, - {"$set": {"group_nicknames": []}}, - ) - # 推送新的群组对象到 group_nicknames 数组 - update_result_push_group = person_info_collection.update_one( - { - "person_id": person_id, # 明确目标文档 - "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 - }, - { - "$push": { # 添加新的群组条目 - "group_nicknames": { - "group_id": group_id_str, - "nicknames": [{"name": nickname, "count": 1}], # 初始化绰号列表 - } - } - }, - ) - if update_result_push_group.modified_count > 0: - logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'") - - except DuplicateKeyError as dk_err: - # 这个错误理论上不应该再由步骤 2 的 upsert 触发。 - # 如果仍然出现,可能指示 person_id 生成逻辑问题或非常罕见的 MongoDB 内部情况。 - logger.error( - f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" - ) - except OperationFailure as op_err: + # --- 统一处理数据库操作可能抛出的异常 --- + except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 logger.exception( - f"数据库操作失败 (OperationFailure): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}({op_err})" + f"数据库操作失败 ({type(db_err).__name__}): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}. 错误: {db_err}" ) except Exception as e: - logger.exception(f"更新用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") - + # 捕获其他所有可能的错误 (例如 person_id 生成、辅助函数内部未捕获的错误等) + logger.exception(f"处理用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") # --- 使用 queue.Queue --- queue_max_size = getattr(global_config, "NICKNAME_QUEUE_MAX_SIZE", 100) From 681814f7d18007c5597f0d8224a6bc13b061a7a9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 23:00:56 +0000 Subject: [PATCH 41/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../group_nickname/nickname_processor.py | 29 ++++++++-------- src/plugins/group_nickname/nickname_utils.py | 34 +++++++++++-------- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index f36244a4..f765cb9b 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -14,6 +14,7 @@ logger = get_logger("nickname_processor") _stop_event = threading.Event() + def _upsert_person(collection: Collection, person_id: str, user_id_int: int, platform: str): """ 确保数据库中存在指定 person_id 的文档 (Upsert)。 @@ -41,7 +42,7 @@ def _upsert_person(collection: Collection, person_id: str, user_id_int: int, pla { "$setOnInsert": { "person_id": person_id, - "user_id": user_id_int, # 确保这里使用传入的 user_id_int + "user_id": user_id_int, # 确保这里使用传入的 user_id_int "platform": platform, "group_nicknames": [], # 初始化 group_nicknames 数组 } @@ -59,10 +60,10 @@ def _upsert_person(collection: Collection, person_id: str, user_id_int: int, pla logger.error( f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" ) - raise # 将异常向上抛出,让调用者处理 + raise # 将异常向上抛出,让调用者处理 except Exception as e: logger.exception(f"对 person_id {person_id} 执行 Upsert 时失败: {e}") - raise # 将异常向上抛出 + raise # 将异常向上抛出 def _update_group_nickname(collection: Collection, person_id: str, group_id_str: str, nickname: str): @@ -80,9 +81,7 @@ def _update_group_nickname(collection: Collection, person_id: str, group_id_str: result_inc = collection.update_one( { "person_id": person_id, - "group_nicknames": { - "$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname} - }, + "group_nicknames": {"$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname}}, }, {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, array_filters=[ @@ -92,20 +91,20 @@ def _update_group_nickname(collection: Collection, person_id: str, group_id_str: ) if result_inc.modified_count > 0: # logger.debug(f"成功增加 person_id {person_id} 在群组 {group_id_str} 中绰号 '{nickname}' 的计数。") - return # 成功增加计数,操作完成 + return # 成功增加计数,操作完成 # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 result_push_nick = collection.update_one( { "person_id": person_id, - "group_nicknames.group_id": group_id_str, # 检查群组是否存在 + "group_nicknames.group_id": group_id_str, # 检查群组是否存在 }, {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, array_filters=[{"group.group_id": group_id_str}], ) if result_push_nick.modified_count > 0: logger.debug(f"成功为 person_id {person_id} 在现有群组 {group_id_str} 中添加新绰号 '{nickname}'。") - return # 成功添加绰号,操作完成 + return # 成功添加绰号,操作完成 # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 # 确保 group_nicknames 数组存在 (作为保险措施) @@ -117,7 +116,7 @@ def _update_group_nickname(collection: Collection, person_id: str, group_id_str: result_push_group = collection.update_one( { "person_id": person_id, - "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 + "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 }, { "$push": { @@ -131,9 +130,10 @@ def _update_group_nickname(collection: Collection, person_id: str, group_id_str: if result_push_group.modified_count > 0: logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'。") # else: - # 如果连添加群组也失败 (例如 group_id 已存在但之前的步骤都未匹配,理论上不太可能), - # 可能需要进一步的日志或错误处理,但这通常意味着数据状态异常。 - # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") + # 如果连添加群组也失败 (例如 group_id 已存在但之前的步骤都未匹配,理论上不太可能), + # 可能需要进一步的日志或错误处理,但这通常意味着数据状态异常。 + # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") + async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dict[str, str]): """ @@ -185,7 +185,7 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic _update_group_nickname(person_info_collection, person_id, group_id_str, nickname) # --- 统一处理数据库操作可能抛出的异常 --- - except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 + except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 logger.exception( f"数据库操作失败 ({type(db_err).__name__}): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}. 错误: {db_err}" ) @@ -193,6 +193,7 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic # 捕获其他所有可能的错误 (例如 person_id 生成、辅助函数内部未捕获的错误等) logger.exception(f"处理用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") + # --- 使用 queue.Queue --- queue_max_size = getattr(global_config, "NICKNAME_QUEUE_MAX_SIZE", 100) nickname_queue: queue.Queue = queue.Queue(maxsize=queue_max_size) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index c2168016..9c05bd3a 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -55,11 +55,15 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int # 如果抽样结果数量不足(例如权重问题导致提前退出),可以考虑是否需要补充 if len(selected_candidates_with_weight) < num_to_select: - logger.debug(f"加权随机选择后数量不足 ({len(selected_candidates_with_weight)}/{num_to_select}),补充选择次数最多的。") + logger.debug( + f"加权随机选择后数量不足 ({len(selected_candidates_with_weight)}/{num_to_select}),补充选择次数最多的。" + ) # 筛选出未被选中的候选 - selected_ids = set((c[0], c[1]) for c in selected_candidates_with_weight) # 使用 (用户名, 绰号) 作为唯一标识 + selected_ids = set( + (c[0], c[1]) for c in selected_candidates_with_weight + ) # 使用 (用户名, 绰号) 作为唯一标识 remaining_candidates = [c for c in candidates if (c[0], c[1]) not in selected_ids] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 needed = num_to_select - len(selected_candidates_with_weight) selected_candidates_with_weight.extend(remaining_candidates[:needed]) @@ -67,15 +71,14 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int # 日志:记录加权随机选择时发生的错误,并回退到简单选择 logger.error(f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True) # 出错时回退到选择次数最多的 N 个 - candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 # 注意:这里需要选择包含权重的元组,或者调整后续处理 selected_candidates_with_weight = candidates[:num_to_select] - # 格式化输出结果为 (用户名, 绰号, 次数),移除权重 result = [(user, nick, count) for user, nick, count, _weight in selected_candidates_with_weight] - result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 + result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 logger.debug(f"为 Prompt 选择的绰号: {result}") return result @@ -285,7 +288,10 @@ async def trigger_nickname_analysis_if_needed( # 日志:记录触发分析过程中发生的任何其他错误 logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) -def weighted_sample_without_replacement(candidates: List[Tuple[str, str, int, float]], k: int) -> List[Tuple[str, str, int, float]]: + +def weighted_sample_without_replacement( + candidates: List[Tuple[str, str, int, float]], k: int +) -> List[Tuple[str, str, int, float]]: """ 执行不重复的加权随机抽样。 @@ -300,18 +306,18 @@ def weighted_sample_without_replacement(candidates: List[Tuple[str, str, int, fl return [] if k >= len(candidates): # 如果需要选择的数量大于或等于候选数量,直接返回所有候选 - return candidates[:] # 返回副本以避免修改原始列表 + return candidates[:] # 返回副本以避免修改原始列表 - pool = candidates[:] # 创建候选列表的副本进行操作 + pool = candidates[:] # 创建候选列表的副本进行操作 selected = [] # 注意:原评论代码中计算 total_weight 但未使用,这里也省略。 # random.choices 内部会处理权重的归一化。 - for _ in range(min(k, len(pool))): # 确保迭代次数不超过池中剩余元素 - if not pool: # 如果池已空,提前结束 + for _ in range(min(k, len(pool))): # 确保迭代次数不超过池中剩余元素 + if not pool: # 如果池已空,提前结束 break - weights = [c[3] for c in pool] # 获取当前池中所有元素的权重 + weights = [c[3] for c in pool] # 获取当前池中所有元素的权重 # 检查权重是否有效 if sum(weights) <= 0: # 如果所有剩余权重无效,随机选择一个(或根据需要采取其他策略) @@ -322,8 +328,8 @@ def weighted_sample_without_replacement(candidates: List[Tuple[str, str, int, fl # 使用 random.choices 进行加权抽样,选择 1 个 # random.choices 返回一个列表,所以取第一个元素 [0] chosen = random.choices(pool, weights=weights, k=1)[0] - pool.remove(chosen) # 从池中移除选中的元素,实现不重复抽样 + pool.remove(chosen) # 从池中移除选中的元素,实现不重复抽样 selected.append(chosen) - return selected \ No newline at end of file + return selected From cdebc31ef6e7f44445b663e1aab381a607333e1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=AA=E4=BE=86=E6=98=9F=E7=B9=94?= Date: Fri, 2 May 2025 07:09:28 +0800 Subject: [PATCH 42/58] Update src/plugins/group_nickname/nickname_mapper.py Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com> --- src/plugins/group_nickname/nickname_mapper.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 0723f8ee..1d834294 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -102,11 +102,12 @@ async def analyze_chat_for_nicknames( # 清理可能的 Markdown 代码块标记 response_content = response_content.strip() - if response_content.startswith("```json"): - response_content = response_content[7:] - if response_content.endswith("```"): - response_content = response_content[:-3] - response_content = response_content.strip() + # 使用正则表达式处理各种 Markdown 代码块情况 + import re + markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL) + match = markdown_code_regex.match(response_content) + if match: + response_content = match.group(1).strip() try: result = json.loads(response_content) From 3d0c594df2f9d6d9fced03324f108db5a9c250fe Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 1 May 2025 23:09:41 +0000 Subject: [PATCH 43/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 1d834294..64112ed6 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -104,6 +104,7 @@ async def analyze_chat_for_nicknames( response_content = response_content.strip() # 使用正则表达式处理各种 Markdown 代码块情况 import re + markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL) match = markdown_code_regex.match(response_content) if match: From fff7429b6c4d4087fe4a046197900435d8097690 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 07:11:31 +0800 Subject: [PATCH 44/58] =?UTF-8?q?=E5=88=A0=E9=99=A4=E8=B0=83=E8=AF=95?= =?UTF-8?q?=E8=AF=AD=E5=8F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/heartFC_chat/heartFC_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index cb637081..6d3e22ef 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -875,7 +875,7 @@ class HeartFChatting: ) # 调用工具函数获取格式化后的绰号字符串 nickname_injection_str = await get_nickname_injection_for_prompt(self.chat_stream, message_list_before_now) - print(nickname_injection_str) + # --- 构建提示词 (调用修改后的 PromptBuilder 方法) --- prompt = await prompt_builder.build_planner_prompt( is_group_chat=self.is_group_chat, # <-- Pass HFC state From a799cb5ffff56b6ad656162fa53235bdf0aa8793 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 17:56:20 +0800 Subject: [PATCH 45/58] =?UTF-8?q?prompt=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 9c05bd3a..2429df98 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -113,7 +113,7 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in # 构建每个用户的绰号字符串 for user_name, nicknames in grouped_by_user.items(): nicknames_str = "、".join(nicknames) # 使用中文顿号连接 - prompt_lines.append(f"- 你私下称呼ta为{user_name},ta被有时被群友称为:{nicknames_str}") # 格式化输出 + prompt_lines.append(f"- {user_name},ta被被群友称为:{nicknames_str}") # 格式化输出 # 如果只有标题行,返回空字符串,避免注入无意义的标题 if len(prompt_lines) > 1: From 5356bb59e7606e963f6f46b17e852595bd924216 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 20:06:56 +0800 Subject: [PATCH 46/58] =?UTF-8?q?=E7=89=88=E6=9C=AC=E6=8E=A7=E5=88=B6?= =?UTF-8?q?=E5=92=8C=E7=89=88=E6=9C=AC=E5=8F=B7=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/config.py | 21 +++++++++++---------- template/bot_config_template.toml | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/config/config.py b/src/config/config.py index 9f5cc9a6..23613948 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -411,16 +411,17 @@ class BotConfig: config.steal_emoji = emoji_config.get("steal_emoji", config.steal_emoji) def group_nickname(parent: dict): - gn_config = parent.get("group_nickname", {}) - config.ENABLE_NICKNAME_MAPPING = gn_config.get("enable_nickname_mapping", config.ENABLE_NICKNAME_MAPPING) - config.MAX_NICKNAMES_IN_PROMPT = gn_config.get("max_nicknames_in_prompt", config.MAX_NICKNAMES_IN_PROMPT) - config.NICKNAME_PROBABILITY_SMOOTHING = gn_config.get( - "nickname_probability_smoothing", config.NICKNAME_PROBABILITY_SMOOTHING - ) - config.NICKNAME_QUEUE_MAX_SIZE = gn_config.get("nickname_queue_max_size", config.NICKNAME_QUEUE_MAX_SIZE) - config.NICKNAME_PROCESS_SLEEP_INTERVAL = gn_config.get( - "nickname_process_sleep_interval", config.NICKNAME_PROCESS_SLEEP_INTERVAL - ) + if config.INNER_VERSION in SpecifierSet(">=1.6.2"): + gn_config = parent.get("group_nickname", {}) + config.ENABLE_NICKNAME_MAPPING = gn_config.get("enable_nickname_mapping", config.ENABLE_NICKNAME_MAPPING) + config.MAX_NICKNAMES_IN_PROMPT = gn_config.get("max_nicknames_in_prompt", config.MAX_NICKNAMES_IN_PROMPT) + config.NICKNAME_PROBABILITY_SMOOTHING = gn_config.get( + "nickname_probability_smoothing", config.NICKNAME_PROBABILITY_SMOOTHING + ) + config.NICKNAME_QUEUE_MAX_SIZE = gn_config.get("nickname_queue_max_size", config.NICKNAME_QUEUE_MAX_SIZE) + config.NICKNAME_PROCESS_SLEEP_INTERVAL = gn_config.get( + "nickname_process_sleep_interval", config.NICKNAME_PROCESS_SLEEP_INTERVAL + ) def bot(parent: dict): # 机器人基础配置 diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 439b6a46..81cead61 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "1.6.1" +version = "1.6.2" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- #如果你想要修改配置文件,请在修改后将version的值进行变更 From 9b938215dbf136b15e44dc5066bcd38f324cc9d7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 2 May 2025 13:32:07 +0000 Subject: [PATCH 47/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/config.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/config/config.py b/src/config/config.py index 23613948..3c90c201 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -413,12 +413,18 @@ class BotConfig: def group_nickname(parent: dict): if config.INNER_VERSION in SpecifierSet(">=1.6.2"): gn_config = parent.get("group_nickname", {}) - config.ENABLE_NICKNAME_MAPPING = gn_config.get("enable_nickname_mapping", config.ENABLE_NICKNAME_MAPPING) - config.MAX_NICKNAMES_IN_PROMPT = gn_config.get("max_nicknames_in_prompt", config.MAX_NICKNAMES_IN_PROMPT) + config.ENABLE_NICKNAME_MAPPING = gn_config.get( + "enable_nickname_mapping", config.ENABLE_NICKNAME_MAPPING + ) + config.MAX_NICKNAMES_IN_PROMPT = gn_config.get( + "max_nicknames_in_prompt", config.MAX_NICKNAMES_IN_PROMPT + ) config.NICKNAME_PROBABILITY_SMOOTHING = gn_config.get( "nickname_probability_smoothing", config.NICKNAME_PROBABILITY_SMOOTHING ) - config.NICKNAME_QUEUE_MAX_SIZE = gn_config.get("nickname_queue_max_size", config.NICKNAME_QUEUE_MAX_SIZE) + config.NICKNAME_QUEUE_MAX_SIZE = gn_config.get( + "nickname_queue_max_size", config.NICKNAME_QUEUE_MAX_SIZE + ) config.NICKNAME_PROCESS_SLEEP_INTERVAL = gn_config.get( "nickname_process_sleep_interval", config.NICKNAME_PROCESS_SLEEP_INTERVAL ) From cf999792befd6291603d50d2c262ed6e44d96f41 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 21:32:37 +0800 Subject: [PATCH 48/58] typo --- src/plugins/group_nickname/nickname_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 2429df98..83931417 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -113,7 +113,7 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in # 构建每个用户的绰号字符串 for user_name, nicknames in grouped_by_user.items(): nicknames_str = "、".join(nicknames) # 使用中文顿号连接 - prompt_lines.append(f"- {user_name},ta被被群友称为:{nicknames_str}") # 格式化输出 + prompt_lines.append(f"- {user_name},ta被群友称为:{nicknames_str}") # 格式化输出 # 如果只有标题行,返回空字符串,避免注入无意义的标题 if len(prompt_lines) > 1: From 76a37452a037feb890aef9fcc3efa1ccd5afe93d Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 22:36:27 +0800 Subject: [PATCH 49/58] =?UTF-8?q?=E5=B0=86=E6=A8=A1=E5=9D=97=E5=AF=BC?= =?UTF-8?q?=E5=85=A5=E6=94=BE=E5=88=B0=E9=A1=B6=E4=B8=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 64112ed6..783013b1 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -1,3 +1,4 @@ +import re import json from typing import Dict, Any, Optional from src.common.logger_manager import get_logger @@ -103,8 +104,6 @@ async def analyze_chat_for_nicknames( # 清理可能的 Markdown 代码块标记 response_content = response_content.strip() # 使用正则表达式处理各种 Markdown 代码块情况 - import re - markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL) match = markdown_code_regex.match(response_content) if match: From b8d243873b8cccc3e5020d050b09e9222fa7945a Mon Sep 17 00:00:00 2001 From: Bakadax Date: Fri, 2 May 2025 22:42:49 +0800 Subject: [PATCH 50/58] =?UTF-8?q?=E9=81=BF=E5=85=8Dtry=20catch=E5=B5=8C?= =?UTF-8?q?=E5=A5=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 139 ++++++++++-------- 1 file changed, 75 insertions(+), 64 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 783013b1..e3e47d66 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -103,76 +103,87 @@ async def analyze_chat_for_nicknames( # 清理可能的 Markdown 代码块标记 response_content = response_content.strip() - # 使用正则表达式处理各种 Markdown 代码块情况 markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL) match = markdown_code_regex.match(response_content) if match: response_content = match.group(1).strip() - try: - result = json.loads(response_content) - if isinstance(result, dict) and "is_exist" in result: - if result["is_exist"] is True: - original_data = result.get("data") # 使用 .get() 更安全 - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 - logger.info(f"LLM 找到的原始绰号映射: {original_data}") + # 解析 JSON + result = json.loads(response_content) # 可能抛出 json.JSONDecodeError - # --- 开始过滤 --- - filtered_data = {} - bot_qq_str = str(global_config.BOT_QQ) # 将机器人QQ转为字符串以便比较 - - for user_id, nickname in original_data.items(): - # 检查 user_id 是否是字符串,以防万一 - if not isinstance(user_id, str): - logger.warning(f"LLM 返回的 user_id '{user_id}' 不是字符串,跳过。") - continue - - # 条件 1: 排除机器人自身 - if user_id == bot_qq_str: - logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") - continue - - # 有了改名工具后,该过滤器已不适合了,尝试通过修改 prompt 获得更好的结果 - # # 条件 2: 排除 nickname 与 person_name 相同的情况 - # person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name - # if person_name and person_name == nickname: - # logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") - # continue - - # 如果通过所有过滤条件,则保留 - filtered_data[user_id] = nickname - # --- 结束过滤 --- - - # 检查过滤后是否还有数据 - if not filtered_data: - logger.info("所有找到的绰号映射都被过滤掉了。") - return {"is_exist": False} - else: - logger.info(f"过滤后的绰号映射: {filtered_data}") - return {"is_exist": True, "data": filtered_data} # 返回过滤后的数据 - - else: - # is_exist 为 True 但 data 缺失、不是字典或为空 - if "data" not in result: - logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") - elif not isinstance(result.get("data"), dict): - logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。") - else: # data 为空字典 - logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") - return {"is_exist": False} - elif result["is_exist"] is False: - logger.info("LLM 未找到可靠的绰号映射。") - return {"is_exist": False} - else: - logger.warning("LLM 响应格式错误: 'is_exist' 不是布尔值。") - return {"is_exist": False} - else: - logger.warning("LLM 响应格式错误: 缺少 'is_exist' 键或不是字典。") - return {"is_exist": False} - except json.JSONDecodeError as json_err: - logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") + # 检查 result 是否为字典 + if not isinstance(result, dict): + logger.warning(f"LLM 响应不是一个有效的 JSON 对象 (字典类型)。响应内容: {response_content}") return {"is_exist": False} - except Exception as e: - logger.error(f"绰号映射 LLM 调用或处理过程中出错: {e}", exc_info=True) + # 使用 get 获取 is_exist,避免 KeyError + is_exist = result.get("is_exist") # 如果 result 不是字典,下面 get 会在 except AttributeError 中捕获 + + if is_exist is True: + original_data = result.get("data") + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + logger.info(f"LLM 找到的原始绰号映射: {original_data}") + + # --- 开始过滤 --- + filtered_data = {} + bot_qq_str = str(global_config.BOT_QQ) + + for user_id, nickname in original_data.items(): + if not isinstance(user_id, str): + logger.warning(f"LLM 返回的 user_id '{user_id}' 不是字符串,跳过。") + continue + if user_id == bot_qq_str: + logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") + continue + + # 有了改名工具后,该过滤器已不适合了,尝试通过修改 prompt 获得更好的结果 + # # 条件 2: 排除 nickname 与 person_name 相同的情况 + # person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name + # if person_name and person_name == nickname: + # logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") + # continue + + # 如果通过所有过滤条件,则保留 + filtered_data[user_id] = nickname + + # 检查过滤后是否还有数据 + if not filtered_data: + logger.info("所有找到的绰号映射都被过滤掉了。") + return {"is_exist": False} + else: + logger.info(f"过滤后的绰号映射: {filtered_data}") + return {"is_exist": True, "data": filtered_data} + else: + # is_exist 为 True 但 data 缺失、不是字典或为空 + if "data" not in result: + logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") + elif not isinstance(original_data, dict): + logger.warning(f"LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。 原始 data: {original_data}") + else: # data 为空字典 + logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") + return {"is_exist": False} + + elif is_exist is False: + logger.info("LLM 未找到可靠的绰号映射。") + return {"is_exist": False} + + elif is_exist is None: # 处理 is_exist 键存在但值为 null/None 的情况 + logger.warning("LLM 响应格式错误: 'is_exist' 键的值为 None。") + return {"is_exist": False} + + else: # 处理 is_exist 存在但值不是 True/False/None 的情况 + logger.warning(f"LLM 响应格式错误: 'is_exist' 的值 '{is_exist}' 不是预期的布尔值或 None。") + return {"is_exist": False} + + + except json.JSONDecodeError as json_err: + logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") + return {"is_exist": False} + except AttributeError as attr_err: + # 这个理论上不应该发生,因为在调用 get 前检查了 result 是否是 dict + logger.error(f"处理 LLM 响应时发生属性错误 (可能尝试在非字典对象上使用 .get): {attr_err}\n原始响应: {response_content}") + return {"is_exist": False} + except Exception as e: + # 捕获其他所有未预料到的异常 + logger.error(f"绰号映射 LLM 调用或处理过程中发生未预料的错误: {e}", exc_info=True) return {"is_exist": False} From 369d078c6d156adda38bfad28bcd78ee88ade916 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 2 May 2025 14:43:10 +0000 Subject: [PATCH 51/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index e3e47d66..d3390a18 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -109,7 +109,7 @@ async def analyze_chat_for_nicknames( response_content = match.group(1).strip() # 解析 JSON - result = json.loads(response_content) # 可能抛出 json.JSONDecodeError + result = json.loads(response_content) # 可能抛出 json.JSONDecodeError # 检查 result 是否为字典 if not isinstance(result, dict): @@ -117,11 +117,11 @@ async def analyze_chat_for_nicknames( return {"is_exist": False} # 使用 get 获取 is_exist,避免 KeyError - is_exist = result.get("is_exist") # 如果 result 不是字典,下面 get 会在 except AttributeError 中捕获 + is_exist = result.get("is_exist") # 如果 result 不是字典,下面 get 会在 except AttributeError 中捕获 if is_exist is True: original_data = result.get("data") - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 + if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 logger.info(f"LLM 找到的原始绰号映射: {original_data}") # --- 开始过滤 --- @@ -158,8 +158,10 @@ async def analyze_chat_for_nicknames( if "data" not in result: logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") elif not isinstance(original_data, dict): - logger.warning(f"LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。 原始 data: {original_data}") - else: # data 为空字典 + logger.warning( + f"LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。 原始 data: {original_data}" + ) + else: # data 为空字典 logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") return {"is_exist": False} @@ -167,21 +169,22 @@ async def analyze_chat_for_nicknames( logger.info("LLM 未找到可靠的绰号映射。") return {"is_exist": False} - elif is_exist is None: # 处理 is_exist 键存在但值为 null/None 的情况 + elif is_exist is None: # 处理 is_exist 键存在但值为 null/None 的情况 logger.warning("LLM 响应格式错误: 'is_exist' 键的值为 None。") return {"is_exist": False} - else: # 处理 is_exist 存在但值不是 True/False/None 的情况 + else: # 处理 is_exist 存在但值不是 True/False/None 的情况 logger.warning(f"LLM 响应格式错误: 'is_exist' 的值 '{is_exist}' 不是预期的布尔值或 None。") return {"is_exist": False} - except json.JSONDecodeError as json_err: logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") return {"is_exist": False} except AttributeError as attr_err: # 这个理论上不应该发生,因为在调用 get 前检查了 result 是否是 dict - logger.error(f"处理 LLM 响应时发生属性错误 (可能尝试在非字典对象上使用 .get): {attr_err}\n原始响应: {response_content}") + logger.error( + f"处理 LLM 响应时发生属性错误 (可能尝试在非字典对象上使用 .get): {attr_err}\n原始响应: {response_content}" + ) return {"is_exist": False} except Exception as e: # 捕获其他所有未预料到的异常 From ea5fb0d77d2872f3786c5a220ac6be102c750d6e Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 3 May 2025 13:08:42 +0800 Subject: [PATCH 52/58] =?UTF-8?q?=E5=88=A0=E9=99=A4try=E5=AF=BC=E5=85=A5?= =?UTF-8?q?=E6=A8=A1=E5=9D=97=EF=BC=8C=E4=BD=BF=E7=94=A8isdigit=E6=A3=80?= =?UTF-8?q?=E6=9F=A5user=5Fid=5Fstr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/group_nickname/nickname_mapper.py | 8 ++++---- .../group_nickname/nickname_processor.py | 19 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index e3e47d66..89238930 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -179,10 +179,10 @@ async def analyze_chat_for_nicknames( except json.JSONDecodeError as json_err: logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") return {"is_exist": False} - except AttributeError as attr_err: - # 这个理论上不应该发生,因为在调用 get 前检查了 result 是否是 dict - logger.error(f"处理 LLM 响应时发生属性错误 (可能尝试在非字典对象上使用 .get): {attr_err}\n原始响应: {response_content}") - return {"is_exist": False} + # except AttributeError as attr_err: + # # 这个理论上不应该发生,因为在调用 get 前检查了 result 是否是 dict + # logger.error(f"处理 LLM 响应时发生属性错误 (可能尝试在非字典对象上使用 .get): {attr_err}\n原始响应: {response_content}") + # return {"is_exist": False} except Exception as e: # 捕获其他所有未预料到的异常 logger.error(f"绰号映射 LLM 调用或处理过程中发生未预料的错误: {e}", exc_info=True) diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py index f765cb9b..a73811bb 100644 --- a/src/plugins/group_nickname/nickname_processor.py +++ b/src/plugins/group_nickname/nickname_processor.py @@ -9,6 +9,7 @@ from src.common.logger_manager import get_logger from src.common.database import db # 使用全局 db from .nickname_mapper import analyze_chat_for_nicknames from src.config.config import global_config +from ..person_info.person_info import person_info_manager logger = get_logger("nickname_processor") @@ -145,12 +146,6 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic group_id (str): 群组 ID。 nickname_map (Dict[str, str]): 用户 ID (字符串) 到绰号的映射。 """ - try: - from ..person_info.person_info import person_info_manager - except ImportError: - logger.error("无法导入 person_info_manager,无法生成 person_id!") - return - person_info_collection = db.person_info if not nickname_map: logger.debug("提供的用于更新的绰号映射为空。") @@ -164,11 +159,15 @@ async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dic logger.warning(f"跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'") continue group_id_str = str(group_id) - try: - user_id_int = int(user_id_str) - except ValueError: - logger.warning(f"无效的用户ID格式: '{user_id_str}',跳过。") + + # 使用 isdigit() 检查 user_id_str 是否为纯数字字符串 + if not user_id_str.isdigit(): + # isdigit() 会对空字符串返回 False,并且不识别负号、小数点等 + logger.warning(f"无效的用户ID格式 (非纯数字): '{user_id_str}',跳过。") continue + + user_id_int = int(user_id_str) + # --- 结束验证 --- try: From 612d4b1a7e0e4375eed017eedf482bfeed754f47 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 3 May 2025 14:03:13 +0800 Subject: [PATCH 53/58] =?UTF-8?q?=E9=87=8D=E6=9E=84=E9=83=A8=E5=88=86?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BB=A5=E4=BD=BF=E7=94=A8class=E5=B0=81?= =?UTF-8?q?=E8=A3=85=EF=BC=8C=E8=B0=83=E6=88=90=E9=BB=98=E8=AE=A4=E4=BC=91?= =?UTF-8?q?=E7=9C=A0=E9=97=B4=E9=9A=94=E4=B8=BA1=E5=88=86=E9=92=9F?= =?UTF-8?q?=EF=BC=8C=E8=B0=83=E6=95=B4=E7=BA=BF=E7=A8=8B=E5=90=AF=E5=8A=A8?= =?UTF-8?q?=E9=A1=BA=E5=BA=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 27 +- src/config/config.py | 2 +- src/plugins/group_nickname/nickname_db.py | 156 +++++ .../group_nickname/nickname_manager.py | 534 ++++++++++++++++++ src/plugins/group_nickname/nickname_mapper.py | 210 ++----- .../group_nickname/nickname_processor.py | 346 ------------ src/plugins/group_nickname/nickname_utils.py | 279 ++------- src/plugins/heartFC_chat/heartFC_chat.py | 7 +- .../heartFC_chat/heartflow_prompt_builder.py | 6 +- src/plugins/heartFC_chat/normal_chat.py | 4 +- template/bot_config_template.toml | 2 +- template/lpmm_config_template.toml | 4 +- 12 files changed, 825 insertions(+), 752 deletions(-) create mode 100644 src/plugins/group_nickname/nickname_db.py create mode 100644 src/plugins/group_nickname/nickname_manager.py delete mode 100644 src/plugins/group_nickname/nickname_processor.py diff --git a/bot.py b/bot.py index f324a432..c1b3a253 100644 --- a/bot.py +++ b/bot.py @@ -14,10 +14,7 @@ from src.common.logger_manager import get_logger from src.common.crash_logger import install_crash_handler from src.main import MainSystem from rich.traceback import install -from src.plugins.group_nickname.nickname_processor import ( - start_nickname_processor, - stop_nickname_processor, -) +from src.plugins.group_nickname.nickname_manager import nickname_manager import atexit install(extra_lines=3) @@ -226,6 +223,19 @@ def raw_main(): env_config = {key: os.getenv(key) for key in os.environ} scan_provider(env_config) + # 确保 NicknameManager 单例实例存在并已初始化 + # (单例模式下,导入时或第一次调用时会自动初始化) + _ = nickname_manager # 显式引用一次 + + # 启动 NicknameManager 的后台处理器线程 + logger.info("准备启动绰号处理管理器...") + nickname_manager.start_processor() # 调用实例的方法 + logger.info("已调用启动绰号处理管理器。") + + # 注册 NicknameManager 的停止方法到 atexit,确保程序退出时线程能被清理 + atexit.register(nickname_manager.stop_processor) # 注册实例的方法 + logger.info("已注册绰号处理管理器的退出处理程序。") + # 返回MainSystem实例 return MainSystem() @@ -235,15 +245,6 @@ if __name__ == "__main__": # 获取MainSystem实例 main_system = raw_main() - # 在这里启动绰号处理进程 - logger.info("准备启动绰号处理线程...") - start_nickname_processor() # <--- 添加启动调用 - logger.info("已调用启动绰号处理线程。") - - # 注册退出处理函数 (确保进程能被关闭) - atexit.register(stop_nickname_processor) # <--- 在这里注册停止函数 - logger.info("已注册绰号处理线程的退出处理程序。") - # 创建事件循环 loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) diff --git a/src/config/config.py b/src/config/config.py index 3c90c201..312f3e95 100644 --- a/src/config/config.py +++ b/src/config/config.py @@ -279,7 +279,7 @@ class BotConfig: MAX_NICKNAMES_IN_PROMPT: int = 10 # Prompt 中最多注入的绰号数量 NICKNAME_PROBABILITY_SMOOTHING: int = 1 # 绰号加权随机选择的平滑因子 NICKNAME_QUEUE_MAX_SIZE: int = 100 # 绰号处理队列最大容量 - NICKNAME_PROCESS_SLEEP_INTERVAL: float = 5 # 绰号处理进程休眠间隔(秒) + NICKNAME_PROCESS_SLEEP_INTERVAL: float = 60 # 绰号处理进程休眠间隔(秒) # 模型配置 llm_reasoning: dict[str, str] = field(default_factory=lambda: {}) diff --git a/src/plugins/group_nickname/nickname_db.py b/src/plugins/group_nickname/nickname_db.py new file mode 100644 index 00000000..d0c6d975 --- /dev/null +++ b/src/plugins/group_nickname/nickname_db.py @@ -0,0 +1,156 @@ +from pymongo.collection import Collection +from pymongo.errors import OperationFailure, DuplicateKeyError +from src.common.logger_manager import get_logger +from typing import Optional + +logger = get_logger("nickname_db") + +class NicknameDB: + """ + 处理与群组绰号相关的数据库操作 (MongoDB)。 + 封装了对 'person_info' 集合的读写操作。 + """ + def __init__(self, person_info_collection: Optional[Collection]): + """ + 初始化 NicknameDB 处理器。 + + Args: + person_info_collection: MongoDB 'person_info' 集合对象。 + 如果为 None,则数据库操作将被禁用。 + """ + if person_info_collection is None: + logger.error("未提供 person_info 集合,NicknameDB 操作将被禁用。") + self.person_info_collection = None + else: + self.person_info_collection = person_info_collection + logger.info("NicknameDB 初始化成功。") + + def is_available(self) -> bool: + """检查数据库集合是否可用。""" + return self.person_info_collection is not None + + def upsert_person(self, person_id: str, user_id_int: int, platform: str): + """ + 确保数据库中存在指定 person_id 的文档 (Upsert)。 + 如果文档不存在,则使用提供的用户信息创建它。 + + Args: + person_id: 要查找或创建的 person_id。 + user_id_int: 用户的整数 ID。 + platform: 平台名称。 + + Returns: + UpdateResult 或 None: MongoDB 更新操作的结果,如果数据库不可用则返回 None。 + + Raises: + DuplicateKeyError: 如果发生重复键错误 (理论上不应由 upsert 触发)。 + Exception: 其他数据库操作错误。 + """ + if not self.is_available(): + logger.error("数据库集合不可用,无法执行 upsert_person。") + return None + try: + # 关键步骤:基于 person_id 执行 Upsert + result = self.person_info_collection.update_one( + {"person_id": person_id}, + { + "$setOnInsert": { + "person_id": person_id, + "user_id": user_id_int, + "platform": platform, + "group_nicknames": [], # 初始化 group_nicknames 数组 + } + }, + upsert=True, + ) + if result.upserted_id: + logger.debug(f"Upsert 创建了新的 person 文档: {person_id}") + return result + except DuplicateKeyError as dk_err: + # 这个错误理论上不应该再由 upsert 触发。 + logger.error( + f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" + ) + raise # 将异常向上抛出 + except Exception as e: + logger.exception(f"对 person_id {person_id} 执行 Upsert 时失败: {e}") + raise # 将异常向上抛出 + + def update_group_nickname_count(self, person_id: str, group_id_str: str, nickname: str): + """ + 尝试更新 person_id 文档中特定群组的绰号计数,或添加新条目。 + 按顺序尝试:增加计数 -> 添加绰号 -> 添加群组。 + + Args: + person_id: 目标文档的 person_id。 + group_id_str: 目标群组的 ID (字符串)。 + nickname: 要更新或添加的绰号。 + """ + if not self.is_available(): + logger.error("数据库集合不可用,无法执行 update_group_nickname_count。") + return + + try: + # 3a. 尝试增加现有群组中现有绰号的计数 + result_inc = self.person_info_collection.update_one( + { + "person_id": person_id, + "group_nicknames": {"$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname}}, + }, + {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, + array_filters=[ + {"group.group_id": group_id_str}, + {"nick.name": nickname}, + ], + ) + if result_inc.modified_count > 0: + # logger.debug(f"成功增加 person_id {person_id} 在群组 {group_id_str} 中绰号 '{nickname}' 的计数。") + return # 成功增加计数,操作完成 + + # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 + result_push_nick = self.person_info_collection.update_one( + { + "person_id": person_id, + "group_nicknames.group_id": group_id_str, # 检查群组是否存在 + }, + {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, + array_filters=[{"group.group_id": group_id_str}], + ) + if result_push_nick.modified_count > 0: + logger.debug(f"成功为 person_id {person_id} 在现有群组 {group_id_str} 中添加新绰号 '{nickname}'。") + return # 成功添加绰号,操作完成 + + # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 + # 确保 group_nicknames 数组存在 (作为保险措施) + self.person_info_collection.update_one( + {"person_id": person_id, "group_nicknames": {"$exists": False}}, + {"$set": {"group_nicknames": []}}, + ) + # 推送新的群组对象到 group_nicknames 数组 + result_push_group = self.person_info_collection.update_one( + { + "person_id": person_id, + "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 + }, + { + "$push": { + "group_nicknames": { + "group_id": group_id_str, + "nicknames": [{"name": nickname, "count": 1}], + } + } + }, + ) + if result_push_group.modified_count > 0: + logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'。") + # else: + # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") + + except (OperationFailure, DuplicateKeyError) as db_err: + logger.exception( + f"数据库操作失败 ({type(db_err).__name__}): person_id {person_id}, 群组 {group_id_str}, 绰号 {nickname}. 错误: {db_err}" + ) + # 根据需要决定是否向上抛出 raise db_err + except Exception as e: + logger.exception(f"更新群组绰号计数时发生意外错误: person_id {person_id}, group {group_id_str}, nick {nickname}. Error: {e}") + # 根据需要决定是否向上抛出 raise e \ No newline at end of file diff --git a/src/plugins/group_nickname/nickname_manager.py b/src/plugins/group_nickname/nickname_manager.py new file mode 100644 index 00000000..3d47ad1d --- /dev/null +++ b/src/plugins/group_nickname/nickname_manager.py @@ -0,0 +1,534 @@ +import asyncio +import threading +import queue +import traceback +import time +import json +import re +from typing import Dict, Optional, List, Any + +from pymongo.errors import OperationFailure, DuplicateKeyError +from src.common.logger_manager import get_logger +from src.common.database import db +from src.config.config import global_config +from src.plugins.models.utils_model import LLMRequest +from .nickname_db import NicknameDB +from .nickname_mapper import _build_mapping_prompt +from .nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_injection + +# 依赖于 person_info_manager 来生成 person_id +from ..person_info.person_info import person_info_manager +# 依赖于 relationship_manager 来获取用户名称和现有绰号 +from ..person_info.relationship_manager import relationship_manager + +# 导入消息和聊天流相关的类型和工具 +from src.plugins.chat.chat_stream import ChatStream +from src.plugins.chat.message import MessageRecv +from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat + +logger = get_logger("NicknameManager") + +class NicknameManager: + """ + 管理群组绰号分析、处理、存储和使用的单例类。 + 封装了 LLM 调用、后台处理线程和数据库交互。 + """ + _instance = None + _lock = threading.Lock() + + # Singleton Implementation + def __new__(cls, *args, **kwargs): + if not cls._instance: + with cls._lock: + # 再次检查,防止多线程并发创建实例 + if not cls._instance: + logger.info("正在创建 NicknameManager 单例实例...") + cls._instance = super(NicknameManager, cls).__new__(cls) + cls._instance._initialized = False # 添加初始化标志 + return cls._instance + + def __init__(self): + """ + 初始化 NicknameManager。 + 使用锁和标志确保实际初始化只执行一次。 + """ + if self._initialized: # 如果已初始化,直接返回 + return + + with self._lock: + # 再次检查初始化标志,防止重复初始化 + if self._initialized: + return + + logger.info("正在初始化 NicknameManager 组件...") + self.config = global_config + self.is_enabled = self.config.ENABLE_NICKNAME_MAPPING + + # 数据库处理器 + person_info_collection = getattr(db, 'person_info', None) + self.db_handler = NicknameDB(person_info_collection) + if not self.db_handler.is_available(): + logger.error("数据库处理器初始化失败,NicknameManager 功能受限。") + self.is_enabled = False # 如果数据库不可用,禁用功能 + + # LLM 映射器 + self.llm_mapper: Optional[LLMRequest] = None + if self.is_enabled: + try: + model_config = self.config.llm_nickname_mapping + if model_config and model_config.get("name"): + self.llm_mapper = LLMRequest( + model=model_config, + temperature=model_config.get("temp", 0.5), # 使用 get 获取并提供默认值 + max_tokens=model_config.get("max_tokens", 256), # 使用 get 获取并提供默认值 + request_type="nickname_mapping", + ) + logger.info("绰号映射 LLM 映射器初始化成功。") + else: + logger.warning("绰号映射 LLM 配置无效或缺失 'name',功能禁用。") + self.is_enabled = False + except KeyError as ke: + logger.error(f"初始化绰号映射 LLM 时缺少配置项: {ke},功能禁用。", exc_info=True) + self.llm_mapper = None + self.is_enabled = False + except Exception as e: + logger.error(f"初始化绰号映射 LLM 映射器失败: {e},功能禁用。", exc_info=True) + self.llm_mapper = None + self.is_enabled = False + + # 队列和线程 + self.queue_max_size = getattr(self.config, "NICKNAME_QUEUE_MAX_SIZE", 100) + self.nickname_queue: queue.Queue = queue.Queue(maxsize=self.queue_max_size) + self._stop_event = threading.Event() + self._nickname_thread: Optional[threading.Thread] = None + self.sleep_interval = getattr(self.config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) + + self._initialized = True # 标记为已初始化 + logger.info("NicknameManager 初始化完成。") + + # 公共方法 + + def start_processor(self): + """启动后台处理线程(如果已启用且未运行)。""" + if not self.is_enabled: + logger.info("绰号处理功能已禁用,处理器未启动。") + return + if self._nickname_thread is None or not self._nickname_thread.is_alive(): + logger.info("正在启动绰号处理器线程...") + self._stop_event.clear() # 清除停止事件标志 + self._nickname_thread = threading.Thread( + target=self._run_processor_in_thread, # 线程执行的入口函数 + daemon=True # 设置为守护线程,主程序退出时自动结束 + ) + self._nickname_thread.start() + logger.info(f"绰号处理器线程已启动 (ID: {self._nickname_thread.ident})") + else: + logger.warning("绰号处理器线程已在运行中。") + + def stop_processor(self): + """停止后台处理线程。""" + if self._nickname_thread and self._nickname_thread.is_alive(): + logger.info("正在停止绰号处理器线程...") + self._stop_event.set() # 设置停止事件标志 + try: + # 可选:尝试清空队列,避免丢失未处理的任务 + # while not self.nickname_queue.empty(): + # try: + # self.nickname_queue.get_nowait() + # self.nickname_queue.task_done() + # except queue.Empty: + # break + # logger.info("绰号处理队列已清空。") + + self._nickname_thread.join(timeout=10) # 等待线程结束,设置超时 + if self._nickname_thread.is_alive(): + logger.warning("绰号处理器线程在超时后仍未停止。") + except Exception as e: + logger.error(f"停止绰号处理器线程时出错: {e}", exc_info=True) + finally: + if self._nickname_thread and not self._nickname_thread.is_alive(): + logger.info("绰号处理器线程已成功停止。") + self._nickname_thread = None # 清理线程对象引用 + else: + logger.info("绰号处理器线程未在运行或已被清理。") + + async def trigger_nickname_analysis( + self, + anchor_message: MessageRecv, + bot_reply: List[str], + chat_stream: Optional[ChatStream] = None, + ): + """ + 准备数据并将其排队等待绰号分析(如果满足条件)。 + 取代了旧的 trigger_nickname_analysis_if_needed 函数。 + """ + if not self.is_enabled: + return # 功能禁用则直接返回 + + current_chat_stream = chat_stream or anchor_message.chat_stream + if not current_chat_stream or not current_chat_stream.group_info: + logger.debug("跳过绰号分析:非群聊或无效的聊天流。") + return + + log_prefix = f"[{current_chat_stream.stream_id}]" + try: + # 1. 获取历史记录 + history_limit = getattr(self.config, "NICKNAME_ANALYSIS_HISTORY_LIMIT", 30) + history_messages = get_raw_msg_before_timestamp_with_chat( + chat_id=current_chat_stream.stream_id, + timestamp=time.time(), + limit=history_limit, + ) + + # 格式化历史记录 + chat_history_str = await build_readable_messages( + messages=history_messages, + replace_bot_name=True, merge_messages=False, timestamp_mode="relative", + read_mark=0.0, truncate=False, + ) + + # 2. 获取 Bot 回复 + bot_reply_str = " ".join(bot_reply) if bot_reply else "" + + # 3. 获取群组和平台信息 + group_id = str(current_chat_stream.group_info.group_id) + platform = current_chat_stream.platform + + # 4. 构建用户 ID 到名称的映射 (user_name_map) + user_ids_in_history = {str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id")} + user_name_map = {} + if user_ids_in_history: + try: + # 使用 relationship_manager 批量获取名称 + names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) + except Exception as e: + logger.error(f"{log_prefix} 批量获取 person_name 时出错: {e}", exc_info=True) + names_data = {} + + # 填充 user_name_map + for user_id in user_ids_in_history: + if user_id in names_data: + user_name_map[user_id] = names_data[user_id] + else: + # 回退查找历史记录中的 nickname + latest_nickname = next( + (m["user_info"].get("user_nickname") + for m in reversed(history_messages) + if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname")), + None, + ) + user_name_map[user_id] = latest_nickname or f"未知({user_id})" + + # 5. 添加到内部处理队列 + item = (chat_history_str, bot_reply_str, platform, group_id, user_name_map) + self._add_to_queue(item, platform, group_id) # 调用私有方法入队 + + except Exception as e: + logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) + + + async def get_nickname_prompt_injection(self, chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: + """ + 获取并格式化用于 Prompt 注入的绰号信息字符串。 + 取代了旧的 get_nickname_injection_for_prompt 函数。 + """ + if not self.is_enabled or not chat_stream or not chat_stream.group_info: + return "" # 功能禁用或非群聊则返回空 + + log_prefix = f"[{chat_stream.stream_id}]" + try: + group_id = str(chat_stream.group_info.group_id) + platform = chat_stream.platform + + # 确定上下文中的用户 ID + user_ids_in_context = {str(msg["user_info"]["user_id"]) for msg in message_list_before_now if msg.get("user_info", {}).get("user_id")} + + # 如果消息列表为空,尝试获取最近发言者 + if not user_ids_in_context: + recent_speakers = chat_stream.get_recent_speakers(limit=5) + user_ids_in_context.update(str(speaker["user_id"]) for speaker in recent_speakers) + + if not user_ids_in_context: + logger.warning(f"{log_prefix} 未找到上下文用户用于绰号注入。") + return "" + + # 使用 relationship_manager 批量获取这些用户的群组绰号 + all_nicknames_data = await relationship_manager.get_users_group_nicknames( + platform, list(user_ids_in_context), group_id + ) + + if all_nicknames_data: + # 使用 nickname_utils 中的工具函数进行选择和格式化 + selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) + injection_str = format_nickname_prompt_injection(selected_nicknames) + if injection_str: + logger.debug(f"{log_prefix} 生成的绰号 Prompt 注入:\n{injection_str}") + return injection_str + else: + return "" # 没有获取到绰号数据 + + except Exception as e: + logger.error(f"{log_prefix} 获取绰号注入时出错: {e}", exc_info=True) + return "" # 出错时返回空 + + + # 私有/内部方法 + + def _add_to_queue(self, item: tuple, platform: str, group_id: str): + """将项目添加到内部处理队列。""" + try: + self.nickname_queue.put_nowait(item) + logger.debug(f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}") + except queue.Full: + logger.warning(f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。") + except Exception as e: + logger.error(f"将项目添加到绰号队列时出错: {e}", exc_info=True) + + + async def _analyze_and_update_nicknames(self, item: tuple): + """处理单个队列项目:调用 LLM 分析并更新数据库。""" + if not isinstance(item, tuple) or len(item) != 5: + logger.warning(f"从队列接收到无效项目: {type(item)}") + return + + chat_history_str, bot_reply, platform, group_id, user_name_map = item + thread_id = threading.get_ident() + log_prefix = f"[线程 {thread_id}][{platform}:{group_id}]" + logger.debug(f"{log_prefix} 开始处理绰号分析任务...") + + if not self.llm_mapper: + logger.error(f"{log_prefix} LLM 映射器不可用,无法执行分析。") + return + if not self.db_handler.is_available(): + logger.error(f"{log_prefix} 数据库处理器不可用,无法更新计数。") + return + + # 1. 调用 LLM 分析 (逻辑从 nickname_mapper 移入) + analysis_result = await self._call_llm_for_analysis(chat_history_str, bot_reply, user_name_map) + + # 2. 如果分析成功且找到映射,则更新数据库 + if analysis_result.get("is_exist") and analysis_result.get("data"): + nickname_map_to_update = analysis_result["data"] + logger.info(f"{log_prefix} LLM 找到绰号映射,准备更新数据库: {nickname_map_to_update}") + + for user_id_str, nickname in nickname_map_to_update.items(): + # 基本验证 + if not user_id_str or not nickname: + logger.warning(f"{log_prefix} 跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'") + continue + if not user_id_str.isdigit(): + logger.warning(f"{log_prefix} 无效的用户ID格式 (非纯数字): '{user_id_str}',跳过。") + continue + user_id_int = int(user_id_str) + # 结束验证 + + try: + # 步骤 1: 生成 person_id + person_id = person_info_manager.get_person_id(platform, user_id_str) + if not person_id: + logger.error(f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") + continue + + # 步骤 2: 确保 Person 文档存在 (调用 DB Handler) + self.db_handler.upsert_person(person_id, user_id_int, platform) + + # 步骤 3: 更新群组绰号 (调用 DB Handler) + self.db_handler.update_group_nickname_count(person_id, group_id, nickname) + + except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 + logger.exception( + f"{log_prefix} 数据库操作失败 ({type(db_err).__name__}): 用户 {user_id_str}, 绰号 {nickname}. 错误: {db_err}" + ) + except Exception as e: + logger.exception(f"{log_prefix} 处理用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") + else: + logger.debug(f"{log_prefix} LLM 未找到可靠的绰号映射或分析失败。") + + + async def _call_llm_for_analysis( + self, + chat_history_str: str, + bot_reply: str, + user_name_map: Dict[str, str], + ) -> Dict[str, Any]: + """ + 内部方法:调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射。 + (逻辑从 analyze_chat_for_nicknames 移入) + """ + if not self.llm_mapper: # 再次检查 LLM 映射器 + logger.error("LLM 映射器未初始化,无法执行分析。") + return {"is_exist": False} + + prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) + logger.debug(f"构建的绰号映射 Prompt:\n{prompt[:500]}...") # 截断日志输出 + + try: + # 调用 LLM + response_content, _, _ = await self.llm_mapper.generate_response(prompt) + logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") + + if not response_content: + logger.warning("LLM 返回了空的绰号映射内容。") + return {"is_exist": False} + + # 清理可能的 Markdown 代码块标记 + response_content = response_content.strip() + markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL | re.IGNORECASE) + match = markdown_code_regex.match(response_content) + if match: + response_content = match.group(1).strip() + # 尝试直接解析 JSON,即使没有代码块标记 + elif response_content.startswith("{") and response_content.endswith("}"): + pass # 可能是纯 JSON + else: + # 尝试在文本中查找 JSON 对象 + json_match = re.search(r'\{.*\}', response_content, re.DOTALL) + if json_match: + response_content = json_match.group(0) + else: + logger.warning(f"LLM 响应似乎不包含有效的 JSON 对象。响应: {response_content}") + return {"is_exist": False} + + + # 解析 JSON + result = json.loads(response_content) + + # 结果验证和过滤 + if not isinstance(result, dict): + logger.warning(f"LLM 响应不是一个有效的 JSON 对象 (字典类型)。响应内容: {response_content}") + return {"is_exist": False} + + is_exist = result.get("is_exist") + + if is_exist is True: + original_data = result.get("data") + if isinstance(original_data, dict) and original_data: + logger.info(f"LLM 找到的原始绰号映射: {original_data}") + filtered_data = self._filter_llm_results(original_data, user_name_map) # 调用过滤函数 + if not filtered_data: + logger.info("所有找到的绰号映射都被过滤掉了。") + return {"is_exist": False} + else: + logger.info(f"过滤后的绰号映射: {filtered_data}") + return {"is_exist": True, "data": filtered_data} + else: + # is_exist 为 True 但 data 缺失、不是字典或为空 + logger.warning(f"LLM 响应格式错误: is_exist=True 但 data 无效。原始 data: {original_data}") + return {"is_exist": False} + elif is_exist is False: + logger.info("LLM 明确指示未找到可靠的绰号映射 (is_exist=False)。") + return {"is_exist": False} + else: # is_exist 不是 True 或 False (包括 None) + logger.warning(f"LLM 响应格式错误: 'is_exist' 的值 '{is_exist}' 无效。") + return {"is_exist": False} + + except json.JSONDecodeError as json_err: + logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") + return {"is_exist": False} + except Exception as e: + logger.error(f"绰号映射 LLM 调用或处理过程中发生意外错误: {e}", exc_info=True) + return {"is_exist": False} + + def _filter_llm_results(self, original_data: Dict[str, str], user_name_map: Dict[str, str]) -> Dict[str, str]: + """过滤 LLM 返回的绰号映射结果。""" + filtered_data = {} + bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, 'BOT_QQ') else None + + for user_id, nickname in original_data.items(): + # 过滤条件 1: user_id 必须是字符串 + if not isinstance(user_id, str): + logger.warning(f"过滤掉非字符串 user_id: {user_id}") + continue + # 过滤条件 2: 排除机器人自身 + if bot_qq_str and user_id == bot_qq_str: + logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") + continue + # 过滤条件 3: 排除 nickname 为空或仅包含空白的情况 + if not nickname or nickname.isspace(): + logger.debug(f"过滤掉用户 {user_id} 的空绰号。") + continue + + # 过滤条件 4 (可选,根据 Prompt 效果决定是否保留): 排除 nickname 与已知名称相同的情况 + # person_name = user_name_map.get(user_id) + # if person_name and person_name == nickname: + # logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") + # continue + + # 如果通过所有过滤条件,则保留 + filtered_data[user_id] = nickname.strip() # 保留时去除首尾空白 + + return filtered_data + + + # 线程相关 + def _run_processor_in_thread(self): + """后台线程的入口函数,负责创建和运行 asyncio 事件循环。""" + loop = None + thread_id = threading.get_ident() + logger.info(f"绰号处理器线程启动 (线程 ID: {thread_id})...") + try: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + logger.info(f"(线程 ID: {thread_id}) Asyncio 事件循环已创建并设置。") + # 运行主处理循环直到停止事件被设置 + loop.run_until_complete(self._processing_loop()) + except Exception as e: + logger.error(f"(线程 ID: {thread_id}) 运行绰号处理器线程时出错: {e}", exc_info=True) + finally: + # 确保循环被正确关闭 + if loop: + try: + if loop.is_running(): + logger.info(f"(线程 ID: {thread_id}) 正在停止 asyncio 循环...") + all_tasks = asyncio.all_tasks(loop) + if all_tasks: + logger.info(f"(线程 ID: {thread_id}) 正在取消 {len(all_tasks)} 个运行中的任务...") + for task in all_tasks: + task.cancel() + # 等待任务取消完成 + loop.run_until_complete(asyncio.gather(*all_tasks, return_exceptions=True)) + logger.info(f"(线程 ID: {thread_id}) 所有任务已取消。") + loop.stop() + logger.info(f"(线程 ID: {thread_id}) 循环已停止。") + if not loop.is_closed(): + loop.close() + logger.info(f"(线程 ID: {thread_id}) Asyncio 循环已关闭。") + except Exception as loop_close_err: + logger.error(f"(线程 ID: {thread_id}) 关闭循环时出错: {loop_close_err}", exc_info=True) + logger.info(f"绰号处理器线程结束 (线程 ID: {thread_id}).") + + + async def _processing_loop(self): + """后台线程中运行的异步处理循环。""" + thread_id = threading.get_ident() + logger.info(f"绰号处理循环已启动 (线程 ID: {thread_id})。") + + while not self._stop_event.is_set(): + try: + # 从队列中获取项目,设置超时以允许检查停止事件 + item = self.nickname_queue.get(block=True, timeout=self.sleep_interval) + + # 处理获取到的项目 + await self._analyze_and_update_nicknames(item) + + self.nickname_queue.task_done() # 标记任务完成 + + except queue.Empty: + # 超时,队列为空,继续循环检查停止事件 + continue + except asyncio.CancelledError: + logger.info(f"绰号处理循环被取消 (线程 ID: {thread_id})。") + break # 任务被取消,退出循环 + except Exception as e: + # 捕获处理单个项目时可能发生的其他异常 + logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") + # 可以在这里添加错误处理逻辑,例如将失败的任务放回队列或记录到错误日志 + # 短暂休眠避免快速连续失败 + await asyncio.sleep(5) + + logger.info(f"绰号处理循环已结束 (线程 ID: {thread_id})。") + + +# 在模块级别创建单例实例 +# 这使得其他模块可以通过 `from .nickname_manager import nickname_manager` 来导入和使用 +nickname_manager = NicknameManager() diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index 20753559..aa86ef14 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -1,44 +1,37 @@ -import re -import json -from typing import Dict, Any, Optional +# src/plugins/group_nickname/nickname_mapper.py +from typing import Dict from src.common.logger_manager import get_logger -from src.plugins.models.utils_model import LLMRequest -from src.config.config import global_config +# 这个文件现在只负责构建 Prompt,LLM 的初始化和调用移至 NicknameManager logger = get_logger("nickname_mapper") -llm_mapper: Optional[LLMRequest] = None -if global_config.ENABLE_NICKNAME_MAPPING: # 使用全局开关 - try: - # 从全局配置获取模型设置 - model_config = global_config.llm_nickname_mapping - if not model_config or not model_config.get("name"): - logger.error("在全局配置中未找到有效的 'llm_nickname_mapping' 配置或缺少 'name' 字段。") - else: - llm_mapper = LLMRequest( # <-- LLM 初始化 - model=global_config.llm_nickname_mapping, - temperature=global_config.llm_nickname_mapping["temp"], - max_tokens=256, - request_type="nickname_mapping", - ) - logger.info("绰号映射 LLM 初始化成功 (使用全局配置)。") - - except Exception as e: - logger.error(f"使用全局配置初始化绰号映射 LLM 失败: {e}", exc_info=True) - llm_mapper = None - +# LLMRequest 实例和 analyze_chat_for_nicknames 函数已被移除 def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: - """构建用于 LLM 绰号映射的 Prompt""" - # user_name_map 包含了 user_id 到 person_name (或 fallback nickname) 的映射 - user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items()]) - # print(f"\n\n\nKnown User Info for LLM:\n{user_list_str}\n\n\n\n") # Debugging print + """ + 构建用于 LLM 进行绰号映射分析的 Prompt。 + + Args: + chat_history_str: 格式化后的聊天历史记录字符串。 + bot_reply: Bot 的最新回复字符串。 + user_name_map: 用户 ID 到已知名称(person_name 或 fallback nickname)的映射。 + + Returns: + str: 构建好的 Prompt 字符串。 + """ + # 将 user_name_map 格式化为列表字符串 + user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items() if uid and name]) + if not user_list_str: + user_list_str = "无" # 如果映射为空,明确告知 + + # 核心 Prompt 内容 prompt = f""" -任务:分析以下聊天记录和你的最新回复,判断其中是否包含用户绰号,并确定绰号与用户 ID 之间是否存在明确的一一对应关系。 +任务:仔细分析以下聊天记录和“你的最新回复”,判断其中是否明确提到了某个用户的绰号,并且这个绰号可以清晰地与一个特定的用户 ID 对应起来。 已知用户信息(ID: 名称): {user_list_str} +*注意:名称后面带有"(你)"表示是你自己。* 聊天记录: --- @@ -48,139 +41,36 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: 你的最新回复: {bot_reply} -分析要求: -1. 识别聊天记录和你发言中出现的可能是用户绰号的词语。 -2. 判断这些绰号是否能明确地指向某个特定的用户 ID。一个绰号必须在上下文中清晰地与某个发言人或被提及的人关联起来。 -3. 如果能建立可靠的一一映射关系,请输出一个 JSON 对象,格式如下: - {{ - "is_exist": true, - "data": {{ - "用户A数字id": "绰号_A", - "用户B数字id": "绰号_B" +分析要求与输出格式: +1. 找出聊天记录和“你的最新回复”中可能是用户绰号的词语。 +2. 判断这些绰号是否在上下文中**清晰、无歧义**地指向了“已知用户信息”列表中的**某一个特定用户 ID**。必须是强关联,避免猜测。 +3. **不要**输出你自己(名称后带"(你)"的用户)的绰号映射。 + **不要**输出与用户已知名称完全相同的词语作为绰号。 + **不要**将在“你的最新回复”中你对他人使用的称呼或绰号进行映射(只分析聊天记录中他人对用户的称呼)。 + **不要**输出指代不明或过于通用的词语(如“大佬”、“兄弟”、“那个谁”等,除非上下文能非常明确地指向特定用户)。 +4. 如果找到了**至少一个**满足上述所有条件的**明确**的用户 ID 到绰号的映射关系,请输出 JSON 对象: + ```json + {{ + "is_exist": true, + "data": {{ + "用户A数字id": "绰号_A", + "用户B数字id": "绰号_B" + }} }} - }} - 其中 "data" 字段的键是用户的 ID (字符串形式),值是对应的绰号。只包含你能确认映射关系的绰号。 -4. 如果无法建立任何可靠的一一映射关系(例如,绰号指代不明、没有出现绰号、或无法确认绰号与用户的关联),请输出 JSON 对象: - {{ - "is_exist": false - }} -5. 在“已知用户信息”列表中,你的昵称后面可能包含"(你)",这表示是你自己,不需要输出你自身的绰号映射。请确保不要将你自己的ID和任何词语映射为绰号。 -6. 不要输出与用户名称相同的绰号,不要输出你发言中对他人的绰号映射。 -7. 请严格按照 JSON 格式输出,不要包含任何额外的解释或文本。 + ``` + - `"data"` 字段的键必须是用户的**数字 ID (字符串形式)**,值是对应的**绰号 (字符串形式)**。 + - 只包含你能**百分百确认**映射关系的条目。宁缺毋滥。 + 如果**无法找到任何一个**满足条件的明确映射关系,请输出 JSON 对象: + ```json + {{ + "is_exist": false + }} + ``` +5. 请**仅**输出 JSON 对象,不要包含任何额外的解释、注释或代码块标记之外的文本。 输出: """ + # logger.debug(f"构建的绰号映射 Prompt (部分):\n{prompt[:500]}...") # 可以在 NicknameManager 中记录 return prompt - -async def analyze_chat_for_nicknames( - chat_history_str: str, - bot_reply: str, - user_name_map: Dict[str, str], # 这个 map 包含了 user_id -> person_name 的信息 -) -> Dict[str, Any]: - """ - 调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射,并进行过滤。 - """ - if not global_config.ENABLE_NICKNAME_MAPPING: - logger.debug("绰号映射功能已禁用。") - return {"is_exist": False} - - if llm_mapper is None: - logger.error("绰号映射 LLM 未初始化。无法执行分析。") - return {"is_exist": False} - - prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) - logger.debug(f"构建的绰号映射 Prompt:\n{prompt}") - - try: - # 调用 LLM - response_content, _, _ = await llm_mapper.generate_response(prompt) - logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") - - if not response_content: - logger.warning("LLM 返回了空的绰号映射内容。") - return {"is_exist": False} - - # 清理可能的 Markdown 代码块标记 - response_content = response_content.strip() - markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL) - match = markdown_code_regex.match(response_content) - if match: - response_content = match.group(1).strip() - - # 解析 JSON - result = json.loads(response_content) # 可能抛出 json.JSONDecodeError - - # 检查 result 是否为字典 - if not isinstance(result, dict): - logger.warning(f"LLM 响应不是一个有效的 JSON 对象 (字典类型)。响应内容: {response_content}") - return {"is_exist": False} - - # 使用 get 获取 is_exist,避免 KeyError - is_exist = result.get("is_exist") # 如果 result 不是字典,下面 get 会在 except AttributeError 中捕获 - - if is_exist is True: - original_data = result.get("data") - if isinstance(original_data, dict) and original_data: # 确保 data 是非空字典 - logger.info(f"LLM 找到的原始绰号映射: {original_data}") - - # --- 开始过滤 --- - filtered_data = {} - bot_qq_str = str(global_config.BOT_QQ) - - for user_id, nickname in original_data.items(): - if not isinstance(user_id, str): - logger.warning(f"LLM 返回的 user_id '{user_id}' 不是字符串,跳过。") - continue - if user_id == bot_qq_str: - logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") - continue - - # 有了改名工具后,该过滤器已不适合了,尝试通过修改 prompt 获得更好的结果 - # # 条件 2: 排除 nickname 与 person_name 相同的情况 - # person_name = user_name_map.get(user_id) # 从传入的映射中查找 person_name - # if person_name and person_name == nickname: - # logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") - # continue - - # 如果通过所有过滤条件,则保留 - filtered_data[user_id] = nickname - - # 检查过滤后是否还有数据 - if not filtered_data: - logger.info("所有找到的绰号映射都被过滤掉了。") - return {"is_exist": False} - else: - logger.info(f"过滤后的绰号映射: {filtered_data}") - return {"is_exist": True, "data": filtered_data} - else: - # is_exist 为 True 但 data 缺失、不是字典或为空 - if "data" not in result: - logger.warning("LLM 响应格式错误: is_exist 为 True 但 'data' 键缺失。") - elif not isinstance(original_data, dict): - logger.warning( - f"LLM 响应格式错误: is_exist 为 True 但 'data' 不是字典。 原始 data: {original_data}" - ) - else: # data 为空字典 - logger.debug("LLM 指示 is_exist=True 但 data 为空字典。视为 False 处理。") - return {"is_exist": False} - - elif is_exist is False: - logger.info("LLM 未找到可靠的绰号映射。") - return {"is_exist": False} - - elif is_exist is None: # 处理 is_exist 键存在但值为 null/None 的情况 - logger.warning("LLM 响应格式错误: 'is_exist' 键的值为 None。") - return {"is_exist": False} - - else: # 处理 is_exist 存在但值不是 True/False/None 的情况 - logger.warning(f"LLM 响应格式错误: 'is_exist' 的值 '{is_exist}' 不是预期的布尔值或 None。") - return {"is_exist": False} - - except json.JSONDecodeError as json_err: - logger.error(f"解析 LLM 响应 JSON 失败: {json_err}\n原始响应: {response_content}") - return {"is_exist": False} - except Exception as e: - # 捕获其他所有未预料到的异常 - logger.error(f"绰号映射 LLM 调用或处理过程中发生未预料的错误: {e}", exc_info=True) - return {"is_exist": False} +# analyze_chat_for_nicknames 函数已被移除,其逻辑移至 NicknameManager._call_llm_for_analysis diff --git a/src/plugins/group_nickname/nickname_processor.py b/src/plugins/group_nickname/nickname_processor.py deleted file mode 100644 index a73811bb..00000000 --- a/src/plugins/group_nickname/nickname_processor.py +++ /dev/null @@ -1,346 +0,0 @@ -import asyncio -import traceback -import threading -import queue -from typing import Dict, Optional -from pymongo.collection import Collection -from pymongo.errors import OperationFailure, DuplicateKeyError # 引入 DuplicateKeyError -from src.common.logger_manager import get_logger -from src.common.database import db # 使用全局 db -from .nickname_mapper import analyze_chat_for_nicknames -from src.config.config import global_config -from ..person_info.person_info import person_info_manager - -logger = get_logger("nickname_processor") - -_stop_event = threading.Event() - - -def _upsert_person(collection: Collection, person_id: str, user_id_int: int, platform: str): - """ - 确保数据库中存在指定 person_id 的文档 (Upsert)。 - 如果文档不存在,则使用提供的用户信息创建它。 - - Args: - collection: MongoDB 集合对象 (person_info)。 - person_id: 要查找或创建的 person_id。 - user_id_int: 用户的整数 ID。 - platform: 平台名称。 - - Returns: - UpdateResult: MongoDB 更新操作的结果。 - - Raises: - DuplicateKeyError: 如果发生重复键错误 (理论上不应由 upsert 触发)。 - Exception: 其他数据库操作错误。 - """ - try: - # 关键步骤:基于 person_id 执行 Upsert - # 如果文档不存在,它会被创建,并设置 $setOnInsert 中的字段。 - # 如果文档已存在,此操作不会修改任何内容。 - result = collection.update_one( - {"person_id": person_id}, - { - "$setOnInsert": { - "person_id": person_id, - "user_id": user_id_int, # 确保这里使用传入的 user_id_int - "platform": platform, - "group_nicknames": [], # 初始化 group_nicknames 数组 - } - }, - upsert=True, - ) - if result.upserted_id: - logger.debug(f"Upsert on person_id created new document: {person_id}") - # else: - # logger.debug(f"Upsert on person_id found existing document: {person_id}") - return result - except DuplicateKeyError as dk_err: - # 这个错误理论上不应该再由 upsert 触发。 - # 如果仍然出现,可能指示 person_id 生成逻辑问题或非常罕见的 MongoDB 内部情况。 - logger.error( - f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" - ) - raise # 将异常向上抛出,让调用者处理 - except Exception as e: - logger.exception(f"对 person_id {person_id} 执行 Upsert 时失败: {e}") - raise # 将异常向上抛出 - - -def _update_group_nickname(collection: Collection, person_id: str, group_id_str: str, nickname: str): - """ - 尝试更新 person_id 文档中特定群组的绰号计数,或添加新条目。 - 按顺序尝试:增加计数 -> 添加绰号 -> 添加群组。 - - Args: - collection: MongoDB 集合对象 (person_info)。 - person_id: 目标文档的 person_id。 - group_id_str: 目标群组的 ID (字符串)。 - nickname: 要更新或添加的绰号。 - """ - # 3a. 尝试增加现有群组中现有绰号的计数 - result_inc = collection.update_one( - { - "person_id": person_id, - "group_nicknames": {"$elemMatch": {"group_id": group_id_str, "nicknames.name": nickname}}, - }, - {"$inc": {"group_nicknames.$[group].nicknames.$[nick].count": 1}}, - array_filters=[ - {"group.group_id": group_id_str}, - {"nick.name": nickname}, - ], - ) - if result_inc.modified_count > 0: - # logger.debug(f"成功增加 person_id {person_id} 在群组 {group_id_str} 中绰号 '{nickname}' 的计数。") - return # 成功增加计数,操作完成 - - # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 - result_push_nick = collection.update_one( - { - "person_id": person_id, - "group_nicknames.group_id": group_id_str, # 检查群组是否存在 - }, - {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, - array_filters=[{"group.group_id": group_id_str}], - ) - if result_push_nick.modified_count > 0: - logger.debug(f"成功为 person_id {person_id} 在现有群组 {group_id_str} 中添加新绰号 '{nickname}'。") - return # 成功添加绰号,操作完成 - - # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 - # 确保 group_nicknames 数组存在 (作为保险措施) - collection.update_one( - {"person_id": person_id, "group_nicknames": {"$exists": False}}, - {"$set": {"group_nicknames": []}}, - ) - # 推送新的群组对象到 group_nicknames 数组 - result_push_group = collection.update_one( - { - "person_id": person_id, - "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 - }, - { - "$push": { - "group_nicknames": { - "group_id": group_id_str, - "nicknames": [{"name": nickname, "count": 1}], - } - } - }, - ) - if result_push_group.modified_count > 0: - logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'。") - # else: - # 如果连添加群组也失败 (例如 group_id 已存在但之前的步骤都未匹配,理论上不太可能), - # 可能需要进一步的日志或错误处理,但这通常意味着数据状态异常。 - # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") - - -async def update_nickname_counts(platform: str, group_id: str, nickname_map: Dict[str, str]): - """ - 更新数据库中用户的群组绰号计数 (使用全局 db)。 - 通过调用辅助函数来处理 person 文档的 upsert 和绰号更新。 - - Args: - platform (str): 平台名称 (e.g., 'qq')。 - group_id (str): 群组 ID。 - nickname_map (Dict[str, str]): 用户 ID (字符串) 到绰号的映射。 - """ - person_info_collection = db.person_info - if not nickname_map: - logger.debug("提供的用于更新的绰号映射为空。") - return - - logger.info(f"尝试更新平台 '{platform}' 群组 '{group_id}' 的绰号计数,映射为: {nickname_map}") - - for user_id_str, nickname in nickname_map.items(): - # --- 基本验证 --- - if not user_id_str or not nickname: - logger.warning(f"跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'") - continue - group_id_str = str(group_id) - - # 使用 isdigit() 检查 user_id_str 是否为纯数字字符串 - if not user_id_str.isdigit(): - # isdigit() 会对空字符串返回 False,并且不识别负号、小数点等 - logger.warning(f"无效的用户ID格式 (非纯数字): '{user_id_str}',跳过。") - continue - - user_id_int = int(user_id_str) - - # --- 结束验证 --- - - try: - # --- 步骤 1: 生成 person_id --- - person_id = person_info_manager.get_person_id(platform, user_id_str) - if not person_id: - logger.error(f"无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") - continue - - # --- 步骤 2: 确保 Person 文档存在 (调用辅助函数) --- - _upsert_person(person_info_collection, person_id, user_id_int, platform) - - # --- 步骤 3: 更新群组绰号 (调用辅助函数) --- - _update_group_nickname(person_info_collection, person_id, group_id_str, nickname) - - # --- 统一处理数据库操作可能抛出的异常 --- - except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 - logger.exception( - f"数据库操作失败 ({type(db_err).__name__}): 用户 {user_id_str}, 群组 {group_id_str}, 绰号 {nickname}. 错误: {db_err}" - ) - except Exception as e: - # 捕获其他所有可能的错误 (例如 person_id 生成、辅助函数内部未捕获的错误等) - logger.exception(f"处理用户 {user_id_str} 的绰号 '{nickname}' 时发生意外错误:{e}") - - -# --- 使用 queue.Queue --- -queue_max_size = getattr(global_config, "NICKNAME_QUEUE_MAX_SIZE", 100) -nickname_queue: queue.Queue = queue.Queue(maxsize=queue_max_size) - -_nickname_thread: Optional[threading.Thread] = None - - -# --- add_to_nickname_queue (保持不变,已包含 platform) --- -async def add_to_nickname_queue( - chat_history_str: str, bot_reply: str, platform: str, group_id: Optional[str], user_name_map: Dict[str, str] -): - """将需要分析的数据放入队列。""" - if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: - return - if group_id is None: - logger.debug("私聊跳过绰号映射。") - return - try: - item = (chat_history_str, bot_reply, platform, str(group_id), user_name_map) - nickname_queue.put_nowait(item) - logger.debug( - f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {nickname_queue.qsize()}" - ) - except queue.Full: - logger.warning(f"无法将项目添加到绰号队列:队列已满 (maxsize={nickname_queue.maxsize})。") - except Exception as e: - logger.warning(f"无法将项目添加到绰号队列: {e}", exc_info=True) - - -# --- _nickname_processing_loop (保持不变,已包含 platform) --- -async def _nickname_processing_loop(q: queue.Queue, stop_event: threading.Event): - """独立线程中的主循环,处理队列任务 (使用全局 db 和 config)。""" - thread_id = threading.get_ident() - logger.info(f"绰号处理循环已启动 (线程 ID: {thread_id})。") - sleep_interval = getattr(global_config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) - - while not stop_event.is_set(): - try: - item = q.get(block=True, timeout=sleep_interval) - - if isinstance(item, tuple) and len(item) == 5: - chat_history_str, bot_reply, platform, group_id, user_name_map = item - logger.debug(f"(线程 ID: {thread_id}) 正在处理平台 '{platform}' 群组 '{group_id}' 的绰号映射任务...") - analysis_result = await analyze_chat_for_nicknames(chat_history_str, bot_reply, user_name_map) - if analysis_result.get("is_exist") and analysis_result.get("data"): - await update_nickname_counts(platform, group_id, analysis_result["data"]) - else: - logger.warning(f"(线程 ID: {thread_id}) 从队列接收到意外的项目类型或长度: {type(item)}, 内容: {item}") - - q.task_done() - - except queue.Empty: - continue - except asyncio.CancelledError: - logger.info(f"绰号处理循环已取消 (线程 ID: {thread_id})。") - break - except Exception as e: - logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") - await asyncio.sleep(5) - - logger.info(f"绰号处理循环已结束 (线程 ID: {thread_id})。") - - -# --- _run_processor_thread (保持不变) --- -def _run_processor_thread(q: queue.Queue, stop_event: threading.Event): - """线程启动函数,运行异步循环。""" - loop = None - thread_id = threading.get_ident() - logger.info(f"Nickname processor thread starting (Thread ID: {thread_id})...") - try: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - logger.info(f"(Thread ID: {thread_id}) Asyncio event loop created and set.") - loop.run_until_complete(_nickname_processing_loop(q, stop_event)) - except Exception as e: - logger.error(f"(Thread ID: {thread_id}) Error running nickname processor thread: {e}", exc_info=True) - finally: - if loop: - try: - if loop.is_running(): - logger.info(f"(Thread ID: {thread_id}) Stopping the asyncio loop...") - all_tasks = asyncio.all_tasks(loop) - if all_tasks: - logger.info(f"(Thread ID: {thread_id}) Cancelling {len(all_tasks)} running tasks...") - for task in all_tasks: - task.cancel() - loop.run_until_complete(asyncio.gather(*all_tasks, return_exceptions=True)) - logger.info(f"(Thread ID: {thread_id}) All tasks cancelled.") - loop.stop() - logger.info(f"(Thread ID: {thread_id}) Loop stopped.") - if not loop.is_closed(): - loop.close() - logger.info(f"(Thread ID: {thread_id}) Asyncio loop closed.") - except Exception as loop_close_err: - logger.error(f"(Thread ID: {thread_id}) Error closing loop: {loop_close_err}", exc_info=True) - logger.info(f"Nickname processor thread finished (Thread ID: {thread_id}).") - - -# --- start_nickname_processor (保持不变) --- -def start_nickname_processor(): - """启动绰号映射处理线程。""" - global _nickname_thread - if not global_config or not global_config.ENABLE_NICKNAME_MAPPING: - logger.info("绰号映射功能已禁用或无法获取配置。处理器未启动。") - return - - if _nickname_thread is None or not _nickname_thread.is_alive(): - logger.info("正在启动绰号处理器线程...") - stop_event = get_stop_event() - stop_event.clear() - _nickname_thread = threading.Thread( - target=_run_processor_thread, args=(nickname_queue, stop_event), daemon=True - ) - _nickname_thread.start() - logger.info(f"绰号处理器线程已启动 (Thread ID: {_nickname_thread.ident})") - else: - logger.warning("绰号处理器线程已在运行中。") - - -# --- stop_nickname_processor (保持不变) --- -def stop_nickname_processor(): - """停止绰号映射处理线程。""" - global _nickname_thread - if _nickname_thread and _nickname_thread.is_alive(): - logger.info("正在停止绰号处理器线程...") - set_stop_event() - try: - _nickname_thread.join(timeout=10) - if _nickname_thread.is_alive(): - logger.warning("绰号处理器线程在 10 秒后未结束。") - except Exception as e: - logger.error(f"停止绰号处理器线程时出错: {e}", exc_info=True) - finally: - if _nickname_thread and not _nickname_thread.is_alive(): - logger.info("绰号处理器线程已成功停止。") - else: - logger.warning("停止绰号处理器线程:线程可能仍在运行或未正确清理。") - _nickname_thread = None - else: - logger.info("绰号处理器线程未在运行或已被清理。") - - -# --- Event 控制函数 (保持不变) --- -def get_stop_event() -> threading.Event: - """获取全局停止事件""" - return _stop_event - - -def set_stop_event(): - """设置全局停止事件,通知子线程退出""" - _stop_event.set() diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index 83931417..f0402b12 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,26 +1,21 @@ import random -import time from typing import List, Dict, Tuple, Optional from src.common.logger_manager import get_logger from src.config.config import global_config -from src.plugins.person_info.relationship_manager import relationship_manager -from src.plugins.chat.chat_stream import ChatStream -from src.plugins.chat.message import MessageRecv -from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat -from .nickname_processor import add_to_nickname_queue +# 这个文件现在只包含纯粹的工具函数,与状态和流程无关 -# 获取日志记录器,命名为 "绰号工具" logger = get_logger("nickname_utils") def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int]]]) -> List[Tuple[str, str, int]]: """ - 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号。 + 从给定的绰号信息中,根据映射次数加权随机选择最多 N 个绰号用于 Prompt。 Args: all_nicknames_info: 包含用户及其绰号信息的字典,格式为 { "用户名1": [{"绰号A": 次数}, {"绰号B": 次数}], ... } + 注意:这里的用户名是 person_name。 Returns: List[Tuple[str, str, int]]: 选中的绰号列表,每个元素为 (用户名, 绰号, 次数)。 @@ -29,17 +24,21 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int if not all_nicknames_info: return [] - candidates = [] + candidates = [] # 存储 (用户名, 绰号, 次数, 权重) + smoothing_factor = getattr(global_config, "NICKNAME_PROBABILITY_SMOOTHING", 1.0) # 平滑因子,避免权重为0 + for user_name, nicknames in all_nicknames_info.items(): - if nicknames: + if nicknames and isinstance(nicknames, list): for nickname_entry in nicknames: + # 确保条目是字典且只有一个键值对 if isinstance(nickname_entry, dict) and len(nickname_entry) == 1: nickname, count = list(nickname_entry.items())[0] - if isinstance(count, int) and count > 0: - weight = count + global_config.NICKNAME_PROBABILITY_SMOOTHING + # 确保次数是正整数 + if isinstance(count, int) and count > 0 and isinstance(nickname, str) and nickname: + weight = count + smoothing_factor # 计算权重 candidates.append((user_name, nickname, count, weight)) else: - logger.warning(f"用户 '{user_name}' 的绰号 '{nickname}' 次数无效: {count}。已跳过。") + logger.warning(f"用户 '{user_name}' 的绰号条目无效: {nickname_entry} (次数非正整数或绰号为空)。已跳过。") else: logger.warning(f"用户 '{user_name}' 的绰号条目格式无效: {nickname_entry}。已跳过。") @@ -47,23 +46,24 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int return [] # 确定需要选择的数量 - num_to_select = min(global_config.MAX_NICKNAMES_IN_PROMPT, len(candidates)) + max_nicknames = getattr(global_config, "MAX_NICKNAMES_IN_PROMPT", 5) + num_to_select = min(max_nicknames, len(candidates)) try: - # 调用新的辅助函数进行不重复加权抽样 + # 调用加权随机抽样(不重复) selected_candidates_with_weight = weighted_sample_without_replacement(candidates, num_to_select) # 如果抽样结果数量不足(例如权重问题导致提前退出),可以考虑是否需要补充 if len(selected_candidates_with_weight) < num_to_select: logger.debug( - f"加权随机选择后数量不足 ({len(selected_candidates_with_weight)}/{num_to_select}),补充选择次数最多的。" + f"加权随机选择后数量不足 ({len(selected_candidates_with_weight)}/{num_to_select}),尝试补充选择次数最多的。" ) # 筛选出未被选中的候选 selected_ids = set( (c[0], c[1]) for c in selected_candidates_with_weight - ) # 使用 (用户名, 绰号) 作为唯一标识 + ) # 使用 (用户名, 绰号) 作为唯一标识 remaining_candidates = [c for c in candidates if (c[0], c[1]) not in selected_ids] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 needed = num_to_select - len(selected_candidates_with_weight) selected_candidates_with_weight.extend(remaining_candidates[:needed]) @@ -71,14 +71,14 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int # 日志:记录加权随机选择时发生的错误,并回退到简单选择 logger.error(f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True) # 出错时回退到选择次数最多的 N 个 - candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 - # 注意:这里需要选择包含权重的元组,或者调整后续处理 + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 selected_candidates_with_weight = candidates[:num_to_select] # 格式化输出结果为 (用户名, 绰号, 次数),移除权重 result = [(user, nick, count) for user, nick, count, _weight in selected_candidates_with_weight] - result.sort(key=lambda x: x[2], reverse=True) # 按次数降序 + # 按次数降序排序最终结果 + result.sort(key=lambda x: x[2], reverse=True) logger.debug(f"为 Prompt 选择的绰号: {result}") return result @@ -95,13 +95,13 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in str: 格式化后的字符串,如果列表为空则返回空字符串。 """ if not selected_nicknames: - # 如果没有选中的绰号,返回空字符串 return "" + # Prompt 注入部分的标题 prompt_lines = [ - "以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),如果有需要提及对方,用你认为合适的方式提及:" - ] # 注入部分的标题 - grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 + "以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),供你参考:" + ] + grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 # 按用户分组绰号 for user_name, nickname, _count in selected_nicknames: @@ -112,8 +112,9 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in # 构建每个用户的绰号字符串 for user_name, nicknames in grouped_by_user.items(): - nicknames_str = "、".join(nicknames) # 使用中文顿号连接 - prompt_lines.append(f"- {user_name},ta被群友称为:{nicknames_str}") # 格式化输出 + nicknames_str = "、".join(nicknames) # 使用中文顿号连接 + # 格式化输出,例如: "- 张三,ta 可能被称为:“三儿”、“张哥”" + prompt_lines.append(f"- {user_name},ta 可能被称为:{nicknames_str}") # 如果只有标题行,返回空字符串,避免注入无意义的标题 if len(prompt_lines) > 1: @@ -123,213 +124,51 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in return "" -async def get_nickname_injection_for_prompt(chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: - """ - 获取并格式化用于 Prompt 注入的绰号信息字符串。 - 这是一个封装函数,整合了获取、选择和格式化的逻辑。 - - Args: - chat_stream: 当前的 ChatStream 对象。 - message_list_before_now: 用于确定上下文中用户的消息列表。 - - Returns: - str: 格式化后的绰号信息字符串,如果无法获取或格式化则返回空字符串。 - """ - nickname_injection_str = "" - # 仅在群聊且功能开启时执行 - if global_config.ENABLE_NICKNAME_MAPPING and chat_stream and chat_stream.group_info: - try: - group_id = str(chat_stream.group_info.group_id) - user_ids_in_context = set() # 存储上下文中出现的用户ID - - # 从消息列表中提取用户ID - if message_list_before_now: - for msg in message_list_before_now: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_context.add(str(sender_id)) - else: - # 如果消息列表为空,尝试获取最近发言者作为上下文用户 - recent_speakers = chat_stream.get_recent_speakers(limit=5) # 获取最近5个发言者 - for speaker in recent_speakers: - user_ids_in_context.add(str(speaker["user_id"])) - if not user_ids_in_context: - # 日志:记录未找到上下文用户 - logger.warning(f"[{chat_stream.stream_id}] 未找到消息或最近发言者用于绰号注入。") - - # 如果找到了上下文用户 - if user_ids_in_context: - platform = chat_stream.platform - # --- 调用批量获取群组绰号的方法 --- - # 使用 relationship_manager 从数据库获取数据 - all_nicknames_data = await relationship_manager.get_users_group_nicknames( - platform, list(user_ids_in_context), group_id - ) - - # 如果获取到了绰号数据 - if all_nicknames_data: - # 调用选择和格式化函数 - selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) - nickname_injection_str = format_nickname_prompt_injection(selected_nicknames) - if nickname_injection_str: - # 日志:记录生成的用于 Prompt 的绰号信息 - logger.debug( - f"[{chat_stream.stream_id}] 已生成用于 Prompt 的绰号信息:\n{nickname_injection_str}" - ) - - except Exception as e: - # 日志:记录获取或格式化绰号信息时发生的错误 - logger.error(f"[{chat_stream.stream_id}] 获取或格式化 Prompt 绰号信息时出错: {e}", exc_info=True) - nickname_injection_str = "" # 出错时确保返回空字符串 - - # 返回最终生成的字符串(可能为空) - return nickname_injection_str - - -async def trigger_nickname_analysis_if_needed( - anchor_message: MessageRecv, - bot_reply: List[str], - chat_stream: Optional[ChatStream] = None, # 允许传入 chat_stream 或从 anchor_message 获取 -): - """ - 如果满足条件(群聊、功能开启),则准备数据并触发绰号分析任务。 - 将相关信息放入处理队列,由 nickname_processor 处理。 - - Args: - anchor_message: 触发回复的原始消息对象。 - bot_reply: Bot 生成的回复内容列表。 - chat_stream: 可选的 ChatStream 对象。 - """ - # 检查功能是否开启 - if not global_config.ENABLE_NICKNAME_MAPPING: - return # 如果功能禁用,直接返回 - - # 确定使用的 chat_stream - current_chat_stream = chat_stream or anchor_message.chat_stream - - # 检查是否是群聊且 chat_stream 有效 - if not current_chat_stream or not current_chat_stream.group_info: - # 日志:记录跳过分析的原因(非群聊或无效流) - logger.debug( - f"[{current_chat_stream.stream_id if current_chat_stream else '未知流'}] 跳过绰号分析:非群聊或无效聊天流。" - ) - return - - log_prefix = f"[{current_chat_stream.stream_id}]" # 用于日志的前缀 - - try: - # 1. 获取历史记录 - history_limit = 30 # 定义获取历史记录的数量限制 - history_messages = get_raw_msg_before_timestamp_with_chat( - chat_id=current_chat_stream.stream_id, - timestamp=time.time(), # 获取当前时间之前的记录 - limit=history_limit, - ) - - # 格式化历史记录为可读字符串 - chat_history_str = await build_readable_messages( - messages=history_messages, - replace_bot_name=True, # 替换机器人名称,以便 LLM 分析 - merge_messages=False, # 不合并消息,保留原始对话结构 - timestamp_mode="relative", # 使用相对时间戳 - read_mark=0.0, # 不需要已读标记 - truncate=False, # 获取完整内容进行分析 - ) - - # 2. 获取 Bot 回复字符串 - bot_reply_str = " ".join(bot_reply) if bot_reply else "" # 处理空回复列表 - - # 3. 获取群号和平台信息 - group_id = str(current_chat_stream.group_info.group_id) - platform = current_chat_stream.platform - - # 4. 构建用户 ID 到名称的映射 (user_name_map) - user_ids_in_history = set() # 存储历史记录中出现的用户ID - for msg in history_messages: - sender_id = msg["user_info"].get("user_id") - if sender_id: - user_ids_in_history.add(str(sender_id)) - - user_name_map = {} # 初始化映射字典 - if user_ids_in_history: - try: - # 批量从数据库获取这些用户的 person_name - names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) - except Exception as e: - # 日志:记录获取 person_name 时发生的错误 - logger.error(f"{log_prefix} 批量获取 person_name 时出错: {e}", exc_info=True) - names_data = {} # 出错时使用空字典 - - # 填充 user_name_map - for user_id in user_ids_in_history: - if user_id in names_data: - # 如果数据库中有 person_name,则使用它 - user_name_map[user_id] = names_data[user_id] - else: - # 如果数据库中没有,则回退查找用户在历史记录中最近使用的 nickname - latest_nickname = next( - ( - m["user_info"].get("user_nickname") # 从 user_info 获取 nickname - for m in reversed(history_messages) # 从后往前找 - # 确保消息的用户ID匹配且 nickname 存在 - if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") - ), - None, # 如果找不到,返回 None - ) - # 如果找到了 nickname 则使用,否则使用 "未知(ID)" - user_name_map[user_id] = latest_nickname or f"未知({user_id})" - - # 5. 将准备好的数据添加到绰号处理队列 - await add_to_nickname_queue(chat_history_str, bot_reply_str, platform, group_id, user_name_map) - # 日志:记录已成功触发分析任务 - logger.debug(f"{log_prefix} 已为群组 {group_id} 触发绰号分析任务。") - - except Exception as e: - # 日志:记录触发分析过程中发生的任何其他错误 - logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) - - def weighted_sample_without_replacement( candidates: List[Tuple[str, str, int, float]], k: int ) -> List[Tuple[str, str, int, float]]: """ - 执行不重复的加权随机抽样。 + 执行不重复的加权随机抽样。使用 A-ExpJ 算法思想的简化实现。 Args: candidates: 候选列表,每个元素为 (用户名, 绰号, 次数, 权重)。 k: 需要选择的数量。 Returns: - List[Tuple[str, str, int, float]]: 选中的元素列表。 + List[Tuple[str, str, int, float]]: 选中的元素列表(包含权重)。 """ if k <= 0: return [] - if k >= len(candidates): - # 如果需要选择的数量大于或等于候选数量,直接返回所有候选 - return candidates[:] # 返回副本以避免修改原始列表 + n = len(candidates) + if k >= n: + return candidates[:] # 返回副本 - pool = candidates[:] # 创建候选列表的副本进行操作 - selected = [] - # 注意:原评论代码中计算 total_weight 但未使用,这里也省略。 - # random.choices 内部会处理权重的归一化。 - - for _ in range(min(k, len(pool))): # 确保迭代次数不超过池中剩余元素 - if not pool: # 如果池已空,提前结束 - break - - weights = [c[3] for c in pool] # 获取当前池中所有元素的权重 - # 检查权重是否有效 - if sum(weights) <= 0: - # 如果所有剩余权重无效,随机选择一个(或根据需要采取其他策略) - logger.warning("加权抽样池中剩余权重总和为0或负数,随机选择一个。") - chosen_index = random.randrange(len(pool)) - chosen = pool.pop(chosen_index) + # 计算每个元素的 key = U^(1/weight),其中 U 是 (0, 1) 之间的随机数 + # 为了数值稳定性,计算 log(key) = log(U) / weight + # log(U) 可以用 -Exponential(1) 来生成 + weighted_keys = [] + for i in range(n): + weight = candidates[i][3] + if weight <= 0: + # 处理权重为0或负数的情况,赋予一个极小的概率(或极大负数的log_key) + log_key = float('-inf') # 或者一个非常大的负数 + logger.warning(f"候选者 {candidates[i][:2]} 的权重为非正数 ({weight}),抽中概率极低。") else: - # 使用 random.choices 进行加权抽样,选择 1 个 - # random.choices 返回一个列表,所以取第一个元素 [0] - chosen = random.choices(pool, weights=weights, k=1)[0] - pool.remove(chosen) # 从池中移除选中的元素,实现不重复抽样 + log_u = -random.expovariate(1.0) # 生成 -Exponential(1) 随机数 + log_key = log_u / weight + weighted_keys.append((log_key, i)) # 存储 (log_key, 原始索引) - selected.append(chosen) + # 按 log_key 降序排序 (相当于按 key 升序排序) + weighted_keys.sort(key=lambda x: x[0], reverse=True) - return selected + # 选择 log_key 最大的 k 个元素的原始索引 + selected_indices = [index for _log_key, index in weighted_keys[:k]] + + # 根据选中的索引从原始 candidates 列表中获取元素 + selected_items = [candidates[i] for i in selected_indices] + + return selected_items + +# 移除旧的流程函数 +# get_nickname_injection_for_prompt 和 trigger_nickname_analysis_if_needed +# 的逻辑现在由 NicknameManager 处理 diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 0e690cc2..12b720bf 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -28,9 +28,8 @@ from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.moods.moods import MoodManager from src.heart_flow.utils_chat import get_chat_type_and_target_info from rich.traceback import install -from src.plugins.group_nickname.nickname_utils import trigger_nickname_analysis_if_needed from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat -from src.plugins.group_nickname.nickname_utils import get_nickname_injection_for_prompt +from src.plugins.group_nickname.nickname_manager import nickname_manager install(extra_lines=3) @@ -605,7 +604,7 @@ class HeartFChatting: ) # 调用工具函数触发绰号分析 - await trigger_nickname_analysis_if_needed(anchor_message, reply, self.chat_stream) + await nickname_manager.trigger_nickname_analysis(anchor_message, reply, self.chat_stream) return True, thinking_id @@ -874,7 +873,7 @@ class HeartFChatting: limit=global_config.observation_context_size, # 使用与 prompt 构建一致的 limit ) # 调用工具函数获取格式化后的绰号字符串 - nickname_injection_str = await get_nickname_injection_for_prompt(self.chat_stream, message_list_before_now) + nickname_injection_str = await nickname_manager.get_nickname_prompt_injection(self.chat_stream, message_list_before_now) # --- 构建提示词 (调用修改后的 PromptBuilder 方法) --- prompt = await prompt_builder.build_planner_prompt( diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index dc9e43ee..b979268c 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -14,7 +14,7 @@ from ..moods.moods import MoodManager from ..memory_system.Hippocampus import HippocampusManager from ..schedule.schedule_generator import bot_schedule from ..knowledge.knowledge_lib import qa_manager -from src.plugins.group_nickname.nickname_utils import get_nickname_injection_for_prompt +from src.plugins.group_nickname.nickname_manager import nickname_manager import traceback from .heartFC_Cycleinfo import CycleInfo @@ -255,7 +255,7 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s chat_target_2 = await global_prompt_manager.get_prompt_async("chat_target_group2") # 调用新的工具函数获取绰号信息 - nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) + nickname_injection_str = await nickname_manager.get_nickname_prompt_injection(chat_stream, message_list_before_now) prompt = await global_prompt_manager.format_prompt( template_name, @@ -451,7 +451,7 @@ class PromptBuilder: chat_target_2 = await global_prompt_manager.get_prompt_async("chat_target_group2") # 调用新的工具函数获取绰号信息 - nickname_injection_str = await get_nickname_injection_for_prompt(chat_stream, message_list_before_now) + nickname_injection_str = await nickname_manager.get_nickname_prompt_injection(chat_stream, message_list_before_now) prompt = await global_prompt_manager.format_prompt( template_name, diff --git a/src/plugins/heartFC_chat/normal_chat.py b/src/plugins/heartFC_chat/normal_chat.py index ca266413..6521ae13 100644 --- a/src/plugins/heartFC_chat/normal_chat.py +++ b/src/plugins/heartFC_chat/normal_chat.py @@ -20,7 +20,7 @@ from src.plugins.person_info.relationship_manager import relationship_manager from src.plugins.respon_info_catcher.info_catcher import info_catcher_manager from src.plugins.utils.timer_calculator import Timer from src.heart_flow.utils_chat import get_chat_type_and_target_info -from src.plugins.group_nickname.nickname_utils import trigger_nickname_analysis_if_needed +from src.plugins.group_nickname.nickname_manager import nickname_manager logger = get_logger("chat") @@ -317,7 +317,7 @@ class NormalChat: # 检查 first_bot_msg 是否为 None (例如思考消息已被移除的情况) if first_bot_msg: info_catcher.catch_after_response(timing_results["消息发送"], response_set, first_bot_msg) - await trigger_nickname_analysis_if_needed(message, response_set, self.chat_stream) + await nickname_manager.trigger_nickname_analysis(message, response_set, self.chat_stream) else: logger.warning(f"[{self.stream_name}] 思考消息 {thinking_id} 在发送前丢失,无法记录 info_catcher") diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 81cead61..6723f2de 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -128,7 +128,7 @@ enable_nickname_mapping = false # 绰号映射功能总开关(默认关闭, max_nicknames_in_prompt = 10 # Prompt 中最多注入的绰号数量(防止token数量爆炸) nickname_probability_smoothing = 1 # 绰号加权随机选择的平滑因子 nickname_queue_max_size = 100 # 绰号处理队列最大容量 -nickname_process_sleep_interval = 5 # 绰号处理进程休眠间隔(秒) +nickname_process_sleep_interval = 60 # 绰号处理进程休眠间隔(秒) [memory] build_memory_interval = 2000 # 记忆构建间隔 单位秒 间隔越低,麦麦学习越多,但是冗余信息也会增多 diff --git a/template/lpmm_config_template.toml b/template/lpmm_config_template.toml index aae664d5..491e1feb 100644 --- a/template/lpmm_config_template.toml +++ b/template/lpmm_config_template.toml @@ -54,7 +54,7 @@ res_top_k = 3 # 最终提供的文段TopK [persistence] # 持久化配置(存储中间数据,防止重复计算) data_root_path = "data" # 数据根目录 -raw_data_path = "data/imported_lpmm_data" # 原始数据路径 -openie_data_path = "data/openie" # OpenIE数据路径 +raw_data_path = "data/import.json" # 原始数据路径 +openie_data_path = "data/openie.json" # OpenIE数据路径 embedding_data_dir = "data/embedding" # 嵌入数据目录 rag_data_dir = "data/rag" # RAG数据目录 From 0c2e94c76c6a0fed4c74a13e7abb9573cd81cce0 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 3 May 2025 14:04:33 +0800 Subject: [PATCH 54/58] ruff --- src/plugins/group_nickname/nickname_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index f0402b12..fefe3c0c 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -1,5 +1,5 @@ import random -from typing import List, Dict, Tuple, Optional +from typing import List, Dict, Tuple from src.common.logger_manager import get_logger from src.config.config import global_config From bde5c33fc22cb7ff351017b99e2474486c778acd Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 3 May 2025 06:04:52 +0000 Subject: [PATCH 55/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bot.py | 6 +- src/plugins/group_nickname/nickname_db.py | 22 ++-- .../group_nickname/nickname_manager.py | 105 ++++++++++-------- src/plugins/group_nickname/nickname_mapper.py | 4 +- src/plugins/group_nickname/nickname_utils.py | 33 +++--- src/plugins/heartFC_chat/heartFC_chat.py | 4 +- .../heartFC_chat/heartflow_prompt_builder.py | 8 +- 7 files changed, 104 insertions(+), 78 deletions(-) diff --git a/bot.py b/bot.py index c1b3a253..4197556e 100644 --- a/bot.py +++ b/bot.py @@ -225,15 +225,15 @@ def raw_main(): # 确保 NicknameManager 单例实例存在并已初始化 # (单例模式下,导入时或第一次调用时会自动初始化) - _ = nickname_manager # 显式引用一次 + _ = nickname_manager # 显式引用一次 # 启动 NicknameManager 的后台处理器线程 logger.info("准备启动绰号处理管理器...") - nickname_manager.start_processor() # 调用实例的方法 + nickname_manager.start_processor() # 调用实例的方法 logger.info("已调用启动绰号处理管理器。") # 注册 NicknameManager 的停止方法到 atexit,确保程序退出时线程能被清理 - atexit.register(nickname_manager.stop_processor) # 注册实例的方法 + atexit.register(nickname_manager.stop_processor) # 注册实例的方法 logger.info("已注册绰号处理管理器的退出处理程序。") # 返回MainSystem实例 diff --git a/src/plugins/group_nickname/nickname_db.py b/src/plugins/group_nickname/nickname_db.py index d0c6d975..ac3bd24c 100644 --- a/src/plugins/group_nickname/nickname_db.py +++ b/src/plugins/group_nickname/nickname_db.py @@ -5,11 +5,13 @@ from typing import Optional logger = get_logger("nickname_db") + class NicknameDB: """ 处理与群组绰号相关的数据库操作 (MongoDB)。 封装了对 'person_info' 集合的读写操作。 """ + def __init__(self, person_info_collection: Optional[Collection]): """ 初始化 NicknameDB 处理器。 @@ -71,10 +73,10 @@ class NicknameDB: logger.error( f"数据库操作失败 (DuplicateKeyError): person_id {person_id}. 错误: {dk_err}. 这不应该发生,请检查 person_id 生成逻辑和数据库状态。" ) - raise # 将异常向上抛出 + raise # 将异常向上抛出 except Exception as e: logger.exception(f"对 person_id {person_id} 执行 Upsert 时失败: {e}") - raise # 将异常向上抛出 + raise # 将异常向上抛出 def update_group_nickname_count(self, person_id: str, group_id_str: str, nickname: str): """ @@ -105,20 +107,20 @@ class NicknameDB: ) if result_inc.modified_count > 0: # logger.debug(f"成功增加 person_id {person_id} 在群组 {group_id_str} 中绰号 '{nickname}' 的计数。") - return # 成功增加计数,操作完成 + return # 成功增加计数,操作完成 # 3b. 如果上一步未修改 (绰号不存在于该群组),尝试将新绰号添加到现有群组 result_push_nick = self.person_info_collection.update_one( { "person_id": person_id, - "group_nicknames.group_id": group_id_str, # 检查群组是否存在 + "group_nicknames.group_id": group_id_str, # 检查群组是否存在 }, {"$push": {"group_nicknames.$[group].nicknames": {"name": nickname, "count": 1}}}, array_filters=[{"group.group_id": group_id_str}], ) if result_push_nick.modified_count > 0: logger.debug(f"成功为 person_id {person_id} 在现有群组 {group_id_str} 中添加新绰号 '{nickname}'。") - return # 成功添加绰号,操作完成 + return # 成功添加绰号,操作完成 # 3c. 如果上一步也未修改 (群组条目本身不存在),则添加新的群组条目和绰号 # 确保 group_nicknames 数组存在 (作为保险措施) @@ -130,7 +132,7 @@ class NicknameDB: result_push_group = self.person_info_collection.update_one( { "person_id": person_id, - "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 + "group_nicknames.group_id": {"$ne": group_id_str}, # 确保该群组 ID 尚未存在 }, { "$push": { @@ -144,7 +146,7 @@ class NicknameDB: if result_push_group.modified_count > 0: logger.debug(f"为 person_id {person_id} 添加了新的群组 {group_id_str} 和绰号 '{nickname}'。") # else: - # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") + # logger.warning(f"尝试为 person_id {person_id} 添加新群组 {group_id_str} 失败,可能群组已存在但结构不符合预期。") except (OperationFailure, DuplicateKeyError) as db_err: logger.exception( @@ -152,5 +154,7 @@ class NicknameDB: ) # 根据需要决定是否向上抛出 raise db_err except Exception as e: - logger.exception(f"更新群组绰号计数时发生意外错误: person_id {person_id}, group {group_id_str}, nick {nickname}. Error: {e}") - # 根据需要决定是否向上抛出 raise e \ No newline at end of file + logger.exception( + f"更新群组绰号计数时发生意外错误: person_id {person_id}, group {group_id_str}, nick {nickname}. Error: {e}" + ) + # 根据需要决定是否向上抛出 raise e diff --git a/src/plugins/group_nickname/nickname_manager.py b/src/plugins/group_nickname/nickname_manager.py index 3d47ad1d..a5a3872f 100644 --- a/src/plugins/group_nickname/nickname_manager.py +++ b/src/plugins/group_nickname/nickname_manager.py @@ -18,6 +18,7 @@ from .nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_ # 依赖于 person_info_manager 来生成 person_id from ..person_info.person_info import person_info_manager + # 依赖于 relationship_manager 来获取用户名称和现有绰号 from ..person_info.relationship_manager import relationship_manager @@ -28,11 +29,13 @@ from src.plugins.utils.chat_message_builder import build_readable_messages, get_ logger = get_logger("NicknameManager") + class NicknameManager: """ 管理群组绰号分析、处理、存储和使用的单例类。 封装了 LLM 调用、后台处理线程和数据库交互。 """ + _instance = None _lock = threading.Lock() @@ -44,7 +47,7 @@ class NicknameManager: if not cls._instance: logger.info("正在创建 NicknameManager 单例实例...") cls._instance = super(NicknameManager, cls).__new__(cls) - cls._instance._initialized = False # 添加初始化标志 + cls._instance._initialized = False # 添加初始化标志 return cls._instance def __init__(self): @@ -52,7 +55,7 @@ class NicknameManager: 初始化 NicknameManager。 使用锁和标志确保实际初始化只执行一次。 """ - if self._initialized: # 如果已初始化,直接返回 + if self._initialized: # 如果已初始化,直接返回 return with self._lock: @@ -65,11 +68,11 @@ class NicknameManager: self.is_enabled = self.config.ENABLE_NICKNAME_MAPPING # 数据库处理器 - person_info_collection = getattr(db, 'person_info', None) + person_info_collection = getattr(db, "person_info", None) self.db_handler = NicknameDB(person_info_collection) if not self.db_handler.is_available(): logger.error("数据库处理器初始化失败,NicknameManager 功能受限。") - self.is_enabled = False # 如果数据库不可用,禁用功能 + self.is_enabled = False # 如果数据库不可用,禁用功能 # LLM 映射器 self.llm_mapper: Optional[LLMRequest] = None @@ -79,8 +82,8 @@ class NicknameManager: if model_config and model_config.get("name"): self.llm_mapper = LLMRequest( model=model_config, - temperature=model_config.get("temp", 0.5), # 使用 get 获取并提供默认值 - max_tokens=model_config.get("max_tokens", 256), # 使用 get 获取并提供默认值 + temperature=model_config.get("temp", 0.5), # 使用 get 获取并提供默认值 + max_tokens=model_config.get("max_tokens", 256), # 使用 get 获取并提供默认值 request_type="nickname_mapping", ) logger.info("绰号映射 LLM 映射器初始化成功。") @@ -103,7 +106,7 @@ class NicknameManager: self._nickname_thread: Optional[threading.Thread] = None self.sleep_interval = getattr(self.config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) - self._initialized = True # 标记为已初始化 + self._initialized = True # 标记为已初始化 logger.info("NicknameManager 初始化完成。") # 公共方法 @@ -115,10 +118,10 @@ class NicknameManager: return if self._nickname_thread is None or not self._nickname_thread.is_alive(): logger.info("正在启动绰号处理器线程...") - self._stop_event.clear() # 清除停止事件标志 + self._stop_event.clear() # 清除停止事件标志 self._nickname_thread = threading.Thread( - target=self._run_processor_in_thread, # 线程执行的入口函数 - daemon=True # 设置为守护线程,主程序退出时自动结束 + target=self._run_processor_in_thread, # 线程执行的入口函数 + daemon=True, # 设置为守护线程,主程序退出时自动结束 ) self._nickname_thread.start() logger.info(f"绰号处理器线程已启动 (ID: {self._nickname_thread.ident})") @@ -129,7 +132,7 @@ class NicknameManager: """停止后台处理线程。""" if self._nickname_thread and self._nickname_thread.is_alive(): logger.info("正在停止绰号处理器线程...") - self._stop_event.set() # 设置停止事件标志 + self._stop_event.set() # 设置停止事件标志 try: # 可选:尝试清空队列,避免丢失未处理的任务 # while not self.nickname_queue.empty(): @@ -140,7 +143,7 @@ class NicknameManager: # break # logger.info("绰号处理队列已清空。") - self._nickname_thread.join(timeout=10) # 等待线程结束,设置超时 + self._nickname_thread.join(timeout=10) # 等待线程结束,设置超时 if self._nickname_thread.is_alive(): logger.warning("绰号处理器线程在超时后仍未停止。") except Exception as e: @@ -148,7 +151,7 @@ class NicknameManager: finally: if self._nickname_thread and not self._nickname_thread.is_alive(): logger.info("绰号处理器线程已成功停止。") - self._nickname_thread = None # 清理线程对象引用 + self._nickname_thread = None # 清理线程对象引用 else: logger.info("绰号处理器线程未在运行或已被清理。") @@ -163,7 +166,7 @@ class NicknameManager: 取代了旧的 trigger_nickname_analysis_if_needed 函数。 """ if not self.is_enabled: - return # 功能禁用则直接返回 + return # 功能禁用则直接返回 current_chat_stream = chat_stream or anchor_message.chat_stream if not current_chat_stream or not current_chat_stream.group_info: @@ -183,8 +186,11 @@ class NicknameManager: # 格式化历史记录 chat_history_str = await build_readable_messages( messages=history_messages, - replace_bot_name=True, merge_messages=False, timestamp_mode="relative", - read_mark=0.0, truncate=False, + replace_bot_name=True, + merge_messages=False, + timestamp_mode="relative", + read_mark=0.0, + truncate=False, ) # 2. 获取 Bot 回复 @@ -195,7 +201,9 @@ class NicknameManager: platform = current_chat_stream.platform # 4. 构建用户 ID 到名称的映射 (user_name_map) - user_ids_in_history = {str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id")} + user_ids_in_history = { + str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id") + } user_name_map = {} if user_ids_in_history: try: @@ -212,28 +220,29 @@ class NicknameManager: else: # 回退查找历史记录中的 nickname latest_nickname = next( - (m["user_info"].get("user_nickname") - for m in reversed(history_messages) - if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname")), + ( + m["user_info"].get("user_nickname") + for m in reversed(history_messages) + if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") + ), None, ) user_name_map[user_id] = latest_nickname or f"未知({user_id})" # 5. 添加到内部处理队列 item = (chat_history_str, bot_reply_str, platform, group_id, user_name_map) - self._add_to_queue(item, platform, group_id) # 调用私有方法入队 + self._add_to_queue(item, platform, group_id) # 调用私有方法入队 except Exception as e: logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) - async def get_nickname_prompt_injection(self, chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: """ 获取并格式化用于 Prompt 注入的绰号信息字符串。 取代了旧的 get_nickname_injection_for_prompt 函数。 """ if not self.is_enabled or not chat_stream or not chat_stream.group_info: - return "" # 功能禁用或非群聊则返回空 + return "" # 功能禁用或非群聊则返回空 log_prefix = f"[{chat_stream.stream_id}]" try: @@ -241,7 +250,11 @@ class NicknameManager: platform = chat_stream.platform # 确定上下文中的用户 ID - user_ids_in_context = {str(msg["user_info"]["user_id"]) for msg in message_list_before_now if msg.get("user_info", {}).get("user_id")} + user_ids_in_context = { + str(msg["user_info"]["user_id"]) + for msg in message_list_before_now + if msg.get("user_info", {}).get("user_id") + } # 如果消息列表为空,尝试获取最近发言者 if not user_ids_in_context: @@ -265,12 +278,11 @@ class NicknameManager: logger.debug(f"{log_prefix} 生成的绰号 Prompt 注入:\n{injection_str}") return injection_str else: - return "" # 没有获取到绰号数据 + return "" # 没有获取到绰号数据 except Exception as e: logger.error(f"{log_prefix} 获取绰号注入时出错: {e}", exc_info=True) - return "" # 出错时返回空 - + return "" # 出错时返回空 # 私有/内部方法 @@ -278,13 +290,16 @@ class NicknameManager: """将项目添加到内部处理队列。""" try: self.nickname_queue.put_nowait(item) - logger.debug(f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}") + logger.debug( + f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}" + ) except queue.Full: - logger.warning(f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。") + logger.warning( + f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。" + ) except Exception as e: logger.error(f"将项目添加到绰号队列时出错: {e}", exc_info=True) - async def _analyze_and_update_nicknames(self, item: tuple): """处理单个队列项目:调用 LLM 分析并更新数据库。""" if not isinstance(item, tuple) or len(item) != 5: @@ -326,7 +341,9 @@ class NicknameManager: # 步骤 1: 生成 person_id person_id = person_info_manager.get_person_id(platform, user_id_str) if not person_id: - logger.error(f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") + logger.error( + f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。" + ) continue # 步骤 2: 确保 Person 文档存在 (调用 DB Handler) @@ -335,7 +352,7 @@ class NicknameManager: # 步骤 3: 更新群组绰号 (调用 DB Handler) self.db_handler.update_group_nickname_count(person_id, group_id, nickname) - except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 + except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 logger.exception( f"{log_prefix} 数据库操作失败 ({type(db_err).__name__}): 用户 {user_id_str}, 绰号 {nickname}. 错误: {db_err}" ) @@ -344,7 +361,6 @@ class NicknameManager: else: logger.debug(f"{log_prefix} LLM 未找到可靠的绰号映射或分析失败。") - async def _call_llm_for_analysis( self, chat_history_str: str, @@ -355,12 +371,12 @@ class NicknameManager: 内部方法:调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射。 (逻辑从 analyze_chat_for_nicknames 移入) """ - if not self.llm_mapper: # 再次检查 LLM 映射器 + if not self.llm_mapper: # 再次检查 LLM 映射器 logger.error("LLM 映射器未初始化,无法执行分析。") return {"is_exist": False} prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) - logger.debug(f"构建的绰号映射 Prompt:\n{prompt[:500]}...") # 截断日志输出 + logger.debug(f"构建的绰号映射 Prompt:\n{prompt[:500]}...") # 截断日志输出 try: # 调用 LLM @@ -379,17 +395,16 @@ class NicknameManager: response_content = match.group(1).strip() # 尝试直接解析 JSON,即使没有代码块标记 elif response_content.startswith("{") and response_content.endswith("}"): - pass # 可能是纯 JSON + pass # 可能是纯 JSON else: # 尝试在文本中查找 JSON 对象 - json_match = re.search(r'\{.*\}', response_content, re.DOTALL) + json_match = re.search(r"\{.*\}", response_content, re.DOTALL) if json_match: response_content = json_match.group(0) else: logger.warning(f"LLM 响应似乎不包含有效的 JSON 对象。响应: {response_content}") return {"is_exist": False} - # 解析 JSON result = json.loads(response_content) @@ -404,7 +419,7 @@ class NicknameManager: original_data = result.get("data") if isinstance(original_data, dict) and original_data: logger.info(f"LLM 找到的原始绰号映射: {original_data}") - filtered_data = self._filter_llm_results(original_data, user_name_map) # 调用过滤函数 + filtered_data = self._filter_llm_results(original_data, user_name_map) # 调用过滤函数 if not filtered_data: logger.info("所有找到的绰号映射都被过滤掉了。") return {"is_exist": False} @@ -418,7 +433,7 @@ class NicknameManager: elif is_exist is False: logger.info("LLM 明确指示未找到可靠的绰号映射 (is_exist=False)。") return {"is_exist": False} - else: # is_exist 不是 True 或 False (包括 None) + else: # is_exist 不是 True 或 False (包括 None) logger.warning(f"LLM 响应格式错误: 'is_exist' 的值 '{is_exist}' 无效。") return {"is_exist": False} @@ -432,7 +447,7 @@ class NicknameManager: def _filter_llm_results(self, original_data: Dict[str, str], user_name_map: Dict[str, str]) -> Dict[str, str]: """过滤 LLM 返回的绰号映射结果。""" filtered_data = {} - bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, 'BOT_QQ') else None + bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, "BOT_QQ") else None for user_id, nickname in original_data.items(): # 过滤条件 1: user_id 必须是字符串 @@ -455,11 +470,10 @@ class NicknameManager: # continue # 如果通过所有过滤条件,则保留 - filtered_data[user_id] = nickname.strip() # 保留时去除首尾空白 + filtered_data[user_id] = nickname.strip() # 保留时去除首尾空白 return filtered_data - # 线程相关 def _run_processor_in_thread(self): """后台线程的入口函数,负责创建和运行 asyncio 事件循环。""" @@ -497,7 +511,6 @@ class NicknameManager: logger.error(f"(线程 ID: {thread_id}) 关闭循环时出错: {loop_close_err}", exc_info=True) logger.info(f"绰号处理器线程结束 (线程 ID: {thread_id}).") - async def _processing_loop(self): """后台线程中运行的异步处理循环。""" thread_id = threading.get_ident() @@ -511,14 +524,14 @@ class NicknameManager: # 处理获取到的项目 await self._analyze_and_update_nicknames(item) - self.nickname_queue.task_done() # 标记任务完成 + self.nickname_queue.task_done() # 标记任务完成 except queue.Empty: # 超时,队列为空,继续循环检查停止事件 continue except asyncio.CancelledError: logger.info(f"绰号处理循环被取消 (线程 ID: {thread_id})。") - break # 任务被取消,退出循环 + break # 任务被取消,退出循环 except Exception as e: # 捕获处理单个项目时可能发生的其他异常 logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") diff --git a/src/plugins/group_nickname/nickname_mapper.py b/src/plugins/group_nickname/nickname_mapper.py index aa86ef14..35f96445 100644 --- a/src/plugins/group_nickname/nickname_mapper.py +++ b/src/plugins/group_nickname/nickname_mapper.py @@ -8,6 +8,7 @@ logger = get_logger("nickname_mapper") # LLMRequest 实例和 analyze_chat_for_nicknames 函数已被移除 + def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: Dict[str, str]) -> str: """ 构建用于 LLM 进行绰号映射分析的 Prompt。 @@ -23,7 +24,7 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: # 将 user_name_map 格式化为列表字符串 user_list_str = "\n".join([f"- {uid}: {name}" for uid, name in user_name_map.items() if uid and name]) if not user_list_str: - user_list_str = "无" # 如果映射为空,明确告知 + user_list_str = "无" # 如果映射为空,明确告知 # 核心 Prompt 内容 prompt = f""" @@ -73,4 +74,5 @@ def _build_mapping_prompt(chat_history_str: str, bot_reply: str, user_name_map: # logger.debug(f"构建的绰号映射 Prompt (部分):\n{prompt[:500]}...") # 可以在 NicknameManager 中记录 return prompt + # analyze_chat_for_nicknames 函数已被移除,其逻辑移至 NicknameManager._call_llm_for_analysis diff --git a/src/plugins/group_nickname/nickname_utils.py b/src/plugins/group_nickname/nickname_utils.py index fefe3c0c..4fdca08d 100644 --- a/src/plugins/group_nickname/nickname_utils.py +++ b/src/plugins/group_nickname/nickname_utils.py @@ -24,8 +24,8 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int if not all_nicknames_info: return [] - candidates = [] # 存储 (用户名, 绰号, 次数, 权重) - smoothing_factor = getattr(global_config, "NICKNAME_PROBABILITY_SMOOTHING", 1.0) # 平滑因子,避免权重为0 + candidates = [] # 存储 (用户名, 绰号, 次数, 权重) + smoothing_factor = getattr(global_config, "NICKNAME_PROBABILITY_SMOOTHING", 1.0) # 平滑因子,避免权重为0 for user_name, nicknames in all_nicknames_info.items(): if nicknames and isinstance(nicknames, list): @@ -35,10 +35,12 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int nickname, count = list(nickname_entry.items())[0] # 确保次数是正整数 if isinstance(count, int) and count > 0 and isinstance(nickname, str) and nickname: - weight = count + smoothing_factor # 计算权重 + weight = count + smoothing_factor # 计算权重 candidates.append((user_name, nickname, count, weight)) else: - logger.warning(f"用户 '{user_name}' 的绰号条目无效: {nickname_entry} (次数非正整数或绰号为空)。已跳过。") + logger.warning( + f"用户 '{user_name}' 的绰号条目无效: {nickname_entry} (次数非正整数或绰号为空)。已跳过。" + ) else: logger.warning(f"用户 '{user_name}' 的绰号条目格式无效: {nickname_entry}。已跳过。") @@ -61,9 +63,9 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int # 筛选出未被选中的候选 selected_ids = set( (c[0], c[1]) for c in selected_candidates_with_weight - ) # 使用 (用户名, 绰号) 作为唯一标识 + ) # 使用 (用户名, 绰号) 作为唯一标识 remaining_candidates = [c for c in candidates if (c[0], c[1]) not in selected_ids] - remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + remaining_candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 needed = num_to_select - len(selected_candidates_with_weight) selected_candidates_with_weight.extend(remaining_candidates[:needed]) @@ -71,7 +73,7 @@ def select_nicknames_for_prompt(all_nicknames_info: Dict[str, List[Dict[str, int # 日志:记录加权随机选择时发生的错误,并回退到简单选择 logger.error(f"绰号加权随机选择时出错: {e}。将回退到选择次数最多的 Top N。", exc_info=True) # 出错时回退到选择次数最多的 N 个 - candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 + candidates.sort(key=lambda x: x[2], reverse=True) # 按原始次数排序 selected_candidates_with_weight = candidates[:num_to_select] # 格式化输出结果为 (用户名, 绰号, 次数),移除权重 @@ -98,10 +100,8 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in return "" # Prompt 注入部分的标题 - prompt_lines = [ - "以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),供你参考:" - ] - grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 + prompt_lines = ["以下是聊天记录中一些成员在本群的绰号信息(按常用度排序),供你参考:"] + grouped_by_user: Dict[str, List[str]] = {} # 用于按用户分组 # 按用户分组绰号 for user_name, nickname, _count in selected_nicknames: @@ -112,7 +112,7 @@ def format_nickname_prompt_injection(selected_nicknames: List[Tuple[str, str, in # 构建每个用户的绰号字符串 for user_name, nicknames in grouped_by_user.items(): - nicknames_str = "、".join(nicknames) # 使用中文顿号连接 + nicknames_str = "、".join(nicknames) # 使用中文顿号连接 # 格式化输出,例如: "- 张三,ta 可能被称为:“三儿”、“张哥”" prompt_lines.append(f"- {user_name},ta 可能被称为:{nicknames_str}") @@ -141,7 +141,7 @@ def weighted_sample_without_replacement( return [] n = len(candidates) if k >= n: - return candidates[:] # 返回副本 + return candidates[:] # 返回副本 # 计算每个元素的 key = U^(1/weight),其中 U 是 (0, 1) 之间的随机数 # 为了数值稳定性,计算 log(key) = log(U) / weight @@ -151,12 +151,12 @@ def weighted_sample_without_replacement( weight = candidates[i][3] if weight <= 0: # 处理权重为0或负数的情况,赋予一个极小的概率(或极大负数的log_key) - log_key = float('-inf') # 或者一个非常大的负数 + log_key = float("-inf") # 或者一个非常大的负数 logger.warning(f"候选者 {candidates[i][:2]} 的权重为非正数 ({weight}),抽中概率极低。") else: - log_u = -random.expovariate(1.0) # 生成 -Exponential(1) 随机数 + log_u = -random.expovariate(1.0) # 生成 -Exponential(1) 随机数 log_key = log_u / weight - weighted_keys.append((log_key, i)) # 存储 (log_key, 原始索引) + weighted_keys.append((log_key, i)) # 存储 (log_key, 原始索引) # 按 log_key 降序排序 (相当于按 key 升序排序) weighted_keys.sort(key=lambda x: x[0], reverse=True) @@ -169,6 +169,7 @@ def weighted_sample_without_replacement( return selected_items + # 移除旧的流程函数 # get_nickname_injection_for_prompt 和 trigger_nickname_analysis_if_needed # 的逻辑现在由 NicknameManager 处理 diff --git a/src/plugins/heartFC_chat/heartFC_chat.py b/src/plugins/heartFC_chat/heartFC_chat.py index 12b720bf..d8f5f804 100644 --- a/src/plugins/heartFC_chat/heartFC_chat.py +++ b/src/plugins/heartFC_chat/heartFC_chat.py @@ -873,7 +873,9 @@ class HeartFChatting: limit=global_config.observation_context_size, # 使用与 prompt 构建一致的 limit ) # 调用工具函数获取格式化后的绰号字符串 - nickname_injection_str = await nickname_manager.get_nickname_prompt_injection(self.chat_stream, message_list_before_now) + nickname_injection_str = await nickname_manager.get_nickname_prompt_injection( + self.chat_stream, message_list_before_now + ) # --- 构建提示词 (调用修改后的 PromptBuilder 方法) --- prompt = await prompt_builder.build_planner_prompt( diff --git a/src/plugins/heartFC_chat/heartflow_prompt_builder.py b/src/plugins/heartFC_chat/heartflow_prompt_builder.py index b979268c..f9445301 100644 --- a/src/plugins/heartFC_chat/heartflow_prompt_builder.py +++ b/src/plugins/heartFC_chat/heartflow_prompt_builder.py @@ -255,7 +255,9 @@ async def _build_prompt_focus(reason, current_mind_info, structured_info, chat_s chat_target_2 = await global_prompt_manager.get_prompt_async("chat_target_group2") # 调用新的工具函数获取绰号信息 - nickname_injection_str = await nickname_manager.get_nickname_prompt_injection(chat_stream, message_list_before_now) + nickname_injection_str = await nickname_manager.get_nickname_prompt_injection( + chat_stream, message_list_before_now + ) prompt = await global_prompt_manager.format_prompt( template_name, @@ -451,7 +453,9 @@ class PromptBuilder: chat_target_2 = await global_prompt_manager.get_prompt_async("chat_target_group2") # 调用新的工具函数获取绰号信息 - nickname_injection_str = await nickname_manager.get_nickname_prompt_injection(chat_stream, message_list_before_now) + nickname_injection_str = await nickname_manager.get_nickname_prompt_injection( + chat_stream, message_list_before_now + ) prompt = await global_prompt_manager.format_prompt( template_name, From 9890d76c4313d1e0ac12bdaa43456a578c0b6c83 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 3 May 2025 14:28:23 +0800 Subject: [PATCH 56/58] modified: src/plugins/group_nickname/nickname_manager.py --- .../group_nickname/nickname_manager.py | 323 ++++++++---------- 1 file changed, 145 insertions(+), 178 deletions(-) diff --git a/src/plugins/group_nickname/nickname_manager.py b/src/plugins/group_nickname/nickname_manager.py index a5a3872f..b54c92b3 100644 --- a/src/plugins/group_nickname/nickname_manager.py +++ b/src/plugins/group_nickname/nickname_manager.py @@ -1,12 +1,9 @@ import asyncio import threading -import queue -import traceback import time import json import re from typing import Dict, Optional, List, Any - from pymongo.errors import OperationFailure, DuplicateKeyError from src.common.logger_manager import get_logger from src.common.database import db @@ -15,39 +12,76 @@ from src.plugins.models.utils_model import LLMRequest from .nickname_db import NicknameDB from .nickname_mapper import _build_mapping_prompt from .nickname_utils import select_nicknames_for_prompt, format_nickname_prompt_injection - -# 依赖于 person_info_manager 来生成 person_id from ..person_info.person_info import person_info_manager - -# 依赖于 relationship_manager 来获取用户名称和现有绰号 from ..person_info.relationship_manager import relationship_manager - -# 导入消息和聊天流相关的类型和工具 from src.plugins.chat.chat_stream import ChatStream from src.plugins.chat.message import MessageRecv from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat logger = get_logger("NicknameManager") +logger_helper = get_logger("AsyncLoopHelper") # 为辅助函数创建单独的 logger +def run_async_loop(loop: asyncio.AbstractEventLoop, coro): + """ + 运行给定的协程直到完成,并确保循环最终关闭。 + + Args: + loop: 要使用的 asyncio 事件循环。 + coro: 要在循环中运行的主协程。 + """ + try: + logger_helper.debug(f"Running coroutine in loop {id(loop)}...") + result = loop.run_until_complete(coro) + logger_helper.debug(f"Coroutine completed in loop {id(loop)}.") + return result + except asyncio.CancelledError: + logger_helper.info(f"Coroutine in loop {id(loop)} was cancelled.") + # 取消是预期行为,不视为错误 + except Exception as e: + logger_helper.error(f"Error in async loop {id(loop)}: {e}", exc_info=True) + finally: + try: + # 1. 取消所有剩余任务 + all_tasks = asyncio.all_tasks(loop) + current_task = asyncio.current_task(loop) + tasks_to_cancel = [task for task in all_tasks if task is not current_task] # 避免取消 run_until_complete 本身 + if tasks_to_cancel: + logger_helper.info(f"Cancelling {len(tasks_to_cancel)} outstanding tasks in loop {id(loop)}...") + for task in tasks_to_cancel: + task.cancel() + # 等待取消完成 + loop.run_until_complete(asyncio.gather(*tasks_to_cancel, return_exceptions=True)) + logger_helper.info(f"Outstanding tasks cancelled in loop {id(loop)}.") + + # 2. 停止循环 (如果仍在运行) + if loop.is_running(): + loop.stop() + logger_helper.info(f"Asyncio loop {id(loop)} stopped.") + + # 3. 关闭循环 (如果未关闭) + if not loop.is_closed(): + # 在关闭前再运行一次以处理挂起的关闭回调 + loop.run_until_complete(loop.shutdown_asyncgens()) # 关闭异步生成器 + loop.close() + logger_helper.info(f"Asyncio loop {id(loop)} closed.") + except Exception as close_err: + logger_helper.error(f"Error during asyncio loop cleanup for loop {id(loop)}: {close_err}", exc_info=True) class NicknameManager: """ 管理群组绰号分析、处理、存储和使用的单例类。 封装了 LLM 调用、后台处理线程和数据库交互。 """ - _instance = None _lock = threading.Lock() - # Singleton Implementation def __new__(cls, *args, **kwargs): if not cls._instance: with cls._lock: - # 再次检查,防止多线程并发创建实例 if not cls._instance: logger.info("正在创建 NicknameManager 单例实例...") cls._instance = super(NicknameManager, cls).__new__(cls) - cls._instance._initialized = False # 添加初始化标志 + cls._instance._initialized = False return cls._instance def __init__(self): @@ -55,12 +89,11 @@ class NicknameManager: 初始化 NicknameManager。 使用锁和标志确保实际初始化只执行一次。 """ - if self._initialized: # 如果已初始化,直接返回 + if hasattr(self, '_initialized') and self._initialized: return with self._lock: - # 再次检查初始化标志,防止重复初始化 - if self._initialized: + if hasattr(self, '_initialized') and self._initialized: return logger.info("正在初始化 NicknameManager 组件...") @@ -68,11 +101,11 @@ class NicknameManager: self.is_enabled = self.config.ENABLE_NICKNAME_MAPPING # 数据库处理器 - person_info_collection = getattr(db, "person_info", None) + person_info_collection = getattr(db, 'person_info', None) self.db_handler = NicknameDB(person_info_collection) if not self.db_handler.is_available(): logger.error("数据库处理器初始化失败,NicknameManager 功能受限。") - self.is_enabled = False # 如果数据库不可用,禁用功能 + self.is_enabled = False # LLM 映射器 self.llm_mapper: Optional[LLMRequest] = None @@ -82,8 +115,8 @@ class NicknameManager: if model_config and model_config.get("name"): self.llm_mapper = LLMRequest( model=model_config, - temperature=model_config.get("temp", 0.5), # 使用 get 获取并提供默认值 - max_tokens=model_config.get("max_tokens", 256), # 使用 get 获取并提供默认值 + temperature=model_config.get("temp", 0.5), + max_tokens=model_config.get("max_tokens", 256), request_type="nickname_mapping", ) logger.info("绰号映射 LLM 映射器初始化成功。") @@ -101,16 +134,15 @@ class NicknameManager: # 队列和线程 self.queue_max_size = getattr(self.config, "NICKNAME_QUEUE_MAX_SIZE", 100) - self.nickname_queue: queue.Queue = queue.Queue(maxsize=self.queue_max_size) - self._stop_event = threading.Event() + # 使用 asyncio.Queue + self.nickname_queue: asyncio.Queue = asyncio.Queue(maxsize=self.queue_max_size) + self._stop_event = threading.Event() # stop_event 仍然使用 threading.Event,因为它是由另一个线程设置的 self._nickname_thread: Optional[threading.Thread] = None - self.sleep_interval = getattr(self.config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) + self.sleep_interval = getattr(self.config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) # 超时时间 - self._initialized = True # 标记为已初始化 + self._initialized = True logger.info("NicknameManager 初始化完成。") - # 公共方法 - def start_processor(self): """启动后台处理线程(如果已启用且未运行)。""" if not self.is_enabled: @@ -118,10 +150,10 @@ class NicknameManager: return if self._nickname_thread is None or not self._nickname_thread.is_alive(): logger.info("正在启动绰号处理器线程...") - self._stop_event.clear() # 清除停止事件标志 + self._stop_event.clear() self._nickname_thread = threading.Thread( - target=self._run_processor_in_thread, # 线程执行的入口函数 - daemon=True, # 设置为守护线程,主程序退出时自动结束 + target=self._run_processor_in_thread, # 线程目标函数不变 + daemon=True ) self._nickname_thread.start() logger.info(f"绰号处理器线程已启动 (ID: {self._nickname_thread.ident})") @@ -132,18 +164,10 @@ class NicknameManager: """停止后台处理线程。""" if self._nickname_thread and self._nickname_thread.is_alive(): logger.info("正在停止绰号处理器线程...") - self._stop_event.set() # 设置停止事件标志 + self._stop_event.set() # 设置停止事件,_processing_loop 会检测到 try: - # 可选:尝试清空队列,避免丢失未处理的任务 - # while not self.nickname_queue.empty(): - # try: - # self.nickname_queue.get_nowait() - # self.nickname_queue.task_done() - # except queue.Empty: - # break - # logger.info("绰号处理队列已清空。") - - self._nickname_thread.join(timeout=10) # 等待线程结束,设置超时 + # 不需要清空 asyncio.Queue,让循环自然结束或被取消 + self._nickname_thread.join(timeout=10) # 等待线程结束 if self._nickname_thread.is_alive(): logger.warning("绰号处理器线程在超时后仍未停止。") except Exception as e: @@ -151,7 +175,7 @@ class NicknameManager: finally: if self._nickname_thread and not self._nickname_thread.is_alive(): logger.info("绰号处理器线程已成功停止。") - self._nickname_thread = None # 清理线程对象引用 + self._nickname_thread = None else: logger.info("绰号处理器线程未在运行或已被清理。") @@ -163,10 +187,10 @@ class NicknameManager: ): """ 准备数据并将其排队等待绰号分析(如果满足条件)。 - 取代了旧的 trigger_nickname_analysis_if_needed 函数。 + (现在调用异步的 _add_to_queue) """ if not self.is_enabled: - return # 功能禁用则直接返回 + return current_chat_stream = chat_stream or anchor_message.chat_stream if not current_chat_stream or not current_chat_stream.group_info: @@ -182,56 +206,41 @@ class NicknameManager: timestamp=time.time(), limit=history_limit, ) - # 格式化历史记录 chat_history_str = await build_readable_messages( messages=history_messages, - replace_bot_name=True, - merge_messages=False, - timestamp_mode="relative", - read_mark=0.0, - truncate=False, + replace_bot_name=True, merge_messages=False, timestamp_mode="relative", + read_mark=0.0, truncate=False, ) - # 2. 获取 Bot 回复 bot_reply_str = " ".join(bot_reply) if bot_reply else "" - # 3. 获取群组和平台信息 group_id = str(current_chat_stream.group_info.group_id) platform = current_chat_stream.platform - # 4. 构建用户 ID 到名称的映射 (user_name_map) - user_ids_in_history = { - str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id") - } + user_ids_in_history = {str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id")} user_name_map = {} if user_ids_in_history: try: - # 使用 relationship_manager 批量获取名称 names_data = await relationship_manager.get_person_names_batch(platform, list(user_ids_in_history)) except Exception as e: logger.error(f"{log_prefix} 批量获取 person_name 时出错: {e}", exc_info=True) names_data = {} - - # 填充 user_name_map for user_id in user_ids_in_history: if user_id in names_data: user_name_map[user_id] = names_data[user_id] else: - # 回退查找历史记录中的 nickname latest_nickname = next( - ( - m["user_info"].get("user_nickname") - for m in reversed(history_messages) - if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") - ), + (m["user_info"].get("user_nickname") + for m in reversed(history_messages) + if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname")), None, ) user_name_map[user_id] = latest_nickname or f"未知({user_id})" - # 5. 添加到内部处理队列 + item = (chat_history_str, bot_reply_str, platform, group_id, user_name_map) - self._add_to_queue(item, platform, group_id) # 调用私有方法入队 + await self._add_to_queue(item, platform, group_id) except Exception as e: logger.error(f"{log_prefix} 触发绰号分析时出错: {e}", exc_info=True) @@ -239,24 +248,16 @@ class NicknameManager: async def get_nickname_prompt_injection(self, chat_stream: ChatStream, message_list_before_now: List[Dict]) -> str: """ 获取并格式化用于 Prompt 注入的绰号信息字符串。 - 取代了旧的 get_nickname_injection_for_prompt 函数。 """ if not self.is_enabled or not chat_stream or not chat_stream.group_info: - return "" # 功能禁用或非群聊则返回空 + return "" log_prefix = f"[{chat_stream.stream_id}]" try: group_id = str(chat_stream.group_info.group_id) platform = chat_stream.platform + user_ids_in_context = {str(msg["user_info"]["user_id"]) for msg in message_list_before_now if msg.get("user_info", {}).get("user_id")} - # 确定上下文中的用户 ID - user_ids_in_context = { - str(msg["user_info"]["user_id"]) - for msg in message_list_before_now - if msg.get("user_info", {}).get("user_id") - } - - # 如果消息列表为空,尝试获取最近发言者 if not user_ids_in_context: recent_speakers = chat_stream.get_recent_speakers(limit=5) user_ids_in_context.update(str(speaker["user_id"]) for speaker in recent_speakers) @@ -265,41 +266,40 @@ class NicknameManager: logger.warning(f"{log_prefix} 未找到上下文用户用于绰号注入。") return "" - # 使用 relationship_manager 批量获取这些用户的群组绰号 all_nicknames_data = await relationship_manager.get_users_group_nicknames( platform, list(user_ids_in_context), group_id ) if all_nicknames_data: - # 使用 nickname_utils 中的工具函数进行选择和格式化 selected_nicknames = select_nicknames_for_prompt(all_nicknames_data) injection_str = format_nickname_prompt_injection(selected_nicknames) if injection_str: logger.debug(f"{log_prefix} 生成的绰号 Prompt 注入:\n{injection_str}") return injection_str else: - return "" # 没有获取到绰号数据 + return "" except Exception as e: logger.error(f"{log_prefix} 获取绰号注入时出错: {e}", exc_info=True) - return "" # 出错时返回空 + return "" + # 私有/内部方法 - def _add_to_queue(self, item: tuple, platform: str, group_id: str): - """将项目添加到内部处理队列。""" + async def _add_to_queue(self, item: tuple, platform: str, group_id: str): + """将项目异步添加到内部处理队列 (asyncio.Queue)。""" try: - self.nickname_queue.put_nowait(item) - logger.debug( - f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}" - ) - except queue.Full: - logger.warning( - f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。" - ) + # 使用 await put(),如果队列满则异步等待 + await self.nickname_queue.put(item) + logger.debug(f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}") + except asyncio.QueueFull: + # 理论上 await put() 不会直接抛 QueueFull,除非 maxsize=0 + # 但保留以防万一或未来修改 + logger.warning(f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。") except Exception as e: logger.error(f"将项目添加到绰号队列时出错: {e}", exc_info=True) + async def _analyze_and_update_nicknames(self, item: tuple): """处理单个队列项目:调用 LLM 分析并更新数据库。""" if not isinstance(item, tuple) or len(item) != 5: @@ -307,8 +307,9 @@ class NicknameManager: return chat_history_str, bot_reply, platform, group_id, user_name_map = item - thread_id = threading.get_ident() - log_prefix = f"[线程 {thread_id}][{platform}:{group_id}]" + # 使用 asyncio.get_running_loop().call_soon(threading.get_ident) 可能不准确,线程ID是同步概念 + # 可以考虑移除线程ID日志或寻找异步安全的获取标识符的方式 + log_prefix = f"[{platform}:{group_id}]" # 简化日志前缀 logger.debug(f"{log_prefix} 开始处理绰号分析任务...") if not self.llm_mapper: @@ -318,16 +319,16 @@ class NicknameManager: logger.error(f"{log_prefix} 数据库处理器不可用,无法更新计数。") return - # 1. 调用 LLM 分析 (逻辑从 nickname_mapper 移入) + # 1. 调用 LLM 分析 (内部逻辑不变) analysis_result = await self._call_llm_for_analysis(chat_history_str, bot_reply, user_name_map) - # 2. 如果分析成功且找到映射,则更新数据库 + # 2. 如果分析成功且找到映射,则更新数据库 (内部逻辑不变) if analysis_result.get("is_exist") and analysis_result.get("data"): nickname_map_to_update = analysis_result["data"] logger.info(f"{log_prefix} LLM 找到绰号映射,准备更新数据库: {nickname_map_to_update}") for user_id_str, nickname in nickname_map_to_update.items(): - # 基本验证 + # ... (验证和数据库更新逻辑保持不变) ... if not user_id_str or not nickname: logger.warning(f"{log_prefix} 跳过无效条目: user_id='{user_id_str}', nickname='{nickname}'") continue @@ -335,24 +336,15 @@ class NicknameManager: logger.warning(f"{log_prefix} 无效的用户ID格式 (非纯数字): '{user_id_str}',跳过。") continue user_id_int = int(user_id_str) - # 结束验证 try: - # 步骤 1: 生成 person_id person_id = person_info_manager.get_person_id(platform, user_id_str) if not person_id: - logger.error( - f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。" - ) + logger.error(f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") continue - - # 步骤 2: 确保 Person 文档存在 (调用 DB Handler) self.db_handler.upsert_person(person_id, user_id_int, platform) - - # 步骤 3: 更新群组绰号 (调用 DB Handler) self.db_handler.update_group_nickname_count(person_id, group_id, nickname) - - except (OperationFailure, DuplicateKeyError) as db_err: # 捕获特定的数据库错误 + except (OperationFailure, DuplicateKeyError) as db_err: logger.exception( f"{log_prefix} 数据库操作失败 ({type(db_err).__name__}): 用户 {user_id_str}, 绰号 {nickname}. 错误: {db_err}" ) @@ -361,6 +353,7 @@ class NicknameManager: else: logger.debug(f"{log_prefix} LLM 未找到可靠的绰号映射或分析失败。") + async def _call_llm_for_analysis( self, chat_history_str: str, @@ -369,17 +362,16 @@ class NicknameManager: ) -> Dict[str, Any]: """ 内部方法:调用 LLM 分析聊天记录和 Bot 回复,提取可靠的 用户ID-绰号 映射。 - (逻辑从 analyze_chat_for_nicknames 移入) """ - if not self.llm_mapper: # 再次检查 LLM 映射器 + # ... (此方法内部逻辑保持不变) ... + if not self.llm_mapper: logger.error("LLM 映射器未初始化,无法执行分析。") return {"is_exist": False} prompt = _build_mapping_prompt(chat_history_str, bot_reply, user_name_map) - logger.debug(f"构建的绰号映射 Prompt:\n{prompt[:500]}...") # 截断日志输出 + logger.debug(f"构建的绰号映射 Prompt:\n{prompt[:500]}...") try: - # 调用 LLM response_content, _, _ = await self.llm_mapper.generate_response(prompt) logger.debug(f"LLM 原始响应 (绰号映射): {response_content}") @@ -387,28 +379,23 @@ class NicknameManager: logger.warning("LLM 返回了空的绰号映射内容。") return {"is_exist": False} - # 清理可能的 Markdown 代码块标记 response_content = response_content.strip() markdown_code_regex = re.compile(r"^```(?:\w+)?\s*\n(.*?)\n\s*```$", re.DOTALL | re.IGNORECASE) match = markdown_code_regex.match(response_content) if match: response_content = match.group(1).strip() - # 尝试直接解析 JSON,即使没有代码块标记 elif response_content.startswith("{") and response_content.endswith("}"): pass # 可能是纯 JSON else: - # 尝试在文本中查找 JSON 对象 - json_match = re.search(r"\{.*\}", response_content, re.DOTALL) + json_match = re.search(r'\{.*\}', response_content, re.DOTALL) if json_match: response_content = json_match.group(0) else: logger.warning(f"LLM 响应似乎不包含有效的 JSON 对象。响应: {response_content}") return {"is_exist": False} - # 解析 JSON result = json.loads(response_content) - # 结果验证和过滤 if not isinstance(result, dict): logger.warning(f"LLM 响应不是一个有效的 JSON 对象 (字典类型)。响应内容: {response_content}") return {"is_exist": False} @@ -419,7 +406,7 @@ class NicknameManager: original_data = result.get("data") if isinstance(original_data, dict) and original_data: logger.info(f"LLM 找到的原始绰号映射: {original_data}") - filtered_data = self._filter_llm_results(original_data, user_name_map) # 调用过滤函数 + filtered_data = self._filter_llm_results(original_data, user_name_map) if not filtered_data: logger.info("所有找到的绰号映射都被过滤掉了。") return {"is_exist": False} @@ -427,13 +414,12 @@ class NicknameManager: logger.info(f"过滤后的绰号映射: {filtered_data}") return {"is_exist": True, "data": filtered_data} else: - # is_exist 为 True 但 data 缺失、不是字典或为空 logger.warning(f"LLM 响应格式错误: is_exist=True 但 data 无效。原始 data: {original_data}") return {"is_exist": False} elif is_exist is False: logger.info("LLM 明确指示未找到可靠的绰号映射 (is_exist=False)。") return {"is_exist": False} - else: # is_exist 不是 True 或 False (包括 None) + else: logger.warning(f"LLM 响应格式错误: 'is_exist' 的值 '{is_exist}' 无效。") return {"is_exist": False} @@ -444,104 +430,85 @@ class NicknameManager: logger.error(f"绰号映射 LLM 调用或处理过程中发生意外错误: {e}", exc_info=True) return {"is_exist": False} + def _filter_llm_results(self, original_data: Dict[str, str], user_name_map: Dict[str, str]) -> Dict[str, str]: """过滤 LLM 返回的绰号映射结果。""" filtered_data = {} - bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, "BOT_QQ") else None + bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, 'BOT_QQ') else None for user_id, nickname in original_data.items(): - # 过滤条件 1: user_id 必须是字符串 if not isinstance(user_id, str): logger.warning(f"过滤掉非字符串 user_id: {user_id}") continue - # 过滤条件 2: 排除机器人自身 if bot_qq_str and user_id == bot_qq_str: logger.debug(f"过滤掉机器人自身的映射: ID {user_id}") continue - # 过滤条件 3: 排除 nickname 为空或仅包含空白的情况 if not nickname or nickname.isspace(): logger.debug(f"过滤掉用户 {user_id} 的空绰号。") continue - - # 过滤条件 4 (可选,根据 Prompt 效果决定是否保留): 排除 nickname 与已知名称相同的情况 # person_name = user_name_map.get(user_id) # if person_name and person_name == nickname: # logger.debug(f"过滤掉用户 {user_id} 的映射: 绰号 '{nickname}' 与其名称 '{person_name}' 相同。") # continue - - # 如果通过所有过滤条件,则保留 - filtered_data[user_id] = nickname.strip() # 保留时去除首尾空白 + filtered_data[user_id] = nickname.strip() return filtered_data + # 线程相关 + # 修改:使用 run_async_loop 辅助函数 def _run_processor_in_thread(self): - """后台线程的入口函数,负责创建和运行 asyncio 事件循环。""" - loop = None - thread_id = threading.get_ident() + """后台线程入口函数,使用辅助函数管理 asyncio 事件循环。""" + thread_id = threading.get_ident() # 获取线程ID用于日志 logger.info(f"绰号处理器线程启动 (线程 ID: {thread_id})...") - try: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - logger.info(f"(线程 ID: {thread_id}) Asyncio 事件循环已创建并设置。") - # 运行主处理循环直到停止事件被设置 - loop.run_until_complete(self._processing_loop()) - except Exception as e: - logger.error(f"(线程 ID: {thread_id}) 运行绰号处理器线程时出错: {e}", exc_info=True) - finally: - # 确保循环被正确关闭 - if loop: - try: - if loop.is_running(): - logger.info(f"(线程 ID: {thread_id}) 正在停止 asyncio 循环...") - all_tasks = asyncio.all_tasks(loop) - if all_tasks: - logger.info(f"(线程 ID: {thread_id}) 正在取消 {len(all_tasks)} 个运行中的任务...") - for task in all_tasks: - task.cancel() - # 等待任务取消完成 - loop.run_until_complete(asyncio.gather(*all_tasks, return_exceptions=True)) - logger.info(f"(线程 ID: {thread_id}) 所有任务已取消。") - loop.stop() - logger.info(f"(线程 ID: {thread_id}) 循环已停止。") - if not loop.is_closed(): - loop.close() - logger.info(f"(线程 ID: {thread_id}) Asyncio 循环已关闭。") - except Exception as loop_close_err: - logger.error(f"(线程 ID: {thread_id}) 关闭循环时出错: {loop_close_err}", exc_info=True) - logger.info(f"绰号处理器线程结束 (线程 ID: {thread_id}).") + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) # 为当前线程设置事件循环 + logger.info(f"(线程 ID: {thread_id}) Asyncio 事件循环已创建并设置。") + # 调用辅助函数来运行主处理协程并管理循环生命周期 + run_async_loop(loop, self._processing_loop()) + + logger.info(f"绰号处理器线程结束 (线程 ID: {thread_id}).") + # 结束修改 + + + # 修改:使用 asyncio.Queue 和 wait_for async def _processing_loop(self): - """后台线程中运行的异步处理循环。""" - thread_id = threading.get_ident() - logger.info(f"绰号处理循环已启动 (线程 ID: {thread_id})。") + """后台线程中运行的异步处理循环 (使用 asyncio.Queue)。""" + # 移除线程ID日志,因为它在异步上下文中不一定准确 + logger.info("绰号异步处理循环已启动。") - while not self._stop_event.is_set(): + while not self._stop_event.is_set(): # 仍然检查同步的停止事件 try: - # 从队列中获取项目,设置超时以允许检查停止事件 - item = self.nickname_queue.get(block=True, timeout=self.sleep_interval) + # 使用 asyncio.wait_for 从异步队列获取项目,并设置超时 + item = await asyncio.wait_for( + self.nickname_queue.get(), + timeout=self.sleep_interval + ) - # 处理获取到的项目 + # 处理获取到的项目 (调用异步方法) await self._analyze_and_update_nicknames(item) - self.nickname_queue.task_done() # 标记任务完成 + self.nickname_queue.task_done() # 标记任务完成 - except queue.Empty: - # 超时,队列为空,继续循环检查停止事件 + except asyncio.TimeoutError: + # 等待超时,相当于之前 queue.Empty,继续循环检查停止事件 continue except asyncio.CancelledError: - logger.info(f"绰号处理循环被取消 (线程 ID: {thread_id})。") - break # 任务被取消,退出循环 + # 协程被取消 (通常在 stop_processor 中发生) + logger.info("绰号处理循环被取消。") + break # 退出循环 except Exception as e: # 捕获处理单个项目时可能发生的其他异常 - logger.error(f"(线程 ID: {thread_id}) 绰号处理循环出错: {e}\n{traceback.format_exc()}") - # 可以在这里添加错误处理逻辑,例如将失败的任务放回队列或记录到错误日志 - # 短暂休眠避免快速连续失败 + logger.error(f"绰号处理循环出错: {e}", exc_info=True) + # 短暂异步休眠避免快速连续失败 await asyncio.sleep(5) - logger.info(f"绰号处理循环已结束 (线程 ID: {thread_id})。") + logger.info("绰号异步处理循环已结束。") + # 可以在这里添加清理逻辑,比如确保队列为空或处理剩余项目 + # 例如:await self.nickname_queue.join() # 等待所有任务完成 (如果需要) + # 结束修改 # 在模块级别创建单例实例 -# 这使得其他模块可以通过 `from .nickname_manager import nickname_manager` 来导入和使用 nickname_manager = NicknameManager() From 0b1c3a2196fb5cf15d516fa58c0019e90cfda101 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 3 May 2025 06:28:48 +0000 Subject: [PATCH 57/58] =?UTF-8?q?=F0=9F=A4=96=20=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E5=8C=96=E4=BB=A3=E7=A0=81=20[skip=20ci]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../group_nickname/nickname_manager.py | 96 +++++++++++-------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/src/plugins/group_nickname/nickname_manager.py b/src/plugins/group_nickname/nickname_manager.py index b54c92b3..ac158a66 100644 --- a/src/plugins/group_nickname/nickname_manager.py +++ b/src/plugins/group_nickname/nickname_manager.py @@ -19,7 +19,8 @@ from src.plugins.chat.message import MessageRecv from src.plugins.utils.chat_message_builder import build_readable_messages, get_raw_msg_before_timestamp_with_chat logger = get_logger("NicknameManager") -logger_helper = get_logger("AsyncLoopHelper") # 为辅助函数创建单独的 logger +logger_helper = get_logger("AsyncLoopHelper") # 为辅助函数创建单独的 logger + def run_async_loop(loop: asyncio.AbstractEventLoop, coro): """ @@ -44,7 +45,9 @@ def run_async_loop(loop: asyncio.AbstractEventLoop, coro): # 1. 取消所有剩余任务 all_tasks = asyncio.all_tasks(loop) current_task = asyncio.current_task(loop) - tasks_to_cancel = [task for task in all_tasks if task is not current_task] # 避免取消 run_until_complete 本身 + tasks_to_cancel = [ + task for task in all_tasks if task is not current_task + ] # 避免取消 run_until_complete 本身 if tasks_to_cancel: logger_helper.info(f"Cancelling {len(tasks_to_cancel)} outstanding tasks in loop {id(loop)}...") for task in tasks_to_cancel: @@ -61,17 +64,19 @@ def run_async_loop(loop: asyncio.AbstractEventLoop, coro): # 3. 关闭循环 (如果未关闭) if not loop.is_closed(): # 在关闭前再运行一次以处理挂起的关闭回调 - loop.run_until_complete(loop.shutdown_asyncgens()) # 关闭异步生成器 + loop.run_until_complete(loop.shutdown_asyncgens()) # 关闭异步生成器 loop.close() logger_helper.info(f"Asyncio loop {id(loop)} closed.") except Exception as close_err: logger_helper.error(f"Error during asyncio loop cleanup for loop {id(loop)}: {close_err}", exc_info=True) + class NicknameManager: """ 管理群组绰号分析、处理、存储和使用的单例类。 封装了 LLM 调用、后台处理线程和数据库交互。 """ + _instance = None _lock = threading.Lock() @@ -89,11 +94,11 @@ class NicknameManager: 初始化 NicknameManager。 使用锁和标志确保实际初始化只执行一次。 """ - if hasattr(self, '_initialized') and self._initialized: + if hasattr(self, "_initialized") and self._initialized: return with self._lock: - if hasattr(self, '_initialized') and self._initialized: + if hasattr(self, "_initialized") and self._initialized: return logger.info("正在初始化 NicknameManager 组件...") @@ -101,7 +106,7 @@ class NicknameManager: self.is_enabled = self.config.ENABLE_NICKNAME_MAPPING # 数据库处理器 - person_info_collection = getattr(db, 'person_info', None) + person_info_collection = getattr(db, "person_info", None) self.db_handler = NicknameDB(person_info_collection) if not self.db_handler.is_available(): logger.error("数据库处理器初始化失败,NicknameManager 功能受限。") @@ -136,9 +141,9 @@ class NicknameManager: self.queue_max_size = getattr(self.config, "NICKNAME_QUEUE_MAX_SIZE", 100) # 使用 asyncio.Queue self.nickname_queue: asyncio.Queue = asyncio.Queue(maxsize=self.queue_max_size) - self._stop_event = threading.Event() # stop_event 仍然使用 threading.Event,因为它是由另一个线程设置的 + self._stop_event = threading.Event() # stop_event 仍然使用 threading.Event,因为它是由另一个线程设置的 self._nickname_thread: Optional[threading.Thread] = None - self.sleep_interval = getattr(self.config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) # 超时时间 + self.sleep_interval = getattr(self.config, "NICKNAME_PROCESS_SLEEP_INTERVAL", 0.5) # 超时时间 self._initialized = True logger.info("NicknameManager 初始化完成。") @@ -152,8 +157,8 @@ class NicknameManager: logger.info("正在启动绰号处理器线程...") self._stop_event.clear() self._nickname_thread = threading.Thread( - target=self._run_processor_in_thread, # 线程目标函数不变 - daemon=True + target=self._run_processor_in_thread, # 线程目标函数不变 + daemon=True, ) self._nickname_thread.start() logger.info(f"绰号处理器线程已启动 (ID: {self._nickname_thread.ident})") @@ -164,10 +169,10 @@ class NicknameManager: """停止后台处理线程。""" if self._nickname_thread and self._nickname_thread.is_alive(): logger.info("正在停止绰号处理器线程...") - self._stop_event.set() # 设置停止事件,_processing_loop 会检测到 + self._stop_event.set() # 设置停止事件,_processing_loop 会检测到 try: # 不需要清空 asyncio.Queue,让循环自然结束或被取消 - self._nickname_thread.join(timeout=10) # 等待线程结束 + self._nickname_thread.join(timeout=10) # 等待线程结束 if self._nickname_thread.is_alive(): logger.warning("绰号处理器线程在超时后仍未停止。") except Exception as e: @@ -209,8 +214,11 @@ class NicknameManager: # 格式化历史记录 chat_history_str = await build_readable_messages( messages=history_messages, - replace_bot_name=True, merge_messages=False, timestamp_mode="relative", - read_mark=0.0, truncate=False, + replace_bot_name=True, + merge_messages=False, + timestamp_mode="relative", + read_mark=0.0, + truncate=False, ) # 2. 获取 Bot 回复 bot_reply_str = " ".join(bot_reply) if bot_reply else "" @@ -218,7 +226,9 @@ class NicknameManager: group_id = str(current_chat_stream.group_info.group_id) platform = current_chat_stream.platform # 4. 构建用户 ID 到名称的映射 (user_name_map) - user_ids_in_history = {str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id")} + user_ids_in_history = { + str(msg["user_info"]["user_id"]) for msg in history_messages if msg.get("user_info", {}).get("user_id") + } user_name_map = {} if user_ids_in_history: try: @@ -231,14 +241,15 @@ class NicknameManager: user_name_map[user_id] = names_data[user_id] else: latest_nickname = next( - (m["user_info"].get("user_nickname") - for m in reversed(history_messages) - if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname")), + ( + m["user_info"].get("user_nickname") + for m in reversed(history_messages) + if str(m["user_info"].get("user_id")) == user_id and m["user_info"].get("user_nickname") + ), None, ) user_name_map[user_id] = latest_nickname or f"未知({user_id})" - item = (chat_history_str, bot_reply_str, platform, group_id, user_name_map) await self._add_to_queue(item, platform, group_id) @@ -256,7 +267,11 @@ class NicknameManager: try: group_id = str(chat_stream.group_info.group_id) platform = chat_stream.platform - user_ids_in_context = {str(msg["user_info"]["user_id"]) for msg in message_list_before_now if msg.get("user_info", {}).get("user_id")} + user_ids_in_context = { + str(msg["user_info"]["user_id"]) + for msg in message_list_before_now + if msg.get("user_info", {}).get("user_id") + } if not user_ids_in_context: recent_speakers = chat_stream.get_recent_speakers(limit=5) @@ -283,7 +298,6 @@ class NicknameManager: logger.error(f"{log_prefix} 获取绰号注入时出错: {e}", exc_info=True) return "" - # 私有/内部方法 async def _add_to_queue(self, item: tuple, platform: str, group_id: str): @@ -291,15 +305,18 @@ class NicknameManager: try: # 使用 await put(),如果队列满则异步等待 await self.nickname_queue.put(item) - logger.debug(f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}") + logger.debug( + f"已将项目添加到平台 '{platform}' 群组 '{group_id}' 的绰号队列。当前大小: {self.nickname_queue.qsize()}" + ) except asyncio.QueueFull: # 理论上 await put() 不会直接抛 QueueFull,除非 maxsize=0 # 但保留以防万一或未来修改 - logger.warning(f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。") + logger.warning( + f"绰号队列已满 (最大={self.queue_max_size})。平台 '{platform}' 群组 '{group_id}' 的项目被丢弃。" + ) except Exception as e: logger.error(f"将项目添加到绰号队列时出错: {e}", exc_info=True) - async def _analyze_and_update_nicknames(self, item: tuple): """处理单个队列项目:调用 LLM 分析并更新数据库。""" if not isinstance(item, tuple) or len(item) != 5: @@ -309,7 +326,7 @@ class NicknameManager: chat_history_str, bot_reply, platform, group_id, user_name_map = item # 使用 asyncio.get_running_loop().call_soon(threading.get_ident) 可能不准确,线程ID是同步概念 # 可以考虑移除线程ID日志或寻找异步安全的获取标识符的方式 - log_prefix = f"[{platform}:{group_id}]" # 简化日志前缀 + log_prefix = f"[{platform}:{group_id}]" # 简化日志前缀 logger.debug(f"{log_prefix} 开始处理绰号分析任务...") if not self.llm_mapper: @@ -340,7 +357,9 @@ class NicknameManager: try: person_id = person_info_manager.get_person_id(platform, user_id_str) if not person_id: - logger.error(f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。") + logger.error( + f"{log_prefix} 无法为 platform='{platform}', user_id='{user_id_str}' 生成 person_id,跳过此用户。" + ) continue self.db_handler.upsert_person(person_id, user_id_int, platform) self.db_handler.update_group_nickname_count(person_id, group_id, nickname) @@ -353,7 +372,6 @@ class NicknameManager: else: logger.debug(f"{log_prefix} LLM 未找到可靠的绰号映射或分析失败。") - async def _call_llm_for_analysis( self, chat_history_str: str, @@ -387,7 +405,7 @@ class NicknameManager: elif response_content.startswith("{") and response_content.endswith("}"): pass # 可能是纯 JSON else: - json_match = re.search(r'\{.*\}', response_content, re.DOTALL) + json_match = re.search(r"\{.*\}", response_content, re.DOTALL) if json_match: response_content = json_match.group(0) else: @@ -430,11 +448,10 @@ class NicknameManager: logger.error(f"绰号映射 LLM 调用或处理过程中发生意外错误: {e}", exc_info=True) return {"is_exist": False} - def _filter_llm_results(self, original_data: Dict[str, str], user_name_map: Dict[str, str]) -> Dict[str, str]: """过滤 LLM 返回的绰号映射结果。""" filtered_data = {} - bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, 'BOT_QQ') else None + bot_qq_str = str(self.config.BOT_QQ) if hasattr(self.config, "BOT_QQ") else None for user_id, nickname in original_data.items(): if not isinstance(user_id, str): @@ -454,23 +471,22 @@ class NicknameManager: return filtered_data - # 线程相关 # 修改:使用 run_async_loop 辅助函数 def _run_processor_in_thread(self): """后台线程入口函数,使用辅助函数管理 asyncio 事件循环。""" - thread_id = threading.get_ident() # 获取线程ID用于日志 + thread_id = threading.get_ident() # 获取线程ID用于日志 logger.info(f"绰号处理器线程启动 (线程 ID: {thread_id})...") loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) # 为当前线程设置事件循环 + asyncio.set_event_loop(loop) # 为当前线程设置事件循环 logger.info(f"(线程 ID: {thread_id}) Asyncio 事件循环已创建并设置。") # 调用辅助函数来运行主处理协程并管理循环生命周期 run_async_loop(loop, self._processing_loop()) logger.info(f"绰号处理器线程结束 (线程 ID: {thread_id}).") - # 结束修改 + # 结束修改 # 修改:使用 asyncio.Queue 和 wait_for async def _processing_loop(self): @@ -478,18 +494,15 @@ class NicknameManager: # 移除线程ID日志,因为它在异步上下文中不一定准确 logger.info("绰号异步处理循环已启动。") - while not self._stop_event.is_set(): # 仍然检查同步的停止事件 + while not self._stop_event.is_set(): # 仍然检查同步的停止事件 try: # 使用 asyncio.wait_for 从异步队列获取项目,并设置超时 - item = await asyncio.wait_for( - self.nickname_queue.get(), - timeout=self.sleep_interval - ) + item = await asyncio.wait_for(self.nickname_queue.get(), timeout=self.sleep_interval) # 处理获取到的项目 (调用异步方法) await self._analyze_and_update_nicknames(item) - self.nickname_queue.task_done() # 标记任务完成 + self.nickname_queue.task_done() # 标记任务完成 except asyncio.TimeoutError: # 等待超时,相当于之前 queue.Empty,继续循环检查停止事件 @@ -497,7 +510,7 @@ class NicknameManager: except asyncio.CancelledError: # 协程被取消 (通常在 stop_processor 中发生) logger.info("绰号处理循环被取消。") - break # 退出循环 + break # 退出循环 except Exception as e: # 捕获处理单个项目时可能发生的其他异常 logger.error(f"绰号处理循环出错: {e}", exc_info=True) @@ -507,6 +520,7 @@ class NicknameManager: logger.info("绰号异步处理循环已结束。") # 可以在这里添加清理逻辑,比如确保队列为空或处理剩余项目 # 例如:await self.nickname_queue.join() # 等待所有任务完成 (如果需要) + # 结束修改 From d5a599d8abae085a9f28d6d799fa4b04d3f1486b Mon Sep 17 00:00:00 2001 From: Bakadax Date: Sat, 3 May 2025 14:38:51 +0800 Subject: [PATCH 58/58] =?UTF-8?q?=E6=94=B9=E9=94=99=E4=BA=86=EF=BC=8C?= =?UTF-8?q?=E6=94=B9=E5=9B=9E=E6=9D=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- template/lpmm_config_template.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/template/lpmm_config_template.toml b/template/lpmm_config_template.toml index 491e1feb..aae664d5 100644 --- a/template/lpmm_config_template.toml +++ b/template/lpmm_config_template.toml @@ -54,7 +54,7 @@ res_top_k = 3 # 最终提供的文段TopK [persistence] # 持久化配置(存储中间数据,防止重复计算) data_root_path = "data" # 数据根目录 -raw_data_path = "data/import.json" # 原始数据路径 -openie_data_path = "data/openie.json" # OpenIE数据路径 +raw_data_path = "data/imported_lpmm_data" # 原始数据路径 +openie_data_path = "data/openie" # OpenIE数据路径 embedding_data_dir = "data/embedding" # 嵌入数据目录 rag_data_dir = "data/rag" # RAG数据目录