From 0852af49f9a81ab81315af02ce0e89c01f36b845 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Wed, 24 Dec 2025 18:43:32 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9B=E4=BC=98=E5=8C=96=E4=BA=86?= =?UTF-8?q?=E8=AE=B0=E5=BF=86=E6=A3=80=E7=B4=A2=E7=9A=84=E9=80=9F=E5=BA=A6?= =?UTF-8?q?=E5=92=8Ctoken=E6=B6=88=E8=80=97=EF=BC=88=E5=B0=86question?= =?UTF-8?q?=E6=8F=90=E5=87=BA=E4=BA=A4=E7=BB=99planenr)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- changelogs/changelog.md | 4 + src/bw_learner/expression_selector.py | 4 +- src/bw_learner/jargon_explainer.py | 7 +- src/chat/heart_flow/heartFC_chat.py | 2 +- src/chat/planner_actions/planner.py | 29 +- src/chat/replyer/group_generator.py | 14 +- src/chat/replyer/private_generator.py | 18 +- src/chat/utils/statistic.py | 33 ++- src/config/official_configs.py | 26 +- .../chat_history_summarizer.py | 5 +- src/memory_system/memory_retrieval.py | 268 ++++++++++-------- src/memory_system/retrieval_tools/__init__.py | 2 + .../retrieval_tools/query_chat_history.py | 97 ++++++- .../retrieval_tools/query_words.py | 80 ++++++ template/bot_config_template.toml | 11 +- 15 files changed, 448 insertions(+), 152 deletions(-) create mode 100644 src/memory_system/retrieval_tools/query_words.py diff --git a/changelogs/changelog.md b/changelogs/changelog.md index f3bc52c4..030d45ec 100644 --- a/changelogs/changelog.md +++ b/changelogs/changelog.md @@ -1,5 +1,9 @@ # Changelog +移除 enable_jargon_detection +添加 global_memory_blacklist + + ## [0.12.0] - 2025-12-21 ### 🌟 重大更新 - 添加思考力度机制,动态控制回复时间和长度 diff --git a/src/bw_learner/expression_selector.py b/src/bw_learner/expression_selector.py index 4140d102..e0e3450b 100644 --- a/src/bw_learner/expression_selector.py +++ b/src/bw_learner/expression_selector.py @@ -407,8 +407,8 @@ class ExpressionSelector: # 4. 
调用LLM content, (reasoning_content, model_name, _) = await self.llm_model.generate_response_async(prompt=prompt) - print(prompt) - print(content) + # print(prompt) + # print(content) if not content: logger.warning("LLM返回空结果") diff --git a/src/bw_learner/jargon_explainer.py b/src/bw_learner/jargon_explainer.py index 207a080a..31e33cdc 100644 --- a/src/bw_learner/jargon_explainer.py +++ b/src/bw_learner/jargon_explainer.py @@ -341,7 +341,7 @@ async def retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> st meaning = result.get("meaning", "").strip() if found_content and meaning: output_parts.append(f"找到 '{found_content}' 的含义为:{meaning}") - results.append(",".join(output_parts)) + results.append("\n".join(output_parts)) # 换行分隔每个jargon解释 logger.info(f"在jargon库中找到匹配(模糊搜索): {concept},找到{len(jargon_results)}条结果") else: # 精确匹配 @@ -350,7 +350,8 @@ async def retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> st meaning = result.get("meaning", "").strip() if meaning: output_parts.append(f"'{concept}' 为黑话或者网络简写,含义为:{meaning}") - results.append(";".join(output_parts) if len(output_parts) > 1 else output_parts[0]) + # 换行分隔每个jargon解释 + results.append("\n".join(output_parts) if len(output_parts) > 1 else output_parts[0]) exact_matches.append(concept) # 收集精确匹配的概念,稍后统一打印 else: # 未找到,不返回占位信息,只记录日志 @@ -361,5 +362,5 @@ async def retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> st logger.info(f"找到黑话: {', '.join(exact_matches)},共找到{len(exact_matches)}条结果") if results: - return "【概念检索结果】\n" + "\n".join(results) + "\n" + return "你了解以下词语可能的含义:\n" + "\n".join(results) + "\n" return "" diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index 13db1783..45e79308 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -687,7 +687,7 @@ class HeartFChatting: return { "action_type": "reply", "success": True, - "result": f"你回复内容{reply_text}", + "result": f"你使用reply动作,对' 
{action_planner_info.action_message.processed_plain_text} '这句话进行了回复,回复内容为: '{reply_text}'", "loop_info": loop_info, } diff --git a/src/chat/planner_actions/planner.py b/src/chat/planner_actions/planner.py index 121c7ebc..791038ca 100644 --- a/src/chat/planner_actions/planner.py +++ b/src/chat/planner_actions/planner.py @@ -53,7 +53,7 @@ reply 4.不要选择回复你自己发送的消息 5.不要单独对表情包进行回复 6.将上下文中所有含义不明的,疑似黑话的,缩写词均写入unknown_words中 -7.用一句简单的话来描述当前回复场景,不超过10个字 +7.如果你对上下文存在疑问,有需要查询的问题,写入question中 {reply_action_example} no_reply @@ -224,6 +224,25 @@ class ActionPlanner: else: reasoning = "未提供原因" action_data = {key: value for key, value in action_json.items() if key not in ["action"]} + + # 验证和清理 question + if "question" in action_data: + q = action_data.get("question") + if isinstance(q, str): + cleaned_q = q.strip() + if cleaned_q: + action_data["question"] = cleaned_q + else: + # 如果清理后为空字符串,移除该字段 + action_data.pop("question", None) + elif q is None: + # 如果为 None,移除该字段 + action_data.pop("question", None) + else: + # 如果不是字符串类型,记录警告并移除 + logger.warning(f"{self.log_prefix}question 格式不正确,应为字符串类型,已忽略") + action_data.pop("question", None) + # 非no_reply动作需要target_message_id target_message = None @@ -503,18 +522,20 @@ class ActionPlanner: name_block = f"你的名字是{bot_name}{bot_nickname},请注意哪些是你自己的发言。" # 根据 think_mode 配置决定 reply action 的示例 JSON - # 在 JSON 中直接作为 action 参数携带 unknown_words + # 在 JSON 中直接作为 action 参数携带 unknown_words 和 question if global_config.chat.think_mode == "classic": reply_action_example = ( '{{"action":"reply", "target_message_id":"消息id(m+数字)", ' - '"unknown_words":["词语1","词语2"]}}' + '"unknown_words":["词语1","词语2"], ' + '"question":"需要查询的问题"}' ) else: reply_action_example = ( "5.think_level表示思考深度,0表示该回复不需要思考和回忆,1表示该回复需要进行回忆和思考\n" + '{{"action":"reply", "think_level":数值等级(0或1), ' '"target_message_id":"消息id(m+数字)", ' - '"unknown_words":["词语1","词语2"]}}' + '"unknown_words":["词语1","词语2"], ' + '"question":"需要查询的问题"}' ) planner_prompt_template = await 
global_prompt_manager.get_prompt_async("planner_prompt") diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index bdd56c6b..5c8003f0 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -947,6 +947,18 @@ class DefaultReplyer: chat_id, message_list_before_short, chat_talking_prompt_short, unknown_words ) + # 从 chosen_actions 中提取 question(仅在 reply 动作中) + question = None + if chosen_actions: + for action_info in chosen_actions: + if action_info.action_type == "reply" and isinstance(action_info.action_data, dict): + q = action_info.action_data.get("question") + if isinstance(q, str): + cleaned_q = q.strip() + if cleaned_q: + question = cleaned_q + break + # 并行执行构建任务(包括黑话解释,可配置关闭) task_results = await asyncio.gather( self._time_and_run_task( @@ -961,7 +973,7 @@ class DefaultReplyer: self._time_and_run_task(self.build_personality_prompt(), "personality_prompt"), self._time_and_run_task( build_memory_retrieval_prompt( - chat_talking_prompt_short, sender, target, self.chat_stream, think_level=think_level + chat_talking_prompt_short, sender, target, self.chat_stream, think_level=think_level, unknown_words=unknown_words, question=question ), "memory_retrieval", ), diff --git a/src/chat/replyer/private_generator.py b/src/chat/replyer/private_generator.py index 39d3b370..e7b4f4da 100644 --- a/src/chat/replyer/private_generator.py +++ b/src/chat/replyer/private_generator.py @@ -110,6 +110,7 @@ class PrivateReplyer: enable_tool=enable_tool, reply_message=reply_message, reply_reason=reply_reason, + unknown_words=unknown_words, ) llm_response.prompt = prompt llm_response.selected_expressions = selected_expressions @@ -611,6 +612,7 @@ class PrivateReplyer: available_actions: Optional[Dict[str, ActionInfo]] = None, chosen_actions: Optional[List[ActionPlannerInfo]] = None, enable_tool: bool = True, + unknown_words: Optional[List[str]] = None, ) -> Tuple[str, List[int]]: """ 构建回复器上下文 @@ -709,12 +711,24 @@ 
class PrivateReplyer: else: jargon_coroutine = self._build_disabled_jargon_explanation() + # 从 chosen_actions 中提取 question(仅在 reply 动作中) + question = None + if chosen_actions: + for action_info in chosen_actions: + if action_info.action_type == "reply" and isinstance(action_info.action_data, dict): + q = action_info.action_data.get("question") + if isinstance(q, str): + cleaned_q = q.strip() + if cleaned_q: + question = cleaned_q + break + # 并行执行九个构建任务(包括黑话解释,可配置关闭) task_results = await asyncio.gather( self._time_and_run_task( self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits" ), - self._time_and_run_task(self.build_relation_info(chat_talking_prompt_short, sender), "relation_info"), + # self._time_and_run_task(self.build_relation_info(chat_talking_prompt_short, sender), "relation_info"), self._time_and_run_task( self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info" ), @@ -723,7 +737,7 @@ class PrivateReplyer: self._time_and_run_task(self.build_personality_prompt(), "personality_prompt"), self._time_and_run_task( build_memory_retrieval_prompt( - chat_talking_prompt_short, sender, target, self.chat_stream, self.tool_executor + chat_talking_prompt_short, sender, target, self.chat_stream, think_level=1, unknown_words=unknown_words, question=question ), "memory_retrieval", ), diff --git a/src/chat/utils/statistic.py b/src/chat/utils/statistic.py index 20c0843b..b23e4503 100644 --- a/src/chat/utils/statistic.py +++ b/src/chat/utils/statistic.py @@ -743,13 +743,13 @@ class StatisticOutputTask(AsyncTask): """ if stats[TOTAL_REQ_CNT] <= 0: return "" - data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f} {:>12} {:>12}" + data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f} {:>12} {:>12} {:>12}" total_replies = stats.get(TOTAL_REPLY_CNT, 0) output = [ "按模型分类统计:", - " 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒) 每次回复平均调用次数 
每次回复平均Token数", + " 模型名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒) 每次回复平均调用次数 每次回复平均Token数 每次调用平均Token", ] for model_name, count in sorted(stats[REQ_CNT_BY_MODEL].items()): name = f"{model_name[:29]}..." if len(model_name) > 32 else model_name @@ -764,6 +764,9 @@ class StatisticOutputTask(AsyncTask): avg_count_per_reply = count / total_replies if total_replies > 0 else 0.0 avg_tokens_per_reply = tokens / total_replies if total_replies > 0 else 0.0 + # 计算每次调用平均token + avg_tokens_per_call = tokens / count if count > 0 else 0.0 + # 格式化大数字 formatted_count = _format_large_number(count) formatted_in_tokens = _format_large_number(in_tokens) @@ -771,6 +774,7 @@ class StatisticOutputTask(AsyncTask): formatted_tokens = _format_large_number(tokens) formatted_avg_count = _format_large_number(avg_count_per_reply) if total_replies > 0 else "N/A" formatted_avg_tokens = _format_large_number(avg_tokens_per_reply) if total_replies > 0 else "N/A" + formatted_avg_tokens_per_call = _format_large_number(avg_tokens_per_call) if count > 0 else "N/A" output.append( data_fmt.format( @@ -784,6 +788,7 @@ class StatisticOutputTask(AsyncTask): std_time_cost, formatted_avg_count, formatted_avg_tokens, + formatted_avg_tokens_per_call, ) ) @@ -797,13 +802,13 @@ class StatisticOutputTask(AsyncTask): """ if stats[TOTAL_REQ_CNT] <= 0: return "" - data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f} {:>12} {:>12}" + data_fmt = "{:<32} {:>10} {:>12} {:>12} {:>12} {:>9.2f}¥ {:>10.1f} {:>10.1f} {:>12} {:>12} {:>12}" total_replies = stats.get(TOTAL_REPLY_CNT, 0) output = [ "按模块分类统计:", - " 模块名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒) 每次回复平均调用次数 每次回复平均Token数", + " 模块名称 调用次数 输入Token 输出Token Token总量 累计花费 平均耗时(秒) 标准差(秒) 每次回复平均调用次数 每次回复平均Token数 每次调用平均Token", ] for module_name, count in sorted(stats[REQ_CNT_BY_MODULE].items()): name = f"{module_name[:29]}..." 
if len(module_name) > 32 else module_name @@ -818,6 +823,9 @@ class StatisticOutputTask(AsyncTask): avg_count_per_reply = count / total_replies if total_replies > 0 else 0.0 avg_tokens_per_reply = tokens / total_replies if total_replies > 0 else 0.0 + # 计算每次调用平均token + avg_tokens_per_call = tokens / count if count > 0 else 0.0 + # 格式化大数字 formatted_count = _format_large_number(count) formatted_in_tokens = _format_large_number(in_tokens) @@ -825,6 +833,7 @@ class StatisticOutputTask(AsyncTask): formatted_tokens = _format_large_number(tokens) formatted_avg_count = _format_large_number(avg_count_per_reply) if total_replies > 0 else "N/A" formatted_avg_tokens = _format_large_number(avg_tokens_per_reply) if total_replies > 0 else "N/A" + formatted_avg_tokens_per_call = _format_large_number(avg_tokens_per_call) if count > 0 else "N/A" output.append( data_fmt.format( @@ -838,6 +847,7 @@ class StatisticOutputTask(AsyncTask): std_time_cost, formatted_avg_count, formatted_avg_tokens, + formatted_avg_tokens_per_call, ) ) @@ -935,11 +945,12 @@ class StatisticOutputTask(AsyncTask): f"{stat_data[STD_TIME_COST_BY_MODEL][model_name]:.1f} 秒" f"{_format_large_number(count / total_replies, html=True) if total_replies > 0 else 'N/A'}" f"{_format_large_number(stat_data[TOTAL_TOK_BY_MODEL][model_name] / total_replies, html=True) if total_replies > 0 else 'N/A'}" + f"{_format_large_number(stat_data[TOTAL_TOK_BY_MODEL][model_name] / count, html=True) if count > 0 else 'N/A'}" f"" for model_name, count in sorted(stat_data[REQ_CNT_BY_MODEL].items()) ] if stat_data[REQ_CNT_BY_MODEL] - else ["暂无数据"] + else ["暂无数据"] ) # 按请求类型分类统计 type_rows = "\n".join( @@ -955,11 +966,12 @@ class StatisticOutputTask(AsyncTask): f"{stat_data[STD_TIME_COST_BY_TYPE][req_type]:.1f} 秒" f"{_format_large_number(count / total_replies, html=True) if total_replies > 0 else 'N/A'}" f"{_format_large_number(stat_data[TOTAL_TOK_BY_TYPE][req_type] / total_replies, html=True) if total_replies > 0 else 'N/A'}" + 
f"{_format_large_number(stat_data[TOTAL_TOK_BY_TYPE][req_type] / count, html=True) if count > 0 else 'N/A'}" f"" for req_type, count in sorted(stat_data[REQ_CNT_BY_TYPE].items()) ] if stat_data[REQ_CNT_BY_TYPE] - else ["暂无数据"] + else ["暂无数据"] ) # 按模块分类统计 module_rows = "\n".join( @@ -975,11 +987,12 @@ class StatisticOutputTask(AsyncTask): f"{stat_data[STD_TIME_COST_BY_MODULE][module_name]:.1f} 秒" f"{_format_large_number(count / total_replies, html=True) if total_replies > 0 else 'N/A'}" f"{_format_large_number(stat_data[TOTAL_TOK_BY_MODULE][module_name] / total_replies, html=True) if total_replies > 0 else 'N/A'}" + f"{_format_large_number(stat_data[TOTAL_TOK_BY_MODULE][module_name] / count, html=True) if count > 0 else 'N/A'}" f"" for module_name, count in sorted(stat_data[REQ_CNT_BY_MODULE].items()) ] if stat_data[REQ_CNT_BY_MODULE] - else ["暂无数据"] + else ["暂无数据"] ) # 聊天消息统计 @@ -1054,7 +1067,7 @@ class StatisticOutputTask(AsyncTask):

按模型分类统计

- + {model_rows} @@ -1065,7 +1078,7 @@ class StatisticOutputTask(AsyncTask):
模型名称调用次数输入Token输出TokenToken总量累计花费平均耗时(秒)标准差(秒)每次回复平均调用次数每次回复平均Token数
模型名称调用次数输入Token输出TokenToken总量累计花费平均耗时(秒)标准差(秒)每次回复平均调用次数每次回复平均Token数每次调用平均Token
- + {module_rows} @@ -1077,7 +1090,7 @@ class StatisticOutputTask(AsyncTask):
模块名称调用次数输入Token输出TokenToken总量累计花费平均耗时(秒)标准差(秒)每次回复平均调用次数每次回复平均Token数
模块名称调用次数输入Token输出TokenToken总量累计花费平均耗时(秒)标准差(秒)每次回复平均调用次数每次回复平均Token数每次调用平均Token
- + {type_rows} diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 38665e6d..99d47dd5 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -260,12 +260,32 @@ class MemoryConfig(ConfigBase): agent_timeout_seconds: float = 120.0 """Agent超时时间(秒)""" - enable_jargon_detection: bool = True - """记忆检索过程中是否启用黑话识别""" - global_memory: bool = False """是否允许记忆检索在聊天记录中进行全局查询(忽略当前chat_id,仅对 search_chat_history 等工具生效)""" + global_memory_blacklist: list[str] = field(default_factory=lambda: []) + """ + 全局记忆黑名单,当启用全局记忆时,不将特定聊天流纳入检索 + 格式: ["platform:id:type", ...] + + 示例: + [ + "qq:1919810:private", # 排除特定私聊 + "qq:114514:group", # 排除特定群聊 + ] + + 说明: + - 当启用全局记忆时,黑名单中的聊天流不会被检索 + - 当在黑名单中的聊天流进行查询时,仅使用该聊天流的本地记忆 + """ + + planner_question: bool = True + """ + 是否使用 Planner 提供的 question 作为记忆检索问题 + - True: 当 Planner 在 reply 动作中提供了 question 时,直接使用该问题进行记忆检索,跳过 LLM 生成问题的步骤 + - False: 沿用旧模式,使用 LLM 生成问题 + """ + def __post_init__(self): """验证配置值""" if self.max_agent_iterations < 1: diff --git a/src/hippo_memorizer/chat_history_summarizer.py b/src/hippo_memorizer/chat_history_summarizer.py index c8d4a943..fddd2100 100644 --- a/src/hippo_memorizer/chat_history_summarizer.py +++ b/src/hippo_memorizer/chat_history_summarizer.py @@ -912,8 +912,11 @@ class ChatHistorySummarizer: result = _parse_with_quote_fix(extracted_json) keywords = result.get("keywords", []) - summary = result.get("summary", "无概括") + summary = result.get("summary", "") key_point = result.get("key_point", []) + + if not (keywords and summary) and key_point: + logger.warning(f"{self.log_prefix} LLM返回的JSON中缺少字段,原文\n{response}") # 确保keywords和key_point是列表 if isinstance(keywords, str): diff --git a/src/memory_system/memory_retrieval.py b/src/memory_system/memory_retrieval.py index 9aafb9ef..b4f9b21e 100644 --- a/src/memory_system/memory_retrieval.py +++ b/src/memory_system/memory_retrieval.py @@ -2,7 +2,7 @@ import time import json import asyncio import re -from typing import 
List, Dict, Any, Optional, Tuple, Set +from typing import List, Dict, Any, Optional, Tuple from src.common.logger import get_logger from src.config.config import global_config, model_config from src.chat.utils.prompt_builder import Prompt, global_prompt_manager @@ -11,7 +11,8 @@ from src.common.database.database_model import ThinkingBack from src.memory_system.retrieval_tools import get_tool_registry, init_all_tools from src.memory_system.memory_utils import parse_questions_json from src.llm_models.payload_content.message import MessageBuilder, RoleType, Message -from src.bw_learner.jargon_explainer import match_jargon_from_text, retrieve_concepts_with_jargon +from src.chat.message_receive.chat_stream import get_chat_manager +from src.bw_learner.jargon_explainer import retrieve_concepts_with_jargon logger = get_logger("memory_retrieval") @@ -100,6 +101,7 @@ def init_memory_retrieval_prompt(): **工具说明:** - 如果涉及过往事件,或者查询某个过去可能提到过的概念,或者某段时间发生的事件。可以使用聊天记录查询工具查询过往事件 - 如果涉及人物,可以使用人物信息查询工具查询人物信息 +- 如果遇到不熟悉的词语、缩写、黑话或网络用语,可以使用query_words工具查询其含义 - 如果没有可靠信息,且查询时间充足,或者不确定查询类别,也可以使用lpmm知识库查询,作为辅助信息 **思考** @@ -202,7 +204,6 @@ async def _react_agent_solve_question( max_iterations: int = 5, timeout: float = 30.0, initial_info: str = "", - initial_jargon_concepts: Optional[List[str]] = None, ) -> Tuple[bool, str, List[Dict[str, Any]], bool]: """使用ReAct架构的Agent来解决问题 @@ -211,28 +212,29 @@ async def _react_agent_solve_question( chat_id: 聊天ID max_iterations: 最大迭代次数 timeout: 超时时间(秒) - initial_info: 初始信息(如概念检索结果),将作为collected_info的初始值 - initial_jargon_concepts: 预先已解析过的黑话列表,避免重复解释 + initial_info: 初始信息,将作为collected_info的初始值 Returns: Tuple[bool, str, List[Dict[str, Any]], bool]: (是否找到答案, 答案内容, 思考步骤列表, 是否超时) """ start_time = time.time() collected_info = initial_info if initial_info else "" - enable_jargon_detection = global_config.memory.enable_jargon_detection - seen_jargon_concepts: Set[str] = set() - if enable_jargon_detection and initial_jargon_concepts: - for concept in 
initial_jargon_concepts: - concept = (concept or "").strip() - if concept: - seen_jargon_concepts.add(concept) + # 构造日志前缀:[聊天流名称],用于在日志中标识聊天流 + try: + chat_name = get_chat_manager().get_stream_name(chat_id) or chat_id + except Exception: + chat_name = chat_id + react_log_prefix = f"[{chat_name}] " thinking_steps = [] is_timeout = False conversation_messages: List[Message] = [] first_head_prompt: Optional[str] = None # 保存第一次使用的head_prompt(用于日志显示) + last_tool_name: Optional[str] = None # 记录最后一次使用的工具名称 - # 正常迭代:max_iterations 次(最终评估单独处理,不算在迭代中) - for iteration in range(max_iterations): + # 使用 while 循环,支持额外迭代 + iteration = 0 + max_iterations_with_extra = max_iterations + while iteration < max_iterations_with_extra: # 检查超时 if time.time() - start_time > timeout: logger.warning(f"ReAct Agent超时,已迭代{iteration}次") @@ -475,7 +477,7 @@ async def _react_agent_solve_question( step["observations"] = ["检测到finish_search文本格式调用,找到答案"] thinking_steps.append(step) logger.info( - f"ReAct Agent 第 {iteration + 1} 次迭代 通过finish_search文本格式找到关于问题{question}的答案: {parsed_answer}" + f"{react_log_prefix}第 {iteration + 1} 次迭代 通过finish_search文本格式找到关于问题{question}的答案: {parsed_answer}" ) _log_conversation_messages( @@ -488,7 +490,7 @@ async def _react_agent_solve_question( else: # found_answer为True但没有提供answer,视为错误,继续迭代 logger.warning( - f"ReAct Agent 第 {iteration + 1} 次迭代 finish_search文本格式found_answer为True但未提供answer" + f"{react_log_prefix}第 {iteration + 1} 次迭代 finish_search文本格式found_answer为True但未提供answer" ) else: # 未找到答案,直接退出查询 @@ -497,7 +499,9 @@ async def _react_agent_solve_question( ) step["observations"] = ["检测到finish_search文本格式调用,未找到答案"] thinking_steps.append(step) - logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 通过finish_search文本格式判断未找到答案") + logger.info( + f"{react_log_prefix}第 {iteration + 1} 次迭代 通过finish_search文本格式判断未找到答案" + ) _log_conversation_messages( conversation_messages, @@ -509,10 +513,12 @@ async def _react_agent_solve_question( # 如果没有检测到finish_search格式,记录思考过程,继续下一轮迭代 
step["observations"] = [f"思考完成,但未调用工具。响应: {response}"] - logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 思考完成但未调用工具: {response}") + logger.info( + f"{react_log_prefix}第 {iteration + 1} 次迭代 思考完成但未调用工具: {response}" + ) collected_info += f"思考: {response}" else: - logger.warning(f"ReAct Agent 第 {iteration + 1} 次迭代 无工具调用且无响应") + logger.warning(f"{react_log_prefix}第 {iteration + 1} 次迭代 无工具调用且无响应") step["observations"] = ["无响应且无工具调用"] thinking_steps.append(step) continue @@ -541,7 +547,7 @@ async def _react_agent_solve_question( step["observations"] = ["检测到finish_search工具调用,找到答案"] thinking_steps.append(step) logger.info( - f"ReAct Agent 第 {iteration + 1} 次迭代 通过finish_search工具找到关于问题{question}的答案: {finish_search_answer}" + f"{react_log_prefix}第 {iteration + 1} 次迭代 通过finish_search工具找到关于问题{question}的答案: {finish_search_answer}" ) _log_conversation_messages( @@ -554,14 +560,16 @@ async def _react_agent_solve_question( else: # found_answer为True但没有提供answer,视为错误 logger.warning( - f"ReAct Agent 第 {iteration + 1} 次迭代 finish_search工具found_answer为True但未提供answer" + f"{react_log_prefix}第 {iteration + 1} 次迭代 finish_search工具found_answer为True但未提供answer" ) else: # 未找到答案,直接退出查询 step["actions"].append({"action_type": "finish_search", "action_params": {"found_answer": False}}) step["observations"] = ["检测到finish_search工具调用,未找到答案"] thinking_steps.append(step) - logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 通过finish_search工具判断未找到答案") + logger.info( + f"{react_log_prefix}第 {iteration + 1} 次迭代 通过finish_search工具判断未找到答案" + ) _log_conversation_messages( conversation_messages, @@ -578,13 +586,16 @@ async def _react_agent_solve_question( tool_args = tool_call.args or {} logger.debug( - f"ReAct Agent 第 {iteration + 1} 次迭代 工具调用 {i + 1}/{len(tool_calls)}: {tool_name}({tool_args})" + f"{react_log_prefix}第 {iteration + 1} 次迭代 工具调用 {i + 1}/{len(tool_calls)}: {tool_name}({tool_args})" ) # 跳过finish_search工具调用(已经在上面处理过了) if tool_name == "finish_search": continue + # 记录最后一次使用的工具名称(用于判断是否需要额外迭代) + last_tool_name 
= tool_name + # 普通工具调用 tool = tool_registry.get_tool(tool_name) if tool: @@ -604,14 +615,18 @@ async def _react_agent_solve_question( return f"查询{tool_name_str}({param_str})的结果:{observation}" except Exception as e: error_msg = f"工具执行失败: {str(e)}" - logger.error(f"ReAct Agent 第 {iter_num + 1} 次迭代 工具 {tool_name_str} {error_msg}") + logger.error( + f"{react_log_prefix}第 {iter_num + 1} 次迭代 工具 {tool_name_str} {error_msg}" + ) return f"查询{tool_name_str}失败: {error_msg}" tool_tasks.append(execute_single_tool(tool, tool_params, tool_name, iteration)) step["actions"].append({"action_type": tool_name, "action_params": tool_args}) else: error_msg = f"未知的工具类型: {tool_name}" - logger.warning(f"ReAct Agent 第 {iteration + 1} 次迭代 工具 {i + 1}/{len(tool_calls)} {error_msg}") + logger.warning( + f"{react_log_prefix}第 {iteration + 1} 次迭代 工具 {i + 1}/{len(tool_calls)} {error_msg}" + ) tool_tasks.append(asyncio.create_task(asyncio.sleep(0, result=f"查询{tool_name}失败: {error_msg}"))) # 并行执行所有工具 @@ -622,31 +637,16 @@ async def _react_agent_solve_question( for i, (tool_call_item, observation) in enumerate(zip(tool_calls, observations, strict=False)): if isinstance(observation, Exception): observation = f"工具执行异常: {str(observation)}" - logger.error(f"ReAct Agent 第 {iteration + 1} 次迭代 工具 {i + 1} 执行异常: {observation}") + logger.error( + f"{react_log_prefix}第 {iteration + 1} 次迭代 工具 {i + 1} 执行异常: {observation}" + ) observation_text = observation if isinstance(observation, str) else str(observation) stripped_observation = observation_text.strip() step["observations"].append(observation_text) collected_info += f"\n{observation_text}\n" if stripped_observation: - # 检查工具输出中是否有新的jargon,如果有则追加到工具结果中 - if enable_jargon_detection: - jargon_concepts = match_jargon_from_text(stripped_observation, chat_id) - if jargon_concepts: - new_concepts = [] - for concept in jargon_concepts: - normalized_concept = concept.strip() - if normalized_concept and normalized_concept not in seen_jargon_concepts: - 
new_concepts.append(normalized_concept) - seen_jargon_concepts.add(normalized_concept) - if new_concepts: - jargon_info = await retrieve_concepts_with_jargon(new_concepts, chat_id) - if jargon_info: - # 将jargon查询结果追加到工具结果中 - observation_text += f"\n\n{jargon_info}" - collected_info += f"\n{jargon_info}\n" - logger.info(f"工具输出触发黑话解析: {new_concepts}") - + # 不再自动检测工具输出中的jargon,改为通过 query_words 工具主动查询 tool_builder = MessageBuilder() tool_builder.set_role(RoleType.Tool) tool_builder.add_text_content(observation_text) @@ -655,15 +655,24 @@ async def _react_agent_solve_question( thinking_steps.append(step) + # 检查是否需要额外迭代:如果最后一次使用的工具是 search_chat_history 且达到最大迭代次数,额外增加一回合 + if iteration + 1 >= max_iterations and last_tool_name == "search_chat_history" and not is_timeout: + max_iterations_with_extra = max_iterations + 1 + logger.info( + f"{react_log_prefix}达到最大迭代次数(已迭代{iteration + 1}次),最后一次使用工具为 search_chat_history,额外增加一回合尝试" + ) + + iteration += 1 + # 正常迭代结束后,如果达到最大迭代次数或超时,执行最终评估 # 最终评估单独处理,不算在迭代中 should_do_final_evaluation = False if is_timeout: should_do_final_evaluation = True - logger.warning(f"ReAct Agent超时,已迭代{iteration + 1}次,进入最终评估") - elif iteration + 1 >= max_iterations: + logger.warning(f"{react_log_prefix}超时,已迭代{iteration}次,进入最终评估") + elif iteration >= max_iterations: should_do_final_evaluation = True - logger.info(f"ReAct Agent达到最大迭代次数(已迭代{iteration + 1}次),进入最终评估") + logger.info(f"{react_log_prefix}达到最大迭代次数(已迭代{iteration}次),进入最终评估") if should_do_final_evaluation: # 获取必要变量用于最终评估 @@ -766,8 +775,8 @@ async def _react_agent_solve_question( return False, "最终评估阶段LLM调用失败", thinking_steps, is_timeout if global_config.debug.show_memory_prompt: - logger.info(f"ReAct Agent 最终评估Prompt: {evaluation_prompt}") - logger.info(f"ReAct Agent 最终评估响应: {eval_response}") + logger.info(f"{react_log_prefix}最终评估Prompt: {evaluation_prompt}") + logger.info(f"{react_log_prefix}最终评估响应: {eval_response}") # 从最终评估响应中提取found_answer或not_enough_info found_answer_content = None @@ -998,7 +1007,6 
@@ async def _process_single_question( chat_id: str, context: str, initial_info: str = "", - initial_jargon_concepts: Optional[List[str]] = None, max_iterations: Optional[int] = None, ) -> Optional[str]: """处理单个问题的查询 @@ -1007,8 +1015,8 @@ async def _process_single_question( question: 要查询的问题 chat_id: 聊天ID context: 上下文信息 - initial_info: 初始信息(如概念检索结果),将传递给ReAct Agent - initial_jargon_concepts: 已经处理过的黑话概念列表,用于ReAct阶段的去重 + initial_info: 初始信息,将传递给ReAct Agent + max_iterations: 最大迭代次数 Returns: Optional[str]: 如果找到答案,返回格式化的结果字符串,否则返回None @@ -1022,8 +1030,6 @@ async def _process_single_question( # 直接使用ReAct Agent查询(不再从thinking_back获取缓存) # logger.info(f"使用ReAct Agent查询,问题: {question[:50]}...") - jargon_concepts_for_agent = initial_jargon_concepts if global_config.memory.enable_jargon_detection else None - # 如果未指定max_iterations,使用配置的默认值 if max_iterations is None: max_iterations = global_config.memory.max_agent_iterations @@ -1034,7 +1040,6 @@ async def _process_single_question( max_iterations=max_iterations, timeout=global_config.memory.agent_timeout_seconds, initial_info=question_initial_info, - initial_jargon_concepts=jargon_concepts_for_agent, ) # 存储查询历史到数据库(超时时不存储) @@ -1062,6 +1067,8 @@ async def build_memory_retrieval_prompt( target: str, chat_stream, think_level: int = 1, + unknown_words: Optional[List[str]] = None, + question: Optional[str] = None, ) -> str: """构建记忆检索提示 使用两段式查询:第一步生成问题,第二步使用ReAct Agent查询答案 @@ -1071,14 +1078,33 @@ async def build_memory_retrieval_prompt( sender: 发送者名称 target: 目标消息内容 chat_stream: 聊天流对象 - tool_executor: 工具执行器(保留参数以兼容接口) + think_level: 思考深度等级 + unknown_words: Planner 提供的未知词语列表,优先使用此列表而不是从聊天记录匹配 + question: Planner 提供的问题,当 planner_question 配置开启时,直接使用此问题进行检索 Returns: str: 记忆检索结果字符串 """ start_time = time.time() - logger.info(f"检测是否需要回忆,元消息:{message[:30]}...,消息长度: {len(message)}") + # 构造日志前缀:[聊天流名称],用于在日志中标识聊天流(优先群名称/用户昵称) + try: + group_info = chat_stream.group_info + user_info = chat_stream.user_info + # 群聊优先使用群名称 + if group_info is not None 
and getattr(group_info, "group_name", None): + stream_name = group_info.group_name.strip() or str(group_info.group_id) + # 私聊使用用户昵称 + elif user_info is not None and getattr(user_info, "user_nickname", None): + stream_name = user_info.user_nickname.strip() or str(user_info.user_id) + # 兜底使用 stream_id + else: + stream_name = chat_stream.stream_id + except Exception: + stream_name = chat_stream.stream_id + log_prefix = f"[{stream_name}] " if stream_name else "" + + logger.info(f"{log_prefix}检测是否需要回忆,元消息:{message[:30]}...,消息长度: {len(message)}") try: time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) bot_name = global_config.bot.nickname @@ -1089,63 +1115,78 @@ async def build_memory_retrieval_prompt( if not recent_query_history: recent_query_history = "最近没有查询记录。" - # 第一步:生成问题 - question_prompt = await global_prompt_manager.format_prompt( - "memory_retrieval_question_prompt", - bot_name=bot_name, - time_now=time_now, - chat_history=message, - recent_query_history=recent_query_history, - sender=sender, - target_message=target, - ) - - success, response, reasoning_content, model_name = await llm_api.generate_with_model( - question_prompt, - model_config=model_config.model_task_config.tool_use, - request_type="memory.question", - ) - - if global_config.debug.show_memory_prompt: - logger.info(f"记忆检索问题生成提示词: {question_prompt}") - # logger.info(f"记忆检索问题生成响应: {response}") - - if not success: - logger.error(f"LLM生成问题失败: {response}") - return "" - - # 解析概念列表和问题列表 - _, questions = parse_questions_json(response) - if questions: - logger.info(f"解析到 {len(questions)} 个问题: {questions}") - - enable_jargon_detection = global_config.memory.enable_jargon_detection - concepts: List[str] = [] - - if enable_jargon_detection: - # 使用匹配逻辑自动识别聊天中的黑话概念 - concepts = match_jargon_from_text(message, chat_id) - if concepts: - logger.info(f"黑话匹配命中 {len(concepts)} 个概念: {concepts}") + # 第一步:生成问题或使用 Planner 提供的问题 + questions = [] + + # 如果 planner_question 配置开启,只使用 Planner 提供的问题,不使用旧模式 + if 
global_config.memory.planner_question: + if question and isinstance(question, str) and question.strip(): + # 清理和验证 question + cleaned_question = question.strip() + questions = [cleaned_question] + logger.info(f"{log_prefix}使用 Planner 提供的 question: {cleaned_question}") else: - logger.debug("黑话匹配未命中任何概念") + # planner_question 开启但没有提供 question,跳过记忆检索 + logger.debug(f"{log_prefix}planner_question 已开启但未提供 question,跳过记忆检索") + end_time = time.time() + logger.info(f"{log_prefix}无当次查询,不返回任何结果,耗时: {(end_time - start_time):.3f}秒") + return "" else: - logger.debug("已禁用记忆检索中的黑话识别") + # planner_question 关闭,使用旧模式:LLM 生成问题 + question_prompt = await global_prompt_manager.format_prompt( + "memory_retrieval_question_prompt", + bot_name=bot_name, + time_now=time_now, + chat_history=message, + recent_query_history=recent_query_history, + sender=sender, + target_message=target, + ) - # 对匹配到的概念进行jargon检索,作为初始信息 + success, response, reasoning_content, model_name = await llm_api.generate_with_model( + question_prompt, + model_config=model_config.model_task_config.tool_use, + request_type="memory.question", + ) + + if global_config.debug.show_memory_prompt: + logger.info(f"{log_prefix}记忆检索问题生成提示词: {question_prompt}") + # logger.info(f"记忆检索问题生成响应: {response}") + + if not success: + logger.error(f"{log_prefix}LLM生成问题失败: {response}") + return "" + + # 解析概念列表和问题列表 + _, questions = parse_questions_json(response) + if questions: + logger.info(f"{log_prefix}解析到 {len(questions)} 个问题: {questions}") + + # 初始阶段:使用 Planner 提供的 unknown_words 进行检索(如果提供) initial_info = "" - if enable_jargon_detection and concepts: - concept_info = await retrieve_concepts_with_jargon(concepts, chat_id) - if concept_info: - initial_info += concept_info - logger.debug(f"概念检索完成,结果: {concept_info}") - else: - logger.debug("概念检索未找到任何结果") + if unknown_words and len(unknown_words) > 0: + # 清理和去重 unknown_words + cleaned_concepts = [] + for word in unknown_words: + if isinstance(word, str): + cleaned = word.strip() + if cleaned: + 
cleaned_concepts.append(cleaned) + if cleaned_concepts: + # 对匹配到的概念进行jargon检索,作为初始信息 + concept_info = await retrieve_concepts_with_jargon(cleaned_concepts, chat_id) + if concept_info: + initial_info += concept_info + logger.info( + f"{log_prefix}使用 Planner 提供的 unknown_words,共 {len(cleaned_concepts)} 个概念,检索结果: {concept_info[:100]}..." + ) + else: + logger.debug(f"{log_prefix}unknown_words 检索未找到任何结果") if not questions: - logger.debug("模型认为不需要检索记忆或解析失败,不返回任何查询结果") + logger.debug(f"{log_prefix}模型认为不需要检索记忆或解析失败,不返回任何查询结果") end_time = time.time() - logger.info(f"无当次查询,不返回任何结果,耗时: {(end_time - start_time):.3f}秒") + logger.info(f"{log_prefix}无当次查询,不返回任何结果,耗时: {(end_time - start_time):.3f}秒") return "" # 第二步:并行处理所有问题(使用配置的最大迭代次数和超时时间) @@ -1157,17 +1198,16 @@ async def build_memory_retrieval_prompt( max_iterations = base_max_iterations timeout_seconds = global_config.memory.agent_timeout_seconds logger.debug( - f"问题数量: {len(questions)},think_level={think_level},设置最大迭代次数: {max_iterations}(基础值: {base_max_iterations}),超时时间: {timeout_seconds}秒" + f"{log_prefix}问题数量: {len(questions)},think_level={think_level},设置最大迭代次数: {max_iterations}(基础值: {base_max_iterations}),超时时间: {timeout_seconds}秒" ) - # 并行处理所有问题,将概念检索结果作为初始信息传递 + # 并行处理所有问题 question_tasks = [ _process_single_question( question=question, chat_id=chat_id, context=message, initial_info=initial_info, - initial_jargon_concepts=concepts if enable_jargon_detection else None, max_iterations=max_iterations, ) for question in questions @@ -1180,7 +1220,7 @@ async def build_memory_retrieval_prompt( question_results: List[str] = [] for i, result in enumerate(results): if isinstance(result, Exception): - logger.error(f"处理问题 '{questions[i]}' 时发生异常: {result}") + logger.error(f"{log_prefix}处理问题 '{questions[i]}' 时发生异常: {result}") elif result is not None: question_results.append(result) @@ -1216,14 +1256,14 @@ async def build_memory_retrieval_prompt( current_count = len(question_results) cached_count = len(all_results) - current_count 
logger.info( - f"记忆检索成功,耗时: {(end_time - start_time):.3f}秒," + f"{log_prefix}记忆检索成功,耗时: {(end_time - start_time):.3f}秒," f"当前查询 {current_count} 条记忆,缓存 {cached_count} 条记忆,共 {len(all_results)} 条记忆" ) return f"你回忆起了以下信息:\n{retrieved_memory}\n如果与回复内容相关,可以参考这些回忆的信息。\n" else: - logger.debug("所有问题均未找到答案,且无缓存答案") + logger.debug(f"{log_prefix}所有问题均未找到答案,且无缓存答案") return "" except Exception as e: - logger.error(f"记忆检索时发生异常: {str(e)}") + logger.error(f"{log_prefix}记忆检索时发生异常: {str(e)}") return "" diff --git a/src/memory_system/retrieval_tools/__init__.py b/src/memory_system/retrieval_tools/__init__.py index f30fd779..e058deb9 100644 --- a/src/memory_system/retrieval_tools/__init__.py +++ b/src/memory_system/retrieval_tools/__init__.py @@ -14,6 +14,7 @@ from .tool_registry import ( from .query_chat_history import register_tool as register_query_chat_history from .query_lpmm_knowledge import register_tool as register_lpmm_knowledge from .query_person_info import register_tool as register_query_person_info +from .query_words import register_tool as register_query_words from .found_answer import register_tool as register_finish_search from src.config.config import global_config @@ -22,6 +23,7 @@ def init_all_tools(): """初始化并注册所有记忆检索工具""" register_query_chat_history() register_query_person_info() + register_query_words() # 注册query_words工具 register_finish_search() # 注册finish_search工具 if global_config.lpmm_knowledge.lpmm_mode == "agent": diff --git a/src/memory_system/retrieval_tools/query_chat_history.py b/src/memory_system/retrieval_tools/query_chat_history.py index 351d0606..1abe44ef 100644 --- a/src/memory_system/retrieval_tools/query_chat_history.py +++ b/src/memory_system/retrieval_tools/query_chat_history.py @@ -4,7 +4,7 @@ """ import json -from typing import Optional +from typing import Optional, Set from datetime import datetime from src.common.logger import get_logger @@ -16,6 +16,72 @@ from .tool_registry import register_memory_retrieval_tool logger = 
get_logger("memory_retrieval_tools") +def _parse_blacklist_to_chat_ids(blacklist: list[str]) -> Set[str]: + """将黑名单配置(platform:id:type格式)转换为chat_id集合 + + Args: + blacklist: 黑名单配置列表,格式为 ["platform:id:type", ...] + + Returns: + Set[str]: chat_id集合 + """ + chat_ids = set() + if not blacklist: + return chat_ids + + try: + from src.chat.message_receive.chat_stream import get_chat_manager + + chat_manager = get_chat_manager() + for blacklist_item in blacklist: + if not isinstance(blacklist_item, str): + continue + + try: + parts = blacklist_item.split(":") + if len(parts) != 3: + logger.warning(f"黑名单配置格式错误,应为 platform:id:type,实际: {blacklist_item}") + continue + + platform = parts[0] + id_str = parts[1] + stream_type = parts[2] + + # 判断是否为群聊 + is_group = stream_type == "group" + + # 转换为chat_id + chat_id = chat_manager.get_stream_id(platform, str(id_str), is_group=is_group) + if chat_id: + chat_ids.add(chat_id) + else: + logger.warning(f"无法将黑名单配置转换为chat_id: {blacklist_item}") + except Exception as e: + logger.warning(f"解析黑名单配置失败: {blacklist_item}, 错误: {e}") + + except Exception as e: + logger.error(f"初始化黑名单chat_id集合失败: {e}") + + return chat_ids + + +def _is_chat_id_in_blacklist(chat_id: str) -> bool: + """检查chat_id是否在全局记忆黑名单中 + + Args: + chat_id: 要检查的chat_id + + Returns: + bool: 如果chat_id在黑名单中返回True,否则返回False + """ + blacklist = getattr(global_config.memory, "global_memory_blacklist", []) + if not blacklist: + return False + + blacklist_chat_ids = _parse_blacklist_to_chat_ids(blacklist) + return chat_id in blacklist_chat_ids + + async def search_chat_history(chat_id: str, keyword: Optional[str] = None, participant: Optional[str] = None) -> str: """根据关键词或参与人查询记忆,返回匹配的记忆id、记忆标题theme和关键词keywords @@ -33,17 +99,34 @@ async def search_chat_history(chat_id: str, keyword: Optional[str] = None, parti return "未指定查询参数(需要提供keyword或participant之一)" # 构建查询条件 + # 检查当前chat_id是否在黑名单中 + is_current_chat_in_blacklist = _is_chat_id_in_blacklist(chat_id) + # 根据配置决定是否限制在当前 chat_id 内查询 - 
use_global_search = global_config.memory.global_memory + # 如果当前chat_id在黑名单中,强制使用本地查询 + use_global_search = global_config.memory.global_memory and not is_current_chat_in_blacklist if use_global_search: - # 全局查询所有聊天记录 - query = ChatHistory.select() - logger.debug( - f"search_chat_history 启用全局查询模式,忽略 chat_id 过滤,keyword={keyword}, participant={participant}" - ) + # 全局查询所有聊天记录,但排除黑名单中的聊天流 + blacklist_chat_ids = _parse_blacklist_to_chat_ids(global_config.memory.global_memory_blacklist) + if blacklist_chat_ids: + # 排除黑名单中的chat_id + query = ChatHistory.select().where(~(ChatHistory.chat_id.in_(blacklist_chat_ids))) + logger.debug( + f"search_chat_history 启用全局查询模式(排除黑名单 {len(blacklist_chat_ids)} 个聊天流),keyword={keyword}, participant={participant}" + ) + else: + # 没有黑名单,查询所有 + query = ChatHistory.select() + logger.debug( + f"search_chat_history 启用全局查询模式,忽略 chat_id 过滤,keyword={keyword}, participant={participant}" + ) else: # 仅在当前聊天流内查询 + if is_current_chat_in_blacklist: + logger.debug( + f"search_chat_history 当前聊天流在黑名单中,强制使用本地查询,chat_id={chat_id}, keyword={keyword}, participant={participant}" + ) query = ChatHistory.select().where(ChatHistory.chat_id == chat_id) # 执行查询 diff --git a/src/memory_system/retrieval_tools/query_words.py b/src/memory_system/retrieval_tools/query_words.py new file mode 100644 index 00000000..02b5eff3 --- /dev/null +++ b/src/memory_system/retrieval_tools/query_words.py @@ -0,0 +1,80 @@ +""" +查询黑话/概念含义 - 工具实现 +用于在记忆检索过程中主动查询未知词语或黑话的含义 +""" + +from typing import List, Optional +from src.common.logger import get_logger +from src.bw_learner.jargon_explainer import retrieve_concepts_with_jargon +from .tool_registry import register_memory_retrieval_tool + +logger = get_logger("memory_retrieval_tools") + + +async def query_words(chat_id: str, words: str) -> str: + """查询词语或黑话的含义 + + Args: + chat_id: 聊天ID + words: 要查询的词语,可以是单个词语或多个词语(用逗号、空格等分隔) + + Returns: + str: 查询结果,包含词语的含义解释 + """ + try: + if not words or not words.strip(): + return "未提供要查询的词语" + + # 
解析词语列表(支持逗号、空格等分隔符) + words_list = [] + for separator in [",", ",", " ", "\n", "\t"]: + if separator in words: + words_list = [w.strip() for w in words.split(separator) if w.strip()] + break + + # 如果没有找到分隔符,整个字符串作为一个词语 + if not words_list: + words_list = [words.strip()] + + # 去重 + unique_words = [] + seen = set() + for word in words_list: + if word and word not in seen: + unique_words.append(word) + seen.add(word) + + if not unique_words: + return "未提供有效的词语" + + logger.info(f"查询词语含义: {unique_words}") + + # 调用检索函数 + result = await retrieve_concepts_with_jargon(unique_words, chat_id) + + if result: + return result + else: + return f"未找到词语 '{', '.join(unique_words)}' 的含义或黑话解释" + + except Exception as e: + logger.error(f"查询词语含义失败: {e}") + return f"查询失败: {str(e)}" + + +def register_tool(): + """注册工具""" + register_memory_retrieval_tool( + name="query_words", + description="查询词语或黑话的含义。当遇到不熟悉的词语、缩写、黑话或网络用语时,可以使用此工具查询其含义。支持查询单个或多个词语(用逗号、空格等分隔)。", + parameters=[ + { + "name": "words", + "type": "string", + "description": "要查询的词语,可以是单个词语或多个词语(用逗号、空格等分隔,如:'YYDS' 或 'YYDS,内卷,996')", + "required": True, + }, + ], + execute_func=query_words, + ) + diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index c051c3c5..f006a600 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "7.2.5" +version = "7.2.8" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- # 如果你想要修改配置文件,请递增version的值 @@ -122,10 +122,13 @@ talk_value_rules = [ ] [memory] -max_agent_iterations = 3 # 记忆思考深度(最低为1) -agent_timeout_seconds = 200.0 # 最长回忆时间(秒) -enable_jargon_detection = true # 记忆检索过程中是否启用黑话识别 +max_agent_iterations = 5 # 记忆思考深度(最低为1) +agent_timeout_seconds = 180.0 # 最长回忆时间(秒) global_memory = false # 是否允许记忆检索进行全局查询 +global_memory_blacklist = [ + +] # 全局记忆黑名单,当启用全局记忆时,不将特定聊天流纳入检索。格式: ["platform:id:type", ...],例如: ["qq:1919810:private", "qq:114514:group"] +planner_question = true # 是否使用 Planner 提供的 question 作为记忆检索问题。开启后,当 Planner 
在 reply 动作中提供了 question 时,直接使用该问题进行记忆检索,跳过 LLM 生成问题的步骤;关闭后沿用旧模式,使用 LLM 生成问题 [dream] interval_minutes = 60 # 做梦时间间隔(分钟),默认30分钟
请求类型 | 调用次数 | 输入Token | 输出Token | Token总量 | 累计花费 | 平均耗时(秒) | 标准差(秒) | 每次回复平均调用次数 | 每次回复平均Token数
请求类型 | 调用次数 | 输入Token | 输出Token | Token总量 | 累计花费 | 平均耗时(秒) | 标准差(秒) | 每次回复平均调用次数 | 每次回复平均Token数 | 每次调用平均Token