From 6adc972e532b5ef0efb0e61aeb45dfcde4d57fe9 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Tue, 4 Nov 2025 21:20:59 +0800 Subject: [PATCH 1/6] =?UTF-8?q?feat=EF=BC=9A=E6=B7=BB=E5=8A=A0=E4=B8=BB?= =?UTF-8?q?=E5=8A=A8=E5=8F=91=E8=A8=80=E7=9B=B8=E5=85=B3api?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/heart_flow/heartFC_chat.py | 3 +- src/plugin_system/__init__.py | 2 + src/plugin_system/apis/__init__.py | 2 + src/plugin_system/apis/auto_talk_api.py | 51 +++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 src/plugin_system/apis/auto_talk_api.py diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index 0df89678..e4900197 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -107,6 +107,7 @@ class HeartFChatting: self.last_active_time = time.time() # 记录上一次非noreply时间 + self.question_probability_multiplier = 1 self.questioned = False @@ -192,7 +193,7 @@ class HeartFChatting: else: question_probability = 0.00003 - question_probability = question_probability * global_config.chat.get_auto_chat_value(self.stream_id) + question_probability = question_probability * global_config.chat.get_auto_chat_value(self.stream_id) * self.question_probability_multiplier # print(f"{self.log_prefix} questioned: {self.questioned},len: {len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id))}") if question_probability > 0 and not self.questioned and len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id)) == 0: #长久没有回复,可以试试主动发言,提问概率随着时间增加 diff --git a/src/plugin_system/__init__.py b/src/plugin_system/__init__.py index 18c04df7..a3561f0e 100644 --- a/src/plugin_system/__init__.py +++ b/src/plugin_system/__init__.py @@ -53,6 +53,7 @@ from .apis import ( person_api, plugin_manage_api, send_api, + auto_talk_api, register_plugin, get_logger, ) @@ -83,6 +84,7 @@ __all__ = [ "person_api", "plugin_manage_api", "send_api", + "auto_talk_api", "register_plugin", "get_logger", # 基础类 diff --git a/src/plugin_system/apis/__init__.py b/src/plugin_system/apis/__init__.py index 5df61ef8..730ee907 100644 --- a/src/plugin_system/apis/__init__.py +++ b/src/plugin_system/apis/__init__.py @@ -20,6 +20,7 @@ from src.plugin_system.apis import ( tool_api, frequency_api, mood_api, + auto_talk_api, ) from .logging_api import get_logger from .plugin_register_api import register_plugin @@ -42,4 +43,5 @@ __all__ = [ "tool_api", "frequency_api", "mood_api", + "auto_talk_api", ] diff --git a/src/plugin_system/apis/auto_talk_api.py b/src/plugin_system/apis/auto_talk_api.py new file mode 100644 index 00000000..37700ff2 --- /dev/null +++ b/src/plugin_system/apis/auto_talk_api.py @@ -0,0 +1,51 @@ +from src.common.logger import get_logger +from src.chat.heart_flow.heartFC_chat import HeartFChatting +from src.chat.heart_flow.heartflow import heartflow + +logger = get_logger("auto_talk_api") + + +def set_question_probability_multiplier(chat_id: str, multiplier: float) -> bool: + """ + 设置指定 chat_id 的主动发言概率乘数。 + + 返回: + bool: 设置是否成功。仅当目标聊天为群聊(HeartFChatting)且存在时为 True。 + """ + try: + if not isinstance(chat_id, str): + raise TypeError("chat_id 必须是 str") + if not isinstance(multiplier, (int, float)): + raise TypeError("multiplier 必须是数值类型") + + chat = heartflow.heartflow_chat_list.get(chat_id) + if chat is None: + logger.warning(f"未找到 chat_id={chat_id} 的心流实例,无法设置乘数") + return False + + if not isinstance(chat, HeartFChatting): + logger.warning(f"chat_id={chat_id} 非群聊(HeartFChatting),不支持设置主动发言乘数") + return False + + # 约束:不允许负值 + value = float(multiplier) + if value < 0: + value = 0.0 + + chat.question_probability_multiplier = value + logger.info(f"[auto_talk_api] chat_id={chat_id} 主动发言乘数已设为 {value}") + return True + except Exception as e: + logger.error(f"设置主动发言乘数失败: {e}") + return False + + +def get_question_probability_multiplier(chat_id: str) -> float: + """获取指定 chat_id 的主动发言概率乘数,未找到则返回 0。""" + try: + chat = heartflow.heartflow_chat_list.get(chat_id) + if isinstance(chat, HeartFChatting): + return float(getattr(chat, "question_probability_multiplier", 0.0)) + return 0.0 + except Exception: + return 0.0 From a4d43e1aee0ee9aa033913818ad1e0fe371a44a1 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Tue, 4 Nov 2025 21:31:41 +0800 Subject: [PATCH 2/6] Update auto_talk_api.py --- src/plugin_system/apis/auto_talk_api.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/plugin_system/apis/auto_talk_api.py b/src/plugin_system/apis/auto_talk_api.py index 37700ff2..60ad9501 100644 --- a/src/plugin_system/apis/auto_talk_api.py +++ b/src/plugin_system/apis/auto_talk_api.py @@ -1,6 +1,4 @@ from src.common.logger import get_logger -from src.chat.heart_flow.heartFC_chat import HeartFChatting -from src.chat.heart_flow.heartflow import heartflow logger = get_logger("auto_talk_api") @@ -18,13 +16,17 @@ def set_question_probability_multiplier(chat_id: str, multiplier: float) -> bool if not isinstance(multiplier, (int, float)): raise TypeError("multiplier 必须是数值类型") - chat = heartflow.heartflow_chat_list.get(chat_id) + # 延迟导入以避免循环依赖 + from src.chat.heart_flow.heartflow import heartflow as _heartflow + + chat = _heartflow.heartflow_chat_list.get(chat_id) if chat is None: logger.warning(f"未找到 chat_id={chat_id} 的心流实例,无法设置乘数") return False - if not isinstance(chat, HeartFChatting): - logger.warning(f"chat_id={chat_id} 非群聊(HeartFChatting),不支持设置主动发言乘数") + # 仅对拥有该属性的群聊心流生效(鸭子类型,避免导入类) + if not hasattr(chat, "question_probability_multiplier"): + logger.warning(f"chat_id={chat_id} 实例不支持主动发言乘数设置") return False # 约束:不允许负值 @@ -43,9 +45,12 @@ def set_question_probability_multiplier(chat_id: str, multiplier: float) -> bool def get_question_probability_multiplier(chat_id: str) -> float: """获取指定 chat_id 的主动发言概率乘数,未找到则返回 0。""" try: - chat = heartflow.heartflow_chat_list.get(chat_id) - if isinstance(chat, HeartFChatting): - return float(getattr(chat, "question_probability_multiplier", 0.0)) - return 0.0 + # 延迟导入以避免循环依赖 + from src.chat.heart_flow.heartflow import heartflow as _heartflow + + chat = _heartflow.heartflow_chat_list.get(chat_id) + if chat is None: + return 0.0 + return float(getattr(chat, "question_probability_multiplier", 0.0)) except Exception: return 0.0 From 03e06c282ce2401c9253f6f85b2208347244977e Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Wed, 5 Nov 2025 00:35:16 +0800 Subject: [PATCH 3/6] =?UTF-8?q?feat=EF=BC=9A=E5=8F=AF=E4=BB=A5=E5=AF=B9?= =?UTF-8?q?=E4=B8=8D=E5=90=8Cchat=E8=87=AA=E5=AE=9A=E4=B9=89=E4=B8=80?= =?UTF-8?q?=E6=AE=B5=E9=A2=9D=E5=A4=96prompt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/emoji_system/emoji_manager.py | 4 +- src/chat/heart_flow/heartFC_chat.py | 6 +- src/common/database/database_model.py | 102 +++----- src/jargon/__init__.py | 7 + src/jargon/jargon_miner.py | 230 +++++++++++++++++++ src/llm_models/model_client/openai_client.py | 2 +- src/memory_system/curious.py | 54 ++++- 7 files changed, 329 insertions(+), 76 deletions(-) create mode 100644 src/jargon/__init__.py create mode 100644 src/jargon/jargon_miner.py diff --git a/src/chat/emoji_system/emoji_manager.py b/src/chat/emoji_system/emoji_manager.py index 512e7e55..b26ab844 100644 --- a/src/chat/emoji_system/emoji_manager.py +++ b/src/chat/emoji_system/emoji_manager.py @@ -940,13 +940,13 @@ class EmojiManager: image_base64 = get_image_manager().transform_gif(image_base64) # type: ignore if not image_base64: raise RuntimeError("GIF表情包转换失败") - prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析" + prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,从互联网梗,meme的角度去分析,精简回答" description, _ = await self.vlm.generate_response_for_image( prompt, image_base64, "jpg", temperature=0.5 ) else: prompt = ( - "这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析" + "这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析,精简回答" ) description, _ = await self.vlm.generate_response_for_image( prompt, image_base64, image_format, temperature=0.5 diff --git a/src/chat/heart_flow/heartFC_chat.py b/src/chat/heart_flow/heartFC_chat.py index e4900197..99e55122 100644 --- a/src/chat/heart_flow/heartFC_chat.py +++ b/src/chat/heart_flow/heartFC_chat.py @@ -17,12 +17,12 @@ from src.chat.planner_actions.planner import ActionPlanner from src.chat.planner_actions.action_modifier import ActionModifier from src.chat.planner_actions.action_manager import ActionManager from src.chat.heart_flow.hfc_utils import CycleDetail -from src.chat.heart_flow.hfc_utils import send_typing, stop_typing from src.express.expression_learner import expression_learner_manager from src.chat.frequency_control.frequency_control import frequency_control_manager from src.memory_system.question_maker import QuestionMaker from src.memory_system.questions import global_conflict_tracker from src.memory_system.curious import check_and_make_question +from src.jargon import extract_and_store_jargon from src.person_info.person_info import Person from src.plugin_system.base.component_types import EventType, ActionInfo from src.plugin_system.core import events_manager @@ -336,7 +336,9 @@ class HeartFChatting: asyncio.create_task(frequency_control_manager.get_or_create_frequency_control(self.stream_id).trigger_frequency_adjust()) # 添加curious检测任务 - 检测聊天记录中的矛盾、冲突或需要提问的内容 - asyncio.create_task(check_and_make_question(self.stream_id, recent_messages_list)) + asyncio.create_task(check_and_make_question(self.stream_id)) + # 添加jargon提取任务 - 提取聊天中的黑话/俚语并入库(内部自行取消息并带冷却) + asyncio.create_task(extract_and_store_jargon(self.stream_id)) cycle_timers, thinking_id = self.start_cycle() diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py index 89e0a019..a1aaaa23 100644 --- a/src/common/database/database_model.py +++ b/src/common/database/database_model.py @@ -20,6 +20,8 @@ logger = get_logger("database_model") # 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。 # 这允许您在一个地方为所有模型指定数据库。 + + class BaseModel(Model): class Meta: # 将下面的 'db' 替换为您实际的数据库实例变量名。 @@ -343,30 +345,45 @@ class MemoryConflict(BaseModel): class Meta: table_name = "memory_conflicts" + +class Jargon(BaseModel): + """ + 用于存储俚语的模型 + """ + content = TextField() + raw_content = TextField(null=True) + type = TextField(null=True) + translation = TextField(null=True) + meaning = TextField(null=True) + chat_id = TextField(index=True) + is_global = BooleanField(default=False) + count = IntegerField(default=0) + + class Meta: + table_name = "jargon" - +MODELS = [ + ChatStreams, + LLMUsage, + Emoji, + Messages, + Images, + ImageDescriptions, + OnlineTime, + PersonInfo, + Expression, + ActionRecords, + MemoryChest, + MemoryConflict, + Jargon, +] def create_tables(): """ 创建所有在模型中定义的数据库表。 """ with db: - db.create_tables( - [ - ChatStreams, - LLMUsage, - Emoji, - Messages, - Images, - ImageDescriptions, - OnlineTime, - PersonInfo, - Expression, - ActionRecords, # 添加 ActionRecords 到初始化列表 - MemoryChest, - MemoryConflict, # 添加记忆冲突表 - ] - ) + db.create_tables(MODELS) def initialize_database(sync_constraints=False): @@ -379,24 +396,9 @@ def initialize_database(sync_constraints=False): 如果为 True,会检查并修复字段的 NULL 约束不一致问题。 """ - models = [ - ChatStreams, - LLMUsage, - Emoji, - Messages, - Images, - ImageDescriptions, - OnlineTime, - PersonInfo, - Expression, - ActionRecords, # 添加 ActionRecords 到初始化列表 - MemoryChest, - MemoryConflict, - ] - try: with db: # 管理 table_exists 检查的连接 - for model in models: + for model in MODELS: table_name = model._meta.table_name if not db.table_exists(model): logger.warning(f"表 '{table_name}' 未找到,正在创建...") @@ -476,24 +478,9 @@ def sync_field_constraints(): 如果发现不一致,会自动修复字段约束。 """ - models = [ - ChatStreams, - LLMUsage, - Emoji, - Messages, - Images, - ImageDescriptions, - OnlineTime, - PersonInfo, - Expression, - ActionRecords, - MemoryChest, - MemoryConflict, - ] - try: with db: - for model in models: + for model in MODELS: table_name = model._meta.table_name if not db.table_exists(model): logger.warning(f"表 '{table_name}' 不存在,跳过约束检查") @@ -660,26 +647,11 @@ def check_field_constraints(): 用于在修复前预览需要修复的内容。 """ - models = [ - ChatStreams, - LLMUsage, - Emoji, - Messages, - Images, - ImageDescriptions, - OnlineTime, - PersonInfo, - Expression, - ActionRecords, - MemoryChest, - MemoryConflict, - ] - inconsistencies = {} try: with db: - for model in models: + for model in MODELS: table_name = model._meta.table_name if not db.table_exists(model): continue diff --git a/src/jargon/__init__.py b/src/jargon/__init__.py new file mode 100644 index 00000000..1a60a94a --- /dev/null +++ b/src/jargon/__init__.py @@ -0,0 +1,7 @@ +from .jargon_miner import extract_and_store_jargon + +__all__ = [ + "extract_and_store_jargon", +] + + diff --git a/src/jargon/jargon_miner.py b/src/jargon/jargon_miner.py new file mode 100644 index 00000000..c0f035d8 --- /dev/null +++ b/src/jargon/jargon_miner.py @@ -0,0 +1,230 @@ +import time +import json +from typing import List +from json_repair import repair_json + +from src.common.logger import get_logger +from src.common.database.database_model import Jargon +from src.llm_models.utils_model import LLMRequest +from src.config.config import model_config +from src.chat.message_receive.chat_stream import get_chat_manager +from src.chat.utils.chat_message_builder import ( + build_anonymous_messages, + get_raw_msg_by_timestamp_with_chat_inclusive, +) +from src.chat.utils.prompt_builder import Prompt, global_prompt_manager + + +logger = get_logger("jargon") + + +def _init_prompt() -> None: + prompt_str = """ +**聊天内容** +{chat_str} + +请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。 +- 必须为对话中真实出现过的短词或短语 +- 必须是你无法理解含义的词语,或者出现频率较高的词语 +- 必须是这几种类别之一:英文或中文缩写、中文拼音短语、字母数字混合、意义不明但频繁的词汇 +- 排除:人名、@、明显的表情/图片占位、纯标点、常规功能词(如的、了、呢、啊等) +- 每个词条长度建议 2-8 个字符(不强制),尽量短小 +- 合并重复项,去重 + +分类规则: +- p(拼音缩写):由字母或字母和汉字构成的,疑似拼音简写词,例如:nb、yyds、xswl +- c(中文缩写):中文词语的缩写,用几个汉字概括一个词汇或含义,例如:社死、内卷 +- e(英文缩写):英文词语的缩写,用英文字母概括一个词汇或含义,例如:CPU、GPU、API + +以 JSON 数组输出,元素为对象(严格按以下结构): +[ + {{"content": "词条", "raw_content": "包含该词条的完整句子", "type": "p"}}, + {{"content": "词条2", "raw_content": "包含该词条的完整句子", "type": "c"}} +] + +现在请输出: +""" + Prompt(prompt_str, "extract_jargon_prompt") + + +_init_prompt() + + +class JargonMiner: + def __init__(self, chat_id: str) -> None: + self.chat_id = chat_id + self.last_learning_time: float = time.time() + # 频率控制,可按需调整 + self.min_messages_for_learning: int = 20 + self.min_learning_interval: float = 30 + + self.llm = LLMRequest( + model_set=model_config.model_task_config.utils, + request_type="jargon.extract", + ) + + def should_trigger(self) -> bool: + # 冷却时间检查 + if time.time() - self.last_learning_time < self.min_learning_interval: + return False + + # 拉取最近消息数量是否足够 + recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive( + chat_id=self.chat_id, + timestamp_start=self.last_learning_time, + timestamp_end=time.time(), + ) + return bool(recent_messages and len(recent_messages) >= self.min_messages_for_learning) + + async def run_once(self) -> None: + try: + if not self.should_trigger(): + return + + chat_stream = get_chat_manager().get_stream(self.chat_id) + if not chat_stream: + return + + # 拉取学习窗口内的消息 + messages = get_raw_msg_by_timestamp_with_chat_inclusive( + chat_id=self.chat_id, + timestamp_start=self.last_learning_time, + timestamp_end=time.time(), + limit=20, + ) + if not messages: + return + + chat_str: str = await build_anonymous_messages(messages) + if not chat_str.strip(): + return + + prompt: str = await global_prompt_manager.format_prompt( + "extract_jargon_prompt", + chat_str=chat_str, + ) + + response, _ = await self.llm.generate_response_async(prompt, temperature=0.2) + if not response: + return + + logger.info(f"jargon提取提示词: {prompt}") + logger.info(f"jargon提取结果: {response}") + + # 解析为JSON + entries: List[dict] = [] + try: + resp = response.strip() + parsed = None + if resp.startswith("[") and resp.endswith("]"): + parsed = json.loads(resp) + else: + repaired = repair_json(resp) + if isinstance(repaired, str): + parsed = json.loads(repaired) + else: + parsed = repaired + + if isinstance(parsed, dict): + parsed = [parsed] + + if not isinstance(parsed, list): + return + + for item in parsed: + if not isinstance(item, dict): + continue + content = str(item.get("content", "")).strip() + raw_content = str(item.get("raw_content", "")).strip() + type_str = str(item.get("type", "")).strip().lower() + + # 验证type是否为有效值 + if type_str not in ["p", "c", "e"]: + type_str = "p" # 默认值 + + if content: + entries.append({ + "content": content, + "raw_content": raw_content, + "type": type_str + }) + except Exception as e: + logger.error(f"解析jargon JSON失败: {e}; 原始: {response}") + return + + if not entries: + return + + # 去重并写入DB(按 chat_id + content 去重) + # 使用content作为去重键 + seen = set() + uniq_entries = [] + for entry in entries: + content_key = entry["content"] + if content_key not in seen: + seen.add(content_key) + uniq_entries.append(entry) + + saved = 0 + updated = 0 + for entry in uniq_entries: + content = entry["content"] + raw_content = entry["raw_content"] + type_str = entry["type"] + try: + query = ( + Jargon.select() + .where((Jargon.chat_id == self.chat_id) & (Jargon.content == content)) + ) + if query.exists(): + obj = query.get() + try: + obj.count = (obj.count or 0) + 1 + except Exception: + obj.count = 1 + # 更新raw_content和type(如果为空或需要更新) + if raw_content and not obj.raw_content: + obj.raw_content = raw_content + if type_str and not obj.type: + obj.type = type_str + obj.save() + updated += 1 + else: + Jargon.create( + content=content, + raw_content=raw_content, + type=type_str, + chat_id=self.chat_id, + is_global=False, + count=1 + ) + saved += 1 + except Exception as e: + logger.error(f"保存jargon失败: chat_id={self.chat_id}, content={content}, err={e}") + continue + + if saved or updated: + logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated} 条,chat_id={self.chat_id}") + self.last_learning_time = time.time() + except Exception as e: + logger.error(f"JargonMiner 运行失败: {e}") + + +class JargonMinerManager: + def __init__(self) -> None: + self._miners: dict[str, JargonMiner] = {} + + def get_miner(self, chat_id: str) -> JargonMiner: + if chat_id not in self._miners: + self._miners[chat_id] = JargonMiner(chat_id) + return self._miners[chat_id] + + +miner_manager = JargonMinerManager() + + +async def extract_and_store_jargon(chat_id: str) -> None: + miner = miner_manager.get_miner(chat_id) + await miner.run_once() + + diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py index 36af7775..8c91e867 100644 --- a/src/llm_models/model_client/openai_client.py +++ b/src/llm_models/model_client/openai_client.py @@ -444,7 +444,7 @@ def _default_normal_response_parser( choice0 = resp.choices[0] reason = getattr(choice0, "finish_reason", None) if reason and reason == "length": - print(resp) + # print(resp) _model_name = resp.model # 统一日志格式 logger.info( diff --git a/src/memory_system/curious.py b/src/memory_system/curious.py index badb421d..80bffdae 100644 --- a/src/memory_system/curious.py +++ b/src/memory_system/curious.py @@ -1,9 +1,8 @@ import time -import asyncio -from typing import List, Optional, Tuple +from typing import List, Optional from src.common.logger import get_logger from src.chat.utils.chat_message_builder import ( - get_raw_msg_before_timestamp_with_chat, + get_raw_msg_by_timestamp_with_chat_inclusive, build_readable_messages_with_id, ) from src.llm_models.utils_model import LLMRequest @@ -25,7 +24,21 @@ class CuriousDetector: model_set=model_config.model_task_config.utils, request_type="curious_detector", ) + # 触发控制 + self.last_detection_time: float = time.time() + self.min_interval_seconds: float = 60.0 + self.min_messages: int = 20 + def should_trigger(self) -> bool: + if time.time() - self.last_detection_time < self.min_interval_seconds: + return False + recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive( + chat_id=self.chat_id, + timestamp_start=self.last_detection_time, + timestamp_end=time.time(), + ) + return bool(recent_messages and len(recent_messages) >= self.min_messages) + async def detect_questions(self, recent_messages: List) -> Optional[str]: """ 检测最近消息中是否有需要提问的内容 @@ -91,6 +104,9 @@ class CuriousDetector: result_text, _ = await self.llm_request.generate_response_async(prompt, temperature=0.3) + logger.info(f"好奇心检测提示词: {prompt}") + logger.info(f"好奇心检测结果: {result_text}") + if not result_text: return None @@ -154,7 +170,20 @@ class CuriousDetector: return False -async def check_and_make_question(chat_id: str, recent_messages: List) -> bool: +class CuriousManager: + def __init__(self) -> None: + self._detectors: dict[str, CuriousDetector] = {} + + def get_detector(self, chat_id: str) -> CuriousDetector: + if chat_id not in self._detectors: + self._detectors[chat_id] = CuriousDetector(chat_id) + return self._detectors[chat_id] + + +curious_manager = CuriousManager() + + +async def check_and_make_question(chat_id: str) -> bool: """ 检查聊天记录并生成问题(如果检测到需要提问的内容) @@ -166,8 +195,20 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool: bool: 是否检测到并记录了问题 """ try: - detector = CuriousDetector(chat_id) - + detector = curious_manager.get_detector(chat_id) + if not detector.should_trigger(): + return False + + # 拉取窗口内消息 + recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive( + chat_id=chat_id, + timestamp_start=detector.last_detection_time, + timestamp_end=time.time(), + limit=80, + ) + if not recent_messages: + return False + # 检测是否需要提问 question = await detector.detect_questions(recent_messages) @@ -176,6 +217,7 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool: success = await detector.make_question_from_detection(question) if success: logger.info(f"成功检测并记录问题: {question}") + detector.last_detection_time = time.time() return True return False From 5bde31e5123e6b1384fd15fa954d640008479e04 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Wed, 5 Nov 2025 00:35:26 +0800 Subject: [PATCH 4/6] =?UTF-8?q?feat:=E6=B7=BB=E5=8A=A0=E9=BB=91=E8=AF=9D?= =?UTF-8?q?=E6=94=B6=E9=9B=86=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/replyer/group_generator.py | 83 +++++++++++++++++++++++ src/chat/replyer/private_generator.py | 83 +++++++++++++++++++++++ src/chat/replyer/prompt/replyer_prompt.py | 6 +- src/config/official_configs.py | 19 ++++++ template/bot_config_template.toml | 12 +++- 5 files changed, 199 insertions(+), 4 deletions(-) diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index cc6dfee4..6f0a944d 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -639,6 +639,83 @@ class DefaultReplyer: prompt_personality = f"{prompt_personality};" return f"你的名字是{bot_name}{bot_nickname},你{prompt_personality}" + def _parse_chat_prompt_config_to_chat_id(self, chat_prompt_str: str) -> Optional[tuple[str, str]]: + """ + 解析聊天prompt配置字符串并生成对应的 chat_id 和 prompt内容 + + Args: + chat_prompt_str: 格式为 "platform:id:type:prompt内容" 的字符串 + + Returns: + tuple: (chat_id, prompt_content),如果解析失败则返回 None + """ + try: + # 使用 split 分割,但限制分割次数为3,因为prompt内容可能包含冒号 + parts = chat_prompt_str.split(":", 3) + if len(parts) != 4: + return None + + platform = parts[0] + id_str = parts[1] + stream_type = parts[2] + prompt_content = parts[3] + + # 判断是否为群聊 + is_group = stream_type == "group" + + # 使用与 ChatStream.get_stream_id 相同的逻辑生成 chat_id + import hashlib + + if is_group: + components = [platform, str(id_str)] + else: + components = [platform, str(id_str), "private"] + key = "_".join(components) + chat_id = hashlib.md5(key.encode()).hexdigest() + + return chat_id, prompt_content + + except (ValueError, IndexError): + return None + + def get_chat_prompt_for_chat(self, chat_id: str) -> str: + """ + 根据聊天流ID获取匹配的额外prompt(仅匹配group类型) + + Args: + chat_id: 聊天流ID(哈希值) + + Returns: + str: 匹配的额外prompt内容,如果没有匹配则返回空字符串 + """ + if not global_config.experimental.chat_prompts: + return "" + + for chat_prompt_str in global_config.experimental.chat_prompts: + if not isinstance(chat_prompt_str, str): + continue + + # 解析配置字符串,检查类型是否为group + parts = chat_prompt_str.split(":", 3) + if len(parts) != 4: + continue + + stream_type = parts[2] + # 只匹配group类型 + if stream_type != "group": + continue + + result = self._parse_chat_prompt_config_to_chat_id(chat_prompt_str) + if result is None: + continue + + config_chat_id, prompt_content = result + if config_chat_id == chat_id: + logger.debug(f"匹配到群聊prompt配置,chat_id: {chat_id}, prompt: {prompt_content[:50]}...") + return prompt_content + + return "" + async def build_prompt_reply_context( self, reply_message: Optional[DatabaseMessages] = None, @@ -820,6 +897,11 @@ class DefaultReplyer: # 构建分离的对话 prompt dialogue_prompt = self.build_chat_history_prompts(message_list_before_now_long, user_id, sender) + # 获取匹配的额外prompt + chat_prompt_content = self.get_chat_prompt_for_chat(chat_id) + chat_prompt_block = f"{chat_prompt_content}\n" if chat_prompt_content else "" + + # 固定使用群聊回复模板 return await global_prompt_manager.format_prompt( "replyer_prompt", expression_habits_block=expression_habits_block, @@ -840,6 +922,7 @@ class DefaultReplyer: keywords_reaction_prompt=keywords_reaction_prompt, moderation_prompt=moderation_prompt_block, question_block=question_block, + chat_prompt=chat_prompt_block, ), selected_expressions async def build_prompt_rewrite_context( diff --git a/src/chat/replyer/private_generator.py b/src/chat/replyer/private_generator.py index 2bd48de4..8a92fd15 100644 --- a/src/chat/replyer/private_generator.py +++ b/src/chat/replyer/private_generator.py @@ -536,6 +536,83 @@ class PrivateReplyer: prompt_personality = f"{prompt_personality};" return f"你的名字是{bot_name}{bot_nickname},你{prompt_personality}" + def _parse_chat_prompt_config_to_chat_id(self, chat_prompt_str: str) -> Optional[tuple[str, str]]: + """ + 解析聊天prompt配置字符串并生成对应的 chat_id 和 prompt内容 + + Args: + chat_prompt_str: 格式为 "platform:id:type:prompt内容" 的字符串 + + Returns: + tuple: (chat_id, prompt_content),如果解析失败则返回 None + """ + try: + # 使用 split 分割,但限制分割次数为3,因为prompt内容可能包含冒号 + parts = chat_prompt_str.split(":", 3) + if len(parts) != 4: + return None + + platform = parts[0] + id_str = parts[1] + stream_type = parts[2] + prompt_content = parts[3] + + # 判断是否为群聊 + is_group = stream_type == "group" + + # 使用与 ChatStream.get_stream_id 相同的逻辑生成 chat_id + import hashlib + + if is_group: + components = [platform, str(id_str)] + else: + components = [platform, str(id_str), "private"] + key = "_".join(components) + chat_id = hashlib.md5(key.encode()).hexdigest() + + return chat_id, prompt_content + + except (ValueError, IndexError): + return None + + def get_chat_prompt_for_chat(self, chat_id: str) -> str: + """ + 根据聊天流ID获取匹配的额外prompt(仅匹配private类型) + + Args: + chat_id: 聊天流ID(哈希值) + + Returns: + str: 匹配的额外prompt内容,如果没有匹配则返回空字符串 + """ + if not global_config.experimental.chat_prompts: + return "" + + for chat_prompt_str in global_config.experimental.chat_prompts: + if not isinstance(chat_prompt_str, str): + continue + + # 解析配置字符串,检查类型是否为private + parts = chat_prompt_str.split(":", 3) + if len(parts) != 4: + continue + + stream_type = parts[2] + # 只匹配private类型 + if stream_type != "private": + continue + + result = self._parse_chat_prompt_config_to_chat_id(chat_prompt_str) + if result is None: + continue + + config_chat_id, prompt_content = result + if config_chat_id == chat_id: + logger.debug(f"匹配到私聊prompt配置,chat_id: {chat_id}, prompt: {prompt_content[:50]}...") + return prompt_content + + return "" + async def build_prompt_reply_context( self, reply_message: Optional[DatabaseMessages] = None, @@ -718,6 +795,10 @@ class PrivateReplyer: # 其他情况(空内容等) reply_target_block = f"现在对方说的:{target}。引起了你的注意" + # 获取匹配的额外prompt + chat_prompt_content = self.get_chat_prompt_for_chat(chat_id) + chat_prompt_block = f"{chat_prompt_content}\n" if chat_prompt_content else "" + if global_config.bot.qq_account == user_id and platform == global_config.bot.platform: return await global_prompt_manager.format_prompt( "private_replyer_self_prompt", @@ -738,6 +819,7 @@ class PrivateReplyer: reply_style=global_config.personality.reply_style, keywords_reaction_prompt=keywords_reaction_prompt, moderation_prompt=moderation_prompt_block, + chat_prompt=chat_prompt_block, ), selected_expressions else: return await global_prompt_manager.format_prompt( @@ -758,6 +840,7 @@ class PrivateReplyer: keywords_reaction_prompt=keywords_reaction_prompt, moderation_prompt=moderation_prompt_block, sender_name=sender, + chat_prompt=chat_prompt_block, ), selected_expressions async def build_prompt_rewrite_context( diff --git a/src/chat/replyer/prompt/replyer_prompt.py b/src/chat/replyer/prompt/replyer_prompt.py index 4e9b015d..26b47cb4 100644 --- a/src/chat/replyer/prompt/replyer_prompt.py +++ b/src/chat/replyer/prompt/replyer_prompt.py @@ -21,7 +21,7 @@ def init_replyer_prompt(): {reply_target_block}。 {identity} -你正在群里聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state} +{chat_prompt}你正在群里聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state} 尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。 {reply_style} 请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出一句回复内容就好。 @@ -41,7 +41,7 @@ def init_replyer_prompt(): {reply_target_block}。 {identity} -你正在和{sender_name}聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state} +{chat_prompt}你正在和{sender_name}聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state} 尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。 {reply_style} 请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。 @@ -61,7 +61,7 @@ def init_replyer_prompt(): 你现在想补充说明你刚刚自己的发言内容:{target},原因是{reason} 请你根据聊天内容,组织一条新回复。注意,{target} 是刚刚你自己的发言,你要在这基础上进一步发言,请按照你自己的角度来继续进行回复。注意保持上下文的连贯性。{mood_state} {identity} -尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。 +{chat_prompt}尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。 {reply_style} 请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。 {moderation_prompt}不要输出多余内容(包括冒号和引号,括号,表情包,at或 @等 )。 diff --git a/src/config/official_configs.py b/src/config/official_configs.py index 8c29d066..57a3e232 100644 --- a/src/config/official_configs.py +++ b/src/config/official_configs.py @@ -656,6 +656,25 @@ class ExperimentalConfig(ConfigBase): enable_friend_chat: bool = False """是否启用好友聊天""" + chat_prompts: list[str] = field(default_factory=lambda: []) + """ + 为指定聊天添加额外的prompt配置列表 + 格式: ["platform:id:type:prompt内容", ...] + + 示例: + [ + "qq:114514:group:这是一个摄影群,你精通摄影知识", + "qq:19198:group:这是一个二次元交流群", + "qq:114514:private:这是你与好朋友的私聊" + ] + + 说明: + - platform: 平台名称,如 "qq" + - id: 群ID或用户ID + - type: "group" 或 "private" + - prompt内容: 要添加的额外prompt文本 + """ + @dataclass class MaimMessageConfig(ConfigBase): diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index d2621a35..20a4eea2 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "6.19.2" +version = "6.20.0" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- #如果你想要修改配置文件,请递增version的值 @@ -241,6 +241,16 @@ enable = true [experimental] #实验性功能 none = false # 暂无 +# 为指定聊天添加额外的prompt配置 +# 格式: ["platform:id:type:prompt内容", ...] +# 示例: +# chat_prompts = [ +# "qq:114514:group:这是一个摄影群,你精通摄影知识", +# "qq:19198:group:这是一个二次元交流群", +# "qq:114514:private:这是你与好朋友的私聊" +# ] +chat_prompts = [] + #此系统暂时移除,无效配置 [relationship] From 69a6116b2ac32c827aa572e181fa19e2721bab07 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Wed, 5 Nov 2025 00:36:33 +0800 Subject: [PATCH 5/6] Update bot_config_template.toml --- template/bot_config_template.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 20a4eea2..7d678d67 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "6.20.0" +version = "6.20.1" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- #如果你想要修改配置文件,请递增version的值 @@ -239,8 +239,6 @@ key_file = "" # SSL密钥文件路径,仅在use_wss=true时有效 enable = true [experimental] #实验性功能 -none = false # 暂无 - # 为指定聊天添加额外的prompt配置 # 格式: ["platform:id:type:prompt内容", ...] # 示例: From a8f4863d2f67478968ac0f895cc3707c2b72e552 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Fri, 7 Nov 2025 02:21:51 +0800 Subject: [PATCH 6/6] =?UTF-8?q?feat=EF=BC=9A=E6=B7=BB=E5=8A=A0=E6=9C=AF?= =?UTF-8?q?=E8=AF=AD=E6=8E=A8=E6=96=AD=E5=92=8C=E6=9C=AF=E8=AF=AD=E7=90=86?= =?UTF-8?q?=E8=A7=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/common/database/database_model.py | 3 + src/jargon/jargon_miner.py | 435 ++++++++++++++++-- src/plugin_system/base/base_tool.py | 4 +- src/plugins/built_in/jargon/_manifest.json | 36 ++ .../built_in/jargon/jargon_explanation.py | 180 ++++++++ src/plugins/built_in/jargon/plugin.py | 56 +++ 6 files changed, 686 insertions(+), 28 deletions(-) create mode 100644 src/plugins/built_in/jargon/_manifest.json create mode 100644 src/plugins/built_in/jargon/jargon_explanation.py create mode 100644 src/plugins/built_in/jargon/plugin.py diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py index a1aaaa23..7e39eeb7 100644 --- a/src/common/database/database_model.py +++ b/src/common/database/database_model.py @@ -358,6 +358,9 @@ class Jargon(BaseModel): chat_id = TextField(index=True) is_global = BooleanField(default=False) count = IntegerField(default=0) + is_jargon = BooleanField(null=True) # None表示未判定,True表示是黑话,False表示不是黑话 + last_inference_count = IntegerField(null=True) # 最后一次判定的count值,用于避免重启后重复判定 + is_complete = BooleanField(default=False) # 是否已完成所有推断(count>=100后不再推断) class Meta: table_name = "jargon" diff --git a/src/jargon/jargon_miner.py b/src/jargon/jargon_miner.py index c0f035d8..a8c88cb7 100644 --- a/src/jargon/jargon_miner.py +++ b/src/jargon/jargon_miner.py @@ -1,5 +1,6 @@ import time import json +import asyncio from typing import List from json_repair import repair_json @@ -26,20 +27,22 @@ def _init_prompt() -> None: 请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。 - 必须为对话中真实出现过的短词或短语 - 必须是你无法理解含义的词语,或者出现频率较高的词语 -- 必须是这几种类别之一:英文或中文缩写、中文拼音短语、字母数字混合、意义不明但频繁的词汇 +- 请不要选择有明确含义,或者含义清晰的词语 +- 必须是这几种类别之一:英文或中文缩写、中文拼音短语、字母数字混合 - 排除:人名、@、明显的表情/图片占位、纯标点、常规功能词(如的、了、呢、啊等) - 每个词条长度建议 2-8 个字符(不强制),尽量短小 - 合并重复项,去重 -分类规则: -- p(拼音缩写):由字母或字母和汉字构成的,疑似拼音简写词,例如:nb、yyds、xswl +分类规则,type必须根据规则填写: +- p(拼音缩写):由字母或字母和汉字构成的,用汉语拼音简写词,或汉语拼音首字母的简写词,例如:nb、yyds、xswl - c(中文缩写):中文词语的缩写,用几个汉字概括一个词汇或含义,例如:社死、内卷 - e(英文缩写):英文词语的缩写,用英文字母概括一个词汇或含义,例如:CPU、GPU、API +- x(谐音梗):谐音梗,用谐音词概括一个词汇或含义,例如:好似,难崩 以 JSON 数组输出,元素为对象(严格按以下结构): [ - {{"content": "词条", "raw_content": "包含该词条的完整句子", "type": "p"}}, - {{"content": "词条2", "raw_content": "包含该词条的完整句子", "type": "c"}} + {{"content": "词条", "raw_content": "包含该词条的完整对话原文", "type": "p"}}, + {{"content": "词条2", "raw_content": "包含该词条的完整对话原文", "type": "c"}} ] 现在请输出: @@ -47,7 +50,107 @@ def _init_prompt() -> None: Prompt(prompt_str, "extract_jargon_prompt") +def _init_inference_prompts() -> None: + """初始化含义推断相关的prompt""" + # Prompt 1: 基于raw_content和content推断 + prompt1_str = """ +**词条内容** +{content} + +**词条出现的上下文(raw_content)** +{raw_content_list} + +请根据以上词条内容和上下文,推断这个词条的含义。 +- 如果这是一个黑话、俚语或网络用语,请推断其含义和翻译 +- 如果含义明确(常规词汇),也请说明 + +以 JSON 格式输出: +{{ + "meaning": "含义说明", + "translation": "翻译或解释" +}} +""" + Prompt(prompt1_str, "jargon_inference_with_context_prompt") + + # Prompt 2: 仅基于content推断 + prompt2_str = """ +**词条内容** +{content} + +请仅根据这个词条本身,推断其含义。 +- 如果这是一个黑话、俚语或网络用语,请推断其含义和翻译 +- 如果含义明确(常规词汇),也请说明 + +以 JSON 格式输出: +{{ + "meaning": "含义说明", + "translation": "翻译或解释" +}} +""" + Prompt(prompt2_str, "jargon_inference_content_only_prompt") + + # Prompt 3: 比较两个推断结果 + prompt3_str = """ +**推断结果1(基于上下文)** +{inference1} + +**推断结果2(仅基于词条)** +{inference2} + +请比较这两个推断结果,判断它们是否相同或类似。 +- 如果两个推断结果的"含义"相同或类似,说明这个词条不是黑话(含义明确) +- 如果两个推断结果有差异,说明这个词条可能是黑话(需要上下文才能理解) + +以 JSON 格式输出: +{{ + "is_similar": true/false, + "reason": "判断理由" +}} +""" + Prompt(prompt3_str, "jargon_compare_inference_prompt") + + _init_prompt() +_init_inference_prompts() + + +def _should_infer_meaning(jargon_obj: Jargon) -> bool: + """ + 判断是否需要进行含义推断 + 在 count 达到 5, 10, 20, 40, 60, 100 时进行推断 + 并且count必须大于last_inference_count,避免重启后重复判定 + 如果is_complete为True,不再进行推断 + """ + # 如果已完成所有推断,不再推断 + if jargon_obj.is_complete: + return False + + count = jargon_obj.count or 0 + last_inference = jargon_obj.last_inference_count or 0 + + # 阈值列表:5, 10, 20, 40, 60, 100 + thresholds = [5, 10, 20, 40, 60, 100] + + if count < thresholds[0]: + return False + + # 如果count没有超过上次判定值,不需要判定 + if count <= last_inference: + return False + + # 找到第一个大于last_inference的阈值 + next_threshold = None + for threshold in thresholds: + if threshold > last_inference: + next_threshold = threshold + break + + # 如果没有找到下一个阈值,说明已经超过100,不应该再推断 + if next_threshold is None: + return False + + # 检查count是否达到或超过这个阈值 + return count >= next_threshold class JargonMiner: @@ -63,6 +166,162 @@ class JargonMiner: request_type="jargon.extract", ) + async def _infer_meaning_by_id(self, jargon_id: int) -> None: + """通过ID加载对象并推断""" + try: + jargon_obj = Jargon.get_by_id(jargon_id) + # 再次检查is_complete,因为可能在异步任务执行时已被标记为完成 + if jargon_obj.is_complete: + logger.debug(f"jargon {jargon_obj.content} 已完成所有推断,跳过") + return + await self.infer_meaning(jargon_obj) + except Exception as e: + logger.error(f"通过ID推断jargon失败: {e}") + + async def infer_meaning(self, jargon_obj: Jargon) -> None: + """ + 对jargon进行含义推断 + """ + try: + content = jargon_obj.content + raw_content_str = jargon_obj.raw_content or "" + + # 解析raw_content列表 + raw_content_list = [] + if raw_content_str: + try: + raw_content_list = json.loads(raw_content_str) if isinstance(raw_content_str, str) else raw_content_str + if not isinstance(raw_content_list, list): + raw_content_list = [raw_content_list] if raw_content_list else [] + except (json.JSONDecodeError, TypeError): + raw_content_list = [raw_content_str] if raw_content_str else [] + + if not raw_content_list: + logger.warning(f"jargon {content} 没有raw_content,跳过推断") + return + + # 步骤1: 基于raw_content和content推断 + raw_content_text = "\n".join(raw_content_list) + prompt1 = await global_prompt_manager.format_prompt( + "jargon_inference_with_context_prompt", + content=content, + raw_content_list=raw_content_text, + ) + + response1, _ = await self.llm.generate_response_async(prompt1, temperature=0.3) + if not response1: + logger.warning(f"jargon {content} 推断1失败:无响应") + return + + # 解析推断1结果 + inference1 = None + try: + resp1 = response1.strip() + if resp1.startswith("{") and resp1.endswith("}"): + inference1 = json.loads(resp1) + else: + repaired = repair_json(resp1) + inference1 = json.loads(repaired) if isinstance(repaired, str) else repaired + if not isinstance(inference1, dict): + logger.warning(f"jargon {content} 推断1结果格式错误") + return + except Exception as e: + logger.error(f"jargon {content} 推断1解析失败: {e}") + return + + # 步骤2: 仅基于content推断 + prompt2 = await global_prompt_manager.format_prompt( + "jargon_inference_content_only_prompt", + content=content, + ) + + response2, _ = await self.llm.generate_response_async(prompt2, temperature=0.3) + if not response2: + logger.warning(f"jargon {content} 推断2失败:无响应") + return + + # 解析推断2结果 + inference2 = None + try: + resp2 = response2.strip() + if resp2.startswith("{") and resp2.endswith("}"): + inference2 = json.loads(resp2) + else: + repaired = repair_json(resp2) + inference2 = json.loads(repaired) if isinstance(repaired, str) else repaired + if not isinstance(inference2, dict): + logger.warning(f"jargon {content} 推断2结果格式错误") + return + except Exception as e: + logger.error(f"jargon {content} 推断2解析失败: {e}") + return + logger.info(f"jargon {content} 推断2提示词: {prompt2}") + logger.info(f"jargon {content} 推断2结果: {response2}") + # logger.info(f"jargon {content} 推断2结果: {inference2}") + logger.info(f"jargon {content} 推断1提示词: {prompt1}") + logger.info(f"jargon {content} 推断1结果: {response1}") + # logger.info(f"jargon {content} 推断1结果: {inference1}") + + # 步骤3: 比较两个推断结果 + prompt3 = await global_prompt_manager.format_prompt( + "jargon_compare_inference_prompt", + inference1=json.dumps(inference1, ensure_ascii=False), + inference2=json.dumps(inference2, ensure_ascii=False), + ) + + logger.info(f"jargon {content} 比较提示词: {prompt3}") + + response3, _ = await self.llm.generate_response_async(prompt3, temperature=0.3) + if not response3: + logger.warning(f"jargon {content} 比较失败:无响应") + return + + # 解析比较结果 + comparison = None + try: + resp3 = response3.strip() + if resp3.startswith("{") and resp3.endswith("}"): + comparison = json.loads(resp3) + else: + repaired = repair_json(resp3) + comparison = json.loads(repaired) if isinstance(repaired, str) else repaired + if not isinstance(comparison, dict): + logger.warning(f"jargon {content} 比较结果格式错误") + return + except Exception as e: + logger.error(f"jargon {content} 比较解析失败: {e}") + return + + # 判断是否为黑话 + is_similar = comparison.get("is_similar", False) + is_jargon = not is_similar # 如果相似,说明不是黑话;如果有差异,说明是黑话 + + # 更新数据库记录 + jargon_obj.is_jargon = is_jargon + if is_jargon: + # 是黑话,使用推断1的结果(基于上下文,更准确) + jargon_obj.meaning = inference1.get("meaning", "") + jargon_obj.translation = inference1.get("translation", "") + else: + # 不是黑话,也记录含义(使用推断2的结果,因为含义明确) + jargon_obj.meaning = inference2.get("meaning", "") + jargon_obj.translation = inference2.get("translation", "") + + # 更新最后一次判定的count值,避免重启后重复判定 + jargon_obj.last_inference_count = jargon_obj.count or 0 + + # 如果count>=100,标记为完成,不再进行推断 + if (jargon_obj.count or 0) >= 100: + jargon_obj.is_complete = True + + jargon_obj.save() + logger.info(f"jargon {content} 推断完成: is_jargon={is_jargon}, meaning={jargon_obj.meaning}, last_inference_count={jargon_obj.last_inference_count}, is_complete={jargon_obj.is_complete}") + + except Exception as e: + logger.error(f"jargon推断失败: {e}") + import traceback + traceback.print_exc() + def should_trigger(self) -> bool: # 冷却时间检查 if time.time() - self.last_learning_time < self.min_learning_interval: @@ -85,11 +344,15 @@ class JargonMiner: if not chat_stream: return + # 记录本次提取的时间窗口,避免重复提取 + extraction_start_time = self.last_learning_time + extraction_end_time = time.time() + # 拉取学习窗口内的消息 messages = get_raw_msg_by_timestamp_with_chat_inclusive( chat_id=self.chat_id, - timestamp_start=self.last_learning_time, - timestamp_end=time.time(), + timestamp_start=extraction_start_time, + timestamp_end=extraction_end_time, limit=20, ) if not messages: @@ -135,17 +398,27 @@ class JargonMiner: if not isinstance(item, dict): continue content = str(item.get("content", "")).strip() - raw_content = str(item.get("raw_content", "")).strip() + raw_content_value = item.get("raw_content", "") + + # 处理raw_content:可能是字符串或列表 + raw_content_list = [] + if isinstance(raw_content_value, list): + raw_content_list = [str(rc).strip() for rc in raw_content_value if str(rc).strip()] + elif isinstance(raw_content_value, str): + raw_content_str = raw_content_value.strip() + if raw_content_str: + raw_content_list = [raw_content_str] + type_str = str(item.get("type", "")).strip().lower() # 验证type是否为有效值 if type_str not in ["p", "c", "e"]: type_str = "p" # 默认值 - if content: + if content and raw_content_list: entries.append({ "content": content, - "raw_content": raw_content, + "raw_content": raw_content_list, "type": type_str }) except Exception as e: @@ -167,14 +440,20 @@ class JargonMiner: saved = 0 updated = 0 + merged = 0 for entry in uniq_entries: content = entry["content"] - raw_content = entry["raw_content"] + raw_content_list = entry["raw_content"] # 已经是列表 type_str = entry["type"] try: + # 步骤1: 检查同chat_id的记录,默认纳入global项目 + # 查询条件:chat_id匹配 OR (is_global为True且content匹配) query = ( Jargon.select() - .where((Jargon.chat_id == self.chat_id) & (Jargon.content == content)) + .where( + ((Jargon.chat_id == self.chat_id) | Jargon.is_global) & + (Jargon.content == content) + ) ) if query.exists(): obj = query.get() @@ -182,30 +461,134 @@ class JargonMiner: obj.count = (obj.count or 0) + 1 except Exception: obj.count = 1 - # 更新raw_content和type(如果为空或需要更新) - if raw_content and not obj.raw_content: - obj.raw_content = raw_content + + # 合并raw_content列表:读取现有列表,追加新值,去重 + existing_raw_content = [] + if obj.raw_content: + try: + existing_raw_content = json.loads(obj.raw_content) if isinstance(obj.raw_content, str) else obj.raw_content + if not isinstance(existing_raw_content, list): + existing_raw_content = [existing_raw_content] if existing_raw_content else [] + except (json.JSONDecodeError, TypeError): + existing_raw_content = [obj.raw_content] if obj.raw_content else [] + + # 合并并去重 + merged_list = list(dict.fromkeys(existing_raw_content + raw_content_list)) + obj.raw_content = json.dumps(merged_list, ensure_ascii=False) + + # 更新type(如果为空) if type_str and not obj.type: obj.type = type_str obj.save() + + # 检查是否需要推断(达到阈值且超过上次判定值) + if _should_infer_meaning(obj): + # 异步触发推断,不阻塞主流程 + # 重新加载对象以确保数据最新 + jargon_id = obj.id + asyncio.create_task(self._infer_meaning_by_id(jargon_id)) + updated += 1 else: - Jargon.create( - content=content, - raw_content=raw_content, - type=type_str, - chat_id=self.chat_id, - is_global=False, - count=1 + # 步骤2: 同chat_id没有找到,检查所有chat_id中是否有相同content的记录 + # 查询所有非global的记录(global的已经在步骤1检查过了) + all_content_query = ( + Jargon.select() + .where( + (Jargon.content == content) & + (~Jargon.is_global) + ) ) - saved += 1 + all_matching = list(all_content_query) + + # 如果找到3个或更多相同content的记录,合并它们 + if len(all_matching) >= 3: + # 找到3个或更多已有记录,合并它们(新条目也会被包含在合并中) + total_count = sum((obj.count or 0) for obj in all_matching) + 1 # +1 是因为当前新条目 + + # 合并所有raw_content列表 + all_raw_content = [] + for obj in all_matching: + if obj.raw_content: + try: + obj_raw = json.loads(obj.raw_content) if isinstance(obj.raw_content, str) else obj.raw_content + if not isinstance(obj_raw, list): + obj_raw = [obj_raw] if obj_raw else [] + all_raw_content.extend(obj_raw) + except (json.JSONDecodeError, TypeError): + if obj.raw_content: + all_raw_content.append(obj.raw_content) + + # 添加当前新条目的raw_content + all_raw_content.extend(raw_content_list) + # 去重 + merged_raw_content = list(dict.fromkeys(all_raw_content)) + + # 合并type:优先使用非空的值 + merged_type = type_str + for obj in all_matching: + if obj.type and not merged_type: + merged_type = obj.type + break + + # 合并其他字段:优先使用已有值 + merged_meaning = None + merged_translation = None + merged_is_jargon = None + merged_last_inference_count = None + merged_is_complete = False + + for obj in all_matching: + if obj.meaning and not merged_meaning: + merged_meaning = obj.meaning + if obj.translation and not merged_translation: + merged_translation = obj.translation + if obj.is_jargon is not None and merged_is_jargon is None: + merged_is_jargon = obj.is_jargon + if obj.last_inference_count is not None and merged_last_inference_count is None: + merged_last_inference_count = obj.last_inference_count + if obj.is_complete: + merged_is_complete = True + + # 删除旧的记录 + for obj in all_matching: + obj.delete_instance() + + # 创建新的global记录 + Jargon.create( + content=content, + raw_content=json.dumps(merged_raw_content, ensure_ascii=False), + type=merged_type, + chat_id="global", + is_global=True, + count=total_count, + meaning=merged_meaning, + translation=merged_translation, + is_jargon=merged_is_jargon, + last_inference_count=merged_last_inference_count, + is_complete=merged_is_complete + ) + merged += 1 + logger.info(f"合并jargon为global: content={content}, 合并了{len(all_matching)}条已有记录+1条新记录(共{len(all_matching)+1}条),总count={total_count}") + else: + # 找到少于3个已有记录,正常创建新记录 + Jargon.create( + content=content, + raw_content=json.dumps(raw_content_list, ensure_ascii=False), + type=type_str, + chat_id=self.chat_id, + is_global=False, + count=1 + ) + saved += 1 except Exception as e: logger.error(f"保存jargon失败: chat_id={self.chat_id}, content={content}, err={e}") continue - if saved or updated: - logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated} 条,chat_id={self.chat_id}") - self.last_learning_time = time.time() + if saved or updated or merged: + logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated} 条,合并为global {merged} 条,chat_id={self.chat_id}") + # 更新为本次提取的结束时间,确保不会重复提取相同的消息窗口 + self.last_learning_time = extraction_end_time except Exception as e: logger.error(f"JargonMiner 运行失败: {e}") diff --git a/src/plugin_system/base/base_tool.py b/src/plugin_system/base/base_tool.py index 3ecf347c..072d68b1 100644 --- a/src/plugin_system/base/base_tool.py +++ b/src/plugin_system/base/base_tool.py @@ -57,7 +57,7 @@ class BaseTool(ABC): Returns: dict: 工具定义字典 """ - if not cls.name or not cls.description or not cls.parameters: + if not cls.name or not cls.description or cls.parameters is None: raise NotImplementedError(f"工具类 {cls.__name__} 必须定义 name, description 和 parameters 属性") return {"name": cls.name, "description": cls.description, "parameters": cls.parameters} @@ -65,7 +65,7 @@ class BaseTool(ABC): @classmethod def get_tool_info(cls) -> ToolInfo: """获取工具信息""" - if not cls.name or not cls.description or not cls.parameters: + if not cls.name or not cls.description or cls.parameters is None: raise NotImplementedError(f"工具类 {cls.__name__} 必须定义 name, description 和 parameters 属性") return ToolInfo( diff --git a/src/plugins/built_in/jargon/_manifest.json b/src/plugins/built_in/jargon/_manifest.json new file mode 100644 index 00000000..3b0e2c65 --- /dev/null +++ b/src/plugins/built_in/jargon/_manifest.json @@ -0,0 +1,36 @@ +{ + "manifest_version": 1, + "name": "Jargon插件", + "version": "1.0.0", + "description": "记录和管理jargon(黑话/俚语)的解释", + "author": { + "name": "Mai", + "url": "https://github.com/MaiM-with-u" + }, + "license": "GPL-v3.0-or-later", + + "host_application": { + "min_version": "0.10.4" + }, + "homepage_url": "https://github.com/MaiM-with-u/maibot", + "repository_url": "https://github.com/MaiM-with-u/maibot", + "keywords": ["jargon", "slang", "built-in"], + "categories": ["Jargon"], + + "default_locale": "zh-CN", + "locales_path": "_locales", + + "plugin_info": { + "is_built_in": true, + "plugin_type": "tool_provider", + "components": [ + { + "type": "record_jargon_explanation", + "name": "record_jargon_explanation", + "description": "记录聊天中明确解释的jargon词义" + } + ] + } +} + + diff --git a/src/plugins/built_in/jargon/jargon_explanation.py b/src/plugins/built_in/jargon/jargon_explanation.py new file mode 100644 index 00000000..b01b5e58 --- /dev/null +++ b/src/plugins/built_in/jargon/jargon_explanation.py @@ -0,0 +1,180 @@ +from typing import Any, Dict, List, Tuple + +from src.common.logger import get_logger +from src.common.database.database_model import Jargon +from src.plugin_system import BaseTool, ToolParamType + +logger = get_logger("jargon_explanation") + + +class RecordJargonExplanationTool(BaseTool): + """记录jargon解释工具 + + 检测聊天记录中是否有对某个词义的明确解释,如果有则记录到jargon表中 + """ + + name: str = "record_explanation" + description: str = ( + "当检测到有人明确解释了某个缩写,拼音缩写,中文缩写,英文缩写的含义时(例如:'xxx是yyy的意思'、'xxx指的是yyy'等)" + "当某人明确纠正了对某个词汇的错误解释时(例如:'xxx不是yyy的意思'、'xxx不是指的是yyy'等)" + ) + parameters: List[Tuple[str, ToolParamType, str, bool, None]] = [ + ("content", ToolParamType.STRING, "被解释的目标词汇(黑话/俚语/缩写),例如:yyds、内卷、社死等", True, None), + ("translation", ToolParamType.STRING, "词汇的翻译或简称,例如:永远的神、社会性死亡等", True, None), + ("meaning", ToolParamType.STRING, "词汇的详细含义说明", True, None), + ] + available_for_llm: bool = True + + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, str]: + """执行jargon解释检测和记录 + + Args: + function_args: 工具参数,包含content、translation、meaning + + Returns: + dict: 工具执行结果 + """ + if not self.chat_id: + return {"name": self.name, "content": "无法记录jargon解释:缺少chat_id"} + + try: + # 从参数中获取信息 + content = str(function_args.get("content", "")).strip() + translation = str(function_args.get("translation", "")).strip() + meaning = str(function_args.get("meaning", "")).strip() + + if not content: + return {"name": self.name, "content": "目标词汇不能为空"} + + if not translation and not meaning: + return {"name": self.name, "content": "翻译和含义至少需要提供一个"} + + # 检查是否已存在相同的jargon + query = Jargon.select().where( + (Jargon.chat_id == self.chat_id) & + (Jargon.content == content) + ) + + if query.exists(): + # 已存在,更新translation和meaning(追加,用/分隔) + obj = query.get() + existing_translation = obj.translation or "" + existing_meaning = obj.meaning or "" + + # 追加新内容 + if translation: + if existing_translation: + obj.translation = f"{existing_translation}/{translation}" + else: + obj.translation = translation + + if meaning: + if existing_meaning: + obj.meaning = f"{existing_meaning}/{meaning}" + else: + obj.meaning = meaning + + # 确保is_jargon为True + obj.is_jargon = True + obj.save() + + logger.info(f"更新jargon解释: {content}, translation={obj.translation}, meaning={obj.meaning}") + # 优先使用meaning,如果没有则使用translation + explanation = obj.meaning or obj.translation or "" + return {"name": self.name, "content": f"你了解到 {content}的含义应该是 {explanation}"} + else: + # 新建记录 + Jargon.create( + content=content, + chat_id=self.chat_id, + translation=translation, + meaning=meaning, + is_jargon=True, + is_global=False, + count=0, + ) + + logger.info(f"记录新jargon解释: {content}, translation={translation}, meaning={meaning}") + # 优先使用meaning,如果没有则使用translation + explanation = meaning or translation or "" + return {"name": self.name, "content": f"你了解到 {content}的含义应该是 {explanation}"} + + except Exception as exc: + logger.error(f"记录jargon解释失败: {exc}", exc_info=True) + return {"name": self.name, "content": f"记录jargon解释失败: {exc}"} + + +class LookupJargonMeaningTool(BaseTool): + """查询jargon含义工具 + + 输入一个可能意义不明的词或缩写,查询数据库中是否已有匹配且带有含义或翻译的记录。 + 命中则返回解释字符串(优先meaning,其次translation),未命中返回空字符串。 + """ + + name: str = "lookup_jargon_meaning" + description: str = ( + "查询是否存在已知的jargon解释(含meaning或translation),若存在返回解释,否则返回空字符串" + ) + parameters: List[Tuple[str, ToolParamType, str, bool, None]] = [ + ("content", ToolParamType.STRING, "待查询的目标词汇(黑话/俚语/缩写)", True, None), + ] + available_for_llm: bool = True + + async def execute(self, function_args: Dict[str, Any]) -> Dict[str, str]: + if not self.chat_id: + # 和其它工具保持一致的返回结构 + return {"name": self.name, "content": ""} + + try: + content = str(function_args.get("content", "")).strip() + if not content: + return {"name": self.name, "content": ""} + + # 优先在当前会话或global中查找该content,且需要meaning或translation非空 + # Peewee 条件: + # (content == 输入) AND ((chat_id == 当前chat) OR is_global) AND ((meaning非空) OR (translation非空)) + candidates = ( + Jargon.select() + .where( + (Jargon.content == content) + & ((Jargon.chat_id == self.chat_id) | Jargon.is_global) + & ( + ((Jargon.meaning.is_null(False)) & (Jargon.meaning != "")) + | ((Jargon.translation.is_null(False)) & (Jargon.translation != "")) + ) + ) + .limit(1) + ) + + if candidates.exists(): + obj = candidates.get() + translation = (obj.translation or "").strip() + meaning = (obj.meaning or "").strip() + formatted = f"“{content}可能为黑话或者网络简写,翻译为:{translation},含义为:{meaning}”" + return {"name": self.name, "content": formatted} + + # 未命中:允许退化为全库搜索(不限chat_id),以提升命中率 + fallback = ( + Jargon.select() + .where( + (Jargon.content == content) + & ( + ((Jargon.meaning.is_null(False)) & (Jargon.meaning != "")) + | ((Jargon.translation.is_null(False)) & (Jargon.translation != "")) + ) + ) + .limit(1) + ) + if fallback.exists(): + obj = fallback.get() + translation = (obj.translation or "").strip() + meaning = (obj.meaning or "").strip() + formatted = f"“{content}可能为黑话或者网络简写,翻译为:{translation},含义为:{meaning}”" + return {"name": self.name, "content": formatted} + + # 彻底未命中 + return {"name": self.name, "content": ""} + except Exception as exc: + logger.error(f"查询jargon解释失败: {exc}", exc_info=True) + return {"name": self.name, "content": ""} + diff --git a/src/plugins/built_in/jargon/plugin.py b/src/plugins/built_in/jargon/plugin.py new file mode 100644 index 00000000..d2a02fab --- /dev/null +++ b/src/plugins/built_in/jargon/plugin.py @@ -0,0 +1,56 @@ +from typing import List, Tuple, Type + +# 导入新插件系统 +from src.plugin_system import BasePlugin, ComponentInfo, register_plugin +from src.plugin_system.base.config_types import ConfigField + +# 导入依赖的系统组件 +from src.common.logger import get_logger + +from src.plugins.built_in.jargon.jargon_explanation import RecordJargonExplanationTool, LookupJargonMeaningTool + +logger = get_logger("jargon_plugin") + + +@register_plugin +class JargonPlugin(BasePlugin): + """Jargon插件 + + 系统内置插件,提供jargon相关的功能: + - RecordJargonExplanation: 记录聊天中明确解释的jargon词义 + - LookupJargonMeaning: 查询未知词是否已有解释 + + 注意:插件基本信息优先从_manifest.json文件中读取 + """ + + # 插件基本信息 + plugin_name: str = "jargon" # 内部标识符 + enable_plugin: bool = True + dependencies: list[str] = [] # 插件依赖列表 + python_dependencies: list[str] = [] # Python包依赖列表 + config_file_name: str = "config.toml" + + # 配置节描述 + config_section_descriptions = { + "plugin": "插件启用配置", + "components": "核心组件启用配置", + } + + # 配置Schema定义 + config_schema: dict = { + "plugin": { + "enabled": ConfigField(type=bool, default=True, description="是否启用插件"), + "config_version": ConfigField(type=str, default="1.0.0", description="配置文件版本"), + }, + } + + def get_plugin_components(self) -> List[Tuple[ComponentInfo, Type]]: + """返回插件包含的组件列表""" + + # --- 根据配置注册组件 --- + components = [] + components.append((RecordJargonExplanationTool.get_tool_info(), RecordJargonExplanationTool)) + components.append((LookupJargonMeaningTool.get_tool_info(), LookupJargonMeaningTool)) + + return components +