From 20c97b9cc992504c220009826850517ecb623ec6 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sun, 7 Dec 2025 23:38:20 +0800 Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E8=A1=A8=E8=BE=BE=E6=96=B9?= =?UTF-8?q?=E5=BC=8F=E6=A0=B9=E6=8D=AE=E4=B8=8D=E5=90=8Cthinking=20level?= =?UTF-8?q?=E8=BF=9B=E5=88=86=E5=88=AB=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bw_learner/expression_learner.py | 13 +++ src/bw_learner/expression_selector.py | 142 ++++++++++++++++++++++++-- src/chat/replyer/group_generator.py | 7 +- template/bot_config_template.toml | 2 +- 4 files changed, 152 insertions(+), 12 deletions(-) diff --git a/src/bw_learner/expression_learner.py b/src/bw_learner/expression_learner.py index 759f4e0e..71866dea 100644 --- a/src/bw_learner/expression_learner.py +++ b/src/bw_learner/expression_learner.py @@ -34,6 +34,8 @@ def init_prompt() -> None: 5. 例子仅供参考,请严格根据群聊内容总结!!! 注意:总结成如下格式的规律,总结的内容要详细,但具有概括性: 例如:当"AAAAA"时,可以"BBBBB", AAAAA代表某个场景,不超过20个字。BBBBB代表对应的语言风格,特定句式或表达方式,不超过20个字。 +表达方式在3-5个左右,不要超过10个 + 任务2:请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。 - 必须为对话中真实出现过的短词或短语 @@ -49,6 +51,7 @@ def init_prompt() -> None: 输出要求: 将表达方式,语言风格和黑话以 JSON 数组输出,每个元素为一个对象,结构如下(注意字段名): +注意请不要输出重复内容,请对表达方式和黑话进行去重。 [ {{"situation": "AAAAA", "style": "BBBBB", "source_id": "3"}}, @@ -132,6 +135,16 @@ class ExpressionLearner: expressions, jargon_entries = self.parse_expression_response(response) expressions = self._filter_self_reference_styles(expressions) + # 检查表达方式数量,如果超过10个则放弃本次表达学习 + if len(expressions) > 10: + logger.info(f"表达方式提取数量超过10个(实际{len(expressions)}个),放弃本次表达学习") + expressions = [] + + # 检查黑话数量,如果超过30个则放弃本次黑话学习 + if len(jargon_entries) > 30: + logger.info(f"黑话提取数量超过30个(实际{len(jargon_entries)}个),放弃本次黑话学习") + jargon_entries = [] + # 处理黑话条目,路由到 jargon_miner(即使没有表达方式也要处理黑话) if jargon_entries: await self._process_jargon_entries(jargon_entries, random_msg) diff --git a/src/bw_learner/expression_selector.py b/src/bw_learner/expression_selector.py index d8a59779..996ed04a 100644 --- a/src/bw_learner/expression_selector.py +++ b/src/bw_learner/expression_selector.py @@ -111,6 +111,65 @@ class ExpressionSelector: return group_chat_ids return [chat_id] + def _select_expressions_simple(self, chat_id: str, max_num: int) -> Tuple[List[Dict[str, Any]], List[int]]: + """ + 简单模式:只选择 count > 1 的项目,要求至少有10个才进行选择,随机选5个,不进行LLM选择 + + Args: + chat_id: 聊天流ID + max_num: 最大选择数量(此参数在此模式下不使用,固定选择5个) + + Returns: + Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表 + """ + try: + # 支持多chat_id合并抽选 + related_chat_ids = self.get_related_chat_ids(chat_id) + + # 查询所有相关chat_id的表达方式,排除 rejected=1 的,且只选择 count > 1 的 + style_query = Expression.select().where( + (Expression.chat_id.in_(related_chat_ids)) + & (~Expression.rejected) + & (Expression.count > 1) + ) + + style_exprs = [ + { + "id": expr.id, + "situation": expr.situation, + "style": expr.style, + "last_active_time": expr.last_active_time, + "source_id": expr.chat_id, + "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time, + "count": expr.count if getattr(expr, "count", None) is not None else 1, + "checked": expr.checked if getattr(expr, "checked", None) is not None else False, + } + for expr in style_query + ] + + # 要求至少有10个 count > 1 的表达方式才进行选择 + min_required = 10 + if len(style_exprs) < min_required: + logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_required} 个(实际 {len(style_exprs)} 个),不进行选择") + return [], [] + + # 固定选择5个 + select_count = 5 + import random + selected_style = random.sample(style_exprs, select_count) + + # 更新last_active_time + if selected_style: + self.update_expressions_last_active_time(selected_style) + + selected_ids = [expr["id"] for expr in selected_style] + logger.debug(f"think_level=0: 从 {len(style_exprs)} 个 count>1 的表达方式中随机选择了 {len(selected_style)} 个") + return selected_style, selected_ids + + except Exception as e: + logger.error(f"简单模式选择表达方式失败: {e}") + return [], [] + def _random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]: """ 随机选择表达方式 @@ -164,6 +223,7 @@ class ExpressionSelector: max_num: int = 10, target_message: Optional[str] = None, reply_reason: Optional[str] = None, + think_level: int = 1, ) -> Tuple[List[Dict[str, Any]], List[int]]: """ 选择适合的表达方式(使用classic模式:随机选择+LLM选择) @@ -174,6 +234,7 @@ class ExpressionSelector: max_num: 最大选择数量 target_message: 目标消息内容 reply_reason: planner给出的回复理由 + think_level: 思考级别,0/1/2 Returns: Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表 @@ -184,8 +245,8 @@ class ExpressionSelector: return [], [] # 使用classic模式(随机选择+LLM选择) - logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式") - return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason) + logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式,think_level={think_level}") + return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason, think_level) async def _select_expressions_classic( self, @@ -194,6 +255,7 @@ class ExpressionSelector: max_num: int = 10, target_message: Optional[str] = None, reply_reason: Optional[str] = None, + think_level: int = 1, ) -> Tuple[List[Dict[str, Any]], List[int]]: """ classic模式:随机选择+LLM选择 @@ -204,24 +266,88 @@ class ExpressionSelector: max_num: 最大选择数量 target_message: 目标消息内容 reply_reason: planner给出的回复理由 + think_level: 思考级别,0/1/2 Returns: Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表 """ try: - # 1. 使用随机抽样选择表达方式 - style_exprs = self._random_expressions(chat_id, 20) + # think_level == 0: 只选择 count > 1 的项目,随机选10个,不进行LLM选择 + if think_level == 0: + return self._select_expressions_simple(chat_id, max_num) + + # think_level == 1 或 2: 先选高count,再从所有表达方式中随机抽样 + # 1. 获取所有表达方式并分离 count > 1 和 count <= 1 的 + related_chat_ids = self.get_related_chat_ids(chat_id) + style_query = Expression.select().where( + (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected) + ) + + all_style_exprs = [ + { + "id": expr.id, + "situation": expr.situation, + "style": expr.style, + "last_active_time": expr.last_active_time, + "source_id": expr.chat_id, + "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time, + "count": expr.count if getattr(expr, "count", None) is not None else 1, + "checked": expr.checked if getattr(expr, "checked", None) is not None else False, + } + for expr in style_query + ] - if len(style_exprs) < 10: - logger.info(f"聊天流 {chat_id} 表达方式正在积累中") + # 分离 count > 1 和 count <= 1 的表达方式 + high_count_exprs = [expr for expr in all_style_exprs if (expr.get("count", 1) or 1) > 1] + + # 根据 think_level 设置要求 + if think_level == 1: + # level 1: 需要至少10个高count和10个总数 + min_high_count = 10 + min_total_count = 10 + select_high_count = 5 + select_random_count = 5 + else: # think_level == 2 + # level 2: 需要至少20个高count和20个总数 + min_high_count = 20 + min_total_count = 20 + select_high_count = 10 + select_random_count = 10 + + # 检查数量要求 + if len(high_count_exprs) < min_high_count: + logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_high_count} 个(实际 {len(high_count_exprs)} 个),不进行选择") return [], [] + + if len(all_style_exprs) < min_total_count: + logger.info(f"聊天流 {chat_id} 总表达方式不足 {min_total_count} 个(实际 {len(all_style_exprs)} 个),不进行选择") + return [], [] + + # 先选取高count的表达方式 + selected_high = weighted_sample(high_count_exprs, min(len(high_count_exprs), select_high_count)) + + # 然后从所有表达方式中随机抽样(使用加权抽样) + remaining_num = select_random_count + selected_random = weighted_sample(all_style_exprs, min(len(all_style_exprs), remaining_num)) + + # 合并候选池(去重,避免重复) + candidate_exprs = selected_high.copy() + candidate_ids = {expr["id"] for expr in candidate_exprs} + for expr in selected_random: + if expr["id"] not in candidate_ids: + candidate_exprs.append(expr) + candidate_ids.add(expr["id"]) + + # 打乱顺序,避免高count的都在前面 + import random + random.shuffle(candidate_exprs) # 2. 构建所有表达方式的索引和情境列表 all_expressions: List[Dict[str, Any]] = [] all_situations: List[str] = [] # 添加style表达方式 - for expr in style_exprs: + for expr in candidate_exprs: expr = expr.copy() all_expressions.append(expr) all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时,使用 {expr['style']}") @@ -233,7 +359,7 @@ class ExpressionSelector: all_situations_str = "\n".join(all_situations) if target_message: - target_message_str = f",现在你想要对这条消息进行回复:“{target_message}”" + target_message_str = f",现在你想要对这条消息进行回复:\"{target_message}\"" target_message_extra_block = "4.考虑你要回复的目标消息" else: target_message_str = "" diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index 5bf21567..57a8687e 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -229,7 +229,7 @@ class DefaultReplyer: return False, llm_response async def build_expression_habits( - self, chat_history: str, target: str, reply_reason: str = "" + self, chat_history: str, target: str, reply_reason: str = "", think_level: int = 1 ) -> Tuple[str, List[int]]: # sourcery skip: for-append-to-extend """构建表达习惯块 @@ -238,6 +238,7 @@ class DefaultReplyer: chat_history: 聊天历史记录 target: 目标消息内容 reply_reason: planner给出的回复理由 + think_level: 思考级别,0/1/2 Returns: str: 表达习惯信息字符串 @@ -250,7 +251,7 @@ class DefaultReplyer: # 使用从处理器传来的选中表达方式 # 使用模型预测选择表达方式 selected_expressions, selected_ids = await expression_selector.select_suitable_expressions( - self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason + self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason, think_level=think_level ) if selected_expressions: @@ -788,7 +789,7 @@ class DefaultReplyer: # 并行执行八个构建任务(包括黑话解释) task_results = await asyncio.gather( self._time_and_run_task( - self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits" + self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level), "expression_habits" ), self._time_and_run_task( self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info" diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml index 16e4c235..0d60c7fd 100644 --- a/template/bot_config_template.toml +++ b/template/bot_config_template.toml @@ -1,5 +1,5 @@ [inner] -version = "7.1.0" +version = "7.1.2" #----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读---- # 如果你想要修改配置文件,请递增version的值