From 20c97b9cc992504c220009826850517ecb623ec6 Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Sun, 7 Dec 2025 23:38:20 +0800
Subject: [PATCH] =?UTF-8?q?feat=EF=BC=9A=E8=A1=A8=E8=BE=BE=E6=96=B9?=
 =?UTF-8?q?=E5=BC=8F=E6=A0=B9=E6=8D=AE=E4=B8=8D=E5=90=8Cthinking=20level?=
 =?UTF-8?q?=E8=BF=9B=E8=A1=8C=E5=88=86=E5=88=AB=E5=A4=84=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/bw_learner/expression_learner.py  |  13 +++
 src/bw_learner/expression_selector.py | 142 ++++++++++++++++++++++++--
 src/chat/replyer/group_generator.py   |   7 +-
 template/bot_config_template.toml     |   2 +-
 4 files changed, 152 insertions(+), 12 deletions(-)

diff --git a/src/bw_learner/expression_learner.py b/src/bw_learner/expression_learner.py
index 759f4e0e..71866dea 100644
--- a/src/bw_learner/expression_learner.py
+++ b/src/bw_learner/expression_learner.py
@@ -34,6 +34,8 @@ def init_prompt() -> None:
 5. 例子仅供参考，请严格根据群聊内容总结!!!
 注意：总结成如下格式的规律，总结的内容要详细，但具有概括性：
 例如：当"AAAAA"时，可以"BBBBB", AAAAA代表某个场景，不超过20个字。BBBBB代表对应的语言风格，特定句式或表达方式，不超过20个字。
+表达方式在3-5个左右，不要超过10个
+
 
 任务2：请从上面这段聊天内容中提取"可能是黑话"的候选项（黑话/俚语/网络缩写/口头禅）。
 - 必须为对话中真实出现过的短词或短语
@@ -49,6 +51,7 @@ def init_prompt() -> None:
 
 输出要求：
 将表达方式，语言风格和黑话以 JSON 数组输出，每个元素为一个对象，结构如下（注意字段名）：
+注意请不要输出重复内容，请对表达方式和黑话进行去重。
 
 [
   {{"situation": "AAAAA", "style": "BBBBB", "source_id": "3"}},
@@ -132,6 +135,16 @@ class ExpressionLearner:
         expressions, jargon_entries = self.parse_expression_response(response)
         expressions = self._filter_self_reference_styles(expressions)
         
+        # 检查表达方式数量，如果超过10个则放弃本次表达学习
+        if len(expressions) > 10:
+            logger.info(f"表达方式提取数量超过10个（实际{len(expressions)}个），放弃本次表达学习")
+            expressions = []
+        
+        # 检查黑话数量，如果超过30个则放弃本次黑话学习
+        if len(jargon_entries) > 30:
+            logger.info(f"黑话提取数量超过30个（实际{len(jargon_entries)}个），放弃本次黑话学习")
+            jargon_entries = []
+        
         # 处理黑话条目，路由到 jargon_miner（即使没有表达方式也要处理黑话）
         if jargon_entries:
             await self._process_jargon_entries(jargon_entries, random_msg)
diff --git a/src/bw_learner/expression_selector.py b/src/bw_learner/expression_selector.py
index d8a59779..996ed04a 100644
--- a/src/bw_learner/expression_selector.py
+++ b/src/bw_learner/expression_selector.py
@@ -111,6 +111,65 @@ class ExpressionSelector:
                 return group_chat_ids
         return [chat_id]
 
+    def _select_expressions_simple(self, chat_id: str, max_num: int) -> Tuple[List[Dict[str, Any]], List[int]]:
+        """
+        Simple mode: randomly pick 5 expressions whose count > 1 (at least 10 must exist); no LLM selection.
+
+        Args:
+            chat_id: chat stream ID
+            max_num: maximum number to select (ignored in this mode; exactly 5 are always selected)
+
+        Returns:
+            Tuple[List[Dict[str, Any]], List[int]]: selected expressions and their IDs; ([], []) if fewer than 10 qualify or on any error
+        """
+        try:
+            # Sample from the merged pool of every chat_id related to this one
+            related_chat_ids = self.get_related_chat_ids(chat_id)
+
+            # Query expressions of all related chat_ids, excluding rejected ones and keeping only count > 1
+            style_query = Expression.select().where(
+                (Expression.chat_id.in_(related_chat_ids)) 
+                & (~Expression.rejected)
+                & (Expression.count > 1)
+            )
+
+            style_exprs = [
+                {
+                    "id": expr.id,
+                    "situation": expr.situation,
+                    "style": expr.style,
+                    "last_active_time": expr.last_active_time,
+                    "source_id": expr.chat_id,
+                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
+                    "count": expr.count if getattr(expr, "count", None) is not None else 1,
+                    "checked": expr.checked if getattr(expr, "checked", None) is not None else False,
+                }
+                for expr in style_query
+            ]
+
+            # Require at least 10 expressions with count > 1 before selecting anything
+            min_required = 10
+            if len(style_exprs) < min_required:
+                logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_required} 个（实际 {len(style_exprs)} 个），不进行选择")
+                return [], []
+
+            # Always select exactly 5
+            select_count = 5
+            import random
+            selected_style = random.sample(style_exprs, select_count)  # cannot raise: pool size >= min_required > select_count
+
+            # Update last_active_time for the selected expressions
+            if selected_style:
+                self.update_expressions_last_active_time(selected_style)
+
+            selected_ids = [expr["id"] for expr in selected_style]
+            logger.debug(f"think_level=0: 从 {len(style_exprs)} 个 count>1 的表达方式中随机选择了 {len(selected_style)} 个")
+            return selected_style, selected_ids
+
+        except Exception as e:  # best-effort: any failure is logged and reported as an empty selection
+            logger.error(f"简单模式选择表达方式失败: {e}")
+            return [], []
+
     def _random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]:
         """
         随机选择表达方式
@@ -164,6 +223,7 @@ class ExpressionSelector:
         max_num: int = 10,
         target_message: Optional[str] = None,
         reply_reason: Optional[str] = None,
+        think_level: int = 1,
     ) -> Tuple[List[Dict[str, Any]], List[int]]:
         """
         选择适合的表达方式（使用classic模式：随机选择+LLM选择）
@@ -174,6 +234,7 @@ class ExpressionSelector:
             max_num: 最大选择数量
             target_message: 目标消息内容
             reply_reason: planner给出的回复理由
+            think_level: 思考级别，0/1/2
 
         Returns:
             Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
@@ -184,8 +245,8 @@ class ExpressionSelector:
             return [], []
 
         # 使用classic模式（随机选择+LLM选择）
-        logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式")
-        return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason)
+        logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式，think_level={think_level}")
+        return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason, think_level)
 
     async def _select_expressions_classic(
         self,
@@ -194,6 +255,7 @@ class ExpressionSelector:
         max_num: int = 10,
         target_message: Optional[str] = None,
         reply_reason: Optional[str] = None,
+        think_level: int = 1,
     ) -> Tuple[List[Dict[str, Any]], List[int]]:
         """
         classic模式：随机选择+LLM选择
@@ -204,24 +266,88 @@ class ExpressionSelector:
             max_num: 最大选择数量
             target_message: 目标消息内容
             reply_reason: planner给出的回复理由
+            think_level: 思考级别，0/1/2
 
         Returns:
             Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
         """
         try:
-            # 1. 使用随机抽样选择表达方式
-            style_exprs = self._random_expressions(chat_id, 20)
+            # think_level == 0: only consider expressions with count > 1, randomly pick 5 (max_num is ignored), no LLM selection
+            if think_level == 0:
+                return self._select_expressions_simple(chat_id, max_num)
+            
+            # think_level == 1 或 2: 先选高count，再从所有表达方式中随机抽样
+            # 1. 获取所有表达方式并分离 count > 1 和 count <= 1 的
+            related_chat_ids = self.get_related_chat_ids(chat_id)
+            style_query = Expression.select().where(
+                (Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected)
+            )
+            
+            all_style_exprs = [
+                {
+                    "id": expr.id,
+                    "situation": expr.situation,
+                    "style": expr.style,
+                    "last_active_time": expr.last_active_time,
+                    "source_id": expr.chat_id,
+                    "create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
+                    "count": expr.count if getattr(expr, "count", None) is not None else 1,
+                    "checked": expr.checked if getattr(expr, "checked", None) is not None else False,
+                }
+                for expr in style_query
+            ]
 
-            if len(style_exprs) < 10:
-                logger.info(f"聊天流 {chat_id} 表达方式正在积累中")
+            # Keep only the expressions with count > 1 (a missing count is treated as 1); low-count ones are not collected separately
+            high_count_exprs = [expr for expr in all_style_exprs if (expr.get("count", 1) or 1) > 1]
+            
+            # 根据 think_level 设置要求
+            if think_level == 1:
+                # level 1: 需要至少10个高count和10个总数
+                min_high_count = 10
+                min_total_count = 10
+                select_high_count = 5
+                select_random_count = 5
+            else:  # think_level == 2
+                # level 2: 需要至少20个高count和20个总数
+                min_high_count = 20
+                min_total_count = 20
+                select_high_count = 10
+                select_random_count = 10
+            
+            # 检查数量要求
+            if len(high_count_exprs) < min_high_count:
+                logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_high_count} 个（实际 {len(high_count_exprs)} 个），不进行选择")
                 return [], []
+            
+            if len(all_style_exprs) < min_total_count:
+                logger.info(f"聊天流 {chat_id} 总表达方式不足 {min_total_count} 个（实际 {len(all_style_exprs)} 个），不进行选择")
+                return [], []
+            
+            # 先选取高count的表达方式
+            selected_high = weighted_sample(high_count_exprs, min(len(high_count_exprs), select_high_count))
+            
+            # 然后从所有表达方式中随机抽样（使用加权抽样）
+            remaining_num = select_random_count
+            selected_random = weighted_sample(all_style_exprs, min(len(all_style_exprs), remaining_num))
+            
+            # 合并候选池（去重，避免重复）
+            candidate_exprs = selected_high.copy()
+            candidate_ids = {expr["id"] for expr in candidate_exprs}
+            for expr in selected_random:
+                if expr["id"] not in candidate_ids:
+                    candidate_exprs.append(expr)
+                    candidate_ids.add(expr["id"])
+            
+            # 打乱顺序，避免高count的都在前面
+            import random
+            random.shuffle(candidate_exprs)
 
             # 2. 构建所有表达方式的索引和情境列表
             all_expressions: List[Dict[str, Any]] = []
             all_situations: List[str] = []
 
             # 添加style表达方式
-            for expr in style_exprs:
+            for expr in candidate_exprs:
                 expr = expr.copy()
                 all_expressions.append(expr)
                 all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时，使用 {expr['style']}")
@@ -233,7 +359,7 @@ class ExpressionSelector:
             all_situations_str = "\n".join(all_situations)
 
             if target_message:
-                target_message_str = f"，现在你想要对这条消息进行回复：“{target_message}”"
+                target_message_str = f"，现在你想要对这条消息进行回复：\"{target_message}\""
                 target_message_extra_block = "4.考虑你要回复的目标消息"
             else:
                 target_message_str = ""
diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py
index 5bf21567..57a8687e 100644
--- a/src/chat/replyer/group_generator.py
+++ b/src/chat/replyer/group_generator.py
@@ -229,7 +229,7 @@ class DefaultReplyer:
             return False, llm_response
 
     async def build_expression_habits(
-        self, chat_history: str, target: str, reply_reason: str = ""
+        self, chat_history: str, target: str, reply_reason: str = "", think_level: int = 1
     ) -> Tuple[str, List[int]]:
         # sourcery skip: for-append-to-extend
         """构建表达习惯块
@@ -238,6 +238,7 @@ class DefaultReplyer:
             chat_history: 聊天历史记录
             target: 目标消息内容
             reply_reason: planner给出的回复理由
+            think_level: 思考级别，0/1/2
 
         Returns:
             str: 表达习惯信息字符串
@@ -250,7 +251,7 @@ class DefaultReplyer:
         # 使用从处理器传来的选中表达方式
         # 使用模型预测选择表达方式
         selected_expressions, selected_ids = await expression_selector.select_suitable_expressions(
-            self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason
+            self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason, think_level=think_level
         )
 
         if selected_expressions:
@@ -788,7 +789,7 @@ class DefaultReplyer:
         # 并行执行八个构建任务（包括黑话解释）
         task_results = await asyncio.gather(
             self._time_and_run_task(
-                self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
+                self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level), "expression_habits"
             ),
             self._time_and_run_task(
                 self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info"
diff --git a/template/bot_config_template.toml b/template/bot_config_template.toml
index 16e4c235..0d60c7fd 100644
--- a/template/bot_config_template.toml
+++ b/template/bot_config_template.toml
@@ -1,5 +1,5 @@
 [inner]
-version = "7.1.0"
+version = "7.1.2"
 
 #----以下是给开发人员阅读的，如果你只是部署了麦麦，不需要阅读----
 # 如果你想要修改配置文件，请递增version的值