From a41f4694cf92545c2df722f9bfdda93f7ffbb0bb Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Tue, 18 Nov 2025 01:21:46 +0800
Subject: [PATCH] =?UTF-8?q?better=EF=BC=9A=E4=BC=98=E5=8C=96=E8=A1=A8?=
 =?UTF-8?q?=E8=BE=BE=E6=96=B9=E5=BC=8F=EF=BC=8C=E7=8E=B0=E5=9C=A8=E8=A1=A8?=
 =?UTF-8?q?=E8=BE=BE=E6=96=B9=E5=BC=8F=E4=BC=9A=E9=9A=8F=E6=97=B6=E9=97=B4?=
 =?UTF-8?q?=E6=9B=B4=E5=8A=A0=E7=B2=BE=E5=87=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 changelogs/changelog.md               |   4 +
 src/common/database/database_model.py |   2 +
 src/express/expression_learner.py     | 178 +++++++++++++++++++++++---
 3 files changed, 166 insertions(+), 18 deletions(-)

diff --git a/changelogs/changelog.md b/changelogs/changelog.md
index a1de221a..f8f30857 100644
--- a/changelogs/changelog.md
+++ b/changelogs/changelog.md
@@ -3,8 +3,12 @@
 ## [0.11.3] - 2025-11-17
 ### 功能更改和修复
 - 优化记忆提取策略
+- 优化表达方式学习
 - 修改readme
 
+提示：清理旧的记忆数据和表达方式，表现更好
+方法：删除数据库中 expression、jargon 和 thinking_back 的全部内容
+
 ## [0.11.2] - 2025-11-16
 ### 🌟 主要功能更改
 - "海马体Agent"记忆系统上线，最新最好的记忆系统，默认已接入lpmm
diff --git a/src/common/database/database_model.py b/src/common/database/database_model.py
index 3673e6d2..c97c0b72 100644
--- a/src/common/database/database_model.py
+++ b/src/common/database/database_model.py
@@ -311,6 +311,8 @@ class Expression(BaseModel):
     context = TextField(null=True)
     up_content = TextField(null=True)
 
+    content_list = TextField(null=True)
+    count = IntegerField(default=1)
     last_active_time = FloatField()
     chat_id = TextField(index=True)
     create_date = FloatField(null=True)  # 创建日期，允许为空以兼容老数据
diff --git a/src/express/expression_learner.py b/src/express/expression_learner.py
index 72dd831a..2a83e028 100644
--- a/src/express/expression_learner.py
+++ b/src/express/expression_learner.py
@@ -77,6 +77,9 @@ class ExpressionLearner:
         self.express_learn_model: LLMRequest = LLMRequest(
             model_set=model_config.model_task_config.utils, request_type="expression.learner"
         )
+        self.summary_model: LLMRequest = LLMRequest(
+            model_set=model_config.model_task_config.utils_small, request_type="expression.summary"
+        )
         self.embedding_model: LLMRequest = LLMRequest(
             model_set=model_config.model_task_config.embedding, request_type="expression.embedding"
         )
@@ -186,25 +189,13 @@ class ExpressionLearner:
             context,
             up_content,
         ) in learnt_expressions:
-            # 查找是否已存在相似表达方式
-            query = Expression.select().where(
-                (Expression.chat_id == self.chat_id) & (Expression.situation == situation) & (Expression.style == style)
+            await self._upsert_expression_record(
+                situation=situation,
+                style=style,
+                context=context,
+                up_content=up_content,
+                current_time=current_time,
             )
-            if query.exists():
-                # 表达方式完全相同，只更新时间戳
-                expr_obj = query.get()
-                expr_obj.last_active_time = current_time
-                expr_obj.save()
-            else:
-                Expression.create(
-                    situation=situation,
-                    style=style,
-                    last_active_time=current_time,
-                    chat_id=self.chat_id,
-                    create_date=current_time,  # 手动设置创建日期
-                    context=context,
-                    up_content=up_content,
-                )
 
         return learnt_expressions
 
@@ -362,6 +353,10 @@ class ExpressionLearner:
             logger.error(f"学习表达方式失败,模型生成出错: {e}")
             return None
         expressions: List[Tuple[str, str]] = self.parse_expression_response(response)
+        expressions = self._filter_self_reference_styles(expressions)
+        if not expressions:
+            logger.info("过滤后没有可用的表达方式（style 与机器人名称重复）")
+            return None
         # logger.debug(f"学习{type_str}的response: {response}")
 
         # 对表达方式溯源
@@ -433,6 +428,153 @@ class ExpressionLearner:
             expressions.append((situation, style))
         return expressions
 
+    def _filter_self_reference_styles(self, expressions: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
+        """
+        Drop learnt expressions whose style is blank or is just one of the bot's names.
+
+        The configured nickname and every alias are stripped and case-folded; a
+        (situation, style) pair survives only when its stripped style is
+        non-empty and differs (case-insensitively) from all of those names.
+
+        Args:
+            expressions: (situation, style) pairs to screen.
+
+        Returns:
+            The surviving pairs, unmodified and in their original order.
+        """
+        # Nickname first, then aliases; unset config values are treated as empty.
+        candidate_names = [global_config.bot.nickname or "", *(global_config.bot.alias_names or [])]
+        blocked = {name.strip().casefold() for name in candidate_names if name.strip()}
+
+        def is_usable(style: str) -> bool:
+            """Return True when the style is non-blank and not one of the bot's names."""
+            cleaned = (style or "").strip()
+            return bool(cleaned) and cleaned.casefold() not in blocked
+
+        kept = [(situation, style) for situation, style in expressions if is_usable(style)]
+
+        dropped = len(expressions) - len(kept)
+        if dropped:
+            logger.debug(f"已过滤 {dropped} 条style与机器人名称重复的表达方式")
+
+        return kept
+
+    async def _upsert_expression_record(
+        self,
+        situation: str,
+        style: str,
+        context: str,
+        up_content: str,
+        current_time: float,
+    ) -> None:
+        """
+        Store one learnt expression, merging it into an existing row when possible.
+
+        The first Expression row of this chat with the same style is looked up;
+        if one is found it is updated through _update_existing_expression,
+        otherwise a new row is inserted through _create_expression_record.
+
+        Args:
+            situation: Situation description learnt for this style.
+            style: Expression style; together with self.chat_id it is the lookup key.
+            context: Value forwarded for the row's context column.
+            up_content: Value forwarded for the row's up_content column.
+            current_time: Timestamp forwarded to the update/create helper.
+        """
+        same_style = (Expression.chat_id == self.chat_id) & (Expression.style == style)
+        existing = Expression.select().where(same_style).first()
+        # Keyword arguments common to both the update and the create path.
+        shared = dict(situation=situation, context=context, up_content=up_content, current_time=current_time)
+
+        if not existing:
+            await self._create_expression_record(style=style, **shared)
+        else:
+            await self._update_existing_expression(expr_obj=existing, **shared)
+
+    async def _create_expression_record(
+        self,
+        situation: str,
+        style: str,
+        context: str,
+        up_content: str,
+        current_time: float,
+    ) -> None:
+        """
+        Insert a new Expression row for this chat, starting at count 1.
+
+        The stored situation is whatever _compose_situation_text returns for the
+        single raw situation; the raw text itself goes into the JSON content_list.
+        """
+        seen_situations = [situation]
+        display_situation = await self._compose_situation_text(seen_situations, 1, situation)
+        serialized = json.dumps(seen_situations, ensure_ascii=False)
+
+        new_row = dict(style=style, chat_id=self.chat_id, context=context, up_content=up_content)
+        new_row.update(situation=display_situation, content_list=serialized, count=1)
+        new_row.update(last_active_time=current_time, create_date=current_time)
+        Expression.create(**new_row)
+
+    async def _update_existing_expression(
+        self,
+        expr_obj: Expression,
+        situation: str,
+        context: str,
+        up_content: str,
+        current_time: float,
+    ) -> None:
+        """
+        Fold a newly learnt situation into an existing Expression row and save it.
+
+        Appends the situation to the row's JSON content_list, increments count,
+        overwrites last_active_time/context/up_content, and regenerates situation
+        via _compose_situation_text (the previous situation text is the fallback).
+        """
+        history = [*self._parse_content_list(expr_obj.content_list), situation]
+        bumped_count = (expr_obj.count or 0) + 1
+
+        expr_obj.content_list = json.dumps(history, ensure_ascii=False)
+        expr_obj.count = bumped_count
+        expr_obj.context, expr_obj.up_content = context, up_content
+        expr_obj.last_active_time = current_time
+        # The old situation is read as the fallback before being overwritten.
+        expr_obj.situation = await self._compose_situation_text(history, bumped_count, expr_obj.situation)
+        expr_obj.save()
+
+    def _parse_content_list(self, stored_list: Optional[str]) -> List[str]:
+        """Decode the stored JSON content_list; yield [] for empty, invalid or non-list data."""
+        try:
+            decoded = json.loads(stored_list) if stored_list else []
+        except json.JSONDecodeError:
+            decoded = []
+        entries = decoded if isinstance(decoded, list) else []
+        return [str(entry) for entry in entries if isinstance(entry, str)]
+
+    async def _compose_situation_text(self, content_list: List[str], count: int, fallback: str = "") -> str:
+        """Summarize the stripped, non-empty situations; else join them with "/"; else use fallback."""
+        cleaned = [text for text in (raw.strip() for raw in content_list) if text]
+        # An empty/None summary falls through to the plain join, then to the caller's fallback.
+        condensed = await self._summarize_situations(cleaned)
+        return condensed or ("/".join(cleaned) if cleaned else fallback)
+
+    async def _summarize_situations(self, situations: List[str]) -> Optional[str]:
+        """
+        Ask self.summary_model to condense the latest 10 situations into one short line.
+
+        Returns None when there is nothing to summarize, when the stripped reply
+        is empty, or when the request raises (the error is logged, not re-raised).
+        """
+        if not situations:
+            return None
+        bullet_lines = "\n".join(f"- {item}" for item in situations[-10:])
+        header = "请阅读以下多个聊天情境描述，并将它们概括成一句简短的话，长度不超过20个字，保留共同特点：\n"
+        prompt = f"{header}{bullet_lines}\n只输出概括内容。"
+        try:
+            reply, _ = await self.summary_model.generate_response_async(prompt, temperature=0.2)
+            return reply.strip() or None
+        except Exception as e:
+            logger.error(f"概括表达情境失败: {e}")
+            return None
+
     def _build_bare_lines(self, messages: List) -> List[Tuple[int, str]]:
         """
         为每条消息构建精简文本列表，保留到原消息索引的映射