feat: higher jargon extraction rate; improved extraction accuracy

Jargon explanation now runs as an independent step, with a higher extraction rate.
Extraction accuracy has been improved.
pull/1385/head
SengokuCola 2025-11-25 19:19:52 +08:00
parent 39ab2b5fab
commit 644d470558
8 changed files with 609 additions and 340 deletions
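For orientation before the diffs, here is a minimal, self-contained sketch (stub names and bodies, not the project's actual modules) of how the change is wired: explain_jargon_in_context now runs as one more coroutine in the replyer's asyncio.gather batch, and its optional result is injected into the reply prompt as jargon_explanation.

```python
import asyncio
from typing import Any, Dict, List, Optional

# Stub builders standing in for the real prompt-building coroutines.
async def build_memory_retrieval(chat_context: str) -> str:
    return "memory block"

async def explain_jargon_in_context(chat_id: str, messages: List[Any], chat_context: str) -> Optional[str]:
    # The real implementation matches known jargon terms in the recent messages,
    # looks up their stored meanings and summarizes them; it returns None when nothing matches.
    return None

async def build_reply_prompt(chat_id: str, messages: List[Any], chat_context: str) -> Dict[str, str]:
    memory, jargon = await asyncio.gather(
        build_memory_retrieval(chat_context),
        explain_jargon_in_context(chat_id, messages, chat_context),
    )
    # The jargon block is optional: it only appears in the prompt when jargon was found.
    return {"memory_retrieval": memory, "jargon_explanation": jargon or ""}

if __name__ == "__main__":
    print(asyncio.run(build_reply_prompt("chat-1", [], "...recent chat...")))
```

Because the result is Optional, the prompt placeholder simply stays empty when no known jargon appears in the recent messages.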

View File

@ -36,6 +36,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
from src.jargon.jargon_explainer import explain_jargon_in_context
init_lpmm_prompt()
init_replyer_prompt()
@ -786,7 +787,7 @@ class DefaultReplyer:
show_actions=True,
)
# 并行执行七个构建任务
# 并行执行八个构建任务(包括黑话解释)
task_results = await asyncio.gather(
self._time_and_run_task(
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
@ -804,6 +805,10 @@ class DefaultReplyer:
),
"memory_retrieval",
),
self._time_and_run_task(
explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short),
"jargon_explanation",
),
)
# 任务名称中英文映射
@ -816,6 +821,7 @@ class DefaultReplyer:
"personality_prompt": "人格信息", "personality_prompt": "人格信息",
"mood_state_prompt": "情绪状态", "mood_state_prompt": "情绪状态",
"memory_retrieval": "记忆检索", "memory_retrieval": "记忆检索",
"jargon_explanation": "黑话解释",
}
# 处理结果
@ -846,6 +852,7 @@ class DefaultReplyer:
memory_retrieval: str = results_dict["memory_retrieval"]
keywords_reaction_prompt = await self.build_keywords_reaction_prompt(target)
mood_state_prompt: str = results_dict["mood_state_prompt"]
jargon_explanation: Optional[str] = results_dict.get("jargon_explanation")
# 从 chosen_actions 中提取 planner 的整体思考理由
planner_reasoning = ""
@ -896,6 +903,7 @@ class DefaultReplyer:
mood_state=mood_state_prompt,
# relation_info_block=relation_info,
extra_info_block=extra_info_block,
jargon_explanation=jargon_explanation,
identity=personality_prompt,
action_descriptions=actions_info,
sender_name=sender,

View File

@ -37,6 +37,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
from src.jargon.jargon_explainer import explain_jargon_in_context
init_lpmm_prompt()
init_replyer_prompt()
@ -706,7 +707,7 @@ class PrivateReplyer:
show_actions=True,
)
# 并行执行八个构建任务
# 并行执行九个构建任务(包括黑话解释)
task_results = await asyncio.gather(
self._time_and_run_task(
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
@ -725,6 +726,10 @@ class PrivateReplyer:
),
"memory_retrieval",
),
self._time_and_run_task(
explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short),
"jargon_explanation",
),
)
# 任务名称中英文映射
@ -737,6 +742,7 @@ class PrivateReplyer:
"personality_prompt": "人格信息", "personality_prompt": "人格信息",
"mood_state_prompt": "情绪状态", "mood_state_prompt": "情绪状态",
"memory_retrieval": "记忆检索", "memory_retrieval": "记忆检索",
"jargon_explanation": "黑话解释",
}
# 处理结果
@ -767,6 +773,7 @@ class PrivateReplyer:
mood_state_prompt: str = results_dict["mood_state_prompt"]
memory_retrieval: str = results_dict["memory_retrieval"]
keywords_reaction_prompt = await self.build_keywords_reaction_prompt(target)
jargon_explanation: Optional[str] = results_dict.get("jargon_explanation")
# 从 chosen_actions 中提取 planner 的整体思考理由
planner_reasoning = ""
@ -813,6 +820,7 @@ class PrivateReplyer:
identity=personality_prompt,
action_descriptions=actions_info,
dialogue_prompt=dialogue_prompt,
jargon_explanation=jargon_explanation,
time_block=time_block,
target=target,
reason=reply_reason,
@ -835,6 +843,7 @@ class PrivateReplyer:
identity=personality_prompt,
action_descriptions=actions_info,
dialogue_prompt=dialogue_prompt,
jargon_explanation=jargon_explanation,
time_block=time_block,
reply_target_block=reply_target_block,
reply_style=global_config.personality.reply_style,

View File

@ -8,7 +8,7 @@ def init_replyer_prompt():
Prompt(
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
{expression_habits_block}{memory_retrieval}
{expression_habits_block}{memory_retrieval}{jargon_explanation}
你正在qq群里聊天下面是群里正在聊的内容其中包含聊天记录和聊天中的图片
其中标注 {bot_name}() 的发言是你自己的发言请注意区分:
@ -29,7 +29,7 @@ def init_replyer_prompt():
Prompt(
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
{expression_habits_block}{memory_retrieval}
{expression_habits_block}{memory_retrieval}{jargon_explanation}
你正在和{sender_name}聊天这是你们之前聊的内容:
{time_block}
@ -48,7 +48,7 @@ def init_replyer_prompt():
Prompt(
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
{expression_habits_block}{memory_retrieval}
{expression_habits_block}{memory_retrieval}{jargon_explanation}
你正在和{sender_name}聊天这是你们之前聊的内容:
{time_block}

View File

@ -0,0 +1,261 @@
import re
import time
from typing import List, Dict, Optional, Any
from src.common.logger import get_logger
from src.common.database.database_model import Jargon
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config, global_config
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.jargon.jargon_miner import search_jargon
from src.jargon.jargon_utils import is_bot_message, contains_bot_self_name, parse_chat_id_list, chat_id_list_contains
logger = get_logger("jargon")
def _init_explainer_prompts() -> None:
"""初始化黑话解释器相关的prompt"""
# Prompt概括黑话解释结果
summarize_prompt_str = """
**上下文聊天内容**
{chat_context}
**提取到的黑话及其含义**
{jargon_explanations}
请根据上述信息对黑话解释进行概括和整理
- 如果上下文中有黑话出现请简要说明这些黑话在上下文中的使用情况
- 将黑话解释整理成简洁易读的格式
- 如果某个黑话在上下文中没有出现可以省略
- 输出格式要自然适合作为回复参考信息
请输出概括后的黑话解释直接输出文本不要使用JSON格式
"""
Prompt(summarize_prompt_str, "jargon_explainer_summarize_prompt")
_init_explainer_prompts()
class JargonExplainer:
"""黑话解释器,用于在回复前识别和解释上下文中的黑话"""
def __init__(self, chat_id: str) -> None:
self.chat_id = chat_id
self.llm = LLMRequest(
model_set=model_config.model_task_config.utils,
request_type="jargon.explain",
)
def match_jargon_from_messages(
self, messages: List[Any]
) -> List[Dict[str, str]]:
"""
通过直接匹配数据库中的jargon字符串来提取黑话
Args:
messages: 消息列表
Returns:
List[Dict[str, str]]: 提取到的黑话列表每个元素包含content
"""
start_time = time.time()
if not messages:
return []
# 收集所有消息的文本内容
message_texts: List[str] = []
for msg in messages:
# 跳过机器人自己的消息
if is_bot_message(msg):
continue
msg_text = (getattr(msg, "display_message", None) or getattr(msg, "processed_plain_text", None) or "").strip()
if msg_text:
message_texts.append(msg_text)
if not message_texts:
return []
# 合并所有消息文本
combined_text = " ".join(message_texts)
# 查询所有有meaning的jargon记录
query = Jargon.select().where(
(Jargon.meaning.is_null(False)) & (Jargon.meaning != "")
)
# 根据all_global配置决定查询逻辑
if global_config.jargon.all_global:
# 开启all_global只查询is_global=True的记录
query = query.where(Jargon.is_global)
else:
# 关闭all_global查询is_global=True或chat_id列表包含当前chat_id的记录
# 这里先查询所有然后在Python层面过滤
pass
# 按count降序排序优先匹配出现频率高的
query = query.order_by(Jargon.count.desc())
# 执行查询并匹配
matched_jargon: Dict[str, Dict[str, str]] = {}
query_time = time.time()
for jargon in query:
content = jargon.content or ""
if not content or not content.strip():
continue
# 跳过包含机器人昵称的词条
if contains_bot_self_name(content):
continue
# 检查chat_id如果all_global=False
if not global_config.jargon.all_global:
if jargon.is_global:
# 全局黑话,包含
pass
else:
# 检查chat_id列表是否包含当前chat_id
chat_id_list = parse_chat_id_list(jargon.chat_id)
if not chat_id_list_contains(chat_id_list, self.chat_id):
continue
# 在文本中查找匹配(大小写不敏感)
pattern = re.escape(content)
# 使用单词边界或中文字符边界来匹配,避免部分匹配
# 对于中文使用Unicode字符类对于英文使用单词边界
if re.search(r'[\u4e00-\u9fff]', content):
# 包含中文,使用更宽松的匹配
search_pattern = pattern
else:
# 纯英文/数字,使用单词边界
search_pattern = r'\b' + pattern + r'\b'
if re.search(search_pattern, combined_text, re.IGNORECASE):
# 找到匹配,记录(去重)
if content not in matched_jargon:
matched_jargon[content] = {"content": content}
match_time = time.time()
total_time = match_time - start_time
query_duration = query_time - start_time
match_duration = match_time - query_time
logger.info(
f"黑话匹配完成: 查询耗时 {query_duration:.3f}s, 匹配耗时 {match_duration:.3f}s, "
f"总耗时 {total_time:.3f}s, 匹配到 {len(matched_jargon)} 个黑话"
)
return list(matched_jargon.values())
async def explain_jargon(
self, messages: List[Any], chat_context: str
) -> Optional[str]:
"""
解释上下文中的黑话
Args:
messages: 消息列表
chat_context: 聊天上下文的文本表示
Returns:
Optional[str]: 黑话解释的概括文本如果没有黑话则返回None
"""
if not messages:
return None
# 直接匹配方式从数据库中查询jargon并在消息中匹配
jargon_entries = self.match_jargon_from_messages(messages)
if not jargon_entries:
return None
# 去重按content
unique_jargon: Dict[str, Dict[str, str]] = {}
for entry in jargon_entries:
content = entry["content"]
if content not in unique_jargon:
unique_jargon[content] = entry
jargon_list = list(unique_jargon.values())
logger.info(f"从上下文中提取到 {len(jargon_list)} 个黑话: {[j['content'] for j in jargon_list]}")
# 查询每个黑话的含义
jargon_explanations: List[str] = []
for entry in jargon_list:
content = entry["content"]
# 根据是否开启全局黑话,决定查询方式
if global_config.jargon.all_global:
# 开启全局黑话查询所有is_global=True的记录
results = search_jargon(
keyword=content,
chat_id=None, # 不指定chat_id查询全局黑话
limit=1,
case_sensitive=False,
fuzzy=False, # 精确匹配
)
else:
# 关闭全局黑话:优先查询当前聊天或全局的黑话
results = search_jargon(
keyword=content,
chat_id=self.chat_id,
limit=1,
case_sensitive=False,
fuzzy=False, # 精确匹配
)
if results and len(results) > 0:
meaning = results[0].get("meaning", "").strip()
if meaning:
jargon_explanations.append(f"- {content}: {meaning}")
else:
logger.info(f"黑话 {content} 没有找到含义")
else:
logger.info(f"黑话 {content} 未在数据库中找到")
if not jargon_explanations:
logger.info("没有找到任何黑话的含义,跳过解释")
return None
# 拼接所有黑话解释
explanations_text = "\n".join(jargon_explanations)
# 使用LLM概括黑话解释
summarize_prompt = await global_prompt_manager.format_prompt(
"jargon_explainer_summarize_prompt",
chat_context=chat_context,
jargon_explanations=explanations_text,
)
summary, _ = await self.llm.generate_response_async(summarize_prompt, temperature=0.3)
if not summary:
# 如果LLM概括失败直接返回原始解释
return f"上下文中的黑话解释:\n{explanations_text}"
summary = summary.strip()
if not summary:
return f"上下文中的黑话解释:\n{explanations_text}"
return summary
async def explain_jargon_in_context(
chat_id: str, messages: List[Any], chat_context: str
) -> Optional[str]:
"""
解释上下文中的黑话便捷函数
Args:
chat_id: 聊天ID
messages: 消息列表
chat_context: 聊天上下文的文本表示
Returns:
Optional[str]: 黑话解释的概括文本如果没有黑话则返回None
"""
explainer = JargonExplainer(chat_id)
return await explainer.explain_jargon(messages, chat_context)
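A small standalone illustration (not part of the committed file) of the matching rule match_jargon_from_messages uses above: terms containing CJK characters are matched as plain substrings, while ASCII-only terms get \b word boundaries so short English terms are not matched inside longer words.

```python
import re

def term_matches(term: str, text: str) -> bool:
    pattern = re.escape(term)
    if re.search(r"[\u4e00-\u9fff]", term):
        search_pattern = pattern                    # CJK term: substring match
    else:
        search_pattern = r"\b" + pattern + r"\b"    # ASCII term: whole-word match
    return re.search(search_pattern, text, re.IGNORECASE) is not None

print(term_matches("破防", "他这下彻底破防了"))           # True: CJK term, substring match
print(term_matches("gg", "gg, 下一把"))                    # True: whole word
print(term_matches("gg", "please read the suggestion"))    # False: no word boundary inside "suggestion"
```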

View File

@ -11,127 +11,24 @@ from src.common.database.database_model import Jargon
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config, global_config
from src.chat.message_receive.chat_stream import get_chat_manager
from src.plugin_system.apis import llm_api
from src.chat.utils.chat_message_builder import (
build_readable_messages,
build_readable_messages_with_id,
get_raw_msg_by_timestamp_with_chat_inclusive,
get_raw_msg_before_timestamp_with_chat,
build_readable_messages_with_list,
)
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.chat.utils.utils import parse_platform_accounts
from src.jargon.jargon_utils import (
is_bot_message,
build_context_paragraph,
contains_bot_self_name,
parse_chat_id_list,
chat_id_list_contains,
update_chat_id_list
)
logger = get_logger("jargon")
def _contains_bot_self_name(content: str) -> bool:
"""
判断词条是否包含机器人的昵称或别名
"""
if not content:
return False
bot_config = getattr(global_config, "bot", None)
if not bot_config:
return False
target = content.strip().lower()
nickname = str(getattr(bot_config, "nickname", "") or "").strip().lower()
alias_names = [str(alias or "").strip().lower() for alias in getattr(bot_config, "alias_names", []) or []]
candidates = [name for name in [nickname, *alias_names] if name]
return any(name in target for name in candidates if target)
def _build_context_paragraph(messages: List[Any], center_index: int) -> Optional[str]:
"""
构建包含中心消息上下文的段落前3条+后3条使用标准的 readable builder 输出
"""
if not messages or center_index < 0 or center_index >= len(messages):
return None
context_start = max(0, center_index - 3)
context_end = min(len(messages), center_index + 1 + 3)
context_messages = messages[context_start:context_end]
if not context_messages:
return None
try:
paragraph = build_readable_messages(
messages=context_messages,
replace_bot_name=True,
timestamp_mode="relative",
read_mark=0.0,
truncate=False,
show_actions=False,
show_pic=True,
message_id_list=None,
remove_emoji_stickers=False,
pic_single=True,
)
except Exception as e:
logger.warning(f"构建上下文段落失败: {e}")
return None
paragraph = paragraph.strip()
return paragraph or None
def _is_bot_message(msg: Any) -> bool:
"""判断消息是否来自机器人自身"""
if msg is None:
return False
bot_config = getattr(global_config, "bot", None)
if not bot_config:
return False
platform = (
str(getattr(msg, "user_platform", "") or getattr(getattr(msg, "user_info", None), "platform", "") or "")
.strip()
.lower()
)
user_id = (
str(getattr(msg, "user_id", "") or getattr(getattr(msg, "user_info", None), "user_id", "") or "")
.strip()
)
if not platform or not user_id:
return False
platform_accounts = {}
try:
platform_accounts = parse_platform_accounts(getattr(bot_config, "platforms", []) or [])
except Exception:
platform_accounts = {}
bot_accounts: Dict[str, str] = {}
qq_account = str(getattr(bot_config, "qq_account", "") or "").strip()
if qq_account:
bot_accounts["qq"] = qq_account
telegram_account = str(getattr(bot_config, "telegram_account", "") or "").strip()
if telegram_account:
bot_accounts["telegram"] = telegram_account
for plat, account in platform_accounts.items():
if account and plat not in bot_accounts:
bot_accounts[plat] = account
bot_account = bot_accounts.get(platform)
return bool(bot_account and user_id == bot_account)
def _has_adjacent_bot_message(messages: List[Any], center_index: int) -> bool:
"""检查目标消息的上一条或下一条是否为机器人发言"""
for neighbor in (center_index - 1, center_index + 1):
if 0 <= neighbor < len(messages) and _is_bot_message(messages[neighbor]):
return True
return False
def _init_prompt() -> None:
@ -176,6 +73,7 @@ def _init_inference_prompts() -> None:
请根据上下文推断"{content}"这个词条的含义
- 如果这是一个黑话俚语或网络用语请推断其含义
- 如果含义明确常规词汇也请说明
- {bot_name} 的发言内容可能包含错误请不要参考其发言内容
- 如果上下文信息不足无法推断含义请设置 no_info true
JSON 格式输出
@ -228,94 +126,6 @@ _init_prompt()
_init_inference_prompts()
async def _enrich_raw_content_if_needed(
content: str,
raw_content_list: List[str],
chat_id: str,
messages: List[Any],
extraction_start_time: float,
extraction_end_time: float,
) -> List[str]:
"""
检查raw_content是否只包含黑话本身如果是则获取该消息的前三条消息作为原始内容
Args:
content: 黑话内容
raw_content_list: 原始raw_content列表
chat_id: 聊天ID
messages: 当前时间窗口内的消息列表
extraction_start_time: 提取开始时间
extraction_end_time: 提取结束时间
Returns:
处理后的raw_content列表
"""
enriched_list = []
for raw_content in raw_content_list:
# 检查raw_content是否只包含黑话本身去除空白字符后比较
raw_content_clean = raw_content.strip()
content_clean = content.strip()
# 如果raw_content只包含黑话本身可能有一些标点或空白则尝试获取上下文
# 去除所有空白字符后比较,确保只包含黑话本身
raw_content_normalized = raw_content_clean.replace(" ", "").replace("\n", "").replace("\t", "")
content_normalized = content_clean.replace(" ", "").replace("\n", "").replace("\t", "")
if raw_content_normalized == content_normalized:
# 在消息列表中查找只包含该黑话的消息(去除空白后比较)
target_message = None
for msg in messages:
msg_content = (msg.processed_plain_text or msg.display_message or "").strip()
msg_content_normalized = msg_content.replace(" ", "").replace("\n", "").replace("\t", "")
# 检查消息内容是否只包含黑话本身(去除空白后完全匹配)
if msg_content_normalized == content_normalized:
target_message = msg
break
if target_message and target_message.time:
# 获取该消息的前三条消息
try:
previous_messages = get_raw_msg_before_timestamp_with_chat(
chat_id=chat_id, timestamp=target_message.time, limit=3
)
if previous_messages:
# 将前三条消息和当前消息一起格式化
context_messages = previous_messages + [target_message]
# 按时间排序
context_messages.sort(key=lambda x: x.time or 0)
# 格式化为可读消息
formatted_context, _ = await build_readable_messages_with_list(
context_messages,
replace_bot_name=True,
timestamp_mode="relative",
truncate=False,
)
if formatted_context.strip():
enriched_list.append(formatted_context.strip())
logger.warning(f"为黑话 {content} 补充了上下文消息")
else:
# 如果格式化失败使用原始raw_content
enriched_list.append(raw_content)
else:
# 没有找到前三条消息使用原始raw_content
enriched_list.append(raw_content)
except Exception as e:
logger.warning(f"获取黑话 {content} 的上下文消息失败: {e}")
# 出错时使用原始raw_content
enriched_list.append(raw_content)
else:
# 没有找到包含黑话的消息使用原始raw_content
enriched_list.append(raw_content)
else:
# raw_content包含更多内容直接使用
enriched_list.append(raw_content)
return enriched_list
def _should_infer_meaning(jargon_obj: Jargon) -> bool:
"""
@ -402,7 +212,7 @@ class JargonMiner:
for idx, msg in enumerate(messages):
msg_text = (getattr(msg, "display_message", None) or getattr(msg, "processed_plain_text", None) or "").strip()
if not msg_text or _is_bot_message(msg):
if not msg_text or is_bot_message(msg):
continue
for content in self.cache.keys():
@ -411,9 +221,7 @@ class JargonMiner:
if (content, idx) in processed_pairs:
continue
if content in msg_text:
if _has_adjacent_bot_message(messages, idx):
continue
paragraph = _build_context_paragraph(messages, idx)
paragraph = build_context_paragraph(messages, idx)
if not paragraph:
continue
cached_entries.append({"content": content, "raw_content": [paragraph]})
@ -719,7 +527,7 @@ class JargonMiner:
if not content:
continue
if _contains_bot_self_name(content):
if contains_bot_self_name(content):
logger.info(f"解析阶段跳过包含机器人昵称/别名的词条: {content}")
continue
@ -734,16 +542,11 @@ class JargonMiner:
continue
target_msg = messages[msg_index]
if _is_bot_message(target_msg):
if is_bot_message(target_msg):
logger.info(f"解析阶段跳过引用机器人自身消息的词条: content={content}, msg_id={msg_id_str}")
continue
if _has_adjacent_bot_message(messages, msg_index):
logger.info(
f"解析阶段跳过因邻近机器人发言的词条: content={content}, msg_id={msg_id_str}"
)
continue
context_paragraph = _build_context_paragraph(messages, msg_index)
context_paragraph = build_context_paragraph(messages, msg_index)
if not context_paragraph:
logger.warning(f"解析jargon失败上下文为空content={content}, msg_id={msg_id_str}")
continue
@ -785,27 +588,27 @@ class JargonMiner:
content = entry["content"]
raw_content_list = entry["raw_content"] # 已经是列表
# 检查并补充raw_content如果只包含黑话本身则获取前三条消息作为上下文
# raw_content_list = await _enrich_raw_content_if_needed(
# content=content,
# raw_content_list=raw_content_list,
# chat_id=self.chat_id,
# messages=messages,
# extraction_start_time=extraction_start_time,
# extraction_end_time=extraction_end_time,
# )
try:
# 根据all_global配置决定查询逻辑
if global_config.jargon.all_global:
# 开启all_global无视chat_id查询所有content匹配的记录所有记录都是全局的
query = Jargon.select().where(Jargon.content == content)
else:
# 关闭all_global只查询chat_id匹配的记录不考虑is_global
query = Jargon.select().where((Jargon.chat_id == self.chat_id) & (Jargon.content == content))
if query.exists():
obj = query.get()
# 查询所有content匹配的记录
query = Jargon.select().where(Jargon.content == content)
# 查找匹配的记录
matched_obj = None
for obj in query:
if global_config.jargon.all_global:
# 开启all_global所有content匹配的记录都可以
matched_obj = obj
break
else:
# 关闭all_global需要检查chat_id列表是否包含目标chat_id
chat_id_list = parse_chat_id_list(obj.chat_id)
if chat_id_list_contains(chat_id_list, self.chat_id):
matched_obj = obj
break
if matched_obj:
obj = matched_obj
try:
obj.count = (obj.count or 0) + 1
except Exception:
@ -827,6 +630,11 @@ class JargonMiner:
merged_list = list(dict.fromkeys(existing_raw_content + raw_content_list))
obj.raw_content = json.dumps(merged_list, ensure_ascii=False)
# 更新chat_id列表增加当前chat_id的计数
chat_id_list = parse_chat_id_list(obj.chat_id)
updated_chat_id_list = update_chat_id_list(chat_id_list, self.chat_id, increment=1)
obj.chat_id = json.dumps(updated_chat_id_list, ensure_ascii=False)
# 开启all_global时确保记录标记为is_global=True
if global_config.jargon.all_global:
obj.is_global = True
@ -851,10 +659,14 @@ class JargonMiner:
# 关闭all_global新记录is_global=False
is_global_new = False
# 使用新格式创建chat_id列表[[chat_id, count]]
chat_id_list = [[self.chat_id, 1]]
chat_id_json = json.dumps(chat_id_list, ensure_ascii=False)
Jargon.create(
content=content,
raw_content=json.dumps(raw_content_list, ensure_ascii=False),
chat_id=self.chat_id,
chat_id=chat_id_json,
is_global=is_global_new,
count=1,
)
@ -924,8 +736,8 @@ def search_jargon(
keyword = keyword.strip()
# 构建查询
query = Jargon.select(Jargon.content, Jargon.meaning)
# 构建查询(选择所有需要的字段,以便后续过滤)
query = Jargon.select()
# 构建搜索条件
if case_sensitive:
@ -951,102 +763,34 @@ def search_jargon(
if global_config.jargon.all_global:
# 开启all_global所有记录都是全局的查询所有is_global=True的记录无视chat_id
query = query.where(Jargon.is_global)
else:
# 关闭all_global如果提供了chat_id优先搜索该聊天或global的jargon
if chat_id:
query = query.where((Jargon.chat_id == chat_id) | Jargon.is_global)
# 只返回有meaning的记录
query = query.where((Jargon.meaning.is_null(False)) & (Jargon.meaning != ""))
# 注意对于all_global=False的情况chat_id过滤在Python层面进行以便兼容新旧格式
# 注意meaning的过滤移到Python层面因为我们需要先过滤chat_id
# 按count降序排序优先返回出现频率高的
query = query.order_by(Jargon.count.desc())
# 限制结果数量
query = query.limit(limit)
# 执行查询并返回结果
# 限制结果数量(先多取一些,因为后面可能过滤)
query = query.limit(limit * 2)
# 执行查询并返回结果过滤chat_id
results = []
for jargon in query:
# 如果提供了chat_id且all_global=False需要检查chat_id列表是否包含目标chat_id
if chat_id and not global_config.jargon.all_global:
chat_id_list = parse_chat_id_list(jargon.chat_id)
# 如果记录是is_global=True或者chat_id列表包含目标chat_id则包含
if not jargon.is_global and not chat_id_list_contains(chat_id_list, chat_id):
continue
# 只返回有meaning的记录
if not jargon.meaning or jargon.meaning.strip() == "":
continue
results.append({"content": jargon.content or "", "meaning": jargon.meaning or ""})
# 达到限制数量后停止
if len(results) >= limit:
break
return results
async def store_jargon_from_answer(jargon_keyword: str, answer: str, chat_id: str) -> None:
"""将黑话存入jargon系统
Args:
jargon_keyword: 黑话关键词
answer: 答案内容将概括为raw_content
chat_id: 聊天ID
"""
try:
# 概括答案为简短的raw_content
summary_prompt = f"""请将以下答案概括为一句简短的话不超过50字作为黑话"{jargon_keyword}"的使用示例:
答案{answer}
只输出概括后的内容不要输出其他内容"""
success, summary, _, _ = await llm_api.generate_with_model(
summary_prompt,
model_config=model_config.model_task_config.utils_small,
request_type="memory.summarize_jargon",
)
logger.info(f"概括答案提示: {summary_prompt}")
logger.info(f"概括答案: {summary}")
if not success:
logger.warning(f"概括答案失败,使用原始答案: {summary}")
summary = answer[:100] # 截取前100字符作为备用
raw_content = summary.strip()[:200] # 限制长度
# 检查是否已存在
if global_config.jargon.all_global:
query = Jargon.select().where(Jargon.content == jargon_keyword)
else:
query = Jargon.select().where((Jargon.chat_id == chat_id) & (Jargon.content == jargon_keyword))
if query.exists():
# 更新现有记录
obj = query.get()
obj.count = (obj.count or 0) + 1
# 合并raw_content列表
existing_raw_content = []
if obj.raw_content:
try:
existing_raw_content = (
json.loads(obj.raw_content) if isinstance(obj.raw_content, str) else obj.raw_content
)
if not isinstance(existing_raw_content, list):
existing_raw_content = [existing_raw_content] if existing_raw_content else []
except (json.JSONDecodeError, TypeError):
existing_raw_content = [obj.raw_content] if obj.raw_content else []
# 合并并去重
merged_list = list(dict.fromkeys(existing_raw_content + [raw_content]))
obj.raw_content = json.dumps(merged_list, ensure_ascii=False)
if global_config.jargon.all_global:
obj.is_global = True
obj.save()
logger.info(f"更新jargon记录: {jargon_keyword}")
else:
# 创建新记录
is_global_new = True if global_config.jargon.all_global else False
Jargon.create(
content=jargon_keyword,
raw_content=json.dumps([raw_content], ensure_ascii=False),
chat_id=chat_id,
is_global=is_global_new,
count=1,
)
logger.info(f"创建新jargon记录: {jargon_keyword}")
except Exception as e:
logger.error(f"存储jargon失败: {e}")

View File

@ -0,0 +1,199 @@
import json
from typing import List, Dict, Optional, Any
from src.common.logger import get_logger
from src.common.database.database_model import Jargon
from src.config.config import global_config
from src.chat.utils.chat_message_builder import (
build_readable_messages,
build_readable_messages_with_id,
)
from src.chat.utils.utils import parse_platform_accounts
logger = get_logger("jargon")
def parse_chat_id_list(chat_id_value: Any) -> List[List[Any]]:
"""
解析chat_id字段兼容旧格式字符串和新格式JSON列表
Args:
chat_id_value: 可能是字符串旧格式或JSON字符串新格式
Returns:
List[List[Any]]: 格式为 [[chat_id, count], ...] 的列表
"""
if not chat_id_value:
return []
# 如果是字符串尝试解析为JSON
if isinstance(chat_id_value, str):
# 尝试解析JSON
try:
parsed = json.loads(chat_id_value)
if isinstance(parsed, list):
# 新格式:已经是列表
return parsed
elif isinstance(parsed, str):
# 解析后还是字符串,说明是旧格式
return [[parsed, 1]]
else:
# 其他类型,当作旧格式处理
return [[str(chat_id_value), 1]]
except (json.JSONDecodeError, TypeError):
# 解析失败,当作旧格式(纯字符串)
return [[str(chat_id_value), 1]]
elif isinstance(chat_id_value, list):
# 已经是列表格式
return chat_id_value
else:
# 其他类型,转换为旧格式
return [[str(chat_id_value), 1]]
def update_chat_id_list(chat_id_list: List[List[Any]], target_chat_id: str, increment: int = 1) -> List[List[Any]]:
"""
更新chat_id列表如果target_chat_id已存在则增加计数否则添加新条目
Args:
chat_id_list: 当前的chat_id列表格式为 [[chat_id, count], ...]
target_chat_id: 要更新或添加的chat_id
increment: 增加的计数默认为1
Returns:
List[List[Any]]: 更新后的chat_id列表
"""
# 查找是否已存在该chat_id
found = False
for item in chat_id_list:
if isinstance(item, list) and len(item) >= 1 and str(item[0]) == str(target_chat_id):
# 找到匹配的chat_id增加计数
if len(item) >= 2:
item[1] = (item[1] if isinstance(item[1], (int, float)) else 0) + increment
else:
item.append(increment)
found = True
break
if not found:
# 未找到,添加新条目
chat_id_list.append([target_chat_id, increment])
return chat_id_list
def chat_id_list_contains(chat_id_list: List[List[Any]], target_chat_id: str) -> bool:
"""
检查chat_id列表中是否包含指定的chat_id
Args:
chat_id_list: chat_id列表格式为 [[chat_id, count], ...]
target_chat_id: 要查找的chat_id
Returns:
bool: 如果包含则返回True
"""
for item in chat_id_list:
if isinstance(item, list) and len(item) >= 1 and str(item[0]) == str(target_chat_id):
return True
return False
def contains_bot_self_name(content: str) -> bool:
"""
判断词条是否包含机器人的昵称或别名
"""
if not content:
return False
bot_config = getattr(global_config, "bot", None)
if not bot_config:
return False
target = content.strip().lower()
nickname = str(getattr(bot_config, "nickname", "") or "").strip().lower()
alias_names = [str(alias or "").strip().lower() for alias in getattr(bot_config, "alias_names", []) or []]
candidates = [name for name in [nickname, *alias_names] if name]
return any(name in target for name in candidates if target)
def build_context_paragraph(messages: List[Any], center_index: int) -> Optional[str]:
"""
构建包含中心消息上下文的段落前3条+后3条使用标准的 readable builder 输出
"""
if not messages or center_index < 0 or center_index >= len(messages):
return None
context_start = max(0, center_index - 3)
context_end = min(len(messages), center_index + 1 + 3)
context_messages = messages[context_start:context_end]
if not context_messages:
return None
try:
paragraph = build_readable_messages(
messages=context_messages,
replace_bot_name=True,
timestamp_mode="relative",
read_mark=0.0,
truncate=False,
show_actions=False,
show_pic=True,
message_id_list=None,
remove_emoji_stickers=False,
pic_single=True,
)
except Exception as e:
logger.warning(f"构建上下文段落失败: {e}")
return None
paragraph = paragraph.strip()
return paragraph or None
def is_bot_message(msg: Any) -> bool:
"""判断消息是否来自机器人自身"""
if msg is None:
return False
bot_config = getattr(global_config, "bot", None)
if not bot_config:
return False
platform = (
str(getattr(msg, "user_platform", "") or getattr(getattr(msg, "user_info", None), "platform", "") or "")
.strip()
.lower()
)
user_id = (
str(getattr(msg, "user_id", "") or getattr(getattr(msg, "user_info", None), "user_id", "") or "")
.strip()
)
if not platform or not user_id:
return False
platform_accounts = {}
try:
platform_accounts = parse_platform_accounts(getattr(bot_config, "platforms", []) or [])
except Exception:
platform_accounts = {}
bot_accounts: Dict[str, str] = {}
qq_account = str(getattr(bot_config, "qq_account", "") or "").strip()
if qq_account:
bot_accounts["qq"] = qq_account
telegram_account = str(getattr(bot_config, "telegram_account", "") or "").strip()
if telegram_account:
bot_accounts["telegram"] = telegram_account
for plat, account in platform_accounts.items():
if account and plat not in bot_accounts:
bot_accounts[plat] = account
bot_account = bot_accounts.get(platform)
return bool(bot_account and user_id == bot_account)
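To illustrate the chat_id bookkeeping the helpers above implement (a self-contained sketch with simplified names, not the project code): the chat_id column migrates from a bare string to a JSON list of [chat_id, count] pairs, and reads/writes have to accept both forms.

```python
import json
from typing import Any, List

def parse_ids(value: Any) -> List[List[Any]]:
    """Accept the legacy bare chat_id string or the new JSON list of [chat_id, count] pairs."""
    if not value:
        return []
    if isinstance(value, str):
        try:
            parsed = json.loads(value)
            return parsed if isinstance(parsed, list) else [[str(parsed), 1]]
        except json.JSONDecodeError:
            return [[value, 1]]  # legacy format: plain chat_id string
    return value if isinstance(value, list) else [[str(value), 1]]

def bump(chat_ids: List[List[Any]], chat_id: str) -> List[List[Any]]:
    """Increment the counter for chat_id, appending [chat_id, 1] if it is missing."""
    for item in chat_ids:
        if str(item[0]) == chat_id:
            item[1] = int(item[1]) + 1
            return chat_ids
    chat_ids.append([chat_id, 1])
    return chat_ids

legacy_row = "qq:12345"  # value stored by older versions
new_row = json.dumps(bump(parse_ids(legacy_row), "qq:67890"), ensure_ascii=False)
print(new_row)  # [["qq:12345", 1], ["qq:67890", 1]]
```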

View File

@ -8,11 +8,12 @@ from src.common.logger import get_logger
from src.config.config import global_config, model_config
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
from src.plugin_system.apis import llm_api
from src.common.database.database_model import ThinkingBack
from src.common.database.database_model import ThinkingBack, Jargon
from json_repair import repair_json
from src.memory_system.retrieval_tools import get_tool_registry, init_all_tools
from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
from src.llm_models.payload_content.message import MessageBuilder, RoleType, Message
from src.jargon.jargon_utils import parse_chat_id_list, chat_id_list_contains, contains_bot_self_name
logger = get_logger("memory_retrieval")
@ -63,27 +64,23 @@ def init_memory_retrieval_prompt():
2. 是否有需要回忆的内容比如"之前说过""上次""以前"
3. 是否有需要查找历史信息的问题
4. 是否有问题可以搜集信息帮助你聊天
5. 对话中是否包含黑话俚语缩写等可能需要查询的概念
重要提示
- **每次只能提出一个问题**选择最需要查询的关键问题
- 如果"最近已查询的问题和结果"中已经包含了类似的问题并得到了答案请避免重复生成相同或相似的问题不需要重复查询
- 如果之前已经查询过某个问题但未找到答案可以尝试用不同的方式提问或更具体的问题
如果你认为需要从记忆中检索信息来回答
1. 识别对话中可能需要查询的概念黑话/俚语/缩写/专有名词等关键词放入"concepts"字段
2. 根据上下文提出**一个**最关键的问题来帮助你回复目标消息放入"questions"字段
如果你认为需要从记忆中检索信息来回答请根据上下文提出**一个**最关键的问题来帮助你回复目标消息放入"questions"字段
问题格式示例
- "xxx在前几天干了什么"
- "xxx是什么"
- "xxx是什么,在什么时候提到过?"
- "xxxx和xxx的关系是什么"
- "xxx在某个时间点发生了什么"
输出格式示例需要检索时
```json
{{
"concepts": ["AAA", "BBB", "CCC"], #需要检索的概念列表(字符串数组),如果不需要检索概念则输出空数组[]
"questions": ["张三在前几天干了什么"] #问题数组(字符串数组),如果不需要检索记忆则输出空数组[],如果需要检索则只输出包含一个问题的数组 "questions": ["张三在前几天干了什么"] #问题数组(字符串数组),如果不需要检索记忆则输出空数组[],如果需要检索则只输出包含一个问题的数组
}} }}
``` ```
@ -91,7 +88,6 @@ def init_memory_retrieval_prompt():
输出格式示例不需要检索时
```json
{{
"concepts": [],
"questions": [] "questions": []
}} }}
``` ```
@ -280,6 +276,54 @@ async def _retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> s
return "" return ""
def _match_jargon_from_text(chat_text: str, chat_id: str) -> List[str]:
"""直接在聊天文本中匹配已知的jargon返回出现过的黑话列表"""
if not chat_text or not chat_text.strip():
return []
start_time = time.time()
query = Jargon.select().where((Jargon.meaning.is_null(False)) & (Jargon.meaning != ""))
if global_config.jargon.all_global:
query = query.where(Jargon.is_global)
query = query.order_by(Jargon.count.desc())
query_time = time.time()
matched: Dict[str, None] = {}
for jargon in query:
content = (jargon.content or "").strip()
if not content:
continue
if contains_bot_self_name(content):
continue
if not global_config.jargon.all_global and not jargon.is_global:
chat_id_list = parse_chat_id_list(jargon.chat_id)
if not chat_id_list_contains(chat_id_list, chat_id):
continue
pattern = re.escape(content)
if re.search(r"[\u4e00-\u9fff]", content):
search_pattern = pattern
else:
search_pattern = r"\b" + pattern + r"\b"
if re.search(search_pattern, chat_text, re.IGNORECASE):
matched[content] = None
end_time = time.time()
logger.info(
f"记忆检索黑话匹配: 查询耗时 {(query_time - start_time):.3f}s, "
f"匹配耗时 {(end_time - query_time):.3f}s, 总耗时 {(end_time - start_time):.3f}s, "
f"匹配到 {len(matched)} 个黑话"
)
return list(matched.keys())
async def _react_agent_solve_question(
question: str, chat_id: str, max_iterations: int = 5, timeout: float = 30.0, initial_info: str = ""
) -> Tuple[bool, str, List[Dict[str, Any]], bool]:
@ -991,11 +1035,17 @@ async def build_memory_retrieval_prompt(
return "" return ""
# 解析概念列表和问题列表 # 解析概念列表和问题列表
concepts, questions = _parse_questions_json(response) _, questions = _parse_questions_json(response)
logger.info(f"解析到 {len(concepts)} 个概念: {concepts}")
logger.info(f"解析到 {len(questions)} 个问题: {questions}") logger.info(f"解析到 {len(questions)} 个问题: {questions}")
# 对概念进行jargon检索作为初始信息 # 使用匹配逻辑自动识别聊天中的黑话概念
concepts = _match_jargon_from_text(message, chat_id)
if concepts:
logger.info(f"黑话匹配命中 {len(concepts)} 个概念: {concepts}")
else:
logger.info("黑话匹配未命中任何概念")
# 对匹配到的概念进行jargon检索作为初始信息
initial_info = "" initial_info = ""
if concepts: if concepts:
logger.info(f"开始对 {len(concepts)} 个概念进行jargon检索") logger.info(f"开始对 {len(concepts)} 个概念进行jargon检索")
@ -1026,8 +1076,6 @@ async def build_memory_retrieval_prompt(
else:
return ""
logger.info(f"解析到 {len(questions)} 个问题: {questions}")
# 第二步:并行处理所有问题(使用配置的最大迭代次数/120秒超时
max_iterations = global_config.memory.max_agent_iterations
logger.info(f"问题数量: {len(questions)},设置最大迭代次数: {max_iterations},超时时间: 120秒")

View File

@ -1,5 +1,5 @@
[inner]
version = "6.23.1"
version = "6.23.4"
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
#如果你想要修改配置文件请递增version的值