better：优化记忆提取能力

2025-11-11 23:57:47 +08:00 · 2025-11-11 23:57:47 +08:00 · 2d6eba7da1
parent 82a87f4926
commit 2d6eba7da1
3 changed files with 185 additions and 122 deletions
--- a/src/chat/frequency_control/frequency_control.py
+++ b/src/chat/frequency_control/frequency_control.py
@ -112,10 +112,10 @@ class FrequencyControl:
            if len(response) < 20:
                if "过于频繁" in response:
                    logger.info(f"频率调整: 过于频繁，调整值到{final_value_by_api}")
-                    self.talk_frequency_adjust = max(0.1, min(3.0, self.talk_frequency_adjust * 0.8))
+                    self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 0.8))
                elif "过少" in response:
                    logger.info(f"频率调整: 过少，调整值到{final_value_by_api}")
-                    self.talk_frequency_adjust = max(0.1, min(3.0, self.talk_frequency_adjust * 1.2))
+                    self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 1.2))
                self.last_frequency_adjust_time = time.time()
            else:
                logger.info(f"频率调整：response不符合要求，取消本次调整")    
--- a/src/chat/utils/statistic.py
+++ b/src/chat/utils/statistic.py
@ -690,9 +690,6 @@ class StatisticOutputTask(AsyncTask):
        online_hours = stats[ONLINE_TIME] / 3600.0 if stats[ONLINE_TIME] > 0 else 0.0
        cost_per_hour = stats[TOTAL_COST] / online_hours if online_hours > 0 else 0.0
        
-        # 计算token/消息数量指标（每100条）
-        tokens_per_100_messages = (total_tokens / stats[TOTAL_MSG_CNT] * 100) if stats[TOTAL_MSG_CNT] > 0 else 0.0
-        
        # 计算token/时间指标（token/小时）
        tokens_per_hour = (total_tokens / online_hours) if online_hours > 0 else 0.0
        
@ -700,8 +697,9 @@ class StatisticOutputTask(AsyncTask):
        total_replies = stats.get(TOTAL_REPLY_CNT, 0)
        cost_per_100_replies = (stats[TOTAL_COST] / total_replies * 100) if total_replies > 0 else 0.0
        
-        # 计算token/回复数量指标（每100条）
-        tokens_per_100_replies = (total_tokens / total_replies * 100) if total_replies > 0 else 0.0
+        # 计算花费/消息数量（排除自己回复）指标（每100条）
+        total_messages_excluding_replies = stats[TOTAL_MSG_CNT] - total_replies
+        cost_per_100_messages_excluding_replies = (stats[TOTAL_COST] / total_messages_excluding_replies * 100) if total_messages_excluding_replies > 0 else 0.0

        output = [
            f"总在线时间: {_format_online_time(stats[ONLINE_TIME])}",
@ -711,10 +709,9 @@ class StatisticOutputTask(AsyncTask):
            f"总Token数: {_format_large_number(total_tokens)}",
            f"总花费: {stats[TOTAL_COST]:.2f}¥",
            f"花费/消息数量: {cost_per_100_messages:.4f}¥/100条" if stats[TOTAL_MSG_CNT] > 0 else "花费/消息数量: N/A",
-            f"花费/回复数量: {cost_per_100_replies:.4f}¥/100条" if total_replies > 0 else "花费/回复数量: N/A",
+            f"花费/接受消息数量: {cost_per_100_messages_excluding_replies:.4f}¥/100条" if total_messages_excluding_replies > 0 else "花费/消息数量(排除回复): N/A",
+            f"花费/回复消息数量: {cost_per_100_replies:.4f}¥/100条" if total_replies > 0 else "花费/回复数量: N/A",
            f"花费/时间: {cost_per_hour:.2f}¥/小时" if online_hours > 0 else "花费/时间: N/A",
-            f"Token/消息数量: {_format_large_number(tokens_per_100_messages)}/100条" if stats[TOTAL_MSG_CNT] > 0 else "Token/消息数量: N/A",
-            f"Token/回复数量: {_format_large_number(tokens_per_100_replies)}/100条" if total_replies > 0 else "Token/回复数量: N/A",
            f"Token/时间: {_format_large_number(tokens_per_hour)}/小时" if online_hours > 0 else "Token/时间: N/A",
            "",
        ]
@ -933,25 +930,21 @@ class StatisticOutputTask(AsyncTask):
                        <div class=\"kpi-value\">{(stat_data[TOTAL_COST] / stat_data[TOTAL_MSG_CNT] * 100 if stat_data[TOTAL_MSG_CNT] > 0 else 0.0):.4f} ¥/100条</div>
                    </div>
                    <div class=\"kpi-card\">
-                        <div class=\"kpi-title\">花费/时间</div>
-                        <div class=\"kpi-value\">{(stat_data[TOTAL_COST] / (stat_data[ONLINE_TIME] / 3600.0) if stat_data[ONLINE_TIME] > 0 else 0.0):.2f} ¥/小时</div>
-                    </div>
-                    <div class=\"kpi-card\">
-                        <div class=\"kpi-title\">Token/消息数量</div>
-                        <div class=\"kpi-value\">{_format_large_number(sum(stat_data[TOTAL_TOK_BY_MODEL].values()) / stat_data[TOTAL_MSG_CNT] * 100 if stat_data[TOTAL_MSG_CNT] > 0 and stat_data[TOTAL_TOK_BY_MODEL] else 0.0, html=True)}/100条</div>
-                    </div>
-                    <div class=\"kpi-card\">
-                        <div class=\"kpi-title\">Token/回复数量</div>
-                        <div class=\"kpi-value\">{_format_large_number(sum(stat_data[TOTAL_TOK_BY_MODEL].values()) / stat_data.get(TOTAL_REPLY_CNT, 0) * 100 if stat_data.get(TOTAL_REPLY_CNT, 0) > 0 and stat_data[TOTAL_TOK_BY_MODEL] else 0.0, html=True)}/100条</div>
-                    </div>
-                    <div class=\"kpi-card\">
-                        <div class=\"kpi-title\">Token/时间</div>
-                        <div class=\"kpi-value\">{_format_large_number(sum(stat_data[TOTAL_TOK_BY_MODEL].values()) / (stat_data[ONLINE_TIME] / 3600.0) if stat_data[ONLINE_TIME] > 0 and stat_data[TOTAL_TOK_BY_MODEL] else 0.0, html=True)}/小时</div>
+                        <div class=\"kpi-title\">花费/消息数量(排除回复)</div>
+                        <div class=\"kpi-value\">{(stat_data[TOTAL_COST] / (stat_data[TOTAL_MSG_CNT] - stat_data.get(TOTAL_REPLY_CNT, 0)) * 100 if (stat_data[TOTAL_MSG_CNT] - stat_data.get(TOTAL_REPLY_CNT, 0)) > 0 else 0.0):.4f} ¥/100条</div>
                    </div>
                    <div class=\"kpi-card\">
                        <div class=\"kpi-title\">花费/回复数量</div>
                        <div class=\"kpi-value\">{(stat_data[TOTAL_COST] / stat_data.get(TOTAL_REPLY_CNT, 0) * 100 if stat_data.get(TOTAL_REPLY_CNT, 0) > 0 else 0.0):.4f} ¥/100条</div>
                    </div>
+                    <div class=\"kpi-card\">
+                        <div class=\"kpi-title\">花费/时间</div>
+                        <div class=\"kpi-value\">{(stat_data[TOTAL_COST] / (stat_data[ONLINE_TIME] / 3600.0) if stat_data[ONLINE_TIME] > 0 else 0.0):.2f} ¥/小时</div>
+                    </div>
+                    <div class=\"kpi-card\">
+                        <div class=\"kpi-title\">Token/时间</div>
+                        <div class=\"kpi-value\">{_format_large_number(sum(stat_data[TOTAL_TOK_BY_MODEL].values()) / (stat_data[ONLINE_TIME] / 3600.0) if stat_data[ONLINE_TIME] > 0 and stat_data[TOTAL_TOK_BY_MODEL] else 0.0, html=True)}/小时</div>
+                    </div>
                </div>
                
                <h2>按模型分类统计</h2>
@ -1805,10 +1798,8 @@ class StatisticOutputTask(AsyncTask):
        # 初始化数据结构
        cost_per_100_messages = [0.0] * len(time_points)  # 花费/消息数量（每100条）
        cost_per_hour = [0.0] * len(time_points)  # 花费/时间（每小时）
-        tokens_per_100_messages = [0.0] * len(time_points)  # Token/消息数量（每100条）
        tokens_per_hour = [0.0] * len(time_points)  # Token/时间（每小时）
        cost_per_100_replies = [0.0] * len(time_points)  # 花费/回复数量（每100条）
-        tokens_per_100_replies = [0.0] * len(time_points)  # Token/回复数量（每100条）
        
        # 每个时间点的累计数据
        total_costs = [0.0] * len(time_points)
@ -1882,10 +1873,6 @@ class StatisticOutputTask(AsyncTask):
            if total_online_hours[idx] > 0:
                cost_per_hour[idx] = (total_costs[idx] / total_online_hours[idx])
            
-            # Token/消息数量（每100条）
-            if total_messages[idx] > 0:
-                tokens_per_100_messages[idx] = (total_tokens[idx] / total_messages[idx] * 100)
-            
            # Token/时间（每小时）
            if total_online_hours[idx] > 0:
                tokens_per_hour[idx] = (total_tokens[idx] / total_online_hours[idx])
@ -1893,10 +1880,6 @@ class StatisticOutputTask(AsyncTask):
            # 花费/回复数量（每100条）
            if total_replies[idx] > 0:
                cost_per_100_replies[idx] = (total_costs[idx] / total_replies[idx] * 100)
-            
-            # Token/回复数量（每100条）
-            if total_replies[idx] > 0:
-                tokens_per_100_replies[idx] = (total_tokens[idx] / total_replies[idx] * 100)
        
        # 生成时间标签
        if interval_hours == 1:
@ -1908,10 +1891,8 @@ class StatisticOutputTask(AsyncTask):
            "time_labels": time_labels,
            "cost_per_100_messages": cost_per_100_messages,
            "cost_per_hour": cost_per_hour,
-            "tokens_per_100_messages": tokens_per_100_messages,
            "tokens_per_hour": tokens_per_hour,
            "cost_per_100_replies": cost_per_100_replies,
-            "tokens_per_100_replies": tokens_per_100_replies,
        }
    
    def _generate_metrics_tab(self, metrics_data: dict) -> str:
@ -1919,10 +1900,8 @@ class StatisticOutputTask(AsyncTask):
        colors = {
            "cost_per_100_messages": "#8b5cf6",
            "cost_per_hour": "#9f8efb",
-            "tokens_per_100_messages": "#b5a6ff",
            "tokens_per_hour": "#c7bbff",
            "cost_per_100_replies": "#d9ceff",
-            "tokens_per_100_replies": "#a78bfa",
        }
        
        return f"""
@ -1944,17 +1923,11 @@ class StatisticOutputTask(AsyncTask):
                <div style="margin-bottom: 40px;">
                    <canvas id="costPerHourChart" width="800" height="400"></canvas>
                </div>
-                <div style="margin-bottom: 40px;">
-                    <canvas id="tokensPer100MessagesChart" width="800" height="400"></canvas>
-                </div>
                <div style="margin-bottom: 40px;">
                    <canvas id="tokensPerHourChart" width="800" height="400"></canvas>
                </div>
-                <div style="margin-bottom: 40px;">
-                    <canvas id="costPer100RepliesChart" width="800" height="400"></canvas>
-                </div>
                <div>
-                    <canvas id="tokensPer100RepliesChart" width="800" height="400"></canvas>
+                    <canvas id="costPer100RepliesChart" width="800" height="400"></canvas>
                </div>
            </div>
            
@ -2001,13 +1974,6 @@ class StatisticOutputTask(AsyncTask):
                        dataKey: 'cost_per_hour',
                        color: '{colors["cost_per_hour"]}'
                    }},
-                    tokensPer100Messages: {{
-                        id: 'tokensPer100MessagesChart',
-                        title: 'Token/消息数量',
-                        yAxisLabel: 'Token (/100条)',
-                        dataKey: 'tokens_per_100_messages',
-                        color: '{colors["tokens_per_100_messages"]}'
-                    }},
                    tokensPerHour: {{
                        id: 'tokensPerHourChart',
                        title: 'Token/时间',
@ -2021,13 +1987,6 @@ class StatisticOutputTask(AsyncTask):
                        yAxisLabel: '花费 (¥/100条)',
                        dataKey: 'cost_per_100_replies',
                        color: '{colors["cost_per_100_replies"]}'
-                    }},
-                    tokensPer100Replies: {{
-                        id: 'tokensPer100RepliesChart',
-                        title: 'Token/回复数量',
-                        yAxisLabel: 'Token (/100条)',
-                        dataKey: 'tokens_per_100_replies',
-                        color: '{colors["tokens_per_100_replies"]}'
                    }}
                }};
                
@ -2054,10 +2013,8 @@ class StatisticOutputTask(AsyncTask):
                    // 重新创建图表
                    createMetricsChart('costPer100Messages', data, timeScale);
                    createMetricsChart('costPerHour', data, timeScale);
-                    createMetricsChart('tokensPer100Messages', data, timeScale);
                    createMetricsChart('tokensPerHour', data, timeScale);
                    createMetricsChart('costPer100Replies', data, timeScale);
-                    createMetricsChart('tokensPer100Replies', data, timeScale);
                }}
                
                function createMetricsChart(chartType, data, timeScale) {{
--- a/src/memory_system/memory_retrieval.py
+++ b/src/memory_system/memory_retrieval.py
@ -23,7 +23,7 @@ def init_memory_retrieval_prompt():
    # 第一步：问题生成prompt
    Prompt(
        """
-你是一个专门检测是否需要回忆的助手。你的名字是{bot_name}。现在是{time_now}。
+你的名字是{bot_name}。现在是{time_now}。
 群里正在进行的聊天内容：
 {chat_history}

@ -34,6 +34,7 @@ def init_memory_retrieval_prompt():
 1. 对话中是否提到了过去发生的事情、人物、事件或信息
 2. 是否有需要回忆的内容（比如"之前说过"、"上次"、"以前"等）
 3. 是否有需要查找历史信息的问题
+4. 是否有问题可以搜集信息帮助你聊天

 重要提示：
 - 如果"最近已查询的问题和结果"中已经包含了类似的问题，请避免重复生成相同或相似的问题
@ -65,7 +66,9 @@ def init_memory_retrieval_prompt():
    
    # 第二步：ReAct Agent prompt（工具描述会在运行时动态生成）
    Prompt(
-        """你需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。
+        """
+你的名字是{bot_name}，你正在参与聊天，你需要搜集信息来回答问题，帮助你参与聊天。
+你需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。

 当前问题：{question}
 已收集的信息：
@ -78,14 +81,20 @@ def init_memory_retrieval_prompt():
 ```json
 {{
  "thought": "你的思考过程，分析当前情况，决定下一步行动",
-  "action_type": {action_types_list},
-  "action_params": {{参数名: 参数值}} 或 null
+  "actions": [
+    {{
+      "action_type": {action_types_list},
+      "action_params": {{参数名: 参数值}} 或 null
+    }}
+  ]
 }}
 ```

-你可以选择以下动作：
-1. 如果已经收集到足够的信息可以回答问题，请设置action_type为"final_answer"，并在thought中说明答案。除非明确找到答案，否则不要设置为final_answer。
-2. 如果经过多次查询后，确认无法找到相关信息或答案，请设置action_type为"no_answer"，并在thought中说明原因。
+重要说明：
+- 你可以在一次迭代中执行多个查询，将多个action放在actions数组中
+- 如果只需要执行一个查询，actions数组中只包含一个action即可
+- 如果已经收集到足够的信息可以回答问题，请设置actions为包含一个action_type为"final_answer"的数组，并在thought中说明答案。除非明确找到答案，否则不要设置为final_answer。
+- 如果经过多次查询后，确认无法找到相关信息或答案，请设置actions为包含一个action_type为"no_answer"的数组，并在thought中说明原因。

 请只输出JSON，不要输出其他内容：
 """,
@ -101,6 +110,8 @@ def _parse_react_response(response: str) -> Optional[Dict[str, Any]]:
        
    Returns:
        Dict[str, Any]: 解析后的动作信息，如果解析失败返回None
+        格式: {"thought": str, "actions": List[Dict[str, Any]]}
+        每个action格式: {"action_type": str, "action_params": dict}
    """
    try:
        # 尝试提取JSON（可能包含在```json代码块中）
@ -123,6 +134,20 @@ def _parse_react_response(response: str) -> Optional[Dict[str, Any]]:
            logger.warning(f"解析的JSON不是对象格式: {action_info}")
            return None
        
+        # 确保actions字段存在且为列表
+        if "actions" not in action_info:
+            logger.warning(f"响应中缺少actions字段: {action_info}")
+            return None
+        
+        if not isinstance(action_info["actions"], list):
+            logger.warning(f"actions字段不是数组格式: {action_info['actions']}")
+            return None
+        
+        # 确保actions不为空
+        if len(action_info["actions"]) == 0:
+            logger.warning("actions数组为空")
+            return None
+        
        return action_info
        
    except Exception as e:
@ -163,9 +188,13 @@ async def _react_agent_solve_question(
        # 获取工具注册器
        tool_registry = get_tool_registry()
        
+        # 获取bot_name
+        bot_name = global_config.bot.nickname
+        
        # 构建prompt（动态生成工具描述）
        prompt = await global_prompt_manager.format_prompt(
            "memory_retrieval_react_prompt",
+            bot_name=bot_name,
            question=question,
            collected_info=collected_info if collected_info else "暂无信息",
            tools_description=tool_registry.get_tools_description(),
@ -196,45 +225,50 @@ async def _react_agent_solve_question(
            break
        
        thought = action_info.get("thought", "")
-        action_type = action_info.get("action_type", "")
-        action_params = action_info.get("action_params", {})
+        actions = action_info.get("actions", [])
        
        logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 思考: {thought}")
-        logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 动作类型: {action_type}")
-        logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 动作参数: {action_params}")
+        logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 动作数量: {len(actions)}")
        
-        # 记录思考步骤
+        # 记录思考步骤（包含所有actions）
        step = {
            "iteration": iteration + 1,
            "thought": thought,
-            "action_type": action_type,
-            "action_params": action_params,
-            "observation": ""
+            "actions": actions,
+            "observations": []
        }
        
-        # 执行动作
-        if action_type == "final_answer":
-            # Agent认为已经找到答案
-            answer = thought  # 使用thought作为答案
-            step["observation"] = "找到答案"
-            thinking_steps.append(step)
-            logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 找到最终答案: {answer}")
-            return True, answer, thinking_steps
+        # 检查是否有final_answer或no_answer
+        for action in actions:
+            action_type = action.get("action_type", "")
+            if action_type == "final_answer":
+                # Agent认为已经找到答案
+                answer = thought  # 使用thought作为答案
+                step["observations"] = ["找到答案"]
+                thinking_steps.append(step)
+                logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 找到最终答案: {answer}")
+                return True, answer, thinking_steps
+            elif action_type == "no_answer":
+                # Agent确认无法找到答案
+                answer = thought  # 使用thought说明无法找到答案的原因
+                step["observations"] = ["确认无法找到答案"]
+                thinking_steps.append(step)
+                logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 确认无法找到答案: {answer}")
+                return False, answer, thinking_steps
        
-        elif action_type == "no_answer":
-            # Agent确认无法找到答案
-            answer = thought  # 使用thought说明无法找到答案的原因
-            step["observation"] = "确认无法找到答案"
-            thinking_steps.append(step)
-            logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 确认无法找到答案: {answer}")
-            return False, answer, thinking_steps
-        
-        # 使用工具注册器执行工具
+        # 并行执行所有工具
        tool_registry = get_tool_registry()
-        tool = tool_registry.get_tool(action_type)
+        tool_tasks = []
        
-        if tool:
-            try:
+        for i, action in enumerate(actions):
+            action_type = action.get("action_type", "")
+            action_params = action.get("action_params", {})
+            
+            logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 动作 {i+1}/{len(actions)}: {action_type}({action_params})")
+            
+            tool = tool_registry.get_tool(action_type)
+            
+            if tool:
                # 准备工具参数（需要添加chat_id如果工具需要）
                tool_params = action_params.copy()
                
@ -244,31 +278,38 @@ async def _react_agent_solve_question(
                if "chat_id" in sig.parameters:
                    tool_params["chat_id"] = chat_id
                
-                logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 执行工具: {action_type}({tool_params})")
+                # 创建异步任务
+                async def execute_single_tool(tool_instance, params, act_type, act_params, iter_num):
+                    try:
+                        observation = await tool_instance.execute(**params)
+                        param_str = ", ".join([f"{k}={v}" for k, v in act_params.items()])
+                        return f"查询{act_type}({param_str})的结果：{observation}"
+                    except Exception as e:
+                        error_msg = f"工具执行失败: {str(e)}"
+                        logger.error(f"ReAct Agent 第 {iter_num + 1} 次迭代 动作 {act_type} {error_msg}")
+                        return f"查询{act_type}失败: {error_msg}"
                
-                # 执行工具
-                observation = await tool.execute(**tool_params)
-                step["observation"] = observation
+                tool_tasks.append(execute_single_tool(tool, tool_params, action_type, action_params, iteration))
+            else:
+                error_msg = f"未知的工具类型: {action_type}"
+                logger.warning(f"ReAct Agent 第 {iteration + 1} 次迭代 动作 {i+1}/{len(actions)} {error_msg}")
+                tool_tasks.append(asyncio.create_task(asyncio.sleep(0, result=f"查询{action_type}失败: {error_msg}")))
+        
+        # 并行执行所有工具
+        if tool_tasks:
+            observations = await asyncio.gather(*tool_tasks, return_exceptions=True)
+            
+            # 处理执行结果
+            for i, observation in enumerate(observations):
+                if isinstance(observation, Exception):
+                    observation = f"工具执行异常: {str(observation)}"
+                    logger.error(f"ReAct Agent 第 {iteration + 1} 次迭代 动作 {i+1} 执行异常: {observation}")
                
-                # 构建收集信息的描述
-                param_str = ", ".join([f"{k}={v}" for k, v in action_params.items()])
-                collected_info += f"\n查询{action_type}({param_str})的结果：{observation}\n"
-                
-                logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 工具执行结果: {observation}")
-            except Exception as e:
-                error_msg = f"工具执行失败: {str(e)}"
-                step["observation"] = error_msg
-                logger.error(f"ReAct Agent 第 {iteration + 1} 次迭代 {error_msg}")
-        else:
-            step["observation"] = f"未知的工具类型: {action_type}"
-            logger.warning(f"ReAct Agent 第 {iteration + 1} 次迭代 未知的工具类型: {action_type}")
+                step["observations"].append(observation)
+                collected_info += f"\n{observation}\n"
+                logger.info(f"ReAct Agent 第 {iteration + 1} 次迭代 动作 {i+1} 执行结果: {observation}")
        
        thinking_steps.append(step)
-        
-        # 如果观察结果为空或无效，继续下一轮
-        if step["observation"] and "无有效信息" not in step["observation"] and "未找到" not in step["observation"]:
-            # 有有效信息，继续思考
-            pass
    
    # 达到最大迭代次数或超时，但Agent没有明确返回final_answer
    # 迭代超时应该直接视为no_answer，而不是使用已有信息
@ -333,6 +374,47 @@ def _get_recent_query_history(chat_id: str, time_window_seconds: float = 300.0)
        return ""


+def _get_cached_memories(chat_id: str, time_window_seconds: float = 300.0) -> List[str]:
+    """Fetch recently cached memories for a chat, keeping only records that found an answer.
+    
+    Args:
+        chat_id: ID of the chat whose ThinkingBack records are queried.
+        time_window_seconds: Look-back window in seconds; defaults to 300 seconds (5 minutes).
+        
+    Returns:
+        List[str]: Formatted memories, each shaped like "问题：xxx\n答案：xxx"; empty if none match or the query raises.
+    """
+    try:
+        current_time = time.time()
+        start_time = current_time - time_window_seconds
+        
+        # Select records inside the time window that found an answer, newest update_time first.
+        records = (
+            ThinkingBack.select()
+            .where(
+                (ThinkingBack.chat_id == chat_id) &
+                (ThinkingBack.update_time >= start_time) &
+                (ThinkingBack.found_answer == 1)
+            )
+            .order_by(ThinkingBack.update_time.desc())
+            .limit(5)  # At most 5 rows; rows with an empty answer are dropped below, so fewer may be returned.
+        )
+        
+        if not records.exists():
+            return []
+        
+        cached_memories = []
+        for record in records:
+            if record.answer:
+                cached_memories.append(f"问题：{record.question}\n答案：{record.answer}")
+        
+        return cached_memories
+        
+    except Exception as e:
+        logger.error(f"获取缓存记忆失败: {e}")
+        return []
+
+
 def _query_thinking_back(chat_id: str, question: str) -> Optional[Tuple[bool, str]]:
    """从thinking_back数据库中查询是否有现成的答案
    
@ -441,11 +523,11 @@ def _get_max_iterations_by_question_count(question_count: int) -> int:
        int: 最大迭代次数
    """
    if question_count == 1:
-        return 5
+        return 6
    elif question_count == 2:
        return 3
    else:  # 3个或以上
-        return 1
+        return 2


 async def _process_single_question(
@ -587,9 +669,19 @@ async def build_memory_retrieval_prompt(
        # 解析问题列表
        questions = _parse_questions_json(response)
        
+        # 获取缓存的记忆（与question时使用相同的时间窗口和数量限制）
+        cached_memories = _get_cached_memories(chat_id, time_window_seconds=300.0)
+        
        if not questions:
            logger.debug("模型认为不需要检索记忆或解析失败")
-            return ""
+            # 即使没有当次查询，也返回缓存的记忆
+            if cached_memories:
+                retrieved_memory = "\n\n".join(cached_memories)
+                end_time = time.time()
+                logger.info(f"无当次查询，返回缓存记忆，耗时: {(end_time - start_time):.3f}秒，包含 {len(cached_memories)} 条缓存记忆")
+                return f"你回忆起了以下信息：\n{retrieved_memory}\n请在回复时参考这些回忆的信息。\n"
+            else:
+                return ""
        
        logger.info(f"解析到 {len(questions)} 个问题: {questions}")
        
@ -613,20 +705,34 @@ async def build_memory_retrieval_prompt(
        
        # 收集所有有效结果
        all_results = []
+        current_questions = set()  # 用于去重，避免缓存和当次查询重复
        for i, result in enumerate(results):
            if isinstance(result, Exception):
                logger.error(f"处理问题 '{questions[i]}' 时发生异常: {result}")
            elif result is not None:
                all_results.append(result)
+                # 提取问题用于去重
+                if result.startswith("问题："):
+                    question = result.split("\n")[0].replace("问题：", "").strip()
+                    current_questions.add(question)
+        
+        # 将缓存的记忆添加到结果中（排除当次查询已包含的问题，避免重复）
+        for cached_memory in cached_memories:
+            if cached_memory.startswith("问题："):
+                question = cached_memory.split("\n")[0].replace("问题：", "").strip()
+                # 只有当次查询中没有相同问题时，才添加缓存记忆
+                if question not in current_questions:
+                    all_results.append(cached_memory)
+                    logger.debug(f"添加缓存记忆: {question[:50]}...")
        
        end_time = time.time()
        
        if all_results:
            retrieved_memory = "\n\n".join(all_results)
-            logger.info(f"记忆检索成功，耗时: {(end_time - start_time):.3f}秒")
+            logger.info(f"记忆检索成功，耗时: {(end_time - start_time):.3f}秒，包含 {len(all_results)} 条记忆（含缓存）")
            return f"你回忆起了以下信息：\n{retrieved_memory}\n请在回复时参考这些回忆的信息。\n"
        else:
-            logger.debug("所有问题均未找到答案")
+            logger.debug("所有问题均未找到答案，且无缓存记忆")
            return ""
            
    except Exception as e: