feat: optimize memory retrieval and stopping

pull/1481/head
SengokuCola 2025-12-31 19:34:33 +08:00
parent c5276ce629
commit 57b92ca124
5 changed files with 163 additions and 1130 deletions

View File

@@ -1,507 +0,0 @@
import argparse
import asyncio
import os
import sys
import time
import json
import importlib
from typing import Dict, Any
from datetime import datetime
# Force UTF-8 to avoid console encoding errors
try:
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")
    if hasattr(sys.stderr, "reconfigure"):
        sys.stderr.reconfigure(encoding="utf-8")
except Exception:
    pass

# Make sure src.* can be imported
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from src.common.logger import initialize_logging, get_logger
from src.common.database.database import db
from src.common.database.database_model import LLMUsage

logger = get_logger("compare_finish_search_token")
def get_token_usage_since(start_time: float) -> Dict[str, Any]:
    """Get the token usage recorded since the given time.

    Args:
        start_time: start timestamp

    Returns:
        Dict of token usage statistics
    """
    try:
        start_datetime = datetime.fromtimestamp(start_time)
        # Query every memory-related token usage record from start_time until now
        records = (
            LLMUsage.select()
            .where(
                (LLMUsage.timestamp >= start_datetime)
                & (
                    (LLMUsage.request_type.like("%memory%"))
                    | (LLMUsage.request_type == "memory.question")
                    | (LLMUsage.request_type == "memory.react")
                    | (LLMUsage.request_type == "memory.react.final")
                )
            )
            .order_by(LLMUsage.timestamp.asc())
        )

        total_prompt_tokens = 0
        total_completion_tokens = 0
        total_tokens = 0
        total_cost = 0.0
        request_count = 0
        model_usage = {}  # per-model statistics

        for record in records:
            total_prompt_tokens += record.prompt_tokens or 0
            total_completion_tokens += record.completion_tokens or 0
            total_tokens += record.total_tokens or 0
            total_cost += record.cost or 0.0
            request_count += 1

            # Aggregate per model
            model_name = record.model_name or "unknown"
            if model_name not in model_usage:
                model_usage[model_name] = {
                    "prompt_tokens": 0,
                    "completion_tokens": 0,
                    "total_tokens": 0,
                    "cost": 0.0,
                    "request_count": 0,
                }
            model_usage[model_name]["prompt_tokens"] += record.prompt_tokens or 0
            model_usage[model_name]["completion_tokens"] += record.completion_tokens or 0
            model_usage[model_name]["total_tokens"] += record.total_tokens or 0
            model_usage[model_name]["cost"] += record.cost or 0.0
            model_usage[model_name]["request_count"] += 1

        return {
            "total_prompt_tokens": total_prompt_tokens,
            "total_completion_tokens": total_completion_tokens,
            "total_tokens": total_tokens,
            "total_cost": total_cost,
            "request_count": request_count,
            "model_usage": model_usage,
        }
    except Exception as e:
        logger.error(f"Failed to fetch token usage: {e}")
        return {
            "total_prompt_tokens": 0,
            "total_completion_tokens": 0,
            "total_tokens": 0,
            "total_cost": 0.0,
            "request_count": 0,
            "model_usage": {},
        }
def _import_memory_retrieval():
    """Import the memory_retrieval module dynamically via importlib to avoid circular imports."""
    try:
        # Import prompt_builder first and check whether the prompt has already been initialized
        from src.chat.utils.prompt_builder import global_prompt_manager

        # If the memory_retrieval prompts are already registered, the module may
        # have been initialized through some other import path.
        prompt_already_init = "memory_retrieval_question_prompt" in global_prompt_manager._prompts
        module_name = "src.memory_system.memory_retrieval"

        # If the prompt is initialized, try to reuse the already-loaded module
        if prompt_already_init and module_name in sys.modules:
            existing_module = sys.modules[module_name]
            if hasattr(existing_module, 'init_memory_retrieval_prompt'):
                return (
                    existing_module.init_memory_retrieval_prompt,
                    existing_module._react_agent_solve_question,
                )

        # If the module is in sys.modules but only partially initialized, remove it first
        if module_name in sys.modules:
            existing_module = sys.modules[module_name]
            if not hasattr(existing_module, 'init_memory_retrieval_prompt'):
                # Partially initialized module; drop it and re-import
                logger.warning(f"Detected partially initialized module {module_name}; re-importing")
                del sys.modules[module_name]
                # Also clear any related partially initialized modules
                keys_to_remove = []
                for key in sys.modules.keys():
                    if key.startswith('src.memory_system.') and key != 'src.memory_system':
                        keys_to_remove.append(key)
                for key in keys_to_remove:
                    try:
                        del sys.modules[key]
                    except KeyError:
                        pass

        # Before importing memory_retrieval, make sure every module that could trigger the
        # circular import is fully loaded; importing them may itself import memory_retrieval.
        try:
            # Load the modules that can trigger the circular import and let them finish initializing
            import src.config.config
            import src.chat.utils.prompt_builder

            # These modules may import memory_retrieval at module level; if they are
            # already imported, this just ensures they are fully initialized.
            try:
                import src.chat.replyer.group_generator  # noqa: F401
            except (ImportError, AttributeError):
                pass  # Ignore import failures and continue
            try:
                import src.chat.replyer.private_generator  # noqa: F401
            except (ImportError, AttributeError):
                pass  # Ignore import failures and continue
        except Exception as e:
            logger.warning(f"Warning while preloading dependency modules: {e}")

        # Now import memory_retrieval. If this still triggers a circular import,
        # some other module imports memory_retrieval at module level.
        memory_retrieval_module = importlib.import_module(module_name)
        return (
            memory_retrieval_module.init_memory_retrieval_prompt,
            memory_retrieval_module._react_agent_solve_question,
        )
    except (ImportError, AttributeError) as e:
        logger.error(f"Failed to import the memory_retrieval module: {e}", exc_info=True)
        raise
def _init_tools_without_finish_search():
    """Initialize the tools without registering finish_search."""
    from src.memory_system.retrieval_tools import (
        register_query_chat_history,
        register_query_person_info,
        register_query_words,
    )
    from src.memory_system.retrieval_tools.tool_registry import get_tool_registry
    from src.config.config import global_config

    # Clear the tool registry
    tool_registry = get_tool_registry()
    tool_registry.tools.clear()

    # Register every tool except finish_search
    register_query_chat_history()
    register_query_person_info()
    register_query_words()

    # If the LPMM agent mode is enabled, also register the LPMM tool
    if global_config.lpmm_knowledge.lpmm_mode == "agent":
        from src.memory_system.retrieval_tools.query_lpmm_knowledge import register_tool as register_lpmm_knowledge

        register_lpmm_knowledge()

    logger.info("Tools initialized (without finish_search)")


def _init_tools_with_finish_search():
    """Initialize the tools including finish_search."""
    from src.memory_system.retrieval_tools.tool_registry import get_tool_registry
    from src.memory_system.retrieval_tools import init_all_tools

    # Clear the tool registry
    tool_registry = get_tool_registry()
    tool_registry.tools.clear()

    # Initialize all tools (including finish_search)
    init_all_tools()

    logger.info("Tools initialized (including finish_search)")
async def get_prompt_tokens_for_tools(
    question: str,
    chat_id: str,
    use_finish_search: bool,
) -> Dict[str, Any]:
    """Get the prompt token cost for a given tool configuration.

    Args:
        question: question to query
        chat_id: chat ID
        use_finish_search: whether to include the finish_search tool

    Returns:
        Dict with prompt token information
    """
    # Initialize the prompt first (if not yet initialized).
    # Note: init_memory_retrieval_prompt calls init_all_tools, so the tools have to be
    # re-configured after it runs.
    from src.chat.utils.prompt_builder import global_prompt_manager

    if "memory_retrieval_question_prompt" not in global_prompt_manager._prompts:
        init_memory_retrieval_prompt, _ = _import_memory_retrieval()
        init_memory_retrieval_prompt()

    # Initialize the tools (with or without finish_search, depending on the flag).
    # Must run after init_memory_retrieval_prompt, because that calls init_all_tools.
    if use_finish_search:
        _init_tools_with_finish_search()
    else:
        _init_tools_without_finish_search()

    # Get the tool registry
    from src.memory_system.retrieval_tools.tool_registry import get_tool_registry

    tool_registry = get_tool_registry()
    tool_definitions = tool_registry.get_tool_definitions()

    # Sanity-check the tool list (for debugging)
    tool_names = [tool["name"] for tool in tool_definitions]
    if use_finish_search:
        if "finish_search" not in tool_names:
            logger.warning("Expected the finish_search tool, but it is missing from the tool list")
    else:
        if "finish_search" in tool_names:
            logger.warning("Expected no finish_search tool, but found it in the tool list; removing it")
            # Remove the finish_search tool
            tool_registry.tools.pop("finish_search", None)
            tool_definitions = tool_registry.get_tool_definitions()
            tool_names = [tool["name"] for tool in tool_definitions]

    # Build the prompt for the first call (mimicking the first call in _react_agent_solve_question)
    from src.config.config import global_config

    bot_name = global_config.bot.nickname
    time_now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    # Build head_prompt
    head_prompt = await global_prompt_manager.format_prompt(
        "memory_retrieval_react_prompt_head",
        bot_name=bot_name,
        time_now=time_now,
        question=question,
        collected_info="",
        current_iteration=1,
        remaining_iterations=global_config.memory.max_agent_iterations - 1,
        max_iterations=global_config.memory.max_agent_iterations,
    )

    # Build the message list with only the system message (mimicking the first call)
    from src.llm_models.payload_content.message import MessageBuilder, RoleType

    messages = []
    system_builder = MessageBuilder()
    system_builder.set_role(RoleType.System)
    system_builder.add_text_content(head_prompt)
    messages.append(system_builder.build())

    # Call the LLM API to count tokens (a single call, without actually running the agent)
    from src.llm_models.utils_model import LLMRequest, RequestType
    from src.config.config import model_config

    # Create the LLM request object
    llm_request = LLMRequest(model_set=model_config.model_task_config.tool_use, request_type="memory.react.compare")

    # Build the tool options
    tool_built = llm_request._build_tool_options(tool_definitions)

    # Call _execute_request directly to get the full response object (including usage)
    response, model_info = await llm_request._execute_request(
        request_type=RequestType.RESPONSE,
        message_factory=lambda _client, *, _messages=messages: _messages,
        temperature=None,
        max_tokens=None,
        tool_options=tool_built,
    )

    # Read the token usage from the response
    prompt_tokens = 0
    completion_tokens = 0
    total_tokens = 0
    if response and hasattr(response, 'usage') and response.usage:
        prompt_tokens = response.usage.prompt_tokens or 0
        completion_tokens = response.usage.completion_tokens or 0
        total_tokens = response.usage.total_tokens or 0

    return {
        "use_finish_search": use_finish_search,
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": total_tokens,
        "tool_count": len(tool_definitions),
        "tool_names": [tool["name"] for tool in tool_definitions],
    }
async def compare_prompt_tokens(
    question: str,
    chat_id: str = "compare_finish_search",
) -> Dict[str, Any]:
    """Compare the input token cost with and without the finish_search tool.

    Runs only once and only compares input tokens, making sure that everything
    except the tool definitions is identical.

    Args:
        question: question to query
        chat_id: chat ID

    Returns:
        Dict with the comparison results
    """
    print("\n" + "=" * 80)
    print("finish_search tool input-token comparison test")
    print("=" * 80)
    print(f"\n[Question] {question}")
    print(f"[Chat ID] {chat_id}")
    print("\nNote: only the input tokens of the first LLM call are compared; the full iteration loop is not run")

    # Test 1: without finish_search
    print("\n" + "-" * 80)
    print("[Test 1/2] without the finish_search tool")
    print("-" * 80)
    result_without = await get_prompt_tokens_for_tools(
        question=question,
        chat_id=f"{chat_id}_without",
        use_finish_search=False,
    )
    print("\n[Result]")
    print(f"  Tool count: {result_without['tool_count']}")
    print(f"  Tool list: {', '.join(result_without['tool_names'])}")
    print(f"  Input prompt tokens: {result_without['prompt_tokens']:,}")

    # Wait briefly so the database record gets written
    await asyncio.sleep(1)

    # Test 2: with finish_search
    print("\n" + "-" * 80)
    print("[Test 2/2] with the finish_search tool")
    print("-" * 80)
    result_with = await get_prompt_tokens_for_tools(
        question=question,
        chat_id=f"{chat_id}_with",
        use_finish_search=True,
    )
    print("\n[Result]")
    print(f"  Tool count: {result_with['tool_count']}")
    print(f"  Tool list: {', '.join(result_with['tool_names'])}")
    print(f"  Input prompt tokens: {result_with['prompt_tokens']:,}")

    # Compare the results
    print("\n" + "=" * 80)
    print("[Comparison]")
    print("=" * 80)

    prompt_token_diff = result_with['prompt_tokens'] - result_without['prompt_tokens']
    prompt_token_diff_percent = (prompt_token_diff / result_without['prompt_tokens'] * 100) if result_without['prompt_tokens'] > 0 else 0
    tool_count_diff = result_with['tool_count'] - result_without['tool_count']

    print("\n[Input prompt token comparison]")
    print(f"  Without finish_search: {result_without['prompt_tokens']:,} tokens")
    print(f"  With finish_search: {result_with['prompt_tokens']:,} tokens")
    print(f"  Difference: {prompt_token_diff:+,} tokens ({prompt_token_diff_percent:+.2f}%)")

    print("\n[Tool count comparison]")
    print(f"  Without finish_search: {result_without['tool_count']} tools")
    print(f"  With finish_search: {result_with['tool_count']} tools")
    print(f"  Difference: {tool_count_diff:+d} tools")

    print("\n[Tool list comparison]")
    without_tools = set(result_without['tool_names'])
    with_tools = set(result_with['tool_names'])
    only_with = with_tools - without_tools
    only_without = without_tools - with_tools
    if only_with:
        print(f"  Tools only in 'with finish_search': {', '.join(only_with)}")
    if only_without:
        print(f"  Tools only in 'without finish_search': {', '.join(only_without)}")
    if not only_with and not only_without:
        print("  The tool lists are identical (apart from finish_search)")

    # Show the remaining token information
    print("\n[Other token information]")
    print(f"  Completion tokens (without finish_search): {result_without.get('completion_tokens', 0):,}")
    print(f"  Completion tokens (with finish_search): {result_with.get('completion_tokens', 0):,}")
    print(f"  Total tokens (without finish_search): {result_without.get('total_tokens', 0):,}")
    print(f"  Total tokens (with finish_search): {result_with.get('total_tokens', 0):,}")

    print("\n" + "=" * 80)

    return {
        "question": question,
        "without_finish_search": result_without,
        "with_finish_search": result_with,
        "comparison": {
            "prompt_token_diff": prompt_token_diff,
            "prompt_token_diff_percent": prompt_token_diff_percent,
            "tool_count_diff": tool_count_diff,
        },
    }
def main() -> None:
    parser = argparse.ArgumentParser(
        description="Compare the token cost with and without the finish_search tool"
    )
    parser.add_argument(
        "--chat-id",
        default="compare_finish_search",
        help="chat ID used for the test (default: compare_finish_search)",
    )
    parser.add_argument(
        "--output",
        "-o",
        help="save the results to a JSON file (optional)",
    )
    args = parser.parse_args()

    # Initialize logging (low verbosity, to avoid drowning the output in logs)
    initialize_logging(verbose=False)

    # Ask for the question interactively
    print("\n" + "=" * 80)
    print("finish_search tool token-cost comparison utility")
    print("=" * 80)
    question = input("\nEnter the question to query: ").strip()
    if not question:
        print("Error: the question must not be empty")
        return

    # Connect to the database
    try:
        db.connect(reuse_if_open=True)
    except Exception as e:
        logger.error(f"Database connection failed: {e}")
        print(f"Error: database connection failed: {e}")
        return

    # Run the comparison test
    try:
        result = asyncio.run(
            compare_prompt_tokens(
                question=question,
                chat_id=args.chat_id,
            )
        )
        # If an output file was given, save the results
        if args.output:
            # Copy the result into a serializable structure
            output_result = result.copy()
            with open(args.output, "w", encoding="utf-8") as f:
                json.dump(output_result, f, ensure_ascii=False, indent=2)
            print(f"\n[Results saved] {args.output}")
    except KeyboardInterrupt:
        print("\n\n[Interrupted] test aborted by the user")
    except Exception as e:
        logger.error(f"Test failed: {e}", exc_info=True)
        print(f"\n[Error] test failed: {e}")
    finally:
        try:
            db.close()
        except Exception:
            pass


if __name__ == "__main__":
    main()
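
For reference, the deleted script reads its question interactively, but the comparison itself is an ordinary coroutine, so it could also be driven programmatically. A minimal sketch, assuming the script lived at scripts/compare_finish_search_token.py (the module path and sample question below are assumptions, not taken from this diff):

import asyncio

from scripts.compare_finish_search_token import compare_prompt_tokens  # path assumed

# Run the one-shot comparison and read the token delta out of the returned dict
result = asyncio.run(compare_prompt_tokens(question="Which deployment plan did we agree on?"))
# A positive diff means the extra tool definition costs additional input tokens on every call
print(result["comparison"]["prompt_token_diff"])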

View File

@@ -1,476 +0,0 @@
"""
表达方式评估脚本
功能
1. 随机读取指定数量的表达方式获取其situation和style
2. 先进行人工评估逐条手动评估
3. 然后使用LLM进行评估
4. 对比人工评估和LLM评估的正确率精确率召回率F1分数等指标以人工评估为标准
5. 不真正修改数据库只是做评估
"""
import asyncio
import random
import json
import sys
import os
from typing import List, Dict
# 添加项目根目录到路径
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, project_root)
from src.common.database.database_model import Expression
from src.common.database.database import db
from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
from src.common.logger import get_logger
logger = get_logger("expression_evaluator_comparison")
def get_random_expressions(count: int = 10) -> List[Expression]:
    """
    Randomly read the given number of expressions.

    Args:
        count: number of expressions to read (default 10)

    Returns:
        List of expressions
    """
    try:
        # Query all expressions
        all_expressions = list(Expression.select())
        if not all_expressions:
            logger.warning("There are no expression records in the database")
            return []

        # If there are fewer than requested, return them all
        if len(all_expressions) <= count:
            logger.info(f"The database holds {len(all_expressions)} expressions in total; returning all of them")
            return all_expressions

        # Randomly pick the requested number
        selected = random.sample(all_expressions, count)
        logger.info(f"Randomly selected {len(selected)} out of {len(all_expressions)} expressions")
        return selected
    except Exception as e:
        logger.error(f"Failed to read random expressions: {e}")
        import traceback

        logger.error(traceback.format_exc())
        return []
def manual_evaluate_expression(expression: Expression, index: int, total: int) -> Dict:
    """
    Manually evaluate a single expression.

    Args:
        expression: expression object
        index: current index (1-based)
        total: total count

    Returns:
        Evaluation result dict containing:
        - expression_id: expression ID
        - situation: situation
        - style: style
        - suitable: whether it is suitable (manual judgment)
        - reason: evaluation reason (always None)
    """
    print("\n" + "=" * 60)
    print(f"Manual evaluation [{index}/{total}]")
    print("=" * 60)
    print(f"Situation: {expression.situation}")
    print(f"Style: {expression.style}")
    print("\nPlease judge whether this expression is suitable:")
    print("  Enter 'y', 'yes' or '1' for suitable (pass)")
    print("  Enter 'n', 'no' or '0' for unsuitable (fail)")
    print("  Enter 'q' or 'quit' to stop the evaluation")

    while True:
        user_input = input("\nYour judgment (y/n/q): ").strip().lower()
        if user_input in ['q', 'quit']:
            print("Evaluation stopped")
            return None
        if user_input in ['y', 'yes', '1']:
            suitable = True
            break
        elif user_input in ['n', 'no', '0']:
            suitable = False
            break
        else:
            print("Invalid input, please try again (y/n/q)")

    result = {
        "expression_id": expression.id,
        "situation": expression.situation,
        "style": expression.style,
        "suitable": suitable,
        "reason": None,
        "evaluator": "manual"
    }
    print(f"\n✓ Recorded: {'pass' if suitable else 'fail'}")
    return result
def create_evaluation_prompt(situation: str, style: str) -> str:
    """
    Create the evaluation prompt.

    Args:
        situation: situation
        style: style

    Returns:
        Evaluation prompt
    """
    prompt = f"""Please evaluate whether the following expression is suitable:

Situation: {situation}
Style: {style}

Evaluate it along these lines:
1. Is the situation described clearly and accurately?
2. Is the style expressed in a reasonable, natural way?
3. Do the situation and style match?
4. Some grammatical errors are allowed
5. Colloquial or elliptical phrasing is allowed
6. Some missing context is allowed

Output the evaluation result as JSON:
{{
    "suitable": true/false,
    "reason": "evaluation reason (if unsuitable, explain why)"
}}

Set suitable to true if it is suitable; otherwise set suitable to false and explain why in reason.
Output strictly in JSON format and include nothing else."""
    return prompt
async def _single_llm_evaluation(expression: Expression, llm: LLMRequest) -> tuple[bool, str, str | None]:
    """
    Run a single LLM evaluation.

    Args:
        expression: expression object
        llm: LLM request instance

    Returns:
        (suitable, reason, error) tuple; on error, suitable is False and error holds the message
    """
    try:
        prompt = create_evaluation_prompt(expression.situation, expression.style)
        logger.debug(f"Evaluating expression ID: {expression.id}")
        response, (reasoning, model_name, _) = await llm.generate_response_async(
            prompt=prompt,
            temperature=0.6,
            max_tokens=1024
        )
        logger.debug(f"LLM response: {response}")

        # Parse the JSON response
        try:
            evaluation = json.loads(response)
        except json.JSONDecodeError:
            import re

            json_match = re.search(r'\{[^{}]*"suitable"[^{}]*\}', response, re.DOTALL)
            if json_match:
                evaluation = json.loads(json_match.group())
            else:
                raise ValueError("Could not extract a JSON evaluation result from the response")

        suitable = evaluation.get("suitable", False)
        reason = evaluation.get("reason", "no reason given")
        logger.debug(f"Evaluation result: {'pass' if suitable else 'fail'}")
        return suitable, reason, None
    except Exception as e:
        logger.error(f"Error while evaluating expression ID {expression.id}: {e}")
        return False, f"Error during evaluation: {str(e)}", str(e)


async def evaluate_expression_llm(expression: Expression, llm: LLMRequest) -> Dict:
    """
    Evaluate a single expression with the LLM.

    Args:
        expression: expression object
        llm: LLM request instance

    Returns:
        Evaluation result dict
    """
    logger.info(f"Starting evaluation of expression ID: {expression.id}")
    suitable, reason, error = await _single_llm_evaluation(expression, llm)
    if error:
        suitable = False
    logger.info(f"Evaluation finished: {'pass' if suitable else 'fail'}")
    return {
        "expression_id": expression.id,
        "situation": expression.situation,
        "style": expression.style,
        "suitable": suitable,
        "reason": reason,
        "error": error,
        "evaluator": "llm"
    }
def compare_evaluations(manual_results: List[Dict], llm_results: List[Dict], method_name: str) -> Dict:
    """
    Compare the manual and LLM evaluation results.

    Args:
        manual_results: manual evaluation results
        llm_results: LLM evaluation results
        method_name: name of the evaluation method (for labeling)

    Returns:
        Comparison result dict
    """
    # Build a mapping keyed by expression_id
    llm_dict = {r["expression_id"]: r for r in llm_results}

    total = len(manual_results)
    matched = 0
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0

    for manual_result in manual_results:
        llm_result = llm_dict.get(manual_result["expression_id"])
        if llm_result is None:
            continue
        manual_suitable = manual_result["suitable"]
        llm_suitable = llm_result["suitable"]
        if manual_suitable == llm_suitable:
            matched += 1
        if manual_suitable and llm_suitable:
            true_positives += 1
        elif not manual_suitable and not llm_suitable:
            true_negatives += 1
        elif not manual_suitable and llm_suitable:
            false_positives += 1
        elif manual_suitable and not llm_suitable:
            false_negatives += 1

    accuracy = (matched / total * 100) if total > 0 else 0
    precision = (true_positives / (true_positives + false_positives) * 100) if (true_positives + false_positives) > 0 else 0
    recall = (true_positives / (true_positives + false_negatives) * 100) if (true_positives + false_negatives) > 0 else 0
    f1_score = (2 * precision * recall / (precision + recall)) if (precision + recall) > 0 else 0
    specificity = (true_negatives / (true_negatives + false_positives) * 100) if (true_negatives + false_positives) > 0 else 0

    random_baseline = 50.0
    accuracy_above_random = accuracy - random_baseline
    accuracy_improvement_ratio = (accuracy / random_baseline) if random_baseline > 0 else 0

    return {
        "method": method_name,
        "total": total,
        "matched": matched,
        "accuracy": accuracy,
        "accuracy_above_random": accuracy_above_random,
        "accuracy_improvement_ratio": accuracy_improvement_ratio,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "specificity": specificity
    }
async def main():
    """Entry point."""
    logger.info("=" * 60)
    logger.info("Starting the expression evaluation")
    logger.info("=" * 60)

    # Initialize the database connection
    try:
        db.connect(reuse_if_open=True)
        logger.info("Database connected")
    except Exception as e:
        logger.error(f"Database connection failed: {e}")
        return

    # 1. Randomly read expressions
    logger.info("\nStep 1: randomly read expressions")
    expressions = get_random_expressions(10)
    if not expressions:
        logger.error("No expressions available, exiting")
        return
    logger.info(f"Read {len(expressions)} expressions")

    # 2. Manual evaluation
    print("\n" + "=" * 60)
    print("Starting the manual evaluation")
    print("=" * 60)
    print(f"{len(expressions)} expressions need to be evaluated")
    print("Please evaluate them one by one...\n")

    manual_results = []
    for i, expression in enumerate(expressions, 1):
        manual_result = manual_evaluate_expression(expression, i, len(expressions))
        if manual_result is None:
            print("\nEvaluation aborted")
            return
        manual_results.append(manual_result)

    print("\n" + "=" * 60)
    print("Manual evaluation finished")
    print("=" * 60)

    # 3. Create the LLM instance and evaluate
    logger.info("\nStep 3: create the LLM instance")
    try:
        llm = LLMRequest(
            model_set=model_config.model_task_config.tool_use,
            request_type="expression_evaluator_comparison"
        )
    except Exception as e:
        logger.error(f"Failed to create the LLM instance: {e}")
        import traceback

        logger.error(traceback.format_exc())
        return

    logger.info("\nStep 4: start the LLM evaluation")
    llm_results = []
    for i, expression in enumerate(expressions, 1):
        logger.info(f"LLM evaluation progress: {i}/{len(expressions)}")
        llm_results.append(await evaluate_expression_llm(expression, llm))
        await asyncio.sleep(0.3)

    # 4. Compare and print the results
    comparison = compare_evaluations(manual_results, llm_results, "LLM evaluation")

    print("\n" + "=" * 60)
    print("Evaluation results (manual evaluation as ground truth)")
    print("=" * 60)
    print("\nEvaluation goals:")
    print("  1. Core ability: correctly pick out the unsuitable items (specificity)")
    print("  2. Secondary ability: wrongly reject as few suitable items as possible (recall)")

    # Detailed results (core metrics first)
    print("\n[Detailed comparison]")
    print(f"\n--- {comparison['method']} ---")
    print(f"  Total: {comparison['total']}")
    print()
    print("  [Core metrics]")
    print(f"  ⭐ Specificity: {comparison['specificity']:.2f}% (ability to pick out unsuitable items)")
    print(f"     - Formula: TN / (TN + FP) = {comparison['true_negatives']} / ({comparison['true_negatives']} + {comparison['false_positives']})")
    print(f"     - Meaning: out of {comparison['true_negatives'] + comparison['false_positives']} actually unsuitable items, {comparison['true_negatives']} were identified correctly")
    print(f"     - Random baseline: 50.00% (currently above random by {comparison['specificity'] - 50.0:+.2f}%)")
    print()
    print(f"  ⭐ Recall: {comparison['recall']:.2f}% (ability to avoid wrongly rejecting suitable items)")
    print(f"     - Formula: TP / (TP + FN) = {comparison['true_positives']} / ({comparison['true_positives']} + {comparison['false_negatives']})")
    print(f"     - Meaning: out of {comparison['true_positives'] + comparison['false_negatives']} actually suitable items, {comparison['true_positives']} were identified correctly")
    print(f"     - Random baseline: 50.00% (currently above random by {comparison['recall'] - 50.0:+.2f}%)")
    print()
    print("  [Other metrics]")
    print(f"  Accuracy: {comparison['accuracy']:.2f}% (overall share of correct judgments)")
    print(f"  Precision: {comparison['precision']:.2f}% (share of items judged suitable that actually are)")
    print(f"  F1 score: {comparison['f1_score']:.2f} (harmonic mean of precision and recall)")
    print(f"  Matches: {comparison['matched']}/{comparison['total']}")
    print()
    print("  [Confusion matrix]")
    print(f"  TP (correctly judged suitable): {comparison['true_positives']}")
    print(f"  TN (correctly judged unsuitable): {comparison['true_negatives']}")
    print(f"  FP (wrongly judged suitable): {comparison['false_positives']} ⚠️")
    print(f"  FN (suitable items wrongly rejected): {comparison['false_negatives']} ⚠️")

    # 5. Print details for items rejected manually but wrongly passed by the LLM
    print("\n" + "=" * 60)
    print("Items rejected manually but passed by the LLM (FP - false positive)")
    print("=" * 60)

    # Build a mapping keyed by expression_id
    llm_dict = {r["expression_id"]: r for r in llm_results}
    fp_items = []
    for manual_result in manual_results:
        llm_result = llm_dict.get(manual_result["expression_id"])
        if llm_result is None:
            continue
        # Rejected manually but passed by the LLM (FP case)
        if not manual_result["suitable"] and llm_result["suitable"]:
            fp_items.append({
                "expression_id": manual_result["expression_id"],
                "situation": manual_result["situation"],
                "style": manual_result["style"],
                "manual_suitable": manual_result["suitable"],
                "llm_suitable": llm_result["suitable"],
                "llm_reason": llm_result.get("reason", "no reason given"),
                "llm_error": llm_result.get("error")
            })

    if fp_items:
        print(f"\nFound {len(fp_items)} misjudged items:\n")
        for idx, item in enumerate(fp_items, 1):
            print(f"--- [{idx}] item ID: {item['expression_id']} ---")
            print(f"Situation: {item['situation']}")
            print(f"Style: {item['style']}")
            print("Manual evaluation: fail ❌")
            print("LLM evaluation: pass ✅ (misjudged)")
            if item.get('llm_error'):
                print(f"LLM error: {item['llm_error']}")
            print(f"LLM reason: {item['llm_reason']}")
            print()
    else:
        print("\n✓ No misjudged items: every manually rejected item was also rejected by the LLM")

    # 6. Save the results to a JSON file
    output_file = os.path.join(project_root, "data", "expression_evaluation_comparison.json")
    try:
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump({
                "manual_results": manual_results,
                "llm_results": llm_results,
                "comparison": comparison
            }, f, ensure_ascii=False, indent=2)
        logger.info(f"\nEvaluation results saved to: {output_file}")
    except Exception as e:
        logger.warning(f"Failed to save the results to a file: {e}")

    print("\n" + "=" * 60)
    print("Evaluation finished")
    print("=" * 60)

    # Close the database connection
    try:
        db.close()
        logger.info("Database connection closed")
    except Exception as e:
        logger.warning(f"Error while closing the database connection: {e}")


if __name__ == "__main__":
    asyncio.run(main())
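
The metrics in compare_evaluations are the standard confusion-matrix quantities, just scaled to percentages. A standalone sanity check of the formulas (the counts below are invented for illustration):

# Invented counts: 6 TP, 2 TN, 1 FP, 1 FN out of 10 items
tp, tn, fp, fn = 6, 2, 1, 1

accuracy = (tp + tn) / (tp + tn + fp + fn) * 100    # 80.00
precision = tp / (tp + fp) * 100                    # 85.71
recall = tp / (tp + fn) * 100                       # 85.71
f1 = 2 * precision * recall / (precision + recall)  # 85.71
specificity = tn / (tn + fp) * 100                  # 66.67

print(accuracy, precision, recall, f1, specificity)

One subtlety worth noting: the script computes accuracy as matched/total, which equals (TP + TN)/total only when every manual item has a matching LLM result; items missing from llm_dict are skipped in the counts but still included in total, so accuracy is underestimated in that case.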

View File

@@ -1,7 +1,6 @@
import time
import json
import asyncio
import re
from typing import List, Dict, Any, Optional, Tuple
from src.common.logger import get_logger
from src.config.config import global_config, model_config
@@ -108,7 +107,7 @@ def init_memory_retrieval_prompt():
- You may add brief thoughts about the query approach; keep them short and straight to the point
- First consider whether the current information is enough to answer the question
- If the information is insufficient, you must query with a tool and state which tool you will use
- If the collected information is sufficient, or it is insufficient and the answer clearly cannot be found, you must call the finish_search tool to end the search
- If the collected information is sufficient, or it is insufficient and the answer clearly cannot be found, you must call the found_answer tool to end the search
""",
        name="memory_retrieval_react_prompt_head",
    )
@@ -312,7 +311,7 @@ async def _react_agent_solve_question(
            return None

        # Normal iteration: use head_prompt to decide which tools to call (including the finish_search tool)
        # Normal iteration: use head_prompt to decide which tools to call (including the found_answer tool)
        tool_definitions = tool_registry.get_tool_definitions()
        # tool_names = [tool_def["name"] for tool_def in tool_definitions]
        # logger.debug(f"ReAct Agent iteration {iteration + 1}, question: {question} | available tools: {', '.join(tool_names)} ({len(tool_definitions)} total)")
@@ -373,7 +372,7 @@ async def _react_agent_solve_question(
            logger.error(f"ReAct Agent LLM call failed: {response}")
            break

        # Note: this checks for finish_search tool calls; when one is detected, the found_answer parameter decides whether to return the answer or stop the search
        # Note: this checks for found_answer tool calls; when one is detected, the answer parameter decides whether to return the answer or stop the search
        assistant_message: Optional[Message] = None
        if tool_calls:
@@ -403,115 +402,146 @@ async def _react_agent_solve_question(
        # Handle tool calls
        if not tool_calls:
            # If there were no tool calls, check whether the response text contains a finish_search call in function-call format
            # If there were no tool calls, check whether the response text contains a found_answer call in function-call or JSON format
            if response and response.strip():
                # Try to parse a finish_search function call from the text
                def parse_finish_search_from_text(text: str):
                    """Parse a finish_search call from text; returns a (found_answer, answer) tuple, or (None, None) if absent"""

                # First, try to parse a JSON-format found_answer
                def parse_json_found_answer(text: str):
                    """Parse a JSON-format found_answer from text; returns a (found_answer, answer) tuple, or (None, None) if absent"""
                    if not text:
                        return None, None
                    try:
                        # Try to extract a JSON object (it may sit inside a code fence or be bare JSON)
                        json_text = text.strip()
                        # If code-fence markers are present, extract the JSON part
                        if "```json" in json_text:
                            start = json_text.find("```json") + 7
                            end = json_text.find("```", start)
                            if end != -1:
                                json_text = json_text[start:end].strip()
                        elif "```" in json_text:
                            start = json_text.find("```") + 3
                            end = json_text.find("```", start)
                            if end != -1:
                                json_text = json_text[start:end].strip()
                        # Try to parse the JSON
                        data = json.loads(json_text)
                        # Check for a found_answer field
                        if isinstance(data, dict) and "found_answer" in data:
                            found_answer = bool(data.get("found_answer", False))
                            answer = data.get("answer", "")
                            return found_answer, answer
                    except (json.JSONDecodeError, ValueError, TypeError):
                        # If JSON parsing failed, look for a JSON object inside the text
                        try:
                            # Scan from the first { to its matching } (a more robust JSON extraction)
                            first_brace = text.find('{')
                            if first_brace != -1:
                                # Starting at the first {, find the matching }
                                brace_count = 0
                                json_end = -1
                                for i in range(first_brace, len(text)):
                                    if text[i] == '{':
                                        brace_count += 1
                                    elif text[i] == '}':
                                        brace_count -= 1
                                        if brace_count == 0:
                                            json_end = i + 1
                                            break
                                if json_end != -1:
                                    json_text = text[first_brace:json_end]
                                    data = json.loads(json_text)
                                    if isinstance(data, dict) and "found_answer" in data:
                                        found_answer = bool(data.get("found_answer", False))
                                        answer = data.get("answer", "")
                                        return found_answer, answer
                        except (json.JSONDecodeError, ValueError, TypeError):
                            pass
                    return None, None
                # Try to parse a found_answer function call from the text
                def parse_found_answer_from_text(text: str):
                    """Parse a found_answer function call from text and return the answer string, or None if absent.
                    A non-empty answer means an answer was found; an empty or missing answer means no answer was found."""
                    if not text:
                        return None
                    # Locate the finish_search function call (case-insensitive)
                    func_pattern = "finish_search"
                    # Locate the found_answer function call (case-insensitive)
                    func_pattern = "found_answer"
                    text_lower = text.lower()
                    func_pos = text_lower.find(func_pattern)
                    if func_pos == -1:
                        return None, None
                    # Find the start and end of the function call:
                    # search forward from func_pos for the opening parenthesis
                    start_pos = text.find("(", func_pos)
                    if start_pos == -1:
                        return None, None
                    # Find the matching closing parenthesis (handling nesting)
                    paren_count = 0
                    end_pos = start_pos
                    for i in range(start_pos, len(text)):
                        if text[i] == "(":
                            paren_count += 1
                        elif text[i] == ")":
                            paren_count -= 1
                            if paren_count == 0:
                                end_pos = i
                                break
                    else:
                        # No matching closing parenthesis was found
                        return None, None
                    # Extract the parameter section of the call
                    params_text = text[start_pos + 1 : end_pos]
                    # Parse the found_answer parameter (a boolean: true/false/True/False)
                    found_answer = None
                    found_answer_patterns = [
                        r"found_answer\s*=\s*true",
                        r"found_answer\s*=\s*True",
                        r"found_answer\s*=\s*false",
                        r"found_answer\s*=\s*False",
                    ]
                    for pattern in found_answer_patterns:
                        match = re.search(pattern, params_text, re.IGNORECASE)
                        if match:
                            found_answer = "true" in match.group(0).lower()
                            break
                    return None
                    # Parse the answer parameter (a string) using extract_quoted_content
                    answer = extract_quoted_content(text, "finish_search", "answer")
                    answer = extract_quoted_content(text, "found_answer", "answer")
                    # If answer is present, return it even when it is an empty string (an empty string means no answer was found)
                    return answer
                    return found_answer, answer
                # First try the JSON format
                parsed_found_answer_json, parsed_answer_json = parse_json_found_answer(response)
                is_json_format = parsed_found_answer_json is not None
                # If JSON parsing succeeded, use the JSON result
                if is_json_format:
                    parsed_answer = parsed_answer_json
                    has_answer = parsed_found_answer_json and parsed_answer and parsed_answer.strip()
                else:
                    # If JSON parsing failed, fall back to the function-call format
                    parsed_answer = parse_found_answer_from_text(response)
                    # A non-empty answer means an answer was found; otherwise no answer was found
                    has_answer = parsed_answer is not None and parsed_answer.strip() != ""
                parsed_found_answer, parsed_answer = parse_finish_search_from_text(response)
                if parsed_found_answer is not None:
                    # A finish_search call in text format was detected
                    if parsed_found_answer:
                if parsed_answer is not None or is_json_format:
                    # A found_answer format was detected (either JSON or function-call format)
                    format_type = "JSON format" if is_json_format else "function-call format"
                    if has_answer:
                        # An answer was found
                        if parsed_answer:
                            step["actions"].append(
                                {
                                    "action_type": "finish_search",
                                    "action_params": {"found_answer": True, "answer": parsed_answer},
                                }
                            )
                            step["observations"] = ["Detected a finish_search text-format call: answer found"]
                            thinking_steps.append(step)
                            logger.info(
                                f"{react_log_prefix}iteration {iteration + 1}: found the answer to question {question} via the finish_search text format: {parsed_answer}"
                            )
                            _log_conversation_messages(
                                conversation_messages,
                                head_prompt=first_head_prompt,
                                final_status=f"Answer found: {parsed_answer}",
                            )
                            return True, parsed_answer, thinking_steps, False
                        else:
                            # found_answer was True but no answer was provided; treat it as an error and keep iterating
                            logger.warning(
                                f"{react_log_prefix}iteration {iteration + 1}: finish_search text format had found_answer=True but provided no answer"
                            )
                    else:
                        # No answer was found; stop the search immediately
                        step["actions"].append(
                            {"action_type": "finish_search", "action_params": {"found_answer": False}}
                            {
                                "action_type": "found_answer",
                                "action_params": {"answer": parsed_answer},
                            }
                        )
                        step["observations"] = ["Detected a finish_search text-format call: no answer found"]
                        step["observations"] = [f"Detected a found_answer ({format_type}) call: answer found"]
                        thinking_steps.append(step)
                        logger.info(
                            f"{react_log_prefix}iteration {iteration + 1}: judged via the finish_search text format that no answer was found"
                            f"{react_log_prefix}iteration {iteration + 1}: found the answer to question {question} via found_answer ({format_type}): {parsed_answer[:100]}..."
                        )
                        _log_conversation_messages(
                            conversation_messages,
                            head_prompt=first_head_prompt,
                            final_status="No answer found (judged via the finish_search text format)",
                            final_status=f"Answer found: {parsed_answer}",
                        )
                        return True, parsed_answer, thinking_steps, False
                    else:
                        # No answer was found; stop the search immediately
                        step["actions"].append(
                            {"action_type": "found_answer", "action_params": {"answer": ""}}
                        )
                        step["observations"] = [f"Detected a found_answer ({format_type}) call: no answer found"]
                        thinking_steps.append(step)
                        logger.info(
                            f"{react_log_prefix}iteration {iteration + 1}: judged via found_answer ({format_type}) that no answer was found"
                        )
                        _log_conversation_messages(
                            conversation_messages,
                            head_prompt=first_head_prompt,
                            final_status="No answer found (judged via the found_answer text format)",
                        )
                        return False, "", thinking_steps, False

                # If no finish_search format was detected, record the thinking and continue to the next iteration
                # If no found_answer format was detected, record the thinking and continue to the next iteration
                step["observations"] = [f"Thinking finished but no tool was called. Response: {response}"]
                logger.info(
                    f"{react_log_prefix}iteration {iteration + 1}: thinking finished but no tool was called: {response}"
@@ -525,62 +555,55 @@ async def _react_agent_solve_question(
                continue

            # Handle tool calls
            # First check for a finish_search tool call; if present, return immediately without running the other tools
            finish_search_found = None
            finish_search_answer = None
            # First check for a found_answer tool call; if present, return immediately without running the other tools
            found_answer_answer = None
            for tool_call in tool_calls:
                tool_name = tool_call.func_name
                tool_args = tool_call.args or {}
                if tool_name == "finish_search":
                    finish_search_found = tool_args.get("found_answer", False)
                    finish_search_answer = tool_args.get("answer", "")
                if tool_name == "found_answer":
                    found_answer_answer = tool_args.get("answer", "")

            if finish_search_found:
            # A non-empty answer means an answer was found; otherwise no answer was found
            if found_answer_answer and found_answer_answer.strip():
                # An answer was found
                if finish_search_answer:
                    step["actions"].append(
                        {
                            "action_type": "finish_search",
                            "action_params": {"found_answer": True, "answer": finish_search_answer},
                        }
                    )
                    step["observations"] = ["Detected a finish_search tool call: answer found"]
                    thinking_steps.append(step)
                    logger.info(
                        f"{react_log_prefix}iteration {iteration + 1}: found the answer to question {question} via the finish_search tool: {finish_search_answer}"
                    )
                    _log_conversation_messages(
                        conversation_messages,
                        head_prompt=first_head_prompt,
                        final_status=f"Answer found: {finish_search_answer}",
                    )
                    return True, finish_search_answer, thinking_steps, False
                else:
                    # found_answer was True but no answer was provided; treat it as an error
                    logger.warning(
                        f"{react_log_prefix}iteration {iteration + 1}: the finish_search tool had found_answer=True but provided no answer"
                    )
            else:
                # No answer was found; stop the search immediately
                step["actions"].append({"action_type": "finish_search", "action_params": {"found_answer": False}})
                step["observations"] = ["Detected a finish_search tool call: no answer found"]
                step["actions"].append(
                    {
                        "action_type": "found_answer",
                        "action_params": {"answer": found_answer_answer},
                    }
                )
                step["observations"] = ["Detected a found_answer tool call: answer found"]
                thinking_steps.append(step)
                logger.info(
                    f"{react_log_prefix}iteration {iteration + 1}: judged via the finish_search tool that no answer was found"
                    f"{react_log_prefix}iteration {iteration + 1}: found the answer to question {question} via the found_answer tool: {found_answer_answer}"
                )
                _log_conversation_messages(
                    conversation_messages,
                    head_prompt=first_head_prompt,
                    final_status="No answer found (judged via the finish_search tool)",
                    final_status=f"Answer found: {found_answer_answer}",
                )
                return True, found_answer_answer, thinking_steps, False
            else:
                # No answer was found; stop the search immediately
                step["actions"].append({"action_type": "found_answer", "action_params": {"answer": ""}})
                step["observations"] = ["Detected a found_answer tool call: no answer found"]
                thinking_steps.append(step)
                logger.info(
                    f"{react_log_prefix}iteration {iteration + 1}: judged via the found_answer tool that no answer was found"
                )
                _log_conversation_messages(
                    conversation_messages,
                    head_prompt=first_head_prompt,
                    final_status="No answer found (judged via the found_answer tool)",
                )
                return False, "", thinking_steps, False

            # If there was no finish_search tool call, continue with the other tools
            # If there was no found_answer tool call, continue with the other tools
            tool_tasks = []
            for i, tool_call in enumerate(tool_calls):
                tool_name = tool_call.func_name
@@ -590,8 +613,8 @@
                    f"{react_log_prefix}iteration {iteration + 1}: tool call {i + 1}/{len(tool_calls)}: {tool_name}({tool_args})"
                )

                # Skip finish_search tool calls (they were already handled above)
                if tool_name == "finish_search":
                # Skip found_answer tool calls (they were already handled above)
                if tool_name == "found_answer":
                    continue

                # Record the name of the last tool used (to decide whether an extra iteration is needed)
View File

@@ -15,7 +15,7 @@ from .query_chat_history import register_tool as register_query_chat_history
from .query_lpmm_knowledge import register_tool as register_lpmm_knowledge
from .query_person_info import register_tool as register_query_person_info
from .query_words import register_tool as register_query_words
from .found_answer import register_tool as register_finish_search
from .found_answer import register_tool as register_found_answer
from src.config.config import global_config
@@ -24,7 +24,7 @@ def init_all_tools():
    register_query_chat_history()
    register_query_person_info()
    register_query_words()  # register the query_words tool
    register_finish_search()  # register the finish_search tool
    register_found_answer()  # register the found_answer tool

    if global_config.lpmm_knowledge.lpmm_mode == "agent":
        register_lpmm_knowledge()

View File

@@ -1,5 +1,5 @@
"""
finish_search tool - ends the search during memory retrieval
found_answer tool - ends the search during memory retrieval
"""
from src.common.logger import get_logger
@@ -8,17 +8,16 @@ from .tool_registry import register_memory_retrieval_tool

logger = get_logger("memory_retrieval_tools")


async def finish_search(found_answer: bool, answer: str = "") -> str:
async def found_answer(answer: str = "") -> str:
    """End the search.

    Args:
        found_answer: whether an answer was found
        answer: if an answer was found, its content; may be empty if no answer was found
        answer: if an answer was found, its content; if no answer was found, it may be empty or omitted

    Returns:
        str: confirmation message
    """
    if found_answer:
    if answer and answer.strip():
        logger.info(f"Answer found: {answer}")
        return f"Confirmed answer found: {answer}"
    else:
@@ -27,23 +26,17 @@ async def finish_search(found_answer: bool, answer: str = "") -> str:


def register_tool():
    """Register the finish_search tool."""
    """Register the found_answer tool."""
    register_memory_retrieval_tool(
        name="finish_search",
        description="Call this tool when you decide to end the search. If you found a definite answer, set found_answer to true and provide the answer in answer; if not, set found_answer to false. Only set found_answer to true when a clear, specific answer has been retrieved; do not make information up.",
        name="found_answer",
        description="Call this tool when you decide to end the search. If you found a definite answer, provide it in the answer parameter; if not, omit answer or pass an empty string. Only provide answer when a clear, specific answer has been retrieved; do not make information up.",
        parameters=[
            {
                "name": "found_answer",
                "type": "boolean",
                "description": "whether an answer was found",
                "required": True,
            },
            {
                "name": "answer",
                "type": "string",
                "description": "If found_answer is true, the answer content, based strictly on the collected information (do not make it up); if found_answer is false, it may be empty",
                "description": "If an answer was found, the answer content, based strictly on the collected information (do not make it up); if no answer was found, omit this parameter or pass an empty string",
                "required": False,
            },
        ],
        execute_func=finish_search,
        execute_func=found_answer,
    )
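
Taken together, the rename collapses the old two-parameter contract into a single optional parameter: whether the search succeeded is now inferred from the answer itself. A minimal sketch of what a tool call reduces to under the new contract (the call shapes below are illustrative, not the project's exact wire format):

# Old contract: an explicit boolean plus an optional answer
# {"name": "finish_search", "args": {"found_answer": True, "answer": "..."}}
# New contract: one optional parameter; an empty or missing answer means "not found"
# {"name": "found_answer", "args": {"answer": "..."}}

def search_succeeded(args: dict) -> bool:
    # Mirrors the check used by the agent loop above: a non-empty answer means success
    answer = args.get("answer", "")
    return bool(answer and answer.strip())

print(search_succeeded({"answer": "The meetup is on Friday"}))  # True
print(search_succeeded({}))                                     # False

Dropping the boolean also removes the inconsistent state the old code had to guard against (found_answer=True with no answer), which is what the deleted warning branches dealt with.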