From 10cd2474affda9ce1f2901ee3511a149b612fa99 Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Mon, 10 Nov 2025 01:13:02 +0800
Subject: [PATCH] =?UTF-8?q?better=EF=BC=9A=E4=BC=98=E5=8C=96=E9=94=99?=
 =?UTF-8?q?=E5=88=AB=E5=AD=97=E7=94=9F=E6=88=90=E5=92=8C=E5=88=86=E6=AE=B5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/chat/utils/utils.py               |  17 +++--
 src/memory_system/memory_retrieval.py | 101 +++++++++++++++++---------
 2 files changed, 77 insertions(+), 41 deletions(-)

diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py
index 823e3cd3..d85f8143 100644
--- a/src/chat/utils/utils.py
+++ b/src/chat/utils/utils.py
@@ -221,13 +221,13 @@ def split_into_sentences_w_remove_punctuation(text: str) -> list[str]:
     while i < len(text):
         char = text[i]
         if char in separators:
-            # 检查分割条件：如果分隔符左右都是英文字母，则不分割
+            # 检查分割条件：如果空格左右都是英文字母，则不分割（仅对空格应用此规则）
             can_split = True
             if 0 < i < len(text) - 1:
                 prev_char = text[i - 1]
                 next_char = text[i + 1]
-                # if is_english_letter(prev_char) and is_english_letter(next_char) and char == ' ': # 原计划只对空格应用此规则，现应用于所有分隔符
-                if is_english_letter(prev_char) and is_english_letter(next_char):
+                # 只对空格应用"不分割两个英文之间的空格"规则
+                if char == ' ' and is_english_letter(prev_char) and is_english_letter(next_char):
                     can_split = False
 
             if can_split:
@@ -388,9 +388,16 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese
     for sentence in split_sentences:
         if global_config.chinese_typo.enable and enable_chinese_typo:
             typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
-            sentences.append(typoed_text)
             if typo_corrections:
-                sentences.append(typo_corrections)
+                # 50%概率新增正确字/词，50%概率用正确分句替换错别字分句
+                if random.random() < 0.5:
+                    sentences.append(typoed_text)
+                    sentences.append(typo_corrections)
+                else:
+                    # 用正确的分句替换错别字分句
+                    sentences.append(sentence)
+            else:
+                sentences.append(typoed_text)
         else:
             sentences.append(sentence)
 
diff --git a/src/memory_system/memory_retrieval.py b/src/memory_system/memory_retrieval.py
index ac7eb4bc..fd7a2daf 100644
--- a/src/memory_system/memory_retrieval.py
+++ b/src/memory_system/memory_retrieval.py
@@ -1,6 +1,7 @@
 import time
 import json
 import re
+import random
 from typing import List, Dict, Any, Optional, Tuple
 from src.common.logger import get_logger
 from src.config.config import global_config, model_config
@@ -63,8 +64,7 @@ def init_memory_retrieval_prompt():
     
     # 第二步：ReAct Agent prompt（工具描述会在运行时动态生成）
     Prompt(
-        """
-你是一个记忆检索助手，需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。
+        """你需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。
 
 当前问题：{question}
 已收集的信息：
@@ -77,14 +77,13 @@ def init_memory_retrieval_prompt():
 ```json
 {{
   "thought": "你的思考过程，分析当前情况，决定下一步行动",
-  "action": "要执行的动作，格式为：工具名(参数)",
   "action_type": {action_types_list},
   "action_params": {{参数名: 参数值}} 或 null
 }}
 ```
 
 你可以选择以下动作：
-1. 如果已经收集到足够的信息可以回答问题，请设置action_type为"final_answer"，并在thought中说明答案。
+1. 如果已经收集到足够的信息可以回答问题，请设置action_type为"final_answer"，并在thought中说明答案。除非明确找到答案，否则不要设置为final_answer。
 2. 如果经过多次查询后，确认无法找到相关信息或答案，请设置action_type为"no_answer"，并在thought中说明原因。
 
 请只输出JSON，不要输出其他内容：
@@ -341,17 +340,18 @@ def _query_thinking_back(chat_id: str, question: str) -> Optional[Tuple[bool, st
         question: 问题
         
     Returns:
-        Optional[Tuple[bool, str]]: 如果找到答案，返回(True, answer)，否则返回None
+        Optional[Tuple[bool, str]]: 如果找到记录，返回(found_answer, answer)，否则返回None
+            found_answer: 是否找到答案（True表示found_answer=1，False表示found_answer=0）
+            answer: 答案内容
     """
     try:
-        # 查询相同chat_id和问题，且found_answer为True的记录
-        # 按更新时间倒序，获取最新的答案
+        # 查询相同chat_id和问题的所有记录（包括found_answer为0和1的）
+        # 按更新时间倒序，获取最新的记录
         records = (
             ThinkingBack.select()
             .where(
                 (ThinkingBack.chat_id == chat_id) &
-                (ThinkingBack.question == question) &
-                (ThinkingBack.found_answer == 1)
+                (ThinkingBack.question == question)
             )
             .order_by(ThinkingBack.update_time.desc())
             .limit(1)
@@ -359,8 +359,10 @@ def _query_thinking_back(chat_id: str, question: str) -> Optional[Tuple[bool, st
         
         if records.exists():
             record = records.get()
-            logger.info(f"在thinking_back中找到现成答案，问题: {question[:50]}...")
-            return True, record.answer or ""
+            found_answer = bool(record.found_answer)
+            answer = record.answer or ""
+            logger.info(f"在thinking_back中找到记录，问题: {question[:50]}...，found_answer: {found_answer}")
+            return found_answer, answer
         
         return None
         
@@ -503,34 +505,61 @@ async def build_memory_retrieval_prompt(
             
             # 先检查thinking_back数据库中是否有现成答案
             cached_result = _query_thinking_back(chat_id, question)
+            should_requery = False
+            
             if cached_result:
-                found_answer, answer = cached_result
+                cached_found_answer, cached_answer = cached_result
+                
+                # 根据found_answer的值决定是否重新查询
+                if cached_found_answer:  # found_answer == 1 (True)
+                    # found_answer == 1：20%概率重新查询
+                    if random.random() < 0.2:
+                        should_requery = True
+                        logger.info(f"found_answer=1，触发20%概率重新查询，问题: {question[:50]}...")
+                    else:
+                        # 使用缓存答案
+                        if cached_answer:
+                            logger.info(f"从thinking_back缓存中获取答案（found_answer=1），问题: {question[:50]}...")
+                            all_results.append(f"问题：{question}\n答案：{cached_answer}")
+                            continue  # 跳过ReAct Agent查询
+                else:  # found_answer == 0 (False)
+                    # found_answer == 0：40%概率重新查询
+                    if random.random() < 0.4:
+                        should_requery = True
+                        logger.info(f"found_answer=0，触发40%概率重新查询，问题: {question[:50]}...")
+                    else:
+                        # 使用缓存答案（即使found_answer=0，也可能有部分答案）
+                        if cached_answer:
+                            logger.info(f"从thinking_back缓存中获取答案（found_answer=0），问题: {question[:50]}...")
+                            all_results.append(f"问题：{question}\n答案：{cached_answer}")
+                            continue  # 跳过ReAct Agent查询
+            
+            # 如果没有缓存答案或需要重新查询，使用ReAct Agent查询
+            if not cached_result or should_requery:
+                if should_requery:
+                    logger.info(f"概率触发重新查询，使用ReAct Agent查询，问题: {question[:50]}...")
+                else:
+                    logger.info(f"未找到缓存答案，使用ReAct Agent查询，问题: {question[:50]}...")
+                
+                found_answer, answer, thinking_steps = await _react_agent_solve_question(
+                    question=question,
+                    chat_id=chat_id,
+                    max_iterations=5,
+                    timeout=30.0
+                )
+                
+                # 存储到数据库
+                _store_thinking_back(
+                    chat_id=chat_id,
+                    question=question,
+                    context=message,  # 只存储前500字符作为上下文
+                    found_answer=found_answer,
+                    answer=answer,
+                    thinking_steps=thinking_steps
+                )
+                
                 if found_answer and answer:
-                    logger.info(f"从thinking_back缓存中获取答案，问题: {question[:50]}...")
                     all_results.append(f"问题：{question}\n答案：{answer}")
-                    continue  # 跳过ReAct Agent查询
-            
-            # 如果没有缓存答案，使用ReAct Agent查询
-            logger.info(f"未找到缓存答案，使用ReAct Agent查询，问题: {question[:50]}...")
-            found_answer, answer, thinking_steps = await _react_agent_solve_question(
-                question=question,
-                chat_id=chat_id,
-                max_iterations=5,
-                timeout=30.0
-            )
-            
-            # 存储到数据库
-            _store_thinking_back(
-                chat_id=chat_id,
-                question=question,
-                context=message,  # 只存储前500字符作为上下文
-                found_answer=found_answer,
-                answer=answer,
-                thinking_steps=thinking_steps
-            )
-            
-            if found_answer and answer:
-                all_results.append(f"问题：{question}\n答案：{answer}")
         
         end_time = time.time()