From 10cd2474affda9ce1f2901ee3511a149b612fa99 Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Mon, 10 Nov 2025 01:13:02 +0800 Subject: [PATCH] =?UTF-8?q?better=EF=BC=9A=E4=BC=98=E5=8C=96=E9=94=99?= =?UTF-8?q?=E5=88=AB=E5=AD=97=E7=94=9F=E6=88=90=E5=92=8C=E5=88=86=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/utils/utils.py | 17 +++-- src/memory_system/memory_retrieval.py | 101 +++++++++++++++++--------- 2 files changed, 77 insertions(+), 41 deletions(-) diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index 823e3cd3..d85f8143 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -221,13 +221,13 @@ def split_into_sentences_w_remove_punctuation(text: str) -> list[str]: while i < len(text): char = text[i] if char in separators: - # 检查分割条件:如果分隔符左右都是英文字母,则不分割 + # 检查分割条件:如果空格左右都是英文字母,则不分割(仅对空格应用此规则) can_split = True if 0 < i < len(text) - 1: prev_char = text[i - 1] next_char = text[i + 1] - # if is_english_letter(prev_char) and is_english_letter(next_char) and char == ' ': # 原计划只对空格应用此规则,现应用于所有分隔符 - if is_english_letter(prev_char) and is_english_letter(next_char): + # 只对空格应用"不分割两个英文之间的空格"规则 + if char == ' ' and is_english_letter(prev_char) and is_english_letter(next_char): can_split = False if can_split: @@ -388,9 +388,16 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese for sentence in split_sentences: if global_config.chinese_typo.enable and enable_chinese_typo: typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence) - sentences.append(typoed_text) if typo_corrections: - sentences.append(typo_corrections) + # 50%概率新增正确字/词,50%概率用正确分句替换错别字分句 + if random.random() < 0.5: + sentences.append(typoed_text) + sentences.append(typo_corrections) + else: + # 用正确的分句替换错别字分句 + sentences.append(sentence) + else: + sentences.append(typoed_text) else: sentences.append(sentence) diff --git a/src/memory_system/memory_retrieval.py b/src/memory_system/memory_retrieval.py index ac7eb4bc..fd7a2daf 100644 --- a/src/memory_system/memory_retrieval.py +++ b/src/memory_system/memory_retrieval.py @@ -1,6 +1,7 @@ import time import json import re +import random from typing import List, Dict, Any, Optional, Tuple from src.common.logger import get_logger from src.config.config import global_config, model_config @@ -63,8 +64,7 @@ def init_memory_retrieval_prompt(): # 第二步:ReAct Agent prompt(工具描述会在运行时动态生成) Prompt( - """ -你是一个记忆检索助手,需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。 + """你需要通过思考(Think)、行动(Action)、观察(Observation)的循环来回答问题。 当前问题:{question} 已收集的信息: @@ -77,14 +77,13 @@ def init_memory_retrieval_prompt(): ```json {{ "thought": "你的思考过程,分析当前情况,决定下一步行动", - "action": "要执行的动作,格式为:工具名(参数)", "action_type": {action_types_list}, "action_params": {{参数名: 参数值}} 或 null }} ``` 你可以选择以下动作: -1. 如果已经收集到足够的信息可以回答问题,请设置action_type为"final_answer",并在thought中说明答案。 +1. 如果已经收集到足够的信息可以回答问题,请设置action_type为"final_answer",并在thought中说明答案。除非明确找到答案,否则不要设置为final_answer。 2. 如果经过多次查询后,确认无法找到相关信息或答案,请设置action_type为"no_answer",并在thought中说明原因。 请只输出JSON,不要输出其他内容: @@ -341,17 +340,18 @@ def _query_thinking_back(chat_id: str, question: str) -> Optional[Tuple[bool, st question: 问题 Returns: - Optional[Tuple[bool, str]]: 如果找到答案,返回(True, answer),否则返回None + Optional[Tuple[bool, str]]: 如果找到记录,返回(found_answer, answer),否则返回None + found_answer: 是否找到答案(True表示found_answer=1,False表示found_answer=0) + answer: 答案内容 """ try: - # 查询相同chat_id和问题,且found_answer为True的记录 - # 按更新时间倒序,获取最新的答案 + # 查询相同chat_id和问题的所有记录(包括found_answer为0和1的) + # 按更新时间倒序,获取最新的记录 records = ( ThinkingBack.select() .where( (ThinkingBack.chat_id == chat_id) & - (ThinkingBack.question == question) & - (ThinkingBack.found_answer == 1) + (ThinkingBack.question == question) ) .order_by(ThinkingBack.update_time.desc()) .limit(1) @@ -359,8 +359,10 @@ def _query_thinking_back(chat_id: str, question: str) -> Optional[Tuple[bool, st if records.exists(): record = records.get() - logger.info(f"在thinking_back中找到现成答案,问题: {question[:50]}...") - return True, record.answer or "" + found_answer = bool(record.found_answer) + answer = record.answer or "" + logger.info(f"在thinking_back中找到记录,问题: {question[:50]}...,found_answer: {found_answer}") + return found_answer, answer return None @@ -503,34 +505,61 @@ async def build_memory_retrieval_prompt( # 先检查thinking_back数据库中是否有现成答案 cached_result = _query_thinking_back(chat_id, question) + should_requery = False + if cached_result: - found_answer, answer = cached_result + cached_found_answer, cached_answer = cached_result + + # 根据found_answer的值决定是否重新查询 + if cached_found_answer: # found_answer == 1 (True) + # found_answer == 1:20%概率重新查询 + if random.random() < 0.2: + should_requery = True + logger.info(f"found_answer=1,触发20%概率重新查询,问题: {question[:50]}...") + else: + # 使用缓存答案 + if cached_answer: + logger.info(f"从thinking_back缓存中获取答案(found_answer=1),问题: {question[:50]}...") + all_results.append(f"问题:{question}\n答案:{cached_answer}") + continue # 跳过ReAct Agent查询 + else: # found_answer == 0 (False) + # found_answer == 0:40%概率重新查询 + if random.random() < 0.4: + should_requery = True + logger.info(f"found_answer=0,触发40%概率重新查询,问题: {question[:50]}...") + else: + # 使用缓存答案(即使found_answer=0,也可能有部分答案) + if cached_answer: + logger.info(f"从thinking_back缓存中获取答案(found_answer=0),问题: {question[:50]}...") + all_results.append(f"问题:{question}\n答案:{cached_answer}") + continue # 跳过ReAct Agent查询 + + # 如果没有缓存答案或需要重新查询,使用ReAct Agent查询 + if not cached_result or should_requery: + if should_requery: + logger.info(f"概率触发重新查询,使用ReAct Agent查询,问题: {question[:50]}...") + else: + logger.info(f"未找到缓存答案,使用ReAct Agent查询,问题: {question[:50]}...") + + found_answer, answer, thinking_steps = await _react_agent_solve_question( + question=question, + chat_id=chat_id, + max_iterations=5, + timeout=30.0 + ) + + # 存储到数据库 + _store_thinking_back( + chat_id=chat_id, + question=question, + context=message, # 只存储前500字符作为上下文 + found_answer=found_answer, + answer=answer, + thinking_steps=thinking_steps + ) + if found_answer and answer: - logger.info(f"从thinking_back缓存中获取答案,问题: {question[:50]}...") all_results.append(f"问题:{question}\n答案:{answer}") - continue # 跳过ReAct Agent查询 - - # 如果没有缓存答案,使用ReAct Agent查询 - logger.info(f"未找到缓存答案,使用ReAct Agent查询,问题: {question[:50]}...") - found_answer, answer, thinking_steps = await _react_agent_solve_question( - question=question, - chat_id=chat_id, - max_iterations=5, - timeout=30.0 - ) - - # 存储到数据库 - _store_thinking_back( - chat_id=chat_id, - question=question, - context=message, # 只存储前500字符作为上下文 - found_answer=found_answer, - answer=answer, - thinking_steps=thinking_steps - ) - - if found_answer and answer: - all_results.append(f"问题:{question}\n答案:{answer}") end_time = time.time()