mirror of https://github.com/Mai-with-u/MaiBot.git
🤖 自动格式化代码 [skip ci]
parent
1e015aed9f
commit
1d3ba474bf
|
|
@ -2,9 +2,9 @@ from .observation import ChattingObservation
|
|||
from src.plugins.knowledge.knowledge_lib import qa_manager
|
||||
from src.plugins.models.utils_model import LLMRequest
|
||||
from src.config.config import global_config
|
||||
from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat,build_readable_messages
|
||||
from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat, build_readable_messages
|
||||
import time
|
||||
import re
|
||||
import re
|
||||
import traceback
|
||||
from src.common.logger_manager import get_logger
|
||||
from src.individuality.individuality import Individuality
|
||||
|
|
@ -129,7 +129,7 @@ def parse_knowledge_and_get_max_relevance(knowledge_str: str) -> (str, float):
|
|||
max_relevance = max(float(score) for score in relevance_scores)
|
||||
except ValueError:
|
||||
logger.warning(f"解析相关性得分时出错: {relevance_scores}")
|
||||
return knowledge_str, 0.0 # 出错时返回0.0
|
||||
return knowledge_str, 0.0 # 出错时返回0.0
|
||||
else:
|
||||
# 如果没有找到 "该条知识对于问题的相关性:" 这样的模式,
|
||||
# 说明可能 qa_manager 返回的格式有变,或者没有有效的知识。
|
||||
|
|
@ -172,8 +172,6 @@ def calculate_replacement_probability(similarity: float) -> float:
|
|||
# p = s + 0.1
|
||||
probability = similarity + 0.1
|
||||
return min(1.0, max(0.0, probability))
|
||||
|
||||
|
||||
|
||||
|
||||
class SubMind:
|
||||
|
|
@ -199,12 +197,12 @@ class SubMind:
|
|||
name = chat_manager.get_stream_name(self.subheartflow_id)
|
||||
self.log_prefix = f"[{name}] "
|
||||
self._update_structured_info_str()
|
||||
# 阶梯式筛选
|
||||
# 阶梯式筛选
|
||||
self.knowledge_retrieval_steps = self.knowledge_retrieval_steps = [
|
||||
{"name": "latest_1_msg", "limit": 1, "relevance_threshold": 0.75}, # 新增:最新1条,极高阈值
|
||||
{"name": "latest_2_msgs", "limit": 2, "relevance_threshold": 0.65}, # 新增:最新2条,较高阈值
|
||||
{"name": "short_window_3_msgs", "limit": 3, "relevance_threshold": 0.50}, # 原有的3条,阈值可保持或微调
|
||||
{"name": "medium_window_8_msgs", "limit": 8, "relevance_threshold": 0.30}, # 原有的8条,阈值可保持或微调
|
||||
{"name": "latest_1_msg", "limit": 1, "relevance_threshold": 0.75}, # 新增:最新1条,极高阈值
|
||||
{"name": "latest_2_msgs", "limit": 2, "relevance_threshold": 0.65}, # 新增:最新2条,较高阈值
|
||||
{"name": "short_window_3_msgs", "limit": 3, "relevance_threshold": 0.50}, # 原有的3条,阈值可保持或微调
|
||||
{"name": "medium_window_8_msgs", "limit": 8, "relevance_threshold": 0.30}, # 原有的8条,阈值可保持或微调
|
||||
# 完整窗口的回退逻辑保持不变
|
||||
]
|
||||
|
||||
|
|
@ -254,16 +252,16 @@ class SubMind:
|
|||
)
|
||||
# 筛选出所有不是 lpmm_knowledge 类型的条目,或者其他需要保留的条目
|
||||
info_to_keep = [item for item in self.structured_info if item.get("type") != "lpmm_knowledge"]
|
||||
|
||||
|
||||
# 针对我们仅希望 lpmm_knowledge "用完即弃" 的情况:
|
||||
processed_info_to_keep = []
|
||||
for item in info_to_keep: # info_to_keep 已经不包含 lpmm_knowledge
|
||||
for item in info_to_keep: # info_to_keep 已经不包含 lpmm_knowledge
|
||||
item["ttl"] -= 1
|
||||
if item["ttl"] > 0:
|
||||
processed_info_to_keep.append(item)
|
||||
else:
|
||||
logger.debug(f"{self.log_prefix} 移除过期的非lpmm_knowledge项: {item.get('id', '未知ID')}")
|
||||
|
||||
|
||||
self.structured_info = processed_info_to_keep
|
||||
logger.debug(
|
||||
f"{self.log_prefix} 清理后 structured_info (仅保留非lpmm_knowledge且TTL有效项): "
|
||||
|
|
@ -338,7 +336,6 @@ class SubMind:
|
|||
logger.error(f"{self.log_prefix} 获取记忆时出错: {e}")
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
|
||||
# ---------- 2.5 阶梯式获取知识库信息 ----------
|
||||
final_knowledge_to_add = None
|
||||
retrieval_source_info = "未进行知识检索"
|
||||
|
|
@ -352,91 +349,101 @@ class SubMind:
|
|||
step_name = step_config["name"]
|
||||
limit = step_config["limit"]
|
||||
threshold = step_config["relevance_threshold"]
|
||||
|
||||
|
||||
logger.info(f"{self.log_prefix} 尝试阶梯检索 - 阶段: {step_name} (最近{limit}条, 阈值>{threshold})")
|
||||
|
||||
|
||||
try:
|
||||
# 1. 获取当前阶段的聊天记录上下文
|
||||
# 我们需要从 observation 中获取原始消息列表来构建特定长度的上下文
|
||||
# get_raw_msg_before_timestamp_with_chat 在 observation.py 中被导入
|
||||
# from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat, build_readable_messages
|
||||
|
||||
|
||||
# 需要确保 ChattingObservation 的实例 (self.observations[0]) 能提供 chat_id
|
||||
# 并且 build_readable_messages 可用
|
||||
context_messages_dicts = get_raw_msg_before_timestamp_with_chat(
|
||||
chat_id=observation.chat_id,
|
||||
timestamp=time.time(),
|
||||
limit=limit
|
||||
chat_id=observation.chat_id, timestamp=time.time(), limit=limit
|
||||
)
|
||||
|
||||
|
||||
if not context_messages_dicts:
|
||||
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 未获取到聊天记录,跳过此阶段。")
|
||||
continue
|
||||
|
||||
current_context_text = await build_readable_messages(
|
||||
messages=context_messages_dicts,
|
||||
timestamp_mode="lite" # 或者您认为适合知识检索的模式
|
||||
timestamp_mode="lite", # 或者您认为适合知识检索的模式
|
||||
)
|
||||
|
||||
if not current_context_text:
|
||||
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 构建的上下文为空,跳过此阶段。")
|
||||
continue
|
||||
|
||||
|
||||
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 使用上下文: '{current_context_text[:150]}...'")
|
||||
|
||||
|
||||
# 2. 调用知识库进行检索
|
||||
raw_knowledge_str = qa_manager.get_knowledge(current_context_text)
|
||||
|
||||
|
||||
if raw_knowledge_str:
|
||||
# 3. 解析知识并检查相关性
|
||||
knowledge_content, max_relevance = parse_knowledge_and_get_max_relevance(raw_knowledge_str)
|
||||
logger.info(f"{self.log_prefix} 阶段 '{step_name}' 检索到知识,最高相关性: {max_relevance:.4f}")
|
||||
|
||||
|
||||
if max_relevance >= threshold:
|
||||
logger.info(f"{self.log_prefix} 阶段 '{step_name}' 满足阈值 ({max_relevance:.4f} >= {threshold}),采纳此知识。")
|
||||
logger.info(
|
||||
f"{self.log_prefix} 阶段 '{step_name}' 满足阈值 ({max_relevance:.4f} >= {threshold}),采纳此知识。"
|
||||
)
|
||||
final_knowledge_to_add = knowledge_content
|
||||
retrieval_source_info = f"阶段 '{step_name}' (最近{limit}条, 相关性 {max_relevance:.4f})"
|
||||
break # 找到符合条件的知识,跳出阶梯循环
|
||||
break # 找到符合条件的知识,跳出阶梯循环
|
||||
else:
|
||||
logger.info(f"{self.log_prefix} 阶段 '{step_name}' 未满足阈值 ({max_relevance:.4f} < {threshold}),继续下一阶段。")
|
||||
logger.info(
|
||||
f"{self.log_prefix} 阶段 '{step_name}' 未满足阈值 ({max_relevance:.4f} < {threshold}),继续下一阶段。"
|
||||
)
|
||||
else:
|
||||
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 未从知识库检索到任何内容。")
|
||||
|
||||
|
||||
except Exception as e_step:
|
||||
logger.error(f"{self.log_prefix} 阶梯检索阶段 '{step_name}' 发生错误: {e_step}")
|
||||
logger.error(traceback.format_exc())
|
||||
continue # 当前阶段出错,尝试下一阶段
|
||||
continue # 当前阶段出错,尝试下一阶段
|
||||
|
||||
# 阶段3: 如果前面的阶梯都没有成功,则使用完整的 chat_observe_info (即您配置的20条)
|
||||
if not final_knowledge_to_add and chat_observe_info: # 确保 chat_observe_info 可用
|
||||
logger.info(f"{self.log_prefix} 前序阶梯均未满足条件,尝试使用完整观察窗口 ('{observation.max_now_obs_len}'条)进行检索。")
|
||||
if not final_knowledge_to_add and chat_observe_info: # 确保 chat_observe_info 可用
|
||||
logger.info(
|
||||
f"{self.log_prefix} 前序阶梯均未满足条件,尝试使用完整观察窗口 ('{observation.max_now_obs_len}'条)进行检索。"
|
||||
)
|
||||
try:
|
||||
raw_knowledge_str = qa_manager.get_knowledge(chat_observe_info)
|
||||
if raw_knowledge_str:
|
||||
# 对于完整窗口,我们可能不强制要求阈值,或者使用一个较低的阈值
|
||||
# 或者,您可以选择在这里仍然应用一个阈值,例如 self.knowledge_retrieval_steps 中最后一个的阈值,或一个特定值
|
||||
knowledge_content, max_relevance = parse_knowledge_and_get_max_relevance(raw_knowledge_str)
|
||||
logger.info(f"{self.log_prefix} 完整窗口检索到知识,(此处未设阈值,或相关性: {max_relevance:.4f})。")
|
||||
final_knowledge_to_add = knowledge_content # 默认采纳
|
||||
retrieval_source_info = f"完整窗口 (最多{observation.max_now_obs_len}条, 相关性 {max_relevance:.4f})"
|
||||
logger.info(
|
||||
f"{self.log_prefix} 完整窗口检索到知识,(此处未设阈值,或相关性: {max_relevance:.4f})。"
|
||||
)
|
||||
final_knowledge_to_add = knowledge_content # 默认采纳
|
||||
retrieval_source_info = (
|
||||
f"完整窗口 (最多{observation.max_now_obs_len}条, 相关性 {max_relevance:.4f})"
|
||||
)
|
||||
else:
|
||||
logger.debug(f"{self.log_prefix} 完整窗口检索也未找到知识。")
|
||||
except Exception as e_full:
|
||||
logger.error(f"{self.log_prefix} 完整窗口知识检索发生错误: {e_full}")
|
||||
logger.error(traceback.format_exc())
|
||||
|
||||
|
||||
# 将最终选定的知识(如果有)添加到 structured_info
|
||||
if final_knowledge_to_add:
|
||||
knowledge_item = {
|
||||
"type": "lpmm_knowledge",
|
||||
"id": f"lpmm_knowledge_{time.time()}",
|
||||
"content": final_knowledge_to_add,
|
||||
"ttl": 1 # 由于是当轮精心选择的,可以让TTL短一些,下次重新评估(或者按照您的意愿设为3)
|
||||
"ttl": 1, # 由于是当轮精心选择的,可以让TTL短一些,下次重新评估(或者按照您的意愿设为3)
|
||||
}
|
||||
# 我们在方法开头已经清理了旧的 lpmm_knowledge,这里直接添加新的
|
||||
self.structured_info.append(knowledge_item)
|
||||
logger.info(f"{self.log_prefix} 添加了来自 '{retrieval_source_info}' 的知识到 structured_info (ID: {knowledge_item['id']})")
|
||||
self._update_structured_info_str() # 更新字符串表示
|
||||
logger.info(
|
||||
f"{self.log_prefix} 添加了来自 '{retrieval_source_info}' 的知识到 structured_info (ID: {knowledge_item['id']})"
|
||||
)
|
||||
self._update_structured_info_str() # 更新字符串表示
|
||||
else:
|
||||
logger.info(f"{self.log_prefix} 经过所有阶梯检索后,没有最终采纳的知识。")
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue