🤖 自动格式化代码 [skip ci]

pull/937/head
github-actions[bot] 2025-05-12 13:10:43 +00:00
parent 1e015aed9f
commit 1d3ba474bf
1 changed files with 47 additions and 40 deletions

View File

@ -2,9 +2,9 @@ from .observation import ChattingObservation
from src.plugins.knowledge.knowledge_lib import qa_manager
from src.plugins.models.utils_model import LLMRequest
from src.config.config import global_config
from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat,build_readable_messages
from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat, build_readable_messages
import time
import re
import re
import traceback
from src.common.logger_manager import get_logger
from src.individuality.individuality import Individuality
@ -129,7 +129,7 @@ def parse_knowledge_and_get_max_relevance(knowledge_str: str) -> (str, float):
max_relevance = max(float(score) for score in relevance_scores)
except ValueError:
logger.warning(f"解析相关性得分时出错: {relevance_scores}")
return knowledge_str, 0.0 # 出错时返回0.0
return knowledge_str, 0.0 # 出错时返回0.0
else:
# 如果没有找到 "该条知识对于问题的相关性:" 这样的模式,
# 说明可能 qa_manager 返回的格式有变,或者没有有效的知识。
@ -172,8 +172,6 @@ def calculate_replacement_probability(similarity: float) -> float:
# p = s + 0.1
probability = similarity + 0.1
return min(1.0, max(0.0, probability))
class SubMind:
@ -199,12 +197,12 @@ class SubMind:
name = chat_manager.get_stream_name(self.subheartflow_id)
self.log_prefix = f"[{name}] "
self._update_structured_info_str()
# 阶梯式筛选
# 阶梯式筛选
self.knowledge_retrieval_steps = self.knowledge_retrieval_steps = [
{"name": "latest_1_msg", "limit": 1, "relevance_threshold": 0.75}, # 新增最新1条极高阈值
{"name": "latest_2_msgs", "limit": 2, "relevance_threshold": 0.65}, # 新增最新2条较高阈值
{"name": "short_window_3_msgs", "limit": 3, "relevance_threshold": 0.50}, # 原有的3条阈值可保持或微调
{"name": "medium_window_8_msgs", "limit": 8, "relevance_threshold": 0.30}, # 原有的8条阈值可保持或微调
{"name": "latest_1_msg", "limit": 1, "relevance_threshold": 0.75}, # 新增最新1条极高阈值
{"name": "latest_2_msgs", "limit": 2, "relevance_threshold": 0.65}, # 新增最新2条较高阈值
{"name": "short_window_3_msgs", "limit": 3, "relevance_threshold": 0.50}, # 原有的3条阈值可保持或微调
{"name": "medium_window_8_msgs", "limit": 8, "relevance_threshold": 0.30}, # 原有的8条阈值可保持或微调
# 完整窗口的回退逻辑保持不变
]
@ -254,16 +252,16 @@ class SubMind:
)
# 筛选出所有不是 lpmm_knowledge 类型的条目,或者其他需要保留的条目
info_to_keep = [item for item in self.structured_info if item.get("type") != "lpmm_knowledge"]
# 针对我们仅希望 lpmm_knowledge "用完即弃" 的情况:
processed_info_to_keep = []
for item in info_to_keep: # info_to_keep 已经不包含 lpmm_knowledge
for item in info_to_keep: # info_to_keep 已经不包含 lpmm_knowledge
item["ttl"] -= 1
if item["ttl"] > 0:
processed_info_to_keep.append(item)
else:
logger.debug(f"{self.log_prefix} 移除过期的非lpmm_knowledge项: {item.get('id', '未知ID')}")
self.structured_info = processed_info_to_keep
logger.debug(
f"{self.log_prefix} 清理后 structured_info (仅保留非lpmm_knowledge且TTL有效项): "
@ -338,7 +336,6 @@ class SubMind:
logger.error(f"{self.log_prefix} 获取记忆时出错: {e}")
logger.error(traceback.format_exc())
# ---------- 2.5 阶梯式获取知识库信息 ----------
final_knowledge_to_add = None
retrieval_source_info = "未进行知识检索"
@ -352,91 +349,101 @@ class SubMind:
step_name = step_config["name"]
limit = step_config["limit"]
threshold = step_config["relevance_threshold"]
logger.info(f"{self.log_prefix} 尝试阶梯检索 - 阶段: {step_name} (最近{limit}条, 阈值>{threshold})")
try:
# 1. 获取当前阶段的聊天记录上下文
# 我们需要从 observation 中获取原始消息列表来构建特定长度的上下文
# get_raw_msg_before_timestamp_with_chat 在 observation.py 中被导入
# from src.plugins.utils.chat_message_builder import get_raw_msg_before_timestamp_with_chat, build_readable_messages
# 需要确保 ChattingObservation 的实例 (self.observations[0]) 能提供 chat_id
# 并且 build_readable_messages 可用
context_messages_dicts = get_raw_msg_before_timestamp_with_chat(
chat_id=observation.chat_id,
timestamp=time.time(),
limit=limit
chat_id=observation.chat_id, timestamp=time.time(), limit=limit
)
if not context_messages_dicts:
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 未获取到聊天记录,跳过此阶段。")
continue
current_context_text = await build_readable_messages(
messages=context_messages_dicts,
timestamp_mode="lite" # 或者您认为适合知识检索的模式
timestamp_mode="lite", # 或者您认为适合知识检索的模式
)
if not current_context_text:
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 构建的上下文为空,跳过此阶段。")
continue
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 使用上下文: '{current_context_text[:150]}...'")
# 2. 调用知识库进行检索
raw_knowledge_str = qa_manager.get_knowledge(current_context_text)
if raw_knowledge_str:
# 3. 解析知识并检查相关性
knowledge_content, max_relevance = parse_knowledge_and_get_max_relevance(raw_knowledge_str)
logger.info(f"{self.log_prefix} 阶段 '{step_name}' 检索到知识,最高相关性: {max_relevance:.4f}")
if max_relevance >= threshold:
logger.info(f"{self.log_prefix} 阶段 '{step_name}' 满足阈值 ({max_relevance:.4f} >= {threshold}),采纳此知识。")
logger.info(
f"{self.log_prefix} 阶段 '{step_name}' 满足阈值 ({max_relevance:.4f} >= {threshold}),采纳此知识。"
)
final_knowledge_to_add = knowledge_content
retrieval_source_info = f"阶段 '{step_name}' (最近{limit}条, 相关性 {max_relevance:.4f})"
break # 找到符合条件的知识,跳出阶梯循环
break # 找到符合条件的知识,跳出阶梯循环
else:
logger.info(f"{self.log_prefix} 阶段 '{step_name}' 未满足阈值 ({max_relevance:.4f} < {threshold}),继续下一阶段。")
logger.info(
f"{self.log_prefix} 阶段 '{step_name}' 未满足阈值 ({max_relevance:.4f} < {threshold}),继续下一阶段。"
)
else:
logger.debug(f"{self.log_prefix} 阶段 '{step_name}' 未从知识库检索到任何内容。")
except Exception as e_step:
logger.error(f"{self.log_prefix} 阶梯检索阶段 '{step_name}' 发生错误: {e_step}")
logger.error(traceback.format_exc())
continue # 当前阶段出错,尝试下一阶段
continue # 当前阶段出错,尝试下一阶段
# 阶段3: 如果前面的阶梯都没有成功,则使用完整的 chat_observe_info (即您配置的20条)
if not final_knowledge_to_add and chat_observe_info: # 确保 chat_observe_info 可用
logger.info(f"{self.log_prefix} 前序阶梯均未满足条件,尝试使用完整观察窗口 ('{observation.max_now_obs_len}'条)进行检索。")
if not final_knowledge_to_add and chat_observe_info: # 确保 chat_observe_info 可用
logger.info(
f"{self.log_prefix} 前序阶梯均未满足条件,尝试使用完整观察窗口 ('{observation.max_now_obs_len}'条)进行检索。"
)
try:
raw_knowledge_str = qa_manager.get_knowledge(chat_observe_info)
if raw_knowledge_str:
# 对于完整窗口,我们可能不强制要求阈值,或者使用一个较低的阈值
# 或者,您可以选择在这里仍然应用一个阈值,例如 self.knowledge_retrieval_steps 中最后一个的阈值,或一个特定值
knowledge_content, max_relevance = parse_knowledge_and_get_max_relevance(raw_knowledge_str)
logger.info(f"{self.log_prefix} 完整窗口检索到知识,(此处未设阈值,或相关性: {max_relevance:.4f})。")
final_knowledge_to_add = knowledge_content # 默认采纳
retrieval_source_info = f"完整窗口 (最多{observation.max_now_obs_len}条, 相关性 {max_relevance:.4f})"
logger.info(
f"{self.log_prefix} 完整窗口检索到知识,(此处未设阈值,或相关性: {max_relevance:.4f})。"
)
final_knowledge_to_add = knowledge_content # 默认采纳
retrieval_source_info = (
f"完整窗口 (最多{observation.max_now_obs_len}条, 相关性 {max_relevance:.4f})"
)
else:
logger.debug(f"{self.log_prefix} 完整窗口检索也未找到知识。")
except Exception as e_full:
logger.error(f"{self.log_prefix} 完整窗口知识检索发生错误: {e_full}")
logger.error(traceback.format_exc())
# 将最终选定的知识(如果有)添加到 structured_info
if final_knowledge_to_add:
knowledge_item = {
"type": "lpmm_knowledge",
"id": f"lpmm_knowledge_{time.time()}",
"content": final_knowledge_to_add,
"ttl": 1 # 由于是当轮精心选择的可以让TTL短一些下次重新评估或者按照您的意愿设为3
"ttl": 1, # 由于是当轮精心选择的可以让TTL短一些下次重新评估或者按照您的意愿设为3
}
# 我们在方法开头已经清理了旧的 lpmm_knowledge这里直接添加新的
self.structured_info.append(knowledge_item)
logger.info(f"{self.log_prefix} 添加了来自 '{retrieval_source_info}' 的知识到 structured_info (ID: {knowledge_item['id']})")
self._update_structured_info_str() # 更新字符串表示
logger.info(
f"{self.log_prefix} 添加了来自 '{retrieval_source_info}' 的知识到 structured_info (ID: {knowledge_item['id']})"
)
self._update_structured_info_str() # 更新字符串表示
else:
logger.info(f"{self.log_prefix} 经过所有阶梯检索后,没有最终采纳的知识。")