mirror of https://github.com/Mai-with-u/MaiBot.git
feat:加入人物信息检索
parent
aa7fd1df90
commit
ff56bd043c
|
|
@ -34,8 +34,9 @@ def init_memory_retrieval_prompt():
|
|||
1. 对话中是否提到了过去发生的事情、人物、事件或信息
|
||||
2. 是否有需要回忆的内容(比如"之前说过"、"上次"、"以前"等)
|
||||
3. 是否有需要查找历史信息的问题
|
||||
4. 是否有问题可以搜集信息帮助你聊天
|
||||
5. 对话中是否包含黑话、俚语、缩写等可能需要查询的概念
|
||||
4. 是否需要查找某人的信息(person: 如果对话中提到人名、昵称、用户ID等,需要查询该人物的详细信息)
|
||||
5. 是否有问题可以搜集信息帮助你聊天
|
||||
6. 对话中是否包含黑话、俚语、缩写等可能需要查询的概念
|
||||
|
||||
重要提示:
|
||||
- **每次只能提出一个问题**,选择最需要查询的关键问题
|
||||
|
|
@ -44,8 +45,9 @@ def init_memory_retrieval_prompt():
|
|||
- 如果之前已经查询过某个问题并找到了答案,可以直接参考已有结果,不需要重复查询
|
||||
|
||||
如果你认为需要从记忆中检索信息来回答,请:
|
||||
1. 先识别对话中可能需要查询的概念(黑话/俚语/缩写/人名/专有名词等关键词)
|
||||
2. 然后根据上下文提出**一个**最关键的问题来帮助你回复目标消息
|
||||
1. 先识别对话中可能需要查询的概念(黑话/俚语/缩写/专有名词等关键词),放入"concepts"字段
|
||||
2. 识别对话中提到的人物名称(人名、昵称等),放入"person"字段
|
||||
3. 然后根据上下文提出**一个**最关键的问题来帮助你回复目标消息,放入"questions"字段
|
||||
|
||||
问题格式示例:
|
||||
- "xxx在前几天干了什么"
|
||||
|
|
@ -53,14 +55,16 @@ def init_memory_retrieval_prompt():
|
|||
- "xxxx和xxx的关系是什么"
|
||||
- "xxx在某个时间点发生了什么"
|
||||
|
||||
请输出JSON格式,包含两个字段:
|
||||
请输出JSON格式,包含三个字段:
|
||||
- "concepts": 需要检索的概念列表(字符串数组),如果不需要检索概念则输出空数组[]
|
||||
- "person": 需要查询的人物名称列表(字符串数组),如果不需要查询人物信息则输出空数组[]
|
||||
- "questions": 问题数组(字符串数组),如果不需要检索记忆则输出空数组[],如果需要检索则只输出包含一个问题的数组
|
||||
|
||||
输出格式示例(需要检索时):
|
||||
```json
|
||||
{{
|
||||
"concepts": ["AAA", "BBB", "CCC"],
|
||||
"person": ["张三", "李四"],
|
||||
"questions": ["张三在前几天干了什么"]
|
||||
}}
|
||||
```
|
||||
|
|
@ -69,6 +73,7 @@ def init_memory_retrieval_prompt():
|
|||
```json
|
||||
{{
|
||||
"concepts": [],
|
||||
"person": [],
|
||||
"questions": []
|
||||
}}
|
||||
```
|
||||
|
|
@ -305,6 +310,47 @@ async def _retrieve_concepts_with_jargon(
|
|||
return ""
|
||||
|
||||
|
||||
async def _retrieve_persons_info(
    persons: List[str],
    chat_id: str
) -> str:
    """Look up stored information for every person named in ``persons``.

    Each name is passed to ``query_person_info``; the per-person outcome
    (hit, miss, or failure) is collected and joined into one text block.

    Args:
        persons: Person names to look up. Non-string entries, blank entries
            and repeated names are skipped so that nobody is queried twice.
        chat_id: Chat identifier. Currently unused by this function; kept so
            the signature matches the sibling concept-retrieval helper.

    Returns:
        str: A block headed by "【人物信息检索结果】" containing one section per
        person, or "" when there is no usable name to look up.
    """
    if not persons:
        return ""

    # Normalise up front: strip, drop blanks/non-strings and de-duplicate while
    # keeping first-seen order (dict preserves insertion order).
    names = list(dict.fromkeys(
        p.strip() for p in persons if isinstance(p, str) and p.strip()
    ))
    if not names:
        return ""

    # Imported lazily, as in the original code.
    # NOTE(review): presumably to avoid a circular import — confirm.
    from src.memory_system.retrieval_tools.query_person_info import query_person_info

    results = []
    for name in names:
        try:
            person_info = await query_person_info(name)
        except Exception as e:
            logger.error(f"查询人物信息失败: {name}, 错误: {e}")
            results.append(f"查询人物'{name}'信息时发生错误: {str(e)}")
            continue

        if person_info and person_info.startswith("查询失败"):
            # query_person_info swallows its own exceptions and returns a
            # "查询失败: <detail>" string; report that as an error instead of
            # presenting it as if it were the person's profile.
            detail = person_info.partition(":")[2].strip() or person_info
            logger.error(f"查询人物信息失败: {name}, 错误: {detail}")
            results.append(f"查询人物'{name}'信息时发生错误: {detail}")
        elif person_info and not person_info.startswith("未找到"):
            # Only the leading marker signals "not found". A substring test
            # would also discard real records whose text happens to contain
            # "未找到" (e.g. inside a memory point).
            results.append(f"【{name}】\n{person_info}")
            logger.info(f"查询到人物信息: {name}")
        else:
            results.append(f"未找到人物'{name}'的信息")
            logger.info(f"未找到人物信息: {name}")

    # ``names`` is non-empty and every iteration appends exactly one entry,
    # so ``results`` is never empty here.
    return "【人物信息检索结果】\n" + "\n\n".join(results) + "\n"
|
||||
|
||||
|
||||
async def _react_agent_solve_question(
|
||||
question: str,
|
||||
chat_id: str,
|
||||
|
|
@ -972,27 +1018,39 @@ async def build_memory_retrieval_prompt(
|
|||
logger.error(f"LLM生成问题失败: {response}")
|
||||
return ""
|
||||
|
||||
# 解析概念列表和问题列表
|
||||
concepts, questions = _parse_questions_json(response)
|
||||
# 解析概念列表、人物列表和问题列表
|
||||
concepts, persons, questions = _parse_questions_json(response)
|
||||
logger.info(f"解析到 {len(concepts)} 个概念: {concepts}")
|
||||
logger.info(f"解析到 {len(persons)} 个人物: {persons}")
|
||||
logger.info(f"解析到 {len(questions)} 个问题: {questions}")
|
||||
|
||||
# 对概念进行jargon检索,作为初始信息
|
||||
initial_info = ""
|
||||
if concepts:
|
||||
logger.info(f"开始对 {len(concepts)} 个概念进行jargon检索")
|
||||
initial_info = await _retrieve_concepts_with_jargon(concepts, chat_id)
|
||||
if initial_info:
|
||||
logger.info(f"概念检索完成,结果: {initial_info[:200]}...")
|
||||
concept_info = await _retrieve_concepts_with_jargon(concepts, chat_id)
|
||||
if concept_info:
|
||||
initial_info += concept_info
|
||||
logger.info(f"概念检索完成,结果: {concept_info[:200]}...")
|
||||
else:
|
||||
logger.info("概念检索未找到任何结果")
|
||||
|
||||
# 对人物进行信息检索,添加到初始信息
|
||||
if persons:
|
||||
logger.info(f"开始对 {len(persons)} 个人物进行信息检索")
|
||||
person_info = await _retrieve_persons_info(persons, chat_id)
|
||||
if person_info:
|
||||
initial_info += person_info
|
||||
logger.info(f"人物信息检索完成,结果: {person_info[:200]}...")
|
||||
else:
|
||||
logger.info("人物信息检索未找到任何结果")
|
||||
|
||||
# 获取缓存的记忆(与question时使用相同的时间窗口和数量限制)
|
||||
cached_memories = _get_cached_memories(chat_id, time_window_seconds=300.0)
|
||||
|
||||
if not questions:
|
||||
logger.debug("模型认为不需要检索记忆或解析失败")
|
||||
# 即使没有当次查询,也返回缓存的记忆和概念检索结果
|
||||
# 即使没有当次查询,也返回缓存的记忆、概念检索结果和人物信息检索结果
|
||||
all_results = []
|
||||
if initial_info:
|
||||
all_results.append(initial_info.strip())
|
||||
|
|
@ -1002,7 +1060,7 @@ async def build_memory_retrieval_prompt(
|
|||
if all_results:
|
||||
retrieved_memory = "\n\n".join(all_results)
|
||||
end_time = time.time()
|
||||
logger.info(f"无当次查询,返回缓存记忆和概念检索结果,耗时: {(end_time - start_time):.3f}秒")
|
||||
logger.info(f"无当次查询,返回缓存记忆、概念检索和人物信息检索结果,耗时: {(end_time - start_time):.3f}秒")
|
||||
return f"你回忆起了以下信息:\n{retrieved_memory}\n如果与回复内容相关,可以参考这些回忆的信息。\n"
|
||||
else:
|
||||
return ""
|
||||
|
|
@ -1063,14 +1121,14 @@ async def build_memory_retrieval_prompt(
|
|||
return ""
|
||||
|
||||
|
||||
def _parse_questions_json(response: str) -> Tuple[List[str], List[str]]:
|
||||
"""解析问题JSON,返回概念列表和问题列表
|
||||
def _parse_questions_json(response: str) -> Tuple[List[str], List[str], List[str]]:
|
||||
"""解析问题JSON,返回概念列表、人物列表和问题列表
|
||||
|
||||
Args:
|
||||
response: LLM返回的响应
|
||||
|
||||
Returns:
|
||||
Tuple[List[str], List[str]]: (概念列表, 问题列表)
|
||||
Tuple[List[str], List[str], List[str]]: (概念列表, 人物列表, 问题列表)
|
||||
"""
|
||||
try:
|
||||
# 尝试提取JSON(可能包含在```json代码块中)
|
||||
|
|
@ -1089,26 +1147,30 @@ def _parse_questions_json(response: str) -> Tuple[List[str], List[str]]:
|
|||
# 解析JSON
|
||||
parsed = json.loads(repaired_json)
|
||||
|
||||
# 只支持新格式:包含concepts和questions的对象
|
||||
# 只支持新格式:包含concepts、person和questions的对象
|
||||
if not isinstance(parsed, dict):
|
||||
logger.warning(f"解析的JSON不是对象格式: {parsed}")
|
||||
return [], []
|
||||
return [], [], []
|
||||
|
||||
concepts_raw = parsed.get("concepts", [])
|
||||
persons_raw = parsed.get("person", [])
|
||||
questions_raw = parsed.get("questions", [])
|
||||
|
||||
# 确保是列表
|
||||
if not isinstance(concepts_raw, list):
|
||||
concepts_raw = []
|
||||
if not isinstance(persons_raw, list):
|
||||
persons_raw = []
|
||||
if not isinstance(questions_raw, list):
|
||||
questions_raw = []
|
||||
|
||||
# 确保所有元素都是字符串
|
||||
concepts = [c for c in concepts_raw if isinstance(c, str) and c.strip()]
|
||||
persons = [p for p in persons_raw if isinstance(p, str) and p.strip()]
|
||||
questions = [q for q in questions_raw if isinstance(q, str) and q.strip()]
|
||||
|
||||
return concepts, questions
|
||||
return concepts, persons, questions
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"解析问题JSON失败: {e}, 响应内容: {response[:200]}...")
|
||||
return [], []
|
||||
return [], [], []
|
||||
|
|
|
|||
|
|
@ -14,12 +14,14 @@ from .tool_registry import (
|
|||
from .query_jargon import register_tool as register_query_jargon
|
||||
from .query_chat_history import register_tool as register_query_chat_history
|
||||
from .query_lpmm_knowledge import register_tool as register_lpmm_knowledge
|
||||
from .query_person_info import register_tool as register_query_person_info
|
||||
from src.config.config import global_config
|
||||
|
||||
def init_all_tools():
    """Initialise and register all memory-retrieval tools.

    The jargon, chat-history and person-info tools are always registered.
    The LPMM knowledge tool is registered only when
    ``global_config.lpmm_knowledge.lpmm_mode`` equals ``"agent"``.
    """
    # Tools that are available regardless of configuration, in registration order.
    always_registered = (
        register_query_jargon,
        register_query_chat_history,
        register_query_person_info,
    )
    for register in always_registered:
        register()

    lpmm_mode = global_config.lpmm_knowledge.lpmm_mode
    if lpmm_mode == "agent":
        register_lpmm_knowledge()
|
||||
|
|
|
|||
|
|
@ -0,0 +1,233 @@
|
|||
"""
|
||||
根据person_name查询用户信息 - 工具实现
|
||||
支持模糊查询,可以查询某个用户的所有信息
|
||||
"""
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from src.common.logger import get_logger
|
||||
from src.common.database.database_model import PersonInfo
|
||||
from .tool_registry import register_memory_retrieval_tool
|
||||
|
||||
logger = get_logger("memory_retrieval_tools")
|
||||
|
||||
|
||||
# Maximum number of rows fetched (and therefore shown) for one lookup.
_PERSON_QUERY_LIMIT = 20


def _format_timestamp(value):
    """Render a Unix timestamp as ``YYYY-MM-DD HH:MM:SS``; return None if it cannot be converted."""
    try:
        return datetime.fromtimestamp(value).strftime("%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError, OverflowError, OSError) as e:
        # One bad timestamp should not make the whole lookup fail.
        logger.warning(f"无法解析时间戳 {value!r}: {e}")
        return None


def _format_memory_point(memory_point: str) -> str:
    """Format one ``category:content:weight`` memory point as a bullet line."""
    # Category is everything before the FIRST colon and weight everything after
    # the LAST one, so colons inside the content are preserved.
    category, first_sep, remainder = memory_point.partition(":")
    content, last_sep, weight = remainder.rpartition(":")
    if first_sep and last_sep:
        return f" - [{category.strip()}] {content.strip()} (权重: {weight.strip()})"
    # Fewer than three fields: show the raw entry unchanged.
    return f" - {memory_point}"


def _format_memory_points(record) -> list:
    """Return the output lines describing ``record.memory_points`` (empty list if none)."""
    raw = record.memory_points
    if not raw:
        return []

    try:
        points = json.loads(raw) if isinstance(raw, str) else raw
    except (json.JSONDecodeError, TypeError, ValueError) as e:
        logger.warning(f"解析用户 {record.person_id} 的memory_points失败: {e}")
        # Parsing failed: fall back to a truncated preview of the raw value.
        raw_text = str(raw)
        preview = raw_text[:200] + ("..." if len(raw_text) > 200 else "")
        return [f"记忆点(原始数据):{preview}"]

    if not isinstance(points, list):
        return []

    # Only non-empty string entries are rendered; anything else is ignored.
    bullets = [_format_memory_point(p) for p in points if p and isinstance(p, str)]
    return ["记忆点:\n" + "\n".join(bullets)] if bullets else []


def _format_person_record(record, label: str) -> str:
    """Build the multi-line description of one person record, headed by ``label``."""
    lines = [label]

    # Optional text fields: emitted only when they hold a truthy value.
    optional_fields = (
        ("用户名称", record.person_name),
        ("昵称", record.nickname),
        ("用户ID", record.person_id),
        ("平台", record.platform),
        ("平台用户ID", record.user_id),
        ("名称设定原因", record.name_reason),
    )
    lines.extend(f"{title}:{value}" for title, value in optional_fields if value)

    # The known/unknown flag is always reported.
    lines.append(f"是否已认识:{'是' if record.is_known else '否'}")

    for title, timestamp in (
        ("首次认识时间", record.know_since),
        ("最后认识时间", record.last_know),
    ):
        if timestamp:
            formatted = _format_timestamp(timestamp)
            if formatted:
                lines.append(f"{title}:{formatted}")

    if record.know_times:
        lines.append(f"认识次数:{int(record.know_times)}")

    lines.extend(_format_memory_points(record))
    return "\n".join(lines)


async def query_person_info(person_name: str) -> str:
    """Look up user records whose ``person_name`` contains the given text.

    Matching rows are split into exact matches (name equal to the query after
    stripping whitespace) and fuzzy matches; exact matches are listed first.

    Args:
        person_name: Name to search for (matched against the ``person_name``
            field). It is converted to ``str`` and stripped before use.

    Returns:
        str: A human-readable report of the matching records. Special cases:
        "用户名称为空" for a blank name, a message starting with "未找到" when
        nothing matches, and "查询失败: <error>" if any exception occurs
        (exceptions are logged and never propagated to the caller).
    """
    try:
        person_name = str(person_name).strip()
        if not person_name:
            return "用户名称为空"

        # Substring ("contains") match on the name column, capped at the limit.
        query = PersonInfo.select().where(
            PersonInfo.person_name.contains(person_name)
        )
        records = list(query.limit(_PERSON_QUERY_LIMIT))

        if not records:
            return f"未找到模糊匹配'{person_name}'的用户信息"

        # Partition into exact and fuzzy matches, preserving query order.
        exact_matches = []
        fuzzy_matches = []
        for record in records:
            is_exact = bool(record.person_name) and record.person_name.strip() == person_name
            (exact_matches if is_exact else fuzzy_matches).append(record)

        results = [_format_person_record(r, "【精确匹配】") for r in exact_matches]
        results.extend(_format_person_record(r, "【模糊匹配】") for r in fuzzy_matches)

        # ``records`` is non-empty here, so at least one of the two groups is
        # populated and the statistics header always applies.
        stats_parts = []
        if exact_matches:
            stats_parts.append(f"精确匹配:{len(exact_matches)} 条")
        if fuzzy_matches:
            stats_parts.append(f"模糊匹配:{len(fuzzy_matches)} 条")
        stats_text = ",".join(stats_parts)

        body = "\n\n---\n\n".join(results)
        response_text = f"找到 {len(records)} 条匹配的用户信息({stats_text}):\n\n{body}"

        # Hitting the cap means the query may have been truncated.
        if len(records) >= _PERSON_QUERY_LIMIT:
            response_text += f"\n\n(已显示前{_PERSON_QUERY_LIMIT}条结果,可能还有更多匹配记录)"

        return response_text

    except Exception as e:
        logger.error(f"查询用户信息失败: {e}")
        return f"查询失败: {str(e)}"
|
||||
|
||||
|
||||
def register_tool():
    """Register ``query_person_info`` as a memory-retrieval tool.

    The tool is registered under the name "query_person_info" and takes one
    required string parameter, ``person_name``.
    """
    # Schema of the single argument accepted by the tool.
    person_name_param = {
        "name": "person_name",
        "type": "string",
        "description": "用户名称,用于查询用户信息",
        "required": True,
    }

    register_memory_retrieval_tool(
        name="query_person_info",
        description="根据查询某个用户的所有信息。名称、昵称、平台、用户ID、qq号等",
        parameters=[person_name_param],
        execute_func=query_person_info,
    )
|
||||
|
||||
Loading…
Reference in New Issue