mirror of https://github.com/Mai-with-u/MaiBot.git
remove:移除记忆的 关键点 项目
parent
0debe0efcf
commit
465fb9d865
|
|
@ -368,7 +368,7 @@ class ChatHistory(BaseModel):
|
|||
theme = TextField() # 主题:这段对话的主要内容,一个简短的标题
|
||||
keywords = TextField() # 关键词:这段对话的关键词,JSON格式存储
|
||||
summary = TextField() # 概括:对这段话的平文本概括
|
||||
key_point = TextField(null=True) # 关键信息:话题中的关键信息点,JSON格式存储
|
||||
# key_point = TextField(null=True) # 关键信息:话题中的关键信息点,JSON格式存储
|
||||
count = IntegerField(default=0) # 被检索次数
|
||||
forget_times = IntegerField(default=0) # 被遗忘检查的次数
|
||||
|
||||
|
|
|
|||
|
|
@ -192,7 +192,6 @@ def init_dream_tools(chat_id: str) -> None:
|
|||
("theme", ToolParamType.STRING, "新的主题标题,如果不需要修改可不填。", False, None),
|
||||
("summary", ToolParamType.STRING, "新的概括内容,如果不需要修改可不填。", False, None),
|
||||
("keywords", ToolParamType.STRING, "新的关键词 JSON 字符串,如 ['关键词1','关键词2']。", False, None),
|
||||
("key_point", ToolParamType.STRING, "新的关键信息 JSON 字符串,如 ['要点1','要点2']。", False, None),
|
||||
],
|
||||
update_chat_history,
|
||||
)
|
||||
|
|
@ -201,7 +200,7 @@ def init_dream_tools(chat_id: str) -> None:
|
|||
_dream_tool_registry.register_tool(
|
||||
DreamTool(
|
||||
"create_chat_history",
|
||||
"根据整理后的理解创建一条新的 ChatHistory 概括记录(主题、概括、关键词、关键信息等)。",
|
||||
"根据整理后的理解创建一条新的 ChatHistory 概括记录(主题、概括、关键词等)。",
|
||||
[
|
||||
("theme", ToolParamType.STRING, "新的主题标题(必填)。", True, None),
|
||||
("summary", ToolParamType.STRING, "新的概括内容(必填)。", True, None),
|
||||
|
|
@ -212,10 +211,11 @@ def init_dream_tools(chat_id: str) -> None:
|
|||
True,
|
||||
None,
|
||||
),
|
||||
("original_text", ToolParamType.STRING, "对话原文内容(必填)。", True, None),
|
||||
(
|
||||
"key_point",
|
||||
"participants",
|
||||
ToolParamType.STRING,
|
||||
"新的关键信息 JSON 字符串,如 ['要点1','要点2'](必填)。",
|
||||
"参与人的 JSON 字符串,如 ['用户1','用户2'](必填)。",
|
||||
True,
|
||||
None,
|
||||
),
|
||||
|
|
@ -313,8 +313,7 @@ async def run_dream_agent_once(
|
|||
f"主题={record.theme or '无'}\n"
|
||||
f"关键词={record.keywords or '无'}\n"
|
||||
f"参与者={record.participants or '无'}\n"
|
||||
f"概括={record.summary or '无'}\n"
|
||||
f"关键信息={record.key_point or '无'}"
|
||||
f"概括={record.summary or '无'}"
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
|
|
|
|||
|
|
@ -11,7 +11,8 @@ def make_create_chat_history(chat_id: str):
|
|||
theme: str,
|
||||
summary: str,
|
||||
keywords: str,
|
||||
key_point: str,
|
||||
original_text: str,
|
||||
participants: str,
|
||||
start_time: float,
|
||||
end_time: float,
|
||||
) -> str:
|
||||
|
|
@ -20,7 +21,8 @@ def make_create_chat_history(chat_id: str):
|
|||
logger.info(
|
||||
f"[dream][tool] 调用 create_chat_history("
|
||||
f"theme={bool(theme)}, summary={bool(summary)}, "
|
||||
f"keywords={bool(keywords)}, key_point={bool(key_point)}, "
|
||||
f"keywords={bool(keywords)}, original_text={bool(original_text)}, "
|
||||
f"participants={bool(participants)}, "
|
||||
f"start_time={start_time}, end_time={end_time}) (chat_id={chat_id})"
|
||||
)
|
||||
|
||||
|
|
@ -43,7 +45,8 @@ def make_create_chat_history(chat_id: str):
|
|||
theme=theme,
|
||||
summary=summary,
|
||||
keywords=keywords,
|
||||
key_point=key_point,
|
||||
original_text=original_text,
|
||||
participants=participants,
|
||||
# 对于由 dream 整理产生的新概括,时间范围优先使用工具提供的时间,否则使用当前时间占位
|
||||
start_time=start_ts,
|
||||
end_time=end_ts,
|
||||
|
|
|
|||
|
|
@ -32,8 +32,7 @@ def make_get_chat_history_detail(chat_id: str): # chat_id 目前未直接使用
|
|||
f"主题={record.theme or '无'}\n"
|
||||
f"关键词={record.keywords or '无'}\n"
|
||||
f"参与者={record.participants or '无'}\n"
|
||||
f"概括={record.summary or '无'}\n"
|
||||
f"关键信息={record.key_point or '无'}"
|
||||
f"概括={record.summary or '无'}"
|
||||
)
|
||||
logger.debug(f"[dream][tool] get_chat_history_detail 成功,预览: {result[:200].replace(chr(10), ' ')}")
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -13,13 +13,12 @@ def make_update_chat_history(chat_id: str): # chat_id 目前未直接使用,
|
|||
theme: Optional[str] = None,
|
||||
summary: Optional[str] = None,
|
||||
keywords: Optional[str] = None,
|
||||
key_point: Optional[str] = None,
|
||||
) -> str:
|
||||
"""按字段更新 chat_history(字符串字段要求 JSON 的字段须传入已序列化的字符串)"""
|
||||
try:
|
||||
logger.info(
|
||||
f"[dream][tool] 调用 update_chat_history(memory_id={memory_id}, "
|
||||
f"theme={bool(theme)}, summary={bool(summary)}, keywords={bool(keywords)}, key_point={bool(key_point)})"
|
||||
f"theme={bool(theme)}, summary={bool(summary)}, keywords={bool(keywords)})"
|
||||
)
|
||||
record = ChatHistory.get_or_none(ChatHistory.id == memory_id)
|
||||
if not record:
|
||||
|
|
@ -34,8 +33,6 @@ def make_update_chat_history(chat_id: str): # chat_id 目前未直接使用,
|
|||
data["summary"] = summary
|
||||
if keywords is not None:
|
||||
data["keywords"] = keywords
|
||||
if key_point is not None:
|
||||
data["key_point"] = key_point
|
||||
|
||||
if not data:
|
||||
return "未提供任何需要更新的字段。"
|
||||
|
|
|
|||
|
|
@ -71,16 +71,14 @@ def init_prompt():
|
|||
1. 关键词:提取与话题相关的关键词,用列表形式返回(3-10个关键词)
|
||||
2. 概括:对这段话的平文本概括(50-200字),要求:
|
||||
- 仔细地转述发生的事件和聊天内容;
|
||||
- 可以适当摘取聊天记录中的原文;
|
||||
- 重点突出事件的发展过程和结果;
|
||||
- 围绕话题这个中心进行概括。
|
||||
3. 关键信息:提取话题中的关键信息点,用列表形式返回(3-8个关键信息点),每个关键信息点应该简洁明了。
|
||||
- 提取话题中的关键信息点,关键信息点应该简洁明了。
|
||||
|
||||
请以JSON格式返回,格式如下:
|
||||
{{
|
||||
"keywords": ["关键词1", "关键词2", ...],
|
||||
"summary": "概括内容",
|
||||
"key_point": ["关键信息1", "关键信息2", ...]
|
||||
"summary": "概括内容"
|
||||
}}
|
||||
|
||||
聊天记录:
|
||||
|
|
@ -815,12 +813,38 @@ class ChatHistorySummarizer:
|
|||
original_text = "\n".join(item.messages)
|
||||
|
||||
logger.info(
|
||||
f"{self.log_prefix} 开始打包话题[{topic}] | 消息数: {len(item.messages)} | 时间范围: {start_time:.2f} - {end_time:.2f}"
|
||||
f"{self.log_prefix} 开始将聊天记录构建成记忆:[{topic}] | 消息数: {len(item.messages)} | 时间范围: {start_time:.2f} - {end_time:.2f}"
|
||||
)
|
||||
|
||||
# 使用 LLM 进行总结(基于话题名)
|
||||
success, keywords, summary, key_point = await self._compress_with_llm(original_text, topic)
|
||||
if not success:
|
||||
# 使用 LLM 进行总结(基于话题名),带重试机制
|
||||
max_retries = 3
|
||||
attempt = 0
|
||||
success = False
|
||||
keywords = []
|
||||
summary = ""
|
||||
|
||||
while attempt < max_retries:
|
||||
attempt += 1
|
||||
success, keywords, summary = await self._compress_with_llm(original_text, topic)
|
||||
|
||||
if success and keywords and summary:
|
||||
# 成功获取到有效的 keywords 和 summary
|
||||
if attempt > 1:
|
||||
logger.info(
|
||||
f"{self.log_prefix} 话题[{topic}] LLM 概括在第 {attempt} 次重试后成功"
|
||||
)
|
||||
break
|
||||
|
||||
if attempt < max_retries:
|
||||
logger.warning(
|
||||
f"{self.log_prefix} 话题[{topic}] LLM 概括失败(第 {attempt} 次尝试),准备重试"
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
f"{self.log_prefix} 话题[{topic}] LLM 概括连续 {max_retries} 次失败,放弃存储"
|
||||
)
|
||||
|
||||
if not success or not keywords or not summary:
|
||||
logger.warning(f"{self.log_prefix} 话题[{topic}] LLM 概括失败,不写入数据库")
|
||||
return
|
||||
|
||||
|
|
@ -834,14 +858,13 @@ class ChatHistorySummarizer:
|
|||
theme=topic, # 主题直接使用话题名
|
||||
keywords=keywords,
|
||||
summary=summary,
|
||||
key_point=key_point,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"{self.log_prefix} 话题[{topic}] 成功打包并存储 | 消息数: {len(item.messages)} | 参与者数: {len(participants)}"
|
||||
)
|
||||
|
||||
async def _compress_with_llm(self, original_text: str, topic: str) -> tuple[bool, List[str], str, List[str]]:
|
||||
async def _compress_with_llm(self, original_text: str, topic: str) -> tuple[bool, List[str], str]:
|
||||
"""
|
||||
使用LLM压缩聊天内容(用于单个话题的最终总结)
|
||||
|
||||
|
|
@ -850,7 +873,7 @@ class ChatHistorySummarizer:
|
|||
topic: 话题名称
|
||||
|
||||
Returns:
|
||||
tuple[bool, List[str], str, List[str]]: (是否成功, 关键词列表, 概括, 关键信息列表)
|
||||
tuple[bool, List[str], str]: (是否成功, 关键词列表, 概括)
|
||||
"""
|
||||
prompt = await global_prompt_manager.format_prompt(
|
||||
"hippo_topic_summary_prompt",
|
||||
|
|
@ -920,24 +943,24 @@ class ChatHistorySummarizer:
|
|||
|
||||
keywords = result.get("keywords", [])
|
||||
summary = result.get("summary", "")
|
||||
key_point = result.get("key_point", [])
|
||||
|
||||
if not (keywords and summary) and key_point:
|
||||
logger.warning(f"{self.log_prefix} LLM返回的JSON中缺少字段,原文\n{response}")
|
||||
# 检查必需字段是否为空
|
||||
if not keywords or not summary:
|
||||
logger.warning(f"{self.log_prefix} LLM返回的JSON中缺少必需字段,原文\n{response}")
|
||||
# 返回失败,和模型出错一样,让上层进行重试
|
||||
return False, [], ""
|
||||
|
||||
# 确保keywords和key_point是列表
|
||||
# 确保keywords是列表
|
||||
if isinstance(keywords, str):
|
||||
keywords = [keywords]
|
||||
if isinstance(key_point, str):
|
||||
key_point = [key_point]
|
||||
|
||||
return True, keywords, summary, key_point
|
||||
return True, keywords, summary
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"{self.log_prefix} LLM压缩聊天内容时出错: {e}")
|
||||
logger.error(f"{self.log_prefix} LLM响应: {response if 'response' in locals() else 'N/A'}")
|
||||
# 返回失败标志和默认值
|
||||
return False, [], "压缩失败,无法生成概括", []
|
||||
return False, [], "压缩失败,无法生成概括"
|
||||
|
||||
async def _store_to_database(
|
||||
self,
|
||||
|
|
@ -948,7 +971,6 @@ class ChatHistorySummarizer:
|
|||
theme: str,
|
||||
keywords: List[str],
|
||||
summary: str,
|
||||
key_point: Optional[List[str]] = None,
|
||||
):
|
||||
"""存储到数据库"""
|
||||
try:
|
||||
|
|
@ -968,10 +990,6 @@ class ChatHistorySummarizer:
|
|||
"count": 0,
|
||||
}
|
||||
|
||||
# 存储 key_point(如果存在)
|
||||
if key_point is not None:
|
||||
data["key_point"] = json.dumps(key_point, ensure_ascii=False)
|
||||
|
||||
# 使用db_save存储(使用start_time和chat_id作为唯一标识)
|
||||
# 由于可能有多条记录,我们使用组合键,但peewee不支持,所以使用start_time作为唯一标识
|
||||
# 但为了避免冲突,我们使用组合键:chat_id + start_time
|
||||
|
|
@ -991,7 +1009,6 @@ class ChatHistorySummarizer:
|
|||
await self._import_to_lpmm_knowledge(
|
||||
theme=theme,
|
||||
summary=summary,
|
||||
key_point=key_point,
|
||||
participants=participants,
|
||||
original_text=original_text,
|
||||
)
|
||||
|
|
@ -1007,7 +1024,6 @@ class ChatHistorySummarizer:
|
|||
self,
|
||||
theme: str,
|
||||
summary: str,
|
||||
key_point: Optional[List[str]],
|
||||
participants: List[str],
|
||||
original_text: str,
|
||||
):
|
||||
|
|
@ -1017,7 +1033,6 @@ class ChatHistorySummarizer:
|
|||
Args:
|
||||
theme: 话题主题
|
||||
summary: 概括内容
|
||||
key_point: 关键信息点列表
|
||||
participants: 参与者列表
|
||||
original_text: 原始文本(可能很长,需要截断)
|
||||
"""
|
||||
|
|
@ -1025,7 +1040,8 @@ class ChatHistorySummarizer:
|
|||
from src.chat.knowledge.lpmm_ops import lpmm_ops
|
||||
|
||||
# 构造要导入的文本内容
|
||||
# 格式:主题 + 概括 + 关键信息点 + 参与者信息
|
||||
# 格式:主题 + 概括 + 参与者信息 + 原始内容摘要
|
||||
# 注意:使用单换行符连接,确保整个内容作为一段导入,不被LPMM分段
|
||||
content_parts = []
|
||||
|
||||
# 1. 话题主题
|
||||
|
|
@ -1036,17 +1052,12 @@ class ChatHistorySummarizer:
|
|||
if summary:
|
||||
content_parts.append(f"概括:{summary}")
|
||||
|
||||
# 3. 关键信息点
|
||||
if key_point:
|
||||
key_points_text = "、".join(key_point)
|
||||
content_parts.append(f"关键信息:{key_points_text}")
|
||||
|
||||
# 4. 参与者信息
|
||||
# 3. 参与者信息
|
||||
if participants:
|
||||
participants_text = "、".join(participants)
|
||||
content_parts.append(f"参与者:{participants_text}")
|
||||
|
||||
# 5. 原始文本摘要(如果原始文本太长,只取前500字)
|
||||
# 4. 原始文本摘要(如果原始文本太长,只取前500字)
|
||||
if original_text:
|
||||
# 截断原始文本,避免过长
|
||||
max_original_length = 500
|
||||
|
|
@ -1056,8 +1067,9 @@ class ChatHistorySummarizer:
|
|||
else:
|
||||
content_parts.append(f"原始内容:{original_text}")
|
||||
|
||||
# 将所有部分合并为一个段落(用双换行分隔,符合lpmm_ops.add_content的格式要求)
|
||||
content_to_import = "\n\n".join(content_parts)
|
||||
# 将所有部分合并为一个完整段落(使用单换行符,避免被LPMM分段)
|
||||
# LPMM使用 \n\n 作为段落分隔符,所以这里使用 \n 确保不会被分段
|
||||
content_to_import = "\n".join(content_parts)
|
||||
|
||||
if not content_to_import.strip():
|
||||
logger.warning(f"{self.log_prefix} 聊天历史总结内容为空,跳过导入知识库")
|
||||
|
|
|
|||
|
|
@ -463,18 +463,6 @@ async def get_chat_history_detail(chat_id: str, memory_ids: str) -> str:
|
|||
if record.summary:
|
||||
result_parts.append(f"概括:{record.summary}")
|
||||
|
||||
# 添加关键信息点
|
||||
if record.key_point:
|
||||
try:
|
||||
key_point_data = (
|
||||
json.loads(record.key_point) if isinstance(record.key_point, str) else record.key_point
|
||||
)
|
||||
if isinstance(key_point_data, list) and key_point_data:
|
||||
key_point_str = "\n".join([f" - {str(kp)}" for kp in key_point_data])
|
||||
result_parts.append(f"关键信息点:\n{key_point_str}")
|
||||
except (json.JSONDecodeError, TypeError, ValueError):
|
||||
pass
|
||||
|
||||
results.append("\n".join(result_parts))
|
||||
|
||||
if not results:
|
||||
|
|
|
|||
Loading…
Reference in New Issue