# MaiBot/src/chat/memory_system/Memory_chest.py

import json
import re
import time
from typing import Optional

from json_repair import repair_json

from src.llm_models.utils_model import LLMRequest
from src.config.config import model_config
from src.common.database.database_model import MemoryChest as MemoryChestModel
from src.common.logger import get_logger
from src.config.config import global_config
from src.plugin_system.apis.message_api import build_readable_messages
from src.plugin_system.apis.message_api import get_raw_msg_by_timestamp_with_chat

# Module-level logger, obtained from the project logger factory under the name "memory_chest".
logger = get_logger("memory_chest")

class MemoryChest:
    def __init__(self):
        """Create the two LLM clients and the empty in-memory caches."""
        task_models = model_config.model_task_config

        # Client configured with the `utils_small` model set; used by
        # select_title_by_question and get_answer_by_question.
        self.LLMRequest = LLMRequest(model_set=task_models.utils_small, request_type="memory_chest")

        # Client configured with the `utils` model set; used for building,
        # titling and merging memories.
        self.LLMRequest_build = LLMRequest(model_set=task_models.utils, request_type="memory_chest_build")

        # A chat's running memory is rebuilt only when more than this many
        # messages arrived since the last rebuild.
        self.memory_build_threshold = 30
        # Running memory longer than this many characters is flushed to the database.
        self.memory_size_limit = global_config.memory.max_memory_size

        # {chat_id: {"content": running_content, "last_update_time": timestamp}}
        self.running_content_list: dict = {}
        # [(chat_id, (question, answer, timestamp)), ...]
        self.fetched_memory_list: list = []

    async def build_running_content(self, chat_id: Optional[str] = None) -> Optional[str]:
        """
        Fold a chat's recent messages into its running memory text.

        The messages sent to the chat since the last rebuild are counted; only
        when there are more than ``memory_build_threshold`` of them is the LLM
        asked to merge them into the existing running memory. When the result
        grows beyond ``memory_size_limit`` characters it is saved to the
        database and the chat's running memory is cleared.

        Args:
            chat_id: ID of the chat whose running memory should be updated.

        Returns:
            The text returned by the LLM when a rebuild was attempted, or None
            when no rebuild happened (missing chat_id or too few new messages).
        """
        if not chat_id:
            # Without a chat there is nothing to read. Previously this path
            # crashed on an unbound `message_list`.
            logger.warning("构建运行内容时缺少chat_id，已跳过")
            return None

        # First sight of a chat starts its window "now" with empty content.
        state = self.running_content_list.setdefault(
            chat_id,
            {"content": "", "last_update_time": time.time()},
        )

        # Remember where this window ends so the next one starts exactly here.
        window_end = time.time()
        message_list = get_raw_msg_by_timestamp_with_chat(
            timestamp_start=state["last_update_time"],
            timestamp_end=window_end,
            chat_id=chat_id,
            limit=global_config.chat.max_context_size * 2,
        )

        new_messages_count = len(message_list)
        should_update = new_messages_count > self.memory_build_threshold
        logger.info(f"chat_id {chat_id} 自上次更新后有 {new_messages_count} 条新消息，{'需要' if should_update else '不需要'}更新")
        if not should_update:
            return None

        message_str = build_readable_messages(
            message_list,
            replace_bot_name=True,
            timestamp_mode="relative",
            read_mark=0.0,
            show_actions=True,
        )
        current_running_content = state["content"]

        prompt = f"""
以下是你的记忆内容：
{current_running_content}

请将下面的新聊天记录内的有用的信息，添加到你的记忆中
请主要关注概念和知识，而不是聊天的琐事
如果有表情包，仅在意表情包对上下文的影响，不要在意表情包本身
如果有图片，尽在意内容，不要在意图片的名称和编号
记忆为一段纯文本，逻辑清晰，指出事件，概念的含义，并说明关系
请输出添加后的记忆内容，不要输出其他内容：
{message_str}
"""

        if global_config.debug.show_prompt:
            logger.info(f"记忆仓库构建运行内容 prompt: {prompt}")
        else:
            logger.debug(f"记忆仓库构建运行内容 prompt: {prompt}")

        running_content, _ = await self.LLMRequest_build.generate_response_async(prompt)

        logger.info(f"记忆仓库构建运行内容: {running_content}")

        if running_content:
            # Stamp the entry with the end of the window that was actually
            # read, not with the current time: messages that arrived while the
            # LLM was generating must fall into the next window.
            self.running_content_list[chat_id] = {
                "content": running_content,
                "last_update_time": window_end,
            }

            # Flush to the database once the running memory exceeds the configured size.
            if len(running_content) > self.memory_size_limit:
                await self._save_to_database_and_clear(chat_id, running_content)

        return running_content


    def get_all_titles(self) -> list[str]:
        """
        Return the titles of all memories stored in the database.

        Records with an empty title are skipped. Any error while querying is
        logged and results in an empty list.

        Returns:
            list[str]: All non-empty memory titles.
        """
        try:
            # Only the title column is needed, so do not load every memory's content.
            query = MemoryChestModel.select(MemoryChestModel.title)
            return [record.title for record in query if record.title]
        except Exception as e:
            logger.error(f"获取记忆标题时出错: {e}")
            return []

    async def get_answer_by_question(self, chat_id: str = "", question: str = "") -> str:
        """
        Answer a question from the stored memories.

        A memory title is chosen for the question, the memory's content is
        loaded, and the LLM is asked to quote the relevant passage. When both
        ``chat_id`` and the answer are non-empty, the question/answer pair is
        cached in ``fetched_memory_list``.

        Args:
            chat_id: Chat the answer is cached under; no caching when empty.
            question: The question to answer.

        Returns:
            str: The extracted answer, or "" when no matching memory exists.
        """
        title = await self.select_title_by_question(question)

        if not title:
            return ""

        # Let the database filter by title. As before, when several records
        # share the title the last one returned wins.
        content = None
        for memory in MemoryChestModel.select().where(MemoryChestModel.title == title):
            content = memory.content

        if not content:
            # The record may have vanished since the title was selected;
            # previously this path hit an unbound `content`.
            logger.warning(f"未找到标题为 '{title}' 的记忆")
            return ""

        prompt = f"""
{content}

请根据问题：{question}
在上方内容中，提取相关信息的原文并输出，请务必提取上面原文，不要输出其他内容：
"""

        if global_config.debug.show_prompt:
            logger.info(f"记忆仓库获取答案 prompt: {prompt}")
        else:
            logger.debug(f"记忆仓库获取答案 prompt: {prompt}")

        answer, _ = await self.LLMRequest.generate_response_async(prompt)

        logger.info(f"记忆仓库获取答案: {answer}")

        # Cache the question/answer pair for this chat, then prune the cache.
        if chat_id and answer:
            self.fetched_memory_list.append((chat_id, (question, answer, time.time())))
            self._cleanup_fetched_memory_list()

        return answer

    def get_chat_memories_as_string(self, chat_id: str) -> str:
        """
        Render the cached question/answer pairs of one chat as text.

        Args:
            chat_id: Chat whose cached memories should be rendered.

        Returns:
            str: One line per memory in the form ``问题：xxx,答案:xxxxx``,
            ordered from oldest to newest and joined with newlines; "" when
            the chat has no cached memories or an error occurs.
        """
        try:
            # Keep each timestamp next to its formatted line so the sort is
            # chronological; sorting the formatted strings themselves would be
            # alphabetical.
            dated_lines = [
                (timestamp, f"问题：{question},答案:{answer}")
                for cid, (question, answer, timestamp) in self.fetched_memory_list
                if cid == chat_id
            ]
            # Oldest first, newest last. The sort is stable, so entries with
            # equal timestamps keep their insertion order.
            dated_lines.sort(key=lambda item: item[0])

            result = "\n".join(line for _, line in dated_lines)

            logger.info(f"chat_id {chat_id} 共有 {len(dated_lines)} 条记忆")
            return result

        except Exception as e:
            logger.error(f"获取chat_id {chat_id} 的记忆时出错: {e}")
            return ""


    async def select_title_by_question(self, question: str) -> Optional[str]:
        """
        Ask the LLM which stored memory title can answer the question.

        Args:
            question: The question to find a memory for.

        Returns:
            The stored title matching the LLM's reply, or None when there are
            no stored titles or the reply matches none of them.
        """
        titles = self.get_all_titles()
        if not titles:
            # Nothing to choose from, so skip the LLM round trip.
            logger.debug("记忆仓库为空，无法选择标题")
            return None

        formatted_titles = "".join(f"{title}\n" for title in titles)

        prompt = f"""
所有主题：
{formatted_titles}

请根据以下问题，选择一个能够回答问题的主题：
问题：{question}
请你输出主题，不要输出其他内容，完整输出主题名：
"""

        if global_config.debug.show_prompt:
            logger.info(f"记忆仓库选择标题 prompt: {prompt}")
        else:
            logger.debug(f"记忆仓库选择标题 prompt: {prompt}")

        response, _ = await self.LLMRequest.generate_response_async(prompt)

        # Replies often carry stray whitespace or a trailing newline; compare
        # on the stripped text so those do not defeat the exact match.
        candidate = (response or "").strip()
        selected_title = None
        if candidate:
            for stored_title in titles:
                if stored_title.strip() == candidate:
                    selected_title = stored_title
                    break

        logger.info(f"记忆仓库选择标题: {selected_title}")

        return selected_title

    def _cleanup_fetched_memory_list(self):
        """
        Prune the cache of fetched question/answer pairs.

        Entries whose timestamp is not newer than 600 seconds ago are dropped.
        If more than 10 entries remain, they are sorted by timestamp and the
        5 oldest are discarded. Errors are logged, never raised.
        """
        try:
            expiry_cutoff = time.time() - 600  # 10 minutes = 600 seconds

            # Keep only entries newer than the cutoff.
            fresh_entries = []
            for chat_id, (question, answer, timestamp) in self.fetched_memory_list:
                if timestamp > expiry_cutoff:
                    fresh_entries.append((chat_id, (question, answer, timestamp)))
            self.fetched_memory_list = fresh_entries

            # Over the cap: order oldest-first and cut off the 5 oldest entries.
            if len(fresh_entries) > 10:
                oldest_first = sorted(fresh_entries, key=lambda entry: entry[1][2])
                self.fetched_memory_list = oldest_first[5:]

            logger.debug(f"fetched_memory_list清理后，当前有 {len(self.fetched_memory_list)} 条记忆")

        except Exception as e:
            logger.error(f"清理fetched_memory_list时出错: {e}")

    async def _save_to_database_and_clear(self, chat_id: str, content: str):
        """
        Generate a title for the content, store it, and reset the chat's running memory.

        If the LLM returns no usable title, nothing is stored and the running
        memory is left untouched. Errors are logged, never raised.

        Args:
            chat_id: Chat whose running memory is being flushed.
            content: The memory text to store.
        """
        try:
            title_prompt = f"""
请为以下内容生成一个描述全面的标题，要求描述内容的主要概念和事件：
{content}

请只输出标题，不要输出其他内容：
"""

            if global_config.debug.show_prompt:
                logger.info(f"记忆仓库生成标题 prompt: {title_prompt}")
            else:
                logger.debug(f"记忆仓库生成标题 prompt: {title_prompt}")

            title, _ = await self.LLMRequest_build.generate_response_async(title_prompt)

            # Check emptiness after stripping: a whitespace-only reply would
            # otherwise be stored as an empty title, which get_all_titles
            # skips, leaving the memory unreachable.
            clean_title = title.strip() if title else ""
            if not clean_title:
                logger.warning(f"生成标题失败，chat_id: {chat_id}")
                return

            MemoryChestModel.create(
                title=clean_title,
                content=content,
            )
            logger.info(f"已保存记忆仓库内容，标题: {clean_title}, chat_id: {chat_id}")

            # The content now lives in the database; start the chat's running memory afresh.
            if chat_id in self.running_content_list:
                del self.running_content_list[chat_id]
                logger.info(f"已清空chat_id {chat_id} 的running_content")

        except Exception as e:
            logger.error(f"保存记忆仓库内容时出错: {e}")

    async def choose_merge_target(self, memory_title: str) -> list[str]:
        """
        Pick stored memories related to the given title and return their contents.

        The LLM is shown every stored title and asked for up to 5 related
        ones. Its reply is parsed, de-duplicated, capped at 5 titles, and
        resolved to memory contents.

        Args:
            memory_title: Title to find related memories for.

        Returns:
            list[str]: Contents of the selected memories; [] when nothing is
            stored, nothing matched, or an error occurred.
        """
        try:
            all_titles = self.get_all_titles()
            if not all_titles:
                # Nothing to choose from, so skip the LLM round trip.
                logger.debug("记忆仓库为空，跳过选择合并目标")
                return []

            content = "".join(f"{title}\n" for title in all_titles)

            prompt = f"""
所有记忆列表
{content}

请根据以上记忆列表，选择一个与"{memory_title}"相关的记忆，用json输出：
可以选择多个相关的记忆，但最多不超过5个
例如：
{{
    "selected_title": "选择的相关记忆标题"
}},
{{
    "selected_title": "选择的相关记忆标题"
}},
{{
    "selected_title": "选择的相关记忆标题"
}}
...
请输出JSON格式，不要输出其他内容：
"""
            if global_config.debug.show_prompt:
                logger.info(f"选择合并目标 prompt: {prompt}")
            else:
                logger.debug(f"选择合并目标 prompt: {prompt}")

            merge_target_response, _ = await self.LLMRequest_build.generate_response_async(prompt)

            parsed_titles = self._parse_merge_target_json(merge_target_response)

            # Drop non-string and blank entries, remove repeats while keeping
            # order, and enforce the 5-item limit the prompt asks for.
            unique_titles = dict.fromkeys(
                title.strip() for title in parsed_titles if isinstance(title, str) and title.strip()
            )
            selected_titles = list(unique_titles)[:5]

            selected_contents = self._get_memories_by_titles(selected_titles)

            logger.info(f"选择合并目标结果: {len(selected_contents)} 条记忆")
            return selected_contents

        except Exception as e:
            logger.error(f"选择合并目标时出错: {e}")
            return []

    def _get_memories_by_titles(self, titles: list[str]) -> list[str]:
        """
        Look up the content of each memory named in ``titles``.

        Blank and non-string entries are skipped. A failed lookup for one
        title is logged and does not affect the others.

        Args:
            titles: Memory titles to resolve.

        Returns:
            list[str]: Contents of the memories that were found, in input order.
        """
        try:
            contents = []
            for title in titles:
                # Titles come from LLM-produced JSON and may be null, numbers,
                # etc. Skip those instead of letting `.strip()` raise and
                # discard every result.
                if not isinstance(title, str):
                    logger.warning(f"忽略非字符串标题: {title!r}")
                    continue

                wanted_title = title.strip()
                if not wanted_title:
                    continue

                try:
                    memory_record = (
                        MemoryChestModel.select().where(MemoryChestModel.title == wanted_title).first()
                    )
                    if memory_record:
                        contents.append(memory_record.content)
                        logger.debug(f"找到记忆: {memory_record.title}")
                    else:
                        logger.warning(f"未找到标题为 '{title}' 的记忆")
                except Exception as e:
                    logger.error(f"查找标题 '{title}' 的记忆时出错: {e}")
                    continue

            logger.info(f"成功找到 {len(contents)} 条记忆内容")
            return contents

        except Exception as e:
            logger.error(f"根据标题查找记忆时出错: {e}")
            return []

    def _parse_merge_target_json(self, json_text: str) -> list[str]:
        """
        Extract the selected memory titles from the LLM reply of choose_merge_target.

        The reply is first passed through ``repair_json`` and parsed as a
        whole: a list of objects or a single object carrying
        ``selected_title``. If that yields nothing usable, individual
        ``{... "selected_title" ...}`` objects are searched for in the
        repaired text and then in the raw reply.

        Args:
            json_text: The JSON text returned by the LLM.

        Returns:
            list[str]: The non-empty string titles found; [] when none could
            be parsed or an error occurred.
        """

        def keep_titles(values) -> list[str]:
            # Flatten one level (a model may return a list under the key) and
            # keep only non-empty strings.
            kept = []
            for value in values:
                candidates = value if isinstance(value, list) else [value]
                kept.extend(c for c in candidates if isinstance(c, str) and c.strip())
            return kept

        try:
            if not json_text or not json_text.strip():
                logger.warning("合并目标JSON响应为空")
                return []

            repaired_content = repair_json(json_text)

            # First attempt: parse the repaired text as one JSON document.
            try:
                parsed_data = json.loads(repaired_content)
            except (json.JSONDecodeError, TypeError):
                parsed_data = None

            if isinstance(parsed_data, list):
                return keep_titles(
                    item["selected_title"]
                    for item in parsed_data
                    if isinstance(item, dict) and "selected_title" in item
                )
            if isinstance(parsed_data, dict) and "selected_title" in parsed_data:
                return keep_titles([parsed_data["selected_title"]])

            # Fallback: pick out individual objects mentioning selected_title.
            # The raw reply is searched too, in case repairing lost some of them.
            pattern = r'\{[^}]*"selected_title"[^}]*\}'
            for text in (repaired_content, json_text):
                if not isinstance(text, str):
                    continue
                found = []
                for match in re.findall(pattern, text):
                    try:
                        obj = json.loads(match)
                    except json.JSONDecodeError:
                        continue
                    if "selected_title" in obj:
                        found.append(obj["selected_title"])
                titles = keep_titles(found)
                if titles:
                    return titles

            logger.warning(f"无法解析JSON响应: {json_text[:200]}...")
            return []

        except Exception as e:
            logger.error(f"解析合并目标JSON时出错: {e}")
            return []

    async def merge_memory(self, memory_list: list[str]) -> Optional[str]:
        """
        Merge several memories into a single memory text using the LLM.

        Args:
            memory_list: Memories to merge. Each item may be a plain content
                string (what choose_merge_target returns) or an object
                exposing a ``content`` attribute.

        Returns:
            The merged memory text returned by the LLM, "" when there is
            nothing to merge, or None when an error occurred.
        """
        try:
            if not memory_list:
                logger.warning("合并记忆时没有可合并的内容")
                return ""

            # Plain strings have no `.content`, so fall back to the item itself.
            # Reading `.content` unconditionally failed on every list of strings.
            content = "".join(f"{getattr(memory, 'content', memory)}\n" for memory in memory_list)

            prompt = f"""
以下是多段记忆内容，请将它们合并成一段记忆：
{content}

请主要关注概念和知识，而不是聊天的琐事
记忆为一段纯文本，逻辑清晰，指出事件，概念的含义，并说明关系
请输出添加后的记忆内容，不要输出其他内容：
"""

            if global_config.debug.show_prompt:
                logger.info(f"合并记忆 prompt: {prompt}")
            else:
                logger.debug(f"合并记忆 prompt: {prompt}")

            merged_memory, _ = await self.LLMRequest_build.generate_response_async(prompt)

            return merged_memory
        except Exception as e:
            logger.error(f"合并记忆时出错: {e}")
            return None


# Shared module-level instance. Creating it here runs MemoryChest.__init__
# (which constructs both LLMRequest clients) when this module is imported.
global_memory_chest = MemoryChest()