From e9538096e06c2645f92aa9407658cdc899254690 Mon Sep 17 00:00:00 2001 From: MuWinds Date: Fri, 14 Mar 2025 09:06:27 +0800 Subject: [PATCH] =?UTF-8?q?UpdateL=E5=8E=BB=E9=87=8D=E6=9C=BA=E5=88=B6?= =?UTF-8?q?=E6=94=B9=E8=BF=9B=EF=BC=8C=E6=96=B0=E5=A2=9Ememorized=E5=B1=9E?= =?UTF-8?q?=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/plugins/chat/utils.py | 50 +++++++++++-------- src/plugins/memory_system/memory.py | 34 ++----------- .../memory_system/memory_manual_build.py | 4 ++ 3 files changed, 35 insertions(+), 53 deletions(-) diff --git a/src/plugins/chat/utils.py b/src/plugins/chat/utils.py index fa6a7649..1636f0c1 100644 --- a/src/plugins/chat/utils.py +++ b/src/plugins/chat/utils.py @@ -78,41 +78,47 @@ def calculate_information_content(text): def get_closest_chat_from_db(length: int, timestamp: str): - """从数据库中获取最接近指定时间戳的聊天记录 + """从数据库中获取最接近指定时间戳的聊天记录,并记录读取次数 - Args: - length: 要获取的消息数量 - timestamp: 时间戳 - Returns: - list: 消息记录列表,每个记录包含时间和文本信息 + list: 消息记录字典列表,每个字典包含消息内容和时间信息 """ chat_records = [] closest_record = db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) - + # 如果memorized属性不存在就加一个memoried并设置初始值为0 + db.messages.update_many( + {"memorized": {"$exists": False}}, + {"$set": {"memorized": 0}} + ) if closest_record: closest_time = closest_record['time'] - chat_id = closest_record['chat_id'] # 获取chat_id - # 获取该时间戳之后的length条消息,保持相同的chat_id - chat_records = list(db.messages.find( - { - "time": {"$gt": closest_time}, - "chat_id": chat_id # 添加chat_id过滤 - } + chat_id = closest_record['chat_id'] + # 获取该时间戳之后的length条消息,且chat_id相同 + records = list(db.messages.find( + {"time": {"$gt": closest_time}, "chat_id": chat_id} ).sort('time', 1).limit(length)) - # 转换记录格式 - formatted_records = [] - for record in chat_records: - formatted_records.append({ - 'message_id': record['_id'], + # 更新每条消息的memorized属性 + for record in records: + current_memorized = record["memorized"] + if current_memorized > 1: + print("消息已读取1次,跳过") + continue + + # 更新memorized值 + db.messages.update_one( + {"_id": record["_id"]}, + {"$set": {"memorized": current_memorized + 1}} + ) + + # 添加到记录列表中 + chat_records.append({ 'time': record["time"], 'chat_id': record["chat_id"], - 'detailed_plain_text': record.get("detailed_plain_text", "") # 添加文本内容 + 'detailed_plain_text': record["detailed_plain_text"], }) - return formatted_records - return [] + return chat_records async def get_recent_group_messages(chat_id:str, limit: int = 12) -> list: diff --git a/src/plugins/memory_system/memory.py b/src/plugins/memory_system/memory.py index 7354e29a..cce71ada 100644 --- a/src/plugins/memory_system/memory.py +++ b/src/plugins/memory_system/memory.py @@ -192,52 +192,24 @@ class Hippocampus: """ current_timestamp = datetime.datetime.now().timestamp() chat_samples = [] - added_ids = set() # 全局已添加消息ID集合,用于去重 # 短期:1h 中期:4h 长期:24h for _ in range(time_frequency.get('near')): random_time = current_timestamp - random.randint(1, 3600) messages = get_closest_chat_from_db(length=chat_size, timestamp=random_time) if messages: - # 过滤已存在的消息 - filtered = [] - for msg in messages: - msg_id = msg.get("message_id") # 假设消息中存在唯一标识字段 - if msg_id not in added_ids: - filtered.append(msg) - added_ids.add(msg_id) - - if filtered: - chat_samples.append(filtered) + chat_samples.append(messages) for _ in range(time_frequency.get('mid')): random_time = current_timestamp - random.randint(3600, 3600 * 4) messages = get_closest_chat_from_db(length=chat_size, timestamp=random_time) if messages: - # 过滤已存在的消息 - filtered = [] - for msg in messages: - msg_id = msg.get("message_id") # 假设消息中存在唯一标识字段 - if msg_id not in added_ids: - filtered.append(msg) - added_ids.add(msg_id) - - if filtered: - chat_samples.append(filtered) + chat_samples.append(messages) for _ in range(time_frequency.get('far')): random_time = current_timestamp - random.randint(3600 * 4, 3600 * 24) messages = get_closest_chat_from_db(length=chat_size, timestamp=random_time) if messages: - # 过滤已存在的消息 - filtered = [] - for msg in messages: - msg_id = msg.get("message_id") # 假设消息中存在唯一标识字段 - if msg_id not in added_ids: - filtered.append(msg) - added_ids.add(msg_id) - - if filtered: - chat_samples.append(filtered) + chat_samples.append(messages) return chat_samples async def memory_compress(self, messages: list, compress_rate=0.1): diff --git a/src/plugins/memory_system/memory_manual_build.py b/src/plugins/memory_system/memory_manual_build.py index 2d16998e..447f75f3 100644 --- a/src/plugins/memory_system/memory_manual_build.py +++ b/src/plugins/memory_system/memory_manual_build.py @@ -56,6 +56,10 @@ def get_closest_chat_from_db(length: int, timestamp: str): list: 消息记录字典列表,每个字典包含消息内容和时间信息 """ chat_records = [] + db.messages.update_many( + {"memorized": {"$exists": False}}, + {"$set": {"memorized": 0}} + ) closest_record = db.messages.find_one({"time": {"$lte": timestamp}}, sort=[('time', -1)]) if closest_record and closest_record.get('memorized', 0) < 4: