新数据库模型

pull/1496/head
UnCLAS-Prommer 2026-01-31 21:59:08 +08:00
parent f44598a331
commit 17aff36bec
No known key found for this signature in database
4 changed files with 613 additions and 715 deletions

View File

@ -33,4 +33,72 @@ version 0.3.0 - 2026-01-11
### ModelConfig
- [x] 迁移了原来在`ModelConfig`中的方法到一个单独的临时类`TempMethodsLLMUtils`中
- [x] get_model_info
- [x] get_provider
- [x] get_provider
## 数据库模型设计
仅保留要点说明
### General Modifications
- [x] 所有项目增加自增编号主键`id`
- [x] 统一使用了SQLModel作为基类
- [x] 复杂类型使用JSON格式存储
- [x] 所有时间戳字段统一命名为`timestamp`
### 消息模型 MaiMessage
- [x] 自增编号主键`id`
- [x] 消息元数据
- [x] 消息id`message_id`
- [x] 消息时间戳`time`
- [x] 平台名`platform`
- [x] 用户元数据
- [x] 用户id`user_id`
- [x] 用户昵称`user_nickname`
- [x] 用户备注名`user_cardname`
- [x] 用户平台`user_platform`
- [x] 群组元数据
- [x] 群组id`group_id`
- [x] 群组名称`group_name`
- [x] 群组平台`group_platform`
- [x] 被提及/at字段
- [x] 是否被提及`is_mentioned`
- [x] 是否被at`is_at`
- [x] 消息内容
- [x] 原始消息内容`raw_content`,base64编码存储
- [x] 处理后的纯文本内容`processed_plain_text`
- [x] 真正放入Prompt的消息内容`display_message`
- [x] 消息内部元数据
- [x] 聊天会话id`session_id`
- [x] 回复的消息id`reply_to`
- [x] 是否为表情包消息`is_emoji`
- [x] 是否为图片消息`is_picture`
- [x] 是否为命令消息`is_command`
- [x] 是否为通知消息`is_notify`
- [x] 其他配置`additional_config`,JSON格式存储
### 模型使用情况 ModelUsage
- [x] 模型相关信息
- [x] 请求相关信息
- [x] Token使用情况
### 图片数据模型
- [x] 图片元信息
- [x] 图片哈希值`image_hash`,使用`sha256`同时作为图片唯一ID
- [x] 表情包的情感标签`emotion`
- [x] 是否已经被注册`is_registered`
- [x] 是否被手动禁用`is_banned`
### 动作记录模型 ActionRecord
### 命令执行记录模型 CommandRecord
新增此记录
### 在线时间记录模型 OnlineTime
### 表达方式模型
### 黑话模型
- [x] 重命名`inference_content_only`为`inference_with_content_only`
### 聊天记录模型
- [x] 重命名`original_text`为`original_messages`
- [x] 重命名`forget_times`为`query_forget_count`
### 细枝末节
- [ ] 统一所有的`stream_id`和`chat_id`命名为`session_id`
- [ ] 更换Hash方式为`sha256`
## 一些细枝末节的东西
- [ ] 将`stream_id`和`chat_id`统一命名为`session_id`
- [ ] 映射表
- [ ] `platform_group_user_session_id_map` `平台_群组_用户`-`会话ID` 映射表

View File

@ -0,0 +1,127 @@
import ast
import io
import subprocess
import sys
import tokenize
from pathlib import Path
base_file_path = Path(__file__).parent.parent.absolute().resolve() / "src" / "common" / "database" / "database_model.py"
target_file_path = (
    Path(__file__).parent.parent.absolute().resolve() / "src" / "common" / "database" / "database_datamodel.py"
)

# Import header emitted at the top of the generated module.
HEADER_LINES = (
    "from typing import Optional",
    "from pydantic import BaseModel",
    "from datetime import datetime",
    "from .database_model import ModelUser, ImageType",
)

# Indentation used for every line generated inside a class body.
FIELD_INDENT = "    "


def _refers_to(node, name):
    """Return True if *node* is the bare name ``name`` or an attribute access ending in ``.name``."""
    return (isinstance(node, ast.Name) and node.id == name) or (
        isinstance(node, ast.Attribute) and node.attr == name
    )


def _source_of(source_text, node):
    """Return the original source text of *node*, falling back to ``ast.unparse``."""
    segment = ast.get_source_segment(source_text, node)
    return segment if segment is not None else ast.unparse(node)


def _is_table_model(class_node):
    """Return True for ``class X(SQLModel, table=True)`` style definitions."""
    has_sqlmodel = any(_refers_to(base, "SQLModel") for base in class_node.bases)
    has_table_kw = any(
        kw.arg == "table" and isinstance(kw.value, ast.Constant) and kw.value.value is True
        for kw in class_node.keywords
    )
    return has_sqlmodel and has_table_kw


def _field_default(call):
    """Return the AST node of the default passed to ``Field(...)``, or None.

    Both ``Field(default=x)`` and the positional form ``Field(x)`` are
    recognised. ``default_factory`` is deliberately not carried over, so such
    fields stay required in the generated model.
    """
    for kw in call.keywords:
        if kw.arg == "default":
            return kw.value
    if call.args and not isinstance(call.args[0], ast.Starred):
        return call.args[0]
    return None


def _render_field(item, source_text):
    """Turn one class-body statement into ``(field_name, generated_line)``.

    Returns None for statements that are not simple field definitions and for
    ``__tablename__``.
    """
    if isinstance(item, ast.Assign):
        if len(item.targets) != 1 or not isinstance(item.targets[0], ast.Name):
            return None
        name = item.targets[0].id
        if name == "__tablename__":
            return None
        return name, f"{FIELD_INDENT}{name} = {_source_of(source_text, item.value)}"

    if isinstance(item, ast.AnnAssign):
        if not isinstance(item.target, ast.Name):
            return None
        name = item.target.id
        annotation = _source_of(source_text, item.annotation)
        value = item.value
        if isinstance(value, ast.Call) and _refers_to(value.func, "Field"):
            # Only the plain default survives; column options such as
            # index/max_length have no meaning on the pydantic side.
            value = _field_default(value)
        if value is None:
            return name, f"{FIELD_INDENT}{name}: {annotation}"
        return name, f"{FIELD_INDENT}{name}: {annotation} = {_source_of(source_text, value)}"

    return None


def _collect_comments(source_text):
    """Map line number -> comment text (without the leading ``#``).

    The tokenizer is used so that a ``#`` inside a string literal is never
    mistaken for the start of a comment.
    """
    comments = {}
    for tok in tokenize.generate_tokens(io.StringIO(source_text).readline):
        if tok.type == tokenize.COMMENT:
            comments[tok.start[0]] = tok.string[1:].strip()
    return comments


def _comment_for(item, comments):
    """Return the first non-empty comment on any line spanned by *item*, or ``""``."""
    first = item.lineno
    last = getattr(item, "end_lineno", None) or first
    for lineno in range(first, last + 1):
        text = comments.get(lineno)
        if text:
            return text
    return ""


def _docstring_line(comment):
    """Render *comment* as an attribute docstring line.

    Backslashes and double quotes are escaped so that the generated literal is
    always valid, even when the comment ends with a quote.
    """
    escaped = comment.replace("\\", "\\\\").replace('"', '\\"')
    return f'{FIELD_INDENT}"""{escaped}"""'


def generate_datamodel_source(source_text):
    """Build the text of the pydantic data-model module.

    Every top-level ``SQLModel`` class declared with ``table=True`` in
    *source_text* becomes a ``BaseModel`` class with the same name and fields.
    The inline comment of each field is emitted as an attribute docstring; a
    warning is printed for fields without one.

    Raises:
        SyntaxError: if *source_text* is not valid Python.
    """
    tree = ast.parse(source_text)
    comments = _collect_comments(source_text)
    code_lines = list(HEADER_LINES)

    for node in tree.body:
        if not isinstance(node, ast.ClassDef) or not _is_table_model(node):
            continue

        class_name = node.name
        code_lines.append("")
        code_lines.append(f"class {class_name}(BaseModel):")

        fields_added = 0
        for item in node.body:
            rendered = _render_field(item, source_text)
            if rendered is None:
                continue
            name, line = rendered
            fields_added += 1
            code_lines.append(line)

            comment = _comment_for(item, comments)
            if comment:
                code_lines.append(_docstring_line(comment))
            else:
                print(f"Warning: No comment found for field '{name}' in class '{class_name}'.")

        if fields_added == 0:
            # Keep the class body syntactically valid.
            code_lines.append(f"{FIELD_INDENT}pass")

    return "\n".join(code_lines) + "\n"


def main():
    """Regenerate ``database_datamodel.py`` and format it with ruff."""
    source_text = base_file_path.read_text(encoding="utf-8")
    target_file_path.write_text(generate_datamodel_source(source_text), encoding="utf-8")

    try:
        result = subprocess.run(["ruff", "format", str(target_file_path)], capture_output=True, text=True)
    except FileNotFoundError:
        print("ruff 未找到,请安装 ruff 并确保其在 PATH 中例如pip install ruff", file=sys.stderr)
        sys.exit(127)

    # Relay ruff's stdout/stderr unchanged.
    if result.stdout:
        print(result.stdout, end="")
    if result.stderr:
        print(result.stderr, file=sys.stderr, end="")
    if result.returncode != 0:
        print(f"ruff 检查失败,退出码:{result.returncode}", file=sys.stderr)
        sys.exit(result.returncode)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,246 @@
from typing import Optional
from pydantic import BaseModel
from datetime import datetime
from .database_model import ModelUser, ImageType
class MaiMessage(BaseModel):
    # Pydantic model carrying the fields of one chat message.
    # NOTE(review): appears to be generated from the SQLModel table class of the
    # same name; manual edits are presumably overwritten on regeneration - confirm.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    message_id: str  # Message id.
    """消息id"""
    time: float  # Message time, in seconds.
    """消息时间,单位为秒"""
    platform: str  # Top-level platform field.
    """顶层平台字段"""
    user_id: str  # Sender's user id.
    """发送者用户id"""
    user_nickname: str  # Sender's nickname.
    """发送者昵称"""
    user_cardname: Optional[str] = None  # Sender's remark (card) name.
    """发送者备注名"""
    user_platform: Optional[str] = None  # Sender's platform.
    """发送者平台"""
    group_id: Optional[str] = None  # Group id.
    """群组id"""
    group_name: Optional[str] = None  # Group name.
    """群组名称"""
    group_platform: Optional[str] = None  # Group platform.
    """群组平台"""
    is_mentioned: bool = False  # "Mentioned" flag.
    """被提及"""
    is_at: bool = False  # "@-mentioned" flag.
    """被at"""
    session_id: str  # Chat session id.
    """聊天会话id"""
    reply_to: Optional[str] = None  # Id of the message being replied to.
    """回复的消息id"""
    is_emoji: bool = False  # Whether this is an emoji/sticker message.
    """是否为表情包消息"""
    is_picture: bool = False  # Whether this is an image message.
    """是否为图片消息"""
    is_command: bool = False  # Whether this is a command.
    """是否为命令"""
    is_notify: bool = False  # Whether this is a notification message.
    """是否为通知消息"""
    raw_content: str  # Base64-encoded raw message content.
    """base64编码的原始消息内容"""
    processed_plain_text: str  # Flattened plain-text form of the message.
    """平面化处理后的纯文本消息"""
    display_message: str  # Displayed message content; this is what is put into the prompt.
    """显示的消息内容被放入Prompt"""
    additional_config: Optional[str] = None  # Extra configuration, stored as JSON text.
    """额外配置JSON格式存储"""
class ModelUsage(BaseModel):
    # Pydantic model describing one model request: which model, who used it, and token/cost figures.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    model_name: str  # Actual model name (the provider's name for it).
    """模型实际名称(供应商名称)"""
    model_assign_name: Optional[str] = None  # Assigned model name (user-defined).
    """模型分配名称(用户自定义名称)"""
    model_api_provider_name: str  # Name of the model API provider.
    """模型API供应商名称"""
    endpoint: Optional[str] = None  # Specific endpoint of the model API.
    """模型API的具体endpoint"""
    user_type: ModelUser = ModelUser.SYSTEM  # Kind of model user; defaults to SYSTEM.
    """模型使用者类型"""
    request_type: str  # Internal request type; records which module used the model.
    """内部请求类型,记录哪种模块使用了此模型"""
    time_cost: float  # Duration of this request, in seconds.
    """本次请求耗时,单位秒"""
    timestamp: datetime  # Request timestamp.
    """请求时间戳"""
    prompt_tokens: int  # Number of prompt tokens.
    """提示词令牌数"""
    completion_tokens: int  # Number of completion tokens.
    """完成词令牌数"""
    total_tokens: int  # Total number of tokens.
    """总令牌数"""
    cost: float  # Cost of this request, in yuan.
    """本次请求的费用,单位元"""
class Images(BaseModel):
    # Pydantic model for a stored image; `image_type` distinguishes the kinds of image held here.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    image_hash: str = ""  # Image hash (sha256); also serves as the unique image id.
    """图片哈希使用sha256哈希值亦作为图片唯一ID"""
    description: str  # Description of the image.
    """图片的描述"""
    full_path: str  # Full path of the file (including the file name).
    """文件的完整路径 (包括文件名)"""
    # Image type; defaults to ImageType.EMOJI. NOTE(review): this is the only
    # field here without an attribute docstring.
    image_type: ImageType = ImageType.EMOJI
    emotion: Optional[str] = None  # Emotion tags of the sticker, comma-separated.
    """表情包的情感标签,逗号分隔"""
    query_count: int = 0  # Number of times queried.
    """被查询次数"""
    is_registered: bool = False  # Whether it has been registered.
    """是否已经注册"""
    is_banned: bool = False  # Manually disabled.
    """被手动禁用"""
    record_time: datetime  # Record time (when the entry was created).
    """记录时间(被创建的时间)"""
    register_time: Optional[datetime] = None  # Registration time (when it was registered as a usable sticker).
    """注册时间(被注册为可用表情包的时间)"""
    vlm_processed: bool = False  # Whether it has already been processed by the VLM.
    """是否已经过VLM处理"""
class ActionRecord(BaseModel):
    # Pydantic model for one recorded action: identity, session, and the prompt-related payload.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    action_id: str  # Action id.
    """动作ID"""
    timestamp: datetime  # Record timestamp.
    """记录时间戳"""
    session_id: str  # session_id of the corresponding ChatSession.
    """对应的 ChatSession session_id"""
    action_name: str  # Action name.
    """动作名称"""
    action_reasoning: Optional[str] = None  # Reasoning behind the action.
    """动作推理过程"""
    action_data: Optional[str] = None  # Action data, stored as JSON text.
    """动作数据JSON格式存储"""
    action_builtin_prompt: Optional[str] = None  # Built-in action prompt.
    """内置动作提示"""
    action_display_prompt: Optional[str] = None  # Content that is finally fed into the prompt.
    """最终输入到Prompt的内容"""
class CommandRecord(BaseModel):
    # Pydantic model for one executed command and its result.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    timestamp: datetime  # Record timestamp.
    """记录时间戳"""
    session_id: str  # session_id of the corresponding ChatSession.
    """对应的 ChatSession session_id"""
    command_name: str  # Command name.
    """命令名称"""
    command_data: Optional[str] = None  # Command data, stored as JSON text.
    """命令数据JSON格式存储"""
    command_result: Optional[str] = None  # Result of executing the command.
    """命令执行结果"""
class OnlineTime(BaseModel):
    # Pydantic model for one online-time record (start, end and duration).
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    timestamp: datetime  # Timestamp.
    """时间戳"""
    # NOTE(review): the field name says minutes, but the attribute docstring
    # below says "duration, in seconds" - confirm the intended unit.
    duration_minutes: int
    """时长,单位秒"""
    start_timestamp: datetime  # Time of going online.
    """上线时间"""
    end_timestamp: datetime  # Time of going offline.
    """下线时间"""
class Expression(BaseModel):
    # Pydantic model for a stored expression style (situation/style pair plus usage data).
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    situation: str  # Situation.
    """情景"""
    style: str  # Style.
    """风格"""
    context: str  # Context.
    """上下文"""
    # NOTE(review): no description is given for this field anywhere in view.
    up_content: str
    content_list: str  # Content list, stored as JSON text.
    """内容列表JSON格式存储"""
    count: int = 0  # Usage count.
    """使用次数"""
    last_active_time: datetime  # Time of last use.
    """上次使用时间"""
    create_time: datetime  # Creation time.
    """创建时间"""
    # Session id; used to tell whether the expression is global.
    # NOTE(review): presumably None means "global" - confirm.
    session_id: Optional[str] = None
    """会话ID区分是否为全局表达方式"""
class Jargon(BaseModel):
    # Pydantic model for a jargon (slang) entry and its inferred meaning.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    content: str  # Jargon content.
    """黑话内容"""
    raw_content: Optional[str] = None  # Original, unprocessed jargon content.
    """原始内容,未处理的黑话内容"""
    meaning: str  # Meaning of the jargon.
    """黑话含义"""
    # Session id; used to tell whether the jargon is global.
    # NOTE(review): presumably None means "global" - confirm.
    session_id: Optional[str] = None
    """会话ID区分是否为全局黑话"""
    count: int = 0  # Usage count.
    """使用次数"""
    is_jargon: Optional[bool] = True  # Whether this is jargon; False means ordinary speech.
    """是否为黑话False表示为白话"""
    # Whether all inference is finished; per the docstring below, no further
    # inference happens once count > 100.
    is_complete: bool = False
    """是否为已经完成全部推断count > 100后不再推断"""
    inference_with_context: Optional[str] = None  # Inference result using context, JSON text.
    """带上下文的推断结果JSON格式"""
    inference_with_content_only: Optional[str] = None  # Inference result based on the entry alone, JSON text.
    """只基于词条的推断结果JSON格式"""
class ChatHistory(BaseModel):
    # Pydantic model for a stored chat-history segment and its summary data.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    session_id: str  # Chat session id.
    """聊天会话ID"""
    start_timestamp: datetime  # Start time of the chat.
    """聊天开始时间"""
    end_timestamp: datetime  # End time of the chat.
    """聊天结束时间"""
    query_count: int = 0  # Number of times retrieved.
    """被检索次数"""
    query_forget_count: int = 0  # Number of times checked by the forgetting pass.
    """被遗忘检查的次数"""
    original_messages: str  # Original text of the conversation.
    """对话原文"""
    participants: str  # Participant list, stored as JSON text.
    """参与者列表JSON格式存储"""
    theme: str  # Conversation theme: its main content as a short title.
    """对话主题:这段对话的主要内容,一个简短的标题"""
    keywords: str  # Keywords of the conversation, stored as JSON text.
    """关键词这段对话的关键词JSON格式存储"""
    summary: str  # Plain-text summary of the conversation.
    """概括:对这段话的平文本概括"""
class ThinkingQuestion(BaseModel):
    # Pydantic model for a question being thought about, its answer and the steps taken.
    id: Optional[int] = None  # Auto-increment primary key.
    """自增主键"""
    question: str  # Question text.
    """问题内容"""
    context: Optional[str] = None  # Context.
    """上下文"""
    found_answer: bool = False  # Whether an answer was found.
    """是否找到答案"""
    answer: Optional[str] = None  # Answer to the question.
    """问题答案"""
    thinking_steps: Optional[str] = None  # Thinking steps, stored as JSON text.
    """思考步骤JSON格式存储"""
    created_timestamp: datetime  # Creation time.
    """创建时间"""
    updated_timestamp: datetime  # Time of last update.
    """最后更新时间"""

View File

@ -1,778 +1,235 @@
from peewee import Model, DoubleField, IntegerField, BooleanField, TextField, FloatField, DateTimeField
from .database import db
import datetime
from src.common.logger import get_logger
logger = get_logger("database_model")
# 请在此处定义您的数据库实例。
# 您需要取消注释并配置适合您的数据库的部分。
# 例如,对于 SQLite:
# db = SqliteDatabase('MaiBot.db')
#
# 对于 PostgreSQL:
# db = PostgresqlDatabase('your_db_name', user='your_user', password='your_password',
# host='localhost', port=5432)
#
# 对于 MySQL:
# db = MySQLDatabase('your_db_name', user='your_user', password='your_password',
# host='localhost', port=3306)
from typing import Optional
from sqlalchemy import Column, Float, Enum as SQLEnum
from sqlmodel import SQLModel, Field
from enum import Enum
from datetime import datetime
# 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。
# 这允许您在一个地方为所有模型指定数据库。
class ModelUser(str, Enum):
SYSTEM = "system"
PLUGIN = "plugin"
class BaseModel(Model):
class Meta:
# 将下面的 'db' 替换为您实际的数据库实例变量名。
database = db # 例如: database = my_actual_db_instance
pass # 在用户定义数据库实例之前,此处为占位符
class ImageType(str, Enum):
EMOJI = "emoji"
IMAGE = "image"
class ChatStreams(BaseModel):
"""
用于存储流式记录数据的模型类似于提供的 MongoDB 结构
"""
class MaiMessage(SQLModel, table=True):
__tablename__ = "mai_messages" # type: ignore
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
# stream_id: "a544edeb1a9b73e3e1d77dff36e41264"
# 假设 stream_id 是唯一的,并为其创建索引以提高查询性能。
stream_id = TextField(unique=True, index=True)
# 消息元数据
message_id: str = Field(index=True, max_length=255) # 消息id
time: float = Field(sa_column=Column(Float)) # 消息时间,单位为秒
platform: str = Field(index=True, max_length=100) # 顶层平台字段
# 消息发送者信息
user_id: str = Field(index=True, max_length=255) # 发送者用户id
user_nickname: str = Field(index=True, max_length=255) # 发送者昵称
user_cardname: Optional[str] = Field(default=None, max_length=255, nullable=True) # 发送者备注名
user_platform: Optional[str] = Field(default=None, max_length=100, nullable=True) # 发送者平台
# 群聊信息(如果有)
group_id: Optional[str] = Field(index=True, default=None, max_length=255, nullable=True) # 群组id
group_name: Optional[str] = Field(default=None, max_length=255, nullable=True) # 群组名称
group_platform: Optional[str] = Field(default=None, max_length=100, nullable=True) # 群组平台
# 被提及/at字段
is_mentioned: bool = Field(default=False) # 被提及
is_at: bool = Field(default=False) # 被at
# create_time: 1746096761.4490178 (时间戳精确到小数点后7位)
# DoubleField 用于存储浮点数,适合此类时间戳。
create_time = DoubleField()
# 消息内部元数据
session_id: str = Field(index=True, max_length=255) # 聊天会话id
reply_to: Optional[str] = Field(default=None, max_length=255, nullable=True) # 回复的消息id
is_emoji: bool = Field(default=False) # 是否为表情包消息
is_picture: bool = Field(default=False) # 是否为图片消息
is_command: bool = Field(default=False) # 是否为命令
is_notify: bool = Field(default=False) # 是否为通知消息
# group_info 字段:
# platform: "qq"
# group_id: "941657197"
# group_name: "测试"
group_platform = TextField(null=True) # 群聊信息可能不存在
group_id = TextField(null=True)
group_name = TextField(null=True)
# 消息内容
raw_content: str # base64编码的原始消息内容
processed_plain_text: str = Field(index=True) # 平面化处理后的纯文本消息
display_message: str # 显示的消息内容被放入Prompt
# last_active_time: 1746623771.4825106 (时间戳精确到小数点后7位)
last_active_time = DoubleField()
# platform: "qq" (顶层平台字段)
platform = TextField()
# user_info 字段:
# platform: "qq"
# user_id: "1787882683"
# user_nickname: "墨梓柒(IceSakurary)"
# user_cardname: ""
user_platform = TextField()
user_id = TextField()
user_nickname = TextField()
# user_cardname 可能为空字符串或不存在,设置 null=True 更具灵活性。
user_cardname = TextField(null=True)
class Meta:
# 如果 BaseModel.Meta.database 已设置,则此模型将继承该数据库配置。
# 如果不使用带有数据库实例的 BaseModel或者想覆盖它
# 请取消注释并在下面设置数据库实例:
# database = db
table_name = "chat_streams" # 可选:明确指定数据库中的表名
# 其他配置
additional_config: Optional[str] = Field(default=None) # 额外配置JSON格式存储
class LLMUsage(BaseModel):
"""
用于存储 API 使用日志数据的模型
"""
class ModelUsage(SQLModel, table=True):
__tablename__ = "llm_usage" # type: ignore
model_name = TextField(index=True) # 添加索引
model_assign_name = TextField(null=True) # 添加索引
model_api_provider = TextField(null=True) # 添加索引
user_id = TextField(index=True) # 添加索引
request_type = TextField(index=True) # 添加索引
endpoint = TextField()
prompt_tokens = IntegerField()
completion_tokens = IntegerField()
total_tokens = IntegerField()
cost = DoubleField()
time_cost = DoubleField(null=True)
status = TextField()
timestamp = DateTimeField(index=True) # 更改为 DateTimeField 并添加索引
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
class Meta:
# 如果 BaseModel.Meta.database 已设置,则此模型将继承该数据库配置。
# database = db
table_name = "llm_usage"
# 模型相关信息
model_name: str = Field(index=True, max_length=255) # 模型实际名称(供应商名称)
model_assign_name: Optional[str] = Field(index=True, default=None, max_length=255) # 模型分配名称(用户自定义名称)
model_api_provider_name: str = Field(index=True, max_length=255) # 模型API供应商名称
# 请求相关信息
endpoint: Optional[str] = Field(default=None, max_length=255, nullable=True) # 模型API的具体endpoint
user_type: ModelUser = Field(sa_column=Column(SQLEnum(ModelUser)), default=ModelUser.SYSTEM) # 模型使用者类型
request_type: str = Field(max_length=50) # 内部请求类型,记录哪种模块使用了此模型
time_cost: float = Field(sa_column=Column(Float)) # 本次请求耗时,单位秒
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 请求时间戳
# Token使用情况
prompt_tokens: int # 提示词令牌数
completion_tokens: int # 完成词令牌数
total_tokens: int # 总令牌数
cost: float # 本次请求的费用,单位元
class Emoji(BaseModel):
"""表情包"""
class Images(SQLModel, table=True):
"""用于同时存储表情包和图片的数据库模型。"""
full_path = TextField(unique=True, index=True) # 文件的完整路径 (包括文件名)
format = TextField() # 图片格式
emoji_hash = TextField(index=True) # 表情包的哈希值
description = TextField() # 表情包的描述
query_count = IntegerField(default=0) # 查询次数(用于统计表情包被查询描述的次数)
is_registered = BooleanField(default=False) # 是否已注册
is_banned = BooleanField(default=False) # 是否被禁止注册
# emotion: list[str] # 表情包的情感标签 - 存储为文本,应用层处理序列化/反序列化
emotion = TextField(null=True)
record_time = FloatField() # 记录时间(被创建的时间)
register_time = FloatField(null=True) # 注册时间(被注册为可用表情包的时间)
usage_count = IntegerField(default=0) # 使用次数(被使用的次数)
last_used_time = FloatField(null=True) # 上次使用时间
__tablename__ = "images" # type: ignore
class Meta:
# database = db # 继承自 BaseModel
table_name = "emoji"
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
# 元信息
image_hash: str = Field(default="", max_length=255) # 图片哈希使用sha256哈希值亦作为图片唯一ID
description: str # 图片的描述
full_path: str = Field(index=True, max_length=1024) # 文件的完整路径 (包括文件名)
image_type: ImageType = Field(sa_column=Column(SQLEnum(ImageType)), default=ImageType.EMOJI)
"""图片类型,例如 'emoji''image'"""
emotion: Optional[str] = Field(default=None, nullable=True) # 表情包的情感标签,逗号分隔
query_count: int = Field(default=0) # 被查询次数
is_registered: bool = Field(default=False) # 是否已经注册
is_banned: bool = Field(default=False) # 被手动禁用
record_time: datetime = Field(default_factory=datetime.now, index=True) # 记录时间(被创建的时间)
register_time: Optional[datetime] = Field(default=None, nullable=True) # 注册时间(被注册为可用表情包的时间)
vlm_processed: bool = Field(default=False) # 是否已经过VLM处理
class Messages(BaseModel):
"""
用于存储消息数据的模型
"""
class ActionRecord(SQLModel, table=True):
"""存储动作记录"""
message_id = TextField(index=True) # 消息 ID (更改自 IntegerField)
time = DoubleField() # 消息时间戳
__tablename__ = "action_records" # type: ignore
chat_id = TextField(index=True) # 对应的 ChatStreams stream_id
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
reply_to = TextField(null=True)
# 元信息
action_id: str = Field(index=True, max_length=255) # 动作ID
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 记录时间戳
session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id
interest_value = DoubleField(null=True)
key_words = TextField(null=True)
key_words_lite = TextField(null=True)
# 调用信息
action_name: str = Field(max_length=255) # 动作名称
action_reasoning: Optional[str] = Field(default=None) # 动作推理过程
action_data: Optional[str] = Field(default=None) # 动作数据JSON格式存储
is_mentioned = BooleanField(null=True)
is_at = BooleanField(null=True)
reply_probability_boost = DoubleField(null=True)
# 从 chat_info 扁平化而来的字段
chat_info_stream_id = TextField()
chat_info_platform = TextField()
chat_info_user_platform = TextField()
chat_info_user_id = TextField()
chat_info_user_nickname = TextField()
chat_info_user_cardname = TextField(null=True)
chat_info_group_platform = TextField(null=True) # 群聊信息可能不存在
chat_info_group_id = TextField(null=True)
chat_info_group_name = TextField(null=True)
chat_info_create_time = DoubleField()
chat_info_last_active_time = DoubleField()
# 从顶层 user_info 扁平化而来的字段 (消息发送者信息)
user_platform = TextField(null=True)
user_id = TextField(null=True)
user_nickname = TextField(null=True)
user_cardname = TextField(null=True)
processed_plain_text = TextField(null=True) # 处理后的纯文本消息
display_message = TextField(null=True) # 显示的消息
priority_mode = TextField(null=True)
priority_info = TextField(null=True)
additional_config = TextField(null=True)
is_emoji = BooleanField(default=False)
is_picid = BooleanField(default=False)
is_command = BooleanField(default=False)
intercept_message_level = IntegerField(default=0)
is_notify = BooleanField(default=False)
selected_expressions = TextField(null=True)
class Meta:
# database = db # 继承自 BaseModel
table_name = "messages"
action_builtin_prompt: Optional[str] = Field(default=None) # 内置动作提示
action_display_prompt: Optional[str] = Field(default=None) # 最终输入到Prompt的内容
class ActionRecords(BaseModel):
"""
用于存储动作记录数据的模型
"""
class CommandRecord(SQLModel, table=True):
"""记录命令执行情况"""
action_id = TextField(index=True) # 消息 ID (更改自 IntegerField)
time = DoubleField() # 消息时间戳
__tablename__ = "command_records" # type: ignore
action_reasoning = TextField(null=True)
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
action_name = TextField()
action_data = TextField()
action_done = BooleanField(default=False)
# 元信息
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 记录时间戳
session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id
action_build_into_prompt = BooleanField(default=False)
action_prompt_display = TextField()
chat_id = TextField(index=True) # 对应的 ChatStreams stream_id
chat_info_stream_id = TextField()
chat_info_platform = TextField()
class Meta:
# database = db # 继承自 BaseModel
table_name = "action_records"
# 调用信息
command_name: str = Field(index=True, max_length=255) # 命令名称
command_data: Optional[str] = Field(default=None) # 命令数据JSON格式存储
command_result: Optional[str] = Field(default=None) # 命令执行结果
class Images(BaseModel):
"""
用于存储图像信息的模型
"""
image_id = TextField(default="") # 图片唯一ID
emoji_hash = TextField(index=True) # 图像的哈希值
description = TextField(null=True) # 图像的描述
path = TextField(unique=True) # 图像文件的路径
# base64 = TextField() # 图片的base64编码
count = IntegerField(default=1) # 图片被引用的次数
timestamp = FloatField() # 时间戳
type = TextField() # 图像类型,例如 "emoji"
vlm_processed = BooleanField(default=False) # 是否已经过VLM处理
class Meta:
table_name = "images"
class ImageDescriptions(BaseModel):
"""
用于存储图像描述信息的模型
"""
type = TextField() # 类型,例如 "emoji"
image_description_hash = TextField(index=True) # 图像的哈希值
description = TextField() # 图像的描述
timestamp = FloatField() # 时间戳
class Meta:
# database = db # 继承自 BaseModel
table_name = "image_descriptions"
class EmojiDescriptionCache(BaseModel):
"""
存储表情包的详细描述和情感标签缓存
"""
emoji_hash = TextField(unique=True, index=True)
description = TextField() # 详细描述
emotion_tags = TextField(null=True) # 情感标签,逗号分隔
timestamp = FloatField()
class Meta:
table_name = "emoji_description_cache"
class OnlineTime(BaseModel):
class OnlineTime(SQLModel, table=True):
"""
用于存储在线时长记录的模型
"""
# timestamp: "$date": "2025-05-01T18:52:18.191Z" (存储为字符串)
timestamp = TextField(default=datetime.datetime.now) # 时间戳
duration = IntegerField() # 时长,单位分钟
start_timestamp = DateTimeField(default=datetime.datetime.now)
end_timestamp = DateTimeField(index=True)
__tablename__ = "online_time" # type: ignore
class Meta:
# database = db # 继承自 BaseModel
table_name = "online_time"
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 时间戳
duration_minutes: int = Field() # 时长,单位秒
start_timestamp: datetime = Field(default_factory=datetime.now) # 上线时间
end_timestamp: datetime = Field(index=True) # 下线时间
class PersonInfo(BaseModel):
"""
用于存储个人信息数据的模型
"""
is_known = BooleanField(default=False) # 是否已认识
person_id = TextField(unique=True, index=True) # 个人唯一ID
person_name = TextField(null=True) # 个人名称 (允许为空)
name_reason = TextField(null=True) # 名称设定的原因
platform = TextField() # 平台
user_id = TextField(index=True) # 用户ID
nickname = TextField(null=True) # 用户昵称
group_nick_name = TextField(null=True) # 群昵称列表 (JSON格式存储 [{"group_id": str, "group_nick_name": str}])
memory_points = TextField(null=True) # 个人印象的点
know_times = FloatField(null=True) # 认识时间 (时间戳)
know_since = FloatField(null=True) # 首次印象总结时间
last_know = FloatField(null=True) # 最后一次印象总结时间
class Expression(SQLModel, table=True):
"""用于存储表达方式的模型"""
class Meta:
# database = db # 继承自 BaseModel
table_name = "person_info"
__tablename__ = "expressions" # type: ignore
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
class GroupInfo(BaseModel):
"""
用于存储群组信息数据的模型
"""
situation: str = Field(index=True, max_length=255, primary_key=True) # 情景
style: str = Field(index=True, max_length=255, primary_key=True) # 风格
group_id = TextField(unique=True, index=True) # 群组唯一ID
group_name = TextField(null=True) # 群组名称 (允许为空)
platform = TextField() # 平台
group_impression = TextField(null=True) # 群组印象
member_list = TextField(null=True) # 群成员列表 (JSON格式)
topic = TextField(null=True) # 群组基本信息
context: str # 上下文
up_content: str
create_time = FloatField(null=True) # 创建时间 (时间戳)
last_active = FloatField(null=True) # 最后活跃时间
member_count = IntegerField(null=True, default=0) # 成员数量
content_list: str # 内容列表JSON格式存储
count: int = Field(default=0) # 使用次数
last_active_time: datetime = Field(default_factory=datetime.now, index=True) # 上次使用时间
create_time: datetime = Field(default_factory=datetime.now) # 创建时间
session_id: Optional[str] = Field(default=None, max_length=255, nullable=True) # 会话ID区分是否为全局表达方式
class Meta:
# database = db # 继承自 BaseModel
table_name = "group_info"
class Jargon(SQLModel, table=True):
"""存黑话的模型"""
class Expression(BaseModel):
"""
用于存储表达风格的模型
"""
__tablename__ = "jargons" # type: ignore
situation = TextField()
style = TextField()
content_list = TextField(null=True)
count = IntegerField(default=1)
last_active_time = FloatField()
chat_id = TextField(index=True)
create_date = FloatField(null=True) # 创建日期,允许为空以兼容老数据
checked = BooleanField(default=False) # 是否已检查
rejected = BooleanField(default=False) # 是否被拒绝但未更新
modified_by = TextField(null=True) # 最后修改来源:'ai' 或 'user',为空表示未检查
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
class Meta:
table_name = "expression"
content: str = Field(index=True, max_length=255, primary_key=True) # 黑话内容
raw_content: Optional[str] = Field(default=None, nullable=True) # 原始内容,未处理的黑话内容
meaning: str # 黑话含义
session_id: Optional[str] = Field(default=None, max_length=255, nullable=True) # 会话ID区分是否为全局黑话
class Jargon(BaseModel):
"""
用于存储俚语的模型
"""
count: int = Field(default=0) # 使用次数
is_jargon: Optional[bool] = Field(default=True) # 是否为黑话False表示为白话
is_complete: bool = Field(default=False) # 是否为已经完成全部推断count > 100后不再推断
inference_with_context: Optional[str] = Field(default=None, nullable=True) # 带上下文的推断结果JSON格式
inference_with_content_only: Optional[str] = Field(default=None, nullable=True) # 只基于词条的推断结果JSON格式
content = TextField()
raw_content = TextField(null=True)
meaning = TextField(null=True)
chat_id = TextField(index=True)
is_global = BooleanField(default=False)
count = IntegerField(default=0)
is_jargon = BooleanField(null=True) # None表示未判定True表示是黑话False表示不是黑话
last_inference_count = IntegerField(null=True) # 最后一次判定的count值用于避免重启后重复判定
is_complete = BooleanField(default=False) # 是否已完成所有推断count>=100后不再推断
inference_with_context = TextField(null=True) # 基于上下文的推断结果JSON格式
inference_content_only = TextField(null=True) # 仅基于词条的推断结果JSON格式
class Meta:
table_name = "jargon"
class ChatHistory(SQLModel, table=True):
"""存储聊天历史记录的模型"""
__tablename__ = "chat_history" # type: ignore
class ChatHistory(BaseModel):
"""
用于存储聊天历史概括的模型
"""
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
chat_id = TextField(index=True) # 聊天ID
start_time = DoubleField() # 起始时间
end_time = DoubleField() # 结束时间
original_text = TextField() # 对话原文
participants = TextField() # 参与的所有人的昵称JSON格式存储
theme = TextField() # 主题:这段对话的主要内容,一个简短的标题
keywords = TextField() # 关键词这段对话的关键词JSON格式存储
summary = TextField() # 概括:对这段话的平文本概括
# key_point = TextField(null=True) # 关键信息话题中的关键信息点JSON格式存储
count = IntegerField(default=0) # 被检索次数
forget_times = IntegerField(default=0) # 被遗忘检查的次数
# 元信息
session_id: str = Field(index=True, max_length=255) # 聊天会话ID
start_timestamp: datetime = Field(index=True) # 聊天开始时间
end_timestamp: datetime = Field(index=True) # 聊天结束时间
query_count: int = Field(default=0) # 被检索次数
query_forget_count: int = Field(default=0) # 被遗忘检查的次数
class Meta:
table_name = "chat_history"
# 历史消息内容
original_messages: str # 对话原文
participants: str # 参与者列表JSON格式存储
theme: str # 对话主题:这段对话的主要内容,一个简短的标题
keywords: str # 关键词这段对话的关键词JSON格式存储
summary: str # 概括:对这段话的平文本概括
class ThinkingBack(BaseModel):
"""
用于存储记忆检索思考过程的模型
"""
class ThinkingQuestion(SQLModel, table=True):
"""存储思考型问题的模型"""
chat_id = TextField(index=True) # 聊天ID
question = TextField() # 提出的问题
context = TextField(null=True) # 上下文信息
found_answer = BooleanField(default=False) # 是否找到答案
answer = TextField(null=True) # 答案内容
thinking_steps = TextField(null=True) # 思考步骤JSON格式
create_time = DoubleField() # 创建时间
update_time = DoubleField() # 更新时间
__tablename__ = "thinking_questions" # type: ignore
class Meta:
table_name = "thinking_back"
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
MODELS = [
ChatStreams,
LLMUsage,
Emoji,
Messages,
Images,
ImageDescriptions,
EmojiDescriptionCache,
OnlineTime,
PersonInfo,
Expression,
ActionRecords,
Jargon,
ChatHistory,
ThinkingBack,
]
def create_tables():
    """
    Create the database tables for every model listed in ``MODELS``.
    """
    # `db` is used as a context manager around the whole call.
    with db:
        db.create_tables(MODELS)
def initialize_database(sync_constraints=False):
    """
    Make sure every table in ``MODELS`` exists and has the columns its model declares.

    Missing tables are created. For existing tables, columns declared on the
    model but absent from the table are added with ``ALTER TABLE ... ADD COLUMN``,
    and columns present in the table but not declared on the model are dropped.
    A failure of an individual ALTER statement is logged and the loop continues.
    Any other exception is logged and the function returns early.

    Args:
        sync_constraints (bool): Whether to synchronise field constraints as
            well. Defaults to False. When True, ``sync_field_constraints()`` is
            called to check and repair NULL-constraint mismatches.
    """
    try:
        with db:  # Manages the connection used for the table_exists checks.
            for model in MODELS:
                table_name = model._meta.table_name
                if not db.table_exists(model):
                    logger.warning(f"'{table_name}' 未找到,正在创建...")
                    db.create_tables([model])
                    logger.info(f"'{table_name}' 创建成功")
                    continue
                # Check the columns.
                # row[1] of each PRAGMA table_info row is used as the column name.
                cursor = db.execute_sql(f"PRAGMA table_info('{table_name}')")
                existing_columns = {row[1] for row in cursor.fetchall()}
                model_fields = set(model._meta.fields.keys())
                if missing_fields := model_fields - existing_columns:
                    logger.warning(f"'{table_name}' 缺失字段: {missing_fields}")
                for field_name, field_obj in model._meta.fields.items():
                    if field_name not in existing_columns:
                        logger.info(f"'{table_name}' 缺失字段 '{field_name}',正在添加...")
                        # Map the field class name to an SQL column type; unknown classes become TEXT.
                        field_type = field_obj.__class__.__name__
                        sql_type = {
                            "TextField": "TEXT",
                            "IntegerField": "INTEGER",
                            "FloatField": "FLOAT",
                            "DoubleField": "DOUBLE",
                            "BooleanField": "INTEGER",
                            "DateTimeField": "DATETIME",
                        }.get(field_type, "TEXT")
                        alter_sql = f"ALTER TABLE {table_name} ADD COLUMN {field_name} {sql_type}"
                        # NOTE(review): SQLite may reject ADD COLUMN ... NOT NULL when no
                        # DEFAULT is appended below - confirm; such a failure would be
                        # caught and logged by the try/except further down.
                        alter_sql += " NULL" if field_obj.null else " NOT NULL"
                        if hasattr(field_obj, "default") and field_obj.default is not None:
                            # Handle each kind of default value; callables are skipped.
                            default_value = field_obj.default
                            if callable(default_value):
                                # Lambdas and other callables cannot be expressed in SQL.
                                pass
                            elif isinstance(default_value, str):
                                # NOTE(review): the string is interpolated without escaping quotes.
                                alter_sql += f" DEFAULT '{default_value}'"
                            elif isinstance(default_value, bool):
                                alter_sql += f" DEFAULT {int(default_value)}"
                            else:
                                alter_sql += f" DEFAULT {default_value}"
                        try:
                            db.execute_sql(alter_sql)
                            logger.info(f"字段 '{field_name}' 添加成功")
                        except Exception as e:
                            logger.error(f"添加字段 '{field_name}' 失败: {e}")
                # Check for and drop surplus columns (newly added logic).
                extra_fields = existing_columns - model_fields
                if extra_fields:
                    logger.warning(f"'{table_name}' 存在多余字段: {extra_fields}")
                    for field_name in extra_fields:
                        try:
                            logger.warning(f"'{table_name}' 存在多余字段 '{field_name}',正在尝试删除...")
                            db.execute_sql(f"ALTER TABLE {table_name} DROP COLUMN {field_name}")
                            logger.info(f"字段 '{field_name}' 删除成功")
                        except Exception as e:
                            logger.error(f"删除字段 '{field_name}' 失败: {e}")
        # When constraint sync is enabled, run the constraint check and repair.
        if sync_constraints:
            logger.debug("开始同步数据库字段约束...")
            sync_field_constraints()
            logger.debug("数据库字段约束同步完成")
    except Exception as e:
        logger.exception(f"检查表或字段是否存在时出错: {e}")
        # If the check failed (for example the database is unavailable), bail out.
        return
    logger.info("数据库初始化完成")
def sync_field_constraints():
    """
    Synchronise NULL constraints between the existing tables and the model definitions.

    For every model in ``MODELS`` whose table exists, each model field's
    ``null`` setting is compared with the ``notnull`` flag reported by
    ``PRAGMA table_info``. Mismatches are collected and handed to
    ``_fix_table_constraints``. Any exception is logged and swallowed.
    """
    try:
        with db:
            for model in MODELS:
                table_name = model._meta.table_name
                if not db.table_exists(model):
                    logger.warning(f"'{table_name}' 不存在,跳过约束检查")
                    continue
                logger.debug(f"检查表 '{table_name}' 的字段约束...")
                # Read the current table structure.
                # Each row is indexed as: [1] name, [2] type, [3] notnull, [4] default.
                cursor = db.execute_sql(f"PRAGMA table_info('{table_name}')")
                current_schema = {
                    row[1]: {"type": row[2], "notnull": bool(row[3]), "default": row[4]} for row in cursor.fetchall()
                }
                # Check the constraint of every model field.
                constraints_to_fix = []
                for field_name, field_obj in model._meta.fields.items():
                    if field_name not in current_schema:
                        continue  # Column does not exist; skip it.
                    current_notnull = current_schema[field_name]["notnull"]
                    model_allows_null = field_obj.null
                    # Model allows NULL but the column is NOT NULL: needs fixing.
                    if model_allows_null and current_notnull:
                        constraints_to_fix.append(
                            {
                                "field_name": field_name,
                                "field_obj": field_obj,
                                "action": "allow_null",
                                "current_constraint": "NOT NULL",
                                "target_constraint": "NULL",
                            }
                        )
                        logger.warning(f"字段 '{field_name}' 约束不一致: 模型允许NULL但数据库为NOT NULL")
                    # Model forbids NULL but the column allows it: also needs fixing, with care.
                    elif not model_allows_null and not current_notnull:
                        constraints_to_fix.append(
                            {
                                "field_name": field_name,
                                "field_obj": field_obj,
                                "action": "disallow_null",
                                "current_constraint": "NULL",
                                "target_constraint": "NOT NULL",
                            }
                        )
                        logger.warning(f"字段 '{field_name}' 约束不一致: 模型不允许NULL但数据库允许NULL")
                # Repair the fields whose constraints differ.
                if constraints_to_fix:
                    logger.info(f"'{table_name}' 需要修复 {len(constraints_to_fix)} 个字段约束")
                    _fix_table_constraints(table_name, model, constraints_to_fix)
                else:
                    logger.debug(f"'{table_name}' 的字段约束已同步")
    except Exception as e:
        logger.exception(f"同步字段约束时出错: {e}")
def _fix_table_constraints(table_name, model, constraints_to_fix):
    """
    Rebuild ``table_name`` so its NULL / NOT NULL constraints match ``model``.

    SQLite cannot change a column's nullability in place, so the table is
    copied to a timestamped backup table, dropped, recreated from the model
    definition and refilled from the backup.  The primary-key column is not
    copied, so the recreated rows receive newly generated keys.  Columns that
    change from NULL to NOT NULL have their existing NULLs replaced by a
    type-based default while copying.

    The rebuild runs inside ``db.atomic()`` so that a failure rolls back to
    the untouched original table (with its constraints and indexes) instead
    of leaving a half-rebuilt schema behind.

    Args:
        table_name: Name of the table to rebuild.
        model: Peewee model class describing the desired schema.
        constraints_to_fix: List of dicts; the keys ``"field_name"``,
            ``"action"``, ``"current_constraint"`` and
            ``"target_constraint"`` are read here.

    Returns:
        None.  Errors are logged, never raised.
    """

    def quote(identifier):
        # Double-quote an identifier (doubling embedded quotes) so reserved
        # words or unusual names cannot break the generated SQL.
        return '"' + str(identifier).replace('"', '""') + '"'

    # Computed before the try block so the recovery path can always use it.
    backup_table = f"{table_name}_backup_{int(datetime.datetime.now().timestamp())}"
    target_sql = quote(table_name)
    backup_sql = quote(backup_table)

    try:
        # Transaction (or savepoint when already inside one): an exception
        # anywhere below undoes every statement of the rebuild.
        with db.atomic():
            logger.info(f"开始修复表 '{table_name}' 的字段约束...")

            # 1. Snapshot the current rows into a backup table.
            db.execute_sql(f"CREATE TABLE {backup_sql} AS SELECT * FROM {target_sql}")
            logger.info(f"已创建备份表 '{backup_table}'")

            # 2. Remember the row count before the original table is dropped.
            original_count = db.execute_sql(f"SELECT COUNT(*) FROM {backup_sql}").fetchone()[0]
            logger.info(f"备份表 '{backup_table}' 包含 {original_count} 行数据")

            # 3. Drop the original table.
            db.execute_sql(f"DROP TABLE {target_sql}")
            logger.info(f"已删除原表 '{table_name}'")

            # 4. Recreate it from the current model definition.
            db.create_tables([model])
            logger.info(f"已重新创建表 '{table_name}' 使用新的约束")

            # 5. Copy the data back, leaving out the primary key so the
            #    database generates fresh key values.
            primary_key = getattr(model._meta, "primary_key", None)
            if isinstance(primary_key, str) and primary_key:
                primary_key_name = primary_key
            else:
                # Falls back to Peewee's default name when the key has no
                # usable ``name`` (e.g. no primary key at all).
                primary_key_name = getattr(primary_key, "name", None) or "id"

            model_fields = model._meta.fields
            if primary_key_name in model_fields:
                logger.info(f"排除主键字段 '{primary_key_name}',让数据库自动生成新的主键")

            null_to_notnull_fields = [
                constraint["field_name"]
                for constraint in constraints_to_fix
                if constraint["action"] == "disallow_null"
            ]
            if null_to_notnull_fields:
                logger.warning(f"字段 {null_to_notnull_fields} 将从允许NULL改为不允许NULL需要处理现有的NULL值")

            insert_columns = []
            select_expressions = []
            for field_name, field_obj in model_fields.items():
                if field_name == primary_key_name:
                    continue

                # SQL must use the database column name, which can differ
                # from the model attribute name (custom column_name, FKs).
                column_sql = quote(getattr(field_obj, "column_name", None) or field_name)
                insert_columns.append(column_sql)

                if field_name not in null_to_notnull_fields:
                    select_expressions.append(column_sql)
                    continue

                # Replace existing NULLs with a default chosen by field type.
                if isinstance(field_obj, (IntegerField, FloatField, DoubleField, BooleanField)):
                    default_value = "0"
                elif isinstance(field_obj, DateTimeField):
                    default_value = f"'{datetime.datetime.now()}'"
                else:
                    # TextField and every other field type fall back to ''.
                    default_value = "''"
                select_expressions.append(f"COALESCE({column_sql}, {default_value}) AS {column_sql}")

            insert_sql = (
                f"INSERT INTO {target_sql} ({', '.join(insert_columns)}) "
                f"SELECT {', '.join(select_expressions)} FROM {backup_sql}"
            )
            db.execute_sql(insert_sql)
            logger.info(f"已从备份表恢复数据到 '{table_name}'")

            # Verify that every row made it across before discarding the backup.
            new_count = db.execute_sql(f"SELECT COUNT(*) FROM {target_sql}").fetchone()[0]
            if original_count == new_count:
                logger.info(f"数据完整性验证通过: {original_count} 行数据")
                db.execute_sql(f"DROP TABLE {backup_sql}")
                logger.info(f"已删除备份表 '{backup_table}'")
            else:
                logger.error(f"数据完整性验证失败: 原始 {original_count} 行,新表 {new_count}")
                logger.error(f"备份表 '{backup_table}' 已保留,请手动检查")

            for constraint in constraints_to_fix:
                logger.info(
                    f"已修复字段 '{constraint['field_name']}': "
                    f"{constraint['current_constraint']} -> {constraint['target_constraint']}"
                )
    except Exception as e:
        logger.exception(f"修复表 '{table_name}' 约束时出错: {e}")
        # After a successful rollback the backup table no longer exists and
        # the original table is intact, so nothing is left to do.  The manual
        # restore below is only a fallback for the case where the backup
        # survived.
        # NOTE(review): a table restored this way comes from CREATE TABLE AS
        # SELECT and therefore lacks the primary key, NOT NULL constraints
        # and indexes of the original.
        try:
            if db.table_exists(backup_table):
                logger.info(f"尝试从备份表 '{backup_table}' 恢复...")
                db.execute_sql(f"DROP TABLE IF EXISTS {target_sql}")
                db.execute_sql(f"ALTER TABLE {backup_sql} RENAME TO {target_sql}")
                logger.info(f"已从备份恢复表 '{table_name}'")
        except Exception as restore_error:
            logger.exception(f"恢复表失败: {restore_error}")
def check_field_constraints():
    """
    Report NULL / NOT NULL mismatches between the models and the database.

    Nothing is modified; this is a dry run for previewing what a constraint
    repair would touch.

    Returns:
        dict mapping table name to a list of mismatch descriptions (dicts with
        ``field_name``, ``issue``, ``model_constraint``, ``db_constraint`` and
        ``recommended_action``).  Tables without mismatches are omitted.  If
        an exception occurs it is logged and whatever was collected so far is
        returned.
    """
    inconsistencies = {}
    try:
        with db:
            for model in MODELS:
                table_name = model._meta.table_name
                if not db.table_exists(model):
                    continue

                # PRAGMA table_info rows: index 1 is the column name,
                # index 3 is the NOT NULL flag.
                pragma_rows = db.execute_sql(f"PRAGMA table_info('{table_name}')").fetchall()
                notnull_by_column = {row[1]: bool(row[3]) for row in pragma_rows}

                findings = []
                for field_name, field_obj in model._meta.fields.items():
                    db_notnull = notnull_by_column.get(field_name)
                    if db_notnull is None:
                        continue  # column absent from the table

                    if field_obj.null and db_notnull:
                        findings.append(
                            {
                                "field_name": field_name,
                                "issue": "model_allows_null_but_db_not_null",
                                "model_constraint": "NULL",
                                "db_constraint": "NOT NULL",
                                "recommended_action": "allow_null",
                            }
                        )
                    elif not (field_obj.null or db_notnull):
                        findings.append(
                            {
                                "field_name": field_name,
                                "issue": "model_not_null_but_db_allows_null",
                                "model_constraint": "NOT NULL",
                                "db_constraint": "NULL",
                                "recommended_action": "disallow_null",
                            }
                        )

                if findings:
                    inconsistencies[table_name] = findings
    except Exception as e:
        logger.exception(f"检查字段约束时出错: {e}")
    return inconsistencies
def fix_image_id():
    """
    Backfill the ``image_id`` column of ``Images`` rows.

    Every row whose ``image_id`` is empty gets a freshly generated UUID4
    string and is saved.  Any exception is logged and swallowed.
    """
    import uuid

    try:
        with db:
            for record in Images.select():
                if record.image_id:
                    continue  # already has an id
                record.image_id = str(uuid.uuid4())
                record.save()
                logger.info(f"已为表情包 {record.id} 生成新的 image_id: {record.image_id}")
    except Exception as e:
        logger.exception(f"修复 image_id 时出错: {e}")
# Run the initialization functions when this module is loaded (import-time side effect).
# Database initialization runs with constraint synchronization enabled,
# then image_id is backfilled for Images rows that have none.
initialize_database(sync_constraints=True)
fix_image_id()
# 问答对
question: str # 问题内容
context: Optional[str] = Field(default=None, nullable=True) # 上下文
found_answer: bool = Field(default=False) # 是否找到答案
answer: Optional[str] = Field(default=None, nullable=True) # 问题答案
thinking_steps: Optional[str] = Field(default=None, nullable=True) # 思考步骤JSON格式存储
created_timestamp: datetime = Field(default_factory=datetime.now, index=True) # 创建时间
updated_timestamp: datetime = Field(default_factory=datetime.now, index=True) # 最后更新时间