新数据库模型

pull/1496/head
UnCLAS-Prommer 2026-01-31 21:59:08 +08:00
parent f44598a331
commit 17aff36bec
No known key found for this signature in database
4 changed files with 613 additions and 715 deletions

View File

@ -33,4 +33,72 @@ version 0.3.0 - 2026-01-11
### ModelConfig
- [x] 迁移了原来在`ModelConfig`中的方法到一个单独的临时类`TempMethodsLLMUtils`中
- [x] get_model_info
- [x] get_provider
## 数据库模型设计
仅保留要点说明
### General Modifications
- [x] 所有项目增加自增编号主键`id`
- [x] 统一使用了SQLModel作为基类
- [x] 复杂类型使用JSON格式存储
- [x] 所有时间戳字段统一命名为`timestamp`
### 消息模型 MaiMessage
- [x] 自增编号主键`id`
- [x] 消息元数据
- [x] 消息id`message_id`
- [x] 消息时间戳`time`
- [x] 平台名`platform`
- [x] 用户元数据
- [x] 用户id`user_id`
- [x] 用户昵称`user_nickname`
- [x] 用户备注名`user_cardname`
- [x] 用户平台`user_platform`
- [x] 群组元数据
- [x] 群组id`group_id`
- [x] 群组名称`group_name`
- [x] 群组平台`group_platform`
- [x] 被提及/at字段
- [x] 是否被提及`is_mentioned`
- [x] 是否被at`is_at`
- [x] 消息内容
- [x] 原始消息内容`raw_content`,base64编码存储
- [x] 处理后的纯文本内容`processed_plain_text`
- [x] 真正放入Prompt的消息内容`display_message`
- [x] 消息内部元数据
- [x] 聊天会话id`session_id`
- [x] 回复的消息id`reply_to`
- [x] 是否为表情包消息`is_emoji`
- [x] 是否为图片消息`is_picture`
- [x] 是否为命令消息`is_command`
- [x] 是否为通知消息`is_notify`
- [x] 其他配置`additional_config`,JSON格式存储
### 模型使用情况 ModelUsage
- [x] 模型相关信息
- [x] 请求相关信息
- [x] Token使用情况
### 图片数据模型
- [x] 图片元信息
- [x] 图片哈希值`image_hash`,使用`sha256`同时作为图片唯一ID
- [x] 表情包的情感标签`emotion`
- [x] 是否已经被注册`is_registered`
- [x] 是否被手动禁用`is_banned`
### 动作记录模型 ActionRecord
### 命令执行记录模型 CommandRecord
新增此记录
### 在线时间记录模型 OnlineTime
### 表达方式模型
### 黑话模型
- [x] 重命名`inference_content_only`为`inference_with_content_only`
### 聊天记录模型
- [x] 重命名`original_text`为`original_message`
- [x] 重命名`forget_times`为`query_forget_count`
### 细枝末节
- [ ] 统一所有的`stream_id`和`chat_id`命名为`session_id`
- [ ] 更换Hash方式为`sha256`
## 一些细枝末节的东西
- [ ] 将`stream_id`和`chat_id`统一命名为`session_id`
- [ ] 映射表
- [ ] `platform_group_user_session_id_map` `平台_群组_用户`-`会话ID` 映射表

View File

@ -0,0 +1,127 @@
from pathlib import Path
import ast
import subprocess
import sys
base_file_path = Path(__file__).parent.parent.absolute().resolve() / "src" / "common" / "database" / "database_model.py"
target_file_path = (
Path(__file__).parent.parent.absolute().resolve() / "src" / "common" / "database" / "database_datamodel.py"
)
with open(base_file_path, "r", encoding="utf-8") as f:
source_text = f.read()
source_lines = source_text.splitlines()
try:
tree = ast.parse(source_text)
except SyntaxError as e:
raise e
code_lines = [
"from typing import Optional",
"from pydantic import BaseModel",
"from datetime import datetime",
"from .database_model import ModelUser, ImageType",
]
def src(node):
seg = ast.get_source_segment(source_text, node)
return seg if seg is not None else ast.unparse(node)
for node in tree.body:
if not isinstance(node, ast.ClassDef):
continue
# 判断是否 SQLModel 且 table=True
has_sqlmodel = any(
(isinstance(b, ast.Name) and b.id == "SQLModel") or (isinstance(b, ast.Attribute) and b.attr == "SQLModel")
for b in node.bases
)
has_table_kw = any(
(kw.arg == "table" and isinstance(kw.value, ast.Constant) and kw.value.value is True) for kw in node.keywords
)
if not (has_sqlmodel and has_table_kw):
continue
class_name = node.name
code_lines.append("")
code_lines.append(f"class {class_name}(BaseModel):")
fields_added = 0
for item in node.body:
# 跳过 __tablename__ 等
if isinstance(item, ast.Assign):
if len(item.targets) != 1 or not isinstance(item.targets[0], ast.Name):
continue
name = item.targets[0].id
if name == "__tablename__":
continue
value_src = src(item.value)
line = f" {name} = {value_src}"
fields_added += 1
lineno = getattr(item, "lineno", None)
elif isinstance(item, ast.AnnAssign):
# 注解赋值
if not isinstance(item.target, ast.Name):
continue
name = item.target.id
ann = src(item.annotation) if item.annotation is not None else None
if item.value is None:
line = f" {name}: {ann}" if ann else f" {name}"
elif isinstance(item.value, ast.Call) and (
(isinstance(item.value.func, ast.Name) and item.value.func.id == "Field")
or (isinstance(item.value.func, ast.Attribute) and item.value.func.attr == "Field")
):
default_kw = next((kw for kw in item.value.keywords if kw.arg == "default"), None)
if default_kw is None:
# 没有 default保留类型但不赋值
line = f" {name}: {ann}" if ann else f" {name}"
else:
default_src = src(default_kw.value)
line = f" {name}: {ann} = {default_src}"
else:
value_src = src(item.value)
line = f" {name}: {ann} = {value_src}" if ann else f" {name} = {value_src}"
fields_added += 1
lineno = getattr(item, "lineno", None)
else:
continue
# 提取同一行的行内注释作为字段说明(如果存在)
comment = None
if lineno is not None:
src_line = source_lines[lineno - 1]
if "#" in src_line:
# 取第一个 #
comment = src_line.split("#", 1)[1].strip()
# 避免三引号冲突
comment = comment.replace('"""', '\\"""')
code_lines.append(line)
if comment:
code_lines.append(f' """{comment}"""')
else:
print(f"Warning: No comment found for field '{name}' in class '{class_name}'.")
if fields_added == 0:
code_lines.append(" pass")
with open(target_file_path, "w", encoding="utf-8") as f:
f.write("\n".join(code_lines) + "\n")
try:
result = subprocess.run(["ruff", "format", str(target_file_path)], capture_output=True, text=True)
except FileNotFoundError:
print("ruff 未找到,请安装 ruff 并确保其在 PATH 中例如pip install ruff", file=sys.stderr)
sys.exit(127)
# 输出 ruff 的 stdout/stderr
if result.stdout:
print(result.stdout, end="")
if result.stderr:
print(result.stderr, file=sys.stderr, end="")
if result.returncode != 0:
print(f"ruff 检查失败,退出码:{result.returncode}", file=sys.stderr)
sys.exit(result.returncode)

View File

@ -0,0 +1,246 @@
from typing import Optional
from pydantic import BaseModel
from datetime import datetime
from .database_model import ModelUser, ImageType
class MaiMessage(BaseModel):
    # Plain pydantic counterpart of the ``MaiMessage`` SQLModel table: one chat message with
    # its sender, optional group, mention flags, session metadata and content.
    # Fields declared without a default (message_id, time, platform, user_id, user_nickname,
    # session_id, raw_content, processed_plain_text, display_message) must be supplied.
    # The string that follows each field is its description, copied from the table model.
    id: Optional[int] = None
    """自增主键"""
    message_id: str
    """消息id"""
    time: float
    """消息时间,单位为秒"""
    platform: str
    """顶层平台字段"""
    user_id: str
    """发送者用户id"""
    user_nickname: str
    """发送者昵称"""
    user_cardname: Optional[str] = None
    """发送者备注名"""
    user_platform: Optional[str] = None
    """发送者平台"""
    group_id: Optional[str] = None
    """群组id"""
    group_name: Optional[str] = None
    """群组名称"""
    group_platform: Optional[str] = None
    """群组平台"""
    is_mentioned: bool = False
    """被提及"""
    is_at: bool = False
    """被at"""
    session_id: str
    """聊天会话id"""
    reply_to: Optional[str] = None
    """回复的消息id"""
    is_emoji: bool = False
    """是否为表情包消息"""
    is_picture: bool = False
    """是否为图片消息"""
    is_command: bool = False
    """是否为命令"""
    is_notify: bool = False
    """是否为通知消息"""
    raw_content: str
    """base64编码的原始消息内容"""
    processed_plain_text: str
    """平面化处理后的纯文本消息"""
    display_message: str
    """显示的消息内容被放入Prompt"""
    additional_config: Optional[str] = None
    """额外配置JSON格式存储"""
class ModelUsage(BaseModel):
    # Plain pydantic counterpart of the ``ModelUsage`` table: one model request with the
    # model/provider that served it, its timing, and token and cost accounting.
    # NOTE: the table model fills ``timestamp`` through ``default_factory``; that factory is
    # not carried over here, so ``timestamp`` is a required field of this class.
    id: Optional[int] = None
    """自增主键"""
    model_name: str
    """模型实际名称(供应商名称)"""
    model_assign_name: Optional[str] = None
    """模型分配名称(用户自定义名称)"""
    model_api_provider_name: str
    """模型API供应商名称"""
    endpoint: Optional[str] = None
    """模型API的具体endpoint"""
    user_type: ModelUser = ModelUser.SYSTEM
    """模型使用者类型"""
    request_type: str
    """内部请求类型,记录哪种模块使用了此模型"""
    time_cost: float
    """本次请求耗时,单位秒"""
    timestamp: datetime
    """请求时间戳"""
    prompt_tokens: int
    """提示词令牌数"""
    completion_tokens: int
    """完成词令牌数"""
    total_tokens: int
    """总令牌数"""
    cost: float
    """本次请求的费用,单位元"""
class Images(BaseModel):
    # Plain pydantic counterpart of the ``Images`` table, which stores both emoji and
    # ordinary images; ``image_type`` tells the two apart and defaults to ``ImageType.EMOJI``.
    # ``record_time`` has no default here and must be supplied.
    id: Optional[int] = None
    """自增主键"""
    image_hash: str = ""
    """图片哈希使用sha256哈希值亦作为图片唯一ID"""
    description: str
    """图片的描述"""
    full_path: str
    """文件的完整路径 (包括文件名)"""
    # Image type, e.g. 'emoji' or 'image'. The table model documents this field with an
    # attribute docstring rather than an inline comment, so no description was copied here.
    image_type: ImageType = ImageType.EMOJI
    emotion: Optional[str] = None
    """表情包的情感标签,逗号分隔"""
    query_count: int = 0
    """被查询次数"""
    is_registered: bool = False
    """是否已经注册"""
    is_banned: bool = False
    """被手动禁用"""
    record_time: datetime
    """记录时间(被创建的时间)"""
    register_time: Optional[datetime] = None
    """注册时间(被注册为可用表情包的时间)"""
    vlm_processed: bool = False
    """是否已经过VLM处理"""
class ActionRecord(BaseModel):
    # Plain pydantic counterpart of the ``ActionRecord`` table: one executed action, the
    # session it belongs to, and the optional reasoning, data and prompt texts around it.
    # ``action_id``, ``timestamp``, ``session_id`` and ``action_name`` must be supplied.
    id: Optional[int] = None
    """自增主键"""
    action_id: str
    """动作ID"""
    timestamp: datetime
    """记录时间戳"""
    session_id: str
    """对应的 ChatSession session_id"""
    action_name: str
    """动作名称"""
    action_reasoning: Optional[str] = None
    """动作推理过程"""
    action_data: Optional[str] = None
    """动作数据JSON格式存储"""
    action_builtin_prompt: Optional[str] = None
    """内置动作提示"""
    action_display_prompt: Optional[str] = None
    """最终输入到Prompt的内容"""
class CommandRecord(BaseModel):
    # Plain pydantic counterpart of the ``CommandRecord`` table: one command execution,
    # with the session it ran in and its optional input data and result.
    # ``timestamp``, ``session_id`` and ``command_name`` must be supplied.
    id: Optional[int] = None
    """自增主键"""
    timestamp: datetime
    """记录时间戳"""
    session_id: str
    """对应的 ChatSession session_id"""
    command_name: str
    """命令名称"""
    command_data: Optional[str] = None
    """命令数据JSON格式存储"""
    command_result: Optional[str] = None
    """命令执行结果"""
class OnlineTime(BaseModel):
    # Plain pydantic counterpart of the ``OnlineTime`` table: one online-duration record.
    # Every field except ``id`` must be supplied.
    id: Optional[int] = None
    """自增主键"""
    timestamp: datetime
    """时间戳"""
    # The unit is minutes: the field name says so, and the column this field replaces
    # (``duration``) was documented in minutes. The previous description said seconds.
    duration_minutes: int
    """时长,单位分钟"""
    start_timestamp: datetime
    """上线时间"""
    end_timestamp: datetime
    """下线时间"""
class Expression(BaseModel):
    # Plain pydantic counterpart of the ``Expression`` table: one learned expression,
    # described by a situation/style pair, with usage counters and timestamps.
    # ``session_id`` is optional; per its description it separates session-scoped
    # expressions from global ones.
    id: Optional[int] = None
    """自增主键"""
    situation: str
    """情景"""
    style: str
    """风格"""
    context: str
    """上下文"""
    # NOTE(review): ``up_content`` carries no description in the table model either;
    # its meaning cannot be determined from this file — confirm with the author.
    up_content: str
    content_list: str
    """内容列表JSON格式存储"""
    count: int = 0
    """使用次数"""
    last_active_time: datetime
    """上次使用时间"""
    create_time: datetime
    """创建时间"""
    session_id: Optional[str] = None
    """会话ID区分是否为全局表达方式"""
class Jargon(BaseModel):
    # Plain pydantic counterpart of the ``Jargon`` table: one jargon term with its meaning,
    # usage count and the stored inference results.
    # ``content`` and ``meaning`` must be supplied; ``is_jargon`` defaults to True.
    id: Optional[int] = None
    """自增主键"""
    content: str
    """黑话内容"""
    raw_content: Optional[str] = None
    """原始内容,未处理的黑话内容"""
    meaning: str
    """黑话含义"""
    session_id: Optional[str] = None
    """会话ID区分是否为全局黑话"""
    count: int = 0
    """使用次数"""
    is_jargon: Optional[bool] = True
    """是否为黑话False表示为白话"""
    is_complete: bool = False
    """是否为已经完成全部推断count > 100后不再推断"""
    inference_with_context: Optional[str] = None
    """带上下文的推断结果JSON格式"""
    inference_with_content_only: Optional[str] = None
    """只基于词条的推断结果JSON格式"""
class ChatHistory(BaseModel):
    # Plain pydantic counterpart of the ``ChatHistory`` table: a summarised span of one
    # chat session (time range, original messages, participants, theme, keywords, summary)
    # plus two counters that default to 0.
    # ``participants`` and ``keywords`` are plain strings here; their descriptions say
    # they hold JSON.
    id: Optional[int] = None
    """自增主键"""
    session_id: str
    """聊天会话ID"""
    start_timestamp: datetime
    """聊天开始时间"""
    end_timestamp: datetime
    """聊天结束时间"""
    query_count: int = 0
    """被检索次数"""
    query_forget_count: int = 0
    """被遗忘检查的次数"""
    original_messages: str
    """对话原文"""
    participants: str
    """参与者列表JSON格式存储"""
    theme: str
    """对话主题:这段对话的主要内容,一个简短的标题"""
    keywords: str
    """关键词这段对话的关键词JSON格式存储"""
    summary: str
    """概括:对这段话的平文本概括"""
class ThinkingQuestion(BaseModel):
    # Plain pydantic counterpart of the ``ThinkingQuestion`` table: a question, whether an
    # answer was found, the answer and the recorded thinking steps.
    # NOTE: the table model fills both timestamps through ``default_factory``; that factory
    # is not carried over here, so ``created_timestamp`` and ``updated_timestamp`` are required.
    id: Optional[int] = None
    """自增主键"""
    question: str
    """问题内容"""
    context: Optional[str] = None
    """上下文"""
    found_answer: bool = False
    """是否找到答案"""
    answer: Optional[str] = None
    """问题答案"""
    thinking_steps: Optional[str] = None
    """思考步骤JSON格式存储"""
    created_timestamp: datetime
    """创建时间"""
    updated_timestamp: datetime
    """最后更新时间"""

View File

@ -1,778 +1,235 @@
from peewee import Model, DoubleField, IntegerField, BooleanField, TextField, FloatField, DateTimeField from typing import Optional
from .database import db from sqlalchemy import Column, Float, Enum as SQLEnum
import datetime from sqlmodel import SQLModel, Field
from src.common.logger import get_logger from enum import Enum
from datetime import datetime
logger = get_logger("database_model")
# 请在此处定义您的数据库实例。
# 您需要取消注释并配置适合您的数据库的部分。
# 例如,对于 SQLite:
# db = SqliteDatabase('MaiBot.db')
#
# 对于 PostgreSQL:
# db = PostgresqlDatabase('your_db_name', user='your_user', password='your_password',
# host='localhost', port=5432)
#
# 对于 MySQL:
# db = MySQLDatabase('your_db_name', user='your_user', password='your_password',
# host='localhost', port=3306)
# 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。 class ModelUser(str, Enum):
# 这允许您在一个地方为所有模型指定数据库。 SYSTEM = "system"
PLUGIN = "plugin"
class BaseModel(Model): class ImageType(str, Enum):
class Meta: EMOJI = "emoji"
# 将下面的 'db' 替换为您实际的数据库实例变量名。 IMAGE = "image"
database = db # 例如: database = my_actual_db_instance
pass # 在用户定义数据库实例之前,此处为占位符
class ChatStreams(BaseModel): class MaiMessage(SQLModel, table=True):
""" __tablename__ = "mai_messages" # type: ignore
用于存储流式记录数据的模型类似于提供的 MongoDB 结构 id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
"""
# stream_id: "a544edeb1a9b73e3e1d77dff36e41264" # 消息元数据
# 假设 stream_id 是唯一的,并为其创建索引以提高查询性能。 message_id: str = Field(index=True, max_length=255) # 消息id
stream_id = TextField(unique=True, index=True) time: float = Field(sa_column=Column(Float)) # 消息时间,单位为秒
platform: str = Field(index=True, max_length=100) # 顶层平台字段
# 消息发送者信息
user_id: str = Field(index=True, max_length=255) # 发送者用户id
user_nickname: str = Field(index=True, max_length=255) # 发送者昵称
user_cardname: Optional[str] = Field(default=None, max_length=255, nullable=True) # 发送者备注名
user_platform: Optional[str] = Field(default=None, max_length=100, nullable=True) # 发送者平台
# 群聊信息(如果有)
group_id: Optional[str] = Field(index=True, default=None, max_length=255, nullable=True) # 群组id
group_name: Optional[str] = Field(default=None, max_length=255, nullable=True) # 群组名称
group_platform: Optional[str] = Field(default=None, max_length=100, nullable=True) # 群组平台
# 被提及/at字段
is_mentioned: bool = Field(default=False) # 被提及
is_at: bool = Field(default=False) # 被at
# create_time: 1746096761.4490178 (时间戳精确到小数点后7位) # 消息内部元数据
# DoubleField 用于存储浮点数,适合此类时间戳。 session_id: str = Field(index=True, max_length=255) # 聊天会话id
create_time = DoubleField() reply_to: Optional[str] = Field(default=None, max_length=255, nullable=True) # 回复的消息id
is_emoji: bool = Field(default=False) # 是否为表情包消息
is_picture: bool = Field(default=False) # 是否为图片消息
is_command: bool = Field(default=False) # 是否为命令
is_notify: bool = Field(default=False) # 是否为通知消息
# group_info 字段: # 消息内容
# platform: "qq" raw_content: str # base64编码的原始消息内容
# group_id: "941657197" processed_plain_text: str = Field(index=True) # 平面化处理后的纯文本消息
# group_name: "测试" display_message: str # 显示的消息内容被放入Prompt
group_platform = TextField(null=True) # 群聊信息可能不存在
group_id = TextField(null=True)
group_name = TextField(null=True)
# last_active_time: 1746623771.4825106 (时间戳精确到小数点后7位) # 其他配置
last_active_time = DoubleField() additional_config: Optional[str] = Field(default=None) # 额外配置JSON格式存储
# platform: "qq" (顶层平台字段)
platform = TextField()
# user_info 字段:
# platform: "qq"
# user_id: "1787882683"
# user_nickname: "墨梓柒(IceSakurary)"
# user_cardname: ""
user_platform = TextField()
user_id = TextField()
user_nickname = TextField()
# user_cardname 可能为空字符串或不存在,设置 null=True 更具灵活性。
user_cardname = TextField(null=True)
class Meta:
# 如果 BaseModel.Meta.database 已设置,则此模型将继承该数据库配置。
# 如果不使用带有数据库实例的 BaseModel或者想覆盖它
# 请取消注释并在下面设置数据库实例:
# database = db
table_name = "chat_streams" # 可选:明确指定数据库中的表名
class LLMUsage(BaseModel): class ModelUsage(SQLModel, table=True):
""" __tablename__ = "llm_usage" # type: ignore
用于存储 API 使用日志数据的模型
"""
model_name = TextField(index=True) # 添加索引 id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
model_assign_name = TextField(null=True) # 添加索引
model_api_provider = TextField(null=True) # 添加索引
user_id = TextField(index=True) # 添加索引
request_type = TextField(index=True) # 添加索引
endpoint = TextField()
prompt_tokens = IntegerField()
completion_tokens = IntegerField()
total_tokens = IntegerField()
cost = DoubleField()
time_cost = DoubleField(null=True)
status = TextField()
timestamp = DateTimeField(index=True) # 更改为 DateTimeField 并添加索引
class Meta: # 模型相关信息
# 如果 BaseModel.Meta.database 已设置,则此模型将继承该数据库配置。 model_name: str = Field(index=True, max_length=255) # 模型实际名称(供应商名称)
# database = db model_assign_name: Optional[str] = Field(index=True, default=None, max_length=255) # 模型分配名称(用户自定义名称)
table_name = "llm_usage" model_api_provider_name: str = Field(index=True, max_length=255) # 模型API供应商名称
# 请求相关信息
endpoint: Optional[str] = Field(default=None, max_length=255, nullable=True) # 模型API的具体endpoint
user_type: ModelUser = Field(sa_column=Column(SQLEnum(ModelUser)), default=ModelUser.SYSTEM) # 模型使用者类型
request_type: str = Field(max_length=50) # 内部请求类型,记录哪种模块使用了此模型
time_cost: float = Field(sa_column=Column(Float)) # 本次请求耗时,单位秒
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 请求时间戳
# Token使用情况
prompt_tokens: int # 提示词令牌数
completion_tokens: int # 完成词令牌数
total_tokens: int # 总令牌数
cost: float # 本次请求的费用,单位元
class Emoji(BaseModel): class Images(SQLModel, table=True):
"""表情包""" """用于同时存储表情包和图片的数据库模型。"""
full_path = TextField(unique=True, index=True) # 文件的完整路径 (包括文件名) __tablename__ = "images" # type: ignore
format = TextField() # 图片格式
emoji_hash = TextField(index=True) # 表情包的哈希值
description = TextField() # 表情包的描述
query_count = IntegerField(default=0) # 查询次数(用于统计表情包被查询描述的次数)
is_registered = BooleanField(default=False) # 是否已注册
is_banned = BooleanField(default=False) # 是否被禁止注册
# emotion: list[str] # 表情包的情感标签 - 存储为文本,应用层处理序列化/反序列化
emotion = TextField(null=True)
record_time = FloatField() # 记录时间(被创建的时间)
register_time = FloatField(null=True) # 注册时间(被注册为可用表情包的时间)
usage_count = IntegerField(default=0) # 使用次数(被使用的次数)
last_used_time = FloatField(null=True) # 上次使用时间
class Meta: id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
# database = db # 继承自 BaseModel
table_name = "emoji" # 元信息
image_hash: str = Field(default="", max_length=255) # 图片哈希使用sha256哈希值亦作为图片唯一ID
description: str # 图片的描述
full_path: str = Field(index=True, max_length=1024) # 文件的完整路径 (包括文件名)
image_type: ImageType = Field(sa_column=Column(SQLEnum(ImageType)), default=ImageType.EMOJI)
"""图片类型,例如 'emoji''image'"""
emotion: Optional[str] = Field(default=None, nullable=True) # 表情包的情感标签,逗号分隔
query_count: int = Field(default=0) # 被查询次数
is_registered: bool = Field(default=False) # 是否已经注册
is_banned: bool = Field(default=False) # 被手动禁用
record_time: datetime = Field(default_factory=datetime.now, index=True) # 记录时间(被创建的时间)
register_time: Optional[datetime] = Field(default=None, nullable=True) # 注册时间(被注册为可用表情包的时间)
vlm_processed: bool = Field(default=False) # 是否已经过VLM处理
class Messages(BaseModel): class ActionRecord(SQLModel, table=True):
""" """存储动作记录"""
用于存储消息数据的模型
"""
message_id = TextField(index=True) # 消息 ID (更改自 IntegerField) __tablename__ = "action_records" # type: ignore
time = DoubleField() # 消息时间戳
chat_id = TextField(index=True) # 对应的 ChatStreams stream_id id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
reply_to = TextField(null=True) # 元信息
action_id: str = Field(index=True, max_length=255) # 动作ID
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 记录时间戳
session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id
interest_value = DoubleField(null=True) # 调用信息
key_words = TextField(null=True) action_name: str = Field(max_length=255) # 动作名称
key_words_lite = TextField(null=True) action_reasoning: Optional[str] = Field(default=None) # 动作推理过程
action_data: Optional[str] = Field(default=None) # 动作数据JSON格式存储
is_mentioned = BooleanField(null=True) action_builtin_prompt: Optional[str] = Field(default=None) # 内置动作提示
is_at = BooleanField(null=True) action_display_prompt: Optional[str] = Field(default=None) # 最终输入到Prompt的内容
reply_probability_boost = DoubleField(null=True)
# 从 chat_info 扁平化而来的字段
chat_info_stream_id = TextField()
chat_info_platform = TextField()
chat_info_user_platform = TextField()
chat_info_user_id = TextField()
chat_info_user_nickname = TextField()
chat_info_user_cardname = TextField(null=True)
chat_info_group_platform = TextField(null=True) # 群聊信息可能不存在
chat_info_group_id = TextField(null=True)
chat_info_group_name = TextField(null=True)
chat_info_create_time = DoubleField()
chat_info_last_active_time = DoubleField()
# 从顶层 user_info 扁平化而来的字段 (消息发送者信息)
user_platform = TextField(null=True)
user_id = TextField(null=True)
user_nickname = TextField(null=True)
user_cardname = TextField(null=True)
processed_plain_text = TextField(null=True) # 处理后的纯文本消息
display_message = TextField(null=True) # 显示的消息
priority_mode = TextField(null=True)
priority_info = TextField(null=True)
additional_config = TextField(null=True)
is_emoji = BooleanField(default=False)
is_picid = BooleanField(default=False)
is_command = BooleanField(default=False)
intercept_message_level = IntegerField(default=0)
is_notify = BooleanField(default=False)
selected_expressions = TextField(null=True)
class Meta:
# database = db # 继承自 BaseModel
table_name = "messages"
class ActionRecords(BaseModel): class CommandRecord(SQLModel, table=True):
""" """记录命令执行情况"""
用于存储动作记录数据的模型
"""
action_id = TextField(index=True) # 消息 ID (更改自 IntegerField) __tablename__ = "command_records" # type: ignore
time = DoubleField() # 消息时间戳
action_reasoning = TextField(null=True) id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
action_name = TextField() # 元信息
action_data = TextField() timestamp: datetime = Field(default_factory=datetime.now, index=True) # 记录时间戳
action_done = BooleanField(default=False) session_id: str = Field(index=True, max_length=255) # 对应的 ChatSession session_id
action_build_into_prompt = BooleanField(default=False) # 调用信息
action_prompt_display = TextField() command_name: str = Field(index=True, max_length=255) # 命令名称
command_data: Optional[str] = Field(default=None) # 命令数据JSON格式存储
chat_id = TextField(index=True) # 对应的 ChatStreams stream_id command_result: Optional[str] = Field(default=None) # 命令执行结果
chat_info_stream_id = TextField()
chat_info_platform = TextField()
class Meta:
# database = db # 继承自 BaseModel
table_name = "action_records"
class Images(BaseModel): class OnlineTime(SQLModel, table=True):
"""
用于存储图像信息的模型
"""
image_id = TextField(default="") # 图片唯一ID
emoji_hash = TextField(index=True) # 图像的哈希值
description = TextField(null=True) # 图像的描述
path = TextField(unique=True) # 图像文件的路径
# base64 = TextField() # 图片的base64编码
count = IntegerField(default=1) # 图片被引用的次数
timestamp = FloatField() # 时间戳
type = TextField() # 图像类型,例如 "emoji"
vlm_processed = BooleanField(default=False) # 是否已经过VLM处理
class Meta:
table_name = "images"
class ImageDescriptions(BaseModel):
"""
用于存储图像描述信息的模型
"""
type = TextField() # 类型,例如 "emoji"
image_description_hash = TextField(index=True) # 图像的哈希值
description = TextField() # 图像的描述
timestamp = FloatField() # 时间戳
class Meta:
# database = db # 继承自 BaseModel
table_name = "image_descriptions"
class EmojiDescriptionCache(BaseModel):
"""
存储表情包的详细描述和情感标签缓存
"""
emoji_hash = TextField(unique=True, index=True)
description = TextField() # 详细描述
emotion_tags = TextField(null=True) # 情感标签,逗号分隔
timestamp = FloatField()
class Meta:
table_name = "emoji_description_cache"
class OnlineTime(BaseModel):
""" """
用于存储在线时长记录的模型 用于存储在线时长记录的模型
""" """
# timestamp: "$date": "2025-05-01T18:52:18.191Z" (存储为字符串) __tablename__ = "online_time" # type: ignore
timestamp = TextField(default=datetime.datetime.now) # 时间戳
duration = IntegerField() # 时长,单位分钟
start_timestamp = DateTimeField(default=datetime.datetime.now)
end_timestamp = DateTimeField(index=True)
class Meta: id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
# database = db # 继承自 BaseModel
table_name = "online_time"
timestamp: datetime = Field(default_factory=datetime.now, index=True) # 时间戳
duration_minutes: int = Field() # 时长,单位秒
start_timestamp: datetime = Field(default_factory=datetime.now) # 上线时间
end_timestamp: datetime = Field(index=True) # 下线时间
class PersonInfo(BaseModel):
"""
用于存储个人信息数据的模型
"""
is_known = BooleanField(default=False) # 是否已认识 class Expression(SQLModel, table=True):
person_id = TextField(unique=True, index=True) # 个人唯一ID """用于存储表达方式的模型"""
person_name = TextField(null=True) # 个人名称 (允许为空)
name_reason = TextField(null=True) # 名称设定的原因
platform = TextField() # 平台
user_id = TextField(index=True) # 用户ID
nickname = TextField(null=True) # 用户昵称
group_nick_name = TextField(null=True) # 群昵称列表 (JSON格式存储 [{"group_id": str, "group_nick_name": str}])
memory_points = TextField(null=True) # 个人印象的点
know_times = FloatField(null=True) # 认识时间 (时间戳)
know_since = FloatField(null=True) # 首次印象总结时间
last_know = FloatField(null=True) # 最后一次印象总结时间
class Meta: __tablename__ = "expressions" # type: ignore
# database = db # 继承自 BaseModel
table_name = "person_info"
id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
class GroupInfo(BaseModel): situation: str = Field(index=True, max_length=255, primary_key=True) # 情景
""" style: str = Field(index=True, max_length=255, primary_key=True) # 风格
用于存储群组信息数据的模型
"""
group_id = TextField(unique=True, index=True) # 群组唯一ID context: str # 上下文
group_name = TextField(null=True) # 群组名称 (允许为空) up_content: str
platform = TextField() # 平台
group_impression = TextField(null=True) # 群组印象
member_list = TextField(null=True) # 群成员列表 (JSON格式)
topic = TextField(null=True) # 群组基本信息
create_time = FloatField(null=True) # 创建时间 (时间戳) content_list: str # 内容列表JSON格式存储
last_active = FloatField(null=True) # 最后活跃时间 count: int = Field(default=0) # 使用次数
member_count = IntegerField(null=True, default=0) # 成员数量 last_active_time: datetime = Field(default_factory=datetime.now, index=True) # 上次使用时间
create_time: datetime = Field(default_factory=datetime.now) # 创建时间
session_id: Optional[str] = Field(default=None, max_length=255, nullable=True) # 会话ID区分是否为全局表达方式
class Meta:
# database = db # 继承自 BaseModel
table_name = "group_info"
class Jargon(SQLModel, table=True):
"""存黑话的模型"""
class Expression(BaseModel): __tablename__ = "jargons" # type: ignore
"""
用于存储表达风格的模型
"""
situation = TextField() id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
style = TextField()
content_list = TextField(null=True)
count = IntegerField(default=1)
last_active_time = FloatField()
chat_id = TextField(index=True)
create_date = FloatField(null=True) # 创建日期,允许为空以兼容老数据
checked = BooleanField(default=False) # 是否已检查
rejected = BooleanField(default=False) # 是否被拒绝但未更新
modified_by = TextField(null=True) # 最后修改来源:'ai' 或 'user',为空表示未检查
class Meta: content: str = Field(index=True, max_length=255, primary_key=True) # 黑话内容
table_name = "expression" raw_content: Optional[str] = Field(default=None, nullable=True) # 原始内容,未处理的黑话内容
meaning: str # 黑话含义
session_id: Optional[str] = Field(default=None, max_length=255, nullable=True) # 会话ID区分是否为全局黑话
class Jargon(BaseModel): count: int = Field(default=0) # 使用次数
""" is_jargon: Optional[bool] = Field(default=True) # 是否为黑话False表示为白话
用于存储俚语的模型 is_complete: bool = Field(default=False) # 是否为已经完成全部推断count > 100后不再推断
""" inference_with_context: Optional[str] = Field(default=None, nullable=True) # 带上下文的推断结果JSON格式
inference_with_content_only: Optional[str] = Field(default=None, nullable=True) # 只基于词条的推断结果JSON格式
content = TextField()
raw_content = TextField(null=True)
meaning = TextField(null=True)
chat_id = TextField(index=True)
is_global = BooleanField(default=False)
count = IntegerField(default=0)
is_jargon = BooleanField(null=True) # None表示未判定True表示是黑话False表示不是黑话
last_inference_count = IntegerField(null=True) # 最后一次判定的count值用于避免重启后重复判定
is_complete = BooleanField(default=False) # 是否已完成所有推断count>=100后不再推断
inference_with_context = TextField(null=True) # 基于上下文的推断结果JSON格式
inference_content_only = TextField(null=True) # 仅基于词条的推断结果JSON格式
class Meta: class ChatHistory(SQLModel, table=True):
table_name = "jargon" """存储聊天历史记录的模型"""
__tablename__ = "chat_history" # type: ignore
class ChatHistory(BaseModel): id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
"""
用于存储聊天历史概括的模型
"""
chat_id = TextField(index=True) # 聊天ID # 元信息
start_time = DoubleField() # 起始时间 session_id: str = Field(index=True, max_length=255) # 聊天会话ID
end_time = DoubleField() # 结束时间 start_timestamp: datetime = Field(index=True) # 聊天开始时间
original_text = TextField() # 对话原文 end_timestamp: datetime = Field(index=True) # 聊天结束时间
participants = TextField() # 参与的所有人的昵称JSON格式存储 query_count: int = Field(default=0) # 被检索次数
theme = TextField() # 主题:这段对话的主要内容,一个简短的标题 query_forget_count: int = Field(default=0) # 被遗忘检查的次数
keywords = TextField() # 关键词这段对话的关键词JSON格式存储
summary = TextField() # 概括:对这段话的平文本概括
# key_point = TextField(null=True) # 关键信息话题中的关键信息点JSON格式存储
count = IntegerField(default=0) # 被检索次数
forget_times = IntegerField(default=0) # 被遗忘检查的次数
class Meta: # 历史消息内容
table_name = "chat_history" original_messages: str # 对话原文
participants: str # 参与者列表JSON格式存储
theme: str # 对话主题:这段对话的主要内容,一个简短的标题
keywords: str # 关键词这段对话的关键词JSON格式存储
summary: str # 概括:对这段话的平文本概括
class ThinkingBack(BaseModel): class ThinkingQuestion(SQLModel, table=True):
""" """存储思考型问题的模型"""
用于存储记忆检索思考过程的模型
"""
chat_id = TextField(index=True) # 聊天ID __tablename__ = "thinking_questions" # type: ignore
question = TextField() # 提出的问题
context = TextField(null=True) # 上下文信息
found_answer = BooleanField(default=False) # 是否找到答案
answer = TextField(null=True) # 答案内容
thinking_steps = TextField(null=True) # 思考步骤JSON格式
create_time = DoubleField() # 创建时间
update_time = DoubleField() # 更新时间
class Meta: id: Optional[int] = Field(default=None, primary_key=True) # 自增主键
table_name = "thinking_back"
# 问答对
MODELS = [ question: str # 问题内容
ChatStreams, context: Optional[str] = Field(default=None, nullable=True) # 上下文
LLMUsage, found_answer: bool = Field(default=False) # 是否找到答案
Emoji, answer: Optional[str] = Field(default=None, nullable=True) # 问题答案
Messages,
Images, thinking_steps: Optional[str] = Field(default=None, nullable=True) # 思考步骤JSON格式存储
ImageDescriptions, created_timestamp: datetime = Field(default_factory=datetime.now, index=True) # 创建时间
EmojiDescriptionCache, updated_timestamp: datetime = Field(default_factory=datetime.now, index=True) # 最后更新时间
OnlineTime,
PersonInfo,
Expression,
ActionRecords,
Jargon,
ChatHistory,
ThinkingBack,
]
def create_tables():
    """Create the database table of every model listed in ``MODELS``.

    The call runs inside ``with db``, so the database context manager wraps
    the whole creation step.
    """
    models_to_create = MODELS
    with db:
        db.create_tables(models_to_create)
def initialize_database(sync_constraints=False):
    """
    Bring the database schema in line with the models listed in ``MODELS``.

    A table that does not exist is created. For a table that exists, columns
    present on the model but missing in the database are added with
    ``ALTER TABLE ... ADD COLUMN``, and columns present in the database but not
    on the model are dropped with ``ALTER TABLE ... DROP COLUMN``. Failures of
    individual ALTER statements are logged and do not stop the run.

    Args:
        sync_constraints (bool): Whether to also synchronise field constraints.
            Defaults to False. When True, ``sync_field_constraints()`` is called
            to check and repair NULL-constraint mismatches.

    Returns:
        None. Any exception raised by the checks is logged and swallowed; in that
        case the final "initialisation complete" message is not logged.
    """
    try:
        with db:  # manages the connection used by the table_exists checks
            for model in MODELS:
                table_name = model._meta.table_name
                if not db.table_exists(model):
                    logger.warning(f"'{table_name}' 未找到,正在创建...")
                    db.create_tables([model])
                    logger.info(f"'{table_name}' 创建成功")
                    continue
                # Check the fields: row[1] of each PRAGMA table_info row is the column name.
                cursor = db.execute_sql(f"PRAGMA table_info('{table_name}')")
                existing_columns = {row[1] for row in cursor.fetchall()}
                model_fields = set(model._meta.fields.keys())
                if missing_fields := model_fields - existing_columns:
                    logger.warning(f"'{table_name}' 缺失字段: {missing_fields}")
                for field_name, field_obj in model._meta.fields.items():
                    if field_name not in existing_columns:
                        logger.info(f"'{table_name}' 缺失字段 '{field_name}',正在添加...")
                        # Map the peewee field class name to a column type; unknown classes fall back to TEXT.
                        field_type = field_obj.__class__.__name__
                        sql_type = {
                            "TextField": "TEXT",
                            "IntegerField": "INTEGER",
                            "FloatField": "FLOAT",
                            "DoubleField": "DOUBLE",
                            "BooleanField": "INTEGER",
                            "DateTimeField": "DATETIME",
                        }.get(field_type, "TEXT")
                        alter_sql = f"ALTER TABLE {table_name} ADD COLUMN {field_name} {sql_type}"
                        alter_sql += " NULL" if field_obj.null else " NOT NULL"
                        # NOTE(review): SQLite rejects ADD COLUMN ... NOT NULL without a non-NULL
                        # default; such a statement would end up in the except branch below — confirm.
                        if hasattr(field_obj, "default") and field_obj.default is not None:
                            # Render the default according to its type; callables are skipped.
                            default_value = field_obj.default
                            if callable(default_value):
                                # Lambdas and other callables cannot be expressed in SQL, so no DEFAULT is emitted.
                                pass
                            elif isinstance(default_value, str):
                                alter_sql += f" DEFAULT '{default_value}'"
                            elif isinstance(default_value, bool):
                                alter_sql += f" DEFAULT {int(default_value)}"
                            else:
                                alter_sql += f" DEFAULT {default_value}"
                        try:
                            db.execute_sql(alter_sql)
                            logger.info(f"字段 '{field_name}' 添加成功")
                        except Exception as e:
                            logger.error(f"添加字段 '{field_name}' 失败: {e}")
                # Check for and drop surplus columns (columns in the database that the model lacks).
                extra_fields = existing_columns - model_fields
                if extra_fields:
                    logger.warning(f"'{table_name}' 存在多余字段: {extra_fields}")
                for field_name in extra_fields:
                    try:
                        logger.warning(f"'{table_name}' 存在多余字段 '{field_name}',正在尝试删除...")
                        db.execute_sql(f"ALTER TABLE {table_name} DROP COLUMN {field_name}")
                        logger.info(f"字段 '{field_name}' 删除成功")
                    except Exception as e:
                        logger.error(f"删除字段 '{field_name}' 失败: {e}")
        # When constraint synchronisation is enabled, run the constraint check and repair.
        if sync_constraints:
            logger.debug("开始同步数据库字段约束...")
            sync_field_constraints()
            logger.debug("数据库字段约束同步完成")
    except Exception as e:
        logger.exception(f"检查表或字段是否存在时出错: {e}")
        # If the check failed (for example the database is unavailable), stop here.
        return
    logger.info("数据库初始化完成")
def sync_field_constraints():
    """
    Align the NULL / NOT NULL constraint of existing columns with the model definitions.

    For every model in ``MODELS`` whose table exists, the column metadata reported by
    ``PRAGMA table_info`` is compared with ``field.null``.  Every field whose nullability
    differs is collected and handed to ``_fix_table_constraints`` for repair.  Any
    exception raised along the way is logged and swallowed.
    """
    try:
        with db:
            for model in MODELS:
                table_name = model._meta.table_name
                if not db.table_exists(model):
                    logger.warning(f"表 '{table_name}' 不存在,跳过约束检查")
                    continue

                logger.debug(f"检查表 '{table_name}' 的字段约束...")

                # Snapshot of the live table layout, keyed by column name.
                current_schema = {}
                for row in db.execute_sql(f"PRAGMA table_info('{table_name}')").fetchall():
                    current_schema[row[1]] = {"type": row[2], "notnull": bool(row[3]), "default": row[4]}

                constraints_to_fix = []
                for field_name, field_obj in model._meta.fields.items():
                    if field_name not in current_schema:
                        continue  # column is missing from the table; nothing to compare

                    current_notnull = current_schema[field_name]["notnull"]
                    model_allows_null = bool(field_obj.null)

                    # Consistent state: "model allows NULL" is the opposite of "column is NOT NULL".
                    if model_allows_null != current_notnull:
                        continue

                    if model_allows_null:
                        # Model permits NULL but the column is declared NOT NULL.
                        action, current_constraint, target_constraint = "allow_null", "NOT NULL", "NULL"
                        mismatch_message = f"字段 '{field_name}' 约束不一致: 模型允许NULL但数据库为NOT NULL"
                    else:
                        # Model forbids NULL but the column still accepts it.
                        action, current_constraint, target_constraint = "disallow_null", "NULL", "NOT NULL"
                        mismatch_message = f"字段 '{field_name}' 约束不一致: 模型不允许NULL但数据库允许NULL"

                    constraints_to_fix.append(
                        {
                            "field_name": field_name,
                            "field_obj": field_obj,
                            "action": action,
                            "current_constraint": current_constraint,
                            "target_constraint": target_constraint,
                        }
                    )
                    logger.warning(mismatch_message)

                if not constraints_to_fix:
                    logger.debug(f"表 '{table_name}' 的字段约束已同步")
                    continue

                logger.info(f"表 '{table_name}' 需要修复 {len(constraints_to_fix)} 个字段约束")
                _fix_table_constraints(table_name, model, constraints_to_fix)
    except Exception as e:
        logger.exception(f"同步字段约束时出错: {e}")
def _primary_key_name(model):
    """Return the name of the model's primary-key field, falling back to ``"id"``."""
    try:
        primary_key = getattr(model._meta, "primary_key", None)
        if primary_key:
            if hasattr(primary_key, "name"):
                return primary_key.name
            if isinstance(primary_key, str):
                return primary_key
    except Exception as e:
        # Best effort only: the default below is used when the lookup fails.
        logger.debug(f"获取主键字段名失败,使用默认值 'id': {e}")
    return "id"


def _null_replacement_sql(field_obj):
    """Return the SQL literal substituted for NULL when a column becomes NOT NULL."""
    if isinstance(field_obj, TextField):
        return "''"
    if isinstance(field_obj, (IntegerField, FloatField, DoubleField, BooleanField)):
        return "0"
    if isinstance(field_obj, DateTimeField):
        return f"'{datetime.datetime.now()}'"
    return "''"


def _fix_table_constraints(table_name, model, constraints_to_fix):
    """
    Repair the NULL / NOT NULL constraints of a table by rebuilding it.

    SQLite cannot alter a column constraint in place, so the table is copied to a
    backup, dropped, recreated from the current model definition and refilled from
    the backup.  The primary-key column is not copied, so the database assigns new
    primary-key values.  Columns that switch from NULL to NOT NULL have their
    existing NULLs replaced by a type-dependent default.

    The rebuild runs inside ``db.atomic()``: if any step fails the DDL is rolled
    back and the original table is left untouched.  Errors are logged, never raised.

    Args:
        table_name: Name of the table to rebuild.
        model: Model class describing the target schema.
        constraints_to_fix: List of dicts with the keys ``field_name``, ``action``
            (``"allow_null"`` or ``"disallow_null"``), ``current_constraint`` and
            ``target_constraint``.
    """
    # Computed before the ``try`` so the recovery handler can always reference it.
    backup_table = f"{table_name}_backup_{int(datetime.datetime.now().timestamp())}"

    try:
        # ``CREATE TABLE ... AS SELECT`` copies data only: the backup has no PRIMARY KEY,
        # NOT NULL, DEFAULT or index definitions, so renaming it back after a failure would
        # not restore the original schema.  Wrapping the rebuild in a transaction (or a
        # savepoint when one is already open) lets a failure roll the DDL back instead.
        with db.atomic():
            logger.info(f"开始修复表 '{table_name}' 的字段约束...")

            # 1. Create the backup copy.
            db.execute_sql(f"CREATE TABLE {backup_table} AS SELECT * FROM {table_name}")
            logger.info(f"已创建备份表 '{backup_table}'")

            # 2. Remember the row count before the original table is dropped.
            original_count = db.execute_sql(f"SELECT COUNT(*) FROM {backup_table}").fetchone()[0]
            logger.info(f"备份表 '{backup_table}' 包含 {original_count} 行数据")

            # 3. Drop the original table.
            db.execute_sql(f"DROP TABLE {table_name}")
            logger.info(f"已删除原表 '{table_name}'")

            # 4. Recreate it from the current model definition.
            db.create_tables([model])
            logger.info(f"已重新创建表 '{table_name}' 使用新的约束")

            # 5. Copy the data back, leaving out the primary key so it is regenerated.
            fields = list(model._meta.fields.keys())
            primary_key_name = _primary_key_name(model)
            if primary_key_name in fields:
                fields_without_pk = [f for f in fields if f != primary_key_name]
                logger.info(f"排除主键字段 '{primary_key_name}',让数据库自动生成新的主键")
            else:
                fields_without_pk = fields
            fields_str = ", ".join(fields_without_pk)

            null_to_notnull_fields = [
                constraint["field_name"] for constraint in constraints_to_fix if constraint["action"] == "disallow_null"
            ]
            if null_to_notnull_fields:
                logger.warning(f"字段 {null_to_notnull_fields} 将从允许NULL改为不允许NULL需要处理现有的NULL值")
                # Existing NULLs would violate the new NOT NULL constraint, so COALESCE them.
                select_fields = [
                    f"COALESCE({field_name}, {_null_replacement_sql(model._meta.fields[field_name])}) as {field_name}"
                    if field_name in null_to_notnull_fields
                    else field_name
                    for field_name in fields_without_pk
                ]
                select_str = ", ".join(select_fields)
            else:
                # Nothing to patch: copy the columns verbatim.
                select_str = fields_str
            insert_sql = f"INSERT INTO {table_name} ({fields_str}) SELECT {select_str} FROM {backup_table}"

            db.execute_sql(insert_sql)
            logger.info(f"已从备份表恢复数据到 '{table_name}'")

            # 6. Verify the row count; the backup is dropped only when it matches.
            new_count = db.execute_sql(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
            if original_count == new_count:
                logger.info(f"数据完整性验证通过: {original_count} 行数据")
                db.execute_sql(f"DROP TABLE {backup_table}")
                logger.info(f"已删除备份表 '{backup_table}'")
            else:
                logger.error(f"数据完整性验证失败: 原始 {original_count} 行,新表 {new_count}")
                logger.error(f"备份表 '{backup_table}' 已保留,请手动检查")
    except Exception as e:
        logger.exception(f"修复表 '{table_name}' 约束时出错: {e}")
        # After a successful rollback the backup no longer exists and the original table is
        # intact, so this branch is skipped.  It is kept as a last resort for the case where
        # the backup table is still present.
        try:
            if db.table_exists(backup_table):
                logger.info(f"尝试从备份表 '{backup_table}' 恢复...")
                db.execute_sql(f"DROP TABLE IF EXISTS {table_name}")
                db.execute_sql(f"ALTER TABLE {backup_table} RENAME TO {table_name}")
                logger.info(f"已从备份恢复表 '{table_name}'")
        except Exception as restore_error:
            logger.exception(f"恢复表失败: {restore_error}")
        return

    # Summary of the applied constraint changes (reached only when the rebuild did not fail).
    for constraint in constraints_to_fix:
        logger.info(
            f"已修复字段 '{constraint['field_name']}': "
            f"{constraint['current_constraint']} -> {constraint['target_constraint']}"
        )
def check_field_constraints():
    """
    Report NULL / NOT NULL mismatches between the models and the database without fixing them.

    Intended as a preview of what a constraint repair would change.

    Returns:
        dict: maps each table name that has mismatches to a list of dicts with the keys
        ``field_name``, ``issue``, ``model_constraint``, ``db_constraint`` and
        ``recommended_action``.  Tables without mismatches are omitted.  If an error
        occurs it is logged and whatever was collected so far is returned.
    """
    inconsistencies = {}
    try:
        with db:
            for model in MODELS:
                table_name = model._meta.table_name
                if not db.table_exists(model):
                    continue

                # Live column metadata keyed by column name.
                pragma_rows = db.execute_sql(f"PRAGMA table_info('{table_name}')").fetchall()
                current_schema = {
                    row[1]: {"type": row[2], "notnull": bool(row[3]), "default": row[4]} for row in pragma_rows
                }

                table_inconsistencies = []
                for field_name, field_obj in model._meta.fields.items():
                    column_info = current_schema.get(field_name)
                    if column_info is None:
                        continue  # column is missing from the table

                    current_notnull = column_info["notnull"]
                    model_allows_null = bool(field_obj.null)

                    # Nullability agrees when exactly one of the two flags is set.
                    if model_allows_null != current_notnull:
                        continue

                    if model_allows_null:
                        report = {
                            "field_name": field_name,
                            "issue": "model_allows_null_but_db_not_null",
                            "model_constraint": "NULL",
                            "db_constraint": "NOT NULL",
                            "recommended_action": "allow_null",
                        }
                    else:
                        report = {
                            "field_name": field_name,
                            "issue": "model_not_null_but_db_allows_null",
                            "model_constraint": "NOT NULL",
                            "db_constraint": "NULL",
                            "recommended_action": "disallow_null",
                        }
                    table_inconsistencies.append(report)

                if table_inconsistencies:
                    inconsistencies[table_name] = table_inconsistencies
    except Exception as e:
        logger.exception(f"检查字段约束时出错: {e}")
    return inconsistencies
def fix_image_id():
    """
    Backfill the ``image_id`` field of ``Images`` rows that do not have one.

    Every row with a falsy ``image_id`` receives a freshly generated UUID4 string and
    is saved.  Errors are logged and swallowed.
    """
    from uuid import uuid4

    try:
        with db:
            for img in Images.select():
                if img.image_id:
                    continue  # already has an identifier
                img.image_id = str(uuid4())
                img.save()
                logger.info(f"已为表情包 {img.id} 生成新的 image_id: {img.image_id}")
    except Exception as e:
        logger.exception(f"修复 image_id 时出错: {e}")
# Executed when the module is loaded: initialise the database with constraint
# synchronisation enabled, then backfill missing image_id values.
initialize_database(sync_constraints=True)
fix_image_id()