数据库微调

pull/1496/head
UnCLAS-Prommer 2026-02-02 19:00:54 +08:00
parent 54a760b559
commit 1e3dfb9ff1
No known key found for this signature in database
3 changed files with 125 additions and 264 deletions

View File

@ -1,27 +1,133 @@
import os
from peewee import SqliteDatabase
from rich.traceback import install
from pathlib import Path
from contextlib import contextmanager
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker
from typing import TYPE_CHECKING, Generator
if TYPE_CHECKING:
from sqlite3 import Connection as SQLite3Connection
install(extra_lines=3)
# Define the database file path.
# NOTE(review): both an os.path-based and a pathlib-based variant of the same
# assignments appear below (this looks like old and new diff lines side by
# side). Executed in order, the later pathlib assignments are the ones that
# remain bound. Both climb four directory levels up from this file.
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
_DB_DIR = os.path.join(ROOT_PATH, "data")
_DB_FILE = os.path.join(_DB_DIR, "MaiBot.db")
ROOT_PATH = Path(__file__).parent.parent.parent.parent.absolute().resolve()
_DB_DIR = ROOT_PATH / "data"
_DB_FILE = _DB_DIR / "MaiBot.db"
# Make sure the database directory exists before anything connects to the file.
os.makedirs(_DB_DIR, exist_ok=True)
_DB_DIR.mkdir(parents=True, exist_ok=True)
# SQLAlchemy connection URL pointing at the SQLite file defined above.
DATABASE_URL = f"sqlite:///{_DB_FILE}"
# Global Peewee SQLite database access point.
# NOTE(review): the closing parenthesis of this call is not visible in this
# view; this looks like the removed (pre-SQLAlchemy) side of the diff.
db = SqliteDatabase(
_DB_FILE,
pragmas={
"journal_mode": "wal", # WAL mode for better concurrency
"cache_size": -64 * 1000, # 64MB cache
"foreign_keys": 1,
"ignore_check_constraints": 0,
"synchronous": 0, # asynchronous writes for better performance
"busy_timeout": 1000, # 1 second timeout instead of 3 seconds
},
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection: "SQLite3Connection", connection_record):
    """Apply the SQLite PRAGMA settings to a newly opened DB-API connection.

    Registered as a ``connect`` listener on ``Engine``, so it is invoked for
    each new database connection.

    PRAGMAs issued:
        - ``journal_mode=WAL``: write-ahead logging, for better concurrency.
        - ``cache_size=-64000``: a negative value is in KB, i.e. about 64MB.
        - ``foreign_keys=ON``: enforce foreign-key constraints.
        - ``synchronous=NORMAL``: balance between speed and durability.
        - ``busy_timeout=1000``: wait up to 1 second on a locked database.

    Args:
        dbapi_connection: The raw DB-API connection that was just opened.
        connection_record: Passed in by the event system; not used here.
    """
    cursor = dbapi_connection.cursor()
    try:
        cursor.execute("PRAGMA journal_mode=WAL")
        cursor.execute("PRAGMA cache_size=-64000")  # negative means KB: 64000KB = 64MB
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.execute("PRAGMA synchronous=NORMAL")  # NORMAL is safe under WAL
        cursor.execute("PRAGMA busy_timeout=1000")  # 1 second timeout
    finally:
        # Close the cursor even if one of the PRAGMA statements raises, so it
        # is never left open on the new connection.
        cursor.close()
# Create the engine for the SQLite database file.
engine = create_engine(
    DATABASE_URL,
    # Forwarded to the sqlite3 driver: turn off its same-thread check so a
    # connection may be used from a thread other than the one that opened it.
    connect_args={"check_same_thread": False},
    pool_pre_ping=True,
    echo=False,
)
# Session factory bound to the engine; sessions neither autoflush nor autocommit.
SessionLocal = sessionmaker(
    bind=engine,
    autoflush=False,
    autocommit=False,
)
@contextmanager
def get_db_session(auto_commit: bool = True) -> Generator[Session, None, None]:
    """Context manager that hands out a database session (auto-commit by default).

    Examples:
    ----
    .. code-block:: python

        # Option 1: automatic commit (recommended, the default)
        with get_db_session() as session:
            user = User(name="Alice", age=25)
            session.add(user)
            # committed on exit, no explicit call needed

        # Option 2: manual transaction control (advanced)
        with get_db_session(auto_commit=False) as session:
            user1 = User(name="Alice", age=25)
            user2 = User(name="Bob", age=30)
            session.add_all([user1, user2])
            session.commit()  # explicit commit

    Args:
        auto_commit (bool): Commit when the ``with`` body finishes without
            raising. Defaults to True.

    Yields:
        Session: A SQLAlchemy database session.

    Notes:
        - The session is always closed when the context exits.
        - If an ``Exception`` propagates, the transaction is rolled back and
          the exception is re-raised.
        - With ``auto_commit=False`` the caller must call ``session.commit()``.
    """
    db_session = SessionLocal()
    try:
        yield db_session
        # Only reached when the caller's ``with`` body completed normally.
        if auto_commit:
            db_session.commit()
    except Exception:
        # Covers errors from the caller's body as well as a failing commit.
        db_session.rollback()
        raise
    finally:
        db_session.close()
def get_db_session_manual():
    """Return the session context manager with auto-commit turned off.

    Shorthand for ``get_db_session(auto_commit=False)``; the caller is
    responsible for calling ``session.commit()``.
    """
    manual_session_cm = get_db_session(auto_commit=False)
    return manual_session_cm
def get_db() -> Generator[Session, None, None]:
    """Generator that yields one database session and closes it afterwards.

    Intended for dependency-injection style use (e.g. FastAPI). No commit or
    rollback is issued here.

    Example (FastAPI):
    ----
    .. code-block:: python

        @app.get("/users/{user_id}")
        def read_user(user_id: int, db: Session = Depends(get_db)):
            return db.get(User, user_id)

    Yields:
        Session: A SQLAlchemy database session.
    """
    # The session is its own context manager: leaving the block closes it,
    # whether the consumer finished normally or raised.
    with SessionLocal() as db_session:
        yield db_session

View File

@ -1,246 +0,0 @@
from typing import Optional
from pydantic import BaseModel
from datetime import datetime
from .database_model import ModelUser, ImageType
class MaiMessage(BaseModel):
    # Pydantic model describing one chat message record.
    id: Optional[int] = None  # auto-increment primary key
    message_id: str  # message id
    time: float  # message time, in seconds
    platform: str  # top-level platform field
    user_id: str  # sender's user id
    user_nickname: str  # sender's nickname
    user_cardname: Optional[str] = None  # sender's card name (remark name)
    user_platform: Optional[str] = None  # sender's platform
    group_id: Optional[str] = None  # group id
    group_name: Optional[str] = None  # group name
    group_platform: Optional[str] = None  # group platform
    is_mentioned: bool = False  # the bot was mentioned
    is_at: bool = False  # the bot was @-mentioned
    session_id: str  # chat session id
    reply_to: Optional[str] = None  # id of the message being replied to
    is_emoji: bool = False  # whether this is an emoji/sticker message
    is_picture: bool = False  # whether this is a picture message
    is_command: bool = False  # whether this is a command
    is_notify: bool = False  # whether this is a notification message
    raw_content: str  # base64-encoded raw message content
    processed_plain_text: str  # flattened plain-text form of the message
    display_message: str  # message text as displayed; this is what goes into the Prompt
    additional_config: Optional[str] = None  # extra configuration, stored as JSON
class ModelUsage(BaseModel):
    # Pydantic model describing one model-usage (request accounting) record.
    # NOTE(review): on Pydantic v2, field names starting with "model_" may
    # trigger a protected-namespace warning — confirm against the version used.
    id: Optional[int] = None  # auto-increment primary key
    model_name: str  # actual model name (the provider's name for it)
    model_assign_name: Optional[str] = None  # assigned model name (user-defined)
    model_api_provider_name: str  # name of the model API provider
    endpoint: Optional[str] = None  # specific endpoint of the model API
    user_type: ModelUser = ModelUser.SYSTEM  # kind of user of the model
    request_type: str  # internal request type; records which module used the model
    time_cost: float  # duration of this request, in seconds
    timestamp: datetime  # request timestamp
    prompt_tokens: int  # number of prompt tokens
    completion_tokens: int  # number of completion tokens
    total_tokens: int  # total number of tokens
    cost: float  # cost of this request, in yuan
class Images(BaseModel):
    # Pydantic model describing one stored image / sticker record.
    id: Optional[int] = None  # auto-increment primary key
    image_hash: str = ""  # SHA-256 hash of the image; also serves as its unique id
    description: str  # description of the image
    full_path: str  # full path of the file (including the file name)
    image_type: ImageType = ImageType.EMOJI  # image category; defaults to EMOJI
    emotion: Optional[str] = None  # emotion tags of the sticker, comma-separated
    query_count: int = 0  # number of times queried
    is_registered: bool = False  # whether it has been registered
    is_banned: bool = False  # manually disabled
    record_time: datetime  # record time (when the entry was created)
    register_time: Optional[datetime] = None  # time it was registered as a usable sticker
    vlm_processed: bool = False  # whether it has been processed by the VLM
class ActionRecord(BaseModel):
    # Pydantic model describing one recorded action.
    id: Optional[int] = None  # auto-increment primary key
    action_id: str  # action id
    timestamp: datetime  # record timestamp
    session_id: str  # session_id of the corresponding ChatSession
    action_name: str  # action name
    action_reasoning: Optional[str] = None  # reasoning behind the action
    action_data: Optional[str] = None  # action data, stored as JSON
    action_builtin_prompt: Optional[str] = None  # built-in action prompt
    action_display_prompt: Optional[str] = None  # content finally fed into the Prompt
class CommandRecord(BaseModel):
    # Pydantic model describing one recorded command execution.
    id: Optional[int] = None  # auto-increment primary key
    timestamp: datetime  # record timestamp
    session_id: str  # session_id of the corresponding ChatSession
    command_name: str  # command name
    command_data: Optional[str] = None  # command data, stored as JSON
    command_result: Optional[str] = None  # result of executing the command
class OnlineTime(BaseModel):
    # Pydantic model describing one online-time interval record.
    id: Optional[int] = None  # auto-increment primary key
    timestamp: datetime  # timestamp
    # Duration of the interval.
    # NOTE(review): the field name says minutes, but the original comment
    # stated the unit as seconds — confirm which one callers actually store.
    duration_minutes: int
    start_timestamp: datetime  # time of going online
    end_timestamp: datetime  # time of going offline
class Expression(BaseModel):
    # Pydantic model describing one learned expression (situation + style).
    id: Optional[int] = None  # auto-increment primary key
    situation: str  # situation
    style: str  # style
    context: str  # context
    up_content: str  # (undocumented in the original)
    content_list: str  # content list, stored as JSON
    count: int = 0  # usage count
    last_active_time: datetime  # time of last use
    create_time: datetime  # creation time
    # Session id; distinguishes session-specific from global expressions
    # (presumably None means global — TODO confirm).
    session_id: Optional[str] = None
class Jargon(BaseModel):
    # Pydantic model describing one jargon (slang term) entry.
    id: Optional[int] = None  # auto-increment primary key
    content: str  # the jargon text
    raw_content: Optional[str] = None  # original, unprocessed jargon text
    meaning: str  # meaning of the jargon
    # Session id; distinguishes session-specific from global jargon
    # (presumably None means global — TODO confirm).
    session_id: Optional[str] = None
    count: int = 0  # usage count
    is_jargon: Optional[bool] = True  # whether this is jargon; False means plain speech
    # Whether all inference is finished; per the original note, no further
    # inference happens once count > 100.
    is_complete: bool = False
    inference_with_context: Optional[str] = None  # inference result using context, JSON
    inference_with_content_only: Optional[str] = None  # inference result from the term alone, JSON
class ChatHistory(BaseModel):
    # Pydantic model describing one summarized stretch of chat history.
    id: Optional[int] = None  # auto-increment primary key
    session_id: str  # chat session id
    start_timestamp: datetime  # time the chat started
    end_timestamp: datetime  # time the chat ended
    query_count: int = 0  # number of times retrieved
    query_forget_count: int = 0  # number of times checked for forgetting
    original_messages: str  # original text of the conversation
    participants: str  # list of participants, stored as JSON
    theme: str  # conversation topic: its main content as a short title
    keywords: str  # keywords of the conversation, stored as JSON
    summary: str  # plain-text summary of the conversation
class ThinkingQuestion(BaseModel):
    # Pydantic model describing one question being thought about.
    id: Optional[int] = None  # auto-increment primary key
    question: str  # question text
    context: Optional[str] = None  # context
    found_answer: bool = False  # whether an answer was found
    answer: Optional[str] = None  # answer to the question
    thinking_steps: Optional[str] = None  # thinking steps, stored as JSON
    created_timestamp: datetime  # creation time
    updated_timestamp: datetime  # time of last update

View File

@ -77,7 +77,7 @@ class ModelUsage(SQLModel, table=True):
cost: float # 本次请求的费用,单位元
class Images(SQLModel, table=True):
class Image(SQLModel, table=True):
"""用于同时存储表情包和图片的数据库模型。"""
__tablename__ = "images" # type: ignore
@ -98,6 +98,7 @@ class Images(SQLModel, table=True):
record_time: datetime = Field(default_factory=datetime.now, index=True) # 记录时间(被创建的时间)
register_time: Optional[datetime] = Field(default=None, nullable=True) # 注册时间(被注册为可用表情包的时间)
last_used_time: Optional[datetime] = Field(default=None, nullable=True) # 上次使用时间
vlm_processed: bool = Field(default=False) # 是否已经过VLM处理