mirror of https://github.com/Mai-with-u/MaiBot.git
新表情包系统;对应的测试;TODO更新;Prompt拆分;工具拆分
parent
d27d73f329
commit
4ff070c08e
|
|
@ -142,6 +142,18 @@ version 0.3.0 - 2026-01-11
|
|||
- [x] Prompt删除
|
||||
- [x] **只保存被标记为需要保存的Prompt,其他的Prompt文件全部删除**
|
||||
|
||||
## LLM相关内容
|
||||
- [ ] 统一LLM调用接口
|
||||
- [ ] 统一LLM调用返回格式为专有数据模型
|
||||
- [ ] 取消所有__init__方法中对LLM Client的初始化,转而使用获取方式
|
||||
- [ ] 统一使用`get_llm_client`方法获取LLM Client实例
|
||||
- [ ] __init__方法中只保存配置信息
|
||||
- [ ] LLM Client管理器
|
||||
- [ ] LLM Client单例/多例管理
|
||||
- [ ] LLM Client缓存管理/生命周期管理
|
||||
- [ ] LLM Client根据配置热重载
|
||||
|
||||
|
||||
## 一些细枝末节的东西
|
||||
- [ ] 将`stream_id`和`chat_id`统一命名为`session_id`
|
||||
- [ ] 映射表
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
这是一个聊天场景中的表情包描述:"{description}"
|
||||
|
||||
请你识别这个表情包的含义和适用场景,给我简短的描述,每个描述不要超过15个字
|
||||
你可以关注其幽默和讽刺意味,动用贴吧,微博,小红书的知识,必须从互联网梗、meme的角度去分析
|
||||
请直接输出描述,不要出现任何其他内容,如果有多个描述,可以用逗号分隔
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
这是一个表情包,请对这个表情包进行审核,标准如下:
|
||||
1. 必须符合"{demand}"的要求
|
||||
2. 不能是色情、暴力等违法违规内容,必须符合公序良俗
|
||||
3. 不能是任何形式的截图,聊天记录或视频截图
|
||||
4. 不要出现5个以上文字
|
||||
请回答这个表情包是否满足上述要求,是则回答是,否则回答否,不要出现任何其他内容
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
{nickname}的表情包存储已满({emoji_num}/{emoji_num_max}),需要决定是否删除一个旧表情包来为新表情包腾出空间。
|
||||
|
||||
新表情包信息:
|
||||
描述: {description}
|
||||
|
||||
现有表情包列表:
|
||||
{emoji_list}
|
||||
|
||||
请决定:
|
||||
1. 是否要删除某个现有表情包来为新表情包腾出空间?
|
||||
2. 如果要删除,应该删除哪一个(给出编号)?
|
||||
请只回答:'不删除'或'删除编号X'(X为表情包编号)。
|
||||
|
|
@ -4,8 +4,8 @@ version = "0.11.6"
|
|||
description = "MaiCore 是一个基于大语言模型的可交互智能体"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"aiohttp>=3.12.14",
|
||||
"aiohttp-cors>=0.8.1",
|
||||
"aiohttp>=3.12.14",
|
||||
"colorama>=0.4.6",
|
||||
"faiss-cpu>=1.11.0",
|
||||
"fastapi>=0.116.0",
|
||||
|
|
@ -14,7 +14,6 @@ dependencies = [
|
|||
"json-repair>=0.47.6",
|
||||
"maim-message>=0.6.2",
|
||||
"matplotlib>=3.10.3",
|
||||
"msgpack>=1.1.2",
|
||||
"numpy>=2.2.6",
|
||||
"openai>=1.95.0",
|
||||
"pandas>=2.3.1",
|
||||
|
|
@ -25,6 +24,7 @@ dependencies = [
|
|||
"pypinyin>=0.54.0",
|
||||
"python-dotenv>=1.1.1",
|
||||
"python-multipart>=0.0.20",
|
||||
"python-levenshtein",
|
||||
"quick-algo>=0.1.3",
|
||||
"rich>=14.0.0",
|
||||
"ruff>=0.12.2",
|
||||
|
|
@ -34,7 +34,6 @@ dependencies = [
|
|||
"tomlkit>=0.13.3",
|
||||
"urllib3>=2.5.0",
|
||||
"uvicorn>=0.35.0",
|
||||
"zstandard>=0.25.0",
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -1,12 +1,12 @@
|
|||
aiohttp>=3.12.14
|
||||
aiohttp-cors>=0.8.1
|
||||
aiohttp>=3.12.14
|
||||
colorama>=0.4.6
|
||||
faiss-cpu>=1.11.0
|
||||
fastapi>=0.116.0
|
||||
google-genai>=1.39.1
|
||||
jieba>=0.42.1
|
||||
json-repair>=0.47.6
|
||||
maim-message
|
||||
maim-message>=0.6.2
|
||||
matplotlib>=3.10.3
|
||||
numpy>=2.2.6
|
||||
openai>=1.95.0
|
||||
|
|
@ -17,6 +17,7 @@ pyarrow>=20.0.0
|
|||
pydantic>=2.11.7
|
||||
pypinyin>=0.54.0
|
||||
python-dotenv>=1.1.1
|
||||
python-levenshtein
|
||||
python-multipart>=0.0.20
|
||||
quick-algo>=0.1.3
|
||||
rich>=14.0.0
|
||||
|
|
@ -26,6 +27,4 @@ structlog>=25.4.0
|
|||
toml>=0.10.2
|
||||
tomlkit>=0.13.3
|
||||
urllib3>=2.5.0
|
||||
uvicorn>=0.35.0
|
||||
msgpack
|
||||
zstandard
|
||||
uvicorn>=0.35.0
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,165 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from PIL import Image as PILImage
|
||||
from rich.traceback import install
|
||||
from typing import Optional, List
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import io
|
||||
import traceback
|
||||
|
||||
from src.common.database.database_model import Images, ImageType
|
||||
from src.common.logger import get_logger
|
||||
|
||||
|
||||
# Install rich's traceback handler for this process; extra_lines=3 is the
# amount of surrounding source context rich shows around each frame.
install(extra_lines=3)
|
||||
|
||||
# Module-level logger, obtained from the project logger factory under the name "emoji".
logger = get_logger("emoji")
|
||||
|
||||
|
||||
class BaseImageDataModel(ABC):
    """
    Abstract base for in-memory image models that map to and from ``Images`` rows.

    Subclasses must implement ``from_db_instance`` and ``to_db_instance``.
    The two concrete helpers read a file from disk and detect an image's
    format from its raw bytes.
    """

    @classmethod
    @abstractmethod
    def from_db_instance(cls, image: "Images"):
        """Build a model object from an ``Images`` database record."""
        raise NotImplementedError

    @abstractmethod
    def to_db_instance(self) -> "Images":
        """Convert this model object into an ``Images`` database record."""
        raise NotImplementedError

    def read_image_bytes(self, path: Path) -> bytes:
        """
        Synchronously read the whole image file and return its content.

        Args:
            path (Path): Full path of the image file.
        Returns:
            return (bytes): Raw content of the file.
        Raises:
            FileNotFoundError: The file does not exist (logged, then re-raised).
            Exception: Any other error raised while reading (logged, then re-raised).
        """
        try:
            with open(path, "rb") as image_file:
                content = image_file.read()
        except FileNotFoundError as err:
            logger.error(f"[读取图片文件] 文件未找到: {path}")
            raise err
        except Exception as err:
            logger.error(f"[读取图片文件] 读取文件时发生错误: {err}")
            raise err
        return content

    def get_image_format(self, image_bytes: bytes) -> str:
        """
        Detect the image format of the given bytes with Pillow.

        Args:
            image_bytes (bytes): Raw content of the image.

        Returns:
            return (str): Format name reported by Pillow, lower-cased.

        Raises:
            ValueError: Pillow opened the data but reported no format.
            Exception: Any other error raised while opening the image
                (every failure is logged, then re-raised).
        """
        try:
            with PILImage.open(io.BytesIO(image_bytes)) as pil_image:
                detected_format = pil_image.format
                if detected_format:
                    return detected_format.lower()
                # Reached only when Pillow could not name the format.
                raise ValueError("无法识别图片格式")
        except Exception as err:
            logger.error(f"[获取图片格式] 读取图片格式时发生错误: {err}")
            raise err
|
||||
|
||||
|
||||
class ImageDataModel(BaseImageDataModel):
    """
    Image model that currently defines no members of its own.

    Everything is inherited from ``BaseImageDataModel``, including the
    still-abstract ``from_db_instance`` / ``to_db_instance``, so this class
    cannot be instantiated until those are overridden.
    """
|
||||
|
||||
|
||||
class MaiEmoji(BaseImageDataModel):
    """
    In-memory model of a single emoji image file and its metadata.

    The constructor only validates and records the file location; the hash
    and the image format are filled in later by ``calculate_hash_format``.
    """

    def __init__(self, full_path: str | Path):
        """
        Validate the path and initialise all fields to their empty defaults.

        Args:
            full_path (str | Path): Location of the emoji image file.
        Raises:
            ValueError: ``full_path`` is empty.
            FileNotFoundError: ``full_path`` is a directory or does not exist.
        """
        # The path is validated at construction time.
        if not full_path:
            raise ValueError("表情包路径不能为空")
        if Path(full_path).is_dir() or not Path(full_path).exists():
            raise FileNotFoundError(f"表情包路径无效: {full_path}")
        resolved_path = Path(full_path).absolute().resolve()
        self.full_path: Path = resolved_path
        self.dir_path: Path = resolved_path.parent.resolve()
        self.file_name: str = resolved_path.name
        # self.embedding = []
        self.emoji_hash: str = None  # type: ignore
        self.description = ""
        self.emotion: List[str] = []
        self.query_count = 0
        self.register_time: Optional[datetime] = None
        self.last_used_time: Optional[datetime] = None

        # Internal state
        self.is_deleted = False
        self._format: str = ""  # image format, set by calculate_hash_format

    @classmethod
    def from_db_instance(cls, image: Images):
        """
        Build a ``MaiEmoji`` from an ``Images`` database record.

        The record's ``full_path`` goes through the constructor, so the
        constructor's ``ValueError`` / ``FileNotFoundError`` can be raised here.
        The comma-separated ``emotion`` column is split back into a list.
        """
        obj = cls(image.full_path)
        obj.emoji_hash = image.image_hash
        obj.description = image.description
        if image.emotion:
            obj.emotion = image.emotion.split(",")
        obj.query_count = image.query_count
        obj.last_used_time = image.last_used_time
        obj.register_time = image.register_time
        return obj

    def to_db_instance(self) -> Images:
        """
        Convert this emoji into an ``Images`` record of type ``ImageType.EMOJI``.

        The emotion list is stored as one comma-joined string, or ``None``
        when the list is empty.
        """
        emotion_str = ",".join(self.emotion) if self.emotion else None
        return Images(
            image_hash=self.emoji_hash,
            description=self.description,
            full_path=str(self.full_path),
            image_type=ImageType.EMOJI,
            emotion=emotion_str,
            query_count=self.query_count,
            last_used_time=self.last_used_time,
            register_time=self.register_time,
        )

    async def calculate_hash_format(self) -> bool:
        """
        Compute the SHA-256 hash and the real image format of the emoji file.

        The file is read and inspected in worker threads. When the file
        extension differs from the detected format, the file is renamed on
        disk so that its extension matches, and ``full_path`` and
        ``file_name`` are updated to the new name.

        Returns:
            return (bool): True on success. On any exception the error is
            logged, ``is_deleted`` is set to True and False is returned.
        """
        logger.debug(f"[初始化] 正在读取文件: {self.full_path}")
        try:
            # Hash the raw file content.
            logger.debug(f"[初始化] 计算 {self.file_name} 的哈希值...")
            image_bytes = await asyncio.to_thread(self.read_image_bytes, self.full_path)
            self.emoji_hash = hashlib.sha256(image_bytes).hexdigest()
            logger.debug(f"[初始化] {self.file_name} 计算哈希值成功: {self.emoji_hash}")

            # Detect the real format with PIL.
            logger.debug(f"[初始化] 读取 {self.file_name} 的图片格式...")
            self._format = await asyncio.to_thread(self.get_image_format, image_bytes)
            logger.debug(f"[初始化] {self.file_name} 读取图片格式成功: {self._format}")

            # Compare the extension with the detected format. Path.suffix is
            # empty for names without an extension, so such files gain an
            # extension instead of having their whole name replaced.
            file_ext = self.full_path.suffix.lstrip(".").lower()
            # NOTE(review): PIL reports "jpeg", so "*.jpg" files are treated as
            # mismatched and renamed to "*.jpeg" — confirm this is intended.
            if file_ext != self._format:
                logger.warning(f"[初始化] {self.file_name} 文件扩展名与实际格式不符: ext`{file_ext}`!=`{self._format}`")
                # Rename the file so its extension matches the real format.
                new_full_path = self.full_path.with_suffix(f".{self._format}")
                # NOTE(review): on POSIX, rename() silently replaces an existing
                # file at the target path — confirm collisions cannot happen.
                self.full_path.rename(new_full_path)
                self.full_path = new_full_path
                # Keep the cached file name in sync with the renamed file.
                self.file_name = new_full_path.name

            return True
        except Exception as e:
            logger.error(f"[初始化] 初始化表情包时发生错误: {e}")
            logger.error(traceback.format_exc())
            self.is_deleted = True
            return False
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
from PIL import Image as PILImage, ImageSequence
|
||||
|
||||
import base64
|
||||
import io
|
||||
import numpy as np
|
||||
|
||||
from src.common.logger import get_logger
|
||||
|
||||
# Module-level logger, obtained from the project logger factory under the name "image".
logger = get_logger("image")
|
||||
|
||||
|
||||
class ImageUtils:
    """Static helpers for converting and encoding image data."""

    @staticmethod
    def _frame_mse(frame_np: np.ndarray, reference_np: np.ndarray) -> float:
        """
        Return the mean squared error between two same-shaped pixel arrays.

        Both arrays are widened to int32 before subtracting. Doing the
        arithmetic directly on uint8 pixel data wraps around modulo 256, which
        caps the result at 255 and makes it meaningless as an error measure.
        """
        diff = frame_np.astype(np.int32) - reference_np.astype(np.int32)
        return float(np.mean(diff * diff))

    @staticmethod
    def gif_2_static_image(gif_bytes: bytes, similarity_threshold: float = 1000.0, max_frames: int = 15) -> bytes:
        """
        Flatten a GIF into one static JPEG by pasting selected frames side by side.

        The first frame is always kept. Each later frame is kept only when its
        mean squared error (MSE) against the most recently kept frame is
        greater than ``similarity_threshold``; similar frames are skipped.
        Kept frames are scaled to a height of 200 px (aspect ratio taken from
        the first kept frame) and joined horizontally.

        Args:
            gif_bytes (bytes): Raw bytes of the input GIF.
            similarity_threshold (float): MSE a frame must exceed to count as
                different from the previously kept frame. A larger value
                demands a bigger change, so fewer frames are kept.
                Defaults to 1000.0.
            max_frames (int): Maximum number of frames to keep; the first
                frame is kept regardless. Defaults to 15.
        Returns:
            bytes: The combined image encoded as JPEG (quality 85).
        Raises:
            ValueError: The input is not a GIF, no frame could be extracted,
                or the frame height is 0.
            Exception: Errors raised while decoding or encoding are not caught
                here and propagate unchanged.
        """
        with PILImage.open(io.BytesIO(gif_bytes)) as gif_image:
            if not gif_image.format or gif_image.format.lower() != "gif":
                logger.error("输入的图片不是有效的GIF格式")
                raise ValueError("输入的图片不是有效的GIF格式")

            # --- Iterate frames lazily and pick the ones to keep ---
            selected_frames: list[PILImage.Image] = []
            last_selected_frame_np = None

            for frame in ImageSequence.Iterator(gif_image):
                # Compare in RGB so every frame has the same channel layout.
                frame_rgb = frame.convert("RGB")
                frame_np = np.array(frame_rgb)

                if last_selected_frame_np is not None:
                    # Difference to the last kept frame, not the previous frame.
                    mse = ImageUtils._frame_mse(frame_np, last_selected_frame_np)
                    if mse <= similarity_threshold:
                        continue

                selected_frames.append(frame_rgb.copy())
                last_selected_frame_np = frame_np
                if len(selected_frames) >= max_frames:
                    break

            if not selected_frames:
                logger.error("未能抽取到任何有效帧")
                raise ValueError("未能抽取到任何有效帧")

            # Size of the first kept frame (all frames are assumed to match).
            frame_width, frame_height = selected_frames[0].size
            # Guard against division by zero below.
            if frame_height == 0:
                raise ValueError("帧高度为0,无法计算缩放尺寸")

            # Target size: fixed height, width scaled to keep the aspect ratio.
            target_height = 200
            target_width = int((target_height / frame_height) * frame_width)
            # A very narrow frame can round down to 0, which cannot be resized to.
            if target_width == 0:
                logger.warning(f"计算出的目标宽度为0 (原始尺寸 {frame_width}x{frame_height}),调整为1")
                target_width = 1

            resized_frames = [
                frame.resize((target_width, target_height), PILImage.Resampling.LANCZOS) for frame in selected_frames
            ]

            # Paste the resized frames left to right onto one canvas.
            total_width = target_width * len(resized_frames)
            combined_image = PILImage.new("RGB", (total_width, target_height))
            for idx, frame in enumerate(resized_frames):
                combined_image.paste(frame, (idx * target_width, 0))

            buffer = io.BytesIO()
            combined_image.save(buffer, format="JPEG", quality=85)
            return buffer.getvalue()

    @staticmethod
    def image_bytes_to_base64(image_bytes: bytes) -> str:
        """
        Encode raw image bytes as a Base64 string.

        Args:
            image_bytes (bytes): Raw bytes of the image.
        Returns:
            str: Base64 text of the input.
        Raises:
            ValueError: ``image_bytes`` is empty or None.
        """
        if not image_bytes:
            logger.error("输入的图片字节数据无效")
            raise ValueError("输入的图片字节数据无效")
        return base64.b64encode(image_bytes).decode("utf-8")
|
||||
Loading…
Reference in New Issue