新表情包系统;对应的测试;TODO更新;Prompt拆分;工具拆分

pull/1496/head
UnCLAS-Prommer 2026-02-04 22:23:41 +08:00
parent d27d73f329
commit 4ff070c08e
No known key found for this signature in database
10 changed files with 2748 additions and 1092 deletions

View File

@ -142,6 +142,18 @@ version 0.3.0 - 2026-01-11
- [x] Prompt删除
- [x] **只保存被标记为需要保存的Prompt，其他的Prompt文件全部删除**
## LLM相关内容
- [ ] 统一LLM调用接口
- [ ] 统一LLM调用返回格式为专有数据模型
- [ ] 取消所有`__init__`方法中对LLM Client的初始化，转而使用获取方式
- [ ] 统一使用`get_llm_client`方法获取LLM Client实例
- [ ] `__init__`方法中只保存配置信息
- [ ] LLM Client管理器
- [ ] LLM Client单例/多例管理
- [ ] LLM Client缓存管理/生命周期管理
- [ ] LLM Client根据配置热重载
## 一些细枝末节的东西
- [ ] 将`stream_id`和`chat_id`统一命名为`session_id`
- [ ] 映射表

View File

@ -0,0 +1,5 @@
这是一个聊天场景中的表情包描述:"{description}"
请你识别这个表情包的含义和适用场景，给我简短的描述，每个描述不要超过15个字
你可以关注其幽默和讽刺意味，动用贴吧、微博、小红书的知识，必须从互联网梗、meme的角度去分析
请直接输出描述,不要出现任何其他内容,如果有多个描述,可以用逗号分隔

View File

@ -0,0 +1,6 @@
这是一个表情包,请对这个表情包进行审核,标准如下:
1. 必须符合"{demand}"的要求
2. 不能是色情、暴力等违法违规内容，必须符合公序良俗
3. 不能是任何形式的截图,聊天记录或视频截图
4. 不要出现5个以上文字
请回答这个表情包是否满足上述要求,是则回答是,否则回答否,不要出现任何其他内容

View File

@ -0,0 +1,12 @@
{nickname}的表情包存储已满({emoji_num}/{emoji_num_max}),需要决定是否删除一个旧表情包来为新表情包腾出空间。
新表情包信息:
描述: {description}
现有表情包列表:
{emoji_list}
请决定:
1. 是否要删除某个现有表情包来为新表情包腾出空间?
2. 如果要删除,应该删除哪一个(给出编号)
请只回答:'不删除'或'删除编号X'(X为表情包编号)。

View File

@ -4,8 +4,8 @@ version = "0.11.6"
description = "MaiCore 是一个基于大语言模型的可交互智能体"
requires-python = ">=3.10"
dependencies = [
"aiohttp>=3.12.14",
"aiohttp-cors>=0.8.1",
"aiohttp>=3.12.14",
"colorama>=0.4.6",
"faiss-cpu>=1.11.0",
"fastapi>=0.116.0",
@ -14,7 +14,6 @@ dependencies = [
"json-repair>=0.47.6",
"maim-message>=0.6.2",
"matplotlib>=3.10.3",
"msgpack>=1.1.2",
"numpy>=2.2.6",
"openai>=1.95.0",
"pandas>=2.3.1",
@ -25,6 +24,7 @@ dependencies = [
"pypinyin>=0.54.0",
"python-dotenv>=1.1.1",
"python-multipart>=0.0.20",
"python-levenshtein",
"quick-algo>=0.1.3",
"rich>=14.0.0",
"ruff>=0.12.2",
@ -34,7 +34,6 @@ dependencies = [
"tomlkit>=0.13.3",
"urllib3>=2.5.0",
"uvicorn>=0.35.0",
"zstandard>=0.25.0",
]

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,12 @@
aiohttp>=3.12.14
aiohttp-cors>=0.8.1
aiohttp>=3.12.14
colorama>=0.4.6
faiss-cpu>=1.11.0
fastapi>=0.116.0
google-genai>=1.39.1
jieba>=0.42.1
json-repair>=0.47.6
maim-message
maim-message>=0.6.2
matplotlib>=3.10.3
numpy>=2.2.6
openai>=1.95.0
@ -17,6 +17,7 @@ pyarrow>=20.0.0
pydantic>=2.11.7
pypinyin>=0.54.0
python-dotenv>=1.1.1
python-levenshtein
python-multipart>=0.0.20
quick-algo>=0.1.3
rich>=14.0.0
@ -26,6 +27,4 @@ structlog>=25.4.0
toml>=0.10.2
tomlkit>=0.13.3
urllib3>=2.5.0
uvicorn>=0.35.0
msgpack
zstandard
uvicorn>=0.35.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,165 @@
from abc import ABC, abstractmethod
from datetime import datetime
from pathlib import Path
from PIL import Image as PILImage
from rich.traceback import install
from typing import Optional, List
import asyncio
import hashlib
import io
import traceback
from src.common.database.database_model import Images, ImageType
from src.common.logger import get_logger
install(extra_lines=3)
logger = get_logger("emoji")
class BaseImageDataModel(ABC):
    """Abstract base for image data models backed by the ``Images`` database model.

    Subclasses provide the two conversion methods; reading raw bytes and
    detecting the image format are shared helpers implemented here.
    """

    @classmethod
    @abstractmethod
    def from_db_instance(cls, image: "Images"):
        """Build a data model from an ``Images`` record; subclasses must override."""
        raise NotImplementedError

    @abstractmethod
    def to_db_instance(self) -> "Images":
        """Convert this data model into an ``Images`` record; subclasses must override."""
        raise NotImplementedError

    def read_image_bytes(self, path: Path) -> bytes:
        """Synchronously read the raw bytes of an image file.

        Args:
            path (Path): Full path of the image file.
        Returns:
            bytes: The complete file content.
        Raises:
            FileNotFoundError: If the file does not exist (logged, then re-raised).
            Exception: Any other error raised while reading (logged, then re-raised).
        """
        try:
            with open(path, "rb") as handle:
                content = handle.read()
        except FileNotFoundError as missing:
            logger.error(f"[读取图片文件] 文件未找到: {path}")
            raise missing
        except Exception as err:
            logger.error(f"[读取图片文件] 读取文件时发生错误: {err}")
            raise err
        return content

    def get_image_format(self, image_bytes: bytes) -> str:
        """Detect the format of an image from its bytes.

        Args:
            image_bytes (bytes): Raw image content.
        Returns:
            str: The format name reported by PIL, lower-cased.
        Raises:
            ValueError: If PIL opens the image but reports no format.
            Exception: Any other error raised while opening the image
                (logged, then re-raised).
        """
        try:
            with PILImage.open(io.BytesIO(image_bytes)) as opened:
                detected = opened.format
                if detected:
                    return detected.lower()
                raise ValueError("无法识别图片格式")
        except Exception as err:
            # The ValueError raised above is also logged here before propagating.
            logger.error(f"[获取图片格式] 读取图片格式时发生错误: {err}")
            raise err
class ImageDataModel(BaseImageDataModel):
    """Placeholder data model for images; it adds no behavior of its own.

    NOTE(review): this class does not override the abstract
    ``from_db_instance`` / ``to_db_instance`` declared on
    ``BaseImageDataModel``, so it cannot be instantiated as written.
    """

    pass
class MaiEmoji(BaseImageDataModel):
    """Data model for a single emoji image file stored on disk.

    An instance is bound to an existing file at construction time. The hash
    and the real image format are filled in later by
    ``calculate_hash_format``; the remaining metadata is either set by the
    caller or loaded through ``from_db_instance``.
    """

    def __init__(self, full_path: str | Path):
        """Bind the model to an existing emoji file.

        Args:
            full_path (str | Path): Location of the emoji file.
        Raises:
            ValueError: If ``full_path`` is empty.
            FileNotFoundError: If the path is a directory or does not exist.
        """
        if not full_path:
            # Validate the path as soon as the object is created.
            raise ValueError("表情包路径不能为空")
        if Path(full_path).is_dir() or not Path(full_path).exists():
            raise FileNotFoundError(f"表情包路径无效: {full_path}")
        resolved_path = Path(full_path).absolute().resolve()
        self.full_path: Path = resolved_path
        self.dir_path: Path = resolved_path.parent.resolve()
        self.file_name: str = resolved_path.name
        # self.embedding = []
        # Remains None until calculate_hash_format() or from_db_instance() sets it.
        self.emoji_hash: str = None  # type: ignore
        self.description = ""
        self.emotion: List[str] = []
        self.query_count = 0
        self.register_time: Optional[datetime] = None
        self.last_used_time: Optional[datetime] = None
        # Internal state
        self.is_deleted = False  # set to True when calculate_hash_format() fails
        self._format: str = ""  # image format as detected from the file content

    @classmethod
    def from_db_instance(cls, image: Images):
        """Create a ``MaiEmoji`` from a database record.

        The record's ``full_path`` must point to an existing file, because the
        constructor validates it.

        Args:
            image (Images): Database record to load from.
        Returns:
            MaiEmoji: Model populated with the record's hash, description,
            emotion tags, query count and timestamps.
        """
        obj = cls(image.full_path)
        obj.emoji_hash = image.image_hash
        obj.description = image.description
        if image.emotion:
            # Emotion tags are stored as a single comma-separated string.
            obj.emotion = image.emotion.split(",")
        obj.query_count = image.query_count
        obj.last_used_time = image.last_used_time
        obj.register_time = image.register_time
        return obj

    def to_db_instance(self) -> Images:
        """Convert this model into an ``Images`` record of type ``ImageType.EMOJI``.

        Returns:
            Images: New record; emotion tags are joined with commas, or stored
            as ``None`` when there are none.
        """
        emotion_str = ",".join(self.emotion) if self.emotion else None
        return Images(
            image_hash=self.emoji_hash,
            description=self.description,
            full_path=str(self.full_path),
            image_type=ImageType.EMOJI,
            emotion=emotion_str,
            query_count=self.query_count,
            last_used_time=self.last_used_time,
            register_time=self.register_time,
        )

    @staticmethod
    def _split_extension(file_name: str) -> tuple[str, str]:
        """Split a file name into ``(stem, lower-cased extension)``.

        A name without a dot, or one whose only dot is the leading one
        (``.hidden``), has no extension: the whole name is the stem and the
        extension is the empty string.
        """
        stem, dot, ext = file_name.rpartition(".")
        if not dot or not stem:
            return file_name, ""
        return stem, ext.lower()

    async def calculate_hash_format(self) -> bool:
        """Compute the SHA-256 hash and the real image format of the emoji file.

        When the file extension does not match the detected format, the file
        is renamed on disk so that its extension equals the detected format,
        and ``full_path`` / ``file_name`` are updated to the new name.

        Returns:
            bool: True on success. On any error the error is logged,
            ``is_deleted`` is set to True and False is returned.
        """
        logger.debug(f"[初始化] 正在读取文件: {self.full_path}")
        try:
            # Hash the file content.
            logger.debug(f"[初始化] 计算 {self.file_name} 的哈希值...")
            image_bytes = await asyncio.to_thread(self.read_image_bytes, self.full_path)
            self.emoji_hash = hashlib.sha256(image_bytes).hexdigest()
            logger.debug(f"[初始化] {self.file_name} 计算哈希值成功: {self.emoji_hash}")
            # Detect the image format with PIL.
            logger.debug(f"[初始化] 读取 {self.file_name} 的图片格式...")
            self._format = await asyncio.to_thread(self.get_image_format, image_bytes)
            logger.debug(f"[初始化] {self.file_name} 读取图片格式成功: {self._format}")
            # Compare the file extension with the detected format.
            stem, file_ext = self._split_extension(self.file_name)
            if file_ext != self._format:
                logger.warning(
                    f"[初始化] {self.file_name} 文件扩展名与实际格式不符: ext`{file_ext}`!=`{self._format}`"
                )
                # Rename the file so the extension matches the detected format.
                # The stem is kept intact even when the file had no extension.
                new_file_name = f"{stem}.{self._format}"
                new_full_path = self.dir_path / new_file_name
                self.full_path.rename(new_full_path)
                self.full_path = new_full_path
                # Keep the cached name in sync with the file on disk.
                self.file_name = new_file_name
            return True
        except Exception as e:
            logger.error(f"[初始化] 初始化表情包时发生错误: {e}")
            logger.error(traceback.format_exc())
            self.is_deleted = True
            return False

View File

@ -0,0 +1,104 @@
from PIL import Image as PILImage, ImageSequence
import base64
import io
import numpy as np
from src.common.logger import get_logger
logger = get_logger("image")
class ImageUtils:
    """Stateless helpers for working with image bytes."""

    @staticmethod
    def _frame_mse(frame_a: np.ndarray, frame_b: np.ndarray) -> float:
        """Return the mean squared error between two same-shaped pixel arrays.

        Pixels are widened to int32 before subtracting. uint8 arithmetic wraps
        modulo 256, which would cap the result at 255 and make any threshold
        above that unreachable.
        """
        diff = frame_a.astype(np.int32) - frame_b.astype(np.int32)
        return float(np.mean(np.square(diff), dtype=np.float64))

    @staticmethod
    def gif_2_static_image(gif_bytes: bytes, similarity_threshold: float = 1000.0, max_frames: int = 15) -> bytes:
        """Stitch distinct GIF frames side by side into one static JPEG.

        The first frame is always kept. Each later frame is kept only when its
        MSE against the previously kept frame exceeds ``similarity_threshold``.
        Kept frames are resized to a height of 200 pixels (aspect ratio
        preserved) and pasted left to right.

        Args:
            gif_bytes (bytes): Raw bytes of the input GIF.
            similarity_threshold (float): MSE above which a frame counts as
                different from the last kept frame. A larger value keeps fewer
                frames. Defaults to 1000.0.
            max_frames (int): Maximum number of frames to keep. Defaults to 15.
        Returns:
            bytes: The stitched image encoded as JPEG.
        Raises:
            ValueError: If the input is not a GIF, no frame could be extracted,
                or the frame height is 0.
            Exception: Errors raised by PIL while decoding or encoding
                propagate unchanged.
        """
        with PILImage.open(io.BytesIO(gif_bytes)) as gif_image:
            if not gif_image.format or gif_image.format.lower() != "gif":
                logger.error("输入的图片不是有效的GIF格式")
                raise ValueError("输入的图片不是有效的GIF格式")
            # Iterate frame by frame instead of loading every frame at once.
            selected_frames: list[PILImage.Image] = []
            last_selected_frame_np = None
            for frame in ImageSequence.Iterator(gif_image):
                # Normalise to RGB so frames can be compared pixel by pixel.
                frame_rgb = frame.convert("RGB")
                frame_np = np.array(frame_rgb)
                is_first = last_selected_frame_np is None
                if is_first or ImageUtils._frame_mse(frame_np, last_selected_frame_np) > similarity_threshold:
                    selected_frames.append(frame_rgb.copy())
                    last_selected_frame_np = frame_np
                    if len(selected_frames) >= max_frames:
                        break
        if not selected_frames:
            logger.error("未能抽取到任何有效帧")
            raise ValueError("未能抽取到任何有效帧")
        # Size of the first kept frame (all frames are assumed to share it).
        frame_width, frame_height = selected_frames[0].size
        # Guard against division by zero.
        if frame_height == 0:
            raise ValueError("帧高度为0无法计算缩放尺寸")
        # Target size: fixed height, width scaled to keep the aspect ratio.
        target_height = 200
        target_width = int((target_height / frame_height) * frame_width)
        # The width must not collapse to 0 either.
        if target_width == 0:
            logger.warning(f"计算出的目标宽度为0 (原始尺寸 {frame_width}x{frame_height})调整为1")
            target_width = 1
        # Resize every kept frame.
        resized_frames = [
            frame.resize((target_width, target_height), PILImage.Resampling.LANCZOS) for frame in selected_frames
        ]
        # Paste the frames left to right onto one canvas.
        total_width = target_width * len(resized_frames)
        combined_image = PILImage.new("RGB", (total_width, target_height))
        for idx, frame in enumerate(resized_frames):
            combined_image.paste(frame, (idx * target_width, 0))
        buffer = io.BytesIO()
        combined_image.save(buffer, format="JPEG", quality=85)
        return buffer.getvalue()

    @staticmethod
    def image_bytes_to_base64(image_bytes: bytes) -> str:
        """Encode image bytes as a Base64 string.

        Args:
            image_bytes (bytes): Raw image bytes.
        Returns:
            str: Base64 text decoded as UTF-8.
        Raises:
            ValueError: If ``image_bytes`` is empty.
        """
        if not image_bytes:
            logger.error("输入的图片字节数据无效")
            raise ValueError("输入的图片字节数据无效")
        return base64.b64encode(image_bytes).decode("utf-8")