mirror of https://github.com/Mai-with-u/MaiBot.git
Merge branch 'Mai-with-u:dev' into dev
commit
3ba0c06558
|
|
@ -940,13 +940,13 @@ class EmojiManager:
|
||||||
image_base64 = get_image_manager().transform_gif(image_base64) # type: ignore
|
image_base64 = get_image_manager().transform_gif(image_base64) # type: ignore
|
||||||
if not image_base64:
|
if not image_base64:
|
||||||
raise RuntimeError("GIF表情包转换失败")
|
raise RuntimeError("GIF表情包转换失败")
|
||||||
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,描述细节,从互联网梗,meme的角度去分析"
|
prompt = "这是一个动态图表情包,每一张图代表了动态图的某一帧,黑色背景代表透明,简短描述一下表情包表达的情感和内容,从互联网梗,meme的角度去分析,精简回答"
|
||||||
description, _ = await self.vlm.generate_response_for_image(
|
description, _ = await self.vlm.generate_response_for_image(
|
||||||
prompt, image_base64, "jpg", temperature=0.5
|
prompt, image_base64, "jpg", temperature=0.5
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
prompt = (
|
prompt = (
|
||||||
"这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析"
|
"这是一个表情包,请详细描述一下表情包所表达的情感和内容,简短描述细节,从互联网梗,meme的角度去分析,精简回答"
|
||||||
)
|
)
|
||||||
description, _ = await self.vlm.generate_response_for_image(
|
description, _ = await self.vlm.generate_response_for_image(
|
||||||
prompt, image_base64, image_format, temperature=0.5
|
prompt, image_base64, image_format, temperature=0.5
|
||||||
|
|
|
||||||
|
|
@ -17,12 +17,12 @@ from src.chat.planner_actions.planner import ActionPlanner
|
||||||
from src.chat.planner_actions.action_modifier import ActionModifier
|
from src.chat.planner_actions.action_modifier import ActionModifier
|
||||||
from src.chat.planner_actions.action_manager import ActionManager
|
from src.chat.planner_actions.action_manager import ActionManager
|
||||||
from src.chat.heart_flow.hfc_utils import CycleDetail
|
from src.chat.heart_flow.hfc_utils import CycleDetail
|
||||||
from src.chat.heart_flow.hfc_utils import send_typing, stop_typing
|
|
||||||
from src.express.expression_learner import expression_learner_manager
|
from src.express.expression_learner import expression_learner_manager
|
||||||
from src.chat.frequency_control.frequency_control import frequency_control_manager
|
from src.chat.frequency_control.frequency_control import frequency_control_manager
|
||||||
from src.memory_system.question_maker import QuestionMaker
|
from src.memory_system.question_maker import QuestionMaker
|
||||||
from src.memory_system.questions import global_conflict_tracker
|
from src.memory_system.questions import global_conflict_tracker
|
||||||
from src.memory_system.curious import check_and_make_question
|
from src.memory_system.curious import check_and_make_question
|
||||||
|
from src.jargon import extract_and_store_jargon
|
||||||
from src.person_info.person_info import Person
|
from src.person_info.person_info import Person
|
||||||
from src.plugin_system.base.component_types import EventType, ActionInfo
|
from src.plugin_system.base.component_types import EventType, ActionInfo
|
||||||
from src.plugin_system.core import events_manager
|
from src.plugin_system.core import events_manager
|
||||||
|
|
@ -107,6 +107,7 @@ class HeartFChatting:
|
||||||
|
|
||||||
self.last_active_time = time.time() # 记录上一次非noreply时间
|
self.last_active_time = time.time() # 记录上一次非noreply时间
|
||||||
|
|
||||||
|
self.question_probability_multiplier = 1
|
||||||
self.questioned = False
|
self.questioned = False
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -192,7 +193,7 @@ class HeartFChatting:
|
||||||
else:
|
else:
|
||||||
question_probability = 0.00003
|
question_probability = 0.00003
|
||||||
|
|
||||||
question_probability = question_probability * global_config.chat.get_auto_chat_value(self.stream_id)
|
question_probability = question_probability * global_config.chat.get_auto_chat_value(self.stream_id) * self.question_probability_multiplier
|
||||||
|
|
||||||
# print(f"{self.log_prefix} questioned: {self.questioned},len: {len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id))}")
|
# print(f"{self.log_prefix} questioned: {self.questioned},len: {len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id))}")
|
||||||
if question_probability > 0 and not self.questioned and len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id)) == 0: #长久没有回复,可以试试主动发言,提问概率随着时间增加
|
if question_probability > 0 and not self.questioned and len(global_conflict_tracker.get_questions_by_chat_id(self.stream_id)) == 0: #长久没有回复,可以试试主动发言,提问概率随着时间增加
|
||||||
|
|
@ -335,7 +336,9 @@ class HeartFChatting:
|
||||||
asyncio.create_task(frequency_control_manager.get_or_create_frequency_control(self.stream_id).trigger_frequency_adjust())
|
asyncio.create_task(frequency_control_manager.get_or_create_frequency_control(self.stream_id).trigger_frequency_adjust())
|
||||||
|
|
||||||
# 添加curious检测任务 - 检测聊天记录中的矛盾、冲突或需要提问的内容
|
# 添加curious检测任务 - 检测聊天记录中的矛盾、冲突或需要提问的内容
|
||||||
asyncio.create_task(check_and_make_question(self.stream_id, recent_messages_list))
|
asyncio.create_task(check_and_make_question(self.stream_id))
|
||||||
|
# 添加jargon提取任务 - 提取聊天中的黑话/俚语并入库(内部自行取消息并带冷却)
|
||||||
|
asyncio.create_task(extract_and_store_jargon(self.stream_id))
|
||||||
|
|
||||||
|
|
||||||
cycle_timers, thinking_id = self.start_cycle()
|
cycle_timers, thinking_id = self.start_cycle()
|
||||||
|
|
|
||||||
|
|
@ -639,6 +639,83 @@ class DefaultReplyer:
|
||||||
prompt_personality = f"{prompt_personality};"
|
prompt_personality = f"{prompt_personality};"
|
||||||
return f"你的名字是{bot_name}{bot_nickname},你{prompt_personality}"
|
return f"你的名字是{bot_name}{bot_nickname},你{prompt_personality}"
|
||||||
|
|
||||||
|
def _parse_chat_prompt_config_to_chat_id(self, chat_prompt_str: str) -> Optional[tuple[str, str]]:
    """
    Parse one chat-prompt config entry into a ``(chat_id, prompt_content)`` pair.

    Args:
        chat_prompt_str: Entry formatted as "platform:id:type:prompt". Only the
            first three colons act as separators, so the prompt text itself
            may contain colons.

    Returns:
        tuple: ``(chat_id, prompt_content)``, or ``None`` when the entry is not
        a string or does not contain all four fields.
    """
    import hashlib

    # Config lists can hold arbitrary values; honour the "None on failure" contract
    # instead of raising AttributeError on a non-string entry.
    if not isinstance(chat_prompt_str, str):
        return None

    # Split at most 3 times because the prompt content may itself contain colons.
    parts = chat_prompt_str.split(":", 3)
    if len(parts) != 4:
        return None
    platform, id_str, stream_type, prompt_content = parts

    # Hash key layout (intended to match ChatStream.get_stream_id):
    #   group   -> "platform_id"
    #   others  -> "platform_id_private"
    components = [platform, id_str]
    if stream_type != "group":
        components.append("private")
    chat_id = hashlib.md5("_".join(components).encode()).hexdigest()

    return chat_id, prompt_content
|
||||||
|
|
||||||
|
def get_chat_prompt_for_chat(self, chat_id: str) -> str:
    """
    Look up the extra prompt configured for a chat stream (group entries only).

    Args:
        chat_id: Chat stream ID (hash value).

    Returns:
        str: The prompt text of the first matching group entry, or an empty
        string when nothing matches.
    """
    configured_prompts = global_config.experimental.chat_prompts
    if not configured_prompts:
        return ""

    for entry in configured_prompts:
        if not isinstance(entry, str):
            continue

        # Entry layout is "platform:id:type:prompt"; skip malformed and non-group entries.
        fields = entry.split(":", 3)
        if len(fields) != 4 or fields[2] != "group":
            continue

        parsed = self._parse_chat_prompt_config_to_chat_id(entry)
        if parsed is None:
            continue

        entry_chat_id, extra_prompt = parsed
        if entry_chat_id != chat_id:
            continue

        logger.debug(f"匹配到群聊prompt配置,chat_id: {chat_id}, prompt: {extra_prompt[:50]}...")
        return extra_prompt

    return ""
|
||||||
|
|
||||||
async def build_prompt_reply_context(
|
async def build_prompt_reply_context(
|
||||||
self,
|
self,
|
||||||
reply_message: Optional[DatabaseMessages] = None,
|
reply_message: Optional[DatabaseMessages] = None,
|
||||||
|
|
@ -820,6 +897,11 @@ class DefaultReplyer:
|
||||||
# 构建分离的对话 prompt
|
# 构建分离的对话 prompt
|
||||||
dialogue_prompt = self.build_chat_history_prompts(message_list_before_now_long, user_id, sender)
|
dialogue_prompt = self.build_chat_history_prompts(message_list_before_now_long, user_id, sender)
|
||||||
|
|
||||||
|
# 获取匹配的额外prompt
|
||||||
|
chat_prompt_content = self.get_chat_prompt_for_chat(chat_id)
|
||||||
|
chat_prompt_block = f"{chat_prompt_content}\n" if chat_prompt_content else ""
|
||||||
|
|
||||||
|
# 固定使用群聊回复模板
|
||||||
return await global_prompt_manager.format_prompt(
|
return await global_prompt_manager.format_prompt(
|
||||||
"replyer_prompt",
|
"replyer_prompt",
|
||||||
expression_habits_block=expression_habits_block,
|
expression_habits_block=expression_habits_block,
|
||||||
|
|
@ -840,6 +922,7 @@ class DefaultReplyer:
|
||||||
keywords_reaction_prompt=keywords_reaction_prompt,
|
keywords_reaction_prompt=keywords_reaction_prompt,
|
||||||
moderation_prompt=moderation_prompt_block,
|
moderation_prompt=moderation_prompt_block,
|
||||||
question_block=question_block,
|
question_block=question_block,
|
||||||
|
chat_prompt=chat_prompt_block,
|
||||||
), selected_expressions
|
), selected_expressions
|
||||||
|
|
||||||
async def build_prompt_rewrite_context(
|
async def build_prompt_rewrite_context(
|
||||||
|
|
|
||||||
|
|
@ -536,6 +536,83 @@ class PrivateReplyer:
|
||||||
prompt_personality = f"{prompt_personality};"
|
prompt_personality = f"{prompt_personality};"
|
||||||
return f"你的名字是{bot_name}{bot_nickname},你{prompt_personality}"
|
return f"你的名字是{bot_name}{bot_nickname},你{prompt_personality}"
|
||||||
|
|
||||||
|
def _parse_chat_prompt_config_to_chat_id(self, chat_prompt_str: str) -> Optional[tuple[str, str]]:
    """
    Parse one chat-prompt config entry into a ``(chat_id, prompt_content)`` pair.

    Args:
        chat_prompt_str: Entry formatted as "platform:id:type:prompt". Only the
            first three colons act as separators, so the prompt text itself
            may contain colons.

    Returns:
        tuple: ``(chat_id, prompt_content)``, or ``None`` when the entry is not
        a string or does not contain all four fields.
    """
    import hashlib

    # Config lists can hold arbitrary values; honour the "None on failure" contract
    # instead of raising AttributeError on a non-string entry.
    if not isinstance(chat_prompt_str, str):
        return None

    # Split at most 3 times because the prompt content may itself contain colons.
    parts = chat_prompt_str.split(":", 3)
    if len(parts) != 4:
        return None
    platform, id_str, stream_type, prompt_content = parts

    # Hash key layout (intended to match ChatStream.get_stream_id):
    #   group   -> "platform_id"
    #   others  -> "platform_id_private"
    components = [platform, id_str]
    if stream_type != "group":
        components.append("private")
    chat_id = hashlib.md5("_".join(components).encode()).hexdigest()

    return chat_id, prompt_content
|
||||||
|
|
||||||
|
def get_chat_prompt_for_chat(self, chat_id: str) -> str:
    """
    Look up the extra prompt configured for a chat stream (private entries only).

    Args:
        chat_id: Chat stream ID (hash value).

    Returns:
        str: The prompt text of the first matching private entry, or an empty
        string when nothing matches.
    """
    configured_prompts = global_config.experimental.chat_prompts
    if not configured_prompts:
        return ""

    for entry in configured_prompts:
        if not isinstance(entry, str):
            continue

        # Entry layout is "platform:id:type:prompt"; skip malformed and non-private entries.
        fields = entry.split(":", 3)
        if len(fields) != 4 or fields[2] != "private":
            continue

        parsed = self._parse_chat_prompt_config_to_chat_id(entry)
        if parsed is None:
            continue

        entry_chat_id, extra_prompt = parsed
        if entry_chat_id != chat_id:
            continue

        logger.debug(f"匹配到私聊prompt配置,chat_id: {chat_id}, prompt: {extra_prompt[:50]}...")
        return extra_prompt

    return ""
|
||||||
|
|
||||||
async def build_prompt_reply_context(
|
async def build_prompt_reply_context(
|
||||||
self,
|
self,
|
||||||
reply_message: Optional[DatabaseMessages] = None,
|
reply_message: Optional[DatabaseMessages] = None,
|
||||||
|
|
@ -718,6 +795,10 @@ class PrivateReplyer:
|
||||||
# 其他情况(空内容等)
|
# 其他情况(空内容等)
|
||||||
reply_target_block = f"现在对方说的:{target}。引起了你的注意"
|
reply_target_block = f"现在对方说的:{target}。引起了你的注意"
|
||||||
|
|
||||||
|
# 获取匹配的额外prompt
|
||||||
|
chat_prompt_content = self.get_chat_prompt_for_chat(chat_id)
|
||||||
|
chat_prompt_block = f"{chat_prompt_content}\n" if chat_prompt_content else ""
|
||||||
|
|
||||||
if global_config.bot.qq_account == user_id and platform == global_config.bot.platform:
|
if global_config.bot.qq_account == user_id and platform == global_config.bot.platform:
|
||||||
return await global_prompt_manager.format_prompt(
|
return await global_prompt_manager.format_prompt(
|
||||||
"private_replyer_self_prompt",
|
"private_replyer_self_prompt",
|
||||||
|
|
@ -738,6 +819,7 @@ class PrivateReplyer:
|
||||||
reply_style=global_config.personality.reply_style,
|
reply_style=global_config.personality.reply_style,
|
||||||
keywords_reaction_prompt=keywords_reaction_prompt,
|
keywords_reaction_prompt=keywords_reaction_prompt,
|
||||||
moderation_prompt=moderation_prompt_block,
|
moderation_prompt=moderation_prompt_block,
|
||||||
|
chat_prompt=chat_prompt_block,
|
||||||
), selected_expressions
|
), selected_expressions
|
||||||
else:
|
else:
|
||||||
return await global_prompt_manager.format_prompt(
|
return await global_prompt_manager.format_prompt(
|
||||||
|
|
@ -758,6 +840,7 @@ class PrivateReplyer:
|
||||||
keywords_reaction_prompt=keywords_reaction_prompt,
|
keywords_reaction_prompt=keywords_reaction_prompt,
|
||||||
moderation_prompt=moderation_prompt_block,
|
moderation_prompt=moderation_prompt_block,
|
||||||
sender_name=sender,
|
sender_name=sender,
|
||||||
|
chat_prompt=chat_prompt_block,
|
||||||
), selected_expressions
|
), selected_expressions
|
||||||
|
|
||||||
async def build_prompt_rewrite_context(
|
async def build_prompt_rewrite_context(
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ def init_replyer_prompt():
|
||||||
|
|
||||||
{reply_target_block}。
|
{reply_target_block}。
|
||||||
{identity}
|
{identity}
|
||||||
你正在群里聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state}
|
{chat_prompt}你正在群里聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state}
|
||||||
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。
|
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。
|
||||||
{reply_style}
|
{reply_style}
|
||||||
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出一句回复内容就好。
|
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出一句回复内容就好。
|
||||||
|
|
@ -41,7 +41,7 @@ def init_replyer_prompt():
|
||||||
|
|
||||||
{reply_target_block}。
|
{reply_target_block}。
|
||||||
{identity}
|
{identity}
|
||||||
你正在和{sender_name}聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state}
|
{chat_prompt}你正在和{sender_name}聊天,现在请你读读之前的聊天记录,然后给出日常且口语化的回复,平淡一些,{mood_state}
|
||||||
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。
|
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。
|
||||||
{reply_style}
|
{reply_style}
|
||||||
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。
|
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。
|
||||||
|
|
@ -61,7 +61,7 @@ def init_replyer_prompt():
|
||||||
你现在想补充说明你刚刚自己的发言内容:{target},原因是{reason}
|
你现在想补充说明你刚刚自己的发言内容:{target},原因是{reason}
|
||||||
请你根据聊天内容,组织一条新回复。注意,{target} 是刚刚你自己的发言,你要在这基础上进一步发言,请按照你自己的角度来继续进行回复。注意保持上下文的连贯性。{mood_state}
|
请你根据聊天内容,组织一条新回复。注意,{target} 是刚刚你自己的发言,你要在这基础上进一步发言,请按照你自己的角度来继续进行回复。注意保持上下文的连贯性。{mood_state}
|
||||||
{identity}
|
{identity}
|
||||||
尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。
|
{chat_prompt}尽量简短一些。{keywords_reaction_prompt}请注意把握聊天内容,不要回复的太有条理,可以有个性。
|
||||||
{reply_style}
|
{reply_style}
|
||||||
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。
|
请注意不要输出多余内容(包括前后缀,冒号和引号,括号,表情等),只输出回复内容。
|
||||||
{moderation_prompt}不要输出多余内容(包括冒号和引号,括号,表情包,at或 @等 )。
|
{moderation_prompt}不要输出多余内容(包括冒号和引号,括号,表情包,at或 @等 )。
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ logger = get_logger("database_model")
|
||||||
|
|
||||||
# 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。
|
# 定义一个基础模型是一个好习惯,所有其他模型都应继承自它。
|
||||||
# 这允许您在一个地方为所有模型指定数据库。
|
# 这允许您在一个地方为所有模型指定数据库。
|
||||||
|
|
||||||
|
|
||||||
class BaseModel(Model):
|
class BaseModel(Model):
|
||||||
class Meta:
|
class Meta:
|
||||||
# 将下面的 'db' 替换为您实际的数据库实例变量名。
|
# 将下面的 'db' 替换为您实际的数据库实例变量名。
|
||||||
|
|
@ -343,30 +345,48 @@ class MemoryConflict(BaseModel):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
table_name = "memory_conflicts"
|
table_name = "memory_conflicts"
|
||||||
|
|
||||||
|
class Jargon(BaseModel):
    """
    Model used to store slang / jargon entries.
    """

    # The term itself.
    content = TextField()
    # Context in which the term appeared; the jargon miner reads it as a JSON list
    # and falls back to treating it as a single plain string.
    raw_content = TextField(null=True)
    # Category code; the extraction prompt asks for one of p / c / e / x.
    type = TextField(null=True)
    # Filled in by meaning inference.
    translation = TextField(null=True)
    meaning = TextField(null=True)
    # Chat stream the entry belongs to (indexed).
    chat_id = TextField(index=True)
    # NOTE(review): presumably marks entries shared across chats — confirm against callers.
    is_global = BooleanField(default=False)
    # Counter that the inference thresholds are checked against.
    count = IntegerField(default=0)
    is_jargon = BooleanField(null=True)  # None = not yet judged, True = jargon, False = not jargon
    last_inference_count = IntegerField(null=True)  # count value at the last judgement; avoids re-judging after a restart
    is_complete = BooleanField(default=False)  # whether all inference is finished (no more inference once count >= 100)

    class Meta:
        table_name = "jargon"
|
||||||
|
|
||||||
|
# Single list of model classes shared by create_tables(), initialize_database(),
# sync_field_constraints() and check_field_constraints(), so a new table only
# has to be added in one place.
MODELS = [
    ChatStreams,
    LLMUsage,
    Emoji,
    Messages,
    Images,
    ImageDescriptions,
    OnlineTime,
    PersonInfo,
    Expression,
    ActionRecords,
    MemoryChest,
    MemoryConflict,
    Jargon,
]
|
||||||
|
|
||||||
def create_tables():
|
def create_tables():
|
||||||
"""
|
"""
|
||||||
创建所有在模型中定义的数据库表。
|
创建所有在模型中定义的数据库表。
|
||||||
"""
|
"""
|
||||||
with db:
|
with db:
|
||||||
db.create_tables(
|
db.create_tables(MODELS)
|
||||||
[
|
|
||||||
ChatStreams,
|
|
||||||
LLMUsage,
|
|
||||||
Emoji,
|
|
||||||
Messages,
|
|
||||||
Images,
|
|
||||||
ImageDescriptions,
|
|
||||||
OnlineTime,
|
|
||||||
PersonInfo,
|
|
||||||
Expression,
|
|
||||||
ActionRecords, # 添加 ActionRecords 到初始化列表
|
|
||||||
MemoryChest,
|
|
||||||
MemoryConflict, # 添加记忆冲突表
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def initialize_database(sync_constraints=False):
|
def initialize_database(sync_constraints=False):
|
||||||
|
|
@ -379,24 +399,9 @@ def initialize_database(sync_constraints=False):
|
||||||
如果为 True,会检查并修复字段的 NULL 约束不一致问题。
|
如果为 True,会检查并修复字段的 NULL 约束不一致问题。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
models = [
|
|
||||||
ChatStreams,
|
|
||||||
LLMUsage,
|
|
||||||
Emoji,
|
|
||||||
Messages,
|
|
||||||
Images,
|
|
||||||
ImageDescriptions,
|
|
||||||
OnlineTime,
|
|
||||||
PersonInfo,
|
|
||||||
Expression,
|
|
||||||
ActionRecords, # 添加 ActionRecords 到初始化列表
|
|
||||||
MemoryChest,
|
|
||||||
MemoryConflict,
|
|
||||||
]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with db: # 管理 table_exists 检查的连接
|
with db: # 管理 table_exists 检查的连接
|
||||||
for model in models:
|
for model in MODELS:
|
||||||
table_name = model._meta.table_name
|
table_name = model._meta.table_name
|
||||||
if not db.table_exists(model):
|
if not db.table_exists(model):
|
||||||
logger.warning(f"表 '{table_name}' 未找到,正在创建...")
|
logger.warning(f"表 '{table_name}' 未找到,正在创建...")
|
||||||
|
|
@ -476,24 +481,9 @@ def sync_field_constraints():
|
||||||
如果发现不一致,会自动修复字段约束。
|
如果发现不一致,会自动修复字段约束。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
models = [
|
|
||||||
ChatStreams,
|
|
||||||
LLMUsage,
|
|
||||||
Emoji,
|
|
||||||
Messages,
|
|
||||||
Images,
|
|
||||||
ImageDescriptions,
|
|
||||||
OnlineTime,
|
|
||||||
PersonInfo,
|
|
||||||
Expression,
|
|
||||||
ActionRecords,
|
|
||||||
MemoryChest,
|
|
||||||
MemoryConflict,
|
|
||||||
]
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with db:
|
with db:
|
||||||
for model in models:
|
for model in MODELS:
|
||||||
table_name = model._meta.table_name
|
table_name = model._meta.table_name
|
||||||
if not db.table_exists(model):
|
if not db.table_exists(model):
|
||||||
logger.warning(f"表 '{table_name}' 不存在,跳过约束检查")
|
logger.warning(f"表 '{table_name}' 不存在,跳过约束检查")
|
||||||
|
|
@ -660,26 +650,11 @@ def check_field_constraints():
|
||||||
用于在修复前预览需要修复的内容。
|
用于在修复前预览需要修复的内容。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
models = [
|
|
||||||
ChatStreams,
|
|
||||||
LLMUsage,
|
|
||||||
Emoji,
|
|
||||||
Messages,
|
|
||||||
Images,
|
|
||||||
ImageDescriptions,
|
|
||||||
OnlineTime,
|
|
||||||
PersonInfo,
|
|
||||||
Expression,
|
|
||||||
ActionRecords,
|
|
||||||
MemoryChest,
|
|
||||||
MemoryConflict,
|
|
||||||
]
|
|
||||||
|
|
||||||
inconsistencies = {}
|
inconsistencies = {}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with db:
|
with db:
|
||||||
for model in models:
|
for model in MODELS:
|
||||||
table_name = model._meta.table_name
|
table_name = model._meta.table_name
|
||||||
if not db.table_exists(model):
|
if not db.table_exists(model):
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -656,6 +656,25 @@ class ExperimentalConfig(ConfigBase):
|
||||||
enable_friend_chat: bool = False
|
enable_friend_chat: bool = False
|
||||||
"""是否启用好友聊天"""
|
"""是否启用好友聊天"""
|
||||||
|
|
||||||
|
chat_prompts: list[str] = field(default_factory=lambda: [])
|
||||||
|
"""
|
||||||
|
为指定聊天添加额外的prompt配置列表
|
||||||
|
格式: ["platform:id:type:prompt内容", ...]
|
||||||
|
|
||||||
|
示例:
|
||||||
|
[
|
||||||
|
"qq:114514:group:这是一个摄影群,你精通摄影知识",
|
||||||
|
"qq:19198:group:这是一个二次元交流群",
|
||||||
|
"qq:114514:private:这是你与好朋友的私聊"
|
||||||
|
]
|
||||||
|
|
||||||
|
说明:
|
||||||
|
- platform: 平台名称,如 "qq"
|
||||||
|
- id: 群ID或用户ID
|
||||||
|
- type: "group" 或 "private"
|
||||||
|
- prompt内容: 要添加的额外prompt文本
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class MaimMessageConfig(ConfigBase):
|
class MaimMessageConfig(ConfigBase):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
from .jargon_miner import extract_and_store_jargon
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"extract_and_store_jargon",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,613 @@
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import asyncio
|
||||||
|
from typing import List
|
||||||
|
from json_repair import repair_json
|
||||||
|
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
from src.common.database.database_model import Jargon
|
||||||
|
from src.llm_models.utils_model import LLMRequest
|
||||||
|
from src.config.config import model_config
|
||||||
|
from src.chat.message_receive.chat_stream import get_chat_manager
|
||||||
|
from src.chat.utils.chat_message_builder import (
|
||||||
|
build_anonymous_messages,
|
||||||
|
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||||
|
)
|
||||||
|
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
||||||
|
|
||||||
|
|
||||||
|
logger = get_logger("jargon")
|
||||||
|
|
||||||
|
|
||||||
|
def _init_prompt() -> None:
    """Create the jargon-extraction prompt template named "extract_jargon_prompt".

    The template takes a single ``{chat_str}`` placeholder and asks the model
    for a JSON array of ``content`` / ``raw_content`` / ``type`` objects.
    """
    # Doubled braces ({{ }}) are literal braces in the JSON example;
    # presumably the template is rendered with str.format-style substitution.
    prompt_str = """
**聊天内容**
{chat_str}

请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
- 必须为对话中真实出现过的短词或短语
- 必须是你无法理解含义的词语,或者出现频率较高的词语
- 请不要选择有明确含义,或者含义清晰的词语
- 必须是这几种类别之一:英文或中文缩写、中文拼音短语、字母数字混合
- 排除:人名、@、明显的表情/图片占位、纯标点、常规功能词(如的、了、呢、啊等)
- 每个词条长度建议 2-8 个字符(不强制),尽量短小
- 合并重复项,去重

分类规则,type必须根据规则填写:
- p(拼音缩写):由字母或字母和汉字构成的,用汉语拼音简写词,或汉语拼音首字母的简写词,例如:nb、yyds、xswl
- c(中文缩写):中文词语的缩写,用几个汉字概括一个词汇或含义,例如:社死、内卷
- e(英文缩写):英文词语的缩写,用英文字母概括一个词汇或含义,例如:CPU、GPU、API
- x(谐音梗):谐音梗,用谐音词概括一个词汇或含义,例如:好似,难崩

以 JSON 数组输出,元素为对象(严格按以下结构):
[
{{"content": "词条", "raw_content": "包含该词条的完整对话原文", "type": "p"}},
{{"content": "词条2", "raw_content": "包含该词条的完整对话原文", "type": "c"}}
]

现在请输出:
"""
    # The instance is not kept; presumably Prompt registers itself under the given name.
    Prompt(prompt_str, "extract_jargon_prompt")
|
||||||
|
|
||||||
|
|
||||||
|
def _init_inference_prompts() -> None:
    """Create the three prompt templates used for meaning inference.

    - "jargon_inference_with_context_prompt": infer from the term plus its context.
    - "jargon_inference_content_only_prompt": infer from the term alone.
    - "jargon_compare_inference_prompt": compare the two inference results.
    """
    # Prompt 1: infer from raw_content and content
    prompt1_str = """
**词条内容**
{content}

**词条出现的上下文(raw_content)**
{raw_content_list}

请根据以上词条内容和上下文,推断这个词条的含义。
- 如果这是一个黑话、俚语或网络用语,请推断其含义和翻译
- 如果含义明确(常规词汇),也请说明

以 JSON 格式输出:
{{
"meaning": "含义说明",
"translation": "翻译或解释"
}}
"""
    Prompt(prompt1_str, "jargon_inference_with_context_prompt")

    # Prompt 2: infer from content only
    prompt2_str = """
**词条内容**
{content}

请仅根据这个词条本身,推断其含义。
- 如果这是一个黑话、俚语或网络用语,请推断其含义和翻译
- 如果含义明确(常规词汇),也请说明

以 JSON 格式输出:
{{
"meaning": "含义说明",
"translation": "翻译或解释"
}}
"""
    Prompt(prompt2_str, "jargon_inference_content_only_prompt")

    # Prompt 3: compare the two inference results
    prompt3_str = """
**推断结果1(基于上下文)**
{inference1}

**推断结果2(仅基于词条)**
{inference2}

请比较这两个推断结果,判断它们是否相同或类似。
- 如果两个推断结果的"含义"相同或类似,说明这个词条不是黑话(含义明确)
- 如果两个推断结果有差异,说明这个词条可能是黑话(需要上下文才能理解)

以 JSON 格式输出:
{{
"is_similar": true/false,
"reason": "判断理由"
}}
"""
    Prompt(prompt3_str, "jargon_compare_inference_prompt")
|
||||||
|
|
||||||
|
|
||||||
|
# Build the prompt templates once at import time; the miner later refers to them by name.
_init_prompt()
_init_inference_prompts()
|
||||||
|
|
||||||
|
|
||||||
|
def _should_infer_meaning(jargon_obj: "Jargon") -> bool:
    """
    Decide whether a meaning inference should run for this entry.

    Inference is due when ``count`` has reached the first milestone
    (5, 10, 20, 40, 60, 100) that lies above ``last_inference_count``.
    Entries flagged ``is_complete`` are never inferred again, and ``count``
    must exceed ``last_inference_count`` so a restart does not repeat a
    judgement that was already made.
    """
    if jargon_obj.is_complete:
        return False

    seen = jargon_obj.count or 0
    last_checked = jargon_obj.last_inference_count or 0
    milestones = (5, 10, 20, 40, 60, 100)

    # Below the first milestone, or nothing new since the last judgement.
    if seen < milestones[0] or seen <= last_checked:
        return False

    # First milestone not yet judged; None means the last one (100) was already handled.
    pending = next((m for m in milestones if m > last_checked), None)
    return pending is not None and seen >= pending
|
||||||
|
|
||||||
|
|
||||||
|
class JargonMiner:
|
||||||
|
def __init__(self, chat_id: str) -> None:
    """Set up a miner bound to one chat stream.

    Args:
        chat_id: ID of the chat stream whose messages are mined.
    """
    self.chat_id = chat_id

    # LLM client used for extraction and inference requests.
    self.llm = LLMRequest(model_set=model_config.model_task_config.utils, request_type="jargon.extract")

    # Frequency control (tune as needed): a run needs at least this many new
    # messages and at least this many seconds since the previous run.
    self.min_messages_for_learning: int = 20
    self.min_learning_interval: float = 30
    self.last_learning_time: float = time.time()
|
||||||
|
|
||||||
|
async def _infer_meaning_by_id(self, jargon_id: int) -> None:
    """Load a Jargon row by primary key and run meaning inference on it.

    Any exception (including a failed lookup) is logged and swallowed.
    """
    try:
        record = Jargon.get_by_id(jargon_id)
        # Re-check is_complete: the row may have been marked complete while
        # this async task was waiting to run.
        if not record.is_complete:
            await self.infer_meaning(record)
            return
        logger.debug(f"jargon {record.content} 已完成所有推断,跳过")
    except Exception as exc:
        logger.error(f"通过ID推断jargon失败: {exc}")
|
||||||
|
|
||||||
|
@staticmethod
def _parse_inference_json(text: str):
    """Parse an LLM reply as JSON, falling back to json_repair for non-strict output."""
    stripped = text.strip()
    if stripped.startswith("{") and stripped.endswith("}"):
        try:
            return json.loads(stripped)
        except json.JSONDecodeError:
            # Looks like an object but is not strict JSON; let json_repair try.
            pass
    repaired = repair_json(stripped)
    return json.loads(repaired) if isinstance(repaired, str) else repaired

async def _request_inference_json(self, prompt: str, content: str, label: str):
    """Send one inference prompt and return ``(parsed_dict_or_None, raw_response)``.

    Failures (no response, unparsable reply, reply that is not a JSON object)
    are logged here with ``label`` identifying the step; the caller only needs
    to check for ``None``.
    """
    response, _ = await self.llm.generate_response_async(prompt, temperature=0.3)
    if not response:
        logger.warning(f"jargon {content} {label}失败:无响应")
        return None, response

    try:
        parsed = self._parse_inference_json(response)
    except Exception as e:
        logger.error(f"jargon {content} {label}解析失败: {e}")
        return None, response

    if not isinstance(parsed, dict):
        logger.warning(f"jargon {content} {label}结果格式错误")
        return None, response
    return parsed, response

async def infer_meaning(self, jargon_obj: Jargon) -> None:
    """
    Infer the meaning of a jargon entry and persist the verdict.

    Three LLM calls are made: (1) infer from the term plus its recorded
    context, (2) infer from the term alone, (3) compare both results. If the
    two inferences are similar the term is considered ordinary vocabulary
    (``is_jargon=False``); otherwise it is considered jargon. The chosen
    meaning/translation, ``last_inference_count`` and (once count >= 100)
    ``is_complete`` are saved on ``jargon_obj``.

    All exceptions are logged and swallowed; the method returns early without
    saving whenever a step fails.
    """
    try:
        content = jargon_obj.content
        raw_content_str = jargon_obj.raw_content or ""

        # raw_content is expected to be a JSON list; fall back to the raw string otherwise.
        raw_content_list = []
        if raw_content_str:
            try:
                loaded = json.loads(raw_content_str) if isinstance(raw_content_str, str) else raw_content_str
            except (json.JSONDecodeError, TypeError):
                loaded = raw_content_str
            if isinstance(loaded, list):
                raw_content_list = loaded
            elif loaded:
                raw_content_list = [loaded]

        if not raw_content_list:
            logger.warning(f"jargon {content} 没有raw_content,跳过推断")
            return

        # Step 1: infer from raw_content and content.
        # Items come from stored JSON and may not be strings; str() keeps join() from raising.
        raw_content_text = "\n".join(str(item) for item in raw_content_list)
        prompt1 = await global_prompt_manager.format_prompt(
            "jargon_inference_with_context_prompt",
            content=content,
            raw_content_list=raw_content_text,
        )
        inference1, response1 = await self._request_inference_json(prompt1, content, "推断1")
        if inference1 is None:
            return

        # Step 2: infer from content only.
        prompt2 = await global_prompt_manager.format_prompt(
            "jargon_inference_content_only_prompt",
            content=content,
        )
        inference2, response2 = await self._request_inference_json(prompt2, content, "推断2")
        if inference2 is None:
            return

        logger.info(f"jargon {content} 推断2提示词: {prompt2}")
        logger.info(f"jargon {content} 推断2结果: {response2}")
        logger.info(f"jargon {content} 推断1提示词: {prompt1}")
        logger.info(f"jargon {content} 推断1结果: {response1}")

        # Step 3: compare the two inference results.
        prompt3 = await global_prompt_manager.format_prompt(
            "jargon_compare_inference_prompt",
            inference1=json.dumps(inference1, ensure_ascii=False),
            inference2=json.dumps(inference2, ensure_ascii=False),
        )
        logger.info(f"jargon {content} 比较提示词: {prompt3}")
        comparison, _ = await self._request_inference_json(prompt3, content, "比较")
        if comparison is None:
            return

        # The model may answer with a string such as "false"; a plain truthiness
        # test would read that as True, so normalise strings explicitly.
        raw_similar = comparison.get("is_similar", False)
        if isinstance(raw_similar, str):
            is_similar = raw_similar.strip().lower() in ("true", "yes", "1")
        else:
            is_similar = bool(raw_similar)
        # Similar results mean the term is understandable without context, i.e. not jargon.
        is_jargon = not is_similar

        # Jargon: trust the context-based inference. Not jargon: the
        # content-only inference already captures the plain meaning.
        chosen = inference1 if is_jargon else inference2
        jargon_obj.is_jargon = is_jargon
        jargon_obj.meaning = chosen.get("meaning", "")
        jargon_obj.translation = chosen.get("translation", "")

        # Remember the count at which this judgement was made, to avoid repeating it after a restart.
        jargon_obj.last_inference_count = jargon_obj.count or 0

        # Once count >= 100 the entry is marked complete and never inferred again.
        if (jargon_obj.count or 0) >= 100:
            jargon_obj.is_complete = True

        jargon_obj.save()
        logger.info(f"jargon {content} 推断完成: is_jargon={is_jargon}, meaning={jargon_obj.meaning}, last_inference_count={jargon_obj.last_inference_count}, is_complete={jargon_obj.is_complete}")

    except Exception as e:
        logger.error(f"jargon推断失败: {e}")
        import traceback

        traceback.print_exc()
|
||||||
|
|
||||||
|
def should_trigger(self) -> bool:
    """Report whether a new jargon-extraction pass may start now.

    Two conditions must both hold: at least ``min_learning_interval`` seconds
    have elapsed since ``last_learning_time``, and the chat has accumulated at
    least ``min_messages_for_learning`` messages between ``last_learning_time``
    and the current time.

    Returns:
        bool: True when both the cooldown and the message-count threshold are met.
    """
    # Cooldown gate: bail out before touching the message store.
    elapsed = time.time() - self.last_learning_time
    if elapsed < self.min_learning_interval:
        return False

    # Volume gate: count what arrived in the window since the last pass.
    window_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
        chat_id=self.chat_id,
        timestamp_start=self.last_learning_time,
        timestamp_end=time.time(),
    )
    if not window_messages:
        return False
    return len(window_messages) >= self.min_messages_for_learning
|
||||||
|
|
||||||
|
async def run_once(self) -> None:
    """Run one jargon-extraction pass over the messages since the last pass.

    Steps, in order:

    1. Gate on ``should_trigger()`` and on the chat stream existing.
    2. Fetch up to 20 messages in ``[last_learning_time, now]``, anonymise them
       and ask the LLM (``extract_jargon_prompt``) for candidate entries.
    3. Parse the answer (repairing malformed JSON), normalise each entry to
       ``{"content", "raw_content": [...], "type"}`` and de-duplicate by content.
    4. For every entry either bump an existing row (same chat or global),
       merge three or more per-chat rows into a single global row, or create
       a new per-chat row.

    Differences from the previous implementation:

    * ``last_learning_time`` is also advanced when the window was examined but
      produced nothing to store (empty text / no valid entries), so the same
      window is not re-sent to the LLM on every subsequent call.
    * Background inference tasks are kept in ``self._inference_tasks`` until
      they finish; the event loop only holds weak references to tasks.
    * The delete-then-create merge runs inside a database transaction.
    * JSON ``null`` values are no longer stringified to ``"None"``.

    All exceptions are logged and swallowed; the method never raises.
    """

    def _decode_raw_content(stored) -> list:
        """Return the stored raw_content column as a list (JSON text or legacy value)."""
        if not stored:
            return []
        try:
            decoded = json.loads(stored) if isinstance(stored, str) else stored
        except (json.JSONDecodeError, TypeError):
            # Legacy rows may hold a plain, non-JSON string.
            return [stored]
        if isinstance(decoded, list):
            return decoded
        return [decoded] if decoded else []

    try:
        if not self.should_trigger():
            return

        chat_stream = get_chat_manager().get_stream(self.chat_id)
        if not chat_stream:
            return

        # Freeze the extraction window so the same messages are not extracted twice.
        extraction_start_time = self.last_learning_time
        extraction_end_time = time.time()

        # Fetch the messages inside the learning window.
        messages = get_raw_msg_by_timestamp_with_chat_inclusive(
            chat_id=self.chat_id,
            timestamp_start=extraction_start_time,
            timestamp_end=extraction_end_time,
            limit=20,
        )
        if not messages:
            return

        chat_str: str = await build_anonymous_messages(messages)
        if not chat_str.strip():
            # Nothing textual to learn from; the window has been consumed.
            self.last_learning_time = extraction_end_time
            return

        prompt: str = await global_prompt_manager.format_prompt(
            "extract_jargon_prompt",
            chat_str=chat_str,
        )

        response, _ = await self.llm.generate_response_async(prompt, temperature=0.2)
        if not response:
            # Treated as a transient failure: keep the window so it is retried.
            return

        logger.info(f"jargon提取提示词: {prompt}")
        logger.info(f"jargon提取结果: {response}")

        # Parse the model output as JSON.
        entries: List[dict] = []
        try:
            resp = response.strip()
            if resp.startswith("[") and resp.endswith("]"):
                parsed = json.loads(resp)
            else:
                repaired = repair_json(resp)
                parsed = json.loads(repaired) if isinstance(repaired, str) else repaired

            # A single object is accepted as a one-element list.
            if isinstance(parsed, dict):
                parsed = [parsed]

            if not isinstance(parsed, list):
                return

            for item in parsed:
                if not isinstance(item, dict):
                    continue

                content_value = item.get("content")
                content = "" if content_value is None else str(content_value).strip()

                # raw_content may be a single string or a list of strings.
                raw_content_value = item.get("raw_content", "")
                raw_content_list = []
                if isinstance(raw_content_value, list):
                    raw_content_list = [
                        str(rc).strip()
                        for rc in raw_content_value
                        if rc is not None and str(rc).strip()
                    ]
                elif isinstance(raw_content_value, str):
                    raw_content_str = raw_content_value.strip()
                    if raw_content_str:
                        raw_content_list = [raw_content_str]

                # Only "p", "c" and "e" are valid types; anything else falls back to "p".
                type_str = str(item.get("type", "")).strip().lower()
                if type_str not in ["p", "c", "e"]:
                    type_str = "p"

                if content and raw_content_list:
                    entries.append(
                        {
                            "content": content,
                            "raw_content": raw_content_list,
                            "type": type_str,
                        }
                    )
        except Exception as e:
            logger.error(f"解析jargon JSON失败: {e}; 原始: {response}")
            return

        if not entries:
            # The model answered but found no jargon: mark the window as done.
            self.last_learning_time = extraction_end_time
            return

        # De-duplicate by content, keeping the first occurrence of each term.
        first_by_content: dict = {}
        for entry in entries:
            first_by_content.setdefault(entry["content"], entry)
        uniq_entries = list(first_by_content.values())

        saved = 0
        updated = 0
        merged = 0
        for entry in uniq_entries:
            content = entry["content"]
            raw_content_list = entry["raw_content"]  # already a list
            type_str = entry["type"]
            try:
                # Step 1: look for a row with this content in this chat or among globals.
                existing = (
                    Jargon.select()
                    .where(
                        ((Jargon.chat_id == self.chat_id) | Jargon.is_global)
                        & (Jargon.content == content)
                    )
                    .first()
                )
                if existing is not None:
                    try:
                        existing.count = (existing.count or 0) + 1
                    except TypeError:
                        # A non-numeric stored count is reset instead of failing the entry.
                        existing.count = 1

                    # Merge the stored raw_content with the new values, order-preserving.
                    merged_list = list(
                        dict.fromkeys(_decode_raw_content(existing.raw_content) + raw_content_list)
                    )
                    existing.raw_content = json.dumps(merged_list, ensure_ascii=False)

                    # Fill in the type only when the row has none.
                    if type_str and not existing.type:
                        existing.type = type_str
                    existing.save()

                    # Fire-and-forget meaning inference once the count threshold is crossed.
                    if _should_infer_meaning(existing):
                        task = asyncio.create_task(self._infer_meaning_by_id(existing.id))
                        pending = getattr(self, "_inference_tasks", None)
                        if pending is None:
                            pending = set()
                            self._inference_tasks = pending
                        # Hold a strong reference until completion so the task is not collected.
                        pending.add(task)
                        task.add_done_callback(pending.discard)

                    updated += 1
                    continue

                # Step 2: no row for this chat; collect non-global rows from all chats
                # (global rows were already covered by step 1).
                all_matching = list(
                    Jargon.select().where((Jargon.content == content) & (~Jargon.is_global))
                )

                if len(all_matching) < 3:
                    # Fewer than three chats know the term: store a normal per-chat row.
                    Jargon.create(
                        content=content,
                        raw_content=json.dumps(raw_content_list, ensure_ascii=False),
                        type=type_str,
                        chat_id=self.chat_id,
                        is_global=False,
                        count=1,
                    )
                    saved += 1
                    continue

                # Three or more rows exist: fold them, plus this new sighting, into one global row.
                total_count = sum((row.count or 0) for row in all_matching) + 1  # +1 for this sighting

                all_raw_content = []
                for row in all_matching:
                    all_raw_content.extend(_decode_raw_content(row.raw_content))
                all_raw_content.extend(raw_content_list)
                merged_raw_content = list(dict.fromkeys(all_raw_content))

                # Prefer the freshly extracted type, else the first stored one.
                merged_type = type_str or next(
                    (row.type for row in all_matching if row.type), type_str
                )

                # For the remaining fields the first row that has a value wins.
                merged_meaning = next((row.meaning for row in all_matching if row.meaning), None)
                merged_translation = next(
                    (row.translation for row in all_matching if row.translation), None
                )
                merged_is_jargon = next(
                    (row.is_jargon for row in all_matching if row.is_jargon is not None), None
                )
                merged_last_inference_count = next(
                    (
                        row.last_inference_count
                        for row in all_matching
                        if row.last_inference_count is not None
                    ),
                    None,
                )
                merged_is_complete = any(row.is_complete for row in all_matching)

                # Delete the old rows and create the global one atomically, so a
                # failed create cannot leave the term with no row at all.
                with Jargon._meta.database.atomic():
                    for row in all_matching:
                        row.delete_instance()
                    Jargon.create(
                        content=content,
                        raw_content=json.dumps(merged_raw_content, ensure_ascii=False),
                        type=merged_type,
                        chat_id="global",
                        is_global=True,
                        count=total_count,
                        meaning=merged_meaning,
                        translation=merged_translation,
                        is_jargon=merged_is_jargon,
                        last_inference_count=merged_last_inference_count,
                        is_complete=merged_is_complete,
                    )
                merged += 1
                logger.info(f"合并jargon为global: content={content}, 合并了{len(all_matching)}条已有记录+1条新记录(共{len(all_matching)+1}条),总count={total_count}")
            except Exception as e:
                logger.error(f"保存jargon失败: chat_id={self.chat_id}, content={content}, err={e}")
                continue

        if saved or updated or merged:
            logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated} 条,合并为global {merged} 条,chat_id={self.chat_id}")
            # Advance to the end of this window so the same messages are not extracted again.
            self.last_learning_time = extraction_end_time
    except Exception as e:
        logger.error(f"JargonMiner 运行失败: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
class JargonMinerManager:
    """Registry that hands out one ``JargonMiner`` per chat id, built on first request."""

    def __init__(self) -> None:
        # chat_id -> miner; filled lazily by get_miner().
        self._miners: dict[str, JargonMiner] = {}

    def get_miner(self, chat_id: str) -> JargonMiner:
        """Return the miner cached for ``chat_id``, creating and caching it if absent."""
        miner = self._miners.get(chat_id)
        if miner is None:
            miner = JargonMiner(chat_id)
            self._miners[chat_id] = miner
        return miner


# Module-level registry instance used by extract_and_store_jargon().
miner_manager = JargonMinerManager()
|
||||||
|
|
||||||
|
|
||||||
|
async def extract_and_store_jargon(chat_id: str) -> None:
    """Run a single jargon-extraction pass for ``chat_id`` using its cached miner."""
    await miner_manager.get_miner(chat_id).run_once()
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -444,7 +444,7 @@ def _default_normal_response_parser(
|
||||||
choice0 = resp.choices[0]
|
choice0 = resp.choices[0]
|
||||||
reason = getattr(choice0, "finish_reason", None)
|
reason = getattr(choice0, "finish_reason", None)
|
||||||
if reason and reason == "length":
|
if reason and reason == "length":
|
||||||
print(resp)
|
# print(resp)
|
||||||
_model_name = resp.model
|
_model_name = resp.model
|
||||||
# 统一日志格式
|
# 统一日志格式
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,8 @@
|
||||||
import time
|
import time
|
||||||
import asyncio
|
from typing import List, Optional
|
||||||
from typing import List, Optional, Tuple
|
|
||||||
from src.common.logger import get_logger
|
from src.common.logger import get_logger
|
||||||
from src.chat.utils.chat_message_builder import (
|
from src.chat.utils.chat_message_builder import (
|
||||||
get_raw_msg_before_timestamp_with_chat,
|
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||||
build_readable_messages_with_id,
|
build_readable_messages_with_id,
|
||||||
)
|
)
|
||||||
from src.llm_models.utils_model import LLMRequest
|
from src.llm_models.utils_model import LLMRequest
|
||||||
|
|
@ -25,7 +24,21 @@ class CuriousDetector:
|
||||||
model_set=model_config.model_task_config.utils,
|
model_set=model_config.model_task_config.utils,
|
||||||
request_type="curious_detector",
|
request_type="curious_detector",
|
||||||
)
|
)
|
||||||
|
# 触发控制
|
||||||
|
self.last_detection_time: float = time.time()
|
||||||
|
self.min_interval_seconds: float = 60.0
|
||||||
|
self.min_messages: int = 20
|
||||||
|
|
||||||
|
def should_trigger(self) -> bool:
    """Report whether a new curiosity-detection pass may start now.

    Requires that ``min_interval_seconds`` have passed since
    ``last_detection_time`` and that at least ``min_messages`` messages were
    recorded for this chat between ``last_detection_time`` and now.

    Returns:
        bool: True when both the cooldown and the message threshold are met.
    """
    since_last = time.time() - self.last_detection_time
    if since_last < self.min_interval_seconds:
        return False

    fetched = get_raw_msg_by_timestamp_with_chat_inclusive(
        chat_id=self.chat_id,
        timestamp_start=self.last_detection_time,
        timestamp_end=time.time(),
    )
    if not fetched:
        return False
    return len(fetched) >= self.min_messages
|
||||||
|
|
||||||
async def detect_questions(self, recent_messages: List) -> Optional[str]:
|
async def detect_questions(self, recent_messages: List) -> Optional[str]:
|
||||||
"""
|
"""
|
||||||
检测最近消息中是否有需要提问的内容
|
检测最近消息中是否有需要提问的内容
|
||||||
|
|
@ -91,6 +104,9 @@ class CuriousDetector:
|
||||||
|
|
||||||
result_text, _ = await self.llm_request.generate_response_async(prompt, temperature=0.3)
|
result_text, _ = await self.llm_request.generate_response_async(prompt, temperature=0.3)
|
||||||
|
|
||||||
|
logger.info(f"好奇心检测提示词: {prompt}")
|
||||||
|
logger.info(f"好奇心检测结果: {result_text}")
|
||||||
|
|
||||||
if not result_text:
|
if not result_text:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -154,7 +170,20 @@ class CuriousDetector:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
|
class CuriousManager:
    """Per-chat cache of ``CuriousDetector`` objects, so detector state survives between calls."""

    def __init__(self) -> None:
        # chat_id -> detector; entries are created on demand by get_detector().
        self._detectors: dict[str, CuriousDetector] = {}

    def get_detector(self, chat_id: str) -> CuriousDetector:
        """Return the detector for ``chat_id``, constructing and caching it on first use."""
        detector = self._detectors.get(chat_id)
        if detector is None:
            detector = CuriousDetector(chat_id)
            self._detectors[chat_id] = detector
        return detector


# Module-level manager instance.
curious_manager = CuriousManager()
|
||||||
|
|
||||||
|
|
||||||
|
async def check_and_make_question(chat_id: str) -> bool:
|
||||||
"""
|
"""
|
||||||
检查聊天记录并生成问题(如果检测到需要提问的内容)
|
检查聊天记录并生成问题(如果检测到需要提问的内容)
|
||||||
|
|
||||||
|
|
@ -166,8 +195,20 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
|
||||||
bool: 是否检测到并记录了问题
|
bool: 是否检测到并记录了问题
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
detector = CuriousDetector(chat_id)
|
detector = curious_manager.get_detector(chat_id)
|
||||||
|
if not detector.should_trigger():
|
||||||
|
return False
|
||||||
|
|
||||||
|
# 拉取窗口内消息
|
||||||
|
recent_messages = get_raw_msg_by_timestamp_with_chat_inclusive(
|
||||||
|
chat_id=chat_id,
|
||||||
|
timestamp_start=detector.last_detection_time,
|
||||||
|
timestamp_end=time.time(),
|
||||||
|
limit=80,
|
||||||
|
)
|
||||||
|
if not recent_messages:
|
||||||
|
return False
|
||||||
|
|
||||||
# 检测是否需要提问
|
# 检测是否需要提问
|
||||||
question = await detector.detect_questions(recent_messages)
|
question = await detector.detect_questions(recent_messages)
|
||||||
|
|
||||||
|
|
@ -176,6 +217,7 @@ async def check_and_make_question(chat_id: str, recent_messages: List) -> bool:
|
||||||
success = await detector.make_question_from_detection(question)
|
success = await detector.make_question_from_detection(question)
|
||||||
if success:
|
if success:
|
||||||
logger.info(f"成功检测并记录问题: {question}")
|
logger.info(f"成功检测并记录问题: {question}")
|
||||||
|
detector.last_detection_time = time.time()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ from .apis import (
|
||||||
person_api,
|
person_api,
|
||||||
plugin_manage_api,
|
plugin_manage_api,
|
||||||
send_api,
|
send_api,
|
||||||
|
auto_talk_api,
|
||||||
register_plugin,
|
register_plugin,
|
||||||
get_logger,
|
get_logger,
|
||||||
)
|
)
|
||||||
|
|
@ -83,6 +84,7 @@ __all__ = [
|
||||||
"person_api",
|
"person_api",
|
||||||
"plugin_manage_api",
|
"plugin_manage_api",
|
||||||
"send_api",
|
"send_api",
|
||||||
|
"auto_talk_api",
|
||||||
"register_plugin",
|
"register_plugin",
|
||||||
"get_logger",
|
"get_logger",
|
||||||
# 基础类
|
# 基础类
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ from src.plugin_system.apis import (
|
||||||
tool_api,
|
tool_api,
|
||||||
frequency_api,
|
frequency_api,
|
||||||
mood_api,
|
mood_api,
|
||||||
|
auto_talk_api,
|
||||||
)
|
)
|
||||||
from .logging_api import get_logger
|
from .logging_api import get_logger
|
||||||
from .plugin_register_api import register_plugin
|
from .plugin_register_api import register_plugin
|
||||||
|
|
@ -42,4 +43,5 @@ __all__ = [
|
||||||
"tool_api",
|
"tool_api",
|
||||||
"frequency_api",
|
"frequency_api",
|
||||||
"mood_api",
|
"mood_api",
|
||||||
|
"auto_talk_api",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
|
||||||
|
logger = get_logger("auto_talk_api")
|
||||||
|
|
||||||
|
|
||||||
|
def set_question_probability_multiplier(chat_id: str, multiplier: float) -> bool:
    """Set the proactive-talk probability multiplier of one chat.

    Args:
        chat_id: Identifier of the chat whose heart-flow instance is updated.
        multiplier: New multiplier. Negative values are clamped to ``0.0``;
            NaN is rejected because it would pass the clamp unchanged.

    Returns:
        bool: True when the value was stored. False when the arguments are
        invalid, no heart-flow instance exists for ``chat_id``, the instance
        has no ``question_probability_multiplier`` attribute, or any error
        occurred (errors are logged, never raised).
    """
    try:
        if not isinstance(chat_id, str):
            raise TypeError("chat_id 必须是 str")
        if not isinstance(multiplier, (int, float)):
            raise TypeError("multiplier 必须是数值类型")

        # Local import keeps this fix self-contained within the function.
        import math

        value = float(multiplier)
        if math.isnan(value):
            # NaN compares False against 0, so the clamp below would let it through.
            raise ValueError("multiplier 不能为 NaN")

        # Imported lazily to avoid a circular dependency.
        from src.chat.heart_flow.heartflow import heartflow as _heartflow

        chat = _heartflow.heartflow_chat_list.get(chat_id)
        if chat is None:
            logger.warning(f"未找到 chat_id={chat_id} 的心流实例,无法设置乘数")
            return False

        # Duck-typed check: only instances exposing the attribute are supported,
        # which avoids importing the concrete chat class here.
        if not hasattr(chat, "question_probability_multiplier"):
            logger.warning(f"chat_id={chat_id} 实例不支持主动发言乘数设置")
            return False

        # Constraint: negative multipliers are not allowed.
        if value < 0:
            value = 0.0

        chat.question_probability_multiplier = value
        logger.info(f"[auto_talk_api] chat_id={chat_id} 主动发言乘数已设为 {value}")
        return True
    except Exception as e:
        logger.error(f"设置主动发言乘数失败: {e}")
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_question_probability_multiplier(chat_id: str) -> float:
    """Return the proactive-talk probability multiplier stored for ``chat_id``.

    Falls back to ``0.0`` when no heart-flow instance exists for the chat, when
    the instance lacks the attribute, or when any error occurs during lookup.
    """
    fallback = 0.0
    try:
        # Imported lazily to avoid a circular dependency.
        from src.chat.heart_flow.heartflow import heartflow as _heartflow

        chat = _heartflow.heartflow_chat_list.get(chat_id)
        stored = fallback if chat is None else getattr(chat, "question_probability_multiplier", fallback)
        return float(stored)
    except Exception:
        return fallback
|
||||||
|
|
@ -57,7 +57,7 @@ class BaseTool(ABC):
|
||||||
Returns:
|
Returns:
|
||||||
dict: 工具定义字典
|
dict: 工具定义字典
|
||||||
"""
|
"""
|
||||||
if not cls.name or not cls.description or not cls.parameters:
|
if not cls.name or not cls.description or cls.parameters is None:
|
||||||
raise NotImplementedError(f"工具类 {cls.__name__} 必须定义 name, description 和 parameters 属性")
|
raise NotImplementedError(f"工具类 {cls.__name__} 必须定义 name, description 和 parameters 属性")
|
||||||
|
|
||||||
return {"name": cls.name, "description": cls.description, "parameters": cls.parameters}
|
return {"name": cls.name, "description": cls.description, "parameters": cls.parameters}
|
||||||
|
|
@ -65,7 +65,7 @@ class BaseTool(ABC):
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_tool_info(cls) -> ToolInfo:
|
def get_tool_info(cls) -> ToolInfo:
|
||||||
"""获取工具信息"""
|
"""获取工具信息"""
|
||||||
if not cls.name or not cls.description or not cls.parameters:
|
if not cls.name or not cls.description or cls.parameters is None:
|
||||||
raise NotImplementedError(f"工具类 {cls.__name__} 必须定义 name, description 和 parameters 属性")
|
raise NotImplementedError(f"工具类 {cls.__name__} 必须定义 name, description 和 parameters 属性")
|
||||||
|
|
||||||
return ToolInfo(
|
return ToolInfo(
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,36 @@
|
||||||
|
{
|
||||||
|
"manifest_version": 1,
|
||||||
|
"name": "Jargon插件",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "记录和管理jargon(黑话/俚语)的解释",
|
||||||
|
"author": {
|
||||||
|
"name": "Mai",
|
||||||
|
"url": "https://github.com/MaiM-with-u"
|
||||||
|
},
|
||||||
|
"license": "GPL-v3.0-or-later",
|
||||||
|
|
||||||
|
"host_application": {
|
||||||
|
"min_version": "0.10.4"
|
||||||
|
},
|
||||||
|
"homepage_url": "https://github.com/MaiM-with-u/maibot",
|
||||||
|
"repository_url": "https://github.com/MaiM-with-u/maibot",
|
||||||
|
"keywords": ["jargon", "slang", "built-in"],
|
||||||
|
"categories": ["Jargon"],
|
||||||
|
|
||||||
|
"default_locale": "zh-CN",
|
||||||
|
"locales_path": "_locales",
|
||||||
|
|
||||||
|
"plugin_info": {
|
||||||
|
"is_built_in": true,
|
||||||
|
"plugin_type": "tool_provider",
|
||||||
|
"components": [
|
||||||
|
{
|
||||||
|
"type": "record_jargon_explanation",
|
||||||
|
"name": "record_jargon_explanation",
|
||||||
|
"description": "记录聊天中明确解释的jargon词义"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,180 @@
|
||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
from src.common.database.database_model import Jargon
|
||||||
|
from src.plugin_system import BaseTool, ToolParamType
|
||||||
|
|
||||||
|
logger = get_logger("jargon_explanation")
|
||||||
|
|
||||||
|
|
||||||
|
class RecordJargonExplanationTool(BaseTool):
    """Tool that stores an explicit explanation of a jargon term in the Jargon table.

    When the term already has a row for the current chat, the new translation
    and meaning are appended to the stored ones (``/``-separated) unless the
    same fragment is already present; otherwise a new per-chat row is created.
    In both cases the row is marked ``is_jargon=True``.
    """

    name: str = "record_explanation"
    description: str = (
        "当检测到有人明确解释了某个缩写,拼音缩写,中文缩写,英文缩写的含义时(例如:'xxx是yyy的意思'、'xxx指的是yyy'等)"
        "当某人明确纠正了对某个词汇的错误解释时(例如:'xxx不是yyy的意思'、'xxx不是指的是yyy'等)"
    )
    parameters: List[Tuple[str, ToolParamType, str, bool, None]] = [
        ("content", ToolParamType.STRING, "被解释的目标词汇(黑话/俚语/缩写),例如:yyds、内卷、社死等", True, None),
        ("translation", ToolParamType.STRING, "词汇的翻译或简称,例如:永远的神、社会性死亡等", True, None),
        ("meaning", ToolParamType.STRING, "词汇的详细含义说明", True, None),
    ]
    available_for_llm: bool = True

    @staticmethod
    def _string_arg(function_args: Dict[str, Any], key: str) -> str:
        """Return ``function_args[key]`` as a stripped string; missing or None becomes ``""``."""
        value = function_args.get(key)
        return "" if value is None else str(value).strip()

    @staticmethod
    def _append_explanation(existing: Any, addition: str) -> str:
        """Append ``addition`` to the ``/``-separated ``existing`` text unless already present."""
        current = existing or ""
        if not current:
            return addition
        # Delimiter-padded containment also matches additions that contain "/" themselves.
        if f"/{addition}/" in f"/{current}/":
            return current
        return f"{current}/{addition}"

    async def execute(self, function_args: Dict[str, Any]) -> Dict[str, str]:
        """Validate the arguments and create or update the jargon row.

        Args:
            function_args: Tool arguments with ``content``, ``translation`` and
                ``meaning``. ``content`` is required, and at least one of the
                other two must be non-empty.

        Returns:
            dict: ``{"name": <tool name>, "content": <message>}`` where the
            message is a confirmation, a validation error or a failure notice.
            Exceptions are logged and reported in the message, never raised.
        """
        if not self.chat_id:
            return {"name": self.name, "content": "无法记录jargon解释:缺少chat_id"}

        try:
            content = self._string_arg(function_args, "content")
            translation = self._string_arg(function_args, "translation")
            meaning = self._string_arg(function_args, "meaning")

            if not content:
                return {"name": self.name, "content": "目标词汇不能为空"}

            if not translation and not meaning:
                return {"name": self.name, "content": "翻译和含义至少需要提供一个"}

            # Look for a row with the same term in this chat (single query).
            obj = (
                Jargon.select()
                .where((Jargon.chat_id == self.chat_id) & (Jargon.content == content))
                .first()
            )

            if obj is not None:
                # Existing row: append the new fragments, skipping exact repeats so
                # re-recording the same explanation does not grow the field.
                if translation:
                    obj.translation = self._append_explanation(obj.translation, translation)
                if meaning:
                    obj.meaning = self._append_explanation(obj.meaning, meaning)

                # An explicit explanation always marks the term as jargon.
                obj.is_jargon = True
                obj.save()

                logger.info(f"更新jargon解释: {content}, translation={obj.translation}, meaning={obj.meaning}")
                # Prefer the meaning; fall back to the translation.
                explanation = obj.meaning or obj.translation or ""
                return {"name": self.name, "content": f"你了解到 {content}的含义应该是 {explanation}"}

            # No row yet: create one for this chat.
            Jargon.create(
                content=content,
                chat_id=self.chat_id,
                translation=translation,
                meaning=meaning,
                is_jargon=True,
                is_global=False,
                count=0,
            )

            logger.info(f"记录新jargon解释: {content}, translation={translation}, meaning={meaning}")
            # Prefer the meaning; fall back to the translation.
            explanation = meaning or translation or ""
            return {"name": self.name, "content": f"你了解到 {content}的含义应该是 {explanation}"}

        except Exception as exc:
            logger.error(f"记录jargon解释失败: {exc}", exc_info=True)
            return {"name": self.name, "content": f"记录jargon解释失败: {exc}"}
|
||||||
|
|
||||||
|
|
||||||
|
class LookupJargonMeaningTool(BaseTool):
    """Tool that looks up a stored explanation for a possibly unclear word or abbreviation.

    The lookup first considers rows of the current chat and global rows, then
    falls back to rows from any chat. Only rows with a non-empty meaning or
    translation count as hits. A hit returns a formatted sentence containing
    both fields; a miss returns an empty string.
    """

    name: str = "lookup_jargon_meaning"
    description: str = (
        "查询是否存在已知的jargon解释(含meaning或translation),若存在返回解释,否则返回空字符串"
    )
    parameters: List[Tuple[str, ToolParamType, str, bool, None]] = [
        ("content", ToolParamType.STRING, "待查询的目标词汇(黑话/俚语/缩写)", True, None),
    ]
    available_for_llm: bool = True

    def _find_explained(self, content: str, *, any_chat: bool):
        """Return the first row for ``content`` with a meaning or translation, or None.

        With ``any_chat=False`` only rows of the current chat or global rows
        are considered; with ``any_chat=True`` the whole table is searched.
        """
        has_explanation = (
            ((Jargon.meaning.is_null(False)) & (Jargon.meaning != ""))
            | ((Jargon.translation.is_null(False)) & (Jargon.translation != ""))
        )
        condition = Jargon.content == content
        if not any_chat:
            condition = condition & ((Jargon.chat_id == self.chat_id) | Jargon.is_global)
        return Jargon.select().where(condition & has_explanation).first()

    async def execute(self, function_args: Dict[str, Any]) -> Dict[str, str]:
        """Look up ``function_args["content"]`` and return its stored explanation.

        Returns:
            dict: ``{"name": <tool name>, "content": <text>}``. ``<text>`` is the
            formatted explanation on a hit, and ``""`` when there is no chat id,
            no term, no matching row, or the query failed (failures are logged).
        """
        if not self.chat_id:
            # Same return structure as the other tools.
            return {"name": self.name, "content": ""}

        try:
            raw_term = function_args.get("content")
            content = "" if raw_term is None else str(raw_term).strip()
            if not content:
                return {"name": self.name, "content": ""}

            # Scoped lookup first; the table-wide fallback raises the hit rate.
            for any_chat in (False, True):
                obj = self._find_explained(content, any_chat=any_chat)
                if obj is None:
                    continue
                translation = (obj.translation or "").strip()
                meaning = (obj.meaning or "").strip()
                formatted = f"“{content}可能为黑话或者网络简写,翻译为:{translation},含义为:{meaning}”"
                return {"name": self.name, "content": formatted}

            # No hit anywhere.
            return {"name": self.name, "content": ""}
        except Exception as exc:
            logger.error(f"查询jargon解释失败: {exc}", exc_info=True)
            return {"name": self.name, "content": ""}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,56 @@
|
||||||
|
from typing import List, Tuple, Type
|
||||||
|
|
||||||
|
# 导入新插件系统
|
||||||
|
from src.plugin_system import BasePlugin, ComponentInfo, register_plugin
|
||||||
|
from src.plugin_system.base.config_types import ConfigField
|
||||||
|
|
||||||
|
# 导入依赖的系统组件
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
|
||||||
|
from src.plugins.built_in.jargon.jargon_explanation import RecordJargonExplanationTool, LookupJargonMeaningTool
|
||||||
|
|
||||||
|
logger = get_logger("jargon_plugin")
|
||||||
|
|
||||||
|
|
||||||
|
@register_plugin
class JargonPlugin(BasePlugin):
    """Built-in plugin bundling the jargon tools.

    Registers two tool components:
    - RecordJargonExplanationTool: records jargon meanings that were explicitly
      explained in chat.
    - LookupJargonMeaningTool: looks up whether an unknown word already has a
      stored explanation.

    Note (carried over from the original): basic plugin information is read
    from the ``_manifest.json`` file in preference to the values here.
    """

    # Basic plugin information.
    plugin_name: str = "jargon"  # internal identifier
    enable_plugin: bool = True
    dependencies: list[str] = []  # plugin dependencies
    python_dependencies: list[str] = []  # Python package dependencies
    config_file_name: str = "config.toml"

    # Descriptions of the configuration sections.
    config_section_descriptions = {
        "plugin": "插件启用配置",
        "components": "核心组件启用配置",
    }

    # Configuration schema.
    config_schema: dict = {
        "plugin": {
            "enabled": ConfigField(type=bool, default=True, description="是否启用插件"),
            "config_version": ConfigField(type=str, default="1.0.0", description="配置文件版本"),
        },
    }

    def get_plugin_components(self) -> List[Tuple[ComponentInfo, Type]]:
        """Return ``(tool_info, tool_class)`` pairs for every tool this plugin provides."""
        tool_classes = (RecordJargonExplanationTool, LookupJargonMeaningTool)
        return [(tool_cls.get_tool_info(), tool_cls) for tool_cls in tool_classes]
|
||||||
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
[inner]
|
[inner]
|
||||||
version = "6.19.2"
|
version = "6.20.1"
|
||||||
|
|
||||||
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
||||||
#如果你想要修改配置文件,请递增version的值
|
#如果你想要修改配置文件,请递增version的值
|
||||||
|
|
@ -239,7 +239,15 @@ key_file = "" # SSL密钥文件路径,仅在use_wss=true时有效
|
||||||
enable = true
|
enable = true
|
||||||
|
|
||||||
[experimental] #实验性功能
|
[experimental] #实验性功能
|
||||||
none = false # 暂无
|
# 为指定聊天添加额外的prompt配置
|
||||||
|
# 格式: ["platform:id:type:prompt内容", ...]
|
||||||
|
# 示例:
|
||||||
|
# chat_prompts = [
|
||||||
|
# "qq:114514:group:这是一个摄影群,你精通摄影知识",
|
||||||
|
# "qq:19198:group:这是一个二次元交流群",
|
||||||
|
# "qq:114514:private:这是你与好朋友的私聊"
|
||||||
|
# ]
|
||||||
|
chat_prompts = []
|
||||||
|
|
||||||
|
|
||||||
#此系统暂时移除,无效配置
|
#此系统暂时移除,无效配置
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue