mirror of https://github.com/Mai-with-u/MaiBot.git
黑话提取新模式
parent
f5b5c009bf
commit
bc2431ce98
|
|
@ -34,7 +34,7 @@ def init_prompt() -> None:
|
|||
你的名字是{bot_name},现在请你完成两个提取任务
|
||||
任务1:请从上面这段群聊中用户的语言风格和说话方式
|
||||
1. 只考虑文字,不要考虑表情包和图片
|
||||
2. 不要总结SELF的发言
|
||||
2. 不要总结SELF的发言,因为这是你自己的发言,不要重复学习你自己的发言
|
||||
3. 不要涉及具体的人名,也不要涉及具体名词
|
||||
4. 思考有没有特殊的梗,一并总结成语言风格
|
||||
5. 例子仅供参考,请严格根据群聊内容总结!!!
|
||||
|
|
|
|||
|
|
@ -588,12 +588,27 @@ class BrainChatting:
|
|||
|
||||
elif action_planner_info.action_type == "reply":
|
||||
try:
|
||||
# 从 Planner 的 action_data 中提取未知词语列表(仅在 reply 时使用)
|
||||
unknown_words = None
|
||||
if isinstance(action_planner_info.action_data, dict):
|
||||
uw = action_planner_info.action_data.get("unknown_words")
|
||||
if isinstance(uw, list):
|
||||
cleaned_uw: List[str] = []
|
||||
for item in uw:
|
||||
if isinstance(item, str):
|
||||
s = item.strip()
|
||||
if s:
|
||||
cleaned_uw.append(s)
|
||||
if cleaned_uw:
|
||||
unknown_words = cleaned_uw
|
||||
|
||||
success, llm_response = await generator_api.generate_reply(
|
||||
chat_stream=self.chat_stream,
|
||||
reply_message=action_planner_info.action_message,
|
||||
available_actions=available_actions,
|
||||
chosen_actions=chosen_action_plan_infos,
|
||||
reply_reason=action_planner_info.reasoning or "",
|
||||
unknown_words=unknown_words,
|
||||
enable_tool=global_config.tool.enable_tool,
|
||||
request_type="replyer",
|
||||
from_plugin=False,
|
||||
|
|
|
|||
|
|
@ -638,12 +638,27 @@ class HeartFChatting:
|
|||
action_reasoning=reason,
|
||||
)
|
||||
|
||||
# 从 Planner 的 action_data 中提取未知词语列表(仅在 reply 时使用)
|
||||
unknown_words = None
|
||||
if isinstance(action_planner_info.action_data, dict):
|
||||
uw = action_planner_info.action_data.get("unknown_words")
|
||||
if isinstance(uw, list):
|
||||
cleaned_uw: List[str] = []
|
||||
for item in uw:
|
||||
if isinstance(item, str):
|
||||
s = item.strip()
|
||||
if s:
|
||||
cleaned_uw.append(s)
|
||||
if cleaned_uw:
|
||||
unknown_words = cleaned_uw
|
||||
|
||||
success, llm_response = await generator_api.generate_reply(
|
||||
chat_stream=self.chat_stream,
|
||||
reply_message=action_planner_info.action_message,
|
||||
available_actions=available_actions,
|
||||
chosen_actions=chosen_action_plan_infos,
|
||||
reply_reason=planner_reasoning,
|
||||
unknown_words=unknown_words,
|
||||
enable_tool=global_config.tool.enable_tool,
|
||||
request_type="replyer",
|
||||
from_plugin=False,
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ reply
|
|||
3.最好一次对一个话题进行回复,免得啰嗦或者回复内容太乱。
|
||||
4.不要选择回复你自己发送的消息
|
||||
5.不要单独对表情包进行回复
|
||||
6.将上下文中所有含义不明的,疑似黑话的,缩写词均写入unknown_words中
|
||||
7.用一句简单的话来描述当前回复场景,不超过10个字
|
||||
{reply_action_example}
|
||||
|
||||
no_reply
|
||||
|
|
@ -478,10 +480,19 @@ class ActionPlanner:
|
|||
name_block = f"你的名字是{bot_name}{bot_nickname},请注意哪些是你自己的发言。"
|
||||
|
||||
# 根据 think_mode 配置决定 reply action 的示例 JSON
|
||||
# 在 JSON 中直接作为 action 参数携带 unknown_words
|
||||
if global_config.chat.think_mode == "classic":
|
||||
reply_action_example = '{{"action":"reply", "target_messamge_id":"消息id(m+数字)"}}'
|
||||
reply_action_example = (
|
||||
'{{"action":"reply", "target_messamge_id":"消息id(m+数字)", '
|
||||
'"unknown_words":["词语1","词语2"]}}'
|
||||
)
|
||||
else:
|
||||
reply_action_example = '5.think_level表示思考深度,0表示该回复不需要思考和回忆,1表示该回复需要进行回忆和思考\n{{"action":"reply", "think_level":数值等级(0或1), "target_messamge_id":"消息id(m+数字)"}}'
|
||||
reply_action_example = (
|
||||
"5.think_level表示思考深度,0表示该回复不需要思考和回忆,1表示该回复需要进行回忆和思考\n"
|
||||
+ '{{"action":"reply", "think_level":数值等级(0或1), '
|
||||
'"target_messamge_id":"消息id(m+数字)", '
|
||||
'"unknown_words":["词语1","词语2"]}}'
|
||||
)
|
||||
|
||||
planner_prompt_template = await global_prompt_manager.get_prompt_async("planner_prompt")
|
||||
prompt = planner_prompt_template.format(
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt
|
|||
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
|
||||
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
|
||||
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
|
||||
from src.bw_learner.jargon_explainer import explain_jargon_in_context
|
||||
from src.bw_learner.jargon_explainer import explain_jargon_in_context, retrieve_concepts_with_jargon
|
||||
|
||||
init_lpmm_prompt()
|
||||
init_replyer_prompt()
|
||||
|
|
@ -73,6 +73,7 @@ class DefaultReplyer:
|
|||
reply_message: Optional[DatabaseMessages] = None,
|
||||
reply_time_point: Optional[float] = time.time(),
|
||||
think_level: int = 1,
|
||||
unknown_words: Optional[List[str]] = None,
|
||||
) -> Tuple[bool, LLMGenerationDataModel]:
|
||||
# sourcery skip: merge-nested-ifs
|
||||
"""
|
||||
|
|
@ -110,6 +111,7 @@ class DefaultReplyer:
|
|||
reply_reason=reply_reason,
|
||||
reply_time_point=reply_time_point,
|
||||
think_level=think_level,
|
||||
unknown_words=unknown_words,
|
||||
)
|
||||
llm_response.prompt = prompt
|
||||
llm_response.selected_expressions = selected_expressions
|
||||
|
|
@ -492,6 +494,53 @@ class DefaultReplyer:
|
|||
"""当关闭黑话解释时使用的占位协程,避免额外的LLM调用"""
|
||||
return ""
|
||||
|
||||
async def _build_unknown_words_jargon(self, unknown_words: Optional[List[str]], chat_id: str) -> str:
    """Run a jargon lookup for the unknown words supplied by the Planner.

    Args:
        unknown_words: Candidate terms handed over by the Planner; may be
            ``None`` or empty, in which case nothing is looked up.
        chat_id: Identifier of the chat, forwarded to the retrieval helper.

    Returns:
        Whatever ``retrieve_concepts_with_jargon`` returns for the cleaned
        terms, or an empty string when there is nothing to look up or the
        retrieval raises.
    """
    if not unknown_words:
        return ""

    # Keep only string entries, strip surrounding whitespace, and drop the
    # ones that end up empty.
    stripped_terms = (entry.strip() for entry in unknown_words if isinstance(entry, str))
    concepts: List[str] = [term for term in stripped_terms if term]
    if not concepts:
        return ""

    try:
        explanation = await retrieve_concepts_with_jargon(concepts, chat_id)
    except Exception as exc:
        # Best effort: a failed lookup is logged and yields no jargon text.
        logger.error(f"未知词语黑话检索失败: {exc}")
        return ""
    return explanation
|
||||
|
||||
async def _build_jargon_explanation(
    self,
    chat_id: str,
    messages_short: List[DatabaseMessages],
    chat_talking_prompt_short: str,
    unknown_words: Optional[List[str]],
) -> str:
    """Build the jargon explanation using the configured strategy.

    The strategy is selected from ``global_config.expression``:

    - ``enable_jargon_explanation`` falsy: return an empty string.
    - ``jargon_mode == "planner"``: use only the Planner's ``unknown_words``.
    - any other mode (default ``"context"``): match jargon against the
      recent chat context.

    Args:
        chat_id: Identifier of the chat.
        messages_short: Recent messages passed to the context-based explainer.
        chat_talking_prompt_short: Rendered recent chat text passed to the
            context-based explainer.
        unknown_words: Terms supplied by the Planner; only used in
            ``"planner"`` mode.

    Returns:
        The explanation produced by the selected strategy, or an empty string
        when the feature is disabled or the context-based explainer raises.
    """
    expression_config = global_config.expression

    # getattr defaults apply when the config object lacks these attributes.
    if not getattr(expression_config, "enable_jargon_explanation", True):
        return ""

    mode = getattr(expression_config, "jargon_mode", "context")
    if mode == "planner":
        # Planner mode: rely solely on the Planner-provided unknown words.
        return await self._build_unknown_words_jargon(unknown_words, chat_id)

    # Default / context mode: match jargon automatically from the context.
    try:
        explanation = await explain_jargon_in_context(chat_id, messages_short, chat_talking_prompt_short)
    except Exception as exc:
        logger.error(f"上下文黑话解释失败: {exc}")
        return ""
    return explanation
|
||||
|
||||
def build_chat_history_prompts(
|
||||
self, message_list_before_now: List[DatabaseMessages], target_user_id: str, sender: str
|
||||
) -> Tuple[str, str]:
|
||||
|
|
@ -676,16 +725,10 @@ class DefaultReplyer:
|
|||
# 判断是否为群聊
|
||||
is_group = stream_type == "group"
|
||||
|
||||
# 使用与 ChatStream.get_stream_id 相同的逻辑生成 chat_id
|
||||
import hashlib
|
||||
|
||||
if is_group:
|
||||
components = [platform, str(id_str)]
|
||||
else:
|
||||
components = [platform, str(id_str), "private"]
|
||||
key = "_".join(components)
|
||||
chat_id = hashlib.md5(key.encode()).hexdigest()
|
||||
# 使用 ChatManager 提供的接口生成 chat_id,避免在此重复实现逻辑
|
||||
from src.chat.message_receive.chat_stream import get_chat_manager
|
||||
|
||||
chat_id = get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group)
|
||||
return chat_id, prompt_content
|
||||
|
||||
except (ValueError, IndexError):
|
||||
|
|
@ -739,6 +782,7 @@ class DefaultReplyer:
|
|||
enable_tool: bool = True,
|
||||
reply_time_point: Optional[float] = time.time(),
|
||||
think_level: int = 1,
|
||||
unknown_words: Optional[List[str]] = None,
|
||||
) -> Tuple[str, List[int], List[str], str]:
|
||||
"""
|
||||
构建回复器上下文
|
||||
|
|
@ -823,14 +867,12 @@ class DefaultReplyer:
|
|||
show_actions=True,
|
||||
)
|
||||
|
||||
# 根据配置决定是否启用黑话解释
|
||||
enable_jargon_explanation = getattr(global_config.expression, "enable_jargon_explanation", True)
|
||||
if enable_jargon_explanation:
|
||||
jargon_coroutine = explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short)
|
||||
else:
|
||||
jargon_coroutine = self._build_disabled_jargon_explanation()
|
||||
# 统一黑话解释构建:根据配置选择上下文或 Planner 模式
|
||||
jargon_coroutine = self._build_jargon_explanation(
|
||||
chat_id, message_list_before_short, chat_talking_prompt_short, unknown_words
|
||||
)
|
||||
|
||||
# 并行执行八个构建任务(包括黑话解释,可配置关闭)
|
||||
# 并行执行构建任务(包括黑话解释,可配置关闭)
|
||||
task_results = await asyncio.gather(
|
||||
self._time_and_run_task(
|
||||
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level),
|
||||
|
|
|
|||
|
|
@ -135,14 +135,9 @@ class ChatConfig(ConfigBase):
|
|||
|
||||
is_group = stream_type == "group"
|
||||
|
||||
import hashlib
|
||||
from src.chat.message_receive.chat_stream import get_chat_manager
|
||||
|
||||
if is_group:
|
||||
components = [platform, str(id_str)]
|
||||
else:
|
||||
components = [platform, str(id_str), "private"]
|
||||
key = "_".join(components)
|
||||
return hashlib.md5(key.encode()).hexdigest()
|
||||
return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group)
|
||||
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
|
@ -328,6 +323,13 @@ class ExpressionConfig(ConfigBase):
|
|||
enable_jargon_explanation: bool = True
|
||||
"""是否在回复前尝试对上下文中的黑话进行解释(关闭可减少一次LLM调用,仅影响回复前的黑话匹配与解释,不影响黑话学习)"""
|
||||
|
||||
jargon_mode: Literal["context", "planner"] = "context"
|
||||
"""
|
||||
黑话解释来源模式:
|
||||
- "context": 使用上下文自动匹配黑话并解释(原有模式)
|
||||
- "planner": 仅使用 Planner 在 reply 动作中给出的 unknown_words 列表进行黑话检索
|
||||
"""
|
||||
|
||||
def _parse_stream_config_to_chat_id(self, stream_config_str: str) -> Optional[str]:
|
||||
"""
|
||||
解析流配置字符串并生成对应的 chat_id
|
||||
|
|
@ -350,15 +352,10 @@ class ExpressionConfig(ConfigBase):
|
|||
# 判断是否为群聊
|
||||
is_group = stream_type == "group"
|
||||
|
||||
# 使用与 ChatStream.get_stream_id 相同的逻辑生成 chat_id
|
||||
import hashlib
|
||||
# 使用 ChatManager 提供的接口生成 chat_id,避免在此重复实现逻辑
|
||||
from src.chat.message_receive.chat_stream import get_chat_manager
|
||||
|
||||
if is_group:
|
||||
components = [platform, str(id_str)]
|
||||
else:
|
||||
components = [platform, str(id_str), "private"]
|
||||
key = "_".join(components)
|
||||
return hashlib.md5(key.encode()).hexdigest()
|
||||
return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group)
|
||||
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ async def generate_reply(
|
|||
reply_reason: str = "",
|
||||
available_actions: Optional[Dict[str, ActionInfo]] = None,
|
||||
chosen_actions: Optional[List["ActionPlannerInfo"]] = None,
|
||||
unknown_words: Optional[List[str]] = None,
|
||||
enable_tool: bool = False,
|
||||
enable_splitter: bool = True,
|
||||
enable_chinese_typo: bool = True,
|
||||
|
|
@ -104,6 +105,7 @@ async def generate_reply(
|
|||
reply_reason: 回复原因
|
||||
available_actions: 可用动作
|
||||
chosen_actions: 已选动作
|
||||
unknown_words: Planner 在 reply 动作中给出的未知词语列表,用于黑话检索
|
||||
enable_tool: 是否启用工具调用
|
||||
enable_splitter: 是否启用消息分割器
|
||||
enable_chinese_typo: 是否启用错字生成器
|
||||
|
|
@ -123,11 +125,24 @@ async def generate_reply(
|
|||
logger.error("[GeneratorAPI] 无法获取回复器")
|
||||
return False, None
|
||||
|
||||
if not extra_info and action_data:
|
||||
extra_info = action_data.get("extra_info", "")
|
||||
|
||||
if not reply_reason and action_data:
|
||||
reply_reason = action_data.get("reason", "")
|
||||
if action_data:
|
||||
if not extra_info:
|
||||
extra_info = action_data.get("extra_info", "")
|
||||
if not reply_reason:
|
||||
reply_reason = action_data.get("reason", "")
|
||||
# 仅在 reply 场景下使用的未知词语解析(Planner JSON 中下发)
|
||||
if unknown_words is None:
|
||||
uw = action_data.get("unknown_words")
|
||||
if isinstance(uw, list):
|
||||
# 只保留非空字符串
|
||||
cleaned: List[str] = []
|
||||
for item in uw:
|
||||
if isinstance(item, str):
|
||||
s = item.strip()
|
||||
if s:
|
||||
cleaned.append(s)
|
||||
if cleaned:
|
||||
unknown_words = cleaned
|
||||
|
||||
# 调用回复器生成回复
|
||||
success, llm_response = await replyer.generate_reply_with_context(
|
||||
|
|
@ -137,6 +152,7 @@ async def generate_reply(
|
|||
enable_tool=enable_tool,
|
||||
reply_message=reply_message,
|
||||
reply_reason=reply_reason,
|
||||
unknown_words=unknown_words,
|
||||
think_level=think_level,
|
||||
from_plugin=from_plugin,
|
||||
stream_id=chat_stream.stream_id if chat_stream else chat_id,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
[inner]
|
||||
version = "7.2.0"
|
||||
version = "7.2.1"
|
||||
|
||||
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
||||
# 如果你想要修改配置文件,请递增version的值
|
||||
|
|
@ -94,6 +94,7 @@ allow_reflect = [] # 允许进行表达反思的聊天流ID列表,格式:["q
|
|||
|
||||
all_global_jargon = true # 是否开启全局黑话模式,注意,此功能关闭后,已经记录的全局黑话不会改变,需要手动删除
|
||||
enable_jargon_explanation = true # 是否在回复前尝试对上下文中的黑话进行解释(关闭可减少一次LLM调用,仅影响回复前的黑话匹配与解释,不影响黑话学习)
|
||||
jargon_mode = "context" # 黑话解释来源模式,可选: "context"(使用上下文自动匹配黑话) 或 "planner"(仅使用Planner在reply动作中给出的unknown_words列表)
|
||||
|
||||
|
||||
[chat] # 麦麦的聊天设置
|
||||
|
|
|
|||
Loading…
Reference in New Issue