New jargon extraction mode

pull/1445/head
SengokuCola 2025-12-18 21:01:32 +08:00
parent f5b5c009bf
commit bc2431ce98
8 changed files with 138 additions and 41 deletions


@ -34,7 +34,7 @@ def init_prompt() -> None:
Your name is {bot_name}. Now please complete two extraction tasks.
Task 1: From the group chat above, summarize the users' language style and way of speaking.
1. Consider only the text; do not consider stickers or images.
2. Do not summarize SELF's messages.
2. Do not summarize SELF's messages, because those are your own messages; do not repeatedly learn from your own messages.
3. Do not mention specific personal names or other specific nouns.
4. Consider whether there are any special memes and summarize them into the language style as well.
5. The examples are for reference only; summarize strictly based on the group chat content!!!


@ -588,12 +588,27 @@ class BrainChatting:
elif action_planner_info.action_type == "reply":
try:
# Extract the unknown-word list from the Planner's action_data (only used for reply)
unknown_words = None
if isinstance(action_planner_info.action_data, dict):
uw = action_planner_info.action_data.get("unknown_words")
if isinstance(uw, list):
cleaned_uw: List[str] = []
for item in uw:
if isinstance(item, str):
s = item.strip()
if s:
cleaned_uw.append(s)
if cleaned_uw:
unknown_words = cleaned_uw
success, llm_response = await generator_api.generate_reply(
chat_stream=self.chat_stream,
reply_message=action_planner_info.action_message,
available_actions=available_actions,
chosen_actions=chosen_action_plan_infos,
reply_reason=action_planner_info.reasoning or "",
unknown_words=unknown_words,
enable_tool=global_config.tool.enable_tool,
request_type="replyer",
from_plugin=False,

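The unknown_words cleaning loop above is repeated verbatim in the HeartFChatting hunk below and again inside generator_api.generate_reply further down. A minimal sketch of a shared helper that could factor it out; the name clean_unknown_words and its placement are hypothetical and not part of this commit:

from typing import Any, List, Optional

def clean_unknown_words(value: Any) -> Optional[List[str]]:
    # Keep only stripped, non-empty strings; return None if nothing usable remains.
    if not isinstance(value, list):
        return None
    cleaned = [item.strip() for item in value if isinstance(item, str) and item.strip()]
    return cleaned or None

Callers could then reduce each inline block to a single call such as clean_unknown_words(action_planner_info.action_data.get("unknown_words")), guarded by the existing isinstance check on action_data.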

@ -638,12 +638,27 @@ class HeartFChatting:
action_reasoning=reason,
)
# Extract the unknown-word list from the Planner's action_data (only used for reply)
unknown_words = None
if isinstance(action_planner_info.action_data, dict):
uw = action_planner_info.action_data.get("unknown_words")
if isinstance(uw, list):
cleaned_uw: List[str] = []
for item in uw:
if isinstance(item, str):
s = item.strip()
if s:
cleaned_uw.append(s)
if cleaned_uw:
unknown_words = cleaned_uw
success, llm_response = await generator_api.generate_reply(
chat_stream=self.chat_stream,
reply_message=action_planner_info.action_message,
available_actions=available_actions,
chosen_actions=chosen_action_plan_infos,
reply_reason=planner_reasoning,
unknown_words=unknown_words,
enable_tool=global_config.tool.enable_tool,
request_type="replyer",
from_plugin=False,


@ -51,6 +51,8 @@ reply
3. It is best to reply to one topic at a time, to avoid being wordy or making the reply too messy.
4. Do not choose to reply to messages that you sent yourself.
5. Do not reply to a sticker on its own.
6. Write every abbreviation in the context whose meaning is unclear and that looks like jargon into unknown_words.
7. Describe the current reply scenario in one simple sentence of no more than 10 characters.
{reply_action_example}
no_reply
@ -478,10 +480,19 @@ class ActionPlanner:
name_block = f"Your name is {bot_name}{bot_nickname}; please pay attention to which messages are your own."
# Decide the example JSON for the reply action according to the think_mode config
# Carry unknown_words directly as an action parameter in the JSON
if global_config.chat.think_mode == "classic":
reply_action_example = '{{"action":"reply", "target_messamge_id":"message id (m+number)"}}'
reply_action_example = (
'{{"action":"reply", "target_messamge_id":"message id (m+number)", '
'"unknown_words":["word1","word2"]}}'
)
else:
reply_action_example = '5.think_level indicates the depth of thinking: 0 means the reply needs no thinking or recall, 1 means the reply requires recall and thinking\n{{"action":"reply", "think_level":numeric level (0 or 1), "target_messamge_id":"message id (m+number)"}}'
reply_action_example = (
"5.think_level indicates the depth of thinking: 0 means the reply needs no thinking or recall, 1 means the reply requires recall and thinking\n"
+ '{{"action":"reply", "think_level":numeric level (0 or 1), '
'"target_messamge_id":"message id (m+number)", '
'"unknown_words":["word1","word2"]}}'
)
)
planner_prompt_template = await global_prompt_manager.get_prompt_async("planner_prompt")
prompt = planner_prompt_template.format(

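For illustration, once str.format resolves the escaped double braces, the Planner is now expected to emit reply actions shaped like the following (the message id and the words are made-up examples, not values from this commit):

classic think_mode:
{"action": "reply", "target_messamge_id": "m3", "unknown_words": ["yyds", "xswl"]}

with think_level enabled:
{"action": "reply", "think_level": 1, "target_messamge_id": "m3", "unknown_words": ["yyds", "xswl"]}

The extraction code in BrainChatting and HeartFChatting then strips and filters these strings before passing them on to generate_reply.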

@ -35,7 +35,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
from src.bw_learner.jargon_explainer import explain_jargon_in_context
from src.bw_learner.jargon_explainer import explain_jargon_in_context, retrieve_concepts_with_jargon
init_lpmm_prompt()
init_replyer_prompt()
@ -73,6 +73,7 @@ class DefaultReplyer:
reply_message: Optional[DatabaseMessages] = None,
reply_time_point: Optional[float] = time.time(),
think_level: int = 1,
unknown_words: Optional[List[str]] = None,
) -> Tuple[bool, LLMGenerationDataModel]:
# sourcery skip: merge-nested-ifs
"""
@ -110,6 +111,7 @@ class DefaultReplyer:
reply_reason=reply_reason,
reply_time_point=reply_time_point,
think_level=think_level,
unknown_words=unknown_words,
)
llm_response.prompt = prompt
llm_response.selected_expressions = selected_expressions
@ -492,6 +494,53 @@ class DefaultReplyer:
"""Placeholder coroutine used when jargon explanation is disabled, avoiding an extra LLM call"""
return ""
async def _build_unknown_words_jargon(self, unknown_words: Optional[List[str]], chat_id: str) -> str:
"""Run jargon retrieval for the unknown-word list provided by the Planner"""
if not unknown_words:
return ""
# Clean the unknown-word list, keeping only non-empty strings
concepts: List[str] = []
for item in unknown_words:
if isinstance(item, str):
s = item.strip()
if s:
concepts.append(s)
if not concepts:
return ""
try:
return await retrieve_concepts_with_jargon(concepts, chat_id)
except Exception as e:
logger.error(f"Jargon retrieval for unknown words failed: {e}")
return ""
async def _build_jargon_explanation(
self,
chat_id: str,
messages_short: List[DatabaseMessages],
chat_talking_prompt_short: str,
unknown_words: Optional[List[str]],
) -> str:
"""
Unified jargon-explanation builder.
- The concrete strategy is decided by enable_jargon_explanation / jargon_mode
"""
enable_jargon_explanation = getattr(global_config.expression, "enable_jargon_explanation", True)
if not enable_jargon_explanation:
return ""
jargon_mode = getattr(global_config.expression, "jargon_mode", "context")
# planner mode: use only the unknown_words provided by the Planner
if jargon_mode == "planner":
return await self._build_unknown_words_jargon(unknown_words, chat_id)
# Default / context mode: automatically match jargon from the context
try:
return await explain_jargon_in_context(chat_id, messages_short, chat_talking_prompt_short)
except Exception as e:
logger.error(f"Context-based jargon explanation failed: {e}")
return ""
def build_chat_history_prompts(
self, message_list_before_now: List[DatabaseMessages], target_user_id: str, sender: str
) -> Tuple[str, str]:
@ -676,16 +725,10 @@ class DefaultReplyer:
# Determine whether this is a group chat
is_group = stream_type == "group"
# Generate chat_id with the same logic as ChatStream.get_stream_id
import hashlib
if is_group:
components = [platform, str(id_str)]
else:
components = [platform, str(id_str), "private"]
key = "_".join(components)
chat_id = hashlib.md5(key.encode()).hexdigest()
# Generate chat_id via the interface provided by ChatManager, to avoid re-implementing the logic here
from src.chat.message_receive.chat_stream import get_chat_manager
chat_id = get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group)
return chat_id, prompt_content
except (ValueError, IndexError):
@ -739,6 +782,7 @@ class DefaultReplyer:
enable_tool: bool = True,
reply_time_point: Optional[float] = time.time(),
think_level: int = 1,
unknown_words: Optional[List[str]] = None,
) -> Tuple[str, List[int], List[str], str]:
"""
Build the replyer context
@ -823,14 +867,12 @@ class DefaultReplyer:
show_actions=True,
)
# Decide from the config whether to enable jargon explanation
enable_jargon_explanation = getattr(global_config.expression, "enable_jargon_explanation", True)
if enable_jargon_explanation:
jargon_coroutine = explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short)
else:
jargon_coroutine = self._build_disabled_jargon_explanation()
# Unified jargon-explanation build: choose context or Planner mode based on the config
jargon_coroutine = self._build_jargon_explanation(
chat_id, message_list_before_short, chat_talking_prompt_short, unknown_words
)
# Run the eight build tasks in parallel (including jargon explanation, which can be turned off via config)
# Run the build tasks in parallel (including jargon explanation, which can be turned off via config)
task_results = await asyncio.gather(
self._time_and_run_task(
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level),

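A self-contained sketch of the pattern used here: the jargon coroutine is created once, according to configuration, and only awaited later inside asyncio.gather together with the other build tasks (the task bodies below are placeholders, not the real builders):

import asyncio

async def build_jargon_block(mode: str) -> str:
    # Stand-in for _build_jargon_explanation: returns "" when the feature is off.
    return "" if mode == "off" else f"jargon via {mode}"

async def build_expression_block() -> str:
    return "expression habits"

async def main() -> None:
    jargon_coroutine = build_jargon_block("planner")  # created here, awaited below
    jargon_block, expression_block = await asyncio.gather(jargon_coroutine, build_expression_block())
    print(jargon_block, "|", expression_block)

asyncio.run(main())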

@ -135,14 +135,9 @@ class ChatConfig(ConfigBase):
is_group = stream_type == "group"
import hashlib
from src.chat.message_receive.chat_stream import get_chat_manager
if is_group:
components = [platform, str(id_str)]
else:
components = [platform, str(id_str), "private"]
key = "_".join(components)
return hashlib.md5(key.encode()).hexdigest()
return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group)
except (ValueError, IndexError):
return None
@ -328,6 +323,13 @@ class ExpressionConfig(ConfigBase):
enable_jargon_explanation: bool = True
"""Whether to try to explain jargon in the context before replying. Turning this off saves one LLM call; it only affects pre-reply jargon matching and explanation and does not affect jargon learning."""
jargon_mode: Literal["context", "planner"] = "context"
"""
Jargon-explanation source mode:
- "context": automatically match and explain jargon from the context (the original mode)
- "planner": run jargon retrieval using only the unknown_words list given in the Planner's reply action
"""
def _parse_stream_config_to_chat_id(self, stream_config_str: str) -> Optional[str]:
"""
Parse the stream config string and generate the corresponding chat_id
@ -350,15 +352,10 @@ class ExpressionConfig(ConfigBase):
# Determine whether this is a group chat
is_group = stream_type == "group"
# Generate chat_id with the same logic as ChatStream.get_stream_id
import hashlib
# Generate chat_id via the interface provided by ChatManager, to avoid re-implementing the logic here
from src.chat.message_receive.chat_stream import get_chat_manager
if is_group:
components = [platform, str(id_str)]
else:
components = [platform, str(id_str), "private"]
key = "_".join(components)
return hashlib.md5(key.encode()).hexdigest()
return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group)
except (ValueError, IndexError):
return None
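The same inline chat_id derivation is replaced in three places in this commit (DefaultReplyer, ChatConfig, ExpressionConfig). For reference, the removed code was equivalent to the standalone sketch below, and per the removed comments it matches what ChatStream.get_stream_id computes; the name legacy_stream_id is only illustrative:

import hashlib

def legacy_stream_id(platform: str, id_str: str, is_group: bool) -> str:
    # md5 of "platform_id" for group chats, "platform_id_private" for private chats
    components = [platform, str(id_str)]
    if not is_group:
        components.append("private")
    return hashlib.md5("_".join(components).encode()).hexdigest()

The new call sites simply delegate instead: get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group).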


@ -86,6 +86,7 @@ async def generate_reply(
reply_reason: str = "",
available_actions: Optional[Dict[str, ActionInfo]] = None,
chosen_actions: Optional[List["ActionPlannerInfo"]] = None,
unknown_words: Optional[List[str]] = None,
enable_tool: bool = False,
enable_splitter: bool = True,
enable_chinese_typo: bool = True,
@ -104,6 +105,7 @@ async def generate_reply(
reply_reason: reason for the reply
available_actions: available actions
chosen_actions: chosen actions
unknown_words: list of unknown words given in the Planner's reply action, used for jargon retrieval
enable_tool: whether to enable tool calls
enable_splitter: whether to enable the message splitter
enable_chinese_typo: whether to enable the typo generator
@ -123,11 +125,24 @@ async def generate_reply(
logger.error("[GeneratorAPI] Failed to get a replyer")
return False, None
if not extra_info and action_data:
extra_info = action_data.get("extra_info", "")
if not reply_reason and action_data:
reply_reason = action_data.get("reason", "")
if action_data:
if not extra_info:
extra_info = action_data.get("extra_info", "")
if not reply_reason:
reply_reason = action_data.get("reason", "")
# Parse unknown words used only in the reply scenario (sent down in the Planner JSON)
if unknown_words is None:
uw = action_data.get("unknown_words")
if isinstance(uw, list):
# Keep only non-empty strings
cleaned: List[str] = []
for item in uw:
if isinstance(item, str):
s = item.strip()
if s:
cleaned.append(s)
if cleaned:
unknown_words = cleaned
# Call the replyer to generate the reply
success, llm_response = await replyer.generate_reply_with_context(
@ -137,6 +152,7 @@ async def generate_reply(
enable_tool=enable_tool,
reply_message=reply_message,
reply_reason=reply_reason,
unknown_words=unknown_words,
think_level=think_level,
from_plugin=from_plugin,
stream_id=chat_stream.stream_id if chat_stream else chat_id,

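A hedged usage sketch of the extended API from a caller's perspective; chat_stream, reply_message, and the reason string are placeholders, and the import of generator_api is omitted because its module path is not shown in this diff:

from typing import List, Optional

async def reply_with_jargon_hints(chat_stream, reply_message, words: Optional[List[str]]) -> bool:
    # Pass unknown_words explicitly, or leave it as None so that generate_reply
    # falls back to action_data.get("unknown_words").
    success, _llm_response = await generator_api.generate_reply(
        chat_stream=chat_stream,
        reply_message=reply_message,
        reply_reason="the message contains slang the bot does not recognize",
        unknown_words=words,
        enable_tool=False,
        request_type="replyer",
        from_plugin=True,
    )
    return success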

@ -1,5 +1,5 @@
[inner]
version = "7.2.0"
version = "7.2.1"
#---- The following is for developers; if you have only deployed 麦麦, you do not need to read it ----
# If you want to modify the config file, increment the value of version
@ -94,6 +94,7 @@ allow_reflect = [] # List of chat stream IDs allowed to do expression reflection, format ["q
all_global_jargon = true # Whether to enable global jargon mode. Note: after this feature is turned off, already recorded global jargon will not change and must be deleted manually
enable_jargon_explanation = true # Whether to try to explain jargon in the context before replying. Turning this off saves one LLM call; it only affects pre-reply jargon matching and explanation, not jargon learning
jargon_mode = "context" # Jargon-explanation source mode, options: "context" (automatically match jargon from the context) or "planner" (use only the unknown_words list given by the Planner in the reply action)
[chat] # 麦麦's chat settings
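For completeness, a configuration sketch showing how a deployment would opt into the new planner mode; this assumes the keys above live under an [expression] table (which matches ExpressionConfig, but the table header is not shown in this diff), and the values are only an example:

[expression]
enable_jargon_explanation = true   # keep pre-reply jargon explanation enabled
jargon_mode = "planner"            # retrieve jargon only for the Planner's unknown_words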