mirror of https://github.com/Mai-with-u/MaiBot.git
parent
39ab2b5fab
commit
644d470558
|
|
@ -36,6 +36,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt
|
||||||
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
|
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
|
||||||
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
|
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
|
||||||
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
|
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
|
||||||
|
from src.jargon.jargon_explainer import explain_jargon_in_context
|
||||||
|
|
||||||
init_lpmm_prompt()
|
init_lpmm_prompt()
|
||||||
init_replyer_prompt()
|
init_replyer_prompt()
|
||||||
|
|
@ -786,7 +787,7 @@ class DefaultReplyer:
|
||||||
show_actions=True,
|
show_actions=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 并行执行七个构建任务
|
# 并行执行八个构建任务(包括黑话解释)
|
||||||
task_results = await asyncio.gather(
|
task_results = await asyncio.gather(
|
||||||
self._time_and_run_task(
|
self._time_and_run_task(
|
||||||
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
|
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
|
||||||
|
|
@ -804,6 +805,10 @@ class DefaultReplyer:
|
||||||
),
|
),
|
||||||
"memory_retrieval",
|
"memory_retrieval",
|
||||||
),
|
),
|
||||||
|
self._time_and_run_task(
|
||||||
|
explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short),
|
||||||
|
"jargon_explanation",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# 任务名称中英文映射
|
# 任务名称中英文映射
|
||||||
|
|
@ -816,6 +821,7 @@ class DefaultReplyer:
|
||||||
"personality_prompt": "人格信息",
|
"personality_prompt": "人格信息",
|
||||||
"mood_state_prompt": "情绪状态",
|
"mood_state_prompt": "情绪状态",
|
||||||
"memory_retrieval": "记忆检索",
|
"memory_retrieval": "记忆检索",
|
||||||
|
"jargon_explanation": "黑话解释",
|
||||||
}
|
}
|
||||||
|
|
||||||
# 处理结果
|
# 处理结果
|
||||||
|
|
@ -846,6 +852,7 @@ class DefaultReplyer:
|
||||||
memory_retrieval: str = results_dict["memory_retrieval"]
|
memory_retrieval: str = results_dict["memory_retrieval"]
|
||||||
keywords_reaction_prompt = await self.build_keywords_reaction_prompt(target)
|
keywords_reaction_prompt = await self.build_keywords_reaction_prompt(target)
|
||||||
mood_state_prompt: str = results_dict["mood_state_prompt"]
|
mood_state_prompt: str = results_dict["mood_state_prompt"]
|
||||||
|
jargon_explanation: Optional[str] = results_dict.get("jargon_explanation")
|
||||||
|
|
||||||
# 从 chosen_actions 中提取 planner 的整体思考理由
|
# 从 chosen_actions 中提取 planner 的整体思考理由
|
||||||
planner_reasoning = ""
|
planner_reasoning = ""
|
||||||
|
|
@ -896,6 +903,7 @@ class DefaultReplyer:
|
||||||
mood_state=mood_state_prompt,
|
mood_state=mood_state_prompt,
|
||||||
# relation_info_block=relation_info,
|
# relation_info_block=relation_info,
|
||||||
extra_info_block=extra_info_block,
|
extra_info_block=extra_info_block,
|
||||||
|
jargon_explanation=jargon_explanation,
|
||||||
identity=personality_prompt,
|
identity=personality_prompt,
|
||||||
action_descriptions=actions_info,
|
action_descriptions=actions_info,
|
||||||
sender_name=sender,
|
sender_name=sender,
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ from src.chat.replyer.prompt.lpmm_prompt import init_lpmm_prompt
|
||||||
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
|
from src.chat.replyer.prompt.replyer_prompt import init_replyer_prompt
|
||||||
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
|
from src.chat.replyer.prompt.rewrite_prompt import init_rewrite_prompt
|
||||||
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
|
from src.memory_system.memory_retrieval import init_memory_retrieval_prompt, build_memory_retrieval_prompt
|
||||||
|
from src.jargon.jargon_explainer import explain_jargon_in_context
|
||||||
|
|
||||||
init_lpmm_prompt()
|
init_lpmm_prompt()
|
||||||
init_replyer_prompt()
|
init_replyer_prompt()
|
||||||
|
|
@ -706,7 +707,7 @@ class PrivateReplyer:
|
||||||
show_actions=True,
|
show_actions=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# 并行执行八个构建任务
|
# 并行执行九个构建任务(包括黑话解释)
|
||||||
task_results = await asyncio.gather(
|
task_results = await asyncio.gather(
|
||||||
self._time_and_run_task(
|
self._time_and_run_task(
|
||||||
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
|
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
|
||||||
|
|
@ -725,6 +726,10 @@ class PrivateReplyer:
|
||||||
),
|
),
|
||||||
"memory_retrieval",
|
"memory_retrieval",
|
||||||
),
|
),
|
||||||
|
self._time_and_run_task(
|
||||||
|
explain_jargon_in_context(chat_id, message_list_before_short, chat_talking_prompt_short),
|
||||||
|
"jargon_explanation",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
# 任务名称中英文映射
|
# 任务名称中英文映射
|
||||||
|
|
@ -737,6 +742,7 @@ class PrivateReplyer:
|
||||||
"personality_prompt": "人格信息",
|
"personality_prompt": "人格信息",
|
||||||
"mood_state_prompt": "情绪状态",
|
"mood_state_prompt": "情绪状态",
|
||||||
"memory_retrieval": "记忆检索",
|
"memory_retrieval": "记忆检索",
|
||||||
|
"jargon_explanation": "黑话解释",
|
||||||
}
|
}
|
||||||
|
|
||||||
# 处理结果
|
# 处理结果
|
||||||
|
|
@ -767,6 +773,7 @@ class PrivateReplyer:
|
||||||
mood_state_prompt: str = results_dict["mood_state_prompt"]
|
mood_state_prompt: str = results_dict["mood_state_prompt"]
|
||||||
memory_retrieval: str = results_dict["memory_retrieval"]
|
memory_retrieval: str = results_dict["memory_retrieval"]
|
||||||
keywords_reaction_prompt = await self.build_keywords_reaction_prompt(target)
|
keywords_reaction_prompt = await self.build_keywords_reaction_prompt(target)
|
||||||
|
jargon_explanation: Optional[str] = results_dict.get("jargon_explanation")
|
||||||
|
|
||||||
# 从 chosen_actions 中提取 planner 的整体思考理由
|
# 从 chosen_actions 中提取 planner 的整体思考理由
|
||||||
planner_reasoning = ""
|
planner_reasoning = ""
|
||||||
|
|
@ -813,6 +820,7 @@ class PrivateReplyer:
|
||||||
identity=personality_prompt,
|
identity=personality_prompt,
|
||||||
action_descriptions=actions_info,
|
action_descriptions=actions_info,
|
||||||
dialogue_prompt=dialogue_prompt,
|
dialogue_prompt=dialogue_prompt,
|
||||||
|
jargon_explanation=jargon_explanation,
|
||||||
time_block=time_block,
|
time_block=time_block,
|
||||||
target=target,
|
target=target,
|
||||||
reason=reply_reason,
|
reason=reply_reason,
|
||||||
|
|
@ -835,6 +843,7 @@ class PrivateReplyer:
|
||||||
identity=personality_prompt,
|
identity=personality_prompt,
|
||||||
action_descriptions=actions_info,
|
action_descriptions=actions_info,
|
||||||
dialogue_prompt=dialogue_prompt,
|
dialogue_prompt=dialogue_prompt,
|
||||||
|
jargon_explanation=jargon_explanation,
|
||||||
time_block=time_block,
|
time_block=time_block,
|
||||||
reply_target_block=reply_target_block,
|
reply_target_block=reply_target_block,
|
||||||
reply_style=global_config.personality.reply_style,
|
reply_style=global_config.personality.reply_style,
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ def init_replyer_prompt():
|
||||||
|
|
||||||
Prompt(
|
Prompt(
|
||||||
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
|
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
|
||||||
{expression_habits_block}{memory_retrieval}
|
{expression_habits_block}{memory_retrieval}{jargon_explanation}
|
||||||
|
|
||||||
你正在qq群里聊天,下面是群里正在聊的内容,其中包含聊天记录和聊天中的图片
|
你正在qq群里聊天,下面是群里正在聊的内容,其中包含聊天记录和聊天中的图片
|
||||||
其中标注 {bot_name}(你) 的发言是你自己的发言,请注意区分:
|
其中标注 {bot_name}(你) 的发言是你自己的发言,请注意区分:
|
||||||
|
|
@ -29,7 +29,7 @@ def init_replyer_prompt():
|
||||||
|
|
||||||
Prompt(
|
Prompt(
|
||||||
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
|
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
|
||||||
{expression_habits_block}{memory_retrieval}
|
{expression_habits_block}{memory_retrieval}{jargon_explanation}
|
||||||
|
|
||||||
你正在和{sender_name}聊天,这是你们之前聊的内容:
|
你正在和{sender_name}聊天,这是你们之前聊的内容:
|
||||||
{time_block}
|
{time_block}
|
||||||
|
|
@ -48,7 +48,7 @@ def init_replyer_prompt():
|
||||||
|
|
||||||
Prompt(
|
Prompt(
|
||||||
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
|
"""{knowledge_prompt}{tool_info_block}{extra_info_block}
|
||||||
{expression_habits_block}{memory_retrieval}
|
{expression_habits_block}{memory_retrieval}{jargon_explanation}
|
||||||
|
|
||||||
你正在和{sender_name}聊天,这是你们之前聊的内容:
|
你正在和{sender_name}聊天,这是你们之前聊的内容:
|
||||||
{time_block}
|
{time_block}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,261 @@
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from typing import List, Dict, Optional, Any
|
||||||
|
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
from src.common.database.database_model import Jargon
|
||||||
|
from src.llm_models.utils_model import LLMRequest
|
||||||
|
from src.config.config import model_config, global_config
|
||||||
|
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
||||||
|
from src.jargon.jargon_miner import search_jargon
|
||||||
|
from src.jargon.jargon_utils import is_bot_message, contains_bot_self_name, parse_chat_id_list, chat_id_list_contains
|
||||||
|
|
||||||
|
logger = get_logger("jargon")
|
||||||
|
|
||||||
|
|
||||||
|
def _init_explainer_prompts() -> None:
    """Create the prompt template(s) used by the jargon explainer.

    Builds a ``Prompt`` named ``jargon_explainer_summarize_prompt``. The
    template takes two placeholders: ``chat_context`` (the readable chat
    history) and ``jargon_explanations`` (one "- term: meaning" line per
    matched term).
    """
    # NOTE(review): the Prompt instance is not kept; presumably constructing it
    # registers the template under its name with the global prompt manager,
    # since it is later looked up by that name — confirm in prompt_builder.
    summary_template = """
**上下文聊天内容**
{chat_context}

**提取到的黑话及其含义**
{jargon_explanations}

请根据上述信息,对黑话解释进行概括和整理。
- 如果上下文中有黑话出现,请简要说明这些黑话在上下文中的使用情况
- 将黑话解释整理成简洁、易读的格式
- 如果某个黑话在上下文中没有出现,可以省略
- 输出格式要自然,适合作为回复参考信息

请输出概括后的黑话解释(直接输出文本,不要使用JSON格式):
"""
    Prompt(summary_template, "jargon_explainer_summarize_prompt")
|
||||||
|
|
||||||
|
|
||||||
|
_init_explainer_prompts()
|
||||||
|
|
||||||
|
|
||||||
|
class JargonExplainer:
    """Find known jargon in the chat context and explain it before a reply is built."""

    # Any CJK unified ideograph. Terms containing one are matched as plain
    # substrings, because Chinese text has no word separators to anchor on.
    _CJK_CHAR = re.compile(r"[\u4e00-\u9fff]")
    # A single word character; used to inspect the first/last character of a term.
    _WORD_CHAR = re.compile(r"\w")
    # A word character that is NOT a CJK ideograph. Only such a neighbour makes a
    # hit a "partial word" match; a CJK neighbour is a legitimate boundary
    # (e.g. "yyds" inside "太yyds了").
    _NON_CJK_WORD = r"[^\W\u4e00-\u9fff]"

    def __init__(self, chat_id: str) -> None:
        """Bind the explainer to one chat and create its LLM request helper.

        Args:
            chat_id: ID of the chat whose jargon entries should be considered.
        """
        self.chat_id = chat_id
        self.llm = LLMRequest(
            model_set=model_config.model_task_config.utils,
            request_type="jargon.explain",
        )

    @classmethod
    def _text_contains_term(cls, content: str, text: str) -> bool:
        """Return True when jargon term ``content`` occurs in ``text`` (case-insensitive).

        Terms containing a CJK ideograph are matched as plain substrings. Other
        terms must not be glued to a neighbouring non-CJK word character, which
        keeps "ok" from matching inside "token". The boundary is only enforced
        on a side where the term itself starts/ends with a word character, so a
        term such as "c++" can still match.

        Args:
            content: The jargon term to look for.
            text: The text to search in.

        Returns:
            bool: Whether the term was found.
        """
        if not content:
            return False

        escaped = re.escape(content)
        if cls._CJK_CHAR.search(content):
            pattern = escaped
        else:
            # A plain r"\b" is wrong on both counts handled here: next to a
            # non-word edge ("c++") it *requires* a word character to follow,
            # and it never fires between a CJK character and a Latin letter.
            lead = rf"(?<!{cls._NON_CJK_WORD})" if cls._WORD_CHAR.match(content[0]) else ""
            trail = rf"(?!{cls._NON_CJK_WORD})" if cls._WORD_CHAR.match(content[-1]) else ""
            pattern = f"{lead}{escaped}{trail}"

        return re.search(pattern, text, re.IGNORECASE) is not None

    def match_jargon_from_messages(self, messages: List[Any]) -> List[Dict[str, str]]:
        """Extract jargon by matching stored jargon strings against the messages.

        Messages for which ``is_bot_message`` is true are ignored. Candidate
        entries are the ``Jargon`` rows with a non-empty meaning, ordered by
        ``count`` descending. When ``global_config.jargon.all_global`` is off,
        a non-global row is only used if its chat-id list contains this chat.

        Args:
            messages: Message objects; ``display_message`` is read first, then
                ``processed_plain_text``.

        Returns:
            List[Dict[str, str]]: One ``{"content": term}`` dict per distinct
            matched term, in query order. Empty if nothing matched.
        """
        if not messages:
            return []

        start_time = time.time()

        # Collect the text of every non-bot message.
        message_texts: List[str] = []
        for msg in messages:
            if is_bot_message(msg):
                continue
            msg_text = (
                getattr(msg, "display_message", None) or getattr(msg, "processed_plain_text", None) or ""
            ).strip()
            if msg_text:
                message_texts.append(msg_text)

        if not message_texts:
            return []

        combined_text = " ".join(message_texts)
        all_global = global_config.jargon.all_global

        # Only rows that already have a meaning can be explained.
        query = Jargon.select().where((Jargon.meaning.is_null(False)) & (Jargon.meaning != ""))
        if all_global:
            # all_global on: restrict to global rows in SQL. When it is off the
            # chat-id list is filtered in Python below.
            query = query.where(Jargon.is_global)
        # Most frequent terms first.
        query = query.order_by(Jargon.count.desc())

        # Materialise the rows here so that the query duration logged below
        # covers the actual database round trip, not just query construction.
        candidates = list(query)
        query_time = time.time()

        matched_jargon: Dict[str, Dict[str, str]] = {}
        for jargon in candidates:
            content = jargon.content or ""
            if not content.strip() or content in matched_jargon:
                continue

            # Skip entries that contain the bot's own nickname.
            if contains_bot_self_name(content):
                continue

            # With all_global off, a non-global entry must list this chat.
            if not all_global and not jargon.is_global:
                chat_id_list = parse_chat_id_list(jargon.chat_id)
                if not chat_id_list_contains(chat_id_list, self.chat_id):
                    continue

            if self._text_contains_term(content, combined_text):
                matched_jargon[content] = {"content": content}

        match_time = time.time()
        total_time = match_time - start_time
        query_duration = query_time - start_time
        match_duration = match_time - query_time

        logger.info(
            f"黑话匹配完成: 查询耗时 {query_duration:.3f}s, 匹配耗时 {match_duration:.3f}s, "
            f"总耗时 {total_time:.3f}s, 匹配到 {len(matched_jargon)} 个黑话"
        )

        return list(matched_jargon.values())

    async def explain_jargon(self, messages: List[Any], chat_context: str) -> Optional[str]:
        """Explain the jargon that occurs in the given context.

        Matched terms are looked up with ``search_jargon`` and their meanings
        are then summarised by the LLM. If the LLM call raises or returns
        nothing, the raw "- term: meaning" list is returned instead.

        Args:
            messages: Message objects to scan for jargon.
            chat_context: Text rendering of the chat, passed to the summary prompt.

        Returns:
            Optional[str]: The summary (or raw explanation list), or None when
            no jargon with a meaning was found.
        """
        if not messages:
            return None

        # Direct matching: compare stored jargon against the messages.
        jargon_entries = self.match_jargon_from_messages(messages)
        if not jargon_entries:
            return None

        # Distinct terms, first-seen order preserved.
        contents = list(dict.fromkeys(entry["content"] for entry in jargon_entries))
        logger.info(f"从上下文中提取到 {len(contents)} 个黑话: {contents}")

        # all_global on: no chat_id is passed to the lookup; otherwise this chat's id is.
        lookup_chat_id = None if global_config.jargon.all_global else self.chat_id

        jargon_explanations: List[str] = []
        for content in contents:
            results = search_jargon(
                keyword=content,
                chat_id=lookup_chat_id,
                limit=1,
                case_sensitive=False,
                fuzzy=False,  # exact match
            )
            if not results:
                logger.info(f"黑话 {content} 未在数据库中找到")
                continue

            # `or ""` guards against a stored meaning of None.
            meaning = (results[0].get("meaning") or "").strip()
            if meaning:
                jargon_explanations.append(f"- {content}: {meaning}")
            else:
                logger.info(f"黑话 {content} 没有找到含义")

        if not jargon_explanations:
            logger.info("没有找到任何黑话的含义,跳过解释")
            return None

        explanations_text = "\n".join(jargon_explanations)
        # Used whenever the LLM summary is unavailable.
        fallback = f"上下文中的黑话解释:\n{explanations_text}"

        summarize_prompt = await global_prompt_manager.format_prompt(
            "jargon_explainer_summarize_prompt",
            chat_context=chat_context,
            jargon_explanations=explanations_text,
        )

        try:
            summary, _ = await self.llm.generate_response_async(summarize_prompt, temperature=0.3)
        except Exception as e:
            # The explanation is auxiliary context; an LLM failure should
            # degrade to the raw list rather than abort the caller.
            logger.warning(f"黑话解释概括失败,回退到原始解释: {e}")
            return fallback

        summary = (summary or "").strip()
        return summary or fallback
|
||||||
|
|
||||||
|
|
||||||
|
async def explain_jargon_in_context(chat_id: str, messages: List[Any], chat_context: str) -> Optional[str]:
    """Explain the jargon found in a chat context (convenience wrapper).

    Creates a ``JargonExplainer`` for ``chat_id`` and delegates to its
    ``explain_jargon`` method.

    Args:
        chat_id: ID of the chat.
        messages: Message objects to scan.
        chat_context: Text rendering of the chat context.

    Returns:
        Optional[str]: Summary of the jargon explanations, or None when there
        is no jargon to explain.
    """
    # NOTE(review): the result can be None; callers that interpolate it into a
    # prompt template should probably coerce it to "" first — confirm at call sites.
    return await JargonExplainer(chat_id).explain_jargon(messages, chat_context)
|
||||||
|
|
||||||
|
|
@ -11,127 +11,24 @@ from src.common.database.database_model import Jargon
|
||||||
from src.llm_models.utils_model import LLMRequest
|
from src.llm_models.utils_model import LLMRequest
|
||||||
from src.config.config import model_config, global_config
|
from src.config.config import model_config, global_config
|
||||||
from src.chat.message_receive.chat_stream import get_chat_manager
|
from src.chat.message_receive.chat_stream import get_chat_manager
|
||||||
from src.plugin_system.apis import llm_api
|
|
||||||
from src.chat.utils.chat_message_builder import (
|
from src.chat.utils.chat_message_builder import (
|
||||||
build_readable_messages,
|
|
||||||
build_readable_messages_with_id,
|
build_readable_messages_with_id,
|
||||||
get_raw_msg_by_timestamp_with_chat_inclusive,
|
get_raw_msg_by_timestamp_with_chat_inclusive,
|
||||||
get_raw_msg_before_timestamp_with_chat,
|
|
||||||
build_readable_messages_with_list,
|
|
||||||
)
|
)
|
||||||
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
||||||
from src.chat.utils.utils import parse_platform_accounts
|
from src.jargon.jargon_utils import (
|
||||||
|
is_bot_message,
|
||||||
|
build_context_paragraph,
|
||||||
|
contains_bot_self_name,
|
||||||
|
parse_chat_id_list,
|
||||||
|
chat_id_list_contains,
|
||||||
|
update_chat_id_list
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
logger = get_logger("jargon")
|
logger = get_logger("jargon")
|
||||||
|
|
||||||
|
|
||||||
def _contains_bot_self_name(content: str) -> bool:
|
|
||||||
"""
|
|
||||||
判断词条是否包含机器人的昵称或别名
|
|
||||||
"""
|
|
||||||
if not content:
|
|
||||||
return False
|
|
||||||
|
|
||||||
bot_config = getattr(global_config, "bot", None)
|
|
||||||
if not bot_config:
|
|
||||||
return False
|
|
||||||
|
|
||||||
target = content.strip().lower()
|
|
||||||
nickname = str(getattr(bot_config, "nickname", "") or "").strip().lower()
|
|
||||||
alias_names = [str(alias or "").strip().lower() for alias in getattr(bot_config, "alias_names", []) or []]
|
|
||||||
|
|
||||||
candidates = [name for name in [nickname, *alias_names] if name]
|
|
||||||
|
|
||||||
return any(name in target for name in candidates if target)
|
|
||||||
|
|
||||||
|
|
||||||
def _build_context_paragraph(messages: List[Any], center_index: int) -> Optional[str]:
|
|
||||||
"""
|
|
||||||
构建包含中心消息上下文的段落(前3条+后3条),使用标准的 readable builder 输出
|
|
||||||
"""
|
|
||||||
if not messages or center_index < 0 or center_index >= len(messages):
|
|
||||||
return None
|
|
||||||
|
|
||||||
context_start = max(0, center_index - 3)
|
|
||||||
context_end = min(len(messages), center_index + 1 + 3)
|
|
||||||
context_messages = messages[context_start:context_end]
|
|
||||||
|
|
||||||
if not context_messages:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try:
|
|
||||||
paragraph = build_readable_messages(
|
|
||||||
messages=context_messages,
|
|
||||||
replace_bot_name=True,
|
|
||||||
timestamp_mode="relative",
|
|
||||||
read_mark=0.0,
|
|
||||||
truncate=False,
|
|
||||||
show_actions=False,
|
|
||||||
show_pic=True,
|
|
||||||
message_id_list=None,
|
|
||||||
remove_emoji_stickers=False,
|
|
||||||
pic_single=True,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"构建上下文段落失败: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
paragraph = paragraph.strip()
|
|
||||||
return paragraph or None
|
|
||||||
|
|
||||||
|
|
||||||
def _is_bot_message(msg: Any) -> bool:
|
|
||||||
"""判断消息是否来自机器人自身"""
|
|
||||||
if msg is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
bot_config = getattr(global_config, "bot", None)
|
|
||||||
if not bot_config:
|
|
||||||
return False
|
|
||||||
|
|
||||||
platform = (
|
|
||||||
str(getattr(msg, "user_platform", "") or getattr(getattr(msg, "user_info", None), "platform", "") or "")
|
|
||||||
.strip()
|
|
||||||
.lower()
|
|
||||||
)
|
|
||||||
user_id = (
|
|
||||||
str(getattr(msg, "user_id", "") or getattr(getattr(msg, "user_info", None), "user_id", "") or "")
|
|
||||||
.strip()
|
|
||||||
)
|
|
||||||
|
|
||||||
if not platform or not user_id:
|
|
||||||
return False
|
|
||||||
|
|
||||||
platform_accounts = {}
|
|
||||||
try:
|
|
||||||
platform_accounts = parse_platform_accounts(getattr(bot_config, "platforms", []) or [])
|
|
||||||
except Exception:
|
|
||||||
platform_accounts = {}
|
|
||||||
|
|
||||||
bot_accounts: Dict[str, str] = {}
|
|
||||||
qq_account = str(getattr(bot_config, "qq_account", "") or "").strip()
|
|
||||||
if qq_account:
|
|
||||||
bot_accounts["qq"] = qq_account
|
|
||||||
|
|
||||||
telegram_account = str(getattr(bot_config, "telegram_account", "") or "").strip()
|
|
||||||
if telegram_account:
|
|
||||||
bot_accounts["telegram"] = telegram_account
|
|
||||||
|
|
||||||
for plat, account in platform_accounts.items():
|
|
||||||
if account and plat not in bot_accounts:
|
|
||||||
bot_accounts[plat] = account
|
|
||||||
|
|
||||||
bot_account = bot_accounts.get(platform)
|
|
||||||
return bool(bot_account and user_id == bot_account)
|
|
||||||
|
|
||||||
|
|
||||||
def _has_adjacent_bot_message(messages: List[Any], center_index: int) -> bool:
|
|
||||||
"""检查目标消息的上一条或下一条是否为机器人发言"""
|
|
||||||
for neighbor in (center_index - 1, center_index + 1):
|
|
||||||
if 0 <= neighbor < len(messages) and _is_bot_message(messages[neighbor]):
|
|
||||||
return True
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _init_prompt() -> None:
|
def _init_prompt() -> None:
|
||||||
|
|
@ -176,6 +73,7 @@ def _init_inference_prompts() -> None:
|
||||||
请根据上下文,推断"{content}"这个词条的含义。
|
请根据上下文,推断"{content}"这个词条的含义。
|
||||||
- 如果这是一个黑话、俚语或网络用语,请推断其含义
|
- 如果这是一个黑话、俚语或网络用语,请推断其含义
|
||||||
- 如果含义明确(常规词汇),也请说明
|
- 如果含义明确(常规词汇),也请说明
|
||||||
|
- {bot_name} 的发言内容可能包含错误,请不要参考其发言内容
|
||||||
- 如果上下文信息不足,无法推断含义,请设置 no_info 为 true
|
- 如果上下文信息不足,无法推断含义,请设置 no_info 为 true
|
||||||
|
|
||||||
以 JSON 格式输出:
|
以 JSON 格式输出:
|
||||||
|
|
@ -228,94 +126,6 @@ _init_prompt()
|
||||||
_init_inference_prompts()
|
_init_inference_prompts()
|
||||||
|
|
||||||
|
|
||||||
async def _enrich_raw_content_if_needed(
|
|
||||||
content: str,
|
|
||||||
raw_content_list: List[str],
|
|
||||||
chat_id: str,
|
|
||||||
messages: List[Any],
|
|
||||||
extraction_start_time: float,
|
|
||||||
extraction_end_time: float,
|
|
||||||
) -> List[str]:
|
|
||||||
"""
|
|
||||||
检查raw_content是否只包含黑话本身,如果是,则获取该消息的前三条消息作为原始内容
|
|
||||||
|
|
||||||
Args:
|
|
||||||
content: 黑话内容
|
|
||||||
raw_content_list: 原始raw_content列表
|
|
||||||
chat_id: 聊天ID
|
|
||||||
messages: 当前时间窗口内的消息列表
|
|
||||||
extraction_start_time: 提取开始时间
|
|
||||||
extraction_end_time: 提取结束时间
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
处理后的raw_content列表
|
|
||||||
"""
|
|
||||||
enriched_list = []
|
|
||||||
|
|
||||||
for raw_content in raw_content_list:
|
|
||||||
# 检查raw_content是否只包含黑话本身(去除空白字符后比较)
|
|
||||||
raw_content_clean = raw_content.strip()
|
|
||||||
content_clean = content.strip()
|
|
||||||
|
|
||||||
# 如果raw_content只包含黑话本身(可能有一些标点或空白),则尝试获取上下文
|
|
||||||
# 去除所有空白字符后比较,确保只包含黑话本身
|
|
||||||
raw_content_normalized = raw_content_clean.replace(" ", "").replace("\n", "").replace("\t", "")
|
|
||||||
content_normalized = content_clean.replace(" ", "").replace("\n", "").replace("\t", "")
|
|
||||||
|
|
||||||
if raw_content_normalized == content_normalized:
|
|
||||||
# 在消息列表中查找只包含该黑话的消息(去除空白后比较)
|
|
||||||
target_message = None
|
|
||||||
for msg in messages:
|
|
||||||
msg_content = (msg.processed_plain_text or msg.display_message or "").strip()
|
|
||||||
msg_content_normalized = msg_content.replace(" ", "").replace("\n", "").replace("\t", "")
|
|
||||||
# 检查消息内容是否只包含黑话本身(去除空白后完全匹配)
|
|
||||||
if msg_content_normalized == content_normalized:
|
|
||||||
target_message = msg
|
|
||||||
break
|
|
||||||
|
|
||||||
if target_message and target_message.time:
|
|
||||||
# 获取该消息的前三条消息
|
|
||||||
try:
|
|
||||||
previous_messages = get_raw_msg_before_timestamp_with_chat(
|
|
||||||
chat_id=chat_id, timestamp=target_message.time, limit=3
|
|
||||||
)
|
|
||||||
|
|
||||||
if previous_messages:
|
|
||||||
# 将前三条消息和当前消息一起格式化
|
|
||||||
context_messages = previous_messages + [target_message]
|
|
||||||
# 按时间排序
|
|
||||||
context_messages.sort(key=lambda x: x.time or 0)
|
|
||||||
|
|
||||||
# 格式化为可读消息
|
|
||||||
formatted_context, _ = await build_readable_messages_with_list(
|
|
||||||
context_messages,
|
|
||||||
replace_bot_name=True,
|
|
||||||
timestamp_mode="relative",
|
|
||||||
truncate=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
if formatted_context.strip():
|
|
||||||
enriched_list.append(formatted_context.strip())
|
|
||||||
logger.warning(f"为黑话 {content} 补充了上下文消息")
|
|
||||||
else:
|
|
||||||
# 如果格式化失败,使用原始raw_content
|
|
||||||
enriched_list.append(raw_content)
|
|
||||||
else:
|
|
||||||
# 没有找到前三条消息,使用原始raw_content
|
|
||||||
enriched_list.append(raw_content)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"获取黑话 {content} 的上下文消息失败: {e}")
|
|
||||||
# 出错时使用原始raw_content
|
|
||||||
enriched_list.append(raw_content)
|
|
||||||
else:
|
|
||||||
# 没有找到包含黑话的消息,使用原始raw_content
|
|
||||||
enriched_list.append(raw_content)
|
|
||||||
else:
|
|
||||||
# raw_content包含更多内容,直接使用
|
|
||||||
enriched_list.append(raw_content)
|
|
||||||
|
|
||||||
return enriched_list
|
|
||||||
|
|
||||||
|
|
||||||
def _should_infer_meaning(jargon_obj: Jargon) -> bool:
|
def _should_infer_meaning(jargon_obj: Jargon) -> bool:
|
||||||
"""
|
"""
|
||||||
|
|
@ -402,7 +212,7 @@ class JargonMiner:
|
||||||
|
|
||||||
for idx, msg in enumerate(messages):
|
for idx, msg in enumerate(messages):
|
||||||
msg_text = (getattr(msg, "display_message", None) or getattr(msg, "processed_plain_text", None) or "").strip()
|
msg_text = (getattr(msg, "display_message", None) or getattr(msg, "processed_plain_text", None) or "").strip()
|
||||||
if not msg_text or _is_bot_message(msg):
|
if not msg_text or is_bot_message(msg):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for content in self.cache.keys():
|
for content in self.cache.keys():
|
||||||
|
|
@ -411,9 +221,7 @@ class JargonMiner:
|
||||||
if (content, idx) in processed_pairs:
|
if (content, idx) in processed_pairs:
|
||||||
continue
|
continue
|
||||||
if content in msg_text:
|
if content in msg_text:
|
||||||
if _has_adjacent_bot_message(messages, idx):
|
paragraph = build_context_paragraph(messages, idx)
|
||||||
continue
|
|
||||||
paragraph = _build_context_paragraph(messages, idx)
|
|
||||||
if not paragraph:
|
if not paragraph:
|
||||||
continue
|
continue
|
||||||
cached_entries.append({"content": content, "raw_content": [paragraph]})
|
cached_entries.append({"content": content, "raw_content": [paragraph]})
|
||||||
|
|
@ -719,7 +527,7 @@ class JargonMiner:
|
||||||
if not content:
|
if not content:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if _contains_bot_self_name(content):
|
if contains_bot_self_name(content):
|
||||||
logger.info(f"解析阶段跳过包含机器人昵称/别名的词条: {content}")
|
logger.info(f"解析阶段跳过包含机器人昵称/别名的词条: {content}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -734,16 +542,11 @@ class JargonMiner:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
target_msg = messages[msg_index]
|
target_msg = messages[msg_index]
|
||||||
if _is_bot_message(target_msg):
|
if is_bot_message(target_msg):
|
||||||
logger.info(f"解析阶段跳过引用机器人自身消息的词条: content={content}, msg_id={msg_id_str}")
|
logger.info(f"解析阶段跳过引用机器人自身消息的词条: content={content}, msg_id={msg_id_str}")
|
||||||
continue
|
continue
|
||||||
if _has_adjacent_bot_message(messages, msg_index):
|
|
||||||
logger.info(
|
|
||||||
f"解析阶段跳过因邻近机器人发言的词条: content={content}, msg_id={msg_id_str}"
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
|
|
||||||
context_paragraph = _build_context_paragraph(messages, msg_index)
|
context_paragraph = build_context_paragraph(messages, msg_index)
|
||||||
if not context_paragraph:
|
if not context_paragraph:
|
||||||
logger.warning(f"解析jargon失败:上下文为空,content={content}, msg_id={msg_id_str}")
|
logger.warning(f"解析jargon失败:上下文为空,content={content}, msg_id={msg_id_str}")
|
||||||
continue
|
continue
|
||||||
|
|
@ -785,27 +588,27 @@ class JargonMiner:
|
||||||
content = entry["content"]
|
content = entry["content"]
|
||||||
raw_content_list = entry["raw_content"] # 已经是列表
|
raw_content_list = entry["raw_content"] # 已经是列表
|
||||||
|
|
||||||
# 检查并补充raw_content:如果只包含黑话本身,则获取前三条消息作为上下文
|
|
||||||
# raw_content_list = await _enrich_raw_content_if_needed(
|
|
||||||
# content=content,
|
|
||||||
# raw_content_list=raw_content_list,
|
|
||||||
# chat_id=self.chat_id,
|
|
||||||
# messages=messages,
|
|
||||||
# extraction_start_time=extraction_start_time,
|
|
||||||
# extraction_end_time=extraction_end_time,
|
|
||||||
# )
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 根据all_global配置决定查询逻辑
|
# 查询所有content匹配的记录
|
||||||
if global_config.jargon.all_global:
|
query = Jargon.select().where(Jargon.content == content)
|
||||||
# 开启all_global:无视chat_id,查询所有content匹配的记录(所有记录都是全局的)
|
|
||||||
query = Jargon.select().where(Jargon.content == content)
|
|
||||||
else:
|
|
||||||
# 关闭all_global:只查询chat_id匹配的记录(不考虑is_global)
|
|
||||||
query = Jargon.select().where((Jargon.chat_id == self.chat_id) & (Jargon.content == content))
|
|
||||||
|
|
||||||
if query.exists():
|
# 查找匹配的记录
|
||||||
obj = query.get()
|
matched_obj = None
|
||||||
|
for obj in query:
|
||||||
|
if global_config.jargon.all_global:
|
||||||
|
# 开启all_global:所有content匹配的记录都可以
|
||||||
|
matched_obj = obj
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# 关闭all_global:需要检查chat_id列表是否包含目标chat_id
|
||||||
|
chat_id_list = parse_chat_id_list(obj.chat_id)
|
||||||
|
if chat_id_list_contains(chat_id_list, self.chat_id):
|
||||||
|
matched_obj = obj
|
||||||
|
break
|
||||||
|
|
||||||
|
if matched_obj:
|
||||||
|
obj = matched_obj
|
||||||
try:
|
try:
|
||||||
obj.count = (obj.count or 0) + 1
|
obj.count = (obj.count or 0) + 1
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -827,6 +630,11 @@ class JargonMiner:
|
||||||
merged_list = list(dict.fromkeys(existing_raw_content + raw_content_list))
|
merged_list = list(dict.fromkeys(existing_raw_content + raw_content_list))
|
||||||
obj.raw_content = json.dumps(merged_list, ensure_ascii=False)
|
obj.raw_content = json.dumps(merged_list, ensure_ascii=False)
|
||||||
|
|
||||||
|
# 更新chat_id列表:增加当前chat_id的计数
|
||||||
|
chat_id_list = parse_chat_id_list(obj.chat_id)
|
||||||
|
updated_chat_id_list = update_chat_id_list(chat_id_list, self.chat_id, increment=1)
|
||||||
|
obj.chat_id = json.dumps(updated_chat_id_list, ensure_ascii=False)
|
||||||
|
|
||||||
# 开启all_global时,确保记录标记为is_global=True
|
# 开启all_global时,确保记录标记为is_global=True
|
||||||
if global_config.jargon.all_global:
|
if global_config.jargon.all_global:
|
||||||
obj.is_global = True
|
obj.is_global = True
|
||||||
|
|
@ -851,10 +659,14 @@ class JargonMiner:
|
||||||
# 关闭all_global:新记录is_global=False
|
# 关闭all_global:新记录is_global=False
|
||||||
is_global_new = False
|
is_global_new = False
|
||||||
|
|
||||||
|
# 使用新格式创建chat_id列表:[[chat_id, count]]
|
||||||
|
chat_id_list = [[self.chat_id, 1]]
|
||||||
|
chat_id_json = json.dumps(chat_id_list, ensure_ascii=False)
|
||||||
|
|
||||||
Jargon.create(
|
Jargon.create(
|
||||||
content=content,
|
content=content,
|
||||||
raw_content=json.dumps(raw_content_list, ensure_ascii=False),
|
raw_content=json.dumps(raw_content_list, ensure_ascii=False),
|
||||||
chat_id=self.chat_id,
|
chat_id=chat_id_json,
|
||||||
is_global=is_global_new,
|
is_global=is_global_new,
|
||||||
count=1,
|
count=1,
|
||||||
)
|
)
|
||||||
|
|
@ -924,8 +736,8 @@ def search_jargon(
|
||||||
|
|
||||||
keyword = keyword.strip()
|
keyword = keyword.strip()
|
||||||
|
|
||||||
# 构建查询
|
# 构建查询(选择所有需要的字段,以便后续过滤)
|
||||||
query = Jargon.select(Jargon.content, Jargon.meaning)
|
query = Jargon.select()
|
||||||
|
|
||||||
# 构建搜索条件
|
# 构建搜索条件
|
||||||
if case_sensitive:
|
if case_sensitive:
|
||||||
|
|
@ -951,102 +763,34 @@ def search_jargon(
|
||||||
if global_config.jargon.all_global:
|
if global_config.jargon.all_global:
|
||||||
# 开启all_global:所有记录都是全局的,查询所有is_global=True的记录(无视chat_id)
|
# 开启all_global:所有记录都是全局的,查询所有is_global=True的记录(无视chat_id)
|
||||||
query = query.where(Jargon.is_global)
|
query = query.where(Jargon.is_global)
|
||||||
else:
|
# 注意:对于all_global=False的情况,chat_id过滤在Python层面进行,以便兼容新旧格式
|
||||||
# 关闭all_global:如果提供了chat_id,优先搜索该聊天或global的jargon
|
|
||||||
if chat_id:
|
|
||||||
query = query.where((Jargon.chat_id == chat_id) | Jargon.is_global)
|
|
||||||
|
|
||||||
# 只返回有meaning的记录
|
# 注意:meaning的过滤移到Python层面,因为我们需要先过滤chat_id
|
||||||
query = query.where((Jargon.meaning.is_null(False)) & (Jargon.meaning != ""))
|
|
||||||
|
|
||||||
# 按count降序排序,优先返回出现频率高的
|
# 按count降序排序,优先返回出现频率高的
|
||||||
query = query.order_by(Jargon.count.desc())
|
query = query.order_by(Jargon.count.desc())
|
||||||
|
|
||||||
# 限制结果数量
|
# 限制结果数量(先多取一些,因为后面可能过滤)
|
||||||
query = query.limit(limit)
|
query = query.limit(limit * 2)
|
||||||
|
|
||||||
# 执行查询并返回结果
|
# 执行查询并返回结果,过滤chat_id
|
||||||
results = []
|
results = []
|
||||||
for jargon in query:
|
for jargon in query:
|
||||||
|
# 如果提供了chat_id且all_global=False,需要检查chat_id列表是否包含目标chat_id
|
||||||
|
if chat_id and not global_config.jargon.all_global:
|
||||||
|
chat_id_list = parse_chat_id_list(jargon.chat_id)
|
||||||
|
# 如果记录是is_global=True,或者chat_id列表包含目标chat_id,则包含
|
||||||
|
if not jargon.is_global and not chat_id_list_contains(chat_id_list, chat_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 只返回有meaning的记录
|
||||||
|
if not jargon.meaning or jargon.meaning.strip() == "":
|
||||||
|
continue
|
||||||
|
|
||||||
results.append({"content": jargon.content or "", "meaning": jargon.meaning or ""})
|
results.append({"content": jargon.content or "", "meaning": jargon.meaning or ""})
|
||||||
|
|
||||||
|
# 达到限制数量后停止
|
||||||
|
if len(results) >= limit:
|
||||||
|
break
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
async def store_jargon_from_answer(jargon_keyword: str, answer: str, chat_id: str) -> None:
|
|
||||||
"""将黑话存入jargon系统
|
|
||||||
|
|
||||||
Args:
|
|
||||||
jargon_keyword: 黑话关键词
|
|
||||||
answer: 答案内容(将概括为raw_content)
|
|
||||||
chat_id: 聊天ID
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# 概括答案为简短的raw_content
|
|
||||||
summary_prompt = f"""请将以下答案概括为一句简短的话(不超过50字),作为黑话"{jargon_keyword}"的使用示例:
|
|
||||||
|
|
||||||
答案:{answer}
|
|
||||||
|
|
||||||
只输出概括后的内容,不要输出其他内容:"""
|
|
||||||
|
|
||||||
success, summary, _, _ = await llm_api.generate_with_model(
|
|
||||||
summary_prompt,
|
|
||||||
model_config=model_config.model_task_config.utils_small,
|
|
||||||
request_type="memory.summarize_jargon",
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info(f"概括答案提示: {summary_prompt}")
|
|
||||||
logger.info(f"概括答案: {summary}")
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
logger.warning(f"概括答案失败,使用原始答案: {summary}")
|
|
||||||
summary = answer[:100] # 截取前100字符作为备用
|
|
||||||
|
|
||||||
raw_content = summary.strip()[:200] # 限制长度
|
|
||||||
|
|
||||||
# 检查是否已存在
|
|
||||||
if global_config.jargon.all_global:
|
|
||||||
query = Jargon.select().where(Jargon.content == jargon_keyword)
|
|
||||||
else:
|
|
||||||
query = Jargon.select().where((Jargon.chat_id == chat_id) & (Jargon.content == jargon_keyword))
|
|
||||||
|
|
||||||
if query.exists():
|
|
||||||
# 更新现有记录
|
|
||||||
obj = query.get()
|
|
||||||
obj.count = (obj.count or 0) + 1
|
|
||||||
|
|
||||||
# 合并raw_content列表
|
|
||||||
existing_raw_content = []
|
|
||||||
if obj.raw_content:
|
|
||||||
try:
|
|
||||||
existing_raw_content = (
|
|
||||||
json.loads(obj.raw_content) if isinstance(obj.raw_content, str) else obj.raw_content
|
|
||||||
)
|
|
||||||
if not isinstance(existing_raw_content, list):
|
|
||||||
existing_raw_content = [existing_raw_content] if existing_raw_content else []
|
|
||||||
except (json.JSONDecodeError, TypeError):
|
|
||||||
existing_raw_content = [obj.raw_content] if obj.raw_content else []
|
|
||||||
|
|
||||||
# 合并并去重
|
|
||||||
merged_list = list(dict.fromkeys(existing_raw_content + [raw_content]))
|
|
||||||
obj.raw_content = json.dumps(merged_list, ensure_ascii=False)
|
|
||||||
|
|
||||||
if global_config.jargon.all_global:
|
|
||||||
obj.is_global = True
|
|
||||||
|
|
||||||
obj.save()
|
|
||||||
logger.info(f"更新jargon记录: {jargon_keyword}")
|
|
||||||
else:
|
|
||||||
# 创建新记录
|
|
||||||
is_global_new = True if global_config.jargon.all_global else False
|
|
||||||
Jargon.create(
|
|
||||||
content=jargon_keyword,
|
|
||||||
raw_content=json.dumps([raw_content], ensure_ascii=False),
|
|
||||||
chat_id=chat_id,
|
|
||||||
is_global=is_global_new,
|
|
||||||
count=1,
|
|
||||||
)
|
|
||||||
logger.info(f"创建新jargon记录: {jargon_keyword}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"存储jargon失败: {e}")
|
|
||||||
|
|
@ -0,0 +1,199 @@
|
||||||
|
import json
|
||||||
|
from typing import List, Dict, Optional, Any
|
||||||
|
|
||||||
|
from src.common.logger import get_logger
|
||||||
|
from src.common.database.database_model import Jargon
|
||||||
|
from src.config.config import global_config
|
||||||
|
from src.chat.utils.chat_message_builder import (
|
||||||
|
build_readable_messages,
|
||||||
|
build_readable_messages_with_id,
|
||||||
|
)
|
||||||
|
from src.chat.utils.utils import parse_platform_accounts
|
||||||
|
|
||||||
|
|
||||||
|
logger = get_logger("jargon")
|
||||||
|
|
||||||
|
def parse_chat_id_list(chat_id_value: Any) -> List[List[Any]]:
    """
    Normalize a stored ``chat_id`` value into a list of ``[chat_id, count]`` pairs.

    Both storage formats are accepted: the legacy format (a bare chat id
    string) and the new format (a JSON-encoded list).

    Args:
        chat_id_value: The raw field value. May be a legacy plain string, a
            JSON string, an already-decoded list, or any other value.

    Returns:
        List[List[Any]]: ``[]`` for a falsy input. A list input, or a JSON
        string that decodes to a list, is returned as-is (entries are not
        validated). Every other value is wrapped as a single legacy entry
        ``[[<id>, 1]]``.
    """
    if not chat_id_value:
        return []

    # Already decoded: hand the very same list object back.
    if isinstance(chat_id_value, list):
        return chat_id_value

    # Neither a list nor a string: stringify it and treat it as a legacy id.
    if not isinstance(chat_id_value, str):
        return [[str(chat_id_value), 1]]

    try:
        decoded = json.loads(chat_id_value)
    except (json.JSONDecodeError, TypeError):
        # Not valid JSON, so it is a legacy plain-string chat id.
        return [[str(chat_id_value), 1]]

    if isinstance(decoded, list):
        # New format.
        return decoded
    if isinstance(decoded, str):
        # A JSON-quoted string: legacy id that happened to be JSON-encoded.
        return [[decoded, 1]]
    # Any other JSON value (number, object, ...): keep the original text as the id.
    return [[str(chat_id_value), 1]]
|
||||||
|
|
||||||
|
|
||||||
|
def update_chat_id_list(chat_id_list: List[List[Any]], target_chat_id: str, increment: int = 1) -> List[List[Any]]:
    """
    Bump the counter of ``target_chat_id``, adding a new entry when it is absent.

    The list is modified in place and the same object is returned.

    Args:
        chat_id_list: Current entries in the form ``[[chat_id, count], ...]``.
        target_chat_id: The chat id to update or add.
        increment: Amount to add to the counter. Defaults to 1.

    Returns:
        List[List[Any]]: The updated ``chat_id_list`` (same object as the input).
    """
    wanted = str(target_chat_id)

    for entry in chat_id_list:
        # Skip anything that is not a non-empty list whose id matches (compared as strings).
        if not isinstance(entry, list) or len(entry) < 1 or str(entry[0]) != wanted:
            continue

        if len(entry) < 2:
            # Entry has an id but no counter yet.
            entry.append(increment)
        else:
            # A non-numeric counter is treated as 0 before adding.
            previous = entry[1] if isinstance(entry[1], (int, float)) else 0
            entry[1] = previous + increment
        # Only the first matching entry is updated.
        break
    else:
        # No entry matched: append a fresh one.
        chat_id_list.append([target_chat_id, increment])

    return chat_id_list
|
||||||
|
|
||||||
|
|
||||||
|
def chat_id_list_contains(chat_id_list: List[List[Any]], target_chat_id: str) -> bool:
    """
    Tell whether ``chat_id_list`` has an entry for ``target_chat_id``.

    Args:
        chat_id_list: Entries in the form ``[[chat_id, count], ...]``.
        target_chat_id: The chat id to look for.

    Returns:
        bool: True when some entry is a non-empty list whose first element
        equals ``target_chat_id`` after both are converted to strings.
    """
    wanted = str(target_chat_id)
    return any(
        isinstance(entry, list) and len(entry) >= 1 and str(entry[0]) == wanted
        for entry in chat_id_list
    )
|
||||||
|
|
||||||
|
|
||||||
|
def contains_bot_self_name(content: str) -> bool:
    """
    Tell whether a term contains the bot's nickname or one of its aliases.

    The comparison is a case-insensitive substring check on the stripped
    term. The nickname and aliases are read from ``global_config.bot``.

    Args:
        content: The term to inspect.

    Returns:
        bool: True when any non-empty bot name occurs inside the term. False
        for empty content or when no bot configuration is available.
    """
    if not content:
        return False

    bot_config = getattr(global_config, "bot", None)
    if not bot_config:
        return False

    haystack = content.strip().lower()

    # Gather nickname first, then aliases; each one is normalized the same way.
    raw_names = [getattr(bot_config, "nickname", "")]
    raw_names.extend(getattr(bot_config, "alias_names", []) or [])
    normalized_names = [str(raw or "").strip().lower() for raw in raw_names]

    for name in normalized_names:
        # Blank names are ignored, and a blank term can never contain a name.
        if name and haystack and name in haystack:
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def build_context_paragraph(messages: List[Any], center_index: int) -> Optional[str]:
    """
    Render the messages around a center message as one readable paragraph.

    The window covers up to 3 messages before and 3 messages after the center
    message, and is rendered with the standard ``build_readable_messages``
    builder.

    Args:
        messages: The full message list.
        center_index: Index of the center message within ``messages``.

    Returns:
        Optional[str]: The stripped paragraph, or None when the input is
        empty, the index is out of range, rendering fails, or the rendered
        text is blank.
    """
    if not messages:
        return None
    if not 0 <= center_index < len(messages):
        return None

    # Clamp the window to the list bounds: 3 before, the center, 3 after.
    window = messages[max(0, center_index - 3) : min(len(messages), center_index + 1 + 3)]
    if not window:
        return None

    try:
        rendered = build_readable_messages(
            messages=window,
            replace_bot_name=True,
            timestamp_mode="relative",
            read_mark=0.0,
            truncate=False,
            show_actions=False,
            show_pic=True,
            message_id_list=None,
            remove_emoji_stickers=False,
            pic_single=True,
        )
    except Exception as e:
        # Best effort: a rendering failure is logged and reported as "no paragraph".
        logger.warning(f"构建上下文段落失败: {e}")
        return None

    return rendered.strip() or None
|
||||||
|
|
||||||
|
|
||||||
|
def is_bot_message(msg: Any) -> bool:
    """
    Tell whether a message was sent by the bot itself.

    The sender's platform and user id are read from ``msg`` (falling back to
    ``msg.user_info``) and compared against the bot accounts configured in
    ``global_config.bot``.

    Args:
        msg: The message object to inspect. May be None.

    Returns:
        bool: True when the sender's user id equals the bot account configured
        for the sender's platform. False when the message, the bot config, the
        platform or the user id is missing.
    """
    if msg is None:
        return False

    bot_config = getattr(global_config, "bot", None)
    if not bot_config:
        return False

    def _sender_field(flat_attr, nested_attr):
        # Prefer the flat attribute on the message, then fall back to msg.user_info.
        nested_source = getattr(msg, "user_info", None)
        raw = getattr(msg, flat_attr, "") or getattr(nested_source, nested_attr, "") or ""
        return str(raw).strip()

    platform = _sender_field("user_platform", "platform").lower()
    user_id = _sender_field("user_id", "user_id")
    if not (platform and user_id):
        return False

    try:
        parsed_accounts = parse_platform_accounts(getattr(bot_config, "platforms", []) or [])
    except Exception:
        # Best effort: an unparsable platform list just contributes no accounts.
        parsed_accounts = {}

    # The dedicated account fields take precedence over the generic platform list.
    known_accounts: Dict[str, str] = {}
    for platform_key, config_attr in (("qq", "qq_account"), ("telegram", "telegram_account")):
        configured = str(getattr(bot_config, config_attr, "") or "").strip()
        if configured:
            known_accounts[platform_key] = configured

    for plat, account in parsed_accounts.items():
        if account:
            known_accounts.setdefault(plat, account)

    expected_account = known_accounts.get(platform)
    if not expected_account:
        return False
    return bool(user_id == expected_account)
|
||||||
|
|
@ -8,11 +8,12 @@ from src.common.logger import get_logger
|
||||||
from src.config.config import global_config, model_config
|
from src.config.config import global_config, model_config
|
||||||
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
from src.chat.utils.prompt_builder import Prompt, global_prompt_manager
|
||||||
from src.plugin_system.apis import llm_api
|
from src.plugin_system.apis import llm_api
|
||||||
from src.common.database.database_model import ThinkingBack
|
from src.common.database.database_model import ThinkingBack, Jargon
|
||||||
from json_repair import repair_json
|
from json_repair import repair_json
|
||||||
from src.memory_system.retrieval_tools import get_tool_registry, init_all_tools
|
from src.memory_system.retrieval_tools import get_tool_registry, init_all_tools
|
||||||
from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
|
from src.memory_system.retrieval_tools.query_lpmm_knowledge import query_lpmm_knowledge
|
||||||
from src.llm_models.payload_content.message import MessageBuilder, RoleType, Message
|
from src.llm_models.payload_content.message import MessageBuilder, RoleType, Message
|
||||||
|
from src.jargon.jargon_utils import parse_chat_id_list, chat_id_list_contains, contains_bot_self_name
|
||||||
|
|
||||||
logger = get_logger("memory_retrieval")
|
logger = get_logger("memory_retrieval")
|
||||||
|
|
||||||
|
|
@ -63,27 +64,23 @@ def init_memory_retrieval_prompt():
|
||||||
2. 是否有需要回忆的内容(比如"之前说过"、"上次"、"以前"等)
|
2. 是否有需要回忆的内容(比如"之前说过"、"上次"、"以前"等)
|
||||||
3. 是否有需要查找历史信息的问题
|
3. 是否有需要查找历史信息的问题
|
||||||
4. 是否有问题可以搜集信息帮助你聊天
|
4. 是否有问题可以搜集信息帮助你聊天
|
||||||
5. 对话中是否包含黑话、俚语、缩写等可能需要查询的概念
|
|
||||||
|
|
||||||
重要提示:
|
重要提示:
|
||||||
- **每次只能提出一个问题**,选择最需要查询的关键问题
|
- **每次只能提出一个问题**,选择最需要查询的关键问题
|
||||||
- 如果"最近已查询的问题和结果"中已经包含了类似的问题并得到了答案,请避免重复生成相同或相似的问题,不需要重复查询
|
- 如果"最近已查询的问题和结果"中已经包含了类似的问题并得到了答案,请避免重复生成相同或相似的问题,不需要重复查询
|
||||||
- 如果之前已经查询过某个问题但未找到答案,可以尝试用不同的方式提问或更具体的问题
|
- 如果之前已经查询过某个问题但未找到答案,可以尝试用不同的方式提问或更具体的问题
|
||||||
|
|
||||||
如果你认为需要从记忆中检索信息来回答,请:
|
如果你认为需要从记忆中检索信息来回答,请根据上下文提出**一个**最关键的问题来帮助你回复目标消息,放入"questions"字段
|
||||||
1. 识别对话中可能需要查询的概念(黑话/俚语/缩写/专有名词等关键词),放入"concepts"字段
|
|
||||||
2. 根据上下文提出**一个**最关键的问题来帮助你回复目标消息,放入"questions"字段
|
|
||||||
|
|
||||||
问题格式示例:
|
问题格式示例:
|
||||||
- "xxx在前几天干了什么"
|
- "xxx在前几天干了什么"
|
||||||
- "xxx是什么"
|
- "xxx是什么,在什么时候提到过?"
|
||||||
- "xxxx和xxx的关系是什么"
|
- "xxxx和xxx的关系是什么"
|
||||||
- "xxx在某个时间点发生了什么"
|
- "xxx在某个时间点发生了什么"
|
||||||
|
|
||||||
输出格式示例(需要检索时):
|
输出格式示例(需要检索时):
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"concepts": ["AAA", "BBB", "CCC"], #需要检索的概念列表(字符串数组),如果不需要检索概念则输出空数组[]
|
|
||||||
"questions": ["张三在前几天干了什么"] #问题数组(字符串数组),如果不需要检索记忆则输出空数组[],如果需要检索则只输出包含一个问题的数组
|
"questions": ["张三在前几天干了什么"] #问题数组(字符串数组),如果不需要检索记忆则输出空数组[],如果需要检索则只输出包含一个问题的数组
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
|
|
@ -91,7 +88,6 @@ def init_memory_retrieval_prompt():
|
||||||
输出格式示例(不需要检索时):
|
输出格式示例(不需要检索时):
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"concepts": [],
|
|
||||||
"questions": []
|
"questions": []
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
|
|
@ -280,6 +276,54 @@ async def _retrieve_concepts_with_jargon(concepts: List[str], chat_id: str) -> s
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _match_jargon_from_text(chat_text: str, chat_id: str) -> List[str]:
|
||||||
|
"""直接在聊天文本中匹配已知的jargon,返回出现过的黑话列表"""
|
||||||
|
if not chat_text or not chat_text.strip():
|
||||||
|
return []
|
||||||
|
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
|
query = Jargon.select().where((Jargon.meaning.is_null(False)) & (Jargon.meaning != ""))
|
||||||
|
if global_config.jargon.all_global:
|
||||||
|
query = query.where(Jargon.is_global)
|
||||||
|
|
||||||
|
query = query.order_by(Jargon.count.desc())
|
||||||
|
|
||||||
|
query_time = time.time()
|
||||||
|
matched: Dict[str, None] = {}
|
||||||
|
|
||||||
|
for jargon in query:
|
||||||
|
content = (jargon.content or "").strip()
|
||||||
|
if not content:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if contains_bot_self_name(content):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not global_config.jargon.all_global and not jargon.is_global:
|
||||||
|
chat_id_list = parse_chat_id_list(jargon.chat_id)
|
||||||
|
if not chat_id_list_contains(chat_id_list, chat_id):
|
||||||
|
continue
|
||||||
|
|
||||||
|
pattern = re.escape(content)
|
||||||
|
if re.search(r"[\u4e00-\u9fff]", content):
|
||||||
|
search_pattern = pattern
|
||||||
|
else:
|
||||||
|
search_pattern = r"\b" + pattern + r"\b"
|
||||||
|
|
||||||
|
if re.search(search_pattern, chat_text, re.IGNORECASE):
|
||||||
|
matched[content] = None
|
||||||
|
|
||||||
|
end_time = time.time()
|
||||||
|
logger.info(
|
||||||
|
f"记忆检索黑话匹配: 查询耗时 {(query_time - start_time):.3f}s, "
|
||||||
|
f"匹配耗时 {(end_time - query_time):.3f}s, 总耗时 {(end_time - start_time):.3f}s, "
|
||||||
|
f"匹配到 {len(matched)} 个黑话"
|
||||||
|
)
|
||||||
|
|
||||||
|
return list(matched.keys())
|
||||||
|
|
||||||
|
|
||||||
async def _react_agent_solve_question(
|
async def _react_agent_solve_question(
|
||||||
question: str, chat_id: str, max_iterations: int = 5, timeout: float = 30.0, initial_info: str = ""
|
question: str, chat_id: str, max_iterations: int = 5, timeout: float = 30.0, initial_info: str = ""
|
||||||
) -> Tuple[bool, str, List[Dict[str, Any]], bool]:
|
) -> Tuple[bool, str, List[Dict[str, Any]], bool]:
|
||||||
|
|
@ -991,11 +1035,17 @@ async def build_memory_retrieval_prompt(
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# 解析概念列表和问题列表
|
# 解析概念列表和问题列表
|
||||||
concepts, questions = _parse_questions_json(response)
|
_, questions = _parse_questions_json(response)
|
||||||
logger.info(f"解析到 {len(concepts)} 个概念: {concepts}")
|
|
||||||
logger.info(f"解析到 {len(questions)} 个问题: {questions}")
|
logger.info(f"解析到 {len(questions)} 个问题: {questions}")
|
||||||
|
|
||||||
# 对概念进行jargon检索,作为初始信息
|
# 使用匹配逻辑自动识别聊天中的黑话概念
|
||||||
|
concepts = _match_jargon_from_text(message, chat_id)
|
||||||
|
if concepts:
|
||||||
|
logger.info(f"黑话匹配命中 {len(concepts)} 个概念: {concepts}")
|
||||||
|
else:
|
||||||
|
logger.info("黑话匹配未命中任何概念")
|
||||||
|
|
||||||
|
# 对匹配到的概念进行jargon检索,作为初始信息
|
||||||
initial_info = ""
|
initial_info = ""
|
||||||
if concepts:
|
if concepts:
|
||||||
logger.info(f"开始对 {len(concepts)} 个概念进行jargon检索")
|
logger.info(f"开始对 {len(concepts)} 个概念进行jargon检索")
|
||||||
|
|
@ -1026,8 +1076,6 @@ async def build_memory_retrieval_prompt(
|
||||||
else:
|
else:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
logger.info(f"解析到 {len(questions)} 个问题: {questions}")
|
|
||||||
|
|
||||||
# 第二步:并行处理所有问题(使用配置的最大迭代次数/120秒超时)
|
# 第二步:并行处理所有问题(使用配置的最大迭代次数/120秒超时)
|
||||||
max_iterations = global_config.memory.max_agent_iterations
|
max_iterations = global_config.memory.max_agent_iterations
|
||||||
logger.info(f"问题数量: {len(questions)},设置最大迭代次数: {max_iterations},超时时间: 120秒")
|
logger.info(f"问题数量: {len(questions)},设置最大迭代次数: {max_iterations},超时时间: 120秒")
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
[inner]
|
[inner]
|
||||||
version = "6.23.1"
|
version = "6.23.4"
|
||||||
|
|
||||||
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
||||||
#如果你想要修改配置文件,请递增version的值
|
#如果你想要修改配置文件,请递增version的值
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue