mirror of https://github.com/Mai-with-u/MaiBot.git
feat:表达方式根据不同thinking level进行分别处理
parent
d2a4abc33f
commit
20c97b9cc9
|
|
@ -34,6 +34,8 @@ def init_prompt() -> None:
|
||||||
5. 例子仅供参考,请严格根据群聊内容总结!!!
|
5. 例子仅供参考,请严格根据群聊内容总结!!!
|
||||||
注意:总结成如下格式的规律,总结的内容要详细,但具有概括性:
|
注意:总结成如下格式的规律,总结的内容要详细,但具有概括性:
|
||||||
例如:当"AAAAA"时,可以"BBBBB", AAAAA代表某个场景,不超过20个字。BBBBB代表对应的语言风格,特定句式或表达方式,不超过20个字。
|
例如:当"AAAAA"时,可以"BBBBB", AAAAA代表某个场景,不超过20个字。BBBBB代表对应的语言风格,特定句式或表达方式,不超过20个字。
|
||||||
|
表达方式在3-5个左右,不要超过10个
|
||||||
|
|
||||||
|
|
||||||
任务2:请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
|
任务2:请从上面这段聊天内容中提取"可能是黑话"的候选项(黑话/俚语/网络缩写/口头禅)。
|
||||||
- 必须为对话中真实出现过的短词或短语
|
- 必须为对话中真实出现过的短词或短语
|
||||||
|
|
@ -49,6 +51,7 @@ def init_prompt() -> None:
|
||||||
|
|
||||||
输出要求:
|
输出要求:
|
||||||
将表达方式,语言风格和黑话以 JSON 数组输出,每个元素为一个对象,结构如下(注意字段名):
|
将表达方式,语言风格和黑话以 JSON 数组输出,每个元素为一个对象,结构如下(注意字段名):
|
||||||
|
注意请不要输出重复内容,请对表达方式和黑话进行去重。
|
||||||
|
|
||||||
[
|
[
|
||||||
{{"situation": "AAAAA", "style": "BBBBB", "source_id": "3"}},
|
{{"situation": "AAAAA", "style": "BBBBB", "source_id": "3"}},
|
||||||
|
|
@ -132,6 +135,16 @@ class ExpressionLearner:
|
||||||
expressions, jargon_entries = self.parse_expression_response(response)
|
expressions, jargon_entries = self.parse_expression_response(response)
|
||||||
expressions = self._filter_self_reference_styles(expressions)
|
expressions = self._filter_self_reference_styles(expressions)
|
||||||
|
|
||||||
|
# 检查表达方式数量,如果超过10个则放弃本次表达学习
|
||||||
|
if len(expressions) > 10:
|
||||||
|
logger.info(f"表达方式提取数量超过10个(实际{len(expressions)}个),放弃本次表达学习")
|
||||||
|
expressions = []
|
||||||
|
|
||||||
|
# 检查黑话数量,如果超过30个则放弃本次黑话学习
|
||||||
|
if len(jargon_entries) > 30:
|
||||||
|
logger.info(f"黑话提取数量超过30个(实际{len(jargon_entries)}个),放弃本次黑话学习")
|
||||||
|
jargon_entries = []
|
||||||
|
|
||||||
# 处理黑话条目,路由到 jargon_miner(即使没有表达方式也要处理黑话)
|
# 处理黑话条目,路由到 jargon_miner(即使没有表达方式也要处理黑话)
|
||||||
if jargon_entries:
|
if jargon_entries:
|
||||||
await self._process_jargon_entries(jargon_entries, random_msg)
|
await self._process_jargon_entries(jargon_entries, random_msg)
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,65 @@ class ExpressionSelector:
|
||||||
return group_chat_ids
|
return group_chat_ids
|
||||||
return [chat_id]
|
return [chat_id]
|
||||||
|
|
||||||
|
def _select_expressions_simple(self, chat_id: str, max_num: int) -> Tuple[List[Dict[str, Any]], List[int]]:
    """
    Simple mode: pick expressions purely at random, without any LLM selection.

    Only expressions that are not rejected and whose ``count`` is greater
    than 1 are eligible, gathered across every chat ID returned by
    ``get_related_chat_ids``. Nothing is selected unless at least 10 such
    expressions exist; when they do, exactly 5 are drawn with
    ``random.sample``.

    Args:
        chat_id: ID of the chat stream.
        max_num: Maximum number to select. Not used in this mode; the
            number of picks is fixed at 5.

    Returns:
        Tuple[List[Dict[str, Any]], List[int]]: The chosen expression dicts
        and their IDs. ``([], [])`` is returned when fewer than 10 eligible
        expressions exist or when any exception is raised.
    """
    # Pool-size threshold and the fixed number of picks for this mode.
    min_required = 10
    pick_count = 5

    try:
        import random

        # Several chat IDs may be merged into one pool for sampling.
        chat_ids = self.get_related_chat_ids(chat_id)

        # Eligible rows: related chats only, not rejected, count > 1.
        eligible_rows = Expression.select().where(
            (Expression.chat_id.in_(chat_ids)) & (~Expression.rejected) & (Expression.count > 1)
        )

        style_exprs = []
        for row in eligible_rows:
            # Fall back to last_active_time when no creation date is stored.
            created = row.create_date
            if created is None:
                created = row.last_active_time

            # Missing or null count / checked get neutral defaults.
            use_count = getattr(row, "count", None)
            if use_count is None:
                use_count = 1
            is_checked = getattr(row, "checked", None)
            if is_checked is None:
                is_checked = False

            style_exprs.append(
                {
                    "id": row.id,
                    "situation": row.situation,
                    "style": row.style,
                    "last_active_time": row.last_active_time,
                    "source_id": row.chat_id,
                    "create_date": created,
                    "count": use_count,
                    "checked": is_checked,
                }
            )

        # Guard clause: refuse to select from a pool that is too small.
        if len(style_exprs) < min_required:
            logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_required} 个(实际 {len(style_exprs)} 个),不进行选择")
            return [], []

        selected_style = random.sample(style_exprs, pick_count)

        # Refresh last_active_time for whatever was picked.
        if selected_style:
            self.update_expressions_last_active_time(selected_style)

        picked_ids = []
        for chosen in selected_style:
            picked_ids.append(chosen["id"])

        logger.debug(f"think_level=0: 从 {len(style_exprs)} 个 count>1 的表达方式中随机选择了 {len(selected_style)} 个")
        return selected_style, picked_ids

    except Exception as e:
        # Best-effort: report the failure and fall back to "nothing selected".
        logger.error(f"简单模式选择表达方式失败: {e}")
        return [], []
|
||||||
|
|
||||||
def _random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]:
|
def _random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
随机选择表达方式
|
随机选择表达方式
|
||||||
|
|
@ -164,6 +223,7 @@ class ExpressionSelector:
|
||||||
max_num: int = 10,
|
max_num: int = 10,
|
||||||
target_message: Optional[str] = None,
|
target_message: Optional[str] = None,
|
||||||
reply_reason: Optional[str] = None,
|
reply_reason: Optional[str] = None,
|
||||||
|
think_level: int = 1,
|
||||||
) -> Tuple[List[Dict[str, Any]], List[int]]:
|
) -> Tuple[List[Dict[str, Any]], List[int]]:
|
||||||
"""
|
"""
|
||||||
选择适合的表达方式(使用classic模式:随机选择+LLM选择)
|
选择适合的表达方式(使用classic模式:随机选择+LLM选择)
|
||||||
|
|
@ -174,6 +234,7 @@ class ExpressionSelector:
|
||||||
max_num: 最大选择数量
|
max_num: 最大选择数量
|
||||||
target_message: 目标消息内容
|
target_message: 目标消息内容
|
||||||
reply_reason: planner给出的回复理由
|
reply_reason: planner给出的回复理由
|
||||||
|
think_level: 思考级别,0/1/2
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
|
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
|
||||||
|
|
@ -184,8 +245,8 @@ class ExpressionSelector:
|
||||||
return [], []
|
return [], []
|
||||||
|
|
||||||
# 使用classic模式(随机选择+LLM选择)
|
# 使用classic模式(随机选择+LLM选择)
|
||||||
logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式")
|
logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式,think_level={think_level}")
|
||||||
return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason)
|
return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason, think_level)
|
||||||
|
|
||||||
async def _select_expressions_classic(
|
async def _select_expressions_classic(
|
||||||
self,
|
self,
|
||||||
|
|
@ -194,6 +255,7 @@ class ExpressionSelector:
|
||||||
max_num: int = 10,
|
max_num: int = 10,
|
||||||
target_message: Optional[str] = None,
|
target_message: Optional[str] = None,
|
||||||
reply_reason: Optional[str] = None,
|
reply_reason: Optional[str] = None,
|
||||||
|
think_level: int = 1,
|
||||||
) -> Tuple[List[Dict[str, Any]], List[int]]:
|
) -> Tuple[List[Dict[str, Any]], List[int]]:
|
||||||
"""
|
"""
|
||||||
classic模式:随机选择+LLM选择
|
classic模式:随机选择+LLM选择
|
||||||
|
|
@ -204,24 +266,88 @@ class ExpressionSelector:
|
||||||
max_num: 最大选择数量
|
max_num: 最大选择数量
|
||||||
target_message: 目标消息内容
|
target_message: 目标消息内容
|
||||||
reply_reason: planner给出的回复理由
|
reply_reason: planner给出的回复理由
|
||||||
|
think_level: 思考级别,0/1/2
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
|
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# 1. 使用随机抽样选择表达方式
|
# think_level == 0: 只选择 count > 1 的项目(至少需要10个),随机选5个,不进行LLM选择
|
||||||
style_exprs = self._random_expressions(chat_id, 20)
|
if think_level == 0:
|
||||||
|
return self._select_expressions_simple(chat_id, max_num)
|
||||||
|
|
||||||
|
# think_level == 1 或 2: 先选高count,再从所有表达方式中随机抽样
|
||||||
|
# 1. 获取所有表达方式并分离 count > 1 和 count <= 1 的
|
||||||
|
related_chat_ids = self.get_related_chat_ids(chat_id)
|
||||||
|
style_query = Expression.select().where(
|
||||||
|
(Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected)
|
||||||
|
)
|
||||||
|
|
||||||
|
all_style_exprs = [
|
||||||
|
{
|
||||||
|
"id": expr.id,
|
||||||
|
"situation": expr.situation,
|
||||||
|
"style": expr.style,
|
||||||
|
"last_active_time": expr.last_active_time,
|
||||||
|
"source_id": expr.chat_id,
|
||||||
|
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
|
||||||
|
"count": expr.count if getattr(expr, "count", None) is not None else 1,
|
||||||
|
"checked": expr.checked if getattr(expr, "checked", None) is not None else False,
|
||||||
|
}
|
||||||
|
for expr in style_query
|
||||||
|
]
|
||||||
|
|
||||||
if len(style_exprs) < 10:
|
# 分离 count > 1 和 count <= 1 的表达方式
|
||||||
logger.info(f"聊天流 {chat_id} 表达方式正在积累中")
|
high_count_exprs = [expr for expr in all_style_exprs if (expr.get("count", 1) or 1) > 1]
|
||||||
|
|
||||||
|
# 根据 think_level 设置要求
|
||||||
|
if think_level == 1:
|
||||||
|
# level 1: 需要至少10个高count和10个总数
|
||||||
|
min_high_count = 10
|
||||||
|
min_total_count = 10
|
||||||
|
select_high_count = 5
|
||||||
|
select_random_count = 5
|
||||||
|
else: # think_level == 2
|
||||||
|
# level 2: 需要至少20个高count和20个总数
|
||||||
|
min_high_count = 20
|
||||||
|
min_total_count = 20
|
||||||
|
select_high_count = 10
|
||||||
|
select_random_count = 10
|
||||||
|
|
||||||
|
# 检查数量要求
|
||||||
|
if len(high_count_exprs) < min_high_count:
|
||||||
|
logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_high_count} 个(实际 {len(high_count_exprs)} 个),不进行选择")
|
||||||
return [], []
|
return [], []
|
||||||
|
|
||||||
|
if len(all_style_exprs) < min_total_count:
|
||||||
|
logger.info(f"聊天流 {chat_id} 总表达方式不足 {min_total_count} 个(实际 {len(all_style_exprs)} 个),不进行选择")
|
||||||
|
return [], []
|
||||||
|
|
||||||
|
# 先选取高count的表达方式
|
||||||
|
selected_high = weighted_sample(high_count_exprs, min(len(high_count_exprs), select_high_count))
|
||||||
|
|
||||||
|
# 然后从所有表达方式中随机抽样(使用加权抽样)
|
||||||
|
remaining_num = select_random_count
|
||||||
|
selected_random = weighted_sample(all_style_exprs, min(len(all_style_exprs), remaining_num))
|
||||||
|
|
||||||
|
# 合并候选池(去重,避免重复)
|
||||||
|
candidate_exprs = selected_high.copy()
|
||||||
|
candidate_ids = {expr["id"] for expr in candidate_exprs}
|
||||||
|
for expr in selected_random:
|
||||||
|
if expr["id"] not in candidate_ids:
|
||||||
|
candidate_exprs.append(expr)
|
||||||
|
candidate_ids.add(expr["id"])
|
||||||
|
|
||||||
|
# 打乱顺序,避免高count的都在前面
|
||||||
|
import random
|
||||||
|
random.shuffle(candidate_exprs)
|
||||||
|
|
||||||
# 2. 构建所有表达方式的索引和情境列表
|
# 2. 构建所有表达方式的索引和情境列表
|
||||||
all_expressions: List[Dict[str, Any]] = []
|
all_expressions: List[Dict[str, Any]] = []
|
||||||
all_situations: List[str] = []
|
all_situations: List[str] = []
|
||||||
|
|
||||||
# 添加style表达方式
|
# 添加style表达方式
|
||||||
for expr in style_exprs:
|
for expr in candidate_exprs:
|
||||||
expr = expr.copy()
|
expr = expr.copy()
|
||||||
all_expressions.append(expr)
|
all_expressions.append(expr)
|
||||||
all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时,使用 {expr['style']}")
|
all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时,使用 {expr['style']}")
|
||||||
|
|
@ -233,7 +359,7 @@ class ExpressionSelector:
|
||||||
all_situations_str = "\n".join(all_situations)
|
all_situations_str = "\n".join(all_situations)
|
||||||
|
|
||||||
if target_message:
|
if target_message:
|
||||||
target_message_str = f",现在你想要对这条消息进行回复:“{target_message}”"
|
target_message_str = f",现在你想要对这条消息进行回复:\"{target_message}\""
|
||||||
target_message_extra_block = "4.考虑你要回复的目标消息"
|
target_message_extra_block = "4.考虑你要回复的目标消息"
|
||||||
else:
|
else:
|
||||||
target_message_str = ""
|
target_message_str = ""
|
||||||
|
|
|
||||||
|
|
@ -229,7 +229,7 @@ class DefaultReplyer:
|
||||||
return False, llm_response
|
return False, llm_response
|
||||||
|
|
||||||
async def build_expression_habits(
|
async def build_expression_habits(
|
||||||
self, chat_history: str, target: str, reply_reason: str = ""
|
self, chat_history: str, target: str, reply_reason: str = "", think_level: int = 1
|
||||||
) -> Tuple[str, List[int]]:
|
) -> Tuple[str, List[int]]:
|
||||||
# sourcery skip: for-append-to-extend
|
# sourcery skip: for-append-to-extend
|
||||||
"""构建表达习惯块
|
"""构建表达习惯块
|
||||||
|
|
@ -238,6 +238,7 @@ class DefaultReplyer:
|
||||||
chat_history: 聊天历史记录
|
chat_history: 聊天历史记录
|
||||||
target: 目标消息内容
|
target: 目标消息内容
|
||||||
reply_reason: planner给出的回复理由
|
reply_reason: planner给出的回复理由
|
||||||
|
think_level: 思考级别,0/1/2
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: 表达习惯信息字符串
|
str: 表达习惯信息字符串
|
||||||
|
|
@ -250,7 +251,7 @@ class DefaultReplyer:
|
||||||
# 使用从处理器传来的选中表达方式
|
# 使用从处理器传来的选中表达方式
|
||||||
# 使用模型预测选择表达方式
|
# 使用模型预测选择表达方式
|
||||||
selected_expressions, selected_ids = await expression_selector.select_suitable_expressions(
|
selected_expressions, selected_ids = await expression_selector.select_suitable_expressions(
|
||||||
self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason
|
self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason, think_level=think_level
|
||||||
)
|
)
|
||||||
|
|
||||||
if selected_expressions:
|
if selected_expressions:
|
||||||
|
|
@ -788,7 +789,7 @@ class DefaultReplyer:
|
||||||
# 并行执行八个构建任务(包括黑话解释)
|
# 并行执行八个构建任务(包括黑话解释)
|
||||||
task_results = await asyncio.gather(
|
task_results = await asyncio.gather(
|
||||||
self._time_and_run_task(
|
self._time_and_run_task(
|
||||||
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
|
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level), "expression_habits"
|
||||||
),
|
),
|
||||||
self._time_and_run_task(
|
self._time_and_run_task(
|
||||||
self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info"
|
self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info"
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
[inner]
|
[inner]
|
||||||
version = "7.1.0"
|
version = "7.1.2"
|
||||||
|
|
||||||
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
|
||||||
# 如果你想要修改配置文件,请递增version的值
|
# 如果你想要修改配置文件,请递增version的值
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue