feat:表达方式根据不同thinking level进分别处理

pull/1421/head
SengokuCola 2025-12-07 23:38:20 +08:00
parent d2a4abc33f
commit 20c97b9cc9
4 changed files with 152 additions and 12 deletions

View File

@ -34,6 +34,8 @@ def init_prompt() -> None:
5. 例子仅供参考请严格根据群聊内容总结!!!
注意总结成如下格式的规律总结的内容要详细但具有概括性
例如"AAAAA"可以"BBBBB", AAAAA代表某个场景不超过20个字BBBBB代表对应的语言风格特定句式或表达方式不超过20个字
表达方式在3-5个左右不要超过10个
任务2请从上面这段聊天内容中提取"可能是黑话"的候选项黑话/俚语/网络缩写/口头禅
- 必须为对话中真实出现过的短词或短语
@ -49,6 +51,7 @@ def init_prompt() -> None:
输出要求
将表达方式语言风格和黑话以 JSON 数组输出每个元素为一个对象结构如下注意字段名
注意请不要输出重复内容请对表达方式和黑话进行去重
[
{{"situation": "AAAAA", "style": "BBBBB", "source_id": "3"}},
@ -132,6 +135,16 @@ class ExpressionLearner:
expressions, jargon_entries = self.parse_expression_response(response)
expressions = self._filter_self_reference_styles(expressions)
# 检查表达方式数量如果超过10个则放弃本次表达学习
if len(expressions) > 10:
logger.info(f"表达方式提取数量超过10个实际{len(expressions)}个),放弃本次表达学习")
expressions = []
# 检查黑话数量如果超过30个则放弃本次黑话学习
if len(jargon_entries) > 30:
logger.info(f"黑话提取数量超过30个实际{len(jargon_entries)}个),放弃本次黑话学习")
jargon_entries = []
# 处理黑话条目,路由到 jargon_miner即使没有表达方式也要处理黑话
if jargon_entries:
await self._process_jargon_entries(jargon_entries, random_msg)

View File

@ -111,6 +111,65 @@ class ExpressionSelector:
return group_chat_ids
return [chat_id]
def _select_expressions_simple(self, chat_id: str, max_num: int) -> Tuple[List[Dict[str, Any]], List[int]]:
"""
简单模式只选择 count > 1 的项目要求至少有10个才进行选择随机选5个不进行LLM选择
Args:
chat_id: 聊天流ID
max_num: 最大选择数量此参数在此模式下不使用固定选择5个
Returns:
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
"""
try:
# 支持多chat_id合并抽选
related_chat_ids = self.get_related_chat_ids(chat_id)
# 查询所有相关chat_id的表达方式排除 rejected=1 的,且只选择 count > 1 的
style_query = Expression.select().where(
(Expression.chat_id.in_(related_chat_ids))
& (~Expression.rejected)
& (Expression.count > 1)
)
style_exprs = [
{
"id": expr.id,
"situation": expr.situation,
"style": expr.style,
"last_active_time": expr.last_active_time,
"source_id": expr.chat_id,
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
"count": expr.count if getattr(expr, "count", None) is not None else 1,
"checked": expr.checked if getattr(expr, "checked", None) is not None else False,
}
for expr in style_query
]
# 要求至少有10个 count > 1 的表达方式才进行选择
min_required = 10
if len(style_exprs) < min_required:
logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_required} 个(实际 {len(style_exprs)} 个),不进行选择")
return [], []
# 固定选择5个
select_count = 5
import random
selected_style = random.sample(style_exprs, select_count)
# 更新last_active_time
if selected_style:
self.update_expressions_last_active_time(selected_style)
selected_ids = [expr["id"] for expr in selected_style]
logger.debug(f"think_level=0: 从 {len(style_exprs)} 个 count>1 的表达方式中随机选择了 {len(selected_style)}")
return selected_style, selected_ids
except Exception as e:
logger.error(f"简单模式选择表达方式失败: {e}")
return [], []
def _random_expressions(self, chat_id: str, total_num: int) -> List[Dict[str, Any]]:
"""
随机选择表达方式
@ -164,6 +223,7 @@ class ExpressionSelector:
max_num: int = 10,
target_message: Optional[str] = None,
reply_reason: Optional[str] = None,
think_level: int = 1,
) -> Tuple[List[Dict[str, Any]], List[int]]:
"""
选择适合的表达方式使用classic模式随机选择+LLM选择
@ -174,6 +234,7 @@ class ExpressionSelector:
max_num: 最大选择数量
target_message: 目标消息内容
reply_reason: planner给出的回复理由
think_level: 思考级别0/1/2
Returns:
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
@ -184,8 +245,8 @@ class ExpressionSelector:
return [], []
# 使用classic模式随机选择+LLM选择
logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式")
return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason)
logger.debug(f"使用classic模式为聊天流 {chat_id} 选择表达方式think_level={think_level}")
return await self._select_expressions_classic(chat_id, chat_info, max_num, target_message, reply_reason, think_level)
async def _select_expressions_classic(
self,
@ -194,6 +255,7 @@ class ExpressionSelector:
max_num: int = 10,
target_message: Optional[str] = None,
reply_reason: Optional[str] = None,
think_level: int = 1,
) -> Tuple[List[Dict[str, Any]], List[int]]:
"""
classic模式随机选择+LLM选择
@ -204,24 +266,88 @@ class ExpressionSelector:
max_num: 最大选择数量
target_message: 目标消息内容
reply_reason: planner给出的回复理由
think_level: 思考级别0/1/2
Returns:
Tuple[List[Dict[str, Any]], List[int]]: 选中的表达方式列表和ID列表
"""
try:
# 1. 使用随机抽样选择表达方式
style_exprs = self._random_expressions(chat_id, 20)
# think_level == 0: 只选择 count > 1 的项目随机选10个不进行LLM选择
if think_level == 0:
return self._select_expressions_simple(chat_id, max_num)
# think_level == 1 或 2: 先选高count再从所有表达方式中随机抽样
# 1. 获取所有表达方式并分离 count > 1 和 count <= 1 的
related_chat_ids = self.get_related_chat_ids(chat_id)
style_query = Expression.select().where(
(Expression.chat_id.in_(related_chat_ids)) & (~Expression.rejected)
)
all_style_exprs = [
{
"id": expr.id,
"situation": expr.situation,
"style": expr.style,
"last_active_time": expr.last_active_time,
"source_id": expr.chat_id,
"create_date": expr.create_date if expr.create_date is not None else expr.last_active_time,
"count": expr.count if getattr(expr, "count", None) is not None else 1,
"checked": expr.checked if getattr(expr, "checked", None) is not None else False,
}
for expr in style_query
]
if len(style_exprs) < 10:
logger.info(f"聊天流 {chat_id} 表达方式正在积累中")
# 分离 count > 1 和 count <= 1 的表达方式
high_count_exprs = [expr for expr in all_style_exprs if (expr.get("count", 1) or 1) > 1]
# 根据 think_level 设置要求
if think_level == 1:
# level 1: 需要至少10个高count和10个总数
min_high_count = 10
min_total_count = 10
select_high_count = 5
select_random_count = 5
else: # think_level == 2
# level 2: 需要至少20个高count和20个总数
min_high_count = 20
min_total_count = 20
select_high_count = 10
select_random_count = 10
# 检查数量要求
if len(high_count_exprs) < min_high_count:
logger.info(f"聊天流 {chat_id} count > 1 的表达方式不足 {min_high_count} 个(实际 {len(high_count_exprs)} 个),不进行选择")
return [], []
if len(all_style_exprs) < min_total_count:
logger.info(f"聊天流 {chat_id} 总表达方式不足 {min_total_count} 个(实际 {len(all_style_exprs)} 个),不进行选择")
return [], []
# 先选取高count的表达方式
selected_high = weighted_sample(high_count_exprs, min(len(high_count_exprs), select_high_count))
# 然后从所有表达方式中随机抽样(使用加权抽样)
remaining_num = select_random_count
selected_random = weighted_sample(all_style_exprs, min(len(all_style_exprs), remaining_num))
# 合并候选池(去重,避免重复)
candidate_exprs = selected_high.copy()
candidate_ids = {expr["id"] for expr in candidate_exprs}
for expr in selected_random:
if expr["id"] not in candidate_ids:
candidate_exprs.append(expr)
candidate_ids.add(expr["id"])
# 打乱顺序避免高count的都在前面
import random
random.shuffle(candidate_exprs)
# 2. 构建所有表达方式的索引和情境列表
all_expressions: List[Dict[str, Any]] = []
all_situations: List[str] = []
# 添加style表达方式
for expr in style_exprs:
for expr in candidate_exprs:
expr = expr.copy()
all_expressions.append(expr)
all_situations.append(f"{len(all_expressions)}.当 {expr['situation']} 时,使用 {expr['style']}")
@ -233,7 +359,7 @@ class ExpressionSelector:
all_situations_str = "\n".join(all_situations)
if target_message:
target_message_str = f",现在你想要对这条消息进行回复:{target_message}"
target_message_str = f",现在你想要对这条消息进行回复:\"{target_message}\""
target_message_extra_block = "4.考虑你要回复的目标消息"
else:
target_message_str = ""

View File

@ -229,7 +229,7 @@ class DefaultReplyer:
return False, llm_response
async def build_expression_habits(
self, chat_history: str, target: str, reply_reason: str = ""
self, chat_history: str, target: str, reply_reason: str = "", think_level: int = 1
) -> Tuple[str, List[int]]:
# sourcery skip: for-append-to-extend
"""构建表达习惯块
@ -238,6 +238,7 @@ class DefaultReplyer:
chat_history: 聊天历史记录
target: 目标消息内容
reply_reason: planner给出的回复理由
think_level: 思考级别0/1/2
Returns:
str: 表达习惯信息字符串
@ -250,7 +251,7 @@ class DefaultReplyer:
# 使用从处理器传来的选中表达方式
# 使用模型预测选择表达方式
selected_expressions, selected_ids = await expression_selector.select_suitable_expressions(
self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason
self.chat_stream.stream_id, chat_history, max_num=8, target_message=target, reply_reason=reply_reason, think_level=think_level
)
if selected_expressions:
@ -788,7 +789,7 @@ class DefaultReplyer:
# 并行执行八个构建任务(包括黑话解释)
task_results = await asyncio.gather(
self._time_and_run_task(
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason), "expression_habits"
self.build_expression_habits(chat_talking_prompt_short, target, reply_reason, think_level=think_level), "expression_habits"
),
self._time_and_run_task(
self.build_tool_info(chat_talking_prompt_short, sender, target, enable_tool=enable_tool), "tool_info"

View File

@ -1,5 +1,5 @@
[inner]
version = "7.1.0"
version = "7.1.2"
#----以下是给开发人员阅读的,如果你只是部署了麦麦,不需要阅读----
# 如果你想要修改配置文件请递增version的值