fix:修复并发导致的重复表达学习问题

pull/1397/head
SengokuCola 2025-12-02 12:47:54 +08:00
parent ec90951539
commit c562ebe97a
3 changed files with 364 additions and 323 deletions

View File

@@ -1,5 +1,6 @@
from datetime import datetime from datetime import datetime
import time import time
import asyncio
from typing import Dict from typing import Dict
from src.chat.utils.chat_message_builder import ( from src.chat.utils.chat_message_builder import (
@@ -46,6 +47,8 @@ class FrequencyControl:
self.frequency_model = LLMRequest( self.frequency_model = LLMRequest(
model_set=model_config.model_task_config.utils_small, request_type="frequency.adjust" model_set=model_config.model_task_config.utils_small, request_type="frequency.adjust"
) )
# 频率调整锁,防止并发执行
self._adjust_lock = asyncio.Lock()
def get_talk_frequency_adjust(self) -> float: def get_talk_frequency_adjust(self) -> float:
"""获取发言频率调整值""" """获取发言频率调整值"""
@@ -56,19 +59,29 @@ class FrequencyControl:
self.talk_frequency_adjust = max(0.1, min(5.0, value)) self.talk_frequency_adjust = max(0.1, min(5.0, value))
async def trigger_frequency_adjust(self) -> None: async def trigger_frequency_adjust(self) -> None:
# 使用异步锁防止并发执行
async with self._adjust_lock:
# 在锁内检查,避免并发触发
current_time = time.time()
previous_adjust_time = self.last_frequency_adjust_time
msg_list = get_raw_msg_by_timestamp_with_chat( msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id, chat_id=self.chat_id,
timestamp_start=self.last_frequency_adjust_time, timestamp_start=previous_adjust_time,
timestamp_end=time.time(), timestamp_end=current_time,
) )
if time.time() - self.last_frequency_adjust_time < 160 or len(msg_list) <= 20: if current_time - previous_adjust_time < 160 or len(msg_list) <= 20:
return return
else:
# 立即更新调整时间,防止并发触发
self.last_frequency_adjust_time = current_time
try:
new_msg_list = get_raw_msg_by_timestamp_with_chat( new_msg_list = get_raw_msg_by_timestamp_with_chat(
chat_id=self.chat_id, chat_id=self.chat_id,
timestamp_start=self.last_frequency_adjust_time, timestamp_start=previous_adjust_time,
timestamp_end=time.time(), timestamp_end=current_time,
limit=20, limit=20,
limit_mode="latest", limit_mode="latest",
) )
@@ -115,9 +128,9 @@ class FrequencyControl:
elif "过少" in response: elif "过少" in response:
logger.info(f"频率调整: 过少,调整值到{final_value_by_api}") logger.info(f"频率调整: 过少,调整值到{final_value_by_api}")
self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 1.2)) self.talk_frequency_adjust = max(0.1, min(1.5, self.talk_frequency_adjust * 1.2))
self.last_frequency_adjust_time = time.time() except Exception as e:
else: logger.error(f"频率调整失败: {e}")
logger.info("频率调整response不符合要求取消本次调整") # 即使失败也保持时间戳更新,避免频繁重试
class FrequencyControlManager: class FrequencyControlManager:

View File

@@ -2,6 +2,7 @@ import time
import json import json
import os import os
import re import re
import asyncio
from typing import List, Optional, Tuple from typing import List, Optional, Tuple
import traceback import traceback
from src.common.logger import get_logger from src.common.logger import get_logger
@@ -91,6 +92,9 @@ class ExpressionLearner:
# 维护每个chat的上次学习时间 # 维护每个chat的上次学习时间
self.last_learning_time: float = time.time() self.last_learning_time: float = time.time()
# 学习锁,防止并发执行学习任务
self._learning_lock = asyncio.Lock()
# 学习参数 # 学习参数
_, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat( _, self.enable_learning, self.learning_intensity = global_config.expression.get_expression_config_for_chat(
self.chat_id self.chat_id
@@ -139,16 +143,24 @@ class ExpressionLearner:
Returns: Returns:
bool: 是否成功触发学习 bool: 是否成功触发学习
""" """
# 使用异步锁防止并发执行
async with self._learning_lock:
# 在锁内检查,避免并发触发
# 如果锁被持有,其他协程会等待,但等待期间条件可能已变化,所以需要再次检查
if not self.should_trigger_learning(): if not self.should_trigger_learning():
return return
# 保存学习开始前的时间戳,用于获取消息范围
learning_start_timestamp = time.time()
previous_learning_time = self.last_learning_time
# 立即更新学习时间,防止并发触发
self.last_learning_time = learning_start_timestamp
try: try:
logger.info(f"在聊天流 {self.chat_name} 学习表达方式") logger.info(f"在聊天流 {self.chat_name} 学习表达方式")
# 学习语言风格 # 学习语言风格,传递学习开始前的时间戳
learnt_style = await self.learn_and_store(num=25) learnt_style = await self.learn_and_store(num=25, timestamp_start=previous_learning_time)
# 更新学习时间
self.last_learning_time = time.time()
if learnt_style: if learnt_style:
logger.info(f"聊天流 {self.chat_name} 表达学习完成") logger.info(f"聊天流 {self.chat_name} 表达学习完成")
@@ -158,13 +170,18 @@ class ExpressionLearner:
except Exception as e: except Exception as e:
logger.error(f"为聊天流 {self.chat_name} 触发学习失败: {e}") logger.error(f"为聊天流 {self.chat_name} 触发学习失败: {e}")
traceback.print_exc() traceback.print_exc()
# 即使失败也保持时间戳更新,避免频繁重试
return return
async def learn_and_store(self, num: int = 10) -> List[Tuple[str, str, str]]: async def learn_and_store(self, num: int = 10, timestamp_start: Optional[float] = None) -> List[Tuple[str, str, str]]:
""" """
学习并存储表达方式 学习并存储表达方式
Args:
num: 学习数量
timestamp_start: 学习开始的时间戳如果为None则使用self.last_learning_time
""" """
learnt_expressions = await self.learn_expression(num) learnt_expressions = await self.learn_expression(num, timestamp_start=timestamp_start)
if learnt_expressions is None: if learnt_expressions is None:
logger.info("没有学习到表达风格") logger.info("没有学习到表达风格")
@@ -374,18 +391,22 @@ class ExpressionLearner:
return matched_expressions return matched_expressions
async def learn_expression(self, num: int = 10) -> Optional[List[Tuple[str, str, str, str]]]: async def learn_expression(self, num: int = 10, timestamp_start: Optional[float] = None) -> Optional[List[Tuple[str, str, str, str]]]:
"""从指定聊天流学习表达方式 """从指定聊天流学习表达方式
Args: Args:
num: 学习数量 num: 学习数量
timestamp_start: 学习开始的时间戳如果为None则使用self.last_learning_time
""" """
current_time = time.time() current_time = time.time()
# 使用传入的时间戳如果没有则使用self.last_learning_time
start_timestamp = timestamp_start if timestamp_start is not None else self.last_learning_time
# 获取上次学习之后的消息 # 获取上次学习之后的消息
random_msg = get_raw_msg_by_timestamp_with_chat_inclusive( random_msg = get_raw_msg_by_timestamp_with_chat_inclusive(
chat_id=self.chat_id, chat_id=self.chat_id,
timestamp_start=self.last_learning_time, timestamp_start=start_timestamp,
timestamp_end=current_time, timestamp_end=current_time,
limit=num, limit=num,
) )

View File

@@ -183,6 +183,9 @@ class JargonMiner:
self.cache_limit = 100 self.cache_limit = 100
self.cache: OrderedDict[str, None] = OrderedDict() self.cache: OrderedDict[str, None] = OrderedDict()
# 黑话提取锁,防止并发执行
self._extraction_lock = asyncio.Lock()
def _add_to_cache(self, content: str) -> None: def _add_to_cache(self, content: str) -> None:
"""将提取到的黑话加入缓存保持LRU语义""" """将提取到的黑话加入缓存保持LRU语义"""
if not content: if not content:
@@ -436,7 +439,10 @@ class JargonMiner:
return bool(recent_messages and len(recent_messages) >= self.min_messages_for_learning) return bool(recent_messages and len(recent_messages) >= self.min_messages_for_learning)
async def run_once(self) -> None: async def run_once(self) -> None:
# 使用异步锁防止并发执行
async with self._extraction_lock:
try: try:
# 在锁内检查,避免并发触发
if not self.should_trigger(): if not self.should_trigger():
return return
@@ -448,6 +454,9 @@ class JargonMiner:
extraction_start_time = self.last_learning_time extraction_start_time = self.last_learning_time
extraction_end_time = time.time() extraction_end_time = time.time()
# 立即更新学习时间,防止并发触发
self.last_learning_time = extraction_end_time
# 拉取学习窗口内的消息 # 拉取学习窗口内的消息
messages = get_raw_msg_by_timestamp_with_chat_inclusive( messages = get_raw_msg_by_timestamp_with_chat_inclusive(
chat_id=self.chat_id, chat_id=self.chat_id,
@@ -684,13 +693,11 @@ class JargonMiner:
# 输出格式化的结果使用logger.info会自动应用jargon模块的颜色 # 输出格式化的结果使用logger.info会自动应用jargon模块的颜色
logger.info(f"[{self.stream_name}]疑似黑话: {jargon_str}") logger.info(f"[{self.stream_name}]疑似黑话: {jargon_str}")
# 更新为本次提取的结束时间,确保不会重复提取相同的消息窗口
self.last_learning_time = extraction_end_time
if saved or updated: if saved or updated:
logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated}chat_id={self.chat_id}") logger.info(f"jargon写入: 新增 {saved} 条,更新 {updated}chat_id={self.chat_id}")
except Exception as e: except Exception as e:
logger.error(f"JargonMiner 运行失败: {e}") logger.error(f"JargonMiner 运行失败: {e}")
# 即使失败也保持时间戳更新,避免频繁重试
class JargonMinerManager: class JargonMinerManager: