mirror of https://github.com/Mai-with-u/MaiBot.git
improve 新款表情包系统
parent
42144ed943
commit
583c276c91
|
|
@ -12,6 +12,7 @@ from ..models.utils_model import LLM_request
|
||||||
from ..utils.typo_generator import ChineseTypoGenerator
|
from ..utils.typo_generator import ChineseTypoGenerator
|
||||||
from .config import global_config
|
from .config import global_config
|
||||||
from .message import Message
|
from .message import Message
|
||||||
|
from ..moods.moods import MoodManager
|
||||||
|
|
||||||
driver = get_driver()
|
driver = get_driver()
|
||||||
config = driver.config
|
config = driver.config
|
||||||
|
|
@ -326,7 +327,7 @@ def random_remove_punctuation(text: str) -> str:
|
||||||
|
|
||||||
def process_llm_response(text: str) -> List[str]:
|
def process_llm_response(text: str) -> List[str]:
|
||||||
# processed_response = process_text_with_typos(content)
|
# processed_response = process_text_with_typos(content)
|
||||||
if len(text) > 300:
|
if len(text) > 200:
|
||||||
print(f"回复过长 ({len(text)} 字符),返回默认回复")
|
print(f"回复过长 ({len(text)} 字符),返回默认回复")
|
||||||
return ['懒得说']
|
return ['懒得说']
|
||||||
# 处理长消息
|
# 处理长消息
|
||||||
|
|
@ -336,30 +337,55 @@ def process_llm_response(text: str) -> List[str]:
|
||||||
tone_error_rate=0.2,
|
tone_error_rate=0.2,
|
||||||
word_replace_rate=0.02
|
word_replace_rate=0.02
|
||||||
)
|
)
|
||||||
typoed_text = typo_generator.create_typo_sentence(text)[0]
|
split_sentences = split_into_sentences_w_remove_punctuation(text)
|
||||||
sentences = split_into_sentences_w_remove_punctuation(typoed_text)
|
sentences = []
|
||||||
|
for sentence in split_sentences:
|
||||||
|
typoed_text, typo_corrections = typo_generator.create_typo_sentence(sentence)
|
||||||
|
sentences.append(typoed_text)
|
||||||
|
if typo_corrections:
|
||||||
|
sentences.append(typo_corrections)
|
||||||
# 检查分割后的消息数量是否过多(超过3条)
|
# 检查分割后的消息数量是否过多(超过5条)
|
||||||
if len(sentences) > 4:
|
|
||||||
|
if len(sentences) > 5:
|
||||||
print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
|
print(f"分割后消息数量过多 ({len(sentences)} 条),返回默认回复")
|
||||||
return [f'{global_config.BOT_NICKNAME}不知道哦']
|
return [f'{global_config.BOT_NICKNAME}不知道哦']
|
||||||
|
|
||||||
return sentences
|
return sentences
|
||||||
|
|
||||||
|
|
||||||
def calculate_typing_time(input_string: str, chinese_time: float = 0.2, english_time: float = 0.1) -> float:
|
def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
|
||||||
"""
|
"""
|
||||||
计算输入字符串所需的时间,中文和英文字符有不同的输入时间
|
计算输入字符串所需的时间,中文和英文字符有不同的输入时间
|
||||||
input_string (str): 输入的字符串
|
input_string (str): 输入的字符串
|
||||||
chinese_time (float): 中文字符的输入时间,默认为0.3秒
|
chinese_time (float): 中文字符的输入时间,默认为0.4秒
|
||||||
english_time (float): 英文字符的输入时间,默认为0.15秒
|
english_time (float): 英文字符的输入时间,默认为0.2秒
|
||||||
|
|
||||||
|
特殊情况:
|
||||||
|
- 如果只有一个中文字符,将使用3倍的中文输入时间
|
||||||
|
- 在所有输入结束后,额外加上回车时间0.3秒
|
||||||
"""
|
"""
|
||||||
|
mood_manager = MoodManager.get_instance()
|
||||||
|
# 直接读取当前唤醒度,此处未做任何映射(取值范围待确认)
|
||||||
|
mood_arousal = mood_manager.current_mood.arousal
|
||||||
|
# 映射到约0.67到1.5倍的速度系数(假定唤醒度范围为-1到1)
|
||||||
|
typing_speed_multiplier = 1.5 ** mood_arousal # 唤醒度为1时速度为1.5倍,为-1时速度约为0.67倍
|
||||||
|
chinese_time *= 1/typing_speed_multiplier
|
||||||
|
english_time *= 1/typing_speed_multiplier
|
||||||
|
# 计算中文字符数
|
||||||
|
chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')
|
||||||
|
|
||||||
|
# 如果只有一个中文字符,使用3倍时间
|
||||||
|
if chinese_chars == 1 and len(input_string.strip()) == 1:
|
||||||
|
return chinese_time * 3 + 0.3 # 加上回车时间
|
||||||
|
|
||||||
|
# 正常计算所有字符的输入时间
|
||||||
total_time = 0.0
|
total_time = 0.0
|
||||||
for char in input_string:
|
for char in input_string:
|
||||||
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
|
if '\u4e00' <= char <= '\u9fff': # 判断是否为中文字符
|
||||||
total_time += chinese_time
|
total_time += chinese_time
|
||||||
else: # 其他字符(如英文)
|
else: # 其他字符(如英文)
|
||||||
total_time += english_time
|
total_time += english_time
|
||||||
return total_time
|
return total_time + 0.3 # 加上回车时间
|
||||||
|
|
||||||
|
|
||||||
def cosine_similarity(v1, v2):
|
def cosine_similarity(v1, v2):
|
||||||
|
|
|
||||||
|
|
@ -284,10 +284,13 @@ class ChineseTypoGenerator:
|
||||||
|
|
||||||
返回:
|
返回:
|
||||||
typo_sentence: 包含错别字的句子
|
typo_sentence: 包含错别字的句子
|
||||||
typo_info: 错别字信息列表
|
correction_suggestion: 随机选择的一个纠正建议,返回正确的字/词
|
||||||
"""
|
"""
|
||||||
result = []
|
result = []
|
||||||
typo_info = []
|
typo_info = []
|
||||||
|
word_typos = [] # 记录词语错误对(错词,正确词)
|
||||||
|
char_typos = [] # 记录单字错误对(错字,正确字)
|
||||||
|
current_pos = 0
|
||||||
|
|
||||||
# 分词
|
# 分词
|
||||||
words = self._segment_sentence(sentence)
|
words = self._segment_sentence(sentence)
|
||||||
|
|
@ -296,6 +299,7 @@ class ChineseTypoGenerator:
|
||||||
# 如果是标点符号或空格,直接添加
|
# 如果是标点符号或空格,直接添加
|
||||||
if all(not self._is_chinese_char(c) for c in word):
|
if all(not self._is_chinese_char(c) for c in word):
|
||||||
result.append(word)
|
result.append(word)
|
||||||
|
current_pos += len(word)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 获取词语的拼音
|
# 获取词语的拼音
|
||||||
|
|
@ -316,6 +320,8 @@ class ChineseTypoGenerator:
|
||||||
' '.join(word_pinyin),
|
' '.join(word_pinyin),
|
||||||
' '.join(self._get_word_pinyin(typo_word)),
|
' '.join(self._get_word_pinyin(typo_word)),
|
||||||
orig_freq, typo_freq))
|
orig_freq, typo_freq))
|
||||||
|
word_typos.append((typo_word, word)) # 记录(错词,正确词)对
|
||||||
|
current_pos += len(typo_word)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 如果不进行整词替换,则进行单字替换
|
# 如果不进行整词替换,则进行单字替换
|
||||||
|
|
@ -333,11 +339,15 @@ class ChineseTypoGenerator:
|
||||||
result.append(typo_char)
|
result.append(typo_char)
|
||||||
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
|
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
|
||||||
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
|
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
|
||||||
|
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
|
||||||
|
current_pos += 1
|
||||||
continue
|
continue
|
||||||
result.append(char)
|
result.append(char)
|
||||||
|
current_pos += 1
|
||||||
else:
|
else:
|
||||||
# 处理多字词的单字替换
|
# 处理多字词的单字替换
|
||||||
word_result = []
|
word_result = []
|
||||||
|
word_start_pos = current_pos
|
||||||
for i, (char, py) in enumerate(zip(word, word_pinyin)):
|
for i, (char, py) in enumerate(zip(word, word_pinyin)):
|
||||||
# 词中的字替换概率降低
|
# 词中的字替换概率降低
|
||||||
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
|
word_error_rate = self.error_rate * (0.7 ** (len(word) - 1))
|
||||||
|
|
@ -353,11 +363,24 @@ class ChineseTypoGenerator:
|
||||||
word_result.append(typo_char)
|
word_result.append(typo_char)
|
||||||
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
|
typo_py = pinyin(typo_char, style=Style.TONE3)[0][0]
|
||||||
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
|
typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq))
|
||||||
|
char_typos.append((typo_char, char)) # 记录(错字,正确字)对
|
||||||
continue
|
continue
|
||||||
word_result.append(char)
|
word_result.append(char)
|
||||||
result.append(''.join(word_result))
|
result.append(''.join(word_result))
|
||||||
|
current_pos += len(word)
|
||||||
|
|
||||||
return ''.join(result), typo_info
|
# 优先从词语错误中选择,如果没有则从单字错误中选择
|
||||||
|
correction_suggestion = None
|
||||||
|
# 50%概率返回纠正建议
|
||||||
|
if random.random() < 0.5:
|
||||||
|
if word_typos:
|
||||||
|
wrong_word, correct_word = random.choice(word_typos)
|
||||||
|
correction_suggestion = correct_word
|
||||||
|
elif char_typos:
|
||||||
|
wrong_char, correct_char = random.choice(char_typos)
|
||||||
|
correction_suggestion = correct_char
|
||||||
|
|
||||||
|
return ''.join(result), correction_suggestion
|
||||||
|
|
||||||
def format_typo_info(self, typo_info):
|
def format_typo_info(self, typo_info):
|
||||||
"""
|
"""
|
||||||
|
|
@ -419,16 +442,16 @@ def main():
|
||||||
|
|
||||||
# 创建包含错别字的句子
|
# 创建包含错别字的句子
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence)
|
typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence)
|
||||||
|
|
||||||
# 打印结果
|
# 打印结果
|
||||||
print("\n原句:", sentence)
|
print("\n原句:", sentence)
|
||||||
print("错字版:", typo_sentence)
|
print("错字版:", typo_sentence)
|
||||||
|
|
||||||
# 打印错别字信息
|
# 打印纠正建议
|
||||||
if typo_info:
|
if correction_suggestion:
|
||||||
print("\n错别字信息:")
|
print("\n随机纠正建议:")
|
||||||
print(typo_generator.format_typo_info(typo_info))
|
print(f"应该改为:{correction_suggestion}")
|
||||||
|
|
||||||
# 计算并打印总耗时
|
# 计算并打印总耗时
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue