def calculate_typing_time(input_string: str, chinese_time: float = 0.4, english_time: float = 0.2) -> float:
    """Estimate how long it takes to type ``input_string``, in seconds.

    Characters in the CJK Unified Ideographs range (U+4E00..U+9FFF) cost
    ``chinese_time`` each; every other character (letters, digits,
    punctuation, whitespace) costs ``english_time``. Both per-character
    costs are divided by a mood-dependent speed factor ``1.5 ** arousal``,
    where ``arousal`` is read from
    ``MoodManager.get_instance().current_mood.arousal``.

    Special cases:
        - If the stripped input is exactly one Chinese character, the typing
          part of the result is three times the (scaled) Chinese
          per-character time.
        - A fixed 0.3 s delay for pressing Enter is always added, so an
          empty string yields 0.3.

    Args:
        input_string: The text to be "typed".
        chinese_time: Base seconds per Chinese character (default 0.4).
        english_time: Base seconds per non-Chinese character (default 0.2).

    Returns:
        The estimated typing time in seconds, including the Enter delay.
    """
    enter_time = 0.3  # fixed delay for pressing Enter once typing is done
    single_char_factor = 3  # a lone Chinese character costs 3x the per-character time

    mood_arousal = MoodManager.get_instance().current_mood.arousal
    # The arousal value is used as the exponent exactly as stored; no
    # remapping is applied here. With base 1.5 the speed factor is 1.5x at
    # arousal 1 and about 0.67x at arousal -1.
    # NOTE(review): earlier comments described a 0..1 -> -1..1 remap and a
    # 0.5x-2x range, which the code never implemented — confirm the actual
    # arousal range against MoodManager.
    typing_speed_multiplier = 1.5 ** mood_arousal
    chinese_time /= typing_speed_multiplier
    english_time /= typing_speed_multiplier

    # Count characters inside the CJK Unified Ideographs block.
    chinese_chars = sum(1 for char in input_string if '\u4e00' <= char <= '\u9fff')

    # A message that is a single Chinese character (ignoring surrounding
    # whitespace) gets the slower single-character cost.
    if chinese_chars == 1 and len(input_string.strip()) == 1:
        return chinese_time * single_char_factor + enter_time

    # Everything that is not a Chinese character is billed at english_time.
    other_chars = len(input_string) - chinese_chars
    return chinese_chars * chinese_time + other_chars * english_time + enter_time
else: # 处理多字词的单字替换 word_result = [] + word_start_pos = current_pos for i, (char, py) in enumerate(zip(word, word_pinyin)): # 词中的字替换概率降低 word_error_rate = self.error_rate * (0.7 ** (len(word) - 1)) @@ -353,11 +363,24 @@ class ChineseTypoGenerator: word_result.append(typo_char) typo_py = pinyin(typo_char, style=Style.TONE3)[0][0] typo_info.append((char, typo_char, py, typo_py, orig_freq, typo_freq)) + char_typos.append((typo_char, char)) # 记录(错字,正确字)对 continue word_result.append(char) result.append(''.join(word_result)) + current_pos += len(word) - return ''.join(result), typo_info + # 优先从词语错误中选择,如果没有则从单字错误中选择 + correction_suggestion = None + # 50%概率返回纠正建议 + if random.random() < 0.5: + if word_typos: + wrong_word, correct_word = random.choice(word_typos) + correction_suggestion = correct_word + elif char_typos: + wrong_char, correct_char = random.choice(char_typos) + correction_suggestion = correct_char + + return ''.join(result), correction_suggestion def format_typo_info(self, typo_info): """ @@ -419,16 +442,16 @@ def main(): # 创建包含错别字的句子 start_time = time.time() - typo_sentence, typo_info = typo_generator.create_typo_sentence(sentence) + typo_sentence, correction_suggestion = typo_generator.create_typo_sentence(sentence) # 打印结果 print("\n原句:", sentence) print("错字版:", typo_sentence) - # 打印错别字信息 - if typo_info: - print("\n错别字信息:") - print(typo_generator.format_typo_info(typo_info)) + # 打印纠正建议 + if correction_suggestion: + print("\n随机纠正建议:") + print(f"应该改为:{correction_suggestion}") # 计算并打印总耗时 end_time = time.time()