From 65e9790eb7ad15d143bc2eed6b19594cd6e46815 Mon Sep 17 00:00:00 2001 From: Bakadax Date: Thu, 15 May 2025 15:03:21 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=86=E5=89=B2=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/chat/utils/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/chat/utils/utils.py b/src/chat/utils/utils.py index 50d69d40..1974d1c8 100644 --- a/src/chat/utils/utils.py +++ b/src/chat/utils/utils.py @@ -325,15 +325,15 @@ def split_into_sentences_w_remove_punctuation(text: str) -> list[str]: return [] if len_text < 12: - split_strength = 0.2 - elif len_text < 32: split_strength = 0.5 - else: + elif len_text < 32: split_strength = 0.7 + else: + split_strength = 0.9 merge_probability = 1.0 - split_strength if merge_probability == 1.0 and len(preliminary_final_sentences) > 1 : # 只有多个句子才合并 - merged_text = " ".join(preliminary_final_sentences).strip() + merged_text = ",".join(preliminary_final_sentences).strip() # 移除末尾的逗号(中英文) if merged_text.endswith(',') or merged_text.endswith(','): merged_text = merged_text[:-1].strip()