fix：回复超长现可返回原文

2025-11-11 23:56:57 +08:00 · 2025-11-11 23:56:57 +08:00 · 250919f892
parent 33924e65c2
commit 250919f892
2 changed files with 51 additions and 11 deletions
--- a/src/chat/utils/chat_history_summarizer.py
+++ b/src/chat/utils/chat_history_summarizer.py
@@ -334,20 +334,60 @@ class ChatHistorySummarizer:
            )
            
            # 解析JSON响应
-            # 尝试提取JSON部分
            import re
-            json_match = re.search(r'\{[^{}]*"theme"[^{}]*\}', response, re.DOTALL)
-            if json_match:
-                json_str = json_match.group(0)
-            else:
-                json_str = response.strip()
            
            # 移除可能的markdown代码块标记
-            json_str = re.sub(r'```json\s*', '', json_str)
-            json_str = re.sub(r'```\s*', '', json_str)
+            json_str = response.strip()
+            json_str = re.sub(r'^```json\s*', '', json_str, flags=re.MULTILINE)
+            json_str = re.sub(r'^```\s*', '', json_str, flags=re.MULTILINE)
            json_str = json_str.strip()
            
-            result = json.loads(json_str)
+            # 尝试找到JSON对象的开始和结束位置
+            # 查找第一个 { 和最后一个匹配的 }
+            start_idx = json_str.find('{')
+            if start_idx == -1:
+                raise ValueError("未找到JSON对象开始标记")
+            
+            # 从后往前查找最后一个 }
+            end_idx = json_str.rfind('}')
+            if end_idx == -1 or end_idx <= start_idx:
+                raise ValueError("未找到JSON对象结束标记")
+            
+            # 提取JSON字符串
+            json_str = json_str[start_idx:end_idx + 1]
+            
+            # 尝试解析JSON
+            try:
+                result = json.loads(json_str)
+            except json.JSONDecodeError:
+                # 如果解析失败，尝试修复字符串值中的中文引号
+                # 简单方法：将字符串值中的中文引号替换为转义的英文引号
+                # 使用状态机方法：遍历字符串，在字符串值内部替换中文引号
+                fixed_chars = []
+                in_string = False
+                escape_next = False
+                i = 0
+                while i < len(json_str):
+                    char = json_str[i]
+                    if escape_next:
+                        fixed_chars.append(char)
+                        escape_next = False
+                    elif char == '\\':
+                        fixed_chars.append(char)
+                        escape_next = True
+                    elif char == '"' and not escape_next:
+                        fixed_chars.append(char)
+                        in_string = not in_string
+                    elif in_string and (char == '"' or char == '"'):
+                        # 在字符串值内部，将中文引号替换为转义的英文引号
+                        fixed_chars.append('\\"')
+                    else:
+                        fixed_chars.append(char)
+                    i += 1
+                
+                json_str = ''.join(fixed_chars)
+                # 再次尝试解析
+                result = json.loads(json_str)
            
            theme = result.get("theme", "未命名对话")
            keywords = result.get("keywords", [])
--- a/src/chat/utils/utils.py
+++ b/src/chat/utils/utils.py
@@ -406,8 +406,8 @@ def process_llm_response(text: str, enable_splitter: bool = True, enable_chinese

    if len(sentences) > max_sentence_num:
        if global_config.response_splitter.enable_overflow_return_all:
-            logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，合并后一次返回")
-            sentences = ["".join(sentences)]
+            logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，直接返回原文")
+            sentences = [cleaned_text]
        else:
            logger.warning(f"分割后消息数量过多 ({len(sentences)} 条)，返回默认回复")
            return [_get_random_default_reply()]