From 3c81102ffcc9ce77f5a1c9a0634e7736ca3a23b1 Mon Sep 17 00:00:00 2001
From: unknown <2994196236@qq.com>
Date: Sat, 6 Dec 2025 23:23:52 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A1=A8=E8=BE=BE=E5=AD=A6=E4=B9=A0=E6=8F=90?=
 =?UTF-8?q?=E7=A4=BA=E8=AF=8D=E4=BC=98=E5=8C=96=E5=92=8C=E8=BF=87=E6=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/express/expression_learner.py | 73 +++++++++++++++++++++++++++++--
 1 file changed, 69 insertions(+), 4 deletions(-)

diff --git a/src/express/expression_learner.py b/src/express/expression_learner.py
index a1b59f65..1dd588a8 100644
--- a/src/express/expression_learner.py
+++ b/src/express/expression_learner.py
@@ -30,7 +30,7 @@ def init_prompt() -> None:
 请从上面这段群聊中概括除了人名为"SELF"之外的人的语言风格。
 每一行消息前面的方括号中的数字（如 [1]、[2]）是该行消息的唯一编号，请在输出中引用这些编号来标注“表达方式的来源行”。
 1. 只考虑文字，不要考虑表情包和图片
-2. 不要涉及具体的人名和事件以及话题内容，但是可以涉及具体名词
+2. 不要涉及具体的人名，但是可以涉及具体名词
 3. 思考有没有特殊的梗，一并总结成语言风格
 4. 必须符合"{global_config.expression.filtration_prompt}"的要求
 5. 例子仅供参考，请严格根据群聊内容总结!!!
@@ -320,9 +320,74 @@ class ExpressionLearner:
                     parsed = json.loads(repaired)
                 else:
                     parsed = repaired
-        except Exception:
-            logger.error(f"解析表达风格 JSON 失败，原始响应：{response}")
-            return []
+        except Exception as parse_error:
+            # 如果解析失败，尝试修复中文引号问题
+            # 使用状态机方法，在 JSON 字符串值内部将中文引号替换为转义的英文引号
+            try:
+                def fix_chinese_quotes_in_json(text):
+                    """Escape Chinese curly quotes inside JSON string values.
+
+                    Scans ``text`` once, tracking whether the cursor is inside
+                    a double-quoted JSON string and whether the previous
+                    character was a backslash. Inside a string, U+201C and
+                    U+201D are each replaced by a backslash-escaped ASCII
+                    double quote; every other character is copied unchanged,
+                    including curly quotes that appear outside of strings.
+
+                    Args:
+                        text: Raw JSON-like text to repair.
+
+                    Returns:
+                        The repaired text as a new string.
+                    """
+                    # The curly quotes are spelled as escapes on purpose: if
+                    # they get normalised to ASCII quotes, the comparison can
+                    # never match (ASCII quotes are consumed by the toggle
+                    # branch first) and this repair silently becomes a no-op.
+                    chinese_quotes = ("\u201c", "\u201d")
+                    result = []
+                    in_string = False
+                    escape_next = False
+
+                    for char in text:
+                        if escape_next:
+                            # Character right after a backslash: copy verbatim.
+                            result.append(char)
+                            escape_next = False
+                        elif char == "\\":
+                            # Backslash escapes the following character.
+                            result.append(char)
+                            escape_next = True
+                        elif char == '"':
+                            # Unescaped ASCII quote toggles the in-string state.
+                            in_string = not in_string
+                            result.append(char)
+                        elif in_string and char in chinese_quotes:
+                            # Curly quote inside a string value.
+                            result.append('\\"')
+                        else:
+                            # Everything else passes through untouched.
+                            result.append(char)
+
+                    return "".join(result)
+                
+                fixed_raw = fix_chinese_quotes_in_json(raw)
+                
+                # 再次尝试解析
+                if fixed_raw.startswith("[") and fixed_raw.endswith("]"):
+                    parsed = json.loads(fixed_raw)
+                else:
+                    repaired = repair_json(fixed_raw)
+                    if isinstance(repaired, str):
+                        parsed = json.loads(repaired)
+                    else:
+                        parsed = repaired
+            except Exception as fix_error:
+                logger.error(f"解析表达风格 JSON 失败，初始错误: {type(parse_error).__name__}: {str(parse_error)}")
+                logger.error(f"修复中文引号后仍失败，错误: {type(fix_error).__name__}: {str(fix_error)}")
+                logger.error(f"解析表达风格 JSON 失败，原始响应：{response}")
+                logger.error(f"处理后的 JSON 字符串（前500字符）：{raw[:500]}")
+                return []
 
         if isinstance(parsed, dict):
             parsed_list = [parsed]