From ba9b9d26a2dc39ab83997b8e140f91b9e7a0e02c Mon Sep 17 00:00:00 2001 From: SengokuCola <1026294844@qq.com> Date: Sat, 27 Dec 2025 11:54:31 +0800 Subject: [PATCH] =?UTF-8?q?fix=EF=BC=9A=E9=97=B4=E9=9A=94=E8=BF=87?= =?UTF-8?q?=E9=95=BF=E7=9A=84=E6=B6=88=E6=81=AF=E5=9C=A8=E5=9B=9E=E5=A4=8D?= =?UTF-8?q?=E5=99=A8=E4=B8=AD=E4=BC=9A=E7=89=B9=E6=AE=8A=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/analyze_evaluation_stats.py | 322 ++ scripts/evaluate_expressions_llm_v6.py | 67 +- scripts/expression_merge_simulation.py | 567 ---- scripts/expression_scatter_analysis.py | 342 --- scripts/expression_similarity_analysis.py | 559 ---- scripts/expression_stats.py | 196 -- scripts/manual_evaluation_results.json | 3236 +++++++++++++++++++++ src/chat/replyer/group_generator.py | 114 +- src/chat/replyer/private_generator.py | 1 + src/chat/utils/chat_message_builder.py | 32 + 10 files changed, 3653 insertions(+), 1783 deletions(-) create mode 100644 scripts/analyze_evaluation_stats.py delete mode 100644 scripts/expression_merge_simulation.py delete mode 100644 scripts/expression_scatter_analysis.py delete mode 100644 scripts/expression_similarity_analysis.py delete mode 100644 scripts/expression_stats.py create mode 100644 scripts/manual_evaluation_results.json diff --git a/scripts/analyze_evaluation_stats.py b/scripts/analyze_evaluation_stats.py new file mode 100644 index 00000000..ff7d7b3b --- /dev/null +++ b/scripts/analyze_evaluation_stats.py @@ -0,0 +1,322 @@ +""" +评估结果统计脚本 + +功能: +1. 扫描temp目录下所有JSON文件 +2. 分析每个文件的统计信息 +3. 输出详细的统计报告 +""" + +import json +import os +import sys +import glob +from collections import Counter, defaultdict +from datetime import datetime +from typing import Dict, List, Set, Tuple + +# 添加项目根目录到路径 +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, project_root) + +from src.common.logger import get_logger + +logger = get_logger("evaluation_stats_analyzer") + +# 评估结果文件路径 +TEMP_DIR = os.path.join(os.path.dirname(__file__), "temp") + + +def parse_datetime(dt_str: str) -> datetime | None: + """解析ISO格式的日期时间字符串""" + try: + return datetime.fromisoformat(dt_str) + except Exception: + return None + + +def analyze_single_file(file_path: str) -> Dict: + """ + 分析单个JSON文件的统计信息 + + Args: + file_path: JSON文件路径 + + Returns: + 统计信息字典 + """ + file_name = os.path.basename(file_path) + stats = { + "file_name": file_name, + "file_path": file_path, + "file_size": os.path.getsize(file_path), + "error": None, + "last_updated": None, + "total_count": 0, + "actual_count": 0, + "suitable_count": 0, + "unsuitable_count": 0, + "suitable_rate": 0.0, + "unique_pairs": 0, + "evaluators": Counter(), + "evaluation_dates": [], + "date_range": None, + "has_expression_id": False, + "has_reason": False, + "reason_count": 0, + } + + try: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + + # 基本信息 + stats["last_updated"] = data.get("last_updated") + stats["total_count"] = data.get("total_count", 0) + + results = data.get("manual_results", []) + stats["actual_count"] = len(results) + + if not results: + return stats + + # 统计通过/不通过 + suitable_count = sum(1 for r in results if r.get("suitable") is True) + unsuitable_count = sum(1 for r in results if r.get("suitable") is False) + stats["suitable_count"] = suitable_count + stats["unsuitable_count"] = unsuitable_count + stats["suitable_rate"] = (suitable_count / len(results) * 100) if results else 0.0 + + # 统计唯一的(situation, style)对 + pairs: Set[Tuple[str, str]] = set() + for r in results: + if "situation" in r and "style" in r: + pairs.add((r["situation"], r["style"])) + stats["unique_pairs"] = len(pairs) + + # 统计评估者 + for r in results: + evaluator = r.get("evaluator", "unknown") + stats["evaluators"][evaluator] += 1 + + # 统计评估时间 + evaluation_dates = [] + for r in results: + evaluated_at = r.get("evaluated_at") + if evaluated_at: + dt = parse_datetime(evaluated_at) + if dt: + evaluation_dates.append(dt) + + stats["evaluation_dates"] = evaluation_dates + if evaluation_dates: + min_date = min(evaluation_dates) + max_date = max(evaluation_dates) + stats["date_range"] = { + "start": min_date.isoformat(), + "end": max_date.isoformat(), + "duration_days": (max_date - min_date).days + 1 + } + + # 检查字段存在性 + stats["has_expression_id"] = any("expression_id" in r for r in results) + stats["has_reason"] = any(r.get("reason") for r in results) + stats["reason_count"] = sum(1 for r in results if r.get("reason")) + + except Exception as e: + stats["error"] = str(e) + logger.error(f"分析文件 {file_name} 时出错: {e}") + + return stats + + +def print_file_stats(stats: Dict, index: int = None): + """打印单个文件的统计信息""" + prefix = f"[{index}] " if index is not None else "" + print(f"\n{'=' * 80}") + print(f"{prefix}文件: {stats['file_name']}") + print(f"{'=' * 80}") + + if stats["error"]: + print(f"✗ 错误: {stats['error']}") + return + + print(f"文件路径: {stats['file_path']}") + print(f"文件大小: {stats['file_size']:,} 字节 ({stats['file_size'] / 1024:.2f} KB)") + + if stats["last_updated"]: + print(f"最后更新: {stats['last_updated']}") + + print(f"\n【记录统计】") + print(f" 文件中的 total_count: {stats['total_count']}") + print(f" 实际记录数: {stats['actual_count']}") + + if stats['total_count'] != stats['actual_count']: + diff = stats['total_count'] - stats['actual_count'] + print(f" ⚠️ 数量不一致,差值: {diff:+d}") + + print(f"\n【评估结果统计】") + print(f" 通过 (suitable=True): {stats['suitable_count']} 条 ({stats['suitable_rate']:.2f}%)") + print(f" 不通过 (suitable=False): {stats['unsuitable_count']} 条 ({100 - stats['suitable_rate']:.2f}%)") + + print(f"\n【唯一性统计】") + print(f" 唯一 (situation, style) 对: {stats['unique_pairs']} 条") + if stats['actual_count'] > 0: + duplicate_count = stats['actual_count'] - stats['unique_pairs'] + duplicate_rate = (duplicate_count / stats['actual_count'] * 100) if stats['actual_count'] > 0 else 0 + print(f" 重复记录: {duplicate_count} 条 ({duplicate_rate:.2f}%)") + + print(f"\n【评估者统计】") + if stats['evaluators']: + for evaluator, count in stats['evaluators'].most_common(): + rate = (count / stats['actual_count'] * 100) if stats['actual_count'] > 0 else 0 + print(f" {evaluator}: {count} 条 ({rate:.2f}%)") + else: + print(" 无评估者信息") + + print(f"\n【时间统计】") + if stats['date_range']: + print(f" 最早评估时间: {stats['date_range']['start']}") + print(f" 最晚评估时间: {stats['date_range']['end']}") + print(f" 评估时间跨度: {stats['date_range']['duration_days']} 天") + else: + print(" 无时间信息") + + print(f"\n【字段统计】") + print(f" 包含 expression_id: {'是' if stats['has_expression_id'] else '否'}") + print(f" 包含 reason: {'是' if stats['has_reason'] else '否'}") + if stats['has_reason']: + rate = (stats['reason_count'] / stats['actual_count'] * 100) if stats['actual_count'] > 0 else 0 + print(f" 有理由的记录: {stats['reason_count']} 条 ({rate:.2f}%)") + + +def print_summary(all_stats: List[Dict]): + """打印汇总统计信息""" + print(f"\n{'=' * 80}") + print("汇总统计") + print(f"{'=' * 80}") + + total_files = len(all_stats) + valid_files = [s for s in all_stats if not s.get("error")] + error_files = [s for s in all_stats if s.get("error")] + + print(f"\n【文件统计】") + print(f" 总文件数: {total_files}") + print(f" 成功解析: {len(valid_files)}") + print(f" 解析失败: {len(error_files)}") + + if error_files: + print(f"\n 失败文件列表:") + for stats in error_files: + print(f" - {stats['file_name']}: {stats['error']}") + + if not valid_files: + print("\n没有成功解析的文件") + return + + # 汇总记录统计 + total_records = sum(s['actual_count'] for s in valid_files) + total_suitable = sum(s['suitable_count'] for s in valid_files) + total_unsuitable = sum(s['unsuitable_count'] for s in valid_files) + total_unique_pairs = set() + + # 收集所有唯一的(situation, style)对 + for stats in valid_files: + try: + with open(stats['file_path'], "r", encoding="utf-8") as f: + data = json.load(f) + results = data.get("manual_results", []) + for r in results: + if "situation" in r and "style" in r: + total_unique_pairs.add((r["situation"], r["style"])) + except Exception: + pass + + print(f"\n【记录汇总】") + print(f" 总记录数: {total_records:,} 条") + print(f" 通过: {total_suitable:,} 条 ({total_suitable / total_records * 100:.2f}%)" if total_records > 0 else " 通过: 0 条") + print(f" 不通过: {total_unsuitable:,} 条 ({total_unsuitable / total_records * 100:.2f}%)" if total_records > 0 else " 不通过: 0 条") + print(f" 唯一 (situation, style) 对: {len(total_unique_pairs):,} 条") + + if total_records > 0: + duplicate_count = total_records - len(total_unique_pairs) + duplicate_rate = (duplicate_count / total_records * 100) if total_records > 0 else 0 + print(f" 重复记录: {duplicate_count:,} 条 ({duplicate_rate:.2f}%)") + + # 汇总评估者统计 + all_evaluators = Counter() + for stats in valid_files: + all_evaluators.update(stats['evaluators']) + + print(f"\n【评估者汇总】") + if all_evaluators: + for evaluator, count in all_evaluators.most_common(): + rate = (count / total_records * 100) if total_records > 0 else 0 + print(f" {evaluator}: {count:,} 条 ({rate:.2f}%)") + else: + print(" 无评估者信息") + + # 汇总时间范围 + all_dates = [] + for stats in valid_files: + all_dates.extend(stats['evaluation_dates']) + + if all_dates: + min_date = min(all_dates) + max_date = max(all_dates) + print(f"\n【时间汇总】") + print(f" 最早评估时间: {min_date.isoformat()}") + print(f" 最晚评估时间: {max_date.isoformat()}") + print(f" 总时间跨度: {(max_date - min_date).days + 1} 天") + + # 文件大小汇总 + total_size = sum(s['file_size'] for s in valid_files) + avg_size = total_size / len(valid_files) if valid_files else 0 + print(f"\n【文件大小汇总】") + print(f" 总大小: {total_size:,} 字节 ({total_size / 1024 / 1024:.2f} MB)") + print(f" 平均大小: {avg_size:,.0f} 字节 ({avg_size / 1024:.2f} KB)") + + +def main(): + """主函数""" + logger.info("=" * 80) + logger.info("开始分析评估结果统计信息") + logger.info("=" * 80) + + if not os.path.exists(TEMP_DIR): + print(f"\n✗ 错误:未找到temp目录: {TEMP_DIR}") + logger.error(f"未找到temp目录: {TEMP_DIR}") + return + + # 查找所有JSON文件 + json_files = glob.glob(os.path.join(TEMP_DIR, "*.json")) + + if not json_files: + print(f"\n✗ 错误:temp目录下未找到JSON文件: {TEMP_DIR}") + logger.error(f"temp目录下未找到JSON文件: {TEMP_DIR}") + return + + json_files.sort() # 按文件名排序 + + print(f"\n找到 {len(json_files)} 个JSON文件") + print("=" * 80) + + # 分析每个文件 + all_stats = [] + for i, json_file in enumerate(json_files, 1): + stats = analyze_single_file(json_file) + all_stats.append(stats) + print_file_stats(stats, index=i) + + # 打印汇总统计 + print_summary(all_stats) + + print(f"\n{'=' * 80}") + print("分析完成") + print(f"{'=' * 80}") + + +if __name__ == "__main__": + main() + + diff --git a/scripts/evaluate_expressions_llm_v6.py b/scripts/evaluate_expressions_llm_v6.py index 0a696e48..cb9a86ff 100644 --- a/scripts/evaluate_expressions_llm_v6.py +++ b/scripts/evaluate_expressions_llm_v6.py @@ -13,7 +13,8 @@ import json import random import sys import os -from typing import List, Dict +import glob +from typing import List, Dict, Set, Tuple # 添加项目根目录到路径 project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) @@ -27,32 +28,66 @@ logger = get_logger("expression_evaluator_llm") # 评估结果文件路径 TEMP_DIR = os.path.join(os.path.dirname(__file__), "temp") -MANUAL_EVAL_FILE = os.path.join(TEMP_DIR, "manual_evaluation_results.json") def load_manual_results() -> List[Dict]: """ - 加载人工评估结果 + 加载人工评估结果(自动读取temp目录下所有JSON文件并合并) Returns: - 人工评估结果列表 + 人工评估结果列表(已去重) """ - if not os.path.exists(MANUAL_EVAL_FILE): - logger.error(f"未找到人工评估结果文件: {MANUAL_EVAL_FILE}") - print("\n✗ 错误:未找到人工评估结果文件") + if not os.path.exists(TEMP_DIR): + logger.error(f"未找到temp目录: {TEMP_DIR}") + print("\n✗ 错误:未找到temp目录") print(" 请先运行 evaluate_expressions_manual.py 进行人工评估") return [] - try: - with open(MANUAL_EVAL_FILE, "r", encoding="utf-8") as f: - data = json.load(f) - results = data.get("manual_results", []) - logger.info(f"成功加载 {len(results)} 条人工评估结果") - return results - except Exception as e: - logger.error(f"加载人工评估结果失败: {e}") - print(f"\n✗ 加载人工评估结果失败: {e}") + # 查找所有JSON文件 + json_files = glob.glob(os.path.join(TEMP_DIR, "*.json")) + + if not json_files: + logger.error(f"temp目录下未找到JSON文件: {TEMP_DIR}") + print("\n✗ 错误:temp目录下未找到JSON文件") + print(" 请先运行 evaluate_expressions_manual.py 进行人工评估") return [] + + logger.info(f"找到 {len(json_files)} 个JSON文件") + print(f"\n找到 {len(json_files)} 个JSON文件:") + for json_file in json_files: + print(f" - {os.path.basename(json_file)}") + + # 读取并合并所有JSON文件 + all_results = [] + seen_pairs: Set[Tuple[str, str]] = set() # 用于去重 + + for json_file in json_files: + try: + with open(json_file, "r", encoding="utf-8") as f: + data = json.load(f) + results = data.get("manual_results", []) + + # 去重:使用(situation, style)作为唯一标识 + for result in results: + if "situation" not in result or "style" not in result: + logger.warning(f"跳过无效数据(缺少必要字段): {result}") + continue + + pair = (result["situation"], result["style"]) + if pair not in seen_pairs: + seen_pairs.add(pair) + all_results.append(result) + + logger.info(f"从 {os.path.basename(json_file)} 加载了 {len(results)} 条结果") + except Exception as e: + logger.error(f"加载文件 {json_file} 失败: {e}") + print(f" 警告:加载文件 {os.path.basename(json_file)} 失败: {e}") + continue + + logger.info(f"成功合并 {len(all_results)} 条人工评估结果(去重后)") + print(f"\n✓ 成功合并 {len(all_results)} 条人工评估结果(已去重)") + + return all_results def create_evaluation_prompt(situation: str, style: str) -> str: diff --git a/scripts/expression_merge_simulation.py b/scripts/expression_merge_simulation.py deleted file mode 100644 index a4d9525a..00000000 --- a/scripts/expression_merge_simulation.py +++ /dev/null @@ -1,567 +0,0 @@ -""" -模拟 Expression 合并过程 - -用法: - python scripts/expression_merge_simulation.py - 或指定 chat_id: - python scripts/expression_merge_simulation.py --chat-id - 或指定相似度阈值: - python scripts/expression_merge_simulation.py --similarity-threshold 0.8 -""" - -import sys -import os -import json -import argparse -import asyncio -import random -from typing import List, Dict, Tuple, Optional -from collections import defaultdict -from datetime import datetime - -# Add project root to Python path -project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, project_root) - -# Import after setting up path (required for project imports) -from src.common.database.database_model import Expression, ChatStreams # noqa: E402 -from src.bw_learner.learner_utils import calculate_style_similarity # noqa: E402 -from src.llm_models.utils_model import LLMRequest # noqa: E402 -from src.config.config import model_config # noqa: E402 - - -def get_chat_name(chat_id: str) -> str: - """根据 chat_id 获取聊天名称""" - try: - chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id) - if chat_stream is None: - return f"未知聊天 ({chat_id[:8]}...)" - - if chat_stream.group_name: - return f"{chat_stream.group_name}" - elif chat_stream.user_nickname: - return f"{chat_stream.user_nickname}的私聊" - else: - return f"未知聊天 ({chat_id[:8]}...)" - except Exception: - return f"查询失败 ({chat_id[:8]}...)" - - -def parse_content_list(stored_list: Optional[str]) -> List[str]: - """解析 content_list JSON 字符串为列表""" - if not stored_list: - return [] - try: - data = json.loads(stored_list) - except json.JSONDecodeError: - return [] - return [str(item) for item in data if isinstance(item, str)] if isinstance(data, list) else [] - - -def parse_style_list(stored_list: Optional[str]) -> List[str]: - """解析 style_list JSON 字符串为列表""" - if not stored_list: - return [] - try: - data = json.loads(stored_list) - except json.JSONDecodeError: - return [] - return [str(item) for item in data if isinstance(item, str)] if isinstance(data, list) else [] - - -def find_exact_style_match( - expressions: List[Expression], - target_style: str, - chat_id: str, - exclude_ids: set -) -> Optional[Expression]: - """ - 查找具有完全匹配 style 的 Expression 记录 - 检查 style 字段和 style_list 中的每一项 - """ - for expr in expressions: - if expr.chat_id != chat_id or expr.id in exclude_ids: - continue - - # 检查 style 字段 - if expr.style == target_style: - return expr - - # 检查 style_list 中的每一项 - style_list = parse_style_list(expr.style_list) - if target_style in style_list: - return expr - - return None - - -def find_similar_style_expression( - expressions: List[Expression], - target_style: str, - chat_id: str, - similarity_threshold: float, - exclude_ids: set -) -> Optional[Tuple[Expression, float]]: - """ - 查找具有相似 style 的 Expression 记录 - 检查 style 字段和 style_list 中的每一项 - - Returns: - (Expression, similarity) 或 None - """ - best_match = None - best_similarity = 0.0 - - for expr in expressions: - if expr.chat_id != chat_id or expr.id in exclude_ids: - continue - - # 检查 style 字段 - similarity = calculate_style_similarity(target_style, expr.style) - if similarity >= similarity_threshold and similarity > best_similarity: - best_similarity = similarity - best_match = expr - - # 检查 style_list 中的每一项 - style_list = parse_style_list(expr.style_list) - for existing_style in style_list: - similarity = calculate_style_similarity(target_style, existing_style) - if similarity >= similarity_threshold and similarity > best_similarity: - best_similarity = similarity - best_match = expr - - if best_match: - return (best_match, best_similarity) - return None - - -async def compose_situation_text(content_list: List[str], summary_model: LLMRequest) -> str: - """组合 situation 文本,尝试使用 LLM 总结""" - sanitized = [c.strip() for c in content_list if c.strip()] - if not sanitized: - return "" - - if len(sanitized) == 1: - return sanitized[0] - - # 尝试使用 LLM 总结 - prompt = ( - "请阅读以下多个聊天情境描述,并将它们概括成一句简短的话," - "长度不超过20个字,保留共同特点:\n" - f"{chr(10).join(f'- {s}' for s in sanitized[-10:])}\n只输出概括内容。" - ) - - try: - summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2) - summary = summary.strip() - if summary: - return summary - except Exception as e: - print(f" ⚠️ LLM 总结 situation 失败: {e}") - - # 如果总结失败,返回用 "/" 连接的字符串 - return "/".join(sanitized) - - -async def compose_style_text(style_list: List[str], summary_model: LLMRequest) -> str: - """组合 style 文本,尝试使用 LLM 总结""" - sanitized = [s.strip() for s in style_list if s.strip()] - if not sanitized: - return "" - - if len(sanitized) == 1: - return sanitized[0] - - # 尝试使用 LLM 总结 - prompt = ( - "请阅读以下多个语言风格/表达方式,并将它们概括成一句简短的话," - "长度不超过20个字,保留共同特点:\n" - f"{chr(10).join(f'- {s}' for s in sanitized[-10:])}\n只输出概括内容。" - ) - - try: - summary, _ = await summary_model.generate_response_async(prompt, temperature=0.2) - - print(f"Prompt:{prompt} Summary:{summary}") - - summary = summary.strip() - if summary: - return summary - except Exception as e: - print(f" ⚠️ LLM 总结 style 失败: {e}") - - # 如果总结失败,返回第一个 - return sanitized[0] - - -async def simulate_merge( - expressions: List[Expression], - similarity_threshold: float = 0.75, - use_llm: bool = False, - max_samples: int = 10, -) -> Dict: - """ - 模拟合并过程 - - Args: - expressions: Expression 列表(从数据库读出的原始记录) - similarity_threshold: style 相似度阈值 - use_llm: 是否使用 LLM 进行实际总结 - max_samples: 最多随机抽取的 Expression 数量(为 0 或 None 时表示不限制) - - Returns: - 包含合并统计信息的字典 - """ - # 如果样本太多,随机抽取一部分进行模拟,避免运行时间过长 - if max_samples and len(expressions) > max_samples: - expressions = random.sample(expressions, max_samples) - - # 按 chat_id 分组 - expressions_by_chat = defaultdict(list) - for expr in expressions: - expressions_by_chat[expr.chat_id].append(expr) - - # 初始化 LLM 模型(如果需要) - summary_model = None - if use_llm: - try: - summary_model = LLMRequest( - model_set=model_config.model_task_config.tool_use, - request_type="expression.summary" - ) - print("✅ LLM 模型已初始化,将进行实际总结") - except Exception as e: - print(f"⚠️ LLM 模型初始化失败: {e},将跳过 LLM 总结") - use_llm = False - - merge_stats = { - "total_expressions": len(expressions), - "total_chats": len(expressions_by_chat), - "exact_matches": 0, - "similar_matches": 0, - "new_records": 0, - "merge_details": [], - "chat_stats": {}, - "use_llm": use_llm - } - - # 为每个 chat_id 模拟合并 - for chat_id, chat_expressions in expressions_by_chat.items(): - chat_name = get_chat_name(chat_id) - chat_stat = { - "chat_id": chat_id, - "chat_name": chat_name, - "total": len(chat_expressions), - "exact_matches": 0, - "similar_matches": 0, - "new_records": 0, - "merges": [] - } - - processed_ids = set() - - for expr in chat_expressions: - if expr.id in processed_ids: - continue - - target_style = expr.style - target_situation = expr.situation - - # 第一层:检查完全匹配 - exact_match = find_exact_style_match( - chat_expressions, - target_style, - chat_id, - {expr.id} - ) - - if exact_match: - # 完全匹配(不使用 LLM 总结) - # 模拟合并后的 content_list 和 style_list - target_content_list = parse_content_list(exact_match.content_list) - target_content_list.append(target_situation) - - target_style_list = parse_style_list(exact_match.style_list) - if exact_match.style and exact_match.style not in target_style_list: - target_style_list.append(exact_match.style) - if target_style not in target_style_list: - target_style_list.append(target_style) - - merge_info = { - "type": "exact", - "source_id": expr.id, - "target_id": exact_match.id, - "source_style": target_style, - "target_style": exact_match.style, - "source_situation": target_situation, - "target_situation": exact_match.situation, - "similarity": 1.0, - "merged_content_list": target_content_list, - "merged_style_list": target_style_list, - "merged_situation": exact_match.situation, # 完全匹配时保持原 situation - "merged_style": exact_match.style # 完全匹配时保持原 style - } - chat_stat["exact_matches"] += 1 - chat_stat["merges"].append(merge_info) - merge_stats["exact_matches"] += 1 - processed_ids.add(expr.id) - continue - - # 第二层:检查相似匹配 - similar_match = find_similar_style_expression( - chat_expressions, - target_style, - chat_id, - similarity_threshold, - {expr.id} - ) - - if similar_match: - match_expr, similarity = similar_match - # 相似匹配(使用 LLM 总结) - # 模拟合并后的 content_list 和 style_list - target_content_list = parse_content_list(match_expr.content_list) - target_content_list.append(target_situation) - - target_style_list = parse_style_list(match_expr.style_list) - if match_expr.style and match_expr.style not in target_style_list: - target_style_list.append(match_expr.style) - if target_style not in target_style_list: - target_style_list.append(target_style) - - # 使用 LLM 总结(如果启用) - merged_situation = match_expr.situation - merged_style = match_expr.style or target_style - - if use_llm and summary_model: - try: - merged_situation = await compose_situation_text(target_content_list, summary_model) - merged_style = await compose_style_text(target_style_list, summary_model) - except Exception as e: - print(f" ⚠️ 处理记录 {expr.id} 时 LLM 总结失败: {e}") - # 如果总结失败,使用 fallback - merged_situation = "/".join([c.strip() for c in target_content_list if c.strip()]) or match_expr.situation - merged_style = target_style_list[0] if target_style_list else (match_expr.style or target_style) - else: - # 不使用 LLM 时,使用简单拼接 - merged_situation = "/".join([c.strip() for c in target_content_list if c.strip()]) or match_expr.situation - merged_style = target_style_list[0] if target_style_list else (match_expr.style or target_style) - - merge_info = { - "type": "similar", - "source_id": expr.id, - "target_id": match_expr.id, - "source_style": target_style, - "target_style": match_expr.style, - "source_situation": target_situation, - "target_situation": match_expr.situation, - "similarity": similarity, - "merged_content_list": target_content_list, - "merged_style_list": target_style_list, - "merged_situation": merged_situation, - "merged_style": merged_style, - "llm_used": use_llm and summary_model is not None - } - chat_stat["similar_matches"] += 1 - chat_stat["merges"].append(merge_info) - merge_stats["similar_matches"] += 1 - processed_ids.add(expr.id) - continue - - # 没有匹配,作为新记录 - chat_stat["new_records"] += 1 - merge_stats["new_records"] += 1 - processed_ids.add(expr.id) - - merge_stats["chat_stats"][chat_id] = chat_stat - merge_stats["merge_details"].extend(chat_stat["merges"]) - - return merge_stats - - -def print_merge_results(stats: Dict, show_details: bool = True, max_details: int = 50): - """打印合并结果""" - print("\n" + "=" * 80) - print("Expression 合并模拟结果") - print("=" * 80) - - print("\n📊 总体统计:") - print(f" 总 Expression 数: {stats['total_expressions']}") - print(f" 总聊天数: {stats['total_chats']}") - print(f" 完全匹配合并: {stats['exact_matches']}") - print(f" 相似匹配合并: {stats['similar_matches']}") - print(f" 新记录(无匹配): {stats['new_records']}") - if stats.get('use_llm'): - print(" LLM 总结: 已启用") - else: - print(" LLM 总结: 未启用(仅模拟)") - - total_merges = stats['exact_matches'] + stats['similar_matches'] - if stats['total_expressions'] > 0: - merge_ratio = (total_merges / stats['total_expressions']) * 100 - print(f" 合并比例: {merge_ratio:.1f}%") - - # 按聊天分组显示 - print("\n📋 按聊天分组统计:") - for chat_id, chat_stat in stats['chat_stats'].items(): - print(f"\n {chat_stat['chat_name']} ({chat_id[:8]}...):") - print(f" 总数: {chat_stat['total']}") - print(f" 完全匹配: {chat_stat['exact_matches']}") - print(f" 相似匹配: {chat_stat['similar_matches']}") - print(f" 新记录: {chat_stat['new_records']}") - - # 显示合并详情 - if show_details and stats['merge_details']: - print(f"\n📝 合并详情 (显示前 {min(max_details, len(stats['merge_details']))} 条):") - print() - - for idx, merge in enumerate(stats['merge_details'][:max_details], 1): - merge_type = "完全匹配" if merge['type'] == 'exact' else f"相似匹配 (相似度: {merge['similarity']:.3f})" - print(f" {idx}. {merge_type}") - print(f" 源记录 ID: {merge['source_id']}") - print(f" 目标记录 ID: {merge['target_id']}") - print(f" 源 Style: {merge['source_style'][:50]}") - print(f" 目标 Style: {merge['target_style'][:50]}") - print(f" 源 Situation: {merge['source_situation'][:50]}") - print(f" 目标 Situation: {merge['target_situation'][:50]}") - - # 显示合并后的结果 - if 'merged_situation' in merge: - print(f" → 合并后 Situation: {merge['merged_situation'][:50]}") - if 'merged_style' in merge: - print(f" → 合并后 Style: {merge['merged_style'][:50]}") - if merge.get('llm_used'): - print(" → LLM 总结: 已使用") - elif merge['type'] == 'similar': - print(" → LLM 总结: 未使用(模拟模式)") - - # 显示合并后的列表 - if 'merged_content_list' in merge and len(merge['merged_content_list']) > 1: - print(f" → Content List ({len(merge['merged_content_list'])} 项): {', '.join(merge['merged_content_list'][:3])}") - if len(merge['merged_content_list']) > 3: - print(f" ... 还有 {len(merge['merged_content_list']) - 3} 项") - if 'merged_style_list' in merge and len(merge['merged_style_list']) > 1: - print(f" → Style List ({len(merge['merged_style_list'])} 项): {', '.join(merge['merged_style_list'][:3])}") - if len(merge['merged_style_list']) > 3: - print(f" ... 还有 {len(merge['merged_style_list']) - 3} 项") - print() - - if len(stats['merge_details']) > max_details: - print(f" ... 还有 {len(stats['merge_details']) - max_details} 条合并记录未显示") - - -def main(): - """主函数""" - parser = argparse.ArgumentParser(description="模拟 Expression 合并过程") - parser.add_argument( - "--chat-id", - type=str, - default=None, - help="指定要分析的 chat_id(不指定则分析所有)" - ) - parser.add_argument( - "--similarity-threshold", - type=float, - default=0.75, - help="相似度阈值 (0-1, 默认: 0.75)" - ) - parser.add_argument( - "--no-details", - action="store_true", - help="不显示详细信息,只显示统计" - ) - parser.add_argument( - "--max-details", - type=int, - default=50, - help="最多显示的合并详情数 (默认: 50)" - ) - parser.add_argument( - "--output", - type=str, - default=None, - help="输出文件路径 (默认: 自动生成带时间戳的文件)" - ) - parser.add_argument( - "--use-llm", - action="store_true", - help="启用 LLM 进行实际总结(默认: 仅模拟,不调用 LLM)" - ) - parser.add_argument( - "--max-samples", - type=int, - default=10, - help="最多随机抽取的 Expression 数量 (默认: 10,设置为 0 表示不限制)" - ) - - args = parser.parse_args() - - # 验证阈值 - if not 0 <= args.similarity_threshold <= 1: - print("错误: similarity-threshold 必须在 0-1 之间") - return - - # 确定输出文件路径 - if args.output: - output_file = args.output - else: - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - output_dir = os.path.join(project_root, "data", "temp") - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, f"expression_merge_simulation_{timestamp}.txt") - - # 查询 Expression 记录 - print("正在从数据库加载Expression数据...") - try: - if args.chat_id: - expressions = list(Expression.select().where(Expression.chat_id == args.chat_id)) - print(f"✅ 成功加载 {len(expressions)} 条Expression记录 (chat_id: {args.chat_id})") - else: - expressions = list(Expression.select()) - print(f"✅ 成功加载 {len(expressions)} 条Expression记录") - except Exception as e: - print(f"❌ 加载数据失败: {e}") - return - - if not expressions: - print("❌ 数据库中没有找到Expression记录") - return - - # 执行合并模拟 - print(f"\n正在模拟合并过程(相似度阈值: {args.similarity_threshold},最大样本数: {args.max_samples})...") - if args.use_llm: - print("⚠️ 已启用 LLM 总结,将进行实际的 API 调用") - else: - print("ℹ️ 未启用 LLM 总结,仅进行模拟(使用 --use-llm 启用实际 LLM 调用)") - - stats = asyncio.run( - simulate_merge( - expressions, - similarity_threshold=args.similarity_threshold, - use_llm=args.use_llm, - max_samples=args.max_samples, - ) - ) - - # 输出结果 - original_stdout = sys.stdout - try: - with open(output_file, "w", encoding="utf-8") as f: - sys.stdout = f - print_merge_results(stats, show_details=not args.no_details, max_details=args.max_details) - sys.stdout = original_stdout - - # 同时在控制台输出 - print_merge_results(stats, show_details=not args.no_details, max_details=args.max_details) - - except Exception as e: - sys.stdout = original_stdout - print(f"❌ 写入文件失败: {e}") - return - - print(f"\n✅ 模拟结果已保存到: {output_file}") - - -if __name__ == "__main__": - main() - diff --git a/scripts/expression_scatter_analysis.py b/scripts/expression_scatter_analysis.py deleted file mode 100644 index 3cb22f71..00000000 --- a/scripts/expression_scatter_analysis.py +++ /dev/null @@ -1,342 +0,0 @@ -import sys -import os -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -from datetime import datetime -from typing import List, Tuple -import numpy as np -from src.common.database.database_model import Expression, ChatStreams - -# Add project root to Python path -project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, project_root) - - -# 设置中文字体 -plt.rcParams["font.sans-serif"] = ["SimHei", "Microsoft YaHei", "DejaVu Sans"] -plt.rcParams["axes.unicode_minus"] = False - - -def get_chat_name(chat_id: str) -> str: - """Get chat name from chat_id by querying ChatStreams table directly""" - try: - chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id) - if chat_stream is None: - return f"未知聊天 ({chat_id})" - - if chat_stream.group_name: - return f"{chat_stream.group_name} ({chat_id})" - elif chat_stream.user_nickname: - return f"{chat_stream.user_nickname}的私聊 ({chat_id})" - else: - return f"未知聊天 ({chat_id})" - except Exception: - return f"查询失败 ({chat_id})" - - -def get_expression_data() -> List[Tuple[float, float, str, str]]: - """获取Expression表中的数据,返回(create_date, count, chat_id, expression_type)的列表""" - expressions = Expression.select() - data = [] - - for expr in expressions: - # 如果create_date为空,跳过该记录 - if expr.create_date is None: - continue - - data.append((expr.create_date, expr.count, expr.chat_id, expr.type)) - - return data - - -def create_scatter_plot(data: List[Tuple[float, float, str, str]], save_path: str = None): - """创建散点图""" - if not data: - print("没有找到有效的表达式数据") - return - - # 分离数据 - create_dates = [item[0] for item in data] - counts = [item[1] for item in data] - _chat_ids = [item[2] for item in data] - _expression_types = [item[3] for item in data] - - # 转换时间戳为datetime对象 - dates = [datetime.fromtimestamp(ts) for ts in create_dates] - - # 计算时间跨度,自动调整显示格式 - time_span = max(dates) - min(dates) - if time_span.days > 30: # 超过30天,按月显示 - date_format = "%Y-%m-%d" - major_locator = mdates.MonthLocator() - minor_locator = mdates.DayLocator(interval=7) - elif time_span.days > 7: # 超过7天,按天显示 - date_format = "%Y-%m-%d" - major_locator = mdates.DayLocator(interval=1) - minor_locator = mdates.HourLocator(interval=12) - else: # 7天内,按小时显示 - date_format = "%Y-%m-%d %H:%M" - major_locator = mdates.HourLocator(interval=6) - minor_locator = mdates.HourLocator(interval=1) - - # 创建图形 - fig, ax = plt.subplots(figsize=(12, 8)) - - # 创建散点图 - scatter = ax.scatter(dates, counts, alpha=0.6, s=30, c=range(len(dates)), cmap="viridis") - - # 设置标签和标题 - ax.set_xlabel("创建日期 (Create Date)", fontsize=12) - ax.set_ylabel("使用次数 (Count)", fontsize=12) - ax.set_title("表达式使用次数随时间分布散点图", fontsize=14, fontweight="bold") - - # 设置x轴日期格式 - 根据时间跨度自动调整 - ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format)) - ax.xaxis.set_major_locator(major_locator) - ax.xaxis.set_minor_locator(minor_locator) - plt.xticks(rotation=45) - - # 添加网格 - ax.grid(True, alpha=0.3) - - # 添加颜色条 - cbar = plt.colorbar(scatter) - cbar.set_label("数据点顺序", fontsize=10) - - # 调整布局 - plt.tight_layout() - - # 显示统计信息 - print("\n=== 数据统计 ===") - print(f"总数据点数量: {len(data)}") - print(f"时间范围: {min(dates).strftime('%Y-%m-%d %H:%M:%S')} 到 {max(dates).strftime('%Y-%m-%d %H:%M:%S')}") - print(f"使用次数范围: {min(counts):.1f} 到 {max(counts):.1f}") - print(f"平均使用次数: {np.mean(counts):.2f}") - print(f"中位数使用次数: {np.median(counts):.2f}") - - # 保存图片 - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"\n散点图已保存到: {save_path}") - - # 显示图片 - plt.show() - - -def create_grouped_scatter_plot(data: List[Tuple[float, float, str, str]], save_path: str = None): - """创建按聊天分组的散点图""" - if not data: - print("没有找到有效的表达式数据") - return - - # 按chat_id分组 - chat_groups = {} - for item in data: - chat_id = item[2] - if chat_id not in chat_groups: - chat_groups[chat_id] = [] - chat_groups[chat_id].append(item) - - # 计算时间跨度,自动调整显示格式 - all_dates = [datetime.fromtimestamp(item[0]) for item in data] - time_span = max(all_dates) - min(all_dates) - if time_span.days > 30: # 超过30天,按月显示 - date_format = "%Y-%m-%d" - major_locator = mdates.MonthLocator() - minor_locator = mdates.DayLocator(interval=7) - elif time_span.days > 7: # 超过7天,按天显示 - date_format = "%Y-%m-%d" - major_locator = mdates.DayLocator(interval=1) - minor_locator = mdates.HourLocator(interval=12) - else: # 7天内,按小时显示 - date_format = "%Y-%m-%d %H:%M" - major_locator = mdates.HourLocator(interval=6) - minor_locator = mdates.HourLocator(interval=1) - - # 创建图形 - fig, ax = plt.subplots(figsize=(14, 10)) - - # 为每个聊天分配不同颜色 - colors = plt.cm.Set3(np.linspace(0, 1, len(chat_groups))) - - for i, (chat_id, chat_data) in enumerate(chat_groups.items()): - create_dates = [item[0] for item in chat_data] - counts = [item[1] for item in chat_data] - dates = [datetime.fromtimestamp(ts) for ts in create_dates] - - chat_name = get_chat_name(chat_id) - # 截断过长的聊天名称 - display_name = chat_name[:20] + "..." if len(chat_name) > 20 else chat_name - - ax.scatter( - dates, - counts, - alpha=0.7, - s=40, - c=[colors[i]], - label=f"{display_name} ({len(chat_data)}个)", - edgecolors="black", - linewidth=0.5, - ) - - # 设置标签和标题 - ax.set_xlabel("创建日期 (Create Date)", fontsize=12) - ax.set_ylabel("使用次数 (Count)", fontsize=12) - ax.set_title("按聊天分组的表达式使用次数散点图", fontsize=14, fontweight="bold") - - # 设置x轴日期格式 - 根据时间跨度自动调整 - ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format)) - ax.xaxis.set_major_locator(major_locator) - ax.xaxis.set_minor_locator(minor_locator) - plt.xticks(rotation=45) - - # 添加图例 - ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=8) - - # 添加网格 - ax.grid(True, alpha=0.3) - - # 调整布局 - plt.tight_layout() - - # 显示统计信息 - print("\n=== 分组统计 ===") - print(f"总聊天数量: {len(chat_groups)}") - for chat_id, chat_data in chat_groups.items(): - chat_name = get_chat_name(chat_id) - counts = [item[1] for item in chat_data] - print(f"{chat_name}: {len(chat_data)}个表达式, 平均使用次数: {np.mean(counts):.2f}") - - # 保存图片 - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"\n分组散点图已保存到: {save_path}") - - # 显示图片 - plt.show() - - -def create_type_scatter_plot(data: List[Tuple[float, float, str, str]], save_path: str = None): - """创建按表达式类型分组的散点图""" - if not data: - print("没有找到有效的表达式数据") - return - - # 按type分组 - type_groups = {} - for item in data: - expr_type = item[3] - if expr_type not in type_groups: - type_groups[expr_type] = [] - type_groups[expr_type].append(item) - - # 计算时间跨度,自动调整显示格式 - all_dates = [datetime.fromtimestamp(item[0]) for item in data] - time_span = max(all_dates) - min(all_dates) - if time_span.days > 30: # 超过30天,按月显示 - date_format = "%Y-%m-%d" - major_locator = mdates.MonthLocator() - minor_locator = mdates.DayLocator(interval=7) - elif time_span.days > 7: # 超过7天,按天显示 - date_format = "%Y-%m-%d" - major_locator = mdates.DayLocator(interval=1) - minor_locator = mdates.HourLocator(interval=12) - else: # 7天内,按小时显示 - date_format = "%Y-%m-%d %H:%M" - major_locator = mdates.HourLocator(interval=6) - minor_locator = mdates.HourLocator(interval=1) - - # 创建图形 - fig, ax = plt.subplots(figsize=(12, 8)) - - # 为每个类型分配不同颜色 - colors = plt.cm.tab10(np.linspace(0, 1, len(type_groups))) - - for i, (expr_type, type_data) in enumerate(type_groups.items()): - create_dates = [item[0] for item in type_data] - counts = [item[1] for item in type_data] - dates = [datetime.fromtimestamp(ts) for ts in create_dates] - - ax.scatter( - dates, - counts, - alpha=0.7, - s=40, - c=[colors[i]], - label=f"{expr_type} ({len(type_data)}个)", - edgecolors="black", - linewidth=0.5, - ) - - # 设置标签和标题 - ax.set_xlabel("创建日期 (Create Date)", fontsize=12) - ax.set_ylabel("使用次数 (Count)", fontsize=12) - ax.set_title("按表达式类型分组的散点图", fontsize=14, fontweight="bold") - - # 设置x轴日期格式 - 根据时间跨度自动调整 - ax.xaxis.set_major_formatter(mdates.DateFormatter(date_format)) - ax.xaxis.set_major_locator(major_locator) - ax.xaxis.set_minor_locator(minor_locator) - plt.xticks(rotation=45) - - # 添加图例 - ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left") - - # 添加网格 - ax.grid(True, alpha=0.3) - - # 调整布局 - plt.tight_layout() - - # 显示统计信息 - print("\n=== 类型统计 ===") - for expr_type, type_data in type_groups.items(): - counts = [item[1] for item in type_data] - print(f"{expr_type}: {len(type_data)}个表达式, 平均使用次数: {np.mean(counts):.2f}") - - # 保存图片 - if save_path: - plt.savefig(save_path, dpi=300, bbox_inches="tight") - print(f"\n类型散点图已保存到: {save_path}") - - # 显示图片 - plt.show() - - -def main(): - """主函数""" - print("开始分析表达式数据...") - - # 获取数据 - data = get_expression_data() - - if not data: - print("没有找到有效的表达式数据(create_date不为空的数据)") - return - - print(f"找到 {len(data)} 条有效数据") - - # 创建输出目录 - output_dir = os.path.join(project_root, "data", "temp") - os.makedirs(output_dir, exist_ok=True) - - # 生成时间戳用于文件名 - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - - # 1. 创建基础散点图 - print("\n1. 创建基础散点图...") - create_scatter_plot(data, os.path.join(output_dir, f"expression_scatter_{timestamp}.png")) - - # 2. 创建按聊天分组的散点图 - print("\n2. 创建按聊天分组的散点图...") - create_grouped_scatter_plot(data, os.path.join(output_dir, f"expression_scatter_by_chat_{timestamp}.png")) - - # 3. 创建按类型分组的散点图 - print("\n3. 创建按类型分组的散点图...") - create_type_scatter_plot(data, os.path.join(output_dir, f"expression_scatter_by_type_{timestamp}.png")) - - print("\n分析完成!") - - -if __name__ == "__main__": - main() diff --git a/scripts/expression_similarity_analysis.py b/scripts/expression_similarity_analysis.py deleted file mode 100644 index aa1b149e..00000000 --- a/scripts/expression_similarity_analysis.py +++ /dev/null @@ -1,559 +0,0 @@ -""" -分析expression库中situation和style的相似度 - -用法: - python scripts/expression_similarity_analysis.py - 或指定阈值: - python scripts/expression_similarity_analysis.py --situation-threshold 0.8 --style-threshold 0.7 -""" - -import sys -import os -import argparse -from typing import List, Tuple -from collections import defaultdict -from difflib import SequenceMatcher -from datetime import datetime - -# Add project root to Python path -project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, project_root) - -# Import after setting up path (required for project imports) -from src.common.database.database_model import Expression, ChatStreams # noqa: E402 -from src.config.config import global_config # noqa: E402 -from src.chat.message_receive.chat_stream import get_chat_manager # noqa: E402 - - -class TeeOutput: - """同时输出到控制台和文件的类""" - def __init__(self, file_path: str): - self.file = open(file_path, "w", encoding="utf-8") - self.console = sys.stdout - - def write(self, text: str): - """写入文本到控制台和文件""" - self.console.write(text) - self.file.write(text) - self.file.flush() # 立即刷新到文件 - - def flush(self): - """刷新输出""" - self.console.flush() - self.file.flush() - - def close(self): - """关闭文件""" - if self.file: - self.file.close() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - return False - - -def _parse_stream_config_to_chat_id(stream_config_str: str) -> str | None: - """ - 解析'platform:id:type'为chat_id,直接复用 ChatManager 的逻辑 - """ - try: - parts = stream_config_str.split(":") - if len(parts) != 3: - return None - platform = parts[0] - id_str = parts[1] - stream_type = parts[2] - is_group = stream_type == "group" - return get_chat_manager().get_stream_id(platform, str(id_str), is_group=is_group) - except Exception: - return None - - -def build_chat_id_groups() -> dict[str, set[str]]: - """ - 根据expression_groups配置,构建chat_id到相关chat_id集合的映射 - - Returns: - dict: {chat_id: set of related chat_ids (including itself)} - """ - groups = global_config.expression.expression_groups - chat_id_groups: dict[str, set[str]] = {} - - # 检查是否存在全局共享组(包含"*"的组) - global_group_exists = any("*" in group for group in groups) - - if global_group_exists: - # 如果存在全局共享组,收集所有配置中的chat_id - all_chat_ids = set() - for group in groups: - for stream_config_str in group: - if stream_config_str == "*": - continue - if chat_id_candidate := _parse_stream_config_to_chat_id(stream_config_str): - all_chat_ids.add(chat_id_candidate) - - # 所有chat_id都互相相关 - for chat_id in all_chat_ids: - chat_id_groups[chat_id] = all_chat_ids.copy() - else: - # 处理普通组 - for group in groups: - group_chat_ids = set() - for stream_config_str in group: - if chat_id_candidate := _parse_stream_config_to_chat_id(stream_config_str): - group_chat_ids.add(chat_id_candidate) - - # 组内的所有chat_id都互相相关 - for chat_id in group_chat_ids: - if chat_id not in chat_id_groups: - chat_id_groups[chat_id] = set() - chat_id_groups[chat_id].update(group_chat_ids) - - # 确保每个chat_id至少包含自身 - for chat_id in chat_id_groups: - chat_id_groups[chat_id].add(chat_id) - - return chat_id_groups - - -def are_chat_ids_related(chat_id1: str, chat_id2: str, chat_id_groups: dict[str, set[str]]) -> bool: - """ - 判断两个chat_id是否相关(相同或同组) - - Args: - chat_id1: 第一个chat_id - chat_id2: 第二个chat_id - chat_id_groups: chat_id到相关chat_id集合的映射 - - Returns: - bool: 如果两个chat_id相同或同组,返回True - """ - if chat_id1 == chat_id2: - return True - - # 如果chat_id1在映射中,检查chat_id2是否在其相关集合中 - if chat_id1 in chat_id_groups: - return chat_id2 in chat_id_groups[chat_id1] - - # 如果chat_id1不在映射中,说明它不在任何组中,只与自己相关 - return False - - -def get_chat_name(chat_id: str) -> str: - """根据 chat_id 获取聊天名称""" - try: - chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id) - if chat_stream is None: - return f"未知聊天 ({chat_id[:8]}...)" - - if chat_stream.group_name: - return f"{chat_stream.group_name}" - elif chat_stream.user_nickname: - return f"{chat_stream.user_nickname}的私聊" - else: - return f"未知聊天 ({chat_id[:8]}...)" - except Exception: - return f"查询失败 ({chat_id[:8]}...)" - - -def text_similarity(text1: str, text2: str) -> float: - """ - 计算两个文本的相似度 - 使用SequenceMatcher计算相似度,返回0-1之间的值 - 在计算前会移除"使用"和"句式"这两个词 - """ - if not text1 or not text2: - return 0.0 - - # 移除"使用"和"句式"这两个词 - def remove_ignored_words(text: str) -> str: - """移除需要忽略的词""" - text = text.replace("使用", "") - text = text.replace("句式", "") - return text.strip() - - cleaned_text1 = remove_ignored_words(text1) - cleaned_text2 = remove_ignored_words(text2) - - # 如果清理后文本为空,返回0 - if not cleaned_text1 or not cleaned_text2: - return 0.0 - - return SequenceMatcher(None, cleaned_text1, cleaned_text2).ratio() - - -def find_similar_pairs( - expressions: List[Expression], - field_name: str, - threshold: float, - max_pairs: int = None -) -> List[Tuple[int, int, float, str, str]]: - """ - 找出相似的expression对 - - Args: - expressions: Expression对象列表 - field_name: 要比较的字段名 ('situation' 或 'style') - threshold: 相似度阈值 (0-1) - max_pairs: 最多返回的对数,None表示返回所有 - - Returns: - List of (index1, index2, similarity, text1, text2) tuples - """ - similar_pairs = [] - n = len(expressions) - - print(f"正在分析 {field_name} 字段的相似度...") - print(f"总共需要比较 {n * (n - 1) // 2} 对...") - - for i in range(n): - if (i + 1) % 100 == 0: - print(f" 已处理 {i + 1}/{n} 个项目...") - - expr1 = expressions[i] - text1 = getattr(expr1, field_name, "") - - for j in range(i + 1, n): - expr2 = expressions[j] - text2 = getattr(expr2, field_name, "") - - similarity = text_similarity(text1, text2) - - if similarity >= threshold: - similar_pairs.append((i, j, similarity, text1, text2)) - - # 按相似度降序排序 - similar_pairs.sort(key=lambda x: x[2], reverse=True) - - if max_pairs: - similar_pairs = similar_pairs[:max_pairs] - - return similar_pairs - - -def group_similar_items( - expressions: List[Expression], - field_name: str, - threshold: float, - chat_id_groups: dict[str, set[str]] -) -> List[List[int]]: - """ - 将相似的expression分组(仅比较相同chat_id或同组的项目) - - Args: - expressions: Expression对象列表 - field_name: 要比较的字段名 ('situation' 或 'style') - threshold: 相似度阈值 (0-1) - chat_id_groups: chat_id到相关chat_id集合的映射 - - Returns: - List of groups, each group is a list of indices - """ - n = len(expressions) - # 使用并查集的思想来分组 - parent = list(range(n)) - - def find(x): - if parent[x] != x: - parent[x] = find(parent[x]) - return parent[x] - - def union(x, y): - px, py = find(x), find(y) - if px != py: - parent[px] = py - - print(f"正在对 {field_name} 字段进行分组(仅比较相同chat_id或同组的项目)...") - - # 统计需要比较的对数 - total_pairs = 0 - for i in range(n): - for j in range(i + 1, n): - if are_chat_ids_related(expressions[i].chat_id, expressions[j].chat_id, chat_id_groups): - total_pairs += 1 - - print(f"总共需要比较 {total_pairs} 对(已过滤不同chat_id且不同组的项目)...") - - compared_pairs = 0 - for i in range(n): - if (i + 1) % 100 == 0: - print(f" 已处理 {i + 1}/{n} 个项目...") - - expr1 = expressions[i] - text1 = getattr(expr1, field_name, "") - - for j in range(i + 1, n): - expr2 = expressions[j] - - # 只比较相同chat_id或同组的项目 - if not are_chat_ids_related(expr1.chat_id, expr2.chat_id, chat_id_groups): - continue - - compared_pairs += 1 - text2 = getattr(expr2, field_name, "") - - similarity = text_similarity(text1, text2) - - if similarity >= threshold: - union(i, j) - - # 收集分组 - groups = defaultdict(list) - for i in range(n): - root = find(i) - groups[root].append(i) - - # 只返回包含多个项目的组 - result = [group for group in groups.values() if len(group) > 1] - result.sort(key=len, reverse=True) - - return result - - -def print_similarity_analysis( - expressions: List[Expression], - field_name: str, - threshold: float, - chat_id_groups: dict[str, set[str]], - show_details: bool = True, - max_groups: int = 20 -): - """打印相似度分析结果""" - print("\n" + "=" * 80) - print(f"{field_name.upper()} 相似度分析 (阈值: {threshold})") - print("=" * 80) - - # 分组分析 - groups = group_similar_items(expressions, field_name, threshold, chat_id_groups) - - total_items = len(expressions) - similar_items_count = sum(len(group) for group in groups) - unique_groups = len(groups) - - print("\n📊 统计信息:") - print(f" 总项目数: {total_items}") - print(f" 相似项目数: {similar_items_count} ({similar_items_count / total_items * 100:.1f}%)") - print(f" 相似组数: {unique_groups}") - print(f" 平均每组项目数: {similar_items_count / unique_groups:.1f}" if unique_groups > 0 else " 平均每组项目数: 0") - - if not groups: - print(f"\n未找到相似度 >= {threshold} 的项目组") - return - - print(f"\n📋 相似组详情 (显示前 {min(max_groups, len(groups))} 组):") - print() - - for group_idx, group in enumerate(groups[:max_groups], 1): - print(f"组 {group_idx} (共 {len(group)} 个项目):") - - if show_details: - # 显示组内所有项目的详细信息 - for idx in group: - expr = expressions[idx] - text = getattr(expr, field_name, "") - chat_name = get_chat_name(expr.chat_id) - - # 截断过长的文本 - display_text = text[:60] + "..." if len(text) > 60 else text - - print(f" [{expr.id}] {display_text}") - print(f" 聊天: {chat_name}, Count: {expr.count}") - - # 计算组内平均相似度 - if len(group) > 1: - similarities = [] - above_threshold_pairs = [] # 存储满足阈值的相似对 - above_threshold_count = 0 - for i in range(len(group)): - for j in range(i + 1, len(group)): - text1 = getattr(expressions[group[i]], field_name, "") - text2 = getattr(expressions[group[j]], field_name, "") - sim = text_similarity(text1, text2) - similarities.append(sim) - if sim >= threshold: - above_threshold_count += 1 - # 存储满足阈值的对的信息 - expr1 = expressions[group[i]] - expr2 = expressions[group[j]] - display_text1 = text1[:40] + "..." if len(text1) > 40 else text1 - display_text2 = text2[:40] + "..." if len(text2) > 40 else text2 - above_threshold_pairs.append(( - expr1.id, display_text1, - expr2.id, display_text2, - sim - )) - - if similarities: - avg_sim = sum(similarities) / len(similarities) - min_sim = min(similarities) - max_sim = max(similarities) - above_threshold_ratio = above_threshold_count / len(similarities) * 100 - print(f" 平均相似度: {avg_sim:.3f} (范围: {min_sim:.3f} - {max_sim:.3f})") - print(f" 满足阈值({threshold})的比例: {above_threshold_ratio:.1f}% ({above_threshold_count}/{len(similarities)})") - - # 显示满足阈值的相似对(这些是直接连接,导致它们被分到一组) - if above_threshold_pairs: - print(" ⚠️ 直接相似的对 (这些对导致它们被分到一组):") - # 按相似度降序排序 - above_threshold_pairs.sort(key=lambda x: x[4], reverse=True) - for idx1, text1, idx2, text2, sim in above_threshold_pairs[:10]: # 最多显示10对 - print(f" [{idx1}] ↔ [{idx2}]: {sim:.3f}") - print(f" \"{text1}\" ↔ \"{text2}\"") - if len(above_threshold_pairs) > 10: - print(f" ... 还有 {len(above_threshold_pairs) - 10} 对满足阈值") - else: - print(f" ⚠️ 警告: 组内没有任何对满足阈值({threshold:.2f}),可能是通过传递性连接") - else: - # 只显示组内第一个项目作为示例 - expr = expressions[group[0]] - text = getattr(expr, field_name, "") - display_text = text[:60] + "..." if len(text) > 60 else text - print(f" 示例: {display_text}") - print(f" ... 还有 {len(group) - 1} 个相似项目") - - print() - - if len(groups) > max_groups: - print(f"... 还有 {len(groups) - max_groups} 组未显示") - - -def main(): - """主函数""" - parser = argparse.ArgumentParser(description="分析expression库中situation和style的相似度") - parser.add_argument( - "--situation-threshold", - type=float, - default=0.7, - help="situation相似度阈值 (0-1, 默认: 0.7)" - ) - parser.add_argument( - "--style-threshold", - type=float, - default=0.7, - help="style相似度阈值 (0-1, 默认: 0.7)" - ) - parser.add_argument( - "--no-details", - action="store_true", - help="不显示详细信息,只显示统计" - ) - parser.add_argument( - "--max-groups", - type=int, - default=20, - help="最多显示的组数 (默认: 20)" - ) - parser.add_argument( - "--output", - type=str, - default=None, - help="输出文件路径 (默认: 自动生成带时间戳的文件)" - ) - - args = parser.parse_args() - - # 验证阈值 - if not 0 <= args.situation_threshold <= 1: - print("错误: situation-threshold 必须在 0-1 之间") - return - if not 0 <= args.style_threshold <= 1: - print("错误: style-threshold 必须在 0-1 之间") - return - - # 确定输出文件路径 - if args.output: - output_file = args.output - else: - # 自动生成带时间戳的输出文件 - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - output_dir = os.path.join(project_root, "data", "temp") - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, f"expression_similarity_analysis_{timestamp}.txt") - - # 使用TeeOutput同时输出到控制台和文件 - with TeeOutput(output_file) as tee: - # 临时替换sys.stdout - original_stdout = sys.stdout - sys.stdout = tee - - try: - print("=" * 80) - print("Expression 相似度分析工具") - print("=" * 80) - print(f"输出文件: {output_file}") - print() - - _run_analysis(args) - - finally: - # 恢复原始stdout - sys.stdout = original_stdout - - print(f"\n✅ 分析结果已保存到: {output_file}") - - -def _run_analysis(args): - """执行分析的主逻辑""" - - # 查询所有Expression记录 - print("正在从数据库加载Expression数据...") - try: - expressions = list(Expression.select()) - except Exception as e: - print(f"❌ 加载数据失败: {e}") - return - - if not expressions: - print("❌ 数据库中没有找到Expression记录") - return - - print(f"✅ 成功加载 {len(expressions)} 条Expression记录") - print() - - # 构建chat_id分组映射 - print("正在构建chat_id分组映射(根据expression_groups配置)...") - try: - chat_id_groups = build_chat_id_groups() - print(f"✅ 成功构建 {len(chat_id_groups)} 个chat_id的分组映射") - if chat_id_groups: - # 统计分组信息 - total_related = sum(len(related) for related in chat_id_groups.values()) - avg_related = total_related / len(chat_id_groups) - print(f" 平均每个chat_id与 {avg_related:.1f} 个chat_id相关(包括自身)") - print() - except Exception as e: - print(f"⚠️ 构建chat_id分组映射失败: {e}") - print(" 将使用默认行为:只比较相同chat_id的项目") - chat_id_groups = {} - - # 分析situation相似度 - print_similarity_analysis( - expressions, - "situation", - args.situation_threshold, - chat_id_groups, - show_details=not args.no_details, - max_groups=args.max_groups - ) - - # 分析style相似度 - print_similarity_analysis( - expressions, - "style", - args.style_threshold, - chat_id_groups, - show_details=not args.no_details, - max_groups=args.max_groups - ) - - print("\n" + "=" * 80) - print("分析完成!") - print("=" * 80) - - -if __name__ == "__main__": - main() - diff --git a/scripts/expression_stats.py b/scripts/expression_stats.py deleted file mode 100644 index 133f3d73..00000000 --- a/scripts/expression_stats.py +++ /dev/null @@ -1,196 +0,0 @@ -import time -import sys -import os -from typing import Dict, List - -# Add project root to Python path -from src.common.database.database_model import Expression, ChatStreams - -project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, project_root) - - -def get_chat_name(chat_id: str) -> str: - """Get chat name from chat_id by querying ChatStreams table directly""" - try: - # 直接从数据库查询ChatStreams表 - chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id) - if chat_stream is None: - return f"未知聊天 ({chat_id})" - - # 如果有群组信息,显示群组名称 - if chat_stream.group_name: - return f"{chat_stream.group_name} ({chat_id})" - # 如果是私聊,显示用户昵称 - elif chat_stream.user_nickname: - return f"{chat_stream.user_nickname}的私聊 ({chat_id})" - else: - return f"未知聊天 ({chat_id})" - except Exception: - return f"查询失败 ({chat_id})" - - -def calculate_time_distribution(expressions) -> Dict[str, int]: - """Calculate distribution of last active time in days""" - now = time.time() - distribution = { - "0-1天": 0, - "1-3天": 0, - "3-7天": 0, - "7-14天": 0, - "14-30天": 0, - "30-60天": 0, - "60-90天": 0, - "90+天": 0, - } - for expr in expressions: - diff_days = (now - expr.last_active_time) / (24 * 3600) - if diff_days < 1: - distribution["0-1天"] += 1 - elif diff_days < 3: - distribution["1-3天"] += 1 - elif diff_days < 7: - distribution["3-7天"] += 1 - elif diff_days < 14: - distribution["7-14天"] += 1 - elif diff_days < 30: - distribution["14-30天"] += 1 - elif diff_days < 60: - distribution["30-60天"] += 1 - elif diff_days < 90: - distribution["60-90天"] += 1 - else: - distribution["90+天"] += 1 - return distribution - - -def calculate_count_distribution(expressions) -> Dict[str, int]: - """Calculate distribution of count values""" - distribution = {"0-1": 0, "1-2": 0, "2-3": 0, "3-4": 0, "4-5": 0, "5-10": 0, "10+": 0} - for expr in expressions: - cnt = expr.count - if cnt < 1: - distribution["0-1"] += 1 - elif cnt < 2: - distribution["1-2"] += 1 - elif cnt < 3: - distribution["2-3"] += 1 - elif cnt < 4: - distribution["3-4"] += 1 - elif cnt < 5: - distribution["4-5"] += 1 - elif cnt < 10: - distribution["5-10"] += 1 - else: - distribution["10+"] += 1 - return distribution - - -def get_top_expressions_by_chat(chat_id: str, top_n: int = 5) -> List[Expression]: - """Get top N most used expressions for a specific chat_id""" - return Expression.select().where(Expression.chat_id == chat_id).order_by(Expression.count.desc()).limit(top_n) - - -def show_overall_statistics(expressions, total: int) -> None: - """Show overall statistics""" - time_dist = calculate_time_distribution(expressions) - count_dist = calculate_count_distribution(expressions) - - print("\n=== 总体统计 ===") - print(f"总表达式数量: {total}") - - print("\n上次激活时间分布:") - for period, count in time_dist.items(): - print(f"{period}: {count} ({count / total * 100:.2f}%)") - - print("\ncount分布:") - for range_, count in count_dist.items(): - print(f"{range_}: {count} ({count / total * 100:.2f}%)") - - -def show_chat_statistics(chat_id: str, chat_name: str) -> None: - """Show statistics for a specific chat""" - chat_exprs = list(Expression.select().where(Expression.chat_id == chat_id)) - chat_total = len(chat_exprs) - - print(f"\n=== {chat_name} ===") - print(f"表达式数量: {chat_total}") - - if chat_total == 0: - print("该聊天没有表达式数据") - return - - # Time distribution for this chat - time_dist = calculate_time_distribution(chat_exprs) - print("\n上次激活时间分布:") - for period, count in time_dist.items(): - if count > 0: - print(f"{period}: {count} ({count / chat_total * 100:.2f}%)") - - # Count distribution for this chat - count_dist = calculate_count_distribution(chat_exprs) - print("\ncount分布:") - for range_, count in count_dist.items(): - if count > 0: - print(f"{range_}: {count} ({count / chat_total * 100:.2f}%)") - - # Top expressions - print("\nTop 10使用最多的表达式:") - top_exprs = get_top_expressions_by_chat(chat_id, 10) - for i, expr in enumerate(top_exprs, 1): - print(f"{i}. [{expr.type}] Count: {expr.count}") - print(f" Situation: {expr.situation}") - print(f" Style: {expr.style}") - print() - - -def interactive_menu() -> None: - """Interactive menu for expression statistics""" - # Get all expressions - expressions = list(Expression.select()) - if not expressions: - print("数据库中没有找到表达式") - return - - total = len(expressions) - - # Get unique chat_ids and their names - chat_ids = list(set(expr.chat_id for expr in expressions)) - chat_info = [(chat_id, get_chat_name(chat_id)) for chat_id in chat_ids] - chat_info.sort(key=lambda x: x[1]) # Sort by chat name - - while True: - print("\n" + "=" * 50) - print("表达式统计分析") - print("=" * 50) - print("0. 显示总体统计") - - for i, (chat_id, chat_name) in enumerate(chat_info, 1): - chat_count = sum(1 for expr in expressions if expr.chat_id == chat_id) - print(f"{i}. {chat_name} ({chat_count}个表达式)") - - print("q. 退出") - - choice = input("\n请选择要查看的统计 (输入序号): ").strip() - - if choice.lower() == "q": - print("再见!") - break - - try: - choice_num = int(choice) - if choice_num == 0: - show_overall_statistics(expressions, total) - elif 1 <= choice_num <= len(chat_info): - chat_id, chat_name = chat_info[choice_num - 1] - show_chat_statistics(chat_id, chat_name) - else: - print("无效的选择,请重新输入") - except ValueError: - print("请输入有效的数字") - - input("\n按回车键继续...") - - -if __name__ == "__main__": - interactive_menu() diff --git a/scripts/manual_evaluation_results.json b/scripts/manual_evaluation_results.json new file mode 100644 index 00000000..9e7f1cf3 --- /dev/null +++ b/scripts/manual_evaluation_results.json @@ -0,0 +1,3236 @@ +{ + "last_updated": "2025-12-26T16:33:12.430516", + "total_count": 360, + "manual_results": [ + { + "expression_id": 3169, + "situation": "调侃式回应他人疑问", + "style": "hhh", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:14.505956" + }, + { + "expression_id": 2488, + "situation": "建议被忽视,问题依旧存在", + "style": "简单回应 yes 后继续说失败", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:17.306742" + }, + { + "expression_id": 3535, + "situation": "提出反对意见时", + "style": "使用'我拒绝xxx'句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:18.516562" + }, + { + "expression_id": 5287, + "situation": "针对技术方案提出建议时", + "style": "采用“...一下”或“试试”句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:20.937777" + }, + { + "expression_id": 5136, + "situation": "对他人发言进行同步复读以强化语气", + "style": "复读对方原话并加@", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:38.770362" + }, + { + "expression_id": 3178, + "situation": "认可他人观点时表达认同", + "style": "使用 你别说...句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:42.465183" + }, + { + "expression_id": 5344, + "situation": "对突然变化的规则表示荒诞感", + "style": "今天这个是自动减的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:45.801959" + }, + { + "expression_id": 2379, + "situation": "讽刺技术荒诞的文学化表达", + "style": "用诗意语言描述技术崩溃", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:49.097711" + }, + { + "expression_id": 2508, + "situation": "被要求不回复时仍执意回应", + "style": "用“不要回复我”直接拒绝", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:51.913429" + }, + { + "expression_id": 2573, + "situation": "聊天中热议动漫角色", + "style": "使用'萌音'指代角色", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:17:54.017132" + }, + { + "expression_id": 2051, + "situation": "对比抽象概念时的对话特征", + "style": "使用 vs 连接两个对立概念", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:03.928730" + }, + { + "expression_id": 4327, + "situation": "对他人建议提出反问以质疑合理性", + "style": "要XX干嘛", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:05.944953" + }, + { + "expression_id": 3432, + "situation": "讨论插件功能时强调专注性", + "style": "使用 专注于某动作", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:08.425325" + }, + { + "expression_id": 3591, + "situation": "对他人建议表示认可并补充", + "style": "嗯,也是好选择", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:11.202821" + }, + { + "expression_id": 602, + "situation": "简短确认他人引用内容", + "style": "回 复 中", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:13.296995" + }, + { + "expression_id": 2552, + "situation": "系统状态描述均简洁明确", + "style": "使用 重启就会变", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:21.089313" + }, + { + "expression_id": 1566, + "situation": "讨论服务器资源占用情况", + "style": "使用 吃了太多电费", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:22.585119" + }, + { + "expression_id": 839, + "situation": "对某事持怀疑或否定态度", + "style": "我觉得肯定受不了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:24.680676" + }, + { + "expression_id": 3673, + "situation": "建议用无关方式解决矛盾", + "style": "删掉XX即可", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:28.842462" + }, + { + "expression_id": 1723, + "situation": "反复强调同一观点", + "style": "重复使用相同短句", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:30.728958" + }, + { + "expression_id": 2732, + "situation": "用否定句式表达无力回应建议", + "style": "无法起飞", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:35.472037" + }, + { + "expression_id": 2965, + "situation": "确认为其他账户", + "style": "对", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:37.170307" + }, + { + "expression_id": 4436, + "situation": "对他人操作流程表示质疑", + "style": "你虚拟环境呢", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:39.192119" + }, + { + "expression_id": 995, + "situation": "模仿他人发言", + "style": "直接重复他人原话", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:40.703669" + }, + { + "expression_id": 495, + "situation": "对技术建议常持否定态度", + "style": "补充说明细节或注意事项", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:44.968474" + }, + { + "expression_id": 4443, + "situation": "警告他人不要随意操作", + "style": "重复 '别乱动乱敲'", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:48.256356" + }, + { + "expression_id": 3059, + "situation": "质疑数据真实性", + "style": "结果不真啊,麦的xxx呢", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:51.080153" + }, + { + "expression_id": 4038, + "situation": "回应无厘头或荒诞内容时假装严肃", + "style": "你尔朵笼还是盐津虾", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:18:54.202207" + }, + { + "expression_id": 1121, + "situation": "发现技术细节时的对话特征", + "style": "连续使用woc表达惊讶", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:19:04.408294" + }, + { + "expression_id": 1328, + "situation": "AI输出异常或过度", + "style": "用“发力”形容模型生成内容,带调侃", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:19:06.559956" + }, + { + "expression_id": 5506, + "situation": "渴望得到他人反馈或急于求证时", + "style": "使用祈使短语“求回答”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:08.938962" + }, + { + "expression_id": 662, + "situation": "AI输出不稳定时的聊天情境", + "style": "使用“不收敛”形容失控", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:12.483859" + }, + { + "expression_id": 3944, + "situation": "对他人疑问进行引导性回应", + "style": "你问问课代表?", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:14.507348" + }, + { + "expression_id": 4220, + "situation": "半开玩笑地讨论学校政策", + "style": "使用‘确实爽’和‘也就’表达半开玩笑", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:16.395121" + }, + { + "expression_id": 1530, + "situation": "用无厘头回应化解尴尬", + "style": "还有上门服务", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:29.306839" + }, + { + "expression_id": 1236, + "situation": "系统故障时用户寻求帮助", + "style": "直接陈述故障原因", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:32.467370" + }, + { + "expression_id": 1635, + "situation": "表达惊讶或困惑", + "style": "使用 这是什么东西", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:35.506595" + }, + { + "expression_id": 4145, + "situation": "对他人行为表示接受并轻描淡写回应", + "style": "搜嘎", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:38.307144" + }, + { + "expression_id": 2385, + "situation": "聊天中突兀插入无关话题", + "style": "用无意义数字或感叹词填充", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:40.594576" + }, + { + "expression_id": 1179, + "situation": "游戏比较中常带主观评价", + "style": "使用“感觉不如...”句式进行对比", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:43.075143" + }, + { + "expression_id": 343, + "situation": "认可时表达赞同", + "style": "使用“中”等单字肯定", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:47.058887" + }, + { + "expression_id": 2303, + "situation": "用反常识鼓励回应消极情绪", + "style": "上药", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:49.586496" + }, + { + "expression_id": 4246, + "situation": "讨论技术问题时提出解决方案", + "style": "直接给出技术术语建议", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:53.050459" + }, + { + "expression_id": 3955, + "situation": "表达对问题的不满", + "style": "使用'有点慢'结构", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:56.305860" + }, + { + "expression_id": 1435, + "situation": "邀请聚餐时的社交互动", + "style": "使用 有没有一起去吃火锅的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:23:58.762824" + }, + { + "expression_id": 4800, + "situation": "对游戏机制表示不解", + "style": "问“怎么自动扣除了”", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:07.842563" + }, + { + "expression_id": 2632, + "situation": "犹豫不决,难以抉择", + "style": "强调 都试试", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:09.730461" + }, + { + "expression_id": 1872, + "situation": "确认他人建议", + "style": "对喵", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:10.866521" + }, + { + "expression_id": 1118, + "situation": "请求未获直接同意", + "style": "使用“烦死了!(自动回复)”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:14.370537" + }, + { + "expression_id": 1709, + "situation": "神秘化描述事物", + "style": "使用 神秘+名词", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:16.929684" + }, + { + "expression_id": 5010, + "situation": "质疑插件指令被误判为对话", + "style": "为啥插件触发的指令会被bot当成对话", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:22.425584" + }, + { + "expression_id": 3706, + "situation": "对他人建议的轻蔑态度", + "style": "使用'直接杀死'", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:26.681089" + }, + { + "expression_id": 4490, + "situation": "面对复杂问题时表达头大或压力大", + "style": "头大", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:28.010419" + }, + { + "expression_id": 3408, + "situation": "回应荒诞承诺时提出更荒诞条件", + "style": "只要...就可以了喵", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:31.538269" + }, + { + "expression_id": 1293, + "situation": "多人在线游戏组队求助", + "style": "使用 数字 表示确认参与", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:33.305892" + }, + { + "expression_id": 4322, + "situation": "回应他人提议或信息时表示认同", + "style": "说 还真是", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:53.640715" + }, + { + "expression_id": 4782, + "situation": "请求操作指导", + "style": "怎么做枫枫", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:24:55.329241" + }, + { + "expression_id": 4249, + "situation": "讨论工具优劣", + "style": "使用比较句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:25:16.202072" + }, + { + "expression_id": 357, + "situation": "讨论游戏机制时的交流", + "style": "使用 模块化框架", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:25:18.071625" + }, + { + "expression_id": 5503, + "situation": "表达无奈、疲惫或轻微叹息时", + "style": "使用单字语气助词“哎”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:25:19.896008" + }, + { + "expression_id": 1212, + "situation": "震惊于荒谬猎奇内容", + "style": "使用‘我艹’等感叹词表达强烈反应", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:42.274011" + }, + { + "expression_id": 1983, + "situation": "技术讨论中使用专业术语", + "style": "提及‘API’‘插件’", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:43.878236" + }, + { + "expression_id": 4106, + "situation": "对投票结果表示意外", + "style": "结合表情符号表达震惊", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:45.653868" + }, + { + "expression_id": 2450, + "situation": "提出模糊建议或方向", + "style": "看看", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:47.667071" + }, + { + "expression_id": 3993, + "situation": "对不明内容表示困惑", + "style": "直接说'不理解'", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:49.230999" + }, + { + "expression_id": 5291, + "situation": "追究责任或寻找原因时", + "style": "使用“...的锅”进行定性", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:50.621311" + }, + { + "expression_id": 2242, + "situation": "对技术问题感到困惑", + "style": "说命令和主分支一样,文档没写", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:52.797922" + }, + { + "expression_id": 4203, + "situation": "暗示等待经济到账后行动", + "style": "发米了就去找人玩", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:54.809907" + }, + { + "expression_id": 2342, + "situation": "用地域调侃化解严肃话题", + "style": "昌平县城", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:57.269840" + }, + { + "expression_id": 4797, + "situation": "对他人观点提出反问以示质疑", + "style": "为什么不要凭证(?)", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:51:59.469990" + }, + { + "expression_id": 1703, + "situation": "确认方案是否可行", + "style": "以'就可以'作结", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:11.254043" + }, + { + "expression_id": 2381, + "situation": "表达失望时的负面情绪反应", + "style": "使用粗俗比喻描述期待与现实的落差", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:12.918439" + }, + { + "expression_id": 2860, + "situation": "用夸张比喻描述抽象概念", + "style": "说 到不了会有阴兵的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:15.326344" + }, + { + "expression_id": 199, + "situation": "讨论技术实现方案", + "style": "使用项目化表述", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:18.646503" + }, + { + "expression_id": 3174, + "situation": "建议时总被忽略或反驳", + "style": "采用 直接建议句式", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:22.445332" + }, + { + "expression_id": 5195, + "situation": "对某种方案或条件表示极度认可", + "style": "使用“无敌”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:24.621702" + }, + { + "expression_id": 1012, + "situation": "反复威胁,情绪升级", + "style": "刻意模仿客服话术", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:26.349202" + }, + { + "expression_id": 3012, + "situation": "对术语困惑不解", + "style": "啊?那是啥平台的代金券", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:27.668688" + }, + { + "expression_id": 2620, + "situation": "表达赞同或认可时的回应", + "style": "使用“好看”、“666”等简短夸赞", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:28.765022" + }, + { + "expression_id": 2214, + "situation": "反差语气掩饰内心崩溃", + "style": "纯搞来的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T01:52:31.452526" + }, + { + "expression_id": 3847, + "situation": "对不想做的事表达抗拒", + "style": "但是我一直不想", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:23:50.924439" + }, + { + "expression_id": 5534, + "situation": "被说像某物时否认并强调不同", + "style": "说 不是xxx", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:23:56.427990" + }, + { + "expression_id": 4648, + "situation": "被质疑行为逻辑时强行合理化", + "style": "说‘逻辑上就是这样的’", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:24:02.995928" + }, + { + "expression_id": 2213, + "situation": "轻描淡写回应,单字默认认同", + "style": "6", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:24:04.060748" + }, + { + "expression_id": 374, + "situation": "质疑对方隐瞒信息", + "style": "使用 是不是在xxx里存xxx", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:24:09.139523" + }, + { + "expression_id": 1441, + "situation": "任务压力大,沟通紧张", + "style": "用 无开始搞 表达", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:03.202611" + }, + { + "expression_id": 2931, + "situation": "困惑或无法理解时寻求帮助", + "style": "使用“发啥了”询问具体情况", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:06.154571" + }, + { + "expression_id": 2431, + "situation": "询问接口性能时关注响应速度", + "style": "用“有没有几百token/s的”量化提问", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:08.186667" + }, + { + "expression_id": 3329, + "situation": "对他人行为表示惊讶或讽刺", + "style": "妈的气笑了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:09.673876" + }, + { + "expression_id": 3716, + "situation": "表达对系统改进的期待", + "style": "使用'再次伟大'类比", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:18.306136" + }, + { + "expression_id": 505, + "situation": "看到离谱内容时的震惊反应", + "style": "使用“逆天”表示惊叹", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:21.410431" + }, + { + "expression_id": 4796, + "situation": "对他人技术问题表示不解", + "style": "?你在说什么啊", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:22.954275" + }, + { + "expression_id": 2889, + "situation": "增进感情的亲密表达", + "style": "使用“贴贴”、“抱抱”等叠词", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:24.106546" + }, + { + "expression_id": 3698, + "situation": "对他人建议表示附和但无实质回应", + "style": "虽然我也不知道该hook啥", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:27.298723" + }, + { + "expression_id": 552, + "situation": "测试新功能时", + "style": "表达试用意愿", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:29.066189" + }, + { + "expression_id": 5362, + "situation": "催促对方开启加速器", + "style": "加速器开了吧", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:30.881502" + }, + { + "expression_id": 762, + "situation": "无厘头接话搞怪互动", + "style": "那就发人模狗样", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:34.914448" + }, + { + "expression_id": 3868, + "situation": "对他人观点进行轻蔑式贬低", + "style": "这有点阿姨了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:36.777608" + }, + { + "expression_id": 1690, + "situation": "对复杂功能感到惊讶或抗拒", + "style": "那有点过于...了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:40.578147" + }, + { + "expression_id": 389, + "situation": "问题解决后致谢", + "style": "说 爱你 + 表情", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:25:41.729439" + }, + { + "expression_id": 567, + "situation": "表达个人偏好时的主观陈述", + "style": "使用 肯定是取我喜欢的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:13.295939" + }, + { + "expression_id": 3068, + "situation": "被反复重复相同句式", + "style": "模仿并加重句式重复", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:16.903331" + }, + { + "expression_id": 3108, + "situation": "计划时模糊指代资源", + "style": "我先用...还打算从...", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:22.205836" + }, + { + "expression_id": 3725, + "situation": "对回复质量表示认可", + "style": "感觉还挺高的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:23.678708" + }, + { + "expression_id": 3280, + "situation": "谐音梗成聊天幽默利器", + "style": "使用 孔喵莲", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:25.717663" + }, + { + "expression_id": 3767, + "situation": "否定某种过度配置", + "style": "没必要开extra high", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:31.309638" + }, + { + "expression_id": 5507, + "situation": "对他人行为表示调侃式认同", + "style": "这是一种无声的反抗", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:35.772597" + }, + { + "expression_id": 3885, + "situation": "对明显跑题表示无奈", + "style": "串了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:36.972325" + }, + { + "expression_id": 4080, + "situation": "对某事表示认同并补充细节", + "style": "还真是", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:39.123904" + }, + { + "expression_id": 1000, + "situation": "用反讽接受他人调侃", + "style": "玩的还挺花的你这", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:42.140283" + }, + { + "expression_id": 4976, + "situation": "认为旧话题应该结束时", + "style": "使用 翻篇", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:45.379005" + }, + { + "expression_id": 3358, + "situation": "对他人建议表示轻微反驳或无奈", + "style": "不至于吧,这还没出几天", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:48.050954" + }, + { + "expression_id": 5267, + "situation": "对技术问题表示无奈或调侃", + "style": "用括号补充自嘲式备注", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:50.914664" + }, + { + "expression_id": 4063, + "situation": "对复杂问题表示难以判断", + "style": "说实话...真不好判断", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:54.594218" + }, + { + "expression_id": 855, + "situation": "系统崩溃引发多场景对话", + "style": "使用网络流行语", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:56.281794" + }, + { + "expression_id": 3075, + "situation": "认可不当建议", + "style": "使用 数字6 或 666 表示赞同", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:36:57.994884" + }, + { + "expression_id": 3861, + "situation": "质疑换方案的动机", + "style": "换他干嘛", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:37:01.857283" + }, + { + "expression_id": 1393, + "situation": "无奈接受技术限制", + "style": "这个模型最高不就是10mb么", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:37:04.168960" + }, + { + "expression_id": 4552, + "situation": "对他人关系发展进行猜测或八卦", + "style": "感觉要成了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:37:05.785418" + }, + { + "expression_id": 676, + "situation": "用重复关键词回应荒谬言论", + "style": "赛尔赛尔~", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:37:07.208807" + }, + { + "expression_id": 2072, + "situation": "讨论衣物搭配方案", + "style": "使用“xxx套xxx”描述叠穿", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:38.798708" + }, + { + "expression_id": 4683, + "situation": "对选择模型时强调性价比", + "style": "哪个上的快我先上哪个", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:42.150304" + }, + { + "expression_id": 1361, + "situation": "厌倦工作学习,想躺平休息", + "style": "用 几个月都不想碰 来描述状态", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:43.838510" + }, + { + "expression_id": 4577, + "situation": "描述某种负面现象的持续影响", + "style": "...后遗症", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:46.573887" + }, + { + "expression_id": 5103, + "situation": "对他人发言表示附和但无实质内容", + "style": "哦,知道了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:48.062051" + }, + { + "expression_id": 4010, + "situation": "对荒诞现象表示无语", + "style": "再无话说", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:49.781501" + }, + { + "expression_id": 2103, + "situation": "惊讶或无奈的反应", + "style": "使用单字“草”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:50.989742" + }, + { + "expression_id": 958, + "situation": "对他人发言感到震惊", + "style": "用‘我草’‘吓哭了’‘能说吗我真吓到了’", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:52.117589" + }, + { + "expression_id": 1864, + "situation": "调侃他人行为时的幽默互动", + "style": "用‘量子叠加态’等科学术语进行荒诞比喻", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:53:54.797521" + }, + { + "expression_id": 4726, + "situation": "对他人建议表示反对或讽刺", + "style": "用别人还是比用自己的爽", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:04.268461" + }, + { + "expression_id": 3071, + "situation": "聊天中常以亲昵方式打招呼", + "style": "使用 麦麦你好/麦麦你好香", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:08.882885" + }, + { + "expression_id": 4738, + "situation": "对复杂现象进行简化归因并带自嘲", + "style": "规划器全过了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:10.835122" + }, + { + "expression_id": 3884, + "situation": "突然引入无关信息转移话题", + "style": "其实叔叔的生日是12月3日 [图片2]", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:14.163726" + }, + { + "expression_id": 3838, + "situation": "讨论网络交易陷阱", + "style": "用'白嫖一个号'描述欺诈行为", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:17.066433" + }, + { + "expression_id": 2994, + "situation": "越解释越糊涂", + "style": "是这么说", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:20.706261" + }, + { + "expression_id": 2930, + "situation": "转发或观点引发讨论", + "style": "用“是这样的”表示认同或总结", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:22.378464" + }, + { + "expression_id": 4480, + "situation": "对模糊概念试图澄清", + "style": "就是那个", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:24.290415" + }, + { + "expression_id": 1229, + "situation": "认同他人,却无奈叹息", + "style": "现在知道了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:26.410167" + }, + { + "expression_id": 2830, + "situation": "用无厘头称呼调侃转移话题", + "style": "喊我妈妈", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:30.378153" + }, + { + "expression_id": 4893, + "situation": "对他人发言表示质疑并要求澄清", + "style": "追问 不是什么不是", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:34.264776" + }, + { + "expression_id": 4792, + "situation": "对他人发言进行重复引用并追问", + "style": "[回复 X:Y],说:@X Z", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:54:37.200473" + }, + { + "expression_id": 3764, + "situation": "表达对某事物不信任或不安", + "style": "感觉不太踏实", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:04.670326" + }, + { + "expression_id": 1452, + "situation": "技术建议常被忽视或质疑", + "style": "表示赞同并称呼对方昵称", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:08.054725" + }, + { + "expression_id": 559, + "situation": "面对模糊指令时表达困惑", + "style": "连续发送 ?", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:09.717423" + }, + { + "expression_id": 5559, + "situation": "用伪文言文语气调侃时", + "style": "使用 汝...之...也乎 句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:14.565000" + }, + { + "expression_id": 1077, + "situation": "模型回避身份提问", + "style": "使用 你是...吗 直接提问", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:16.844791" + }, + { + "expression_id": 2634, + "situation": "玩家常以夸张或幽默方式描述游戏行为", + "style": "使用 开大车", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:18.652901" + }, + { + "expression_id": 5113, + "situation": "请求帮助时情绪焦急", + "style": "使用 救一救孩子", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:20.468956" + }, + { + "expression_id": 2211, + "situation": "调侃他人突然自曝身份", + "style": "我搭的,看我看我", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:22.364349" + }, + { + "expression_id": 4358, + "situation": "提及特定模型", + "style": "使用 大香蕉", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T10:55:26.196360" + }, + { + "expression_id": 3525, + "situation": "拉取失败时表达无奈", + "style": "拉不下来了草", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:17.912143" + }, + { + "expression_id": 1035, + "situation": "任务无法及时完成", + "style": "说明时间安排", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:19.288011" + }, + { + "expression_id": 3327, + "situation": "对技术实现失败归因于提示词", + "style": "什么都好,但死在了怎么写提示词了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:23.344335" + }, + { + "expression_id": 4832, + "situation": "回应他人调侃时反向调侃", + "style": "叫妈妈", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:24.559084" + }, + { + "expression_id": 812, + "situation": "赞同他人发言", + "style": "使用单个词语如“豪德”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:27.127242" + }, + { + "expression_id": 3139, + "situation": "图片质量讨论中的常见争议", + "style": "远看有艺术感,近看糊", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:32.199112" + }, + { + "expression_id": 4113, + "situation": "对他人发言表示共鸣式附和", + "style": "3.2喜欢用...", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:33.742935" + }, + { + "expression_id": 1764, + "situation": "发现配置错误引发问题", + "style": "使用“事实证明”引出结论", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:37.238756" + }, + { + "expression_id": 1673, + "situation": "评价他人发言时态度谨慎", + "style": "先肯定再转折提出缺点", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:48.790587" + }, + { + "expression_id": 5089, + "situation": "描述对某事物投入情感的演变", + "style": "使用 第一章...第N章...的叙事体", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:53.574395" + }, + { + "expression_id": 756, + "situation": "回避他人隐藏动态的回应行为", + "style": "233", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:57.806483" + }, + { + "expression_id": 510, + "situation": "反驳他人时轻描淡写", + "style": "没", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:28:59.957683" + }, + { + "expression_id": 1655, + "situation": "质疑模型是否具备思考能力", + "style": "不思考的可以吗", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:02.485807" + }, + { + "expression_id": 3923, + "situation": "对不合理或荒诞观点表示无奈认同", + "style": "说 没事,我也是...", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:06.998147" + }, + { + "expression_id": 3721, + "situation": "对他人质疑进行反问式挑衅", + "style": "懂什么辣", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:09.238207" + }, + { + "expression_id": 742, + "situation": "对技术方案敷衍认可合规", + "style": "很擦边", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:12.613919" + }, + { + "expression_id": 5701, + "situation": "对他人提问表示认同", + "style": "true", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:13.733950" + }, + { + "expression_id": 3137, + "situation": "震惊或困惑的反应", + "style": "使用 我草了老铁", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:14.949674" + }, + { + "expression_id": 4237, + "situation": "对他人创作内容表示崇拜", + "style": "我写那种不乖的小孩被奇怪app拐走的文章", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:17.029639" + }, + { + "expression_id": 1941, + "situation": "回应建议时提出修正", + "style": "指出具体点并否定原方案", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:20.702203" + }, + { + "expression_id": 3078, + "situation": "遇责推诿,不愿担责", + "style": "说 你自己的锅啊 / 你自己发的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:27.717281" + }, + { + "expression_id": 5294, + "situation": "表达不确定或反问语气时", + "style": "句末添加空括号“()”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:30.253213" + }, + { + "expression_id": 4892, + "situation": "对他人突然文言文表示无奈", + "style": "吐槽 同一个毛病", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:36.428512" + }, + { + "expression_id": 5724, + "situation": "回应模糊或无效反馈时的无奈", + "style": "人机", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:38.380509" + }, + { + "expression_id": 2366, + "situation": "随和回应他人提议", + "style": "那我也试试", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:43.020770" + }, + { + "expression_id": 2201, + "situation": "无奈或荒谬时的无奈回应", + "style": "使用‘难蚌’、‘难评’等谐音梗", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:50.148141" + }, + { + "expression_id": 1620, + "situation": "敏感或违规发言需处理", + "style": "声称已向国安举报,带威胁语气", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:52.771962" + }, + { + "expression_id": 3214, + "situation": "资源受限下的讨论", + "style": "使用'没内存'", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:29:58.091857" + }, + { + "expression_id": 2988, + "situation": "用幽默回避敏感话题", + "style": "用‘课上也能发’‘刺激’等词淡化场合禁忌", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:00.628771" + }, + { + "expression_id": 3875, + "situation": "模糊指代某群体", + "style": "使用 你们形成", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:01.988036" + }, + { + "expression_id": 3808, + "situation": "对他人观点表示质疑或否定", + "style": "感觉还是智商欠缺", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:11.635279" + }, + { + "expression_id": 2114, + "situation": "对他人言行表示惊讶或不解", + "style": "?", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:12.619580" + }, + { + "expression_id": 3308, + "situation": "用括号补充调侃性备注", + "style": "(...)", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:14.219061" + }, + { + "expression_id": 2728, + "situation": "压力下用极端语气表达崩溃", + "style": "杀了我吧", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:16.523568" + }, + { + "expression_id": 5051, + "situation": "故意曲解词语制造谐音梗", + "style": "将'淫秽信息'替换为'银灰信息'", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:19.555559" + }, + { + "expression_id": 5233, + "situation": "评价他人行为不合规矩时", + "style": "使用 不合乎周礼", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:23.826708" + }, + { + "expression_id": 2096, + "situation": "对复杂问题的负面反应", + "style": "草", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:25.123482" + }, + { + "expression_id": 2924, + "situation": "强烈质疑某事", + "style": "使用单个“?”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:25.994508" + }, + { + "expression_id": 2763, + "situation": "轻度反驳或调侃他人建议", + "style": "肯定是你正在用的才要分享啊", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:30.123092" + }, + { + "expression_id": 2476, + "situation": "技术原理与原因的解释", + "style": "使用 因为...用了...", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:31.995015" + }, + { + "expression_id": 4210, + "situation": "对他人转发的荒诞内容表示否定", + "style": "见鬼", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:35.203748" + }, + { + "expression_id": 3991, + "situation": "对某人能力表示贬低但带幽默", + "style": "说'好菜'并集体跟风", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:38.818700" + }, + { + "expression_id": 3223, + "situation": "接受无奈并自嘲", + "style": "使用 不知道就不知道吧ⁿ", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:45.169767" + }, + { + "expression_id": 3892, + "situation": "提醒注意健康时", + "style": "使用 请停止麦麦开发", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:46.833699" + }, + { + "expression_id": 3359, + "situation": "表达自己资源充裕且不需使用", + "style": "我都用不完", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:52.386001" + }, + { + "expression_id": 4192, + "situation": "对不明款项提出疑问", + "style": "这是什么钱", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:30:54.210075" + }, + { + "expression_id": 4627, + "situation": "对版本差异感到困惑时", + "style": "用‘怎么一会儿...一会...’表达疑惑", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:31:02.825611" + }, + { + "expression_id": 2221, + "situation": "对他人行为感到无语", + "style": "使用“很唐了”评价", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:31:04.513956" + }, + { + "expression_id": 4999, + "situation": "质疑他人发言的合理性", + "style": "为什么会说出这种话", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:31:05.737494" + }, + { + "expression_id": 3774, + "situation": "对触发机制感到荒谬", + "style": "群友发色图也触发麦麦哈气", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:31:07.193435" + }, + { + "expression_id": 3395, + "situation": "对某现象进行归因并简化表达", + "style": "因为...特别容易被提取到", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:04.528007" + }, + { + "expression_id": 3333, + "situation": "回应荒谬言论时进行反讽式认同", + "style": "使用‘是吧’‘说是’结尾", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:06.574729" + }, + { + "expression_id": 4752, + "situation": "催促对方检查状态", + "style": "你看看", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:07.830497" + }, + { + "expression_id": 5673, + "situation": "评价某个事物或领域时", + "style": "使用“XX这一块”作为结尾", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:09.614544" + }, + { + "expression_id": 5269, + "situation": "重复他人话语以示调侃", + "style": "重复前句并加语气词", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:11.294108" + }, + { + "expression_id": 3912, + "situation": "表达个人目标时", + "style": "自嘲式‘妄想’表述", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:13.917935" + }, + { + "expression_id": 3728, + "situation": "表达体验后正面评价", + "style": "感觉不错", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:15.278186" + }, + { + "expression_id": 2802, + "situation": "纠正术语错误", + "style": "不对,ssl", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:16.270299" + }, + { + "expression_id": 432, + "situation": "重复回应时坚持自己正确", + "style": "是刚刚的我没说错", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:18.133442" + }, + { + "expression_id": 5524, + "situation": "对他人方案表示认可但无热情", + "style": "随便吧", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:20.166496" + }, + { + "expression_id": 792, + "situation": "事情顺利恢复", + "style": "使用 又回来了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:23.773650" + }, + { + "expression_id": 3465, + "situation": "对他人言论进行粗俗辱骂", + "style": "nmsl啊米诺斯", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:25.101676" + }, + { + "expression_id": 2604, + "situation": "故作深奥地炫耀技术", + "style": "说“我见过更残酷的公式”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:26.942360" + }, + { + "expression_id": 961, + "situation": "模仿程序异常反应", + "style": "复读+喵喵模式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:29.406044" + }, + { + "expression_id": 929, + "situation": "表达惊讶或意外的反应", + "style": "使用“好神奇”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:30.717473" + }, + { + "expression_id": 4349, + "situation": "对技术问题归因于环境", + "style": "env有冲突", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:33.236816" + }, + { + "expression_id": 3222, + "situation": "聊天中融入奇幻元素", + "style": "使用 神秘梦境", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:34.813846" + }, + { + "expression_id": 1609, + "situation": "调侃中化解技术难题", + "style": "要不要fork...(雾)", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:36.197532" + }, + { + "expression_id": 5545, + "situation": "对他人操作表示认可", + "style": "OK", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:37.092972" + }, + { + "expression_id": 4678, + "situation": "表达对新版本的期待", + "style": "用 '快点端上来罢' 带有催促的口语化请求", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:38.677816" + }, + { + "expression_id": 561, + "situation": "无奈应对突发任务", + "style": "使用 哎mcp", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:47.158634" + }, + { + "expression_id": 5695, + "situation": "吐槽对方不提供日志瞎猜时", + "style": "戏称对方 让我算命", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:50.357614" + }, + { + "expression_id": 842, + "situation": "确认事物归属或来源", + "style": "应该是的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:56.220459" + }, + { + "expression_id": 317, + "situation": "对不合理现象震惊崩溃", + "style": "心累啊……", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:57.687150" + }, + { + "expression_id": 2023, + "situation": "用户表达对技术问题的困惑", + "style": "使用 分不清哪一块是废弃代码", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:44:59.900852" + }, + { + "expression_id": 4407, + "situation": "对消失事物进行荒诞推测", + "style": "编造神秘场景描述", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:45:08.219787" + }, + { + "expression_id": 5438, + "situation": "被指责时反讽回应", + "style": "禁言你试试", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:45:09.860292" + }, + { + "expression_id": 1312, + "situation": "反复呼唤被忽视的对方", + "style": "老婆", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:45:11.396310" + }, + { + "expression_id": 4138, + "situation": "对他人困境表示夸张的共情", + "style": "冻傻了走着走着被创飞的可能性更大一些", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:45:13.155944" + }, + { + "expression_id": 5077, + "situation": "重复他人话语以强化戏谑", + "style": "就赖你喵", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:45:14.716376" + }, + { + "expression_id": 3206, + "situation": "资源使用讨论中常见争议", + "style": "强调免费资源价值", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:46:54.961181" + }, + { + "expression_id": 249, + "situation": "夸大技术参数描述", + "style": "228000W闪充", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:46:56.568483" + }, + { + "expression_id": 5487, + "situation": "对技术实现表示怀疑", + "style": "思路有,能实现吗", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:46:58.617160" + }, + { + "expression_id": 2348, + "situation": "指出他人明显错误的价格信息", + "style": "使用反问句质疑,如“没这么便宜吧”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:00.416830" + }, + { + "expression_id": 693, + "situation": "无奈面对网络或服务器问题", + "style": "使用“妈的真服了”等口语化抱怨", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:01.776104" + }, + { + "expression_id": 2275, + "situation": "认同他人建议的回应", + "style": "使用 6 表示认可", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:03.328723" + }, + { + "expression_id": 2652, + "situation": "对话中常表达疑问或不确定", + "style": "使用 似乎...根本...", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:06.184957" + }, + { + "expression_id": 2944, + "situation": "游戏配置讨论中常出现性能对比", + "style": "使用'样板/全套'术语", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:08.448459" + }, + { + "expression_id": 2989, + "situation": "认可社区管理变化", + "style": "还行吧", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:10.544008" + }, + { + "expression_id": 2080, + "situation": "对计划略感惊讶。", + "style": "使用 这么强", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:11.873988" + }, + { + "expression_id": 708, + "situation": "强调功能或架构的独立性", + "style": "使用“完全分离”“独立运行”", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:15.584648" + }, + { + "expression_id": 1003, + "situation": "对技术细节难以置信", + "style": "这么牛", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:17.160705" + }, + { + "expression_id": 2200, + "situation": "行为幼稚被指出", + "style": "使用‘小孩哥’等戏谑称呼", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:18.575640" + }, + { + "expression_id": 3650, + "situation": "对他人操作表示恍然大悟", + "style": "原来如此", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:19.784532" + }, + { + "expression_id": 2445, + "situation": "质疑异常现象,提出反问", + "style": "是不是选的...", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:21.479720" + }, + { + "expression_id": 1360, + "situation": "自嘲懒惰或拖延时的幽默表达", + "style": "使用 懒狗 自称,并描述短暂行动", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:23.568435" + }, + { + "expression_id": 3084, + "situation": "冷淡附和无厘头发言", + "style": "说 有道理 / ?", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:24.719714" + }, + { + "expression_id": 2806, + "situation": "对未知事物常持质疑态度", + "style": "有啥好...", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:26.992984" + }, + { + "expression_id": 1925, + "situation": "技术讨论中直接给出操作建议", + "style": "使用 爆一下", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:28.119852" + }, + { + "expression_id": 5222, + "situation": "回应技术话题时轻描淡写", + "style": "说还不去教人写插件", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T12:47:29.935147" + }, + { + "expression_id": 1721, + "situation": "回应挑衅时", + "style": "逗狗玩/你也当真", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:19.039617" + }, + { + "expression_id": 2156, + "situation": "提及技术使用条件", + "style": "有魔法才行", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:21.348062" + }, + { + "expression_id": 1973, + "situation": "要求SELF执行程序指令时", + "style": "写一个关机程序并立即执行", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:22.991510" + }, + { + "expression_id": 5469, + "situation": "功能使用不便被多次提及", + "style": "不得不吐槽", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:26.655334" + }, + { + "expression_id": 467, + "situation": "表达认同", + "style": "应该是的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:27.671871" + }, + { + "expression_id": 2062, + "situation": "强调SELF的群体归属时", + "style": "安卓AI", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:28.702077" + }, + { + "expression_id": 5067, + "situation": "关注AI模型的版本差异与更新特性。", + "style": "Claude4.5吧", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:30.351386" + }, + { + "expression_id": 3634, + "situation": "购买新装备的动机和考虑因素。", + "style": "不然哪有钱换新xxx", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:33.575785" + }, + { + "expression_id": 5268, + "situation": "自嘲自身状态不佳", + "style": "我烂完了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:34.768094" + }, + { + "expression_id": 3890, + "situation": "揣测他人后期操作", + "style": "你这个是...了吧", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:38.615499" + }, + { + "expression_id": 2838, + "situation": "聊天情境表达强烈赞叹或惊讶。", + "style": "太吊了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:45.118985" + }, + { + "expression_id": 4144, + "situation": "软件出现异常问题时的评价描述。", + "style": "神秘化表述+缩写简称", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:46.439213" + }, + { + "expression_id": 5800, + "situation": "对他人言论感到震惊或不适", + "style": "我去这我都不敢看", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:49.119702" + }, + { + "expression_id": 2835, + "situation": "发现新功能支持", + "style": "哦原来支持了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:52.662444" + }, + { + "expression_id": 70, + "situation": "对他人给出的答案感到疑惑需要进一步了解", + "style": "有多低", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:03:58.647406" + }, + { + "expression_id": 590, + "situation": "想嘲讽对话缺乏人性", + "style": "说话和个bot一样", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:04:00.014505" + }, + { + "expression_id": 3130, + "situation": "反驳他人关于内容推荐算法的观点", + "style": "不是", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:04:02.727223" + }, + { + "expression_id": 904, + "situation": "想转移矛盾时", + "style": "你家孩子欺负我家宝宝", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:04:04.543511" + }, + { + "expression_id": 4362, + "situation": "误入群聊后解释原因", + "style": "随便点了一个", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:04:06.750398" + }, + { + "expression_id": 2829, + "situation": "对现状表达负面评价。", + "style": "要完蛋/完了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:04:10.054870" + }, + { + "expression_id": 7047, + "situation": "对离谱内容无奈回应", + "style": "确实挺让人无语的", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:26:54.967027" + }, + { + "expression_id": 14104, + "situation": "自嘲式道歉", + "style": "对不起我的好兄弟啊", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:26:57.071384" + }, + { + "expression_id": 14331, + "situation": "用帧数对比硬件性能", + "style": "鸡血就能稳110帧", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:26:58.607418" + }, + { + "expression_id": 407, + "situation": "技能缺失时沟通受阻", + "style": "不会游泳", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:00.358280" + }, + { + "expression_id": 1598, + "situation": "极端对比化解硬件焦虑", + "style": "e5e3感觉完全没有性价比了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:05.374562" + }, + { + "expression_id": 1439, + "situation": "经济压力下消费无奈", + "style": "本地赚钱本地花", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:07.391032" + }, + { + "expression_id": 4270, + "situation": "质疑配置实际可用性", + "style": "咋不能用插?", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:09.934813" + }, + { + "expression_id": 1807, + "situation": "AMD历史策略被反复提及", + "style": "快死了就卖XX", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:12.998093" + }, + { + "expression_id": 3134, + "situation": "讨论内存频率限制问题", + "style": "可惜我这个XX上不了XXX", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:15.286033" + }, + { + "expression_id": 12694, + "situation": "确认对方提议", + "style": "语气词加简短肯定", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:20.630558" + }, + { + "expression_id": 12751, + "situation": "讨论产品来源时信息模糊", + "style": "提及'OEM盘'等专业概念", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:24.510050" + }, + { + "expression_id": 13259, + "situation": "轻蔑否定他人观点", + "style": "单字回复如 hh", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:26.294114" + }, + { + "expression_id": 8063, + "situation": "引导他人前往指定地点", + "style": "在哪儿呢?让我也过去体验一下", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:27.950256" + }, + { + "expression_id": 10884, + "situation": "羡慕他人成绩", + "style": "羡慕欧神", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:30.301667" + }, + { + "expression_id": 6751, + "situation": "试图用幽默或玩梗活跃气氛", + "style": "残樱早安", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:31.926564" + }, + { + "expression_id": 5382, + "situation": "彻底崩溃,不想玩了", + "style": "这游戏我玩不动了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:33.653993" + }, + { + "expression_id": 5520, + "situation": "回应硬件兼容性问题", + "style": "都能带", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:35.142178" + }, + { + "expression_id": 12260, + "situation": "回应模糊提问时态度含糊", + "style": "答非所问+诗意回应", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:36.493653" + }, + { + "expression_id": 11226, + "situation": "聊天中表达愤怒与不满", + "style": "使用日语骂人词汇加表情符号", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:38.550163" + }, + { + "expression_id": 13120, + "situation": "被指出错误时礼貌回应", + "style": "指正", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:44.261709" + }, + { + "expression_id": 8441, + "situation": "建议方案常被忽视或拒绝", + "style": "使用\"我建议是\"开头", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:52.766043" + }, + { + "expression_id": 13458, + "situation": "简单归因操作原因", + "style": "因为包满了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:54.157555" + }, + { + "expression_id": 656, + "situation": "犹豫不决选品牌或型号", + "style": "我想买XXX", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:27:58.125073" + }, + { + "expression_id": 4452, + "situation": "回应他人提及的设备", + "style": "你那个我当时有听过吗", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:00.742094" + }, + { + "expression_id": 13255, + "situation": "认同非主流系统使用", + "style": "还是...好玩", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:03.229237" + }, + { + "expression_id": 13441, + "situation": "表达惊讶或不满", + "style": "使用粗俗表达", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:04.860968" + }, + { + "expression_id": 5957, + "situation": "价格信息实时更新", + "style": "w3-2w4", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:06.237835" + }, + { + "expression_id": 9732, + "situation": "羡慕他人经历", + "style": "使用慕等简短表达", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:08.108808" + }, + { + "expression_id": 9115, + "situation": "询问性别时回应含糊或回避", + "style": "使用'你猜'来回应", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:09.612616" + }, + { + "expression_id": 13910, + "situation": "游戏更新慢,玩家期待提升", + "style": "使用md等语气词表达不满", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:10.957259" + }, + { + "expression_id": 7319, + "situation": "用自嘲反差回应夸赞", + "style": "除了哈气什么都不会", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:15.565359" + }, + { + "expression_id": 702, + "situation": "略带嘲讽,暗藏得意", + "style": "[表情:偷笑]", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:16.565295" + }, + { + "expression_id": 4922, + "situation": "用游戏术语包装主观判断", + "style": "红温王之力起手", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:18.605203" + }, + { + "expression_id": 11947, + "situation": "转发消息以共鸣或调侃", + "style": "转发并保留原消息格式", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:20.765222" + }, + { + "expression_id": 13209, + "situation": "回忆过往经历时的对话", + "style": "用'上次这样子'开头", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:22.837351" + }, + { + "expression_id": 4431, + "situation": "提醒注意消费风险", + "style": "真得小心点才行对得起这价格", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:24.077508" + }, + { + "expression_id": 10154, + "situation": "惊讶于物品价格极低", + "style": "快和我XXX一样的价了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:27.940540" + }, + { + "expression_id": 12377, + "situation": "群内颁奖互动温馨有趣", + "style": "使用 @人名 + 最xx 的句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:30.621031" + }, + { + "expression_id": 10267, + "situation": "冷淡回应他人疑问", + "style": "我刚好出", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:34.724283" + }, + { + "expression_id": 11121, + "situation": "调侃或惊讶回应他人发言", + "style": "说:我靠,你很久之前发的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:37.468246" + }, + { + "expression_id": 13390, + "situation": "对他人发言表现出不屑态度", + "style": "你傻了吧", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:40.956306" + }, + { + "expression_id": 12585, + "situation": "自豪使用老旧设备", + "style": "还在服役,健康度还是100%", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:43.244243" + }, + { + "expression_id": 12445, + "situation": "用短词收尾或转移话题", + "style": "粥", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:44.772466" + }, + { + "expression_id": 9420, + "situation": "拒绝亲密互动请求", + "style": "不要", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:46.940296" + }, + { + "expression_id": 5451, + "situation": "澄清概念歧义", + "style": "否定性等式表达,如‘萌新≠🦐’", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:49.484077" + }, + { + "expression_id": 13988, + "situation": "用简短感叹回应他人", + "style": "回复‘哈哈哈’", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:54.757175" + }, + { + "expression_id": 6927, + "situation": "突出套餐高性价比", + "style": "送XXX", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:28:57.891739" + }, + { + "expression_id": 6982, + "situation": "以地域或机构为由限制创作自由", + "style": "要是饺子去美国拍,拍出来也是这种东西", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:02.108265" + }, + { + "expression_id": 1220, + "situation": "互动中需确认事实", + "style": "简短疑问+状态确认", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:06.852099" + }, + { + "expression_id": 13238, + "situation": "比较产品性价比时的对话", + "style": "使用 这不比...香 的对比句式", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:08.747487" + }, + { + "expression_id": 4367, + "situation": "嘲讽不合常理的言论", + "style": "油亮那种", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:12.556811" + }, + { + "expression_id": 5223, + "situation": "偏好隐晦调侃,回避直白否定", + "style": "不喜欢吃生的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:17.195185" + }, + { + "expression_id": 9939, + "situation": "吐槽电脑折腾人", + "style": "使用反问句表达观点", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:18.843517" + }, + { + "expression_id": 2545, + "situation": "回应复杂硬件配置", + "style": "双路2698b v3+-30鸡血+双铜管", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:20.451810" + }, + { + "expression_id": 14530, + "situation": "硬件损坏时用户常表达无奈与焦急", + "style": "使用'炸了'形容故障", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:23.699650" + }, + { + "expression_id": 13447, + "situation": "调侃式解读模糊信息", + "style": "用 画饼 替代 空谈或虚假承诺", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:25.251309" + }, + { + "expression_id": 2882, + "situation": "否定物品的实用价值", + "style": "买了之后纯摆设", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:29.116293" + }, + { + "expression_id": 1103, + "situation": "以冷感附和应对荒诞对话", + "style": "()", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:30.435544" + }, + { + "expression_id": 10018, + "situation": "强调理论上的可能性", + "style": "使用'理论上都会'加强语气", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:32.363156" + }, + { + "expression_id": 9095, + "situation": "暗示替代方案更优", + "style": "加XX可以买XX", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:34.163820" + }, + { + "expression_id": 1797, + "situation": "调侃对方有异常生理反应", + "style": "喘的老厉害了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:46.723560" + }, + { + "expression_id": 13804, + "situation": "对二手主板持谨慎态度", + "style": "使用'不敢碰了,别问为什么'的隐晦表达", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:49.578844" + }, + { + "expression_id": 5877, + "situation": "调侃荒诞,无奈以对", + "style": "有点逆天", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:53.114753" + }, + { + "expression_id": 7112, + "situation": "讨论硬件配置优劣", + "style": "两条8比一条8一条16强吗", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:54.683008" + }, + { + "expression_id": 2951, + "situation": "聊敏感尴尬经历", + "style": "皮燕子有点疼,不能让他进来", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:29:58.474757" + }, + { + "expression_id": 5029, + "situation": "借模糊权威支撑观点", + "style": "都还有直播的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:01.386679" + }, + { + "expression_id": 13354, + "situation": "被骗时描述经历", + "style": "使用'被坑了'表达上当", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:02.995623" + }, + { + "expression_id": 6538, + "situation": "否认参与相关活动", + "style": "不太[游戏/行为]", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:04.874409" + }, + { + "expression_id": 7418, + "situation": "反对简化方案", + "style": "这多没意思啊", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:06.458290" + }, + { + "expression_id": 9182, + "situation": "游戏内事件多场景互动描述", + "style": "使用重复强调严重性", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:09.066521" + }, + { + "expression_id": 6136, + "situation": "告知商品缺货情况", + "style": "现在没货了", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:13.161961" + }, + { + "expression_id": 225, + "situation": "解释无法分享内容时", + "style": "只有一帧可以发", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:15.785816" + }, + { + "expression_id": 3948, + "situation": "抱怨快递运费太高", + "style": "起X斤了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:17.242756" + }, + { + "expression_id": 8024, + "situation": "随意提出替代建议", + "style": "用海晶灯", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:18.970183" + }, + { + "expression_id": 7911, + "situation": "贬低国产竞品以抬高自身产品", + "style": "豆包快速响应都比ds深度思考有质量", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:20.618513" + }, + { + "expression_id": 2997, + "situation": "对他人自述反应异常", + "style": "怪", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:23.498212" + }, + { + "expression_id": 8918, + "situation": "装傻回应他人调侃", + "style": "天冷了穿袜子有问题吗", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:25.234428" + }, + { + "expression_id": 10834, + "situation": "偏好特定角色表达", + "style": "使用明确比较级表达", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:29.434354" + }, + { + "expression_id": 3385, + "situation": "文化现象常被误解或过度解读", + "style": "吹雪果然出了国外就被扒衣服了", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:33.233914" + }, + { + "expression_id": 11395, + "situation": "讨论硬件性价比时注重实用与成本平衡", + "style": "用核数对比表达优势", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T15:30:35.065827" + }, + { + "situation": "承认自己能力有限但仍愿尝试", + "style": "那我尽力23333", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:32:52.388737" + }, + { + "situation": "游戏运行卡顿影响体验", + "style": "用夸张比喻形容帧率低下", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:32:56.114331" + }, + { + "situation": "回应模糊问题,彰显冷门话题存在感", + "style": "有的", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:32:58.278949" + }, + { + "situation": "用户询问产品真实体验对比", + "style": "使用疑问句加表情符号", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:00.620736" + }, + { + "situation": "对某话题感到厌倦或无奈", + "style": "...", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:01.669811" + }, + { + "situation": "无奈应对硬件圈的荒诞日常", + "style": "难怪这么熟练", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:03.797481" + }, + { + "situation": "回应困境时轻蔑否定对方能力", + "style": "fw", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:06.650547" + }, + { + "situation": "担心硬件安装过程", + "style": "使用 好吓人 表达不安", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:08.252688" + }, + { + "situation": "低成本方案引发共鸣", + "style": "用 只有...这条路了 表示别无选择", + "suitable": true, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:09.988369" + }, + { + "situation": "试图解释词源或文化梗", + "style": "这个是日语的空耳", + "suitable": false, + "reason": null, + "evaluator": "manual", + "evaluated_at": "2025-12-26T16:33:12.429537" + } + ] +} \ No newline at end of file diff --git a/src/chat/replyer/group_generator.py b/src/chat/replyer/group_generator.py index 5c8003f0..8a07847b 100644 --- a/src/chat/replyer/group_generator.py +++ b/src/chat/replyer/group_generator.py @@ -616,107 +616,6 @@ class DefaultReplyer: logger.error(f"上下文黑话解释失败: {e}") return "" - def build_chat_history_prompts( - self, message_list_before_now: List[DatabaseMessages], target_user_id: str, sender: str - ) -> Tuple[str, str]: - """ - - Args: - message_list_before_now: 历史消息列表 - target_user_id: 目标用户ID(当前对话对象) - - Returns: - Tuple[str, str]: (核心对话prompt, 背景对话prompt) - """ - # 构建背景对话 prompt - all_dialogue_prompt = "" - if message_list_before_now: - latest_msgs = message_list_before_now[-int(global_config.chat.max_context_size) :] - all_dialogue_prompt = build_readable_messages( - latest_msgs, - replace_bot_name=True, - timestamp_mode="normal_no_YMD", - truncate=True, - ) - - return all_dialogue_prompt - - def core_background_build_chat_history_prompts( - self, message_list_before_now: List[DatabaseMessages], target_user_id: str, sender: str - ) -> Tuple[str, str]: - """ - - Args: - message_list_before_now: 历史消息列表 - target_user_id: 目标用户ID(当前对话对象) - - Returns: - Tuple[str, str]: (核心对话prompt, 背景对话prompt) - """ - core_dialogue_list: List[DatabaseMessages] = [] - bot_id = str(global_config.bot.qq_account) - - # 过滤消息:分离bot和目标用户的对话 vs 其他用户的对话 - for msg in message_list_before_now: - try: - msg_user_id = str(msg.user_info.user_id) - reply_to = msg.reply_to - _platform, reply_to_user_id = self._parse_reply_target(reply_to) - if (msg_user_id == bot_id and reply_to_user_id == target_user_id) or msg_user_id == target_user_id: - # bot 和目标用户的对话 - core_dialogue_list.append(msg) - except Exception as e: - logger.error(f"处理消息记录时出错: {msg}, 错误: {e}") - - # 构建核心对话 prompt - core_dialogue_prompt = "" - if core_dialogue_list: - # 检查最新五条消息中是否包含bot自己说的消息 - latest_5_messages = core_dialogue_list[-5:] if len(core_dialogue_list) >= 5 else core_dialogue_list - has_bot_message = any(str(msg.user_info.user_id) == bot_id for msg in latest_5_messages) - - # logger.info(f"最新五条消息:{latest_5_messages}") - # logger.info(f"最新五条消息中是否包含bot自己说的消息:{has_bot_message}") - - # 如果最新五条消息中不包含bot的消息,则返回空字符串 - if not has_bot_message: - core_dialogue_prompt = "" - else: - core_dialogue_list = core_dialogue_list[ - -int(global_config.chat.max_context_size * 0.6) : - ] # 限制消息数量 - - core_dialogue_prompt_str = build_readable_messages( - core_dialogue_list, - replace_bot_name=True, - timestamp_mode="normal_no_YMD", - read_mark=0.0, - truncate=True, - show_actions=True, - ) - core_dialogue_prompt = f"""-------------------------------- -这是上述中你和{sender}的对话摘要,内容从上面的对话中截取,便于你理解: -{core_dialogue_prompt_str} --------------------------------- -""" - - # 构建背景对话 prompt - all_dialogue_prompt = "" - if message_list_before_now: - latest_25_msgs = message_list_before_now[-int(global_config.chat.max_context_size) :] - all_dialogue_prompt_str = build_readable_messages( - latest_25_msgs, - replace_bot_name=True, - timestamp_mode="normal_no_YMD", - truncate=True, - ) - if core_dialogue_prompt: - all_dialogue_prompt = f"所有用户的发言:\n{all_dialogue_prompt_str}" - else: - all_dialogue_prompt = f"{all_dialogue_prompt_str}" - - return core_dialogue_prompt, all_dialogue_prompt - async def build_actions_prompt( self, available_actions: Dict[str, ActionInfo], chosen_actions_info: Optional[List[ActionPlannerInfo]] = None ) -> str: @@ -940,6 +839,7 @@ class DefaultReplyer: timestamp_mode="relative", read_mark=0.0, show_actions=True, + long_time_notice=True, ) # 统一黑话解释构建:根据配置选择上下文或 Planner 模式 @@ -1047,8 +947,16 @@ class DefaultReplyer: else: reply_target_block = "" - # 构建分离的对话 prompt - dialogue_prompt = self.build_chat_history_prompts(message_list_before_now_long, user_id, sender) + + if message_list_before_now_long: + latest_msgs = message_list_before_now_long[-int(global_config.chat.max_context_size) :] + dialogue_prompt = build_readable_messages( + latest_msgs, + replace_bot_name=True, + timestamp_mode="normal_no_YMD", + truncate=True, + long_time_notice=True, + ) # 获取匹配的额外prompt chat_prompt_content = self.get_chat_prompt_for_chat(chat_id) diff --git a/src/chat/replyer/private_generator.py b/src/chat/replyer/private_generator.py index 1e573df7..c0f60322 100644 --- a/src/chat/replyer/private_generator.py +++ b/src/chat/replyer/private_generator.py @@ -667,6 +667,7 @@ class PrivateReplyer: timestamp_mode="relative", read_mark=0.0, show_actions=True, + long_time_notice=True ) message_list_before_short = get_raw_msg_before_timestamp_with_chat( diff --git a/src/chat/utils/chat_message_builder.py b/src/chat/utils/chat_message_builder.py index 156322ae..4fe49589 100644 --- a/src/chat/utils/chat_message_builder.py +++ b/src/chat/utils/chat_message_builder.py @@ -370,6 +370,7 @@ def _build_readable_messages_internal( show_pic: bool = True, message_id_list: Optional[List[Tuple[str, DatabaseMessages]]] = None, pic_single: bool = False, + long_time_notice: bool = False, ) -> Tuple[str, List[Tuple[float, str, str]], Dict[str, str], int]: # sourcery skip: use-getitem-for-re-match-groups """ @@ -523,7 +524,30 @@ def _build_readable_messages_internal( # 3: 格式化为字符串 output_lines: List[str] = [] + prev_timestamp: Optional[float] = None for timestamp, name, content, is_action in detailed_message: + # 检查是否需要插入长时间间隔提示 + if long_time_notice and prev_timestamp is not None: + time_diff = timestamp - prev_timestamp + time_diff_hours = time_diff / 3600 + + # 检查是否跨天 + prev_date = time.strftime("%Y-%m-%d", time.localtime(prev_timestamp)) + current_date = time.strftime("%Y-%m-%d", time.localtime(timestamp)) + is_cross_day = prev_date != current_date + + # 如果间隔大于8小时或跨天,插入提示 + if time_diff_hours > 8 or is_cross_day: + # 格式化日期为中文格式:xxxx年xx月xx日(去掉前导零) + current_time_struct = time.localtime(timestamp) + year = current_time_struct.tm_year + month = current_time_struct.tm_mon + day = current_time_struct.tm_mday + date_str = f"{year}年{month}月{day}日" + hours_str = f"{int(time_diff_hours)}h" + notice = f"以下聊天开始时间:{date_str}。距离上一条消息过去了{hours_str}\n" + output_lines.append(notice) + readable_time = translate_timestamp_to_human_readable(timestamp, mode=timestamp_mode) # 查找消息id(如果有)并构建id_prefix @@ -536,6 +560,8 @@ def _build_readable_messages_internal( else: output_lines.append(f"{id_prefix}{readable_time}, {name}: {content}") output_lines.append("\n") # 在每个消息块后添加换行,保持可读性 + + prev_timestamp = timestamp formatted_string = "".join(output_lines).strip() @@ -651,6 +677,7 @@ async def build_readable_messages_with_list( show_pic=True, message_id_list=None, pic_single=pic_single, + long_time_notice=False, ) if not pic_single: @@ -704,6 +731,7 @@ def build_readable_messages( message_id_list: Optional[List[Tuple[str, DatabaseMessages]]] = None, remove_emoji_stickers: bool = False, pic_single: bool = False, + long_time_notice: bool = False, ) -> str: # sourcery skip: extract-method """ 将消息列表转换为可读的文本格式。 @@ -719,6 +747,7 @@ def build_readable_messages( truncate: 是否截断长消息 show_actions: 是否显示动作记录 remove_emoji_stickers: 是否移除表情包并过滤空消息 + long_time_notice: 是否在消息间隔过长(>8小时)或跨天时插入时间提示 """ # WIP HERE and BELOW ---------------------------------------------- # 创建messages的深拷贝,避免修改原始列表 @@ -812,6 +841,7 @@ def build_readable_messages( show_pic=show_pic, message_id_list=message_id_list, pic_single=pic_single, + long_time_notice=long_time_notice, ) if not pic_single: @@ -839,6 +869,7 @@ def build_readable_messages( show_pic=show_pic, message_id_list=message_id_list, pic_single=pic_single, + long_time_notice=long_time_notice, ) formatted_after, _, pic_id_mapping, _ = _build_readable_messages_internal( messages_after_mark, @@ -850,6 +881,7 @@ def build_readable_messages( show_pic=show_pic, message_id_list=message_id_list, pic_single=pic_single, + long_time_notice=long_time_notice, ) read_mark_line = "\n--- 以上消息是你已经看过,请关注以下未读的新消息---\n"