mirror of https://github.com/Mai-with-u/MaiBot.git
log:修改一些log
parent
1ddedc1dc6
commit
20013a1a2c
|
|
@ -1,285 +0,0 @@
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
from typing import Dict, List, Tuple, Optional
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Add project root to Python path
|
|
||||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
sys.path.insert(0, project_root)
|
|
||||||
from src.common.database.database_model import Messages, ChatStreams # noqa
|
|
||||||
|
|
||||||
|
|
||||||
def get_chat_name(chat_id: str) -> str:
    """Return a human-readable label for ``chat_id``.

    Looks up the ``ChatStreams`` row whose ``stream_id`` equals ``chat_id``
    and formats one of:

    * ``"<group_name> (<chat_id>)"`` when the row has a group name,
    * ``"<user_nickname>的私聊 (<chat_id>)"`` when it only has a user nickname,
    * ``"未知聊天 (<chat_id>)"`` when no row exists or neither field is set,
    * ``"查询失败 (<chat_id>)"`` when the lookup raises.
    """
    try:
        chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id)
        if chat_stream is not None:
            if chat_stream.group_name:
                return f"{chat_stream.group_name} ({chat_id})"
            if chat_stream.user_nickname:
                return f"{chat_stream.user_nickname}的私聊 ({chat_id})"
        # Missing row and row-without-names share the same fallback label.
        return f"未知聊天 ({chat_id})"
    except Exception:
        # Best-effort: this is only a display label, so a failed lookup
        # must not abort the surrounding report.
        return f"查询失败 ({chat_id})"
|
|
||||||
|
|
||||||
|
|
||||||
def format_timestamp(timestamp: float) -> str:
    """Format a POSIX timestamp as a local ``YYYY-MM-DD HH:MM:SS`` string.

    Returns ``"未知时间"`` instead of raising when the value cannot be
    converted: NaN (``ValueError``), values outside the platform range or
    infinities (``OverflowError``/``OSError``), or a non-numeric value such
    as ``None`` (``TypeError``).
    """
    try:
        return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
    except (ValueError, OSError, OverflowError, TypeError):
        return "未知时间"
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_interest_value_distribution(messages) -> Dict[str, int]:
    """Count messages per ``interest_value`` bucket.

    Messages whose ``interest_value`` is ``None`` or ``0.0`` are skipped.
    Each remaining value is placed in the first bucket whose exclusive upper
    bound is greater than the value; anything at or above ``10.0`` goes to
    ``"10.000+"``.

    Returns:
        A dict mapping every bucket label (in ascending order) to its count;
        empty buckets are present with a count of 0.
    """
    # (exclusive upper bound, label), ascending. Keeping bound and label
    # side by side prevents them from drifting apart.
    buckets = (
        (0.010, "0.000-0.010"),
        (0.050, "0.010-0.050"),
        (0.100, "0.050-0.100"),
        (0.500, "0.100-0.500"),
        (1.000, "0.500-1.000"),
        (2.000, "1.000-2.000"),
        (5.000, "2.000-5.000"),
        (10.000, "5.000-10.000"),
    )
    overflow_label = "10.000+"

    distribution = {label: 0 for _, label in buckets}
    distribution[overflow_label] = 0

    for msg in messages:
        if msg.interest_value is None or msg.interest_value == 0.0:
            continue

        value = float(msg.interest_value)
        for upper_bound, label in buckets:
            if value < upper_bound:
                distribution[label] += 1
                break
        else:
            # No bound exceeded the value.
            distribution[overflow_label] += 1

    return distribution
|
|
||||||
|
|
||||||
|
|
||||||
def get_interest_value_stats(messages) -> Dict[str, float]:
    """Compute count, min, max, mean and median of ``interest_value``.

    Messages whose ``interest_value`` is ``None`` or ``0.0`` are ignored.

    Returns:
        A dict with keys ``count``, ``min``, ``max``, ``avg`` and ``median``.
        All values are 0 when no message qualifies.
    """
    values = sorted(
        float(msg.interest_value)
        for msg in messages
        if msg.interest_value is not None and msg.interest_value != 0.0
    )

    if not values:
        return {"count": 0, "min": 0, "max": 0, "avg": 0, "median": 0}

    count = len(values)
    middle = count // 2
    if count % 2 == 1:
        median = values[middle]
    else:
        # Even count: average the two central values.
        median = (values[middle - 1] + values[middle]) / 2

    return {
        "count": count,
        # The list is sorted, so the extremes are its endpoints.
        "min": values[0],
        "max": values[-1],
        "avg": sum(values) / count,
        "median": median,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def get_available_chats() -> List[Tuple[str, str, int]]:
    """List chats that have at least one message with a usable interest value.

    A message counts when its ``interest_value`` is neither NULL nor ``0.0``.

    Returns:
        ``(chat_id, display_name, message_count)`` tuples sorted by
        ``message_count`` in descending order. On any error the problem is
        printed and an empty list is returned.
    """
    try:
        # Shared filter: interest_value present and non-zero.
        has_interest = (Messages.interest_value.is_null(False)) & (Messages.interest_value != 0.0)

        result = []
        # NOTE(review): this issues one COUNT query per distinct chat_id;
        # a single GROUP BY query would avoid that if it becomes slow.
        for row in Messages.select(Messages.chat_id).distinct():
            chat_id = row.chat_id
            count = Messages.select().where((Messages.chat_id == chat_id) & has_interest).count()
            if count > 0:
                result.append((chat_id, get_chat_name(chat_id), count))

        # Busiest chats first.
        result.sort(key=lambda item: item[2], reverse=True)
        return result
    except Exception as e:
        print(f"获取聊天列表失败: {e}")
        return []
|
|
||||||
|
|
||||||
|
|
||||||
def get_time_range_input() -> Tuple[Optional[float], Optional[float]]:
    """Prompt the user for a time range and return it as POSIX timestamps.

    Choices 1-4 select the last 1, 3, 7 or 30 days ending now. Choice 5 reads
    explicit start and end times in ``YYYY-MM-DD HH:MM:SS`` format. Any other
    choice (including 6) means "no limit".

    Returns:
        ``(start, end)`` timestamps, or ``(None, None)`` when no limit applies
        or the custom input could not be parsed or converted.
    """
    print("\n时间范围选择:")
    print("1. 最近1天")
    print("2. 最近3天")
    print("3. 最近7天")
    print("4. 最近30天")
    print("5. 自定义时间范围")
    print("6. 不限制时间")

    choice = input("请选择时间范围 (1-6): ").strip()

    now = time.time()

    # Menu choice -> number of days to look back.
    preset_days = {"1": 1, "2": 3, "3": 7, "4": 30}
    if choice in preset_days:
        return now - preset_days[choice] * 24 * 3600, now

    if choice == "5":
        time_format = "%Y-%m-%d %H:%M:%S"
        print("请输入开始时间 (格式: YYYY-MM-DD HH:MM:SS):")
        start_str = input().strip()
        print("请输入结束时间 (格式: YYYY-MM-DD HH:MM:SS):")
        end_str = input().strip()

        try:
            start_time = datetime.strptime(start_str, time_format).timestamp()
            end_time = datetime.strptime(end_str, time_format).timestamp()
        except (ValueError, OverflowError, OSError):
            # ValueError: text does not match the format.
            # OverflowError/OSError: date parses but is outside the range the
            # platform can convert to a timestamp.
            print("时间格式错误,将不限制时间范围")
            return None, None
        return start_time, end_time

    return None, None
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_interest_values(
    chat_id: Optional[str] = None, start_time: Optional[float] = None, end_time: Optional[float] = None
) -> None:
    """Print interest-value statistics for the selected messages.

    Only messages with a non-NULL, non-zero ``interest_value`` are considered.

    Args:
        chat_id: Restrict the analysis to this chat; falsy means all chats.
        start_time: Inclusive lower bound on ``Messages.time``; ``None``
            means unbounded.
        end_time: Inclusive upper bound on ``Messages.time``; ``None`` means
            unbounded.
    """
    # Build the query.
    query = Messages.select().where((Messages.interest_value.is_null(False)) & (Messages.interest_value != 0.0))

    if chat_id:
        query = query.where(Messages.chat_id == chat_id)

    # Compare against None so that an explicit bound of 0 is still applied.
    if start_time is not None:
        query = query.where(Messages.time >= start_time)

    if end_time is not None:
        query = query.where(Messages.time <= end_time)

    messages = list(query)

    if not messages:
        print("没有找到符合条件的消息")
        return

    # Compute the statistics.
    distribution = calculate_interest_value_distribution(messages)
    stats = get_interest_value_stats(messages)

    # Report.
    print("\n=== Interest Value 分析结果 ===")
    if chat_id:
        print(f"聊天: {get_chat_name(chat_id)}")
    else:
        print("聊天: 全部聊天")

    if start_time is not None and end_time is not None:
        print(f"时间范围: {format_timestamp(start_time)} 到 {format_timestamp(end_time)}")
    elif start_time is not None:
        print(f"时间范围: {format_timestamp(start_time)} 之后")
    elif end_time is not None:
        print(f"时间范围: {format_timestamp(end_time)} 之前")
    else:
        print("时间范围: 不限制")

    print("\n基本统计:")
    print(f"有效消息数量: {stats['count']} (排除null和0值)")
    print(f"最小值: {stats['min']:.3f}")
    print(f"最大值: {stats['max']:.3f}")
    print(f"平均值: {stats['avg']:.3f}")
    print(f"中位数: {stats['median']:.3f}")

    print("\nInterest Value 分布:")
    total = stats["count"]
    for range_name, count in distribution.items():
        # A non-empty bucket implies total > 0, so the division is safe.
        if count > 0:
            percentage = count / total * 100
            print(f"{range_name}: {count} ({percentage:.2f}%)")
|
|
||||||
|
|
||||||
|
|
||||||
def interactive_menu() -> None:
    """Run the interactive console menu for interest-value analysis.

    Loops until the user enters ``q``. Mode ``1`` analyses all chats; mode
    ``2`` first asks the user to pick one chat from the available list. In
    both modes a time range is requested before the analysis is printed.
    """
    while True:
        print("\n" + "=" * 50)
        print("Interest Value 分析工具")
        print("=" * 50)
        print("1. 分析全部聊天")
        print("2. 选择特定聊天分析")
        print("q. 退出")

        choice = input("\n请选择分析模式 (1-2, q): ").strip()

        if choice.lower() == "q":
            print("再见!")
            break

        if choice not in ("1", "2"):
            print("无效选择")
            continue

        chat_id = None

        if choice == "2":
            # Show the chats that can be selected.
            chats = get_available_chats()
            if not chats:
                print("没有找到有interest_value数据的聊天")
                continue

            print(f"\n可用的聊天 (共{len(chats)}个):")
            for i, (_cid, name, count) in enumerate(chats, 1):
                print(f"{i}. {name} ({count}条有效消息)")

            try:
                chat_choice = int(input(f"\n请选择聊天 (1-{len(chats)}): ").strip())
            except ValueError:
                print("请输入有效数字")
                continue

            if not 1 <= chat_choice <= len(chats):
                print("无效选择")
                continue
            chat_id = chats[chat_choice - 1][0]

        # Ask for the time range, then run the analysis.
        start_time, end_time = get_time_range_input()
        analyze_interest_values(chat_id, start_time, end_time)

        input("\n按回车键继续...")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
interactive_menu()
|
|
||||||
|
|
@ -1,397 +0,0 @@
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from typing import Dict, List, Tuple, Optional
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
# Add project root to Python path
|
|
||||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
sys.path.insert(0, project_root)
|
|
||||||
from src.common.database.database_model import Messages, ChatStreams # noqa
|
|
||||||
|
|
||||||
|
|
||||||
# Matches a "[表情包...]" or "[图片...]" tag up to its first closing bracket.
_MEDIA_TAG_RE = re.compile(r"\[(?:表情包|图片)[^\]]*\]")


def contains_emoji_or_image_tags(text: str) -> bool:
    """Return True when ``text`` contains a ``[表情包...]`` or ``[图片...]`` tag.

    Empty or ``None`` text yields False. A tag must be closed by ``]`` to
    count.
    """
    if not text:
        return False

    return _MEDIA_TAG_RE.search(text) is not None
|
|
||||||
|
|
||||||
|
|
||||||
# Matches a "[回复...]" reference. The negated character class stops at the
# first "]", so each reference is removed individually.
_REPLY_REF_RE = re.compile(r"\[回复[^\]]*\]")


def clean_reply_text(text: str) -> str:
    """Remove ``[回复 ...]`` reply references from ``text``.

    Falsy input (``""`` or ``None``) is returned unchanged. Otherwise every
    reply reference is deleted and leading/trailing whitespace is stripped.
    """
    if not text:
        return text

    return _REPLY_REF_RE.sub("", text).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def get_chat_name(chat_id: str) -> str:
    """Return a human-readable label for ``chat_id``.

    Looks up the ``ChatStreams`` row whose ``stream_id`` equals ``chat_id``
    and formats one of:

    * ``"<group_name> (<chat_id>)"`` when the row has a group name,
    * ``"<user_nickname>的私聊 (<chat_id>)"`` when it only has a user nickname,
    * ``"未知聊天 (<chat_id>)"`` when no row exists or neither field is set,
    * ``"查询失败 (<chat_id>)"`` when the lookup raises.
    """
    try:
        chat_stream = ChatStreams.get_or_none(ChatStreams.stream_id == chat_id)
        if chat_stream is not None:
            if chat_stream.group_name:
                return f"{chat_stream.group_name} ({chat_id})"
            if chat_stream.user_nickname:
                return f"{chat_stream.user_nickname}的私聊 ({chat_id})"
        # Missing row and row-without-names share the same fallback label.
        return f"未知聊天 ({chat_id})"
    except Exception:
        # Best-effort: this is only a display label, so a failed lookup
        # must not abort the surrounding report.
        return f"查询失败 ({chat_id})"
|
|
||||||
|
|
||||||
|
|
||||||
def format_timestamp(timestamp: float) -> str:
    """Format a POSIX timestamp as a local ``YYYY-MM-DD HH:MM:SS`` string.

    Returns ``"未知时间"`` instead of raising when the value cannot be
    converted: NaN (``ValueError``), values outside the platform range or
    infinities (``OverflowError``/``OSError``), or a non-numeric value such
    as ``None`` (``TypeError``).
    """
    try:
        return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S")
    except (ValueError, OSError, OverflowError, TypeError):
        return "未知时间"
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_text_length_distribution(messages) -> Dict[str, int]:
    """Count messages per ``processed_plain_text`` length bucket.

    Messages are skipped when their text is ``None`` or contains an emoji or
    image tag. Reply references are removed before the length is measured.

    Returns:
        A dict mapping every bucket label (shortest first) to its count;
        empty buckets are present with a count of 0.
    """
    # (inclusive upper bound, label), ascending; "0" holds empty text.
    buckets = (
        (0, "0"),
        (5, "1-5"),
        (10, "6-10"),
        (20, "11-20"),
        (30, "21-30"),
        (50, "31-50"),
        (70, "51-70"),
        (100, "71-100"),
        (150, "101-150"),
        (200, "151-200"),
        (300, "201-300"),
        (500, "301-500"),
        (1000, "501-1000"),
    )
    overflow_label = "1000+"

    distribution = {label: 0 for _, label in buckets}
    distribution[overflow_label] = 0

    for msg in messages:
        if msg.processed_plain_text is None:
            continue

        # Exclude messages carrying emoji or image tags.
        if contains_emoji_or_image_tags(msg.processed_plain_text):
            continue

        # Measure the text without reply references.
        length = len(clean_reply_text(msg.processed_plain_text))

        for upper_bound, label in buckets:
            if length <= upper_bound:
                distribution[label] += 1
                break
        else:
            distribution[overflow_label] += 1

    return distribution
|
|
||||||
|
|
||||||
|
|
||||||
def get_text_length_stats(messages) -> Dict[str, float]:
    """Compute basic statistics of ``processed_plain_text`` lengths.

    Messages with ``None`` text are tallied in ``null_count``; messages with
    an emoji or image tag are tallied in ``excluded_count``. For all others
    the length is measured after reply references are removed.

    Returns:
        A dict with keys ``count``, ``null_count``, ``excluded_count``,
        ``min``, ``max``, ``avg`` and ``median``. The last four are 0 when no
        message contributed a length.
    """
    lengths = []
    null_count = 0
    excluded_count = 0  # messages dropped because of emoji/image tags

    for msg in messages:
        text = msg.processed_plain_text
        if text is None:
            null_count += 1
        elif contains_emoji_or_image_tags(text):
            excluded_count += 1
        else:
            lengths.append(len(clean_reply_text(text)))

    if not lengths:
        return {
            "count": 0,
            "null_count": null_count,
            "excluded_count": excluded_count,
            "min": 0,
            "max": 0,
            "avg": 0,
            "median": 0,
        }

    lengths.sort()
    count = len(lengths)
    middle = count // 2
    if count % 2 == 1:
        median = lengths[middle]
    else:
        # Even count: average the two central lengths.
        median = (lengths[middle - 1] + lengths[middle]) / 2

    return {
        "count": count,
        "null_count": null_count,
        "excluded_count": excluded_count,
        # The list is sorted, so the extremes are its endpoints.
        "min": lengths[0],
        "max": lengths[-1],
        "avg": sum(lengths) / count,
        "median": median,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def get_available_chats() -> List[Tuple[str, str, int]]:
    """List chats that have at least one ordinary message.

    A message counts when none of ``is_emoji``, ``is_picid`` and
    ``is_command`` equals 1.

    Returns:
        ``(chat_id, display_name, message_count)`` tuples sorted by
        ``message_count`` in descending order. On any error the problem is
        printed and an empty list is returned.
    """
    try:
        # Shared filter: exclude emoji, picture-id and command messages.
        is_ordinary = (Messages.is_emoji != 1) & (Messages.is_picid != 1) & (Messages.is_command != 1)

        result = []
        # NOTE(review): this issues one COUNT query per distinct chat_id;
        # a single GROUP BY query would avoid that if it becomes slow.
        for row in Messages.select(Messages.chat_id).distinct():
            chat_id = row.chat_id
            count = Messages.select().where((Messages.chat_id == chat_id) & is_ordinary).count()
            if count > 0:
                result.append((chat_id, get_chat_name(chat_id), count))

        # Busiest chats first.
        result.sort(key=lambda item: item[2], reverse=True)
        return result
    except Exception as e:
        print(f"获取聊天列表失败: {e}")
        return []
|
|
||||||
|
|
||||||
|
|
||||||
def get_time_range_input() -> Tuple[Optional[float], Optional[float]]:
    """Prompt the user for a time range and return it as POSIX timestamps.

    Choices 1-4 select the last 1, 3, 7 or 30 days ending now. Choice 5 reads
    explicit start and end times in ``YYYY-MM-DD HH:MM:SS`` format. Any other
    choice (including 6) means "no limit".

    Returns:
        ``(start, end)`` timestamps, or ``(None, None)`` when no limit applies
        or the custom input could not be parsed or converted.
    """
    print("\n时间范围选择:")
    print("1. 最近1天")
    print("2. 最近3天")
    print("3. 最近7天")
    print("4. 最近30天")
    print("5. 自定义时间范围")
    print("6. 不限制时间")

    choice = input("请选择时间范围 (1-6): ").strip()

    now = time.time()

    # Menu choice -> number of days to look back.
    preset_days = {"1": 1, "2": 3, "3": 7, "4": 30}
    if choice in preset_days:
        return now - preset_days[choice] * 24 * 3600, now

    if choice == "5":
        time_format = "%Y-%m-%d %H:%M:%S"
        print("请输入开始时间 (格式: YYYY-MM-DD HH:MM:SS):")
        start_str = input().strip()
        print("请输入结束时间 (格式: YYYY-MM-DD HH:MM:SS):")
        end_str = input().strip()

        try:
            start_time = datetime.strptime(start_str, time_format).timestamp()
            end_time = datetime.strptime(end_str, time_format).timestamp()
        except (ValueError, OverflowError, OSError):
            # ValueError: text does not match the format.
            # OverflowError/OSError: date parses but is outside the range the
            # platform can convert to a timestamp.
            print("时间格式错误,将不限制时间范围")
            return None, None
        return start_time, end_time

    return None, None
|
|
||||||
|
|
||||||
|
|
||||||
def get_top_longest_messages(messages, top_n: int = 10) -> List[Tuple[str, int, str, str]]:
    """Return the ``top_n`` longest messages, longest first.

    Messages with ``None`` text or with an emoji or image tag are ignored.
    Length is measured after reply references are removed. Messages of equal
    length keep their input order.

    Returns:
        ``(chat_name, length, formatted_time, preview)`` tuples, where
        ``preview`` is the cleaned text cut to 100 characters plus ``"..."``
        when it was longer.
    """
    candidates = []
    for msg in messages:
        text = msg.processed_plain_text
        if text is None:
            continue
        # Exclude messages carrying emoji or image tags.
        if contains_emoji_or_image_tags(text):
            continue
        cleaned_text = clean_reply_text(text)
        candidates.append((len(cleaned_text), msg, cleaned_text))

    # Stable sort: equal lengths keep their input order.
    candidates.sort(key=lambda item: item[0], reverse=True)

    # Resolve chat names and timestamps only for the rows that are actually
    # returned; get_chat_name does a lookup per call, so cache it per chat.
    chat_names = {}
    result = []
    for length, msg, cleaned_text in candidates[:top_n]:
        chat_id = msg.chat_id
        if chat_id not in chat_names:
            chat_names[chat_id] = get_chat_name(chat_id)
        # First 100 characters serve as the preview.
        preview = cleaned_text[:100] + "..." if length > 100 else cleaned_text
        result.append((chat_names[chat_id], length, format_timestamp(msg.time), preview))

    return result
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_text_lengths(
    chat_id: Optional[str] = None, start_time: Optional[float] = None, end_time: Optional[float] = None
) -> None:
    """Print ``processed_plain_text`` length statistics for the selected messages.

    Emoji, picture-id and command messages are excluded by the query.

    Args:
        chat_id: Restrict the analysis to this chat; falsy means all chats.
        start_time: Inclusive lower bound on ``Messages.time``; ``None``
            means unbounded.
        end_time: Inclusive upper bound on ``Messages.time``; ``None`` means
            unbounded.
    """
    # Build the query, excluding special message types.
    query = Messages.select().where((Messages.is_emoji != 1) & (Messages.is_picid != 1) & (Messages.is_command != 1))

    if chat_id:
        query = query.where(Messages.chat_id == chat_id)

    # Compare against None so that an explicit bound of 0 is still applied.
    if start_time is not None:
        query = query.where(Messages.time >= start_time)

    if end_time is not None:
        query = query.where(Messages.time <= end_time)

    messages = list(query)

    if not messages:
        print("没有找到符合条件的消息")
        return

    # Compute the statistics.
    distribution = calculate_text_length_distribution(messages)
    stats = get_text_length_stats(messages)
    top_longest = get_top_longest_messages(messages, 10)

    # Report.
    print("\n=== Processed Plain Text 长度分析结果 ===")
    print("(已排除表情、图片ID、命令类型消息,已排除[表情包]和[图片]标记消息,已清理回复引用)")
    if chat_id:
        print(f"聊天: {get_chat_name(chat_id)}")
    else:
        print("聊天: 全部聊天")

    if start_time is not None and end_time is not None:
        print(f"时间范围: {format_timestamp(start_time)} 到 {format_timestamp(end_time)}")
    elif start_time is not None:
        print(f"时间范围: {format_timestamp(start_time)} 之后")
    elif end_time is not None:
        print(f"时间范围: {format_timestamp(end_time)} 之前")
    else:
        print("时间范围: 不限制")

    print("\n基本统计:")
    print(f"总消息数量: {len(messages)}")
    print(f"有文本消息数量: {stats['count']}")
    print(f"空文本消息数量: {stats['null_count']}")
    print(f"被排除的消息数量: {stats['excluded_count']}")
    if stats["count"] > 0:
        print(f"最短长度: {stats['min']} 字符")
        print(f"最长长度: {stats['max']} 字符")
        print(f"平均长度: {stats['avg']:.2f} 字符")
        print(f"中位数长度: {stats['median']:.2f} 字符")

    print("\n文本长度分布:")
    total = stats["count"]
    if total > 0:
        for range_name, count in distribution.items():
            if count > 0:
                percentage = count / total * 100
                print(f"{range_name} 字符: {count} ({percentage:.2f}%)")

    # Show the longest messages.
    if top_longest:
        print(f"\n最长的 {len(top_longest)} 条消息:")
        for i, (chat_name, length, time_str, preview) in enumerate(top_longest, 1):
            print(f"{i}. [{chat_name}] {time_str}")
            print(f" 长度: {length} 字符")
            print(f" 预览: {preview}")
            print()
|
|
||||||
|
|
||||||
|
|
||||||
def interactive_menu() -> None:
    """Run the interactive console menu for text-length analysis.

    Loops until the user enters ``q``. Mode ``1`` analyses all chats; mode
    ``2`` first asks the user to pick one chat from the available list. In
    both modes a time range is requested before the analysis is printed.
    """
    while True:
        print("\n" + "=" * 50)
        print("Processed Plain Text 长度分析工具")
        print("=" * 50)
        print("1. 分析全部聊天")
        print("2. 选择特定聊天分析")
        print("q. 退出")

        choice = input("\n请选择分析模式 (1-2, q): ").strip()

        if choice.lower() == "q":
            print("再见!")
            break

        if choice not in ("1", "2"):
            print("无效选择")
            continue

        chat_id = None

        if choice == "2":
            # Show the chats that can be selected.
            chats = get_available_chats()
            if not chats:
                print("没有找到聊天数据")
                continue

            print(f"\n可用的聊天 (共{len(chats)}个):")
            for i, (_cid, name, count) in enumerate(chats, 1):
                print(f"{i}. {name} ({count}条消息)")

            try:
                chat_choice = int(input(f"\n请选择聊天 (1-{len(chats)}): ").strip())
            except ValueError:
                print("请输入有效数字")
                continue

            if not 1 <= chat_choice <= len(chats):
                print("无效选择")
                continue
            chat_id = chats[chat_choice - 1][0]

        # Ask for the time range, then run the analysis.
        start_time, end_time = get_time_range_input()
        analyze_text_lengths(chat_id, start_time, end_time)

        input("\n按回车键继续...")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
interactive_menu()
|
|
||||||
|
|
@ -492,7 +492,7 @@ class BrainPlanner:
|
||||||
action.action_data = action.action_data or {}
|
action.action_data = action.action_data or {}
|
||||||
action.action_data["loop_start_time"] = loop_start_time
|
action.action_data["loop_start_time"] = loop_start_time
|
||||||
|
|
||||||
logger.info(
|
logger.debug(
|
||||||
f"{self.log_prefix}规划器决定执行{len(actions)}个动作: {' '.join([a.action_type for a in actions])}"
|
f"{self.log_prefix}规划器决定执行{len(actions)}个动作: {' '.join([a.action_type for a in actions])}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -154,13 +154,16 @@ class HeartFChatting:
|
||||||
# 记录循环信息和计时器结果
|
# 记录循环信息和计时器结果
|
||||||
timer_strings = []
|
timer_strings = []
|
||||||
for name, elapsed in cycle_timers.items():
|
for name, elapsed in cycle_timers.items():
|
||||||
formatted_time = f"{elapsed * 1000:.2f}毫秒" if elapsed < 1 else f"{elapsed:.2f}秒"
|
if elapsed < 0.1:
|
||||||
|
# 不显示小于0.1秒的计时器
|
||||||
|
continue
|
||||||
|
formatted_time = f"{elapsed:.2f}秒"
|
||||||
timer_strings.append(f"{name}: {formatted_time}")
|
timer_strings.append(f"{name}: {formatted_time}")
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"{self.log_prefix} 第{self._current_cycle_detail.cycle_id}次思考,"
|
f"{self.log_prefix} 第{self._current_cycle_detail.cycle_id}次思考,"
|
||||||
f"耗时: {self._current_cycle_detail.end_time - self._current_cycle_detail.start_time:.1f}秒" # type: ignore
|
f"耗时: {self._current_cycle_detail.end_time - self._current_cycle_detail.start_time:.1f}秒;" # type: ignore
|
||||||
+ (f"\n详情: {'; '.join(timer_strings)}" if timer_strings else "")
|
+ (f"详情: {'; '.join(timer_strings)}" if timer_strings else "")
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _loopbody(self): # sourcery skip: hoist-if-from-if
|
async def _loopbody(self): # sourcery skip: hoist-if-from-if
|
||||||
|
|
@ -346,8 +349,8 @@ class HeartFChatting:
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"{self.log_prefix}决定执行{len(action_to_use_info)}个动作: {' '.join([a.action_type for a in action_to_use_info])}"
|
f"{self.log_prefix} 决定执行{len(action_to_use_info)}个动作: {' '.join([a.action_type for a in action_to_use_info])}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 3. 并行执行所有动作
|
# 3. 并行执行所有动作
|
||||||
action_tasks = [
|
action_tasks = [
|
||||||
|
|
|
||||||
|
|
@ -215,29 +215,30 @@ class DefaultReplyer:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return False, llm_response
|
return False, llm_response
|
||||||
|
|
||||||
async def build_relation_info(self, chat_content: str, sender: str, person_list: List[Person]):
|
#移动到 relation插件中构建
|
||||||
if not global_config.relationship.enable_relationship:
|
# async def build_relation_info(self, chat_content: str, sender: str, person_list: List[Person]):
|
||||||
return ""
|
# if not global_config.relationship.enable_relationship:
|
||||||
|
# return ""
|
||||||
|
|
||||||
if not sender:
|
# if not sender:
|
||||||
return ""
|
# return ""
|
||||||
|
|
||||||
if sender == global_config.bot.nickname:
|
# if sender == global_config.bot.nickname:
|
||||||
return ""
|
# return ""
|
||||||
|
|
||||||
# 获取用户ID
|
# # 获取用户ID
|
||||||
person = Person(person_name=sender)
|
# person = Person(person_name=sender)
|
||||||
if not is_person_known(person_name=sender):
|
# if not is_person_known(person_name=sender):
|
||||||
logger.warning(f"未找到用户 {sender} 的ID,跳过信息提取")
|
# logger.warning(f"未找到用户 {sender} 的ID,跳过信息提取")
|
||||||
return f"你完全不认识{sender},不理解ta的相关信息。"
|
# return f"你完全不认识{sender},不理解ta的相关信息。"
|
||||||
|
|
||||||
sender_relation = await person.build_relationship(chat_content)
|
# sender_relation = await person.build_relationship(chat_content)
|
||||||
others_relation = ""
|
# others_relation = ""
|
||||||
for person in person_list:
|
# for person in person_list:
|
||||||
person_relation = await person.build_relationship()
|
# person_relation = await person.build_relationship()
|
||||||
others_relation += person_relation
|
# others_relation += person_relation
|
||||||
|
|
||||||
return f"{sender_relation}\n{others_relation}"
|
# return f"{sender_relation}\n{others_relation}"
|
||||||
|
|
||||||
async def build_expression_habits(self, chat_history: str, target: str) -> Tuple[str, List[int]]:
|
async def build_expression_habits(self, chat_history: str, target: str) -> Tuple[str, List[int]]:
|
||||||
# sourcery skip: for-append-to-extend
|
# sourcery skip: for-append-to-extend
|
||||||
|
|
@ -680,8 +681,8 @@ class DefaultReplyer:
|
||||||
if person.is_known:
|
if person.is_known:
|
||||||
person_list_short.append(person)
|
person_list_short.append(person)
|
||||||
|
|
||||||
for person in person_list_short:
|
# for person in person_list_short:
|
||||||
print(person.person_name)
|
# print(person.person_name)
|
||||||
|
|
||||||
chat_talking_prompt_short = build_readable_messages(
|
chat_talking_prompt_short = build_readable_messages(
|
||||||
message_list_before_short,
|
message_list_before_short,
|
||||||
|
|
@ -956,7 +957,7 @@ class DefaultReplyer:
|
||||||
prompt
|
prompt
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"replyer生成内容: {content}")
|
logger.info(f"使用{model_name}生成回复内容: {content}")
|
||||||
return content, reasoning_content, model_name, tool_calls
|
return content, reasoning_content, model_name, tool_calls
|
||||||
|
|
||||||
async def get_prompt_info(self, message: str, sender: str, target: str):
|
async def get_prompt_info(self, message: str, sender: str, target: str):
|
||||||
|
|
|
||||||
|
|
@ -563,8 +563,8 @@ class PrivateReplyer:
|
||||||
if person.is_known:
|
if person.is_known:
|
||||||
person_list_short.append(person)
|
person_list_short.append(person)
|
||||||
|
|
||||||
for person in person_list_short:
|
# for person in person_list_short:
|
||||||
print(person.person_name)
|
# print(person.person_name)
|
||||||
|
|
||||||
chat_talking_prompt_short = build_readable_messages(
|
chat_talking_prompt_short = build_readable_messages(
|
||||||
message_list_before_short,
|
message_list_before_short,
|
||||||
|
|
@ -829,7 +829,7 @@ class PrivateReplyer:
|
||||||
prompt
|
prompt
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"replyer生成内容: {content}")
|
logger.info(f"使用 {model_name} 生成回复内容: {content}")
|
||||||
return content, reasoning_content, model_name, tool_calls
|
return content, reasoning_content, model_name, tool_calls
|
||||||
|
|
||||||
async def get_prompt_info(self, message: str, sender: str, target: str):
|
async def get_prompt_info(self, message: str, sender: str, target: str):
|
||||||
|
|
|
||||||
|
|
@ -363,8 +363,8 @@ MODULE_COLORS = {
|
||||||
"planner": "\033[36m",
|
"planner": "\033[36m",
|
||||||
"relation": "\033[38;5;139m", # 柔和的紫色,不刺眼
|
"relation": "\033[38;5;139m", # 柔和的紫色,不刺眼
|
||||||
# 聊天相关模块
|
# 聊天相关模块
|
||||||
"normal_chat": "\033[38;5;81m", # 亮蓝绿色
|
"hfc": "\033[38;5;175m", # 柔和的粉色,不显眼但保持粉色系
|
||||||
"heartflow": "\033[38;5;175m", # 柔和的粉色,不显眼但保持粉色系
|
"bc": "\033[38;5;175m", # 柔和的粉色,不显眼但保持粉色系
|
||||||
"sub_heartflow": "\033[38;5;207m", # 粉紫色
|
"sub_heartflow": "\033[38;5;207m", # 粉紫色
|
||||||
"subheartflow_manager": "\033[38;5;201m", # 深粉色
|
"subheartflow_manager": "\033[38;5;201m", # 深粉色
|
||||||
"background_tasks": "\033[38;5;240m", # 灰色
|
"background_tasks": "\033[38;5;240m", # 灰色
|
||||||
|
|
@ -372,8 +372,6 @@ MODULE_COLORS = {
|
||||||
"chat_stream": "\033[38;5;51m", # 亮青色
|
"chat_stream": "\033[38;5;51m", # 亮青色
|
||||||
"message_storage": "\033[38;5;33m", # 深蓝色
|
"message_storage": "\033[38;5;33m", # 深蓝色
|
||||||
"expressor": "\033[38;5;166m", # 橙色
|
"expressor": "\033[38;5;166m", # 橙色
|
||||||
# 专注聊天模块
|
|
||||||
"memory_activator": "\033[38;5;117m", # 天蓝色
|
|
||||||
# 插件系统
|
# 插件系统
|
||||||
"plugins": "\033[31m", # 红色
|
"plugins": "\033[31m", # 红色
|
||||||
"plugin_api": "\033[33m", # 黄色
|
"plugin_api": "\033[33m", # 黄色
|
||||||
|
|
|
||||||
|
|
@ -148,6 +148,8 @@ class LLMRequest:
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.debug(f"LLM请求总耗时: {time.time() - start_time}")
|
logger.debug(f"LLM请求总耗时: {time.time() - start_time}")
|
||||||
|
logger.debug(f"LLM生成内容: {response}")
|
||||||
|
|
||||||
content = response.content
|
content = response.content
|
||||||
reasoning_content = response.reasoning_content or ""
|
reasoning_content = response.reasoning_content or ""
|
||||||
tool_calls = response.tool_calls
|
tool_calls = response.tool_calls
|
||||||
|
|
|
||||||
|
|
@ -466,8 +466,8 @@ class Person:
|
||||||
<分类1><分类2><分类3>......
|
<分类1><分类2><分类3>......
|
||||||
如果没有相关的分类,请输出<none>"""
|
如果没有相关的分类,请输出<none>"""
|
||||||
response, _ = await relation_selection_model.generate_response_async(prompt)
|
response, _ = await relation_selection_model.generate_response_async(prompt)
|
||||||
print(prompt)
|
# print(prompt)
|
||||||
print(response)
|
# print(response)
|
||||||
category_list = extract_categories_from_response(response)
|
category_list = extract_categories_from_response(response)
|
||||||
if "none" not in category_list:
|
if "none" not in category_list:
|
||||||
for category in category_list:
|
for category in category_list:
|
||||||
|
|
|
||||||
|
|
@ -66,8 +66,8 @@ async def generate_with_model(
|
||||||
Tuple[bool, str, str, str]: (是否成功, 生成的内容, 推理过程, 模型名称)
|
Tuple[bool, str, str, str]: (是否成功, 生成的内容, 推理过程, 模型名称)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
model_name_list = model_config.model_list
|
# model_name_list = model_config.model_list
|
||||||
logger.info(f"[LLMAPI] 使用模型集合 {model_name_list} 生成内容")
|
# logger.info(f"[LLMAPI] 使用模型集合 {model_name_list} 生成内容")
|
||||||
logger.debug(f"[LLMAPI] 完整提示词: {prompt}")
|
logger.debug(f"[LLMAPI] 完整提示词: {prompt}")
|
||||||
|
|
||||||
llm_request = LLMRequest(model_set=model_config, request_type=request_type)
|
llm_request = LLMRequest(model_set=model_config, request_type=request_type)
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ async def _send_to_target(
|
||||||
anchor_message: Union["MessageRecv", None] = None
|
anchor_message: Union["MessageRecv", None] = None
|
||||||
if reply_message:
|
if reply_message:
|
||||||
anchor_message = db_message_to_message_recv(reply_message)
|
anchor_message = db_message_to_message_recv(reply_message)
|
||||||
logger.info(f"[SendAPI] 找到匹配的回复消息,发送者: {anchor_message.message_info.user_info.user_id}") # type: ignore
|
logger.debug(f"[SendAPI] 找到匹配的回复消息,发送者: {anchor_message.message_info.user_info.user_id}") # type: ignore
|
||||||
if anchor_message:
|
if anchor_message:
|
||||||
anchor_message.update_chat_stream(target_stream)
|
anchor_message.update_chat_stream(target_stream)
|
||||||
assert anchor_message.message_info.user_info, "用户信息缺失"
|
assert anchor_message.message_info.user_info, "用户信息缺失"
|
||||||
|
|
|
||||||
|
|
@ -180,7 +180,6 @@ class ToolExecutor:
|
||||||
|
|
||||||
tool_results.append(tool_info)
|
tool_results.append(tool_info)
|
||||||
used_tools.append(tool_name)
|
used_tools.append(tool_name)
|
||||||
logger.info(f"{self.log_prefix}工具{tool_name}执行成功,类型: {tool_info['type']}")
|
|
||||||
preview = content[:200]
|
preview = content[:200]
|
||||||
logger.debug(f"{self.log_prefix}工具{tool_name}结果内容: {preview}...")
|
logger.debug(f"{self.log_prefix}工具{tool_name}结果内容: {preview}...")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue