fix: optimize truncation logging

pull/1336/head
SengokuCola 2025-10-30 11:30:48 +08:00
parent e0aa745ba4
commit c6dadc2872
3 changed files with 50 additions and 30 deletions

View File

@@ -237,15 +237,8 @@ def _build_stream_api_resp(
resp.tool_calls.append(ToolCall(call_id, function_name, arguments))
# check for max_tokens truncation
if finish_reason == "length":
if resp.content and resp.content.strip():
logger.warning(
"⚠ The OpenAI response was partially truncated at the max_tokens limit;\n"
" this may affect the reply content. Consider adjusting the model's max_tokens setting!"
)
else:
logger.warning("⚠ The OpenAI response was truncated at the max_tokens limit;\n please adjust the model's max_tokens setting!")
# max_tokens truncation check (for streaming, the warning is now emitted uniformly by the handler function and no longer logged here)
# finish_reason is kept only for upper-layer decisions
if not resp.content and not resp.tool_calls:
raise EmptyResponseException()
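A minimal sketch of the division of labor this hunk establishes, using hypothetical stand-in names (`StreamResp`, `build_stream_resp`) rather than the project's real classes: the builder no longer logs anything, it only preserves `finish_reason` and raises on an empty result, leaving truncation warnings to the caller.

from dataclasses import dataclass, field

@dataclass
class StreamResp:
    content: str = ""
    tool_calls: list = field(default_factory=list)
    finish_reason: str | None = None

def build_stream_resp(content: str, finish_reason: str | None) -> StreamResp:
    # Build the response and keep finish_reason for the caller to inspect.
    resp = StreamResp(content=content, finish_reason=finish_reason)
    if not resp.content and not resp.tool_calls:
        raise ValueError("empty response")  # stands in for EmptyResponseException
    return resp  # no logging here; the handler decides what to report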
@@ -270,6 +263,7 @@ async def _default_stream_response_handler(
_tool_calls_buffer: list[tuple[str, str, io.StringIO]] = [] # tool-call buffer for storing received tool calls
_usage_record = None # usage record
finish_reason: str | None = None # records the last finish_reason
_model_name: str | None = None # records the model name
def _insure_buffer_closed():
# make sure the buffers are closed
@@ -300,6 +294,9 @@ async def _default_stream_response_handler(
if hasattr(event.choices[0], "finish_reason") and event.choices[0].finish_reason:
finish_reason = event.choices[0].finish_reason
if hasattr(event, "model") and event.model and not _model_name:
_model_name = event.model # record the model name
if hasattr(delta, "reasoning_content") and delta.reasoning_content: # type: ignore
# flag: a separate reasoning-content block is present
_has_rc_attr_flag = True
@@ -322,12 +319,34 @@
)
try:
return _build_stream_api_resp(
resp = _build_stream_api_resp(
_fc_delta_buffer,
_rc_delta_buffer,
_tool_calls_buffer,
finish_reason=finish_reason,
), _usage_record
)
# emit the unified max_tokens truncation warning here, reading the details from resp
if finish_reason == "length":
# stash the model name in resp.raw_data so the log below strictly reads it back from resp
try:
if _model_name:
resp.raw_data = {"model": _model_name}
except Exception:
pass
model_dbg = None
try:
if isinstance(resp.raw_data, dict):
model_dbg = resp.raw_data.get("model")
except Exception:
model_dbg = None
# unified log format
logger.info(
"Model %s hit the max_tokens limit and may have produced only partial output; adjust the configuration as needed"
% (model_dbg or "")
)
return resp, _usage_record
except Exception:
# make sure the buffers are closed
_insure_buffer_closed()
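Both response paths now funnel into the same message, so the shared pattern can be summarized as a small helper. This is a sketch under assumed attribute names (`finish_reason`, `raw_data`), not the project's actual API:

import logging

logger = logging.getLogger(__name__)

def log_truncation(resp) -> None:
    # Emit one unified log line for max_tokens truncation, reading the
    # model name strictly back from resp, as the handler above stores it.
    if getattr(resp, "finish_reason", None) != "length":
        return
    raw = getattr(resp, "raw_data", None)
    model = raw.get("model") if isinstance(raw, dict) else None
    logger.info(
        "Model %s hit the max_tokens limit and may have produced only "
        "partial output; adjust the configuration as needed",
        model or "",
    )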
@@ -402,14 +421,13 @@ def _default_normal_response_parser(
choice0 = resp.choices[0]
reason = getattr(choice0, "finish_reason", None)
if reason and reason == "length":
has_real_output = bool(api_response.content and api_response.content.strip())
if has_real_output:
logger.warning(
"⚠ The OpenAI response was partially truncated at the max_tokens limit;\n"
" this may affect the reply content. Consider adjusting the model's max_tokens setting!"
)
else:
logger.warning("⚠ The OpenAI response was truncated at the max_tokens limit;\n please adjust the model's max_tokens setting!")
_model_name = resp.model
# unified log format
logger.info(
"Model %s hit the max_tokens limit and may have produced only partial output; adjust the configuration as needed"
% (_model_name or "")
)
return api_response, _usage_record
except Exception as e:
logger.debug(f"检查 MAX_TOKENS 截断时异常: {e}")
@@ -522,7 +540,7 @@ class OpenaiClient(BaseClient):
await asyncio.sleep(0.1) # wait 0.1s, then re-check the task & interrupt-semaphore state
# logger.
logger.debug(f"OpenAI API响应(非流式): {req_task.result()}")
# logger.debug(f"OpenAI API响应(非流式): {req_task.result()}")
# logger.info(f"OpenAI请求时间: {model_info.model_identifier} {time.time() - start_time} \n{messages}")

View File

@@ -105,10 +105,12 @@ class MemoryChest:
return 0.60
elif percentage < 1.0:
return 0.50
elif percentage < 1.2:
elif percentage < 1.5:
return 0.40
elif percentage < 2:
return 0.30
else:
return 0.35
return 0.25
except Exception:
# fall back to a conservative threshold on an exception
return 0.70
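Read together, the patched branches relax the curve at high occupancy. A reconstruction as a standalone function (branches above this hunk, such as the one returning 0.60, are omitted; 0.70 remains the exception fallback shown above):

def memory_threshold(percentage: float) -> float:
    # Values taken from this diff; lower-occupancy branches sit outside the hunk.
    if percentage < 1.0:
        return 0.50
    elif percentage < 1.5:
        return 0.40
    elif percentage < 2:
        return 0.30
    else:
        return 0.25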

View File

@@ -58,16 +58,16 @@ class MemoryManagementTask(AsyncTask):
if percentage < 0.6:
# below 60%: run every 3600 seconds
return 3600
elif percentage < 0.8:
elif percentage < 1:
# 60% or above (below 100%): run every 1800 seconds
return 1800
elif percentage < 1.0:
elif percentage < 1.5:
# 100% or above: run at a much shorter interval
return 300
elif percentage < 1.2:
return 30
return 600
elif percentage < 1.8:
return 120
else:
return 10
return 30
except Exception as e:
logger.error(f"[记忆管理] 计算执行间隔时出错: {e}")
@@ -93,10 +93,10 @@
logger.info(f"当前记忆数量: {current_count}/{self.max_memory_number} ({percentage:.1%})")
# once the ratio exceeds 2, keep deleting until it falls to 1.8 or below (older/newer memories are more likely to be removed)
if percentage > 1.6:
if percentage > 2:
logger.info("记忆过多,开始遗忘记忆")
while True:
if percentage <= 1.6:
if percentage <= 1.8:
break
removed = global_memory_chest.remove_one_memory_by_age_weight()
if not removed:
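With the trigger raised from 1.6 to 2 and the stop condition relaxed to 1.8, the loop's post-patch shape is roughly the sketch below. The hunk is cut off here, so the in-loop percentage update and the return value are assumptions; the helper name mirrors the diff:

def forget_overflow(chest, current_count: int, max_count: int) -> int:
    # Trigger only above 200% occupancy, then delete down to 180%.
    percentage = current_count / max_count
    if percentage > 2:
        while percentage > 1.8:
            removed = chest.remove_one_memory_by_age_weight()
            if not removed:
                break  # nothing eligible for removal remains
            current_count -= 1
            percentage = current_count / max_count
    return current_count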