fix: optimize truncation logging

pull/1336/head
SengokuCola 2025-10-30 11:30:48 +08:00
parent e0aa745ba4
commit c6dadc2872
3 changed files with 50 additions and 30 deletions

View File

@@ -237,15 +237,8 @@ def _build_stream_api_resp(
             resp.tool_calls.append(ToolCall(call_id, function_name, arguments))

-    # Check for max_tokens truncation
-    if finish_reason == "length":
-        if resp.content and resp.content.strip():
-            logger.warning(
-                "⚠ The OpenAI response was partially truncated by the max_tokens limit;\n"
-                "  this may affect the reply. Consider raising the model's max_tokens!"
-            )
-        else:
-            logger.warning("⚠ The OpenAI response was truncated by the max_tokens limit;\n  please raise the model's max_tokens!")
+    # Check for max_tokens truncation (for streaming, the warning is now emitted centrally by the handler, not here)
+    # finish_reason is kept only so the caller can inspect it

     if not resp.content and not resp.tool_calls:
         raise EmptyResponseException()
@@ -270,6 +263,7 @@ async def _default_stream_response_handler(
     _tool_calls_buffer: list[tuple[str, str, io.StringIO]] = []  # buffer for tool calls received so far
     _usage_record = None  # usage record
     finish_reason: str | None = None  # last finish_reason seen
+    _model_name: str | None = None  # model name, once known

     def _insure_buffer_closed():
         # make sure the buffers are closed
@@ -300,6 +294,9 @@ async def _default_stream_response_handler(
         if hasattr(event.choices[0], "finish_reason") and event.choices[0].finish_reason:
             finish_reason = event.choices[0].finish_reason

+        if hasattr(event, "model") and event.model and not _model_name:
+            _model_name = event.model  # record the model name
+
         if hasattr(delta, "reasoning_content") and delta.reasoning_content:  # type: ignore
             # flag: the stream carries a separate reasoning-content block
             _has_rc_attr_flag = True
@@ -322,12 +319,34 @@ async def _default_stream_response_handler(
     )

     try:
-        return _build_stream_api_resp(
+        resp = _build_stream_api_resp(
             _fc_delta_buffer,
             _rc_delta_buffer,
             _tool_calls_buffer,
             finish_reason=finish_reason,
-        ), _usage_record
+        )
+        # Emit the max_tokens truncation warning in one place, reading the details from resp
+        if finish_reason == "length":
+            # Stash the model name into resp.raw_data (downstream code extracts strictly "from resp")
+            try:
+                if _model_name:
+                    resp.raw_data = {"model": _model_name}
+            except Exception:
+                pass
+            model_dbg = None
+            try:
+                if isinstance(resp.raw_data, dict):
+                    model_dbg = resp.raw_data.get("model")
+            except Exception:
+                model_dbg = None
+            # unified log format
+            logger.info(
+                "Model %s exceeded the max_tokens limit and may have produced only partial output; adjust the limit as needed."
+                % (model_dbg or "")
+            )
+        return resp, _usage_record
     except Exception:
         # make sure the buffers are closed
         _insure_buffer_closed()
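
Taken together, the streaming changes record the model name from the first event that carries one, then emit a single truncation notice once the stream is assembled. A minimal sketch of that flow, assuming a simplified response object (StreamResp here is a hypothetical stand-in for the client's real response type):

    import logging
    from dataclasses import dataclass

    logger = logging.getLogger(__name__)

    @dataclass
    class StreamResp:
        # Hypothetical stand-in for the real streaming response object.
        content: str = ""
        raw_data: dict | None = None

    def finalize(resp: StreamResp, finish_reason: str | None, model_name: str | None) -> StreamResp:
        # Mirror the commit's convention: stash the model name on resp,
        # then read it back from resp when building the log line.
        if finish_reason == "length":
            if model_name:
                resp.raw_data = {"model": model_name}
            model_dbg = resp.raw_data.get("model") if isinstance(resp.raw_data, dict) else None
            logger.info(
                "Model %s exceeded the max_tokens limit and may have produced only partial output; adjust the limit as needed.",
                model_dbg or "",
            )
        return resp

The round trip through raw_data looks redundant in isolation; it exists so that later code can treat resp as the single source of truth for the model name.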
@@ -402,14 +421,13 @@ def _default_normal_response_parser(
         choice0 = resp.choices[0]
         reason = getattr(choice0, "finish_reason", None)
         if reason and reason == "length":
-            has_real_output = bool(api_response.content and api_response.content.strip())
-            if has_real_output:
-                logger.warning(
-                    "⚠ The OpenAI response was partially truncated by the max_tokens limit;\n"
-                    "  this may affect the reply. Consider raising the model's max_tokens!"
-                )
-            else:
-                logger.warning("⚠ The OpenAI response was truncated by the max_tokens limit;\n  please raise the model's max_tokens!")
+            print(resp)
+            _model_name = resp.model
+            # unified log format
+            logger.info(
+                "Model %s exceeded the max_tokens limit and may have produced only partial output; adjust the limit as needed."
+                % (_model_name or "")
+            )
         return api_response, _usage_record
     except Exception as e:
         logger.debug(f"Exception while checking for MAX_TOKENS truncation: {e}")
@@ -522,7 +540,7 @@ class OpenaiClient(BaseClient):
                 await asyncio.sleep(0.1)  # wait 0.1 s before re-checking the task & interrupt-semaphore state
             # logger.
-            logger.debug(f"OpenAI API response (non-streaming): {req_task.result()}")
+            # logger.debug(f"OpenAI API response (non-streaming): {req_task.result()}")
             # logger.info(f"OpenAI request time: {model_info.model_identifier} {time.time() - start_time} \n{messages}")

View File

@@ -105,10 +105,12 @@ class MemoryChest:
             return 0.60
         elif percentage < 1.0:
             return 0.50
-        elif percentage < 1.2:
+        elif percentage < 1.5:
             return 0.40
+        elif percentage < 2:
+            return 0.30
         else:
-            return 0.35
+            return 0.25
     except Exception:
         # use a conservative threshold on exceptions
         return 0.70
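
The effect is a gentler ladder that only bottoms out once the store is well past capacity. A sketch of the changed tail of the function (the function name is hypothetical, what the threshold governs is not shown in the hunk, and the branches returning values above 0.60 are elided there):

    def threshold_for(percentage: float) -> float:
        # percentage is assumed to be current memory count / configured maximum.
        try:
            if percentage < 1.0:
                return 0.50
            elif percentage < 1.5:   # was: < 1.2
                return 0.40
            elif percentage < 2:     # new intermediate step
                return 0.30
            else:
                return 0.25          # was: 0.35
        except Exception:
            return 0.70  # conservative fallback on error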

View File

@@ -58,16 +58,16 @@ class MemoryManagementTask(AsyncTask):
             if percentage < 0.6:
                 # under 60% of capacity: run every 3600 s
                 return 3600
-            elif percentage < 0.8:
+            elif percentage < 1:
                 # nearing capacity: run every 1800 s
                 return 1800
-            elif percentage < 1.0:
-                # at or over capacity: run more frequently
-                return 300
-            elif percentage < 1.2:
-                return 30
+            elif percentage < 1.5:
+                # over capacity: run more frequently
+                return 600
+            elif percentage < 1.8:
+                return 120
             else:
-                return 10
+                return 30
         except Exception as e:
             logger.error(f"[Memory management] error while computing the run interval: {e}")
@@ -93,10 +93,10 @@ class MemoryManagementTask(AsyncTask):
         logger.info(f"Current memory count: {current_count}/{self.max_memory_number} ({percentage:.1%})")

-        # When occupancy > 1.6, keep deleting until it is <= 1.6 (the oldest/newest are the most likely to go)
-        if percentage > 1.6:
+        # When occupancy > 2, keep deleting until it is <= 1.8 (the oldest/newest are the most likely to go)
+        if percentage > 2:
             logger.info("Too many memories; starting to forget")
             while True:
-                if percentage <= 1.6:
+                if percentage <= 1.8:
                     break
                 removed = global_memory_chest.remove_one_memory_by_age_weight()
                 if not removed:
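
Spelled out, the forgetting pass now arms at twice capacity and drains down to 1.8x. A minimal sketch, assuming the chest exposes the remove_one_memory_by_age_weight() seen in the diff plus a hypothetical count() accessor:

    def forget_excess(chest, max_memory_number: int) -> None:
        # Age-weighted deletion until occupancy is back at or below 1.8x capacity.
        percentage = chest.count() / max_memory_number  # count() is an assumed helper
        if percentage <= 2:
            return  # only triggers once the store is more than 2x over its limit
        while percentage > 1.8:
            if not chest.remove_one_memory_by_age_weight():
                break  # nothing removable left
            percentage = chest.count() / max_memory_number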