From c6dadc2872c2faa0fcb311046fa504b89eef9efa Mon Sep 17 00:00:00 2001
From: SengokuCola <1026294844@qq.com>
Date: Thu, 30 Oct 2025 11:30:48 +0800
Subject: [PATCH] fix: optimize truncation logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/llm_models/model_client/openai_client.py | 58 +++++++++++++------
 src/memory_system/Memory_chest.py            |  6 +-
 src/memory_system/memory_management_task.py  | 16 +++---
 3 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/src/llm_models/model_client/openai_client.py b/src/llm_models/model_client/openai_client.py
index dd92b9e8..137ca78e 100644
--- a/src/llm_models/model_client/openai_client.py
+++ b/src/llm_models/model_client/openai_client.py
@@ -237,15 +237,8 @@ def _build_stream_api_resp(
             resp.tool_calls.append(ToolCall(call_id, function_name, arguments))
 
-    # Check for max_tokens truncation
-    if finish_reason == "length":
-        if resp.content and resp.content.strip():
-            logger.warning(
-                "⚠ The OpenAI response was partially truncated by the max_tokens limit,\n"
-                "  which may affect the reply; consider raising the model's max_tokens config!"
-            )
-        else:
-            logger.warning("⚠ The OpenAI response was truncated by the max_tokens limit,\n  please adjust the model's max_tokens config!")
+    # max_tokens truncation check: for streaming, the warning is now emitted by
+    # the response handler in one place; finish_reason is kept here only for upstream checks
 
     if not resp.content and not resp.tool_calls:
         raise EmptyResponseException()
@@ -270,6 +263,7 @@ async def _default_stream_response_handler(
     _tool_calls_buffer: list[tuple[str, str, io.StringIO]] = []  # buffer for received tool calls
     _usage_record = None  # usage record
     finish_reason: str | None = None  # last finish_reason seen
+    _model_name: str | None = None  # model name, captured from the stream
 
     def _insure_buffer_closed():
         # make sure all buffers are closed
@@ -300,6 +294,9 @@ async def _default_stream_response_handler(
             if hasattr(event.choices[0], "finish_reason") and event.choices[0].finish_reason:
                 finish_reason = event.choices[0].finish_reason
 
+            if hasattr(event, "model") and event.model and not _model_name:
+                _model_name = event.model  # capture the model name once
+
             if hasattr(delta, "reasoning_content") and delta.reasoning_content:  # type: ignore
                 # flag: a separate reasoning-content block exists
                 _has_rc_attr_flag = True
@@ -322,12 +319,34 @@ async def _default_stream_response_handler(
             )
 
     try:
-        return _build_stream_api_resp(
+        resp = _build_stream_api_resp(
             _fc_delta_buffer,
             _rc_delta_buffer,
             _tool_calls_buffer,
             finish_reason=finish_reason,
-        ), _usage_record
+        )
+        # Emit the max_tokens truncation notice here, in one place, reading from resp
+        if finish_reason == "length":
+            # Stash the model name in resp.raw_data so it is read strictly "from resp" below
+            try:
+                if _model_name:
+                    resp.raw_data = {"model": _model_name}
+            except Exception:
+                pass
+            model_dbg = None
+            try:
+                if isinstance(resp.raw_data, dict):
+                    model_dbg = resp.raw_data.get("model")
+            except Exception:
+                model_dbg = None
+
+            # unified log format
+            logger.info(
+                "Model %s hit the max_tokens limit; the output may be partial. Adjust max_tokens as needed."
+                % (model_dbg or "")
+            )
+
+        return resp, _usage_record
     except Exception:
         # make sure all buffers are closed
         _insure_buffer_closed()
@@ -402,14 +421,13 @@ def _default_normal_response_parser(
         choice0 = resp.choices[0]
         reason = getattr(choice0, "finish_reason", None)
         if reason and reason == "length":
-            has_real_output = bool(api_response.content and api_response.content.strip())
-            if has_real_output:
-                logger.warning(
-                    "⚠ The OpenAI response was partially truncated by the max_tokens limit,\n"
-                    "  which may affect the reply; consider raising the model's max_tokens config!"
-                )
-            else:
-                logger.warning("⚠ The OpenAI response was truncated by the max_tokens limit,\n  please adjust the model's max_tokens config!")
+            _model_name = resp.model
+            # unified log format
+            logger.info(
+                "Model %s hit the max_tokens limit; the output may be partial. Adjust max_tokens as needed."
+                % (_model_name or "")
+            )
             return api_response, _usage_record
     except Exception as e:
         logger.debug(f"Exception while checking MAX_TOKENS truncation: {e}")
@@ -522,7 +540,7 @@ class OpenaiClient(BaseClient):
                 await asyncio.sleep(0.1)  # wait 0.1s before re-checking the task & interrupt-semaphore state
 
             # logger.
-            logger.debug(f"OpenAI API response (non-streaming): {req_task.result()}")
+            # logger.debug(f"OpenAI API response (non-streaming): {req_task.result()}")
 
             # logger.info(f"OpenAI request time: {model_info.model_identifier} {time.time() - start_time} \n{messages}")
 
diff --git a/src/memory_system/Memory_chest.py b/src/memory_system/Memory_chest.py
index d8efec8f..9404cf21 100644
--- a/src/memory_system/Memory_chest.py
+++ b/src/memory_system/Memory_chest.py
@@ -105,10 +105,12 @@ class MemoryChest:
                 return 0.60
             elif percentage < 1.0:
                 return 0.50
-            elif percentage < 1.2:
+            elif percentage < 1.5:
                 return 0.40
+            elif percentage < 2.0:
+                return 0.30
             else:
-                return 0.35
+                return 0.25
         except Exception:
             # fall back to a conservative threshold on error
             return 0.70
diff --git a/src/memory_system/memory_management_task.py b/src/memory_system/memory_management_task.py
index a9212862..90b6e2ca 100644
--- a/src/memory_system/memory_management_task.py
+++ b/src/memory_system/memory_management_task.py
@@ -58,16 +58,16 @@ class MemoryManagementTask(AsyncTask):
             if percentage < 0.6:
                 # below 60% full: run once an hour
                 return 3600
-            elif percentage < 0.8:
+            elif percentage < 1.0:
                 # 60% to 100%: every 30 minutes
                 return 1800
-            elif percentage < 1.0:
+            elif percentage < 1.5:
                 # 100% to 150%: every 10 minutes
-                return 300
-            elif percentage < 1.2:
-                return 30
+                return 600
+            elif percentage < 1.8:
+                return 120
             else:
-                return 10
+                return 30
         except Exception as e:
             logger.error(f"[MemoryManagement] Error computing the run interval: {e}")
@@ -93,10 +93,10 @@ class MemoryManagementTask(AsyncTask):
             logger.info(f"Current memory count: {current_count}/{self.max_memory_number} ({percentage:.1%})")
 
             # When the fill ratio exceeds 2.0, keep forgetting until it drops to <= 1.8 (older/newer memories are removed first)
-            if percentage > 1.6:
+            if percentage > 2.0:
                 logger.info("Too many memories; starting to forget")
                 while True:
-                    if percentage <= 1.6:
+                    if percentage <= 1.8:
                         break
                     removed = global_memory_chest.remove_one_memory_by_age_weight()
                     if not removed:
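-- 
Review notes (kept after the "-- " trailer line, outside the diff, where
format-patch normally puts its version string):

The patch converges both code paths on a single truncation notice that reads
the model name "from resp". A minimal runnable sketch of that pattern, under
assumptions: APIResponse here is a stand-in for the client's response type,
and warn_if_truncated is a hypothetical helper, not a function in the patch.

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("openai_client_sketch")

    class APIResponse:
        """Stand-in for the client's response object; only the fields the
        truncation notice relies on."""
        def __init__(self, content: str = "", raw_data: dict | None = None):
            self.content = content
            self.raw_data = raw_data

    def warn_if_truncated(resp: APIResponse, finish_reason: str | None,
                          model_name: str | None) -> None:
        # Emit the notice exactly once per response, mirroring the patch:
        # stash the model name in resp.raw_data, then read it back from resp.
        if finish_reason != "length":
            return
        if model_name:
            resp.raw_data = {"model": model_name}
        model_dbg = resp.raw_data.get("model") if isinstance(resp.raw_data, dict) else None
        logger.info(
            "Model %s hit the max_tokens limit; the output may be partial. "
            "Adjust max_tokens as needed.",
            model_dbg or "<unknown>",
        )

    # Usage: a streamed reply that stopped at the token limit.
    resp = APIResponse(content="partial answer ...")
    warn_if_truncated(resp, finish_reason="length", model_name="gpt-4o-mini")

Stashing the name into resp.raw_data and immediately reading it back looks
redundant, but it matches the patch's stated intent: downstream consumers are
guaranteed to see the same value the log line used.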
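The memory-side changes are two step functions keyed on the fill ratio
current_count / max_memory_number, plus a widened forget band. Below is a
self-contained sketch of the post-patch schedule; the function names are
illustrative (the real logic lives on MemoryChest and MemoryManagementTask),
and the fallback return in the except branch is an assumption, since the patch
only shows the error log there.

    def check_interval_seconds(current: int, capacity: int) -> int:
        """Run interval for the management task: the fuller the chest,
        the more often it runs (breakpoints taken from the patch)."""
        try:
            p = current / capacity
            if p < 0.6:
                return 3600   # below 60% full: hourly
            elif p < 1.0:
                return 1800   # 60-100%: every 30 minutes
            elif p < 1.5:
                return 600    # 100-150%: every 10 minutes
            elif p < 1.8:
                return 120    # 150-180%: every 2 minutes
            else:
                return 30     # heavy overflow: every 30 seconds
        except Exception:
            return 3600       # assumed conservative fallback

    def should_forget(p: float, forgetting: bool) -> bool:
        """Hysteresis band for forgetting, modeled after the patch:
        start above 2.0, stop at 1.8, so the task does not thrash
        around a single boundary."""
        return p > 2.0 if not forgetting else p > 1.8

    assert check_interval_seconds(90, 100) == 1800
    assert should_forget(2.1, forgetting=False) and should_forget(1.9, forgetting=True)
    assert not should_forget(1.9, forgetting=False)

Raising the forget trigger from 1.6 to 2.0 while stopping passes at 1.8 trades
memory footprint for stability: the chest may overfill further, but each
forgetting pass now ends with slack before the next trigger fires.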