mirror of https://github.com/Mai-with-u/MaiBot.git
修复负载均衡
parent
15d6a319d3
commit
fad8b82d8b
|
|
@ -26,18 +26,6 @@ install(extra_lines=3)
|
||||||
|
|
||||||
logger = get_logger("model_utils")
|
logger = get_logger("model_utils")
|
||||||
|
|
||||||
# 常见Error Code Mapping
|
|
||||||
error_code_mapping = {
|
|
||||||
400: "参数不正确",
|
|
||||||
401: "API key 错误,认证失败,请检查 config/model_config.toml 中的配置是否正确",
|
|
||||||
402: "账号余额不足",
|
|
||||||
403: "需要实名,或余额不足",
|
|
||||||
404: "Not Found",
|
|
||||||
429: "请求过于频繁,请稍后再试",
|
|
||||||
500: "服务器内部故障",
|
|
||||||
503: "服务器负载过高",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RequestType(Enum):
|
class RequestType(Enum):
|
||||||
"""请求类型枚举"""
|
"""请求类型枚举"""
|
||||||
|
|
@ -267,14 +255,14 @@ class LLMRequest:
|
||||||
extra_params=model_info.extra_params,
|
extra_params=model_info.extra_params,
|
||||||
)
|
)
|
||||||
elif request_type == RequestType.EMBEDDING:
|
elif request_type == RequestType.EMBEDDING:
|
||||||
assert embedding_input is not None
|
assert embedding_input is not None, "嵌入输入不能为空"
|
||||||
return await client.get_embedding(
|
return await client.get_embedding(
|
||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
embedding_input=embedding_input,
|
embedding_input=embedding_input,
|
||||||
extra_params=model_info.extra_params,
|
extra_params=model_info.extra_params,
|
||||||
)
|
)
|
||||||
elif request_type == RequestType.AUDIO:
|
elif request_type == RequestType.AUDIO:
|
||||||
assert audio_base64 is not None
|
assert audio_base64 is not None, "音频Base64不能为空"
|
||||||
return await client.get_audio_transcriptions(
|
return await client.get_audio_transcriptions(
|
||||||
model_info=model_info,
|
model_info=model_info,
|
||||||
audio_base64=audio_base64,
|
audio_base64=audio_base64,
|
||||||
|
|
@ -365,24 +353,23 @@ class LLMRequest:
|
||||||
embedding_input=embedding_input,
|
embedding_input=embedding_input,
|
||||||
audio_base64=audio_base64,
|
audio_base64=audio_base64,
|
||||||
)
|
)
|
||||||
|
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
||||||
|
if response_usage := response.usage:
|
||||||
|
total_tokens += response_usage.total_tokens
|
||||||
|
self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty - 1)
|
||||||
return response, model_info
|
return response, model_info
|
||||||
|
|
||||||
except ModelAttemptFailed as e:
|
except ModelAttemptFailed as e:
|
||||||
last_exception = e.original_exception or e
|
last_exception = e.original_exception or e
|
||||||
logger.warning(f"模型 '{model_info.name}' 尝试失败,切换到下一个模型。原因: {e}")
|
logger.warning(f"模型 '{model_info.name}' 尝试失败,切换到下一个模型。原因: {e}")
|
||||||
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
||||||
self.model_usage[model_info.name] = (total_tokens, penalty + 1, usage_penalty)
|
self.model_usage[model_info.name] = (total_tokens, penalty + 1, usage_penalty - 1)
|
||||||
failed_models_this_request.add(model_info.name)
|
failed_models_this_request.add(model_info.name)
|
||||||
|
|
||||||
if isinstance(last_exception, RespNotOkException) and last_exception.status_code == 400:
|
if isinstance(last_exception, RespNotOkException) and last_exception.status_code == 400:
|
||||||
logger.error("收到不可恢复的客户端错误 (400),中止所有尝试。")
|
logger.error("收到不可恢复的客户端错误 (400),中止所有尝试。")
|
||||||
raise last_exception from e
|
raise last_exception from e
|
||||||
|
|
||||||
finally:
|
|
||||||
total_tokens, penalty, usage_penalty = self.model_usage[model_info.name]
|
|
||||||
if usage_penalty > 0:
|
|
||||||
self.model_usage[model_info.name] = (total_tokens, penalty, usage_penalty - 1)
|
|
||||||
|
|
||||||
logger.error(f"所有 {max_attempts} 个模型均尝试失败。")
|
logger.error(f"所有 {max_attempts} 个模型均尝试失败。")
|
||||||
if last_exception:
|
if last_exception:
|
||||||
raise last_exception
|
raise last_exception
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue