feat: harden the anti-crawler IP cleanup mechanism and static-asset validation

Improve the IP cleanup logic to do a full scan and remove the oldest or empty IP records, giving better memory management. Update the allow rules for static assets to restrict them by path prefix and remove .json from the allowed extensions, so API routes cannot bypass protection via the static-asset rule. Clarify the X-Forwarded-For trust logic and update the related environment-variable comments to give clearer configuration guidance.
pull/1439/head
陈曦 2025-12-14 19:56:11 +08:00
parent 97c872f4f2
commit bccef9f104
2 changed files with 39 additions and 24 deletions


@@ -465,9 +465,9 @@ class AntiCrawlerMiddleware(BaseHTTPMiddleware):
             # Clean up the oldest records (drop the IPs that have gone longest without a request)
             self._cleanup_oldest_ips()
-        # Get or create the request-time deque for this IP
+        # Get or create the request-time deque for this IP (no maxlen, so the rate limit does not loosen)
         if client_ip not in self.request_times:
-            self.request_times[client_ip] = deque(maxlen=self.rate_limit_max_requests * 2)
+            self.request_times[client_ip] = deque()
         request_times = self.request_times[client_ip]
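For context, a minimal sketch of the sliding-window check this deque feeds, using hypothetical window_seconds / max_requests names (the middleware's own attribute names may differ). With maxlen set, the deque silently evicts the oldest timestamps once it is full, so the in-window count can undershoot and the limit effectively loosens; an unbounded deque that is pruned explicitly keeps the count exact.

import time
from collections import deque

request_times: deque = deque()  # no maxlen; pruned explicitly below

def is_rate_limited(window_seconds: float = 60.0, max_requests: int = 100) -> bool:
    now = time.time()
    # Drop timestamps that have fallen out of the window
    while request_times and request_times[0] <= now - window_seconds:
        request_times.popleft()
    if len(request_times) >= max_requests:
        return True
    request_times.append(now)
    return False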
@@ -491,31 +491,33 @@ class AntiCrawlerMiddleware(BaseHTTPMiddleware):
             self._cleanup_oldest_ips()
     def _cleanup_oldest_ips(self):
-        """Clean up the IP records that have gone longest without a request (avoids a full scan)"""
+        """Clean up the IP records that have gone longest without a request (full scan to find the true oldest)"""
         if not self.request_times:
             return
-        # Find the longest-idle IP (empty deque or oldest timestamp)
+        # First collect IPs whose deque is empty (deleted with priority)
+        empty_ips = []
+        # Find the longest-idle IP (oldest timestamp)
         oldest_ip = None
         oldest_time = float('inf')
-        # Only check a subset of IPs (no full scan)
-        check_count = min(100, len(self.request_times))
-        checked = 0
+        # Full scan to find the true oldest (only runs when over the limit, so the cost is acceptable)
         for ip, times in self.request_times.items():
-            if checked >= check_count:
-                break
-            checked += 1
             if not times:
-                # Empty deque, delete with priority
-                del self.request_times[ip]
-                return
-            if times[0] < oldest_time:
-                oldest_time = times[0]
-                oldest_ip = ip
+                # Empty deque, mark for deletion
+                empty_ips.append(ip)
+            else:
+                # Track the oldest timestamp
+                if times[0] < oldest_time:
+                    oldest_time = times[0]
+                    oldest_ip = ip
-        # Delete the longest-idle IP
-        if oldest_ip:
+        # Delete the empty-deque IPs first
+        for ip in empty_ips:
+            del self.request_times[ip]
+        # If there were no empty deques to delete and cleanup is still needed, delete the single oldest IP
+        if not empty_ips and oldest_ip:
             del self.request_times[oldest_ip]
     def _is_trusted_proxy(self, ip: str) -> bool:
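A standalone sketch of the same eviction policy on toy data (hypothetical IPs and timestamps), to make the ordering concrete: IPs with empty deques are dropped first, and only when none are empty does the single oldest entry get evicted.

from collections import deque

request_times = {
    "10.0.0.1": deque([100.0, 101.0]),  # oldest first element: 100.0
    "10.0.0.2": deque(),                # empty, removed first
    "10.0.0.3": deque([250.0]),
}

def cleanup_oldest(times: dict) -> None:
    empty = [ip for ip, dq in times.items() if not dq]
    for ip in empty:
        del times[ip]
    if not empty and times:
        oldest_ip = min(times, key=lambda ip: times[ip][0])
        del times[oldest_ip]

cleanup_oldest(request_times)  # removes 10.0.0.2 (empty deque)
cleanup_oldest(request_times)  # removes 10.0.0.1 (oldest timestamp 100.0)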
@@ -575,9 +577,10 @@ class AntiCrawlerMiddleware(BaseHTTPMiddleware):
         direct_client_ip = request.client.host
         # Decide whether to trust the X-Forwarded-For header
-        use_xff = TRUST_XFF
-        if not use_xff and TRUSTED_PROXIES and direct_client_ip:
-            # If a trusted-proxy list is configured, check whether the direct-connection IP is on it
+        # TRUST_XFF only means "proxy parsing is enabled"; the direct-connection IP must still be in TRUSTED_PROXIES
+        use_xff = False
+        if TRUST_XFF and TRUSTED_PROXIES and direct_client_ip:
+            # Trust XFF only when TRUST_XFF is enabled and the direct-connection IP is on the trusted list
             use_xff = self._is_trusted_proxy(direct_client_ip)
         # When the proxy is trusted, prefer the IP from X-Forwarded-For
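A sketch of the resulting resolution rule, with TRUST_XFF / TRUSTED_PROXIES as stand-ins for the middleware's settings and a simplified trusted-proxy check (the real _is_trusted_proxy may differ): the left-most X-Forwarded-For entry is used only when parsing is enabled and the direct peer is a trusted proxy.

import ipaddress

TRUST_XFF = True
TRUSTED_PROXIES = ["127.0.0.1", "172.17.0.0/16"]

def is_trusted_proxy(ip: str) -> bool:
    addr = ipaddress.ip_address(ip)
    return any(addr in ipaddress.ip_network(p, strict=False) for p in TRUSTED_PROXIES)

def resolve_client_ip(direct_ip: str, xff_header: str | None) -> str:
    # XFF is honoured only when parsing is enabled AND the direct peer is a trusted proxy
    use_xff = TRUST_XFF and bool(TRUSTED_PROXIES) and is_trusted_proxy(direct_ip)
    if use_xff and xff_header:
        return xff_header.split(",")[0].strip()  # left-most hop = original client
    return direct_ip

print(resolve_client_ip("172.17.0.5", "203.0.113.7, 172.17.0.5"))  # 203.0.113.7 (trusted proxy)
print(resolve_client_ip("198.51.100.9", "203.0.113.7"))            # 198.51.100.9 (untrusted peer, XFF ignored)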
@@ -684,8 +687,19 @@ class AntiCrawlerMiddleware(BaseHTTPMiddleware):
             return await call_next(request)
         # Allow access to static assets (CSS, JS, images, etc.)
-        static_extensions = {".css", ".js", ".json", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".woff", ".woff2", ".ttf", ".eot"}
-        if any(request.url.path.endswith(ext) for ext in static_extensions):
+        # Note: .json has been removed so API paths cannot bypass the protection
+        # Static assets are only allowed under specific prefixes (/static/, /assets/, /dist/)
+        static_extensions = {".css", ".js", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".woff", ".woff2", ".ttf", ".eot"}
+        static_prefixes = {"/static/", "/assets/", "/dist/"}
+        # Check whether this is a static-asset path (a static file under one of the allowed prefixes)
+        path = request.url.path
+        is_static_path = any(path.startswith(prefix) for prefix in static_prefixes) and any(path.endswith(ext) for ext in static_extensions)
+        # Also allow static files at the root (e.g. /favicon.ico)
+        is_root_static = path.count("/") == 1 and any(path.endswith(ext) for ext in static_extensions)
+        if is_static_path or is_root_static:
             return await call_next(request)
         # Get the client IP (only fetched once to avoid repeated calls)
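The new allow rule, pulled out as a pure function for illustration (a sketch, not the middleware's exact code), with a few example paths showing what passes and what no longer does:

STATIC_EXTENSIONS = {".css", ".js", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".woff", ".woff2", ".ttf", ".eot"}
STATIC_PREFIXES = {"/static/", "/assets/", "/dist/"}

def is_static_allowed(path: str) -> bool:
    has_static_ext = any(path.endswith(ext) for ext in STATIC_EXTENSIONS)
    under_prefix = any(path.startswith(p) for p in STATIC_PREFIXES)
    at_root = path.count("/") == 1
    return has_static_ext and (under_prefix or at_root)

assert is_static_allowed("/assets/app.js")
assert is_static_allowed("/favicon.ico")
assert not is_static_allowed("/api/config.json")  # .json is no longer whitelisted
assert not is_static_allowed("/api/v1/logo.png")  # static extension outside the allowed prefixes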


@@ -14,4 +14,5 @@ WEBUI_ALLOWED_IPS=127.0.0.1 # IP allowlist (comma-separated; supports exact IPs,
 # Example: 127.0.0.1,192.168.1.0/24,172.17.0.0/16
 WEBUI_TRUSTED_PROXIES= # Trusted proxy IPs (comma-separated); only X-Forwarded-For coming from these IPs is trusted
 # Example: 127.0.0.1,192.168.1.1,172.17.0.1
-WEBUI_TRUST_XFF=false # Whether to trust the X-Forwarded-For header (default false); must be used together with TRUSTED_PROXIES
+WEBUI_TRUST_XFF=false # Whether to enable X-Forwarded-For proxy parsing (default false)
+# Even when enabled, the direct-connection IP must be in TRUSTED_PROXIES before the XFF header is trusted
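A hypothetical configuration for a deployment behind a single reverse proxy at 172.17.0.1 (illustrative values only):

WEBUI_TRUSTED_PROXIES=172.17.0.1
WEBUI_TRUST_XFF=true
# Only requests arriving directly from 172.17.0.1 will have their client IP taken from X-Forwarded-For;
# any other peer's XFF header is ignored and the direct connection IP is used instead.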