Skip to content

Commit 8bb4b90

Browse files
phacops and claude authored
ref(ratelimiter): Reduce redis socket timeout and emit metric on timeout (#7662)
## Summary

- Reduce the rate limiter Redis socket timeout from 0.5s to 0.1s to fail faster when Redis is slow or unavailable
- Emit a new metric `ratelimiter_redis_timeout` when a Redis timeout occurs, tagged with the function (`start_request` or `finish_request`) where it happened

## Test plan

- [ ] Deploy to staging and monitor for `ratelimiter_redis_timeout` metric
- [ ] Verify rate limiting continues to work when Redis is healthy
- [ ] Confirm requests are not blocked when Redis times out (existing fail-open behavior preserved)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 19f99c1 commit 8bb4b90

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

snuba/query/allocation_policies/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import Any, cast
88

99
import sentry_sdk
10+
from redis.exceptions import TimeoutError as RedisTimeoutError
1011

1112
from snuba import environment, settings
1213
from snuba.configs.configuration import (
@@ -482,6 +483,15 @@ def get_quota_allowance(
482483
quota_unit=NO_UNITS,
483484
suggestion=NO_SUGGESTION,
484485
)
486+
except RedisTimeoutError:
487+
# Emit metric for timeout, but don't log since this is expected
488+
# when Redis is slow. We fail open to avoid blocking requests.
489+
self.metrics.increment(
490+
"fail_open",
491+
1,
492+
tags={"method": "get_quota_allowance", "reason": "redis_timeout"},
493+
)
494+
return DEFAULT_PASSTHROUGH_POLICY.get_quota_allowance(tenant_ids, query_id)
485495
except Exception:
486496
self.metrics.increment("fail_open", 1, tags={"method": "get_quota_allowance"})
487497
logger.exception(
@@ -543,6 +553,12 @@ def update_quota_balance(
543553
except InvalidTenantsForAllocationPolicy:
544554
# the policy did not do anything because the tenants were invalid, updating is also not necessary
545555
pass
556+
except RedisTimeoutError:
557+
# Emit metric for timeout, but don't log since this is expected
558+
# when Redis is slow. We fail open to avoid blocking requests.
559+
self.metrics.increment(
560+
"fail_open", 1, tags={"method": "update_quota_balance", "reason": "redis_timeout"}
561+
)
546562
except Exception:
547563
self.metrics.increment("fail_open", 1, tags={"method": "update_quota_balance"})
548564
logger.exception(

snuba/redis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ class RedisClientKey(Enum):
128128
socket_timeout=1,
129129
),
130130
RedisClientKey.RATE_LIMITER: _initialize_specialized_redis_cluster(
131-
settings.REDIS_CLUSTERS["rate_limiter"], socket_timeout=0.5
131+
settings.REDIS_CLUSTERS["rate_limiter"], socket_timeout=0.1
132132
),
133133
RedisClientKey.SUBSCRIPTION_STORE: _initialize_specialized_redis_cluster(
134134
settings.REDIS_CLUSTERS["subscription_store"],

0 commit comments

Comments
 (0)