Skip to content

Commit 8739f22

Browse files
phacopsclaude
andauthored
fix: Propagate expected exceptions from rate limit functions (#7736)
Stop reporting errors on expected transient exceptions during allocation policy evaluation. `rate_limit_start_request` and `rate_limit_finish_request` have broad `except Exception` handlers that call `logger.exception()`, which reports errors even for expected conditions like Redis timeouts and unexpected pipeline result counts (`StopIteration`). However, the base `AllocationPolicy` class can handle these at a higher level — incrementing a `fail_open` metric and continuing without error logging. This change re-raises `RedisTimeoutError` and `StopIteration` from the low-level rate limit functions so they propagate to the allocation policy base class, which handles them by failing open with a metric (`reason` tagged by exception type) and avoiding noisy error reports. --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b53a7ac commit 8739f22

File tree

2 files changed

+18
-8
lines changed

2 files changed

+18
-8
lines changed

snuba/query/allocation_policies/__init__.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -483,13 +483,13 @@ def get_quota_allowance(
483483
quota_unit=NO_UNITS,
484484
suggestion=NO_SUGGESTION,
485485
)
486-
except RedisTimeoutError:
487-
# Emit metric for timeout, but don't log since this is expected
488-
# when Redis is slow. We fail open to avoid blocking requests.
486+
except (RedisTimeoutError, StopIteration) as e:
487+
# Expected transient errors (Redis timeouts, unexpected pipeline
488+
# result counts). Fail open to avoid blocking requests.
489489
self.metrics.increment(
490490
"fail_open",
491491
1,
492-
tags={"method": "get_quota_allowance", "reason": "redis_timeout"},
492+
tags={"method": "get_quota_allowance", "reason": type(e).__name__},
493493
)
494494
return DEFAULT_PASSTHROUGH_POLICY.get_quota_allowance(tenant_ids, query_id)
495495
except Exception:
@@ -553,11 +553,9 @@ def update_quota_balance(
553553
except InvalidTenantsForAllocationPolicy:
554554
# the policy did not do anything because the tenants were invalid, updating is also not necessary
555555
pass
556-
except RedisTimeoutError:
557-
# Emit metric for timeout, but don't log since this is expected
558-
# when Redis is slow. We fail open to avoid blocking requests.
556+
except (RedisTimeoutError, StopIteration) as e:
559557
self.metrics.increment(
560-
"fail_open", 1, tags={"method": "update_quota_balance", "reason": "redis_timeout"}
558+
"fail_open", 1, tags={"method": "update_quota_balance", "reason": type(e).__name__}
561559
)
562560
except Exception:
563561
self.metrics.increment("fail_open", 1, tags={"method": "update_quota_balance"})

snuba/state/rate_limit.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from typing import Any, Iterator, MutableMapping, Optional, Sequence, Type, cast
1212
from typing import ChainMap as TypingChainMap
1313

14+
from redis.exceptions import TimeoutError as RedisTimeoutError
15+
1416
from snuba import environment, state
1517
from snuba.redis import RedisClientKey, get_redis_client
1618
from snuba.state import get_configs, set_config
@@ -279,6 +281,14 @@ def rate_limit_start_request(
279281
concurrent = sum(next(pipe_results) for _ in range(rate_limit_shard_factor))
280282
else:
281283
concurrent = 0
284+
except RedisTimeoutError:
285+
raise
286+
except StopIteration:
287+
metrics.increment(
288+
"rate_limit_fail_open",
289+
tags={"reason": "StopIteration", "func": "rate_limit_start_request"},
290+
)
291+
return RateLimitStats(rate=-1, concurrent=-1)
282292
except Exception as ex:
283293
# if something goes wrong, we don't want to block the request,
284294
# set the values such that they pass under any limit
@@ -315,6 +325,8 @@ def rate_limit_finish_request(
315325
pipe.zincrby(query_bucket, -float(max_query_duration_s), query_id)
316326
pipe.expire(query_bucket, max_query_duration_s)
317327
pipe.execute()
328+
except RedisTimeoutError:
329+
raise
318330
except Exception as ex:
319331
logger.exception(ex)
320332

0 commit comments

Comments
 (0)