Skip to content

Commit 5e9ad43

Browse files
Parse rate limit headers for better 429 error messages (#3570)
* add rate limit headers parser * fixes * simpler dataclass * review suggestions * Update src/huggingface_hub/utils/_http.py Co-authored-by: Lucain <[email protected]> * docstrings * nit --------- Co-authored-by: Lucain <[email protected]>
1 parent 781a5ac commit 5e9ad43

File tree

3 files changed

+197
-1
lines changed

3 files changed

+197
-1
lines changed

src/huggingface_hub/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,15 @@
5454
from ._http import (
5555
ASYNC_CLIENT_FACTORY_T,
5656
CLIENT_FACTORY_T,
57+
RateLimitInfo,
5758
close_session,
5859
fix_hf_endpoint_in_url,
5960
get_async_session,
6061
get_session,
6162
hf_raise_for_status,
6263
http_backoff,
6364
http_stream_backoff,
65+
parse_ratelimit_headers,
6466
set_async_client_factory,
6567
set_client_factory,
6668
)

src/huggingface_hub/utils/_http.py

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@
2323
import time
2424
import uuid
2525
from contextlib import contextmanager
26+
from dataclasses import dataclass
2627
from http import HTTPStatus
2728
from shlex import quote
28-
from typing import Any, Callable, Generator, Optional, Union
29+
from typing import Any, Callable, Generator, Mapping, Optional, Union
2930

3031
import httpx
3132

@@ -48,6 +49,94 @@
4849

4950
logger = logging.get_logger(__name__)
5051

52+
53+
@dataclass(frozen=True)
class RateLimitInfo:
    """
    Read-only record of the rate limit state advertised in HTTP response headers.

    Instances are frozen: fields cannot be reassigned after construction. The last two
    fields default to `None` so that a record can be built when only the current
    usage is known.

    Attributes:
        resource_type (`str`): Name of the resource the rate limit applies to.
        remaining (`int`): How many requests are still allowed in the current window.
        reset_in_seconds (`int`): How many seconds are left before the rate limit resets.
        limit (`int`, *optional*): Total number of requests allowed per window.
        window_seconds (`int`, *optional*): Duration of a window, in seconds.
    """

    resource_type: str
    remaining: int
    reset_in_seconds: int
    limit: Optional[int] = None
    window_seconds: Optional[int] = None
72+
73+
74+
# Compiled once at import time; both patterns are used with `.search()` by the header parser.

# Current usage, e.g. `"api";r=0;t=55` --> resource_type="api", r="0", t="55"
_RATELIMIT_REGEX = re.compile(
    r"\"(?P<resource_type>\w+)\"\s*;"  # quoted resource name, then `;`
    r"\s*r\s*=\s*(?P<r>\d+)\s*;"  # `r=<digits>` (requests remaining), then `;`
    r"\s*t\s*=\s*(?P<t>\d+)"  # `t=<digits>` (seconds until reset)
)

# Policy, e.g. `"fixed window";"api";q=500;w=300` --> q="500", w="300"
_RATELIMIT_POLICY_REGEX = re.compile(
    r"q\s*=\s*(?P<q>\d+)"  # `q=<digits>` (request quota)
    r".*?"  # anything in between, as little as possible
    r"w\s*=\s*(?P<w>\d+)"  # `w=<digits>` (window length in seconds)
)
79+
80+
81+
def parse_ratelimit_headers(headers: Mapping[str, str]) -> Optional[RateLimitInfo]:
    """Extract rate limit details from the `ratelimit` / `ratelimit-policy` response headers.

    Header names are compared case-insensitively. The format is based on the IETF draft
    https://www.ietf.org/archive/id/draft-ietf-httpapi-ratelimit-headers-09.html,
    of which only a subset is supported.

    Args:
        headers (`Mapping[str, str]`):
            Response headers to inspect.

    Returns:
        `RateLimitInfo` or `None`: The parsed information, or `None` if the `ratelimit` header
        is missing, empty, or does not match the expected format. `limit` and `window_seconds`
        are left to `None` when the `ratelimit-policy` header is missing or cannot be parsed.

    Example:
    ```python
    >>> from huggingface_hub.utils import parse_ratelimit_headers
    >>> headers = {
    ...     "ratelimit": '"api";r=0;t=55',
    ...     "ratelimit-policy": '"fixed window";"api";q=500;w=300',
    ... }
    >>> info = parse_ratelimit_headers(headers)
    >>> info.remaining
    0
    >>> info.reset_in_seconds
    55
    ```
    """
    # Collect the two headers of interest under their lowercase names. Only matching
    # keys are read from the mapping; if several keys normalize to the same name,
    # the last one iterated wins.
    wanted = ("ratelimit", "ratelimit-policy")
    found = {}
    for name in headers:
        normalized = name.lower()
        if normalized in wanted:
            found[normalized] = headers[name]

    raw_usage = found.get("ratelimit")
    raw_policy = found.get("ratelimit-policy")

    # The usage header is mandatory: without a parsable one there is nothing to report.
    usage = _RATELIMIT_REGEX.search(raw_usage) if raw_usage else None
    if usage is None:
        return None

    # The policy header is optional and only refines the result.
    quota: Optional[int] = None
    window: Optional[int] = None
    policy_found = _RATELIMIT_POLICY_REGEX.search(raw_policy) if raw_policy else None
    if policy_found is not None:
        quota = int(policy_found["q"])
        window = int(policy_found["w"])

    return RateLimitInfo(
        resource_type=usage["resource_type"],
        remaining=int(usage["r"]),
        reset_in_seconds=int(usage["t"]),
        limit=quota,
        window_seconds=window,
    )
138+
139+
51140
# Both headers are used by the Hub to debug failed requests.
52141
# `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB.
53142
# If `X_AMZN_TRACE_ID` is set, the Hub will use it as well.
@@ -619,6 +708,25 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =
619708
)
620709
raise _format(HfHubHTTPError, message, response) from e
621710

711+
elif response.status_code == 429:
712+
ratelimit_info = parse_ratelimit_headers(response.headers)
713+
if ratelimit_info is not None:
714+
message = (
715+
f"\n\n429 Too Many Requests: you have reached your '{ratelimit_info.resource_type}' rate limit."
716+
)
717+
message += f"\nRetry after {ratelimit_info.reset_in_seconds} seconds"
718+
if ratelimit_info.limit is not None and ratelimit_info.window_seconds is not None:
719+
message += (
720+
f" ({ratelimit_info.remaining}/{ratelimit_info.limit} requests remaining"
721+
f" in current {ratelimit_info.window_seconds}s window)."
722+
)
723+
else:
724+
message += "."
725+
message += f"\nUrl: {response.url}."
726+
else:
727+
message = f"\n\n429 Too Many Requests for url: {response.url}."
728+
raise _format(HfHubHTTPError, message, response) from e
729+
622730
elif response.status_code == 416:
623731
range_header = response.request.headers.get("Range")
624732
message = f"{e}. Requested range: {range_header}. Content-Range: {response.headers.get('Content-Range')}."

tests/test_utils_http.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@
1414
from huggingface_hub.constants import ENDPOINT
1515
from huggingface_hub.errors import HfHubHTTPError, OfflineModeIsEnabled
1616
from huggingface_hub.utils._http import (
17+
RateLimitInfo,
1718
_adjust_range_header,
1819
default_client_factory,
1920
fix_hf_endpoint_in_url,
2021
get_async_session,
2122
get_session,
2223
hf_raise_for_status,
2324
http_backoff,
25+
parse_ratelimit_headers,
2426
set_client_factory,
2527
)
2628

@@ -447,3 +449,87 @@ async def test_raise_on_status_async_non_stream(fake_server: str):
447449
async def test_raise_on_status_async_stream(fake_server: str):
448450
async with get_async_session().stream("GET", fake_server) as response:
449451
_check_raise_status(response)
452+
453+
454+
class TestParseRatelimitHeaders:
    """Unit tests for `parse_ratelimit_headers`."""

    def test_parse_full_headers(self):
        """Usage and policy headers together populate every field."""
        expected = RateLimitInfo(
            resource_type="api",
            remaining=0,
            reset_in_seconds=55,
            limit=500,
            window_seconds=300,
        )
        parsed = parse_ratelimit_headers(
            {
                "ratelimit": '"api";r=0;t=55',
                "ratelimit-policy": '"fixed window";"api";q=500;w=300',
            }
        )
        assert parsed == expected

    def test_parse_ratelimit_only(self):
        """Without a policy header, `limit` and `window_seconds` stay unset."""
        parsed = parse_ratelimit_headers({"ratelimit": '"api";r=489;t=189'})
        assert parsed is not None
        assert parsed.resource_type == "api"
        assert parsed.remaining == 489
        assert parsed.reset_in_seconds == 189
        assert parsed.limit is None
        assert parsed.window_seconds is None

    def test_parse_missing_header(self):
        """No `ratelimit` header at all yields `None`."""
        no_headers = {}
        assert parse_ratelimit_headers(no_headers) is None

    def test_parse_malformed_header(self):
        """A `ratelimit` header in an unexpected format yields `None`."""
        bad_headers = {"ratelimit": "malformed"}
        assert parse_ratelimit_headers(bad_headers) is None

    def test_parse_case_insensitive(self):
        """Header names are matched regardless of their casing."""
        mixed_case = {
            "RateLimit": '"api";r=10;t=100',
            "RateLimit-Policy": '"fixed window";"api";q=500;w=300',
        }
        parsed = parse_ratelimit_headers(mixed_case)
        assert parsed is not None
        assert parsed.remaining == 10
495+
496+
497+
class TestRateLimitErrorMessage:
    """Checks the message raised by `hf_raise_for_status` on HTTP 429 responses."""

    @staticmethod
    def _mock_429_response(url: str, headers: dict):
        """Build a mocked `httpx.Response` with status 429 whose `raise_for_status` raises."""
        response = Mock(spec=httpx.Response)
        response.status_code = 429
        response.url = url
        response.headers = httpx.Headers(headers)
        response.raise_for_status.side_effect = httpx.HTTPStatusError("429", request=Mock(), response=response)
        response.json.return_value = {}
        return response

    def test_429_with_ratelimit_headers(self):
        """With rate limit headers, the error message reports their content."""
        response = self._mock_429_response(
            "https://huggingface.co/api/models/username/reponame",
            {
                "ratelimit": '"api";r=0;t=55',
                "ratelimit-policy": '"fixed window";"api";q=500;w=300',
            },
        )

        with pytest.raises(HfHubHTTPError) as exc_info:
            hf_raise_for_status(response)

        error_msg = str(exc_info.value)
        expected_fragments = (
            "429 Too Many Requests",
            "'api' rate limit",
            "55 seconds",
            "0/500",
            "api/models/username/reponame",
        )
        for fragment in expected_fragments:
            assert fragment in error_msg

    def test_429_without_ratelimit_headers(self):
        """Without rate limit headers, a generic 429 message with the URL is raised."""
        response = self._mock_429_response("https://huggingface.co/api/models", {})

        with pytest.raises(HfHubHTTPError) as exc_info:
            hf_raise_for_status(response)

        error_msg = str(exc_info.value)
        assert "429 Too Many Requests" in error_msg
        assert "api/models" in error_msg

0 commit comments

Comments
 (0)