Parse rate limit headers for better 429 error messages (#3570)

hanouticelina · Wauplin · web-flow · commit 5e9ad43e7f36 · 2025-11-25T15:52:53.000+01:00
* add rate limit headers parser

* fixes

* simpler dataclass

* review suggestions

* Update src/huggingface_hub/utils/_http.py

Co-authored-by: Lucain <lucain@huggingface.co>

* docstrings

* nit

---------

Co-authored-by: Lucain <lucain@huggingface.co>
diff --git a/src/huggingface_hub/utils/__init__.py b/src/huggingface_hub/utils/__init__.py
@@ -54,13 +54,15 @@
 from ._http import (
     ASYNC_CLIENT_FACTORY_T,
     CLIENT_FACTORY_T,
+    RateLimitInfo,
     close_session,
     fix_hf_endpoint_in_url,
     get_async_session,
     get_session,
     hf_raise_for_status,
     http_backoff,
     http_stream_backoff,
+    parse_ratelimit_headers,
     set_async_client_factory,
     set_client_factory,
 )
diff --git a/src/huggingface_hub/utils/_http.py b/src/huggingface_hub/utils/_http.py
@@ -23,9 +23,10 @@
 import time
 import uuid
 from contextlib import contextmanager
+from dataclasses import dataclass
 from http import HTTPStatus
 from shlex import quote
-from typing import Any, Callable, Generator, Optional, Union
+from typing import Any, Callable, Generator, Mapping, Optional, Union
 
 import httpx
 
@@ -48,6 +49,94 @@
 
 logger = logging.get_logger(__name__)
 
+
+@dataclass(frozen=True)
+class RateLimitInfo:
+    """
+    Parsed rate limit information from HTTP response headers.
+
+    Attributes:
+        resource_type (`str`): The type of resource being rate limited.
+        remaining (`int`): The number of requests remaining in the current window.
+        reset_in_seconds (`int`): The number of seconds until the rate limit resets.
+        limit (`int`, *optional*): The maximum number of requests allowed in the current window.
+        window_seconds (`int`, *optional*): The number of seconds in the current window.
+
+    """
+
+    resource_type: str
+    remaining: int
+    reset_in_seconds: int
+    limit: Optional[int] = None
+    window_seconds: Optional[int] = None
+
+
+# Regex patterns for parsing rate limit headers
+# e.g.: "api";r=0;t=55 --> resource_type="api", r=0, t=55
+_RATELIMIT_REGEX = re.compile(r"\"(?P<resource_type>\w+)\"\s*;\s*r\s*=\s*(?P<r>\d+)\s*;\s*t\s*=\s*(?P<t>\d+)")
+# e.g.: "fixed window";"api";q=500;w=300 --> q=500, w=300
+_RATELIMIT_POLICY_REGEX = re.compile(r"q\s*=\s*(?P<q>\d+).*?w\s*=\s*(?P<w>\d+)")
+
+
+def parse_ratelimit_headers(headers: Mapping[str, str]) -> Optional[RateLimitInfo]:
+    """Parse rate limit information from HTTP response headers.
+
+    Follows IETF draft: https://www.ietf.org/archive/id/draft-ietf-httpapi-ratelimit-headers-09.html
+    Only a subset is implemented.
+
+    Example:
+    ```python
+    >>> from huggingface_hub.utils import parse_ratelimit_headers
+    >>> headers = {
+    ...     "ratelimit": '"api";r=0;t=55',
+    ...     "ratelimit-policy": '"fixed window";"api";q=500;w=300',
+    ... }
+    >>> info = parse_ratelimit_headers(headers)
+    >>> info.remaining
+    0
+    >>> info.reset_in_seconds
+    55
+    ```
+    """
+
+    ratelimit: Optional[str] = None
+    policy: Optional[str] = None
+    for key in headers:
+        lower_key = key.lower()
+        if lower_key == "ratelimit":
+            ratelimit = headers[key]
+        elif lower_key == "ratelimit-policy":
+            policy = headers[key]
+
+    if not ratelimit:
+        return None
+
+    match = _RATELIMIT_REGEX.search(ratelimit)
+    if not match:
+        return None
+
+    resource_type = match.group("resource_type")
+    remaining = int(match.group("r"))
+    reset_in_seconds = int(match.group("t"))
+
+    limit: Optional[int] = None
+    window_seconds: Optional[int] = None
+
+    if policy:
+        policy_match = _RATELIMIT_POLICY_REGEX.search(policy)
+        if policy_match:
+            limit = int(policy_match.group("q"))
+            window_seconds = int(policy_match.group("w"))
+
+    return RateLimitInfo(
+        resource_type=resource_type,
+        remaining=remaining,
+        reset_in_seconds=reset_in_seconds,
+        limit=limit,
+        window_seconds=window_seconds,
+    )
+
+
 # Both headers are used by the Hub to debug failed requests.
 # `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB.
 # If `X_AMZN_TRACE_ID` is set, the Hub will use it as well.
@@ -619,6 +708,25 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] =
             )
             raise _format(HfHubHTTPError, message, response) from e
 
+        elif response.status_code == 429:
+            ratelimit_info = parse_ratelimit_headers(response.headers)
+            if ratelimit_info is not None:
+                message = (
+                    f"\n\n429 Too Many Requests: you have reached your '{ratelimit_info.resource_type}' rate limit."
+                )
+                message += f"\nRetry after {ratelimit_info.reset_in_seconds} seconds"
+                if ratelimit_info.limit is not None and ratelimit_info.window_seconds is not None:
+                    message += (
+                        f" ({ratelimit_info.remaining}/{ratelimit_info.limit} requests remaining"
+                        f" in current {ratelimit_info.window_seconds}s window)."
+                    )
+                else:
+                    message += "."
+                message += f"\nUrl: {response.url}."
+            else:
+                message = f"\n\n429 Too Many Requests for url: {response.url}."
+            raise _format(HfHubHTTPError, message, response) from e
+
         elif response.status_code == 416:
             range_header = response.request.headers.get("Range")
             message = f"{e}. Requested range: {range_header}. Content-Range: {response.headers.get('Content-Range')}."
diff --git a/tests/test_utils_http.py b/tests/test_utils_http.py
@@ -14,13 +14,15 @@
 from huggingface_hub.constants import ENDPOINT
 from huggingface_hub.errors import HfHubHTTPError, OfflineModeIsEnabled
 from huggingface_hub.utils._http import (
+    RateLimitInfo,
     _adjust_range_header,
     default_client_factory,
     fix_hf_endpoint_in_url,
     get_async_session,
     get_session,
     hf_raise_for_status,
     http_backoff,
+    parse_ratelimit_headers,
     set_client_factory,
 )
 
@@ -447,3 +449,87 @@ async def test_raise_on_status_async_non_stream(fake_server: str):
 async def test_raise_on_status_async_stream(fake_server: str):
     async with get_async_session().stream("GET", fake_server) as response:
         _check_raise_status(response)
+
+
+class TestParseRatelimitHeaders:
+    def test_parse_full_headers(self):
+        """Test parsing both ratelimit and ratelimit-policy headers."""
+        headers = {
+            "ratelimit": '"api";r=0;t=55',
+            "ratelimit-policy": '"fixed window";"api";q=500;w=300',
+        }
+        info = parse_ratelimit_headers(headers)
+        assert info == RateLimitInfo(
+            resource_type="api",
+            remaining=0,
+            reset_in_seconds=55,
+            limit=500,
+            window_seconds=300,
+        )
+
+    def test_parse_ratelimit_only(self):
+        """Test parsing with only ratelimit header (no policy)."""
+        headers = {"ratelimit": '"api";r=489;t=189'}
+        info = parse_ratelimit_headers(headers)
+        assert info is not None
+        assert info.resource_type == "api"
+        assert info.remaining == 489
+        assert info.reset_in_seconds == 189
+        assert info.limit is None
+        assert info.window_seconds is None
+
+    def test_parse_missing_header(self):
+        """Test returns None when ratelimit header is missing."""
+        assert parse_ratelimit_headers({}) is None
+
+    def test_parse_malformed_header(self):
+        """Test returns None when ratelimit header is malformed."""
+        assert parse_ratelimit_headers({"ratelimit": "malformed"}) is None
+
+    def test_parse_case_insensitive(self):
+        """Test header lookup is case-insensitive."""
+        headers = {"RateLimit": '"api";r=10;t=100', "RateLimit-Policy": '"fixed window";"api";q=500;w=300'}
+        info = parse_ratelimit_headers(headers)
+        assert info is not None
+        assert info.remaining == 10
+
+
+class TestRateLimitErrorMessage:
+    def test_429_with_ratelimit_headers(self):
+        """Test 429 error includes rate limit info when headers present."""
+        response = Mock(spec=httpx.Response)
+        response.status_code = 429
+        response.url = "https://huggingface.co/api/models/username/reponame"
+        response.headers = httpx.Headers(
+            {
+                "ratelimit": '"api";r=0;t=55',
+                "ratelimit-policy": '"fixed window";"api";q=500;w=300',
+            }
+        )
+        response.raise_for_status.side_effect = httpx.HTTPStatusError("429", request=Mock(), response=response)
+        response.json.return_value = {}
+
+        with pytest.raises(HfHubHTTPError) as exc_info:
+            hf_raise_for_status(response)
+
+        error_msg = str(exc_info.value)
+        assert "429 Too Many Requests" in error_msg
+        assert "'api' rate limit" in error_msg
+        assert "55 seconds" in error_msg
+        assert "0/500" in error_msg
+        assert "api/models/username/reponame" in error_msg
+
+    def test_429_without_ratelimit_headers(self):
+        """Test 429 error fallback when headers missing."""
+        response = Mock(spec=httpx.Response)
+        response.status_code = 429
+        response.url = "https://huggingface.co/api/models"
+        response.headers = httpx.Headers({})
+        response.raise_for_status.side_effect = httpx.HTTPStatusError("429", request=Mock(), response=response)
+        response.json.return_value = {}
+
+        with pytest.raises(HfHubHTTPError) as exc_info:
+            hf_raise_for_status(response)
+
+        assert "429 Too Many Requests" in str(exc_info.value)
+        assert "api/models" in str(exc_info.value)