|
23 | 23 | import time |
24 | 24 | import uuid |
25 | 25 | from contextlib import contextmanager |
| 26 | +from dataclasses import dataclass |
26 | 27 | from http import HTTPStatus |
27 | 28 | from shlex import quote |
28 | | -from typing import Any, Callable, Generator, Optional, Union |
| 29 | +from typing import Any, Callable, Generator, Mapping, Optional, Union |
29 | 30 |
|
30 | 31 | import httpx |
31 | 32 |
|
|
48 | 49 |
|
49 | 50 | logger = logging.get_logger(__name__) |
50 | 51 |
|
| 52 | + |
@dataclass(frozen=True)
class RateLimitInfo:
    """
    Immutable snapshot of the rate limit state advertised by a server in its HTTP response headers.

    The first three attributes are always set. The last two default to `None` and are only filled
    when the corresponding values are known.

    Attributes:
        resource_type (`str`): Name of the resource the rate limit applies to.
        remaining (`int`): How many requests can still be made in the current window.
        reset_in_seconds (`int`): How many seconds are left before the rate limit resets.
        limit (`int`, *optional*): Maximum number of requests allowed per window.
        window_seconds (`int`, *optional*): Length of a window, in seconds.
    """

    # Required values.
    resource_type: str
    remaining: int
    reset_in_seconds: int
    # Optional values; `None` means "unknown".
    limit: Optional[int] = None
    window_seconds: Optional[int] = None
| 72 | + |
| 73 | + |
# Regex patterns for parsing rate limit headers
# e.g.: "api";r=0;t=55 --> resource_type="api", r=0, t=55
_RATELIMIT_REGEX = re.compile(r"\"(?P<resource_type>\w+)\"\s*;\s*r\s*=\s*(?P<r>\d+)\s*;\s*t\s*=\s*(?P<t>\d+)")
# Matches ONE `q=<int>` (quota) or `w=<int>` (window) parameter, so that the two can appear in any
# order and independently of each other. The lookbehind rejects the tail of a longer token
# (e.g. the "q" of `seq=1`).
# e.g.: "fixed window";"api";q=500;w=300 --> q=500, then w=300
_RATELIMIT_POLICY_REGEX = re.compile(r"(?<![\w-])(?:q\s*=\s*(?P<q>\d+)|w\s*=\s*(?P<w>\d+))")
# Matches one item of a comma-separated header value; commas inside double quotes do not split.
_RATELIMIT_POLICY_ITEM_REGEX = re.compile(r"(?:\"[^\"]*\"|[^,])+")


def _select_ratelimit_policy(policy: str, resource_type: str) -> str:
    """Return the item of a `ratelimit-policy` value that names `resource_type`, else its first item."""
    items = [item for item in _RATELIMIT_POLICY_ITEM_REGEX.findall(policy) if item.strip()]
    quoted_name = f'"{resource_type}"'
    for item in items:
        if quoted_name in item:
            return item
    # No item mentions the resource: fall back to the first one (or to the raw value if empty).
    return items[0] if items else policy


def parse_ratelimit_headers(headers: Mapping[str, str]) -> Optional[RateLimitInfo]:
    """Parse rate limit information from HTTP response headers.

    Follows IETF draft: https://www.ietf.org/archive/id/draft-ietf-httpapi-ratelimit-headers-09.html
    Only a subset is implemented.

    The `ratelimit` header provides the resource type, the remaining requests (`r`) and the time
    before reset (`t`). The `ratelimit-policy` header is optional and provides the quota (`q`) and
    the window length (`w`). When it lists several comma-separated policies, the one naming the
    resource type found in `ratelimit` is used (the first one otherwise). `q` and `w` are read
    independently of each other and in any order.

    Args:
        headers (`Mapping[str, str]`):
            Response headers. Header names are matched case-insensitively.

    Returns:
        `Optional[RateLimitInfo]`: the parsed information, or `None` if the `ratelimit` header is
        missing, empty or does not have the expected `"<resource>";r=<int>;t=<int>` form.

    Example:
    ```python
    >>> from huggingface_hub.utils import parse_ratelimit_headers
    >>> headers = {
    ...     "ratelimit": '"api";r=0;t=55',
    ...     "ratelimit-policy": '"fixed window";"api";q=500;w=300',
    ... }
    >>> info = parse_ratelimit_headers(headers)
    >>> info.remaining
    0
    >>> info.reset_in_seconds
    55
    ```
    """

    ratelimit: Optional[str] = None
    policy: Optional[str] = None
    # Manual case-insensitive lookup so that any mapping works, not only case-insensitive ones.
    for key in headers:
        lower_key = key.lower()
        if lower_key == "ratelimit":
            ratelimit = headers[key]
        elif lower_key == "ratelimit-policy":
            policy = headers[key]

    if not ratelimit:
        return None

    match = _RATELIMIT_REGEX.search(ratelimit)
    if not match:
        return None

    resource_type = match.group("resource_type")

    limit: Optional[int] = None
    window_seconds: Optional[int] = None

    if policy:
        # Restrict the search to a single policy so that the `q` of one policy is never paired
        # with the `w` of another one.
        policy_item = _select_ratelimit_policy(policy, resource_type)
        for param in _RATELIMIT_POLICY_REGEX.finditer(policy_item):
            quota, window = param.group("q", "w")
            # First occurrence of each parameter wins.
            if quota is not None and limit is None:
                limit = int(quota)
            elif window is not None and window_seconds is None:
                window_seconds = int(window)

    return RateLimitInfo(
        resource_type=resource_type,
        remaining=int(match.group("r")),
        reset_in_seconds=int(match.group("t")),
        limit=limit,
        window_seconds=window_seconds,
    )
| 138 | + |
| 139 | + |
51 | 140 | # Both headers are used by the Hub to debug failed requests. |
52 | 141 | # `X_AMZN_TRACE_ID` is better as it also works to debug on Cloudfront and ALB. |
53 | 142 | # If `X_AMZN_TRACE_ID` is set, the Hub will use it as well. |
@@ -619,6 +708,25 @@ def hf_raise_for_status(response: httpx.Response, endpoint_name: Optional[str] = |
619 | 708 | ) |
620 | 709 | raise _format(HfHubHTTPError, message, response) from e |
621 | 710 |
|
| 711 | + elif response.status_code == 429: |
| 712 | + ratelimit_info = parse_ratelimit_headers(response.headers) |
| 713 | + if ratelimit_info is not None: |
| 714 | + message = ( |
| 715 | + f"\n\n429 Too Many Requests: you have reached your '{ratelimit_info.resource_type}' rate limit." |
| 716 | + ) |
| 717 | + message += f"\nRetry after {ratelimit_info.reset_in_seconds} seconds" |
| 718 | + if ratelimit_info.limit is not None and ratelimit_info.window_seconds is not None: |
| 719 | + message += ( |
| 720 | + f" ({ratelimit_info.remaining}/{ratelimit_info.limit} requests remaining" |
| 721 | + f" in current {ratelimit_info.window_seconds}s window)." |
| 722 | + ) |
| 723 | + else: |
| 724 | + message += "." |
| 725 | + message += f"\nUrl: {response.url}." |
| 726 | + else: |
| 727 | + message = f"\n\n429 Too Many Requests for url: {response.url}." |
| 728 | + raise _format(HfHubHTTPError, message, response) from e |
| 729 | + |
622 | 730 | elif response.status_code == 416: |
623 | 731 | range_header = response.request.headers.get("Range") |
624 | 732 | message = f"{e}. Requested range: {range_header}. Content-Range: {response.headers.get('Content-Range')}." |
|
0 commit comments