@@ -362,6 +362,7 @@ def _http_backoff_base(
362362
363363     nb_tries = 0
364364     sleep_time = base_wait_time
365+     ratelimit_reset: Optional[int] = None  # seconds to wait for rate limit reset if 429 response
365366
366367     # If `data` is used and is a file object (or any IO), it will be consumed on the
367368     # first HTTP request. We need to save the initial position so that the full content
@@ -373,6 +374,7 @@ def _http_backoff_base(
373374     client = get_session()
374375     while True:
375376         nb_tries += 1
377+         ratelimit_reset = None
376378         try:
377379             # If `data` is used and is a file object (or any IO), set back cursor to
378380             # initial position.
@@ -382,6 +384,8 @@ def _http_backoff_base(
382384             # Perform request and handle response
383385             def _should_retry(response: httpx.Response) -> bool:
384386                 """Handle response and return True if should retry, False if should return/yield."""
387+                 nonlocal ratelimit_reset
388+
385389                 if response.status_code not in retry_on_status_codes:
386390                     return False  # Success, don't retry
387391
@@ -393,6 +397,12 @@ def _should_retry(response: httpx.Response) -> bool:
393397                 # user ask for retry on a status code that doesn't raise_for_status.
394398                 return False  # Don't retry, return/yield response
395399
400+                 # get rate limit reset time from headers if 429 response
401+                 if response.status_code == 429:
402+                     ratelimit_info = parse_ratelimit_headers(response.headers)
403+                     if ratelimit_info is not None:
404+                         ratelimit_reset = ratelimit_info.reset_in_seconds
405+
396406                 return True  # Should retry
397407
398408             if stream:
@@ -415,9 +425,14 @@ def _should_retry(response: httpx.Response) -> bool:
415425             if nb_tries > max_retries:
416426                 raise err
417427
418-             # Sleep for X seconds
419-             logger.warning(f"Retrying in {sleep_time}s [Retry {nb_tries}/{max_retries}].")
420-             time.sleep(sleep_time)
428+             if ratelimit_reset is not None:
429+                 actual_sleep = float(ratelimit_reset) + 1  # +1s to avoid rounding issues
430+                 logger.warning(f"Rate limited. Waiting {actual_sleep}s before retry [Retry {nb_tries}/{max_retries}].")
431+             else:
432+                 actual_sleep = sleep_time
433+                 logger.warning(f"Retrying in {actual_sleep}s [Retry {nb_tries}/{max_retries}].")
434+
435+             time.sleep(actual_sleep)
421436
422437             # Update sleep time for next retry
423438             sleep_time = min(max_wait_time, sleep_time * 2)  # Exponential backoff
0 commit comments