Skip to content

Commit f2f58b3

Browse files
authored
Better streaming retries (504 and 429) (#7847)
better streaming retries
1 parent 5cb2925 commit f2f58b3

File tree

2 files changed: +20 additions, -0 deletions

2 files changed: +20 additions, -0 deletions

src/datasets/config.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -248,6 +248,8 @@
248248
# Streaming
249249
STREAMING_READ_MAX_RETRIES = 20
250250
STREAMING_READ_RETRY_INTERVAL = 5
251+
STREAMING_READ_SERVER_UNAVAILABLE_RETRY_INTERVAL = 20
252+
STREAMING_READ_RATE_LIMIT_RETRY_INTERVAL = 60
251253
STREAMING_OPEN_MAX_RETRIES = 20
252254
STREAMING_OPEN_RETRY_INTERVAL = 5
253255

src/datasets/utils/file_utils.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,8 @@ class _AiohttpClientError(Exception):
6868
requests.exceptions.Timeout,
6969
httpx.RequestError,
7070
)
71+
SERVER_UNAVAILABLE_CODE = 504
72+
RATE_LIMIT_CODE = 429
7173

7274

7375
def is_remote_url(url_or_filename: str) -> bool:
@@ -827,6 +829,22 @@ def read_with_retries(*args, **kwargs):
827829
f"Got disconnected from remote data host. Retrying in {config.STREAMING_READ_RETRY_INTERVAL}sec [{retry}/{max_retries}]"
828830
)
829831
time.sleep(config.STREAMING_READ_RETRY_INTERVAL)
832+
except huggingface_hub.errors.HfHubHTTPError as err:
833+
if err.response is not None and err.response.status_code == SERVER_UNAVAILABLE_CODE:
834+
disconnect_err = err
835+
logger.warning(
836+
f"Got disconnected from remote data host. Retrying in {config.STREAMING_READ_SERVER_UNAVAILABLE_RETRY_INTERVAL}sec [{retry}/{max_retries}]"
837+
)
838+
time.sleep(config.STREAMING_READ_SERVER_UNAVAILABLE_RETRY_INTERVAL)
839+
elif err.response is not None and err.response.status_code == RATE_LIMIT_CODE:
840+
disconnect_err = err
841+
logger.warning(str(err))
842+
logger.warning(
843+
f"Got disconnected from remote data host. Retrying in {config.STREAMING_READ_RATE_LIMIT_RETRY_INTERVAL}sec [{retry}/{max_retries}]"
844+
)
845+
time.sleep(config.STREAMING_READ_RATE_LIMIT_RETRY_INTERVAL)
846+
else:
847+
raise
830848
else:
831849
raise ConnectionError("Server Disconnected") from disconnect_err
832850
return out

0 commit comments

Comments
 (0)