Skip to content

The Snapshot download consistently fails when downloading large datasets (exceeding 5TB) #3457

@zjl775210697

Description

@zjl775210697

Describe the bug

Image

When I use the solution shown in the image above, snapshot downloads of large datasets (over 5 TB) frequently fail. How should I change the configuration to improve the download success rate?

Reproduction

No response

Logs

Traceback (most recent call last): File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 468, in _make_request six.raise_from(e, None) File "", line 3, in raise_from File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 463, in _make_request httplib_response = conn.getresponse() ^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/http/client.py", line 1395, in getresponse response.begin() File "/home/ray/anaconda3/lib/python3.11/http/client.py", line 325, in begin version, status, reason = self._read_status() ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/http/client.py", line 286, in _read_status line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/socket.py", line 718, in readinto return self._sock.recv_into(b) ^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/ssl.py", line 1314, in recv_into return self.read(nbytes, buffer) ^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/ssl.py", line 1166, in read return self._sslobj.read(len, buffer) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ TimeoutError: The read operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/home/ray/anaconda3/lib/python3.11/site-packages/requests/adapters.py", line 487, in send resp = conn.urlopen( ^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 802, in urlopen retries = retries.increment( ^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/util/retry.py", line 552, in increment raise six.reraise(type(error), error, _stacktrace) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/packages/six.py", line 770, in reraise raise value File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 716, in urlopen httplib_response = self._make_request( ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 470, in _make_request self._raise_timeout(err=e, url=url, timeout_value=read_timeout) File "/home/ray/anaconda3/lib/python3.11/site-packages/urllib3/connectionpool.py", line 358, in _raise_timeout raise ReadTimeoutError( urllib3.exceptions.ReadTimeoutError: HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1376, in _get_metadata_or_catch_error metadata = get_hf_file_metadata( ^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn return fn(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1296, in get_hf_file_metadata r = _request_wrapper( ^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 280, in _request_wrapper response = _request_wrapper( ^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 303, in _request_wrapper response = get_session().request(method=method, url=url, **params) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/requests/sessions.py", line 587, in request resp = self.send(prep, **send_kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/requests/sessions.py", line 701, in send r = adapter.send(request, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_http.py", line 96, in send return super().send(request, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/requests/adapters.py", line 533, in send raise ReadTimeout(e, request=request) requests.exceptions.ReadTimeout: (ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: 30744138-8551-4c04-b6d4-b2c16636507d)')
The above exception was the direct cause of the following exception:
Traceback (most recent call last): File "/home/admin/huggingface_download/strategy/snapshot_download_strategy.py", line 334, in download snapshot_download( File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn return fn(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/_snapshot_download.py", line 296, in snapshot_download thread_map( File "/home/ray/anaconda3/lib/python3.11/site-packages/tqdm/contrib/concurrent.py", line 69, in thread_map return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/tqdm/contrib/concurrent.py", line 51, in _executor_map return list(tqdm_class(ex.map(fn, *iterables, chunksize=chunksize), **kwargs)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/tqdm/std.py", line 1181, in iter for obj in iterable: File "/home/ray/anaconda3/lib/python3.11/concurrent/futures/_base.py", line 619, in result_iterator yield _result_or_cancel(fs.pop()) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/concurrent/futures/_base.py", line 317, in _result_or_cancel return fut.result(timeout) ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/concurrent/futures/_base.py", line 449, in result return self.__get_result() ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result raise self._exception File "/home/ray/anaconda3/lib/python3.11/concurrent/futures/thread.py", line 58, in run result = self.fn(*self.args, **self.kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/_snapshot_download.py", line 270, in _inner_hf_hub_download return hf_hub_download( ^^^^^^^^^^^^^^^^ File 
"/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py", line 114, in _inner_fn return fn(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 842, in hf_hub_download return _hf_hub_download_to_local_dir( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1091, in _hf_hub_download_to_local_dir _raise_on_head_call_error(head_call_error, force_download, local_files_only) File "/home/ray/anaconda3/lib/python3.11/site-packages/huggingface_hub/file_download.py", line 1473, in _raise_on_head_call_error raise ValueError("Force download failed due to the above error.") from head_call_error ValueError: Force download failed due to the above error.

System info

huggingface-hub 0.29.2
python version 3.11

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions