diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md index 4776ffcb06..149d178535 100644 --- a/docs/source/en/package_reference/environment_variables.md +++ b/docs/source/en/package_reference/environment_variables.md @@ -172,6 +172,29 @@ Please note that using `hf_transfer` comes with certain limitations. Since it is +### HF_ENABLE_PARALLEL_DOWNLOADING + +Set to `True` for faster downloads. + +Disabled by default. Enables the parallel downloading of models with sharded weight files. This can significantly decrease the time to download large models, often producing _speedups of greater than 50%_. + +Can be set to a string equal to `"false"` or `"true"`, e.g. `os.environ["HF_ENABLE_PARALLEL_DOWNLOADING"] = "true"` (the value is read when `huggingface_hub` is imported, so set it before the import). + +While downloading is already parallelized at the file level when `HF_HUB_ENABLE_HF_TRANSFER` is enabled, `HF_ENABLE_PARALLEL_DOWNLOADING` additionally allows multiple files to be downloaded concurrently, which can greatly speed up downloads if the machine you're using can handle it in terms of network and IO bandwidth. + +For example, here's a comparison for `facebook/opt-30b` on an AWS EC2 `g4dn.metal`: + +- `HF_HUB_ENABLE_HF_TRANSFER` enabled, `HF_ENABLE_PARALLEL_DOWNLOADING` disabled + + - ~45s download + +- `HF_HUB_ENABLE_HF_TRANSFER` enabled, `HF_ENABLE_PARALLEL_DOWNLOADING` enabled + - ~12s download + +To fully saturate a machine capable of massive network bandwidth, set `HF_ENABLE_PARALLEL_DOWNLOADING="True"` and `HF_HUB_ENABLE_HF_TRANSFER="True"`. + +_Note: you will want to profile your code before committing to using this environment variable, as it will not produce speedups for smaller models._ + ## Deprecated environment variables In order to standardize all environment variables within the Hugging Face ecosystem, some variables have been marked as deprecated. Although they remain functional, they no longer take precedence over their replacements. 
The following table outlines the deprecated variables and their corresponding alternatives: diff --git a/src/huggingface_hub/_snapshot_download.py b/src/huggingface_hub/_snapshot_download.py index b928dd3466..50a986f279 100644 --- a/src/huggingface_hub/_snapshot_download.py +++ b/src/huggingface_hub/_snapshot_download.py @@ -287,11 +287,10 @@ def _inner_hf_hub_download(repo_file: str): headers=headers, ) - if constants.HF_HUB_ENABLE_HF_TRANSFER: - # when using hf_transfer we don't want extra parallelism - # from the one hf_transfer provides - for file in filtered_repo_files: - _inner_hf_hub_download(file) + # With hf_transfer enabled, download files serially (hf_transfer already parallelizes each file) unless HF_ENABLE_PARALLEL_DOWNLOADING opts in to also using the thread pool + if constants.HF_HUB_ENABLE_HF_TRANSFER and not constants.HF_ENABLE_PARALLEL_DOWNLOADING: + for file in filtered_repo_files: + _inner_hf_hub_download(file) else: thread_map( _inner_hf_hub_download, diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index cc21348770..c868367e4b 100644 --- a/src/huggingface_hub/constants.py +++ b/src/huggingface_hub/constants.py @@ -206,6 +206,9 @@ def _as_int(value: Optional[str]) -> Optional[int]: # - https://github.com/huggingface/hf_transfer (private) HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER")) +HF_ENABLE_PARALLEL_DOWNLOADING: bool = _is_true(os.environ.get("HF_ENABLE_PARALLEL_DOWNLOADING")) + + # UNUSED # We don't use symlinks in local dir anymore. 
diff --git a/tests/test_snapshot_download_parallel.py b/tests/test_snapshot_download_parallel.py new file mode 100644 index 0000000000..64ae35ae8c --- /dev/null +++ b/tests/test_snapshot_download_parallel.py @@ -0,0 +1,9 @@ +import os + + +# Set the env variable to enable parallel downloading +os.environ["HF_ENABLE_PARALLEL_DOWNLOADING"] = "true" + + +# Re-declare the regular snapshot download tests as a side effect of importing the module +from .test_snapshot_download import SnapshotDownloadTests # noqa