Skip to content

Commit 00140f5

Browse files
Fix snapshot_download when unreliable number of files (#3241)
* Fix snapshot_download when unreliable number of files
* fix code quality

---------

Co-authored-by: célina <[email protected]>
1 parent f01037c commit 00140f5

File tree

1 file changed

+11
-6
lines changed

1 file changed

+11
-6
lines changed

src/huggingface_hub/_snapshot_download.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,19 @@ def snapshot_download(
254254
# At this stage, internet connection is up and running
255255
# => let's download the files!
256256
assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
257-
assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
258257

259258
# Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
260259
# In that case, we need to use the `list_repo_tree` method to prevent caching issues.
261-
repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings]
262-
has_many_files = len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
263-
if has_many_files:
264-
logger.info("The repo has more than 50,000 files. Using `list_repo_tree` to ensure all files are listed.")
260+
repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else []
261+
unreliable_nb_files = (
262+
repo_info.siblings is None
263+
or len(repo_info.siblings) == 0
264+
or len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
265+
)
266+
if unreliable_nb_files:
267+
logger.info(
268+
"Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."
269+
)
265270
repo_files = (
266271
f.rfilename
267272
for f in api.list_repo_tree(repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type)
@@ -274,7 +279,7 @@ def snapshot_download(
274279
ignore_patterns=ignore_patterns,
275280
)
276281

277-
if not has_many_files:
282+
if not unreliable_nb_files:
278283
filtered_repo_files = list(filtered_repo_files)
279284
tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
280285
else:

0 commit comments

Comments
 (0)