@@ -254,14 +254,19 @@ def snapshot_download(
     # At this stage, internet connection is up and running
     # => let's download the files!
     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
-    assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."

     # Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
     # In that case, we need to use the `list_repo_tree` method to prevent caching issues.
-    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings]
-    has_many_files = len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
-    if has_many_files:
-        logger.info("The repo has more than 50,000 files. Using `list_repo_tree` to ensure all files are listed.")
+    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else []
+    unreliable_nb_files = (
+        repo_info.siblings is None
+        or len(repo_info.siblings) == 0
+        or len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
+    )
+    if unreliable_nb_files:
+        logger.info(
+            "Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."
+        )
         repo_files = (
             f.rfilename
             for f in api.list_repo_tree(repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type)
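For readers skimming the hunk, the gist of the new check is: treat the sibling count as unreliable if the list is missing, empty, or above the very-large-repo threshold, and fall back to listing the full tree in all three cases. A minimal standalone sketch, assuming the hypothetical helper name `nb_files_is_unreliable` and taking the 50,000 value of `VERY_LARGE_REPO_THRESHOLD` from the removed log message:

```python
from typing import List, Optional

# Assumption: 50_000 per the removed log message ("more than 50,000 files").
VERY_LARGE_REPO_THRESHOLD = 50_000

def nb_files_is_unreliable(siblings: Optional[List[str]]) -> bool:
    # Mirrors the diff's `unreliable_nb_files` expression: the server may
    # omit the siblings list, return it empty, or truncate it on very large
    # repos. In all three cases the local count cannot be trusted.
    return siblings is None or len(siblings) == 0 or len(siblings) > VERY_LARGE_REPO_THRESHOLD

assert nb_files_is_unreliable(None)  # missing list
assert nb_files_is_unreliable([])    # empty list
assert not nb_files_is_unreliable(["config.json", "model.safetensors"])
```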
@@ -274,7 +279,7 @@ def snapshot_download(
         ignore_patterns=ignore_patterns,
     )

-    if not has_many_files:
+    if not unreliable_nb_files:
         filtered_repo_files = list(filtered_repo_files)
         tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
     else:
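The second hunk is mostly a rename, but the flag gates a real difference in behavior: when the file count is trustworthy, the filtered iterable is materialized so the progress-bar description can state an exact total; when it is not, the files come from a lazy `list_repo_tree` generator that should not be drained just to count it. A minimal sketch of that pattern under stated assumptions (`describe_fetch` and the fallback description are hypothetical; the diff's `else` branch is truncated):

```python
from typing import Iterable, List, Tuple

def describe_fetch(repo_files: Iterable[str], unreliable_nb_files: bool) -> Tuple[Iterable[str], str]:
    # Reliable count: materialize once so tqdm can show an exact total.
    if not unreliable_nb_files:
        files: List[str] = list(repo_files)
        return files, f"Fetching {len(files)} files"
    # Unreliable count: keep the generator lazy; draining a 50k+ file
    # listing up front just to compute a total would be wasteful.
    return repo_files, "Fetching ... files"  # placeholder: real `else` branch not shown in the hunk
```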