@@ -254,14 +254,19 @@ def snapshot_download(
254254 # At this stage, internet connection is up and running
255255 # => let's download the files!
256256 assert repo_info .sha is not None , "Repo info returned from server must have a revision sha."
257- assert repo_info .siblings is not None , "Repo info returned from server must have a siblings list."
258257
259258 # Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
260259 # In that case, we need to use the `list_repo_tree` method to prevent caching issues.
261- repo_files : Iterable [str ] = [f .rfilename for f in repo_info .siblings ]
262- has_many_files = len (repo_info .siblings ) > VERY_LARGE_REPO_THRESHOLD
263- if has_many_files :
264- logger .info ("The repo has more than 50,000 files. Using `list_repo_tree` to ensure all files are listed." )
260+ repo_files : Iterable [str ] = [f .rfilename for f in repo_info .siblings ] if repo_info .siblings is not None else []
261+ unreliable_nb_files = (
262+ repo_info .siblings is None
263+ or len (repo_info .siblings ) == 0
264+ or len (repo_info .siblings ) > VERY_LARGE_REPO_THRESHOLD
265+ )
266+ if unreliable_nb_files :
267+ logger .info (
268+ "Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."
269+ )
265270 repo_files = (
266271 f .rfilename
267272 for f in api .list_repo_tree (repo_id = repo_id , recursive = True , revision = revision , repo_type = repo_type )
@@ -274,7 +279,7 @@ def snapshot_download(
274279 ignore_patterns = ignore_patterns ,
275280 )
276281
277- if not has_many_files :
282+ if not unreliable_nb_files :
278283 filtered_repo_files = list (filtered_repo_files )
279284 tqdm_desc = f"Fetching { len (filtered_repo_files )} files"
280285 else :
0 commit comments