2222from typing import List , Optional , Type , TypeVar , Union
2323from urllib .parse import urljoin , urlparse
2424
25+ import fsspec
2526import huggingface_hub
2627import requests
2728from huggingface_hub import HfFolder
@@ -327,6 +328,28 @@ def _request_with_retry(
327328 return response
328329
329330
def fsspec_head(url, timeout=10.0):
    """Fetch metadata for a single remote file through fsspec.

    Args:
        url: Location handed to ``fsspec.get_fs_token_paths``; it must resolve
            to exactly one path.
        timeout: Value forwarded as the ``requests_timeout`` storage option.
            NOTE(review): whether this option is honored depends on the
            filesystem implementation -- confirm for the backends in use.

    Returns:
        Whatever ``fs.info`` returns for the resolved path.

    Raises:
        FileNotFoundError: If ``url`` resolves to no path at all.
        ValueError: If ``url`` resolves to more than one path.
    """
    _raise_if_offline_mode_is_enabled(f"Tried to reach {url}")
    fs, _, paths = fsspec.get_fs_token_paths(url, storage_options={"requests_timeout": timeout})
    # An empty result (e.g. a glob pattern with no match) would otherwise
    # surface as an opaque IndexError on ``paths[0]`` below.
    if not paths:
        raise FileNotFoundError(f"HEAD was called with {url} but it doesn't match any path")
    if len(paths) > 1:
        raise ValueError(f"HEAD can be called with at most one path but was called with {paths}")
    return fs.info(paths[0])
337+
338+
def fsspec_get(url, temp_file, timeout=10.0, desc=None):
    """Download a single remote file through fsspec into ``temp_file``.

    Args:
        url: Location handed to ``fsspec.get_fs_token_paths``; it must resolve
            to exactly one path.
        temp_file: Object whose ``name`` attribute is the local destination
            path passed to ``fs.get_file``.
        timeout: Value forwarded as the ``requests_timeout`` storage option.
            NOTE(review): whether this option is honored depends on the
            filesystem implementation -- confirm for the backends in use.
        desc: Progress-bar description; falls back to ``"Downloading"`` when
            falsy.

    Raises:
        FileNotFoundError: If ``url`` resolves to no path at all.
        ValueError: If ``url`` resolves to more than one path.
    """
    _raise_if_offline_mode_is_enabled(f"Tried to reach {url}")
    fs, _, paths = fsspec.get_fs_token_paths(url, storage_options={"requests_timeout": timeout})
    # An empty result (e.g. a glob pattern with no match) would otherwise
    # surface as an opaque IndexError on ``paths[0]`` below.
    if not paths:
        raise FileNotFoundError(f"GET was called with {url} but it doesn't match any path")
    if len(paths) > 1:
        raise ValueError(f"GET can be called with at most one path but was called with {paths}")
    # The progress bar is disabled when progress bars are turned off in the
    # logging settings.
    callback = fsspec.callbacks.TqdmCallback(
        tqdm_kwargs={
            "desc": desc or "Downloading",
            "disable": not logging.is_progress_bar_enabled(),
        }
    )
    fs.get_file(paths[0], temp_file.name, callback=callback)
351+
352+
330353def ftp_head (url , timeout = 10.0 ):
331354 _raise_if_offline_mode_is_enabled (f"Tried to reach { url } " )
332355 try :
@@ -400,6 +423,8 @@ def http_head(
400423
401424
402425def request_etag (url : str , use_auth_token : Optional [Union [str , bool ]] = None ) -> Optional [str ]:
426+ if urlparse (url ).scheme not in ("http" , "https" ):
427+ return None
403428 headers = get_authentication_headers_for_url (url , use_auth_token = use_auth_token )
404429 response = http_head (url , headers = headers , max_retries = 3 )
405430 response .raise_for_status ()
@@ -453,6 +478,7 @@ def get_from_cache(
453478 cookies = None
454479 etag = None
455480 head_error = None
481+ scheme = None
456482
457483 # Try a first time to find the file on the local file system without eTag (None)
458484 # if we don't ask for 'force_download' then we spare a request
@@ -469,8 +495,14 @@ def get_from_cache(
469495
470496 # We don't have the file locally or we need an eTag
471497 if not local_files_only :
472- if url .startswith ("ftp://" ):
498+ scheme = urlparse (url ).scheme
499+ if scheme == "ftp" :
473500 connected = ftp_head (url )
501+ elif scheme not in ("http" , "https" ):
502+ response = fsspec_head (url )
503+ # s3fs uses "ETag", gcsfs uses "etag"
504+ etag = (response .get ("ETag" , None ) or response .get ("etag" , None )) if use_etag else None
505+ connected = True
474506 try :
475507 response = http_head (
476508 url ,
@@ -569,8 +601,10 @@ def _resumable_file_manager():
569601 logger .info (f"{ url } not found in cache or force_download set to True, downloading to { temp_file .name } " )
570602
571603 # GET file object
572- if url . startswith ( "ftp://" ) :
604+ if scheme == "ftp" :
573605 ftp_get (url , temp_file )
606+ elif scheme not in ("http" , "https" ):
607+ fsspec_get (url , temp_file , desc = download_desc )
574608 else :
575609 http_get (
576610 url ,
0 commit comments