83
83
# Regex to check if the file etag IS a valid sha256
84
84
REGEX_SHA256 = re .compile (r"^[0-9a-f]{64}$" )
85
85
86
+ # Comma-separated allowlist of domains to which the redirect wrapper may follow absolute redirects (matched as a suffix of the target netloc)
87
+ # Example: HF_DOWNLOAD_REDIRECT_ALLOWLIST=opendns.com
88
+ REDIRECT_ALLOWLIST = os .environ .get ("HF_DOWNLOAD_REDIRECT_ALLOWLIST" , "" ).split ("," )
89
+ REDIRECT_ALLOWLIST = [domain for domain in REDIRECT_ALLOWLIST if len (domain ) > 0 ]
90
+
86
91
_are_symlinks_supported_in_dir : Dict [str , bool ] = {}
87
92
88
93
@@ -262,7 +267,7 @@ def hf_hub_url(
262
267
263
268
264
269
def _request_wrapper (
265
- method : HTTP_METHOD_T , url : str , * , follow_relative_redirects : bool = False , ** params
270
+ method : HTTP_METHOD_T , url : str , * , follow_relative_redirects : bool = False , base_domain = "" , ** params
266
271
) -> requests .Response :
267
272
"""Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
268
273
`allow_redirection=False`.
@@ -283,6 +288,10 @@ def _request_wrapper(
283
288
"""
284
289
# Recursively follow relative redirects
285
290
if follow_relative_redirects :
291
+ # On the first call, remember the domain of the initial URL so it is passed along through the redirect chain
292
+ if base_domain == "" :
293
+ base_domain = urlparse (url ).netloc
294
+
286
295
response = _request_wrapper (
287
296
method = method ,
288
297
url = url ,
@@ -294,15 +303,18 @@ def _request_wrapper(
294
303
# This is useful in case of a renamed repository.
295
304
if 300 <= response .status_code <= 399 :
296
305
parsed_target = urlparse (response .headers ["Location" ])
297
- if parsed_target .netloc == "" :
306
+ if parsed_target .netloc == "" or any ( parsed_target . netloc . endswith ( domain ) for domain in REDIRECT_ALLOWLIST ) :
298
307
# This means it is a relative 'Location' header, as allowed by RFC 7231.
299
308
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
300
309
# We want to follow this relative redirect !
301
310
#
302
311
# Highly inspired by `resolve_redirects` from requests library.
303
312
# See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
304
- next_url = urlparse (url )._replace (path = parsed_target .path ).geturl ()
305
- return _request_wrapper (method = method , url = next_url , follow_relative_redirects = True , ** params )
313
+ if parsed_target .netloc == "" :
314
+ next_url = urlparse (url )._replace (path = parsed_target .path , query = parsed_target .query ).geturl ()
315
+ else :
316
+ next_url = parsed_target .geturl ()
317
+ return _request_wrapper (method = method , url = next_url , follow_relative_redirects = True , base_domain = base_domain , ** params )
306
318
return response
307
319
308
320
# Perform request and return if status_code is not in the retry list.
0 commit comments