Skip to content

Commit 47f0cc4

Browse files
committed
allowlist for request redirection
1 parent b698915 commit 47f0cc4

File tree

1 file changed

+16
-4
lines changed

1 file changed

+16
-4
lines changed

src/huggingface_hub/file_download.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@
8383
# Regex to check if the file etag IS a valid sha256
8484
REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")
8585

86+
# Redirect allowlist for use by relative redirect wrapper
87+
# Example: HF_DOWNLOAD_REDIRECT_ALLOWLIST=opendns.com
88+
REDIRECT_ALLOWLIST = os.environ.get("HF_DOWNLOAD_REDIRECT_ALLOWLIST", "").split(",")
89+
REDIRECT_ALLOWLIST = [domain for domain in REDIRECT_ALLOWLIST if len(domain) > 0]
90+
8691
_are_symlinks_supported_in_dir: Dict[str, bool] = {}
8792

8893

@@ -262,7 +267,7 @@ def hf_hub_url(
262267

263268

264269
def _request_wrapper(
265-
method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
270+
method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, base_domain = "", **params
266271
) -> requests.Response:
267272
"""Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
268273
`allow_redirection=False`.
@@ -283,6 +288,10 @@ def _request_wrapper(
283288
"""
284289
# Recursively follow relative redirects
285290
if follow_relative_redirects:
291+
# Allow return to the initial domain
292+
if base_domain == "":
293+
base_domain = urlparse(url).netloc
294+
286295
response = _request_wrapper(
287296
method=method,
288297
url=url,
@@ -294,15 +303,18 @@ def _request_wrapper(
294303
# This is useful in case of a renamed repository.
295304
if 300 <= response.status_code <= 399:
296305
parsed_target = urlparse(response.headers["Location"])
297-
if parsed_target.netloc == "":
306+
if parsed_target.netloc == "" or any(parsed_target.netloc.endswith(domain) for domain in REDIRECT_ALLOWLIST):
298307
# This means it is a relative 'location' header, as allowed by RFC 7231.
299308
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
300309
# We want to follow this relative redirect !
301310
#
302311
# Highly inspired by `resolve_redirects` from requests library.
303312
# See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
304-
next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
305-
return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
313+
if parsed_target.netloc == "":
314+
next_url = urlparse(url)._replace(path=parsed_target.path, query=parsed_target.query).geturl()
315+
else:
316+
next_url = parsed_target.geturl()
317+
return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, base_domain=base_domain, **params)
306318
return response
307319

308320
# Perform request and return if status_code is not in the retry list.

0 commit comments

Comments
 (0)