Skip to content

Commit 5f2febd

Browse files
committed
allowlist for request redirection
1 parent b698915 commit 5f2febd

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

src/huggingface_hub/file_download.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@
8383
# Regex to check if the file etag IS a valid sha256
8484
REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")
8585

86+
# Allowlist of domains to which the relative-redirect wrapper may also follow absolute redirects
87+
# Comma-separated list of domains. Example: HF_DOWNLOAD_REDIRECT_ALLOWLIST=opendns.com
88+
# Strip whitespace around each entry so that "a.com, b.com" yields "b.com" rather than " b.com".
REDIRECT_ALLOWLIST = [domain.strip() for domain in os.environ.get("HF_DOWNLOAD_REDIRECT_ALLOWLIST", "").split(",")]
89+
# Drop empty entries (e.g. when the environment variable is unset or has a trailing comma).
REDIRECT_ALLOWLIST = [entry for entry in REDIRECT_ALLOWLIST if entry]
90+
8691
_are_symlinks_supported_in_dir: Dict[str, bool] = {}
8792

8893

@@ -294,15 +299,18 @@ def _request_wrapper(
294299
# This is useful in case of a renamed repository.
295300
if 300 <= response.status_code <= 399:
296301
parsed_target = urlparse(response.headers["Location"])
297-
if parsed_target.netloc == "":
302+
if parsed_target.netloc == "" or any(parsed_target.netloc.endswith(domain) for domain in REDIRECT_ALLOWLIST):
298303
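# NOTE(review): the allowlist check is a plain suffix match on netloc, so e.g. 'evilopendns.com' also matches 'opendns.com'.
# Either the target host matches the redirect allowlist, or this is a relative 'Location' header, as allowed by RFC 7231.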
299304
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
300305
# We want to follow this redirect!
301306
#
302307
# Highly inspired by `resolve_redirects` from requests library.
303308
# See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
304-
next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
305-
return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
309+
if parsed_target.netloc == "":
310+
next_url = urlparse(url)._replace(path=parsed_target.path, query=parsed_target.query).geturl()
311+
else:
312+
next_url = parsed_target.geturl()
313+
return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, base_domain=base_domain, **params)
306314
return response
307315

308316
# Perform request and return if status_code is not in the retry list.

0 commit comments

Comments
 (0)