Skip to content

Commit b29e18a

Browse files
committed
implement ProxiedAsyncHTTPClient wrapper
This class can be used as a drop-in replacement for Tornado's httpclient.AsyncHTTPClient; it automatically injects the proxy configured in the environment variables.
1 parent 53cf562 commit b29e18a

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

binderhub/utils.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,12 @@
33
from hashlib import blake2b
44

55
from traitlets import Integer, TraitError
6+
from tornado.httpclient import AsyncHTTPClient, HTTPRequest, HTTPResponse
7+
from typing import Any, Union, Awaitable
8+
from urllib.parse import urlparse
9+
import ipaddress
10+
import re
11+
import os
612

713

814
def blake2b_hash_as_int(b):
@@ -113,6 +119,106 @@ def set(self, key, value):
113119
self.pop(first_key)
114120

115121

122+
class ProxiedAsyncHTTPClient():
    """Wrapper adding automatic proxy support to tornado's non-blocking HTTP client.

    The proxy is taken from the first non-empty variable among ``HTTPS_PROXY``,
    ``https_proxy``, ``HTTP_PROXY`` and ``http_proxy``.  Hosts listed in
    ``NO_PROXY`` / ``no_proxy`` (comma separated; IPv4 CIDR ranges such as
    ``10.0.0.0/8``, plain hosts/IPs, domains with or without a leading dot, or
    a single ``*`` for "everything") are requested directly.

    See tornado.httpclient.AsyncHTTPClient for usage/documentation.

    Attributes:
        client: the wrapped ``AsyncHTTPClient`` instance.
        http_proxy_host: proxy hostname, or ``None`` when no proxy is configured.
        http_proxy_port: proxy port, or ``None`` when no proxy is configured.
        no_proxy_simple: NO_PROXY entries that are hosts, domains or single IPs.
        no_proxy_cidr: NO_PROXY entries that are IPv4 CIDR ranges.
    """

    # Environment variables consulted for the proxy URL, in priority order.
    _PROXY_VARS = ('HTTPS_PROXY', 'https_proxy', 'HTTP_PROXY', 'http_proxy')
    # Environment variables consulted for exclusions; a later one that is set
    # overrides an earlier one.
    _NO_PROXY_VARS = ('NO_PROXY', 'no_proxy')

    # Compiled once instead of on every call.
    _IP_OCTET = '(?:1[0-9]?[0-9]|[1-9][0-9]|[0-9]|2[0-4][0-9]|25[0-5])'
    _IPV4_RE = re.compile(rf'{_IP_OCTET}\.{_IP_OCTET}\.{_IP_OCTET}\.{_IP_OCTET}')
    _CIDR_SUFFIX_RE = re.compile('(?:[0-9]|[12][0-9]|3[0-2])')

    def __init__(self):
        """Create the wrapped client and read the proxy settings from ``os.environ``.

        Raises:
            ValueError: if the selected proxy URL contains an invalid port
                (raised by ``urllib.parse`` when reading ``.port``).
        """
        self.client = AsyncHTTPClient()

        self.http_proxy_host, self.http_proxy_port = self._proxy_from_environment()
        self.no_proxy_cidr, self.no_proxy_simple = self._no_proxy_from_environment()

    @staticmethod
    def _proxy_from_environment(environ=None):
        """Return ``(host, port)`` of the first usable proxy variable.

        Args:
            environ: mapping to read from; defaults to ``os.environ``.

        Returns:
            ``(hostname, port)``, or ``(None, None)`` when no variable yields a
            hostname.  A missing port defaults to 443 for ``https`` proxy URLs
            and to 80 otherwise.
        """
        if environ is None:
            environ = os.environ
        for proxy_var in ProxiedAsyncHTTPClient._PROXY_VARS:
            proxy_url = environ.get(proxy_var, '').strip()
            if not proxy_url:
                # An empty variable must not hide a later, valid one.
                continue
            if '://' not in proxy_url:
                # "proxy.example.org:3128" has no scheme; without one urlparse
                # would not recognise the host part at all.
                proxy_url = 'http://' + proxy_url
            parsed_proxy = urlparse(proxy_url)
            if not parsed_proxy.hostname:
                continue
            default_port = 443 if parsed_proxy.scheme == 'https' else 80
            return parsed_proxy.hostname, parsed_proxy.port or default_port
        return None, None

    @staticmethod
    def _no_proxy_from_environment(environ=None):
        """Split the NO_PROXY setting into CIDR ranges and "simple" entries.

        Args:
            environ: mapping to read from; defaults to ``os.environ``.

        Returns:
            ``(cidr_ranges, simple_entries)`` as two lists of strings.
            Surrounding whitespace is removed and empty parts are dropped.
        """
        if environ is None:
            environ = os.environ
        no_proxy = None
        for no_proxy_var in ProxiedAsyncHTTPClient._NO_PROXY_VARS:
            if no_proxy_var in environ:
                no_proxy = environ[no_proxy_var]

        cidr_ranges = []
        simple_entries = []
        for no_proxy_part in (no_proxy or '').split(','):
            no_proxy_part = no_proxy_part.strip()
            if not no_proxy_part:
                continue
            if ProxiedAsyncHTTPClient._is_cidr_range(no_proxy_part):
                cidr_ranges.append(no_proxy_part)
            else:
                simple_entries.append(no_proxy_part)
        return cidr_ranges, simple_entries

    @staticmethod
    def _is_cidr_range(test_string):
        """Return True if *test_string* looks like ``a.b.c.d/n`` with 0 <= n <= 32."""
        range_parts = test_string.split('/')
        if len(range_parts) != 2:
            return False
        ip, suffix = range_parts
        suffix_is_valid = bool(ProxiedAsyncHTTPClient._CIDR_SUFFIX_RE.fullmatch(suffix))
        return ProxiedAsyncHTTPClient._is_ip(ip) and suffix_is_valid

    @staticmethod
    def _is_ip(test_string):
        """Return True if *test_string* is a dotted-quad IPv4 address."""
        return bool(ProxiedAsyncHTTPClient._IPV4_RE.fullmatch(test_string))

    def _bypasses_proxy(self, hostname):
        """Return True if *hostname* is excluded from proxying by NO_PROXY.

        Args:
            hostname: host part of the request URL (name or IPv4 address).
        """
        hostname = hostname.lower()

        if ProxiedAsyncHTTPClient._is_ip(hostname):
            address = ipaddress.ip_address(hostname)
            for no_proxy_cidr in self.no_proxy_cidr:
                # strict=False: "10.1.2.3/8" is accepted by _is_cidr_range but
                # has host bits set, which strict parsing rejects with a
                # ValueError on every request.
                if address in ipaddress.ip_network(no_proxy_cidr, strict=False):
                    return True

        for no_proxy_simple in self.no_proxy_simple:
            entry = no_proxy_simple.strip().lower()
            if entry == '*':
                # conventional wildcard: never use the proxy
                return True
            # ".my-institution.org" and "my-institution.org" mean the same
            entry = entry.lstrip('.')
            if not entry:
                continue
            # match the full domain or any subdomain of it, for example
            # "my-institution.org" and "www.my-institution.org"
            if hostname == entry or hostname.endswith('.' + entry):
                return True
        return False

    def fetch(
        self,
        request: Union[str, "HTTPRequest"],
        raise_error: bool = True,
        **kwargs: Any
    ) -> Awaitable["HTTPResponse"]:
        """Executes a request, asynchronously returning an `HTTPResponse`.

        For ``http``/``https`` URLs whose host is not excluded via NO_PROXY,
        the configured proxy is set on the request (``proxy_host`` and
        ``proxy_port``) before it is handed to the wrapped client.  A passed-in
        ``HTTPRequest`` object is modified in place.

        Args:
            request: URL string or ``HTTPRequest`` object.
            raise_error: forwarded to the wrapped client's ``fetch``.
            **kwargs: used to build the ``HTTPRequest`` when *request* is a
                string; ignored when *request* is already an ``HTTPRequest``.

        see tornado.httpclient.AsyncHTTPClient.fetch for documentation
        """
        # convert request argument into HTTPRequest if necessary
        if isinstance(request, str):
            request = HTTPRequest(url=request, **kwargs)

        # determine correct proxy host and port
        parsed_url = urlparse(request.url)
        use_proxy = (
            self.http_proxy_host
            and parsed_url.scheme in ('http', 'https')
            and not self._bypasses_proxy(parsed_url.hostname or '')
        )
        if use_proxy:
            # NOTE: per tornado's documentation the proxy_* request options
            # are only supported by curl_httpclient.
            request.proxy_host = self.http_proxy_host
            request.proxy_port = self.http_proxy_port

        # pass call on to AsyncHTTPClient's configured implementation
        return self.client.fetch(request, raise_error=raise_error)

    def close(self):
        """Close the wrapped client; see tornado.httpclient.AsyncHTTPClient.close."""
        return self.client.close()
220+
221+
116222
def url_path_join(*pieces):
117223
"""Join components of url into a relative url.
118224

0 commit comments

Comments
 (0)