Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions binderhub/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import threading
from urllib.parse import urlparse
import os

from kubernetes import client, watch
from tornado.ioloop import IOLoop
Expand Down Expand Up @@ -235,6 +236,24 @@ def submit(self):
if self.git_credentials:
env.append(client.V1EnvVar(name='GIT_CREDENTIAL_ENV', value=self.git_credentials))

# copy additional variables from current environment to new pod environment
proxy_environment_variables = [
'http_proxy',
'https_proxy',
'HTTP_PROXY',
'HTTPS_PROXY',
'no_proxy',
'NO_PROXY',
]

for env_var in proxy_environment_variables:
try:
env.append(client.V1EnvVar(name=env_var, value=os.environ[env_var]))
except KeyError:
# skip the environment variable if it isn't present
pass

component_label = "binderhub-build"
self.pod = client.V1Pod(
metadata=client.V1ObjectMeta(
name=self.name,
Expand Down
5 changes: 3 additions & 2 deletions binderhub/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
"""
import urllib.parse

from tornado.httpclient import AsyncHTTPClient, HTTPRequest
from tornado.httpclient import HTTPRequest
from tornado.web import HTTPError, authenticated
from tornado.httputil import url_concat
from tornado.log import app_log

from .base import BaseHandler
from .utils import ProxiedAsyncHTTPClient

SPEC_NAMES = {
"gh": "GitHub",
Expand Down Expand Up @@ -77,7 +78,7 @@ async def get(self, provider_prefix, _unescaped_spec):

# Check if the nbviewer URL is valid and would display something
# useful to the reader, if not we don't show it
client = AsyncHTTPClient()
client = ProxiedAsyncHTTPClient()
# quote any unicode characters in the URL
proto, rest = nbviewer_url.split("://")
rest = urllib.parse.quote(rest)
Expand Down
4 changes: 3 additions & 1 deletion binderhub/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from traitlets.config import LoggingConfigurable
from traitlets import Dict, Unicode, default

from .utils import ProxiedAsyncHTTPClient

DEFAULT_DOCKER_REGISTRY_URL = "https://registry.hub.docker.com"
DEFAULT_DOCKER_AUTH_URL = "https://index.docker.io/v1"

Expand Down Expand Up @@ -187,7 +189,7 @@ def _default_password(self):

@gen.coroutine
def get_image_manifest(self, image, tag):
client = httpclient.AsyncHTTPClient()
client = ProxiedAsyncHTTPClient()
url = "{}/v2/{}/manifests/{}".format(self.url, image, tag)
# first, get a token to perform the manifest request
if self.token_url:
Expand Down
11 changes: 6 additions & 5 deletions binderhub/repoproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
from prometheus_client import Gauge

from tornado import gen
from tornado.httpclient import AsyncHTTPClient, HTTPError, HTTPRequest
from tornado.httpclient import HTTPError, HTTPRequest
from tornado.httputil import url_concat

from traitlets import Dict, Unicode, Bool, default, List, observe
from traitlets.config import LoggingConfigurable

from .utils import Cache
from .utils import ProxiedAsyncHTTPClient

GITHUB_RATE_LIMIT = Gauge('binderhub_github_rate_limit_remaining', 'GitHub rate limit remaining')
SHA1_PATTERN = re.compile(r'[0-9a-f]{40}')
Expand Down Expand Up @@ -217,7 +218,7 @@ class ZenodoProvider(RepoProvider):

@gen.coroutine
def get_resolved_ref(self):
client = AsyncHTTPClient()
client = ProxiedAsyncHTTPClient()
req = HTTPRequest("https://doi.org/{}".format(self.spec),
user_agent="BinderHub")
r = yield client.fetch(req)
Expand Down Expand Up @@ -257,7 +258,7 @@ class FigshareProvider(RepoProvider):

@gen.coroutine
def get_resolved_ref(self):
client = AsyncHTTPClient()
client = ProxiedAsyncHTTPClient()
req = HTTPRequest("https://doi.org/{}".format(self.spec),
user_agent="BinderHub")
r = yield client.fetch(req)
Expand Down Expand Up @@ -439,7 +440,7 @@ def get_resolved_ref(self):
return self.resolved_ref

namespace = urllib.parse.quote(self.namespace, safe='')
client = AsyncHTTPClient()
client = ProxiedAsyncHTTPClient()
api_url = "https://{hostname}/api/v4/projects/{namespace}/repository/commits/{ref}".format(
hostname=self.hostname,
namespace=namespace,
Expand Down Expand Up @@ -582,7 +583,7 @@ async def get_resolved_ref_url(self):

@gen.coroutine
def github_api_request(self, api_url, etag=None):
client = AsyncHTTPClient()
client = ProxiedAsyncHTTPClient()
if self.auth:
# Add auth params. After logging!
api_url = url_concat(api_url, self.auth)
Expand Down
106 changes: 106 additions & 0 deletions binderhub/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
from hashlib import blake2b

from traitlets import Integer, TraitError
from tornado.httpclient import AsyncHTTPClient, HTTPRequest, HTTPResponse
from typing import Any, Union, Awaitable
from urllib.parse import urlparse
import ipaddress
import re
import os


def blake2b_hash_as_int(b):
Expand Down Expand Up @@ -113,6 +119,106 @@ def set(self, key, value):
self.pop(first_key)


class ProxiedAsyncHTTPClient():
    """Wrapper adding automatic proxy support to tornado's non-blocking HTTP client.

    The proxy is taken from the first usable variable among ``HTTPS_PROXY``,
    ``https_proxy``, ``HTTP_PROXY`` and ``http_proxy``. Hosts listed in
    ``no_proxy`` / ``NO_PROXY`` (comma separated) bypass the proxy; when both
    are set, the lowercase variable wins.

    Attributes:
        client: the wrapped ``AsyncHTTPClient`` instance.
        http_proxy_host: proxy hostname, or ``None`` when no proxy is configured.
        http_proxy_port: proxy port, or ``None`` when no proxy is configured.
        no_proxy_simple: normalized host/domain entries from ``no_proxy``.
        no_proxy_cidr: CIDR range entries (e.g. ``10.0.0.0/8``) from ``no_proxy``.

    See tornado.httpclient.AsyncHTTPClient for usage/documentation.
    """

    # checked in this order; the first variable holding a usable URL wins
    _PROXY_VARIABLES = ('HTTPS_PROXY', 'https_proxy', 'HTTP_PROXY', 'http_proxy')

    def __init__(self):
        self.client = AsyncHTTPClient()
        self.http_proxy_host, self.http_proxy_port = self._proxy_from_environ(os.environ)
        # a set lowercase ``no_proxy`` overrides ``NO_PROXY``
        no_proxy = os.environ.get('no_proxy', os.environ.get('NO_PROXY'))
        self.no_proxy_simple, self.no_proxy_cidr = self._parse_no_proxy(no_proxy)

    @classmethod
    def _proxy_from_environ(cls, environ):
        """Return ``(host, port)`` of the first usable proxy in *environ*.

        Empty values and values without a hostname are skipped so that a
        blank variable does not hide a valid one later in the list.
        Returns ``(None, None)`` when no proxy is configured.
        """
        for proxy_var in cls._PROXY_VARIABLES:
            proxy_url = environ.get(proxy_var, '').strip()
            if not proxy_url:
                continue
            if '://' not in proxy_url:
                # "host:port" without a scheme would otherwise be parsed as
                # scheme + path and yield no hostname
                proxy_url = 'http://' + proxy_url
            parsed_proxy = urlparse(proxy_url)
            if not parsed_proxy.hostname:
                continue
            default_port = 443 if parsed_proxy.scheme == 'https' else 80
            # port can be None (or 0); fall back to the scheme default
            return parsed_proxy.hostname, int(parsed_proxy.port or default_port)
        return None, None

    @classmethod
    def _parse_no_proxy(cls, no_proxy):
        """Split a ``no_proxy`` value into ``(simple_entries, cidr_entries)``.

        Entries are stripped and lowercased, and empty entries are dropped.
        For simple entries a leading ``*.`` or ``.`` is removed, since
        matching already covers subdomains. ``*`` is kept as a wildcard.
        """
        simple = []
        cidr = []
        if not no_proxy:
            return simple, cidr
        for raw_part in no_proxy.split(','):
            part = raw_part.strip().lower()
            if not part:
                continue
            if cls._is_cidr_range(part):
                cidr.append(part)
                continue
            if part != '*':
                if part.startswith('*.'):
                    part = part[2:]
                part = part.lstrip('.')
            if part:
                simple.append(part)
        return simple, cidr

    @staticmethod
    def _is_cidr_range(test_string):
        """Return True if *test_string* is an IP network in ``address/prefix`` form."""
        if '/' not in test_string:
            # a bare address is handled as a "simple" entry, not as a range
            return False
        try:
            # strict=False: accept ranges written with host bits set (10.1.2.3/8)
            ipaddress.ip_network(test_string, strict=False)
        except ValueError:
            return False
        return True

    @staticmethod
    def _is_ip(test_string):
        """Return True if *test_string* is a literal IPv4 or IPv6 address."""
        try:
            ipaddress.ip_address(test_string)
        except ValueError:
            return False
        return True

    def _bypass_proxy(self, hostname):
        """Return True if *hostname* matches any configured ``no_proxy`` entry."""
        hostname = hostname.lower()
        if self._is_ip(hostname):
            address = ipaddress.ip_address(hostname)
            for no_proxy_cidr in self.no_proxy_cidr:
                # strict=False so an entry like 10.1.2.3/8 does not raise here
                if address in ipaddress.ip_network(no_proxy_cidr, strict=False):
                    return True
        # match as full domain or as the last part of it, for example
        # "my-institution.org" matches itself and "www.my-institution.org"
        return any(
            entry == '*' or hostname == entry or hostname.endswith('.' + entry)
            for entry in self.no_proxy_simple
        )

    def fetch(
        self,
        request: Union[str, "HTTPRequest"],
        raise_error: bool = True,
        **kwargs: Any
    ) -> Awaitable["HTTPResponse"]:
        """Executes a request, asynchronously returning an `HTTPResponse`.

        ``kwargs`` are only used to build the ``HTTPRequest`` when *request*
        is a URL string; they are ignored when an ``HTTPRequest`` is given.
        For http/https URLs not covered by ``no_proxy``, the request's
        ``proxy_host`` / ``proxy_port`` are set to the configured proxy.

        see tornado.httpclient.AsyncHTTPClient.fetch for documentation
        """
        # convert request argument into HTTPRequest if necessary
        if isinstance(request, str):
            request = HTTPRequest(url=request, **kwargs)

        parsed_url = urlparse(request.url)
        if self.http_proxy_host and parsed_url.scheme in ('http', 'https'):
            if not self._bypass_proxy(parsed_url.hostname or ''):
                # NOTE(review): tornado documents proxy_* options as honoured
                # by curl_httpclient only - confirm the configured implementation
                request.proxy_host = self.http_proxy_host
                request.proxy_port = self.http_proxy_port

        # pass call on to AsyncHTTPClient's configured implementation;
        # raise_error goes by keyword because older tornado versions take
        # ``callback`` as the second positional parameter
        return self.client.fetch(request, raise_error=raise_error)

    def close(self):
        """Close the wrapped client; see tornado.httpclient.AsyncHTTPClient.close."""
        return self.client.close()


def url_path_join(*pieces):
"""Join components of url into a relative url.

Expand Down