Skip to content

Commit a0100fe

Browse files
ctrl-schaffjschaff
andauthored
Bugfix client proxy autodiscovery (#40)
* Remove the HTTPTransport from the client building ... Adding a blank transport removes our auto-discovery of the proxy configurations. Removing it for the base level tests appears to allow for automatic proxy configuration discovery * Re-add the alwayslist utility function * Add custom proxy mount creation for the cache clients ... Due to the httpx client constructor design, if we pass and HTTPTransport, then it assumes the proxy has been configured and will not attempt to discover it from the environment. Therefore we have to do that manually for the cache clients as hishel requires an HTTPTransport. We due this through the mounts property provided by the client constructor * Disable client request timeouts ... I don't believe this will be permanent behavior but due to the transition from 0.3.1 -> 0.4.0 with the new HTTP library, I want to maintain as much backwards compatibility for the moment so that behavior doesn't suddenly change for users. In the future I'd likely set the timeout to around 60 seconds with the option for user configuration upon client creation to allow customization --------- Co-authored-by: jschaff <jschaff@scripps.edu>
1 parent 91100c2 commit a0100fe

File tree

7 files changed

+298
-10
lines changed

7 files changed

+298
-10
lines changed

biothings_client/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
from biothings_client.client.base import BiothingClient, get_client
77
from biothings_client.__version__ import __version__
88
from biothings_client._dependencies import _CACHING, _PANDAS
9+
from biothings_client.utils._external import alwayslist
910

1011
__all__ = [
1112
"AsyncBiothingClient",
1213
"BiothingClient",
1314
"_CACHING",
1415
"_PANDAS",
1516
"__version__",
17+
"alwayslist",
1618
"get_async_client",
1719
"get_client",
1820
]

biothings_client/client/asynchronous.py

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
logger.setLevel(logging.INFO)
4949

5050

51+
PROXY_MOUNT = Dict[str, Union[httpx.BaseTransport, None]]
52+
53+
5154
# Future work:
5255
# Consider use "verbose" settings to control default logging output level
5356
# by doing this instead of using branching throughout the application,
@@ -100,15 +103,20 @@ async def _build_http_client(self, cache_db: Union[str, Path] = None) -> None:
100103
This modifies the state of the BiothingsClient instance
101104
to set the values for the http_client property
102105
106+
For the moment, we have disabled timeouts. This matches our prior
107+
behavior we had with the requests library, which by default did not
108+
specify a timeout when making a request. In the future this should
109+
be modified to prevent indefinite hanging with potentially bad network
110+
connections
111+
103112
Inputs:
104113
:param cache_db: pathlike object to the local sqlite3 cache database file
105114
106115
Outputs:
107116
:return: None
108117
"""
109118
if not self.http_client_setup:
110-
http_transport = httpx.AsyncHTTPTransport()
111-
self.http_client = httpx.AsyncClient(transport=http_transport)
119+
self.http_client = httpx.AsyncClient(timeout=None)
112120
self.http_client_setup = True
113121
self.http_cache_client_setup = False
114122

@@ -122,6 +130,12 @@ async def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> N
122130
This modifies the state of the BiothingsClient instance
123131
to set the values for the http_client property and the cache_storage property
124132
133+
For the moment, we have disabled timeouts. This matches our prior
134+
behavior we had with the requests library, which by default did not
135+
specify a timeout when making a request. In the future this should
136+
be modified to prevent indefinite hanging with potentially bad network
137+
connections
138+
125139
Inputs:
126140
:param cache_db: pathlike object to the local sqlite3 cache database file
127141
@@ -135,15 +149,47 @@ async def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> N
135149

136150
self.cache_storage = AsyncBiothingsClientSqlite3Cache()
137151
await self.cache_storage.setup_database_connection(cache_db)
138-
cache_transport = hishel.AsyncCacheTransport(
139-
transport=httpx.AsyncHTTPTransport(), storage=self.cache_storage
140-
)
152+
153+
async_http_transport = httpx.AsyncHTTPTransport()
154+
cache_transport = hishel.AsyncCacheTransport(transport=async_http_transport, storage=self.cache_storage)
141155
cache_controller = hishel.Controller(cacheable_methods=["GET", "POST"])
156+
157+
# Have to manually build the proxy mounts as httpx will not auto-discover
158+
# proxies if we provide our own HTTPTransport to the Client constructor
159+
proxy_mounts = self._build_caching_proxy_mounts()
142160
self.http_client = hishel.AsyncCacheClient(
143-
controller=cache_controller, transport=cache_transport, storage=self.cache_storage
161+
controller=cache_controller,
162+
transport=cache_transport,
163+
storage=self.cache_storage,
164+
mounts=proxy_mounts,
165+
timeout=None,
144166
)
145167
self.http_client_setup = True
146168

169+
def _build_caching_proxy_mounts(self) -> PROXY_MOUNT:
170+
"""
171+
Builds the proxy mounts for case where we have a CacheTransport.
172+
Autodiscovery of proxies only works when don't provide a transport
173+
to the client so this method acts as a replacement for that
174+
"""
175+
proxy_map = httpx._utils.get_environment_proxies()
176+
proxy_mounts: PROXY_MOUNT = {}
177+
for key, proxy in proxy_map.items():
178+
proxy_transport = None
179+
if proxy is not None:
180+
proxy_transport = httpx.AsyncHTTPTransport(
181+
verify=True,
182+
cert=None,
183+
trust_env=True,
184+
http1=True,
185+
http2=False,
186+
limits=httpx._config.DEFAULT_LIMITS,
187+
proxy=proxy,
188+
)
189+
proxy_mounts[key] = proxy_transport
190+
proxy_mounts = dict(sorted(proxy_mounts.items()))
191+
return proxy_mounts
192+
147193
async def __del__(self):
148194
"""
149195
Destructor for the client to ensure that we close any potential

biothings_client/client/base.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
logger.setLevel(logging.INFO)
5050

5151

52+
PROXY_MOUNT = Dict[str, Union[httpx.BaseTransport, None]]
53+
54+
5255
# Future work:
5356
# Consider use "verbose" settings to control default logging output level
5457
# by doing this instead of using branching throughout the application,
@@ -103,10 +106,15 @@ def _build_http_client(self, cache_db: Union[str, Path] = None) -> None:
103106
104107
This modifies the state of the BiothingsClient instance
105108
to set the values for the http_client property
109+
110+
For the moment, we have disabled timeouts. This matches our prior
111+
behavior we had with the requests library, which by default did not
112+
specify a timeout when making a request. In the future this should
113+
be modified to prevent indefinite hanging with potentially bad network
114+
connections
106115
"""
107116
if not self.http_client_setup:
108-
http_transport = httpx.HTTPTransport()
109-
self.http_client = httpx.Client(transport=http_transport)
117+
self.http_client = httpx.Client(timeout=None)
110118
self.http_client_setup = True
111119
self.http_cache_client_setup = False
112120

@@ -119,6 +127,12 @@ def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> None:
119127
120128
This modifies the state of the BiothingsClient instance
121129
to set the values for the http_client property and the cache_storage property
130+
131+
For the moment, we have disabled timeouts. This matches our prior
132+
behavior we had with the requests library, which by default did not
133+
specify a timeout when making a request. In the future this should
134+
be modified to prevent indefinite hanging with potentially bad network
135+
connections
122136
"""
123137
if not self.http_client_setup:
124138
if cache_db is None:
@@ -127,13 +141,47 @@ def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> None:
127141

128142
self.cache_storage = BiothingsClientSqlite3Cache()
129143
self.cache_storage.setup_database_connection(cache_db)
130-
cache_transport = hishel.CacheTransport(transport=httpx.HTTPTransport(), storage=self.cache_storage)
144+
145+
http_transport = httpx.HTTPTransport()
146+
cache_transport = hishel.CacheTransport(transport=http_transport, storage=self.cache_storage)
131147
cache_controller = hishel.Controller(cacheable_methods=["GET", "POST"])
148+
149+
# Have to manually build the proxy mounts as httpx will not auto-discover
150+
# proxies if we provide our own HTTPTransport to the Client constructor
151+
proxy_mounts = self._build_caching_proxy_mounts()
132152
self.http_client = hishel.CacheClient(
133-
controller=cache_controller, transport=cache_transport, storage=self.cache_storage
153+
controller=cache_controller,
154+
transport=cache_transport,
155+
storage=self.cache_storage,
156+
mounts=proxy_mounts,
157+
timeout=None,
134158
)
135159
self.http_client_setup = True
136160

161+
def _build_caching_proxy_mounts(self) -> PROXY_MOUNT:
162+
"""
163+
Builds the proxy mounts for case where we have a CacheTransport.
164+
Autodiscovery of proxies only works when don't provide a transport
165+
to the client so this method acts as a replacement for that
166+
"""
167+
proxy_map = httpx._utils.get_environment_proxies()
168+
proxy_mounts: PROXY_MOUNT = {}
169+
for key, proxy in proxy_map.items():
170+
proxy_transport = None
171+
if proxy is not None:
172+
proxy_transport = httpx.HTTPTransport(
173+
verify=True,
174+
cert=None,
175+
trust_env=True,
176+
http1=True,
177+
http2=False,
178+
limits=httpx._config.DEFAULT_LIMITS,
179+
proxy=proxy,
180+
)
181+
proxy_mounts[key] = proxy_transport
182+
proxy_mounts = dict(sorted(proxy_mounts.items()))
183+
return proxy_mounts
184+
137185
def __del__(self):
138186
"""
139187
Destructor for the client to ensure that we close any potential
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
Any functions we provide as auxillary or helper
3+
functions for the client are stored here and exposed in the
4+
__init__ for usage by the users
5+
6+
We likely don't use these throughout the biothings.api core
7+
code, but the users might so we want to maintain that here while
8+
also indicating the purposes of the collection of functions in the
9+
module
10+
"""
11+
12+
from typing import Any, Union
13+
14+
15+
def alwayslist(value: Any) -> Union[list, tuple]:
16+
"""
17+
Simple transformation function that ensures the output is an iterable.
18+
19+
Control Flow:
20+
If the input value is a {list, tuple}, we no-opt and return the value unchanged
21+
Otherwise, we wrap the value in a list and then return that list
22+
23+
Example:
24+
25+
>>> x = 'abc'
26+
>>> for xx in alwayslist(x):
27+
... print xx
28+
>>> x = ['abc', 'def']
29+
>>> for xx in alwayslist(x):
30+
... print xx
31+
32+
"""
33+
if isinstance(value, (list, tuple)):
34+
return value
35+
else:
36+
return [value]

tests/conftest.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Fixtures for the biothings_client testing
33
"""
44

5+
import os
6+
57
import pytest
68

79
from biothings_client import get_client, get_async_client
@@ -111,3 +113,12 @@ def async_geneset_client() -> AsyncMyGenesetInfo:
111113
client = "geneset"
112114
geneset_client = get_async_client(client)
113115
return geneset_client
116+
117+
118+
@pytest.fixture(scope="function")
119+
def mock_client_proxy_configuration() -> None:
120+
os.environ["HTTP_PROXY"] = "http://fakehttpproxyhost:6374"
121+
os.environ["HTTPS_PROXY"] = "http://fakehttpsproxyhost:6375"
122+
yield
123+
os.environ.pop("HTTP_PROXY", None)
124+
os.environ.pop("HTTPS_PROXY", None)

tests/test_async.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from typing import List
66

7+
import httpx
78
import pytest
89

910
import biothings_client
@@ -82,3 +83,73 @@ async def test_url_protocol(client_name: str):
8283
# Transform back to HTTPS
8384
client_instance.use_https()
8485
client_instance.url.startswith(https_protocol)
86+
87+
88+
@pytest.mark.asyncio
89+
async def test_async_client_proxy_discovery(mock_client_proxy_configuration):
90+
"""
91+
Tests for verifying that we properly auto-discover the
92+
proxy configuration from the environment using the built-in
93+
methods provided by HTTPX
94+
95+
Brought to light by user issues on VPN
96+
https://github.com/biothings/mygene.py/issues/26#issuecomment-2588065562
97+
"""
98+
client_name = "gene"
99+
gene_client = biothings_client.get_async_client(client_name)
100+
await gene_client._build_http_client()
101+
for url_pattern, http_transport in gene_client.http_client._mounts.items():
102+
assert isinstance(url_pattern, httpx._utils.URLPattern)
103+
assert isinstance(http_transport, httpx.AsyncHTTPTransport)
104+
105+
if url_pattern.pattern == "https://":
106+
proxy_url = http_transport._pool._proxy_url
107+
assert proxy_url.scheme == b"http"
108+
assert proxy_url.host == b"fakehttpsproxyhost"
109+
assert proxy_url.port == 6375
110+
assert proxy_url.target == b"/"
111+
112+
elif url_pattern.pattern == "http://":
113+
proxy_url = http_transport._pool._proxy_url
114+
assert proxy_url.scheme == b"http"
115+
assert proxy_url.host == b"fakehttpproxyhost"
116+
assert proxy_url.port == 6374
117+
assert proxy_url.target == b"/"
118+
119+
120+
@pytest.mark.asyncio
121+
@pytest.mark.skipif(not biothings_client._CACHING, reason="caching libraries not installed")
122+
async def test_async_cache_client_proxy_discovery(mock_client_proxy_configuration):
123+
"""
124+
Tests for verifying that we properly auto-discover the
125+
proxy configuration from the environment using the built-in
126+
methods provided by HTTPX
127+
128+
Brought to light by user issues on VPN
129+
https://github.com/biothings/mygene.py/issues/26#issuecomment-2588065562
130+
"""
131+
client_name = "gene"
132+
gene_client = biothings_client.get_async_client(client_name)
133+
await gene_client._build_cache_http_client()
134+
135+
http_mounts = gene_client.http_client._mounts
136+
assert isinstance(http_mounts, dict)
137+
assert len(http_mounts) == 2
138+
139+
for url_pattern, http_transport in gene_client.http_client._mounts.items():
140+
assert isinstance(url_pattern, httpx._utils.URLPattern)
141+
assert isinstance(http_transport, httpx.AsyncHTTPTransport)
142+
143+
if url_pattern.pattern == "https://":
144+
proxy_url = http_transport._pool._proxy_url
145+
assert proxy_url.scheme == b"http"
146+
assert proxy_url.host == b"fakehttpsproxyhost"
147+
assert proxy_url.port == 6375
148+
assert proxy_url.target == b"/"
149+
150+
elif url_pattern.pattern == "http://":
151+
proxy_url = http_transport._pool._proxy_url
152+
assert proxy_url.scheme == b"http"
153+
assert proxy_url.host == b"fakehttpproxyhost"
154+
assert proxy_url.port == 6374
155+
assert proxy_url.target == b"/"

0 commit comments

Comments
 (0)