Skip to content

Commit 698cd0a

Browse files
author
jschaff
committed
Merge branch 'master' of github.com:biothings/biothings_client.py
2 parents 1dd8ef3 + a0100fe commit 698cd0a

File tree

6 files changed

+221
-7
lines changed

6 files changed

+221
-7
lines changed

biothings_client/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@
66
from biothings_client.client.base import BiothingClient, get_client
77
from biothings_client.__version__ import __version__
88
from biothings_client._dependencies import _CACHING, _PANDAS
9+
from biothings_client.utils._external import alwayslist
910

1011
__all__ = [
1112
"AsyncBiothingClient",
1213
"BiothingClient",
1314
"_CACHING",
1415
"_PANDAS",
1516
"__version__",
17+
"alwayslist",
1618
"get_async_client",
1719
"get_client",
1820
]

biothings_client/client/asynchronous.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
logger.setLevel(logging.INFO)
4949

5050

51+
PROXY_MOUNT = Dict[str, Union[httpx.BaseTransport, None]]
52+
53+
5154
# Future work:
5255
# Consider using "verbose" settings to control default logging output level
5356
# by doing this instead of using branching throughout the application,
@@ -100,6 +103,12 @@ async def _build_http_client(self, cache_db: Union[str, Path] = None) -> None:
100103
This modifies the state of the BiothingsClient instance
101104
to set the values for the http_client property
102105
106+
For the moment, we have disabled timeouts. This matches our prior
107+
behavior we had with the requests library, which by default did not
108+
specify a timeout when making a request. In the future this should
109+
be modified to prevent indefinite hanging with potentially bad network
110+
connections
111+
103112
Inputs:
104113
:param cache_db: pathlike object to the local sqlite3 cache database file
105114
@@ -108,6 +117,7 @@ async def _build_http_client(self, cache_db: Union[str, Path] = None) -> None:
108117
"""
109118
if not self.http_client_setup:
110119
self.http_client = httpx.AsyncClient()
120+
self.http_client = httpx.AsyncClient(timeout=None)
111121
self.http_client_setup = True
112122
self.http_cache_client_setup = False
113123

@@ -121,6 +131,12 @@ async def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> N
121131
This modifies the state of the BiothingsClient instance
122132
to set the values for the http_client property and the cache_storage property
123133
134+
For the moment, we have disabled timeouts. This matches our prior
135+
behavior we had with the requests library, which by default did not
136+
specify a timeout when making a request. In the future this should
137+
be modified to prevent indefinite hanging with potentially bad network
138+
connections
139+
124140
Inputs:
125141
:param cache_db: pathlike object to the local sqlite3 cache database file
126142
@@ -134,15 +150,47 @@ async def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> N
134150

135151
self.cache_storage = AsyncBiothingsClientSqlite3Cache()
136152
await self.cache_storage.setup_database_connection(cache_db)
137-
cache_transport = hishel.AsyncCacheTransport(
138-
transport=httpx.AsyncHTTPTransport(), storage=self.cache_storage
139-
)
153+
154+
async_http_transport = httpx.AsyncHTTPTransport()
155+
cache_transport = hishel.AsyncCacheTransport(transport=async_http_transport, storage=self.cache_storage)
140156
cache_controller = hishel.Controller(cacheable_methods=["GET", "POST"])
157+
158+
# Have to manually build the proxy mounts as httpx will not auto-discover
159+
# proxies if we provide our own HTTPTransport to the Client constructor
160+
proxy_mounts = self._build_caching_proxy_mounts()
141161
self.http_client = hishel.AsyncCacheClient(
142-
controller=cache_controller, transport=cache_transport, storage=self.cache_storage
162+
controller=cache_controller,
163+
transport=cache_transport,
164+
storage=self.cache_storage,
165+
mounts=proxy_mounts,
166+
timeout=None,
143167
)
144168
self.http_client_setup = True
145169

170+
def _build_caching_proxy_mounts(self) -> PROXY_MOUNT:
171+
"""
172+
Builds the proxy mounts for the case where we have a CacheTransport.
173+
Autodiscovery of proxies only works when we don't provide a transport
174+
to the client so this method acts as a replacement for that
175+
"""
176+
proxy_map = httpx._utils.get_environment_proxies()
177+
proxy_mounts: PROXY_MOUNT = {}
178+
for key, proxy in proxy_map.items():
179+
proxy_transport = None
180+
if proxy is not None:
181+
proxy_transport = httpx.AsyncHTTPTransport(
182+
verify=True,
183+
cert=None,
184+
trust_env=True,
185+
http1=True,
186+
http2=False,
187+
limits=httpx._config.DEFAULT_LIMITS,
188+
proxy=proxy,
189+
)
190+
proxy_mounts[key] = proxy_transport
191+
proxy_mounts = dict(sorted(proxy_mounts.items()))
192+
return proxy_mounts
193+
146194
async def __del__(self):
147195
"""
148196
Destructor for the client to ensure that we close any potential

biothings_client/client/base.py

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
logger.setLevel(logging.INFO)
5050

5151

52+
PROXY_MOUNT = Dict[str, Union[httpx.BaseTransport, None]]
53+
54+
5255
# Future work:
5356
# Consider using "verbose" settings to control default logging output level
5457
# by doing this instead of using branching throughout the application,
@@ -103,9 +106,15 @@ def _build_http_client(self, cache_db: Union[str, Path] = None) -> None:
103106
104107
This modifies the state of the BiothingsClient instance
105108
to set the values for the http_client property
109+
110+
For the moment, we have disabled timeouts. This matches our prior
111+
behavior we had with the requests library, which by default did not
112+
specify a timeout when making a request. In the future this should
113+
be modified to prevent indefinite hanging with potentially bad network
114+
connections
106115
"""
107116
if not self.http_client_setup:
108-
self.http_client = httpx.Client()
117+
self.http_client = httpx.Client(timeout=None)
109118
self.http_client_setup = True
110119
self.http_cache_client_setup = False
111120

@@ -118,6 +127,12 @@ def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> None:
118127
119128
This modifies the state of the BiothingsClient instance
120129
to set the values for the http_client property and the cache_storage property
130+
131+
For the moment, we have disabled timeouts. This matches our prior
132+
behavior we had with the requests library, which by default did not
133+
specify a timeout when making a request. In the future this should
134+
be modified to prevent indefinite hanging with potentially bad network
135+
connections
121136
"""
122137
if not self.http_client_setup:
123138
if cache_db is None:
@@ -126,13 +141,47 @@ def _build_cache_http_client(self, cache_db: Union[str, Path] = None) -> None:
126141

127142
self.cache_storage = BiothingsClientSqlite3Cache()
128143
self.cache_storage.setup_database_connection(cache_db)
129-
cache_transport = hishel.CacheTransport(transport=httpx.HTTPTransport(), storage=self.cache_storage)
144+
145+
http_transport = httpx.HTTPTransport()
146+
cache_transport = hishel.CacheTransport(transport=http_transport, storage=self.cache_storage)
130147
cache_controller = hishel.Controller(cacheable_methods=["GET", "POST"])
148+
149+
# Have to manually build the proxy mounts as httpx will not auto-discover
150+
# proxies if we provide our own HTTPTransport to the Client constructor
151+
proxy_mounts = self._build_caching_proxy_mounts()
131152
self.http_client = hishel.CacheClient(
132-
controller=cache_controller, transport=cache_transport, storage=self.cache_storage
153+
controller=cache_controller,
154+
transport=cache_transport,
155+
storage=self.cache_storage,
156+
mounts=proxy_mounts,
157+
timeout=None,
133158
)
134159
self.http_client_setup = True
135160

161+
def _build_caching_proxy_mounts(self) -> PROXY_MOUNT:
162+
"""
163+
Builds the proxy mounts for the case where we have a CacheTransport.
164+
Autodiscovery of proxies only works when we don't provide a transport
165+
to the client so this method acts as a replacement for that
166+
"""
167+
proxy_map = httpx._utils.get_environment_proxies()
168+
proxy_mounts: PROXY_MOUNT = {}
169+
for key, proxy in proxy_map.items():
170+
proxy_transport = None
171+
if proxy is not None:
172+
proxy_transport = httpx.HTTPTransport(
173+
verify=True,
174+
cert=None,
175+
trust_env=True,
176+
http1=True,
177+
http2=False,
178+
limits=httpx._config.DEFAULT_LIMITS,
179+
proxy=proxy,
180+
)
181+
proxy_mounts[key] = proxy_transport
182+
proxy_mounts = dict(sorted(proxy_mounts.items()))
183+
return proxy_mounts
184+
136185
def __del__(self):
137186
"""
138187
Destructor for the client to ensure that we close any potential
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""
2+
Any functions we provide as auxiliary or helper
3+
functions for the client are stored here and exposed in the
4+
__init__ for usage by the users
5+
6+
We likely don't use these throughout the biothings.api core
7+
code, but the users might so we want to maintain that here while
8+
also indicating the purposes of the collection of functions in the
9+
module
10+
"""
11+
12+
from typing import Any, Union
13+
14+
15+
def alwayslist(value: Any) -> Union[list, tuple]:
16+
"""
17+
Simple transformation function that ensures the output is an iterable.
18+
19+
Control Flow:
20+
If the input value is a {list, tuple}, we no-opt and return the value unchanged
21+
Otherwise, we wrap the value in a list and then return that list
22+
23+
Example:
24+
25+
>>> x = 'abc'
26+
>>> for xx in alwayslist(x):
27+
... print(xx)
28+
>>> x = ['abc', 'def']
29+
>>> for xx in alwayslist(x):
30+
... print(xx)
31+
32+
"""
33+
if isinstance(value, (list, tuple)):
34+
return value
35+
else:
36+
return [value]

tests/test_async.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,41 @@ async def test_async_client_proxy_discovery(mock_client_proxy_configuration):
115115
assert proxy_url.host == b"fakehttpproxyhost"
116116
assert proxy_url.port == 6374
117117
assert proxy_url.target == b"/"
118+
119+
120+
@pytest.mark.asyncio
121+
@pytest.mark.skipif(not biothings_client._CACHING, reason="caching libraries not installed")
122+
async def test_async_cache_client_proxy_discovery(mock_client_proxy_configuration):
123+
"""
124+
Tests for verifying that we properly auto-discover the
125+
proxy configuration from the environment using the built-in
126+
methods provided by HTTPX
127+
128+
Brought to light by user issues on VPN
129+
https://github.com/biothings/mygene.py/issues/26#issuecomment-2588065562
130+
"""
131+
client_name = "gene"
132+
gene_client = biothings_client.get_async_client(client_name)
133+
await gene_client._build_cache_http_client()
134+
135+
http_mounts = gene_client.http_client._mounts
136+
assert isinstance(http_mounts, dict)
137+
assert len(http_mounts) == 2
138+
139+
for url_pattern, http_transport in gene_client.http_client._mounts.items():
140+
assert isinstance(url_pattern, httpx._utils.URLPattern)
141+
assert isinstance(http_transport, httpx.AsyncHTTPTransport)
142+
143+
if url_pattern.pattern == "https://":
144+
proxy_url = http_transport._pool._proxy_url
145+
assert proxy_url.scheme == b"http"
146+
assert proxy_url.host == b"fakehttpsproxyhost"
147+
assert proxy_url.port == 6375
148+
assert proxy_url.target == b"/"
149+
150+
elif url_pattern.pattern == "http://":
151+
proxy_url = http_transport._pool._proxy_url
152+
assert proxy_url.scheme == b"http"
153+
assert proxy_url.host == b"fakehttpproxyhost"
154+
assert proxy_url.port == 6374
155+
assert proxy_url.target == b"/"

tests/test_sync.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,47 @@ def test_client_proxy_discovery(mock_client_proxy_configuration):
9494
client_name = "gene"
9595
gene_client = biothings_client.get_client(client_name)
9696
gene_client._build_http_client()
97+
http_mounts = gene_client.http_client._mounts
98+
assert isinstance(http_mounts, dict)
99+
assert len(http_mounts) == 2
100+
101+
for url_pattern, http_transport in http_mounts.items():
102+
assert isinstance(url_pattern, httpx._utils.URLPattern)
103+
assert isinstance(http_transport, httpx.HTTPTransport)
104+
105+
if url_pattern.pattern == "https://":
106+
proxy_url = http_transport._pool._proxy_url
107+
assert proxy_url.scheme == b"http"
108+
assert proxy_url.host == b"fakehttpsproxyhost"
109+
assert proxy_url.port == 6375
110+
assert proxy_url.target == b"/"
111+
112+
elif url_pattern.pattern == "http://":
113+
proxy_url = http_transport._pool._proxy_url
114+
assert proxy_url.scheme == b"http"
115+
assert proxy_url.host == b"fakehttpproxyhost"
116+
assert proxy_url.port == 6374
117+
assert proxy_url.target == b"/"
118+
119+
120+
@pytest.mark.skipif(not biothings_client._CACHING, reason="caching libraries not installed")
121+
def test_cache_client_proxy_discovery(mock_client_proxy_configuration):
122+
"""
123+
Tests for verifying that we properly auto-discover the
124+
proxy configuration from the environment using the built-in
125+
methods provided by HTTPX
126+
127+
Brought to light by user issues on VPN
128+
https://github.com/biothings/mygene.py/issues/26#issuecomment-2588065562
129+
"""
130+
client_name = "gene"
131+
gene_client = biothings_client.get_client(client_name)
132+
gene_client._build_cache_http_client()
133+
134+
http_mounts = gene_client.http_client._mounts
135+
assert isinstance(http_mounts, dict)
136+
assert len(http_mounts) == 2
137+
97138
for url_pattern, http_transport in gene_client.http_client._mounts.items():
98139
assert isinstance(url_pattern, httpx._utils.URLPattern)
99140
assert isinstance(http_transport, httpx.HTTPTransport)

0 commit comments

Comments
 (0)