Skip to content

Commit e0e6ae2

Browse files
committed
feat(LAB-4123): use http_client instead of httpx
1 parent 6bfe48b commit e0e6ae2

File tree

5 files changed

+106
-52
lines changed

5 files changed

+106
-52
lines changed

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ dependencies = [
4444
"filelock >= 3.0.0, < 4.0.0",
4545
"pip-system-certs >= 4.0.0, < 5.0.0; platform_system=='Windows'",
4646
"pyrate-limiter >= 3, < 4",
47-
"kili-formats == 1.1.0",
48-
"httpx >= 0.27.0, < 1.0.0"
47+
"kili-formats == 1.1.0"
4948
]
5049
urls = { homepage = "https://github.com/kili-technology/kili-python-sdk" }
5150

src/kili/adapters/kili_api_gateway/asset/formatters.py

Lines changed: 38 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,62 +2,53 @@
22

33
import asyncio
44
import json
5-
import os
65

7-
import httpx
6+
import requests
87

98
from kili.adapters.http_client import HttpClient
9+
from kili.adapters.kili_api_gateway.helpers.http_json import load_json_from_link
1010
from kili.core.helpers import is_url
1111
from kili.domain.types import ListOrTuple
1212

1313
# Batch size for parallel JSON response downloads (same as export service)
1414
JSON_RESPONSE_BATCH_SIZE = 10
1515

1616

17-
def load_json_from_link(link: str, http_client: HttpClient) -> dict:
18-
"""Load json from link (synchronous fallback for non-batch operations)."""
19-
if link == "" or not is_url(link):
20-
return {}
21-
22-
response = http_client.get(link, timeout=30)
23-
response.raise_for_status()
24-
return response.json()
25-
26-
27-
async def _download_json_response(url: str) -> dict:
17+
async def _download_json_response(url: str, http_client: HttpClient) -> dict:
2818
"""Download and parse JSON response from a URL using asyncio.
2919
3020
Args:
3121
url: URL to download the JSON response from
22+
http_client: HttpClient instance with SSL verification already configured
3223
3324
Returns:
3425
Parsed JSON response as a dictionary
3526
"""
3627
try:
37-
verify_env = os.getenv("KILI_VERIFY")
38-
verify = verify_env.lower() in ("true", "1", "yes") if verify_env is not None else True
39-
40-
async with httpx.AsyncClient(verify=verify) as client:
41-
response = await client.get(url, timeout=30.0)
42-
response.raise_for_status()
43-
return response.json()
44-
except (httpx.HTTPError, json.JSONDecodeError):
28+
# Run synchronous requests call in a thread
29+
response = await asyncio.to_thread(http_client.get, url, timeout=30)
30+
response.raise_for_status()
31+
return response.json()
32+
except (requests.RequestException, json.JSONDecodeError):
4533
# Return empty dict on error to ensure consistent response format
4634
return {}
4735

4836

49-
async def _download_json_responses_async(url_to_label_mapping: list[tuple[str, dict]]) -> None:
37+
async def _download_json_responses_async(
38+
url_to_label_mapping: list[tuple[str, dict]], http_client: HttpClient
39+
) -> None:
5040
"""Download JSON responses in parallel using asyncio.
5141
5242
Args:
5343
url_to_label_mapping: List of tuples (url, label_dict) to download
44+
http_client: HttpClient instance with SSL verification already configured
5445
"""
5546
# Process in batches to limit concurrent connections
5647
for i in range(0, len(url_to_label_mapping), JSON_RESPONSE_BATCH_SIZE):
5748
batch = url_to_label_mapping[i : i + JSON_RESPONSE_BATCH_SIZE]
5849

5950
# Download all URLs in the batch in parallel using asyncio.gather
60-
download_tasks = [_download_json_response(url) for url, _ in batch]
51+
download_tasks = [_download_json_response(url, http_client) for url, _ in batch]
6152
json_responses = await asyncio.gather(*download_tasks)
6253

6354
# Assign the downloaded responses back to their labels and remove the URL
@@ -67,17 +58,37 @@ async def _download_json_responses_async(url_to_label_mapping: list[tuple[str, d
6758
del label["jsonResponseUrl"]
6859

6960

70-
def download_json_responses_parallel(url_to_label_mapping: list[tuple[str, dict]]) -> None:
61+
def download_json_responses_parallel(
62+
url_to_label_mapping: list[tuple[str, dict]], http_client: HttpClient
63+
) -> None:
7164
"""Download JSON responses in parallel and assign to labels.
7265
7366
Args:
7467
url_to_label_mapping: List of tuples (url, label_dict) to download
68+
http_client: HttpClient instance with SSL verification already configured
7569
"""
7670
if not url_to_label_mapping:
7771
return
7872

79-
# Run async downloads in a synchronous context
80-
asyncio.run(_download_json_responses_async(url_to_label_mapping))
73+
# Check if we're already in an event loop (e.g., Jupyter notebook, async web framework)
74+
try:
75+
asyncio.get_running_loop()
76+
except RuntimeError:
77+
# No running loop, safe to use asyncio.run() for parallel downloads
78+
asyncio.run(_download_json_responses_async(url_to_label_mapping, http_client))
79+
else:
80+
# Already in a loop - fall back to sequential downloads to avoid RuntimeError
81+
# This happens in Jupyter notebooks, FastAPI, and other async contexts
82+
for url, label in url_to_label_mapping:
83+
try:
84+
response = http_client.get(url, timeout=30)
85+
response.raise_for_status()
86+
label["jsonResponse"] = response.json()
87+
except (requests.RequestException, json.JSONDecodeError):
88+
label["jsonResponse"] = {}
89+
90+
if "jsonResponseUrl" in label:
91+
del label["jsonResponseUrl"]
8192

8293

8394
def _parse_label_json_response(label: dict) -> None:
@@ -129,6 +140,6 @@ def load_asset_json_fields(asset: dict, fields: ListOrTuple[str], http_client: H
129140
_process_label_json_response(asset["latestLabel"], url_to_label_mapping)
130141

131142
if url_to_label_mapping:
132-
download_json_responses_parallel(url_to_label_mapping)
143+
download_json_responses_parallel(url_to_label_mapping, http_client)
133144

134145
return asset
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""HTTP JSON helpers for downloading JSON data from URLs."""
2+
3+
from kili.adapters.http_client import HttpClient
4+
from kili.core.helpers import is_url
5+
6+
7+
def load_json_from_link(link: str, http_client: HttpClient) -> dict:
8+
"""Load json from link.
9+
10+
Args:
11+
link: URL to download JSON from
12+
http_client: HttpClient instance with SSL verification already configured
13+
14+
Returns:
15+
Parsed JSON response as a dictionary, or empty dict if link is invalid
16+
"""
17+
if link == "" or not is_url(link):
18+
return {}
19+
20+
response = http_client.get(link, timeout=30)
21+
response.raise_for_status()
22+
return response.json()

src/kili/adapters/kili_api_gateway/label/formatters.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,11 @@
33
import json
44

55
from kili.adapters.http_client import HttpClient
6+
from kili.adapters.kili_api_gateway.helpers.http_json import load_json_from_link
67
from kili.core.helpers import is_url
78
from kili.domain.types import ListOrTuple
89

910

10-
def load_json_from_link(link: str, http_client: HttpClient) -> dict:
11-
"""Load json from link."""
12-
if link == "" or not is_url(link):
13-
return {}
14-
15-
response = http_client.get(link, timeout=30)
16-
response.raise_for_status()
17-
return response.json()
18-
19-
2011
def load_label_json_fields(label: dict, fields: ListOrTuple[str], http_client: HttpClient) -> dict:
2112
"""Load json fields of a label."""
2213
if "jsonResponse" in fields:

tests/unit/adapters/kili_api_gateway/asset/test_formatters.py

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
JSON_RESPONSE_BATCH_SIZE,
1414
download_json_responses_parallel,
1515
load_asset_json_fields,
16-
load_json_from_link,
1716
)
17+
from kili.adapters.kili_api_gateway.helpers.http_json import load_json_from_link
1818

1919

2020
class TestLoadJsonFromLink:
@@ -56,6 +56,7 @@ class TestDownloadJsonResponsesParallel:
5656

5757
def test_download_single_json_response(self):
5858
"""Test downloading a single JSON response."""
59+
http_client = Mock()
5960
label = {"id": "label1", "jsonResponseUrl": "https://example.com/label1.json"}
6061
url_to_label_mapping = [("https://example.com/label1.json", label)]
6162

@@ -65,7 +66,7 @@ def test_download_single_json_response(self):
6566
) as mock_download:
6667
mock_download.return_value = {"annotation": "data"}
6768

68-
download_json_responses_parallel(url_to_label_mapping)
69+
download_json_responses_parallel(url_to_label_mapping, http_client)
6970

7071
# Verify the JSON was downloaded and assigned
7172
assert label["jsonResponse"] == {"annotation": "data"}
@@ -75,6 +76,7 @@ def test_download_single_json_response(self):
7576

7677
def test_download_multiple_json_responses(self):
7778
"""Test downloading multiple JSON responses in parallel."""
79+
http_client = Mock()
7880
label1 = {"id": "label1", "jsonResponseUrl": "https://example.com/label1.json"}
7981
label2 = {"id": "label2", "jsonResponseUrl": "https://example.com/label2.json"}
8082
label3 = {"id": "label3", "jsonResponseUrl": "https://example.com/label3.json"}
@@ -86,7 +88,7 @@ def test_download_multiple_json_responses(self):
8688
]
8789

8890
# Create mock side effect for different URLs
89-
async def mock_download(url):
91+
async def mock_download(url, client):
9092
if "label1" in url:
9193
return {"data": "label1"}
9294
elif "label2" in url:
@@ -99,7 +101,7 @@ async def mock_download(url):
99101
"kili.adapters.kili_api_gateway.asset.formatters._download_json_response",
100102
side_effect=mock_download,
101103
):
102-
download_json_responses_parallel(url_to_label_mapping)
104+
download_json_responses_parallel(url_to_label_mapping, http_client)
103105

104106
# Verify all labels got their JSON
105107
assert label1["jsonResponse"] == {"data": "label1"}
@@ -113,25 +115,27 @@ async def mock_download(url):
113115

114116
def test_download_handles_request_exception(self):
115117
"""Test that request exceptions are handled gracefully."""
118+
http_client = Mock()
116119
label = {"id": "label1", "jsonResponseUrl": "https://example.com/label1.json"}
117120
url_to_label_mapping = [("https://example.com/label1.json", label)]
118121

119-
# Mock download to raise httpx.HTTPError
122+
# Mock download to return empty dict on error
120123
with patch(
121124
"kili.adapters.kili_api_gateway.asset.formatters._download_json_response",
122125
new_callable=AsyncMock,
123126
) as mock_download:
124127
mock_download.return_value = {} # Returns empty dict on error
125128

126129
# Should not raise exception
127-
download_json_responses_parallel(url_to_label_mapping)
130+
download_json_responses_parallel(url_to_label_mapping, http_client)
128131

129132
# Should set empty dict on error
130133
assert label["jsonResponse"] == {}
131134
assert "jsonResponseUrl" not in label
132135

133136
def test_download_handles_json_decode_error(self):
134137
"""Test that JSON decode errors are handled gracefully."""
138+
http_client = Mock()
135139
label = {"id": "label1", "jsonResponseUrl": "https://example.com/label1.json"}
136140
url_to_label_mapping = [("https://example.com/label1.json", label)]
137141

@@ -143,14 +147,15 @@ def test_download_handles_json_decode_error(self):
143147
mock_download.return_value = {} # Returns empty dict on error
144148

145149
# Should not raise exception
146-
download_json_responses_parallel(url_to_label_mapping)
150+
download_json_responses_parallel(url_to_label_mapping, http_client)
147151

148152
# Should set empty dict on error
149153
assert label["jsonResponse"] == {}
150154
assert "jsonResponseUrl" not in label
151155

152156
def test_download_handles_timeout_error(self):
153157
"""Test that timeout errors are handled gracefully."""
158+
http_client = Mock()
154159
label = {"id": "label1", "jsonResponseUrl": "https://example.com/label1.json"}
155160
url_to_label_mapping = [("https://example.com/label1.json", label)]
156161

@@ -162,14 +167,15 @@ def test_download_handles_timeout_error(self):
162167
mock_download.return_value = {} # Returns empty dict on error
163168

164169
# Should not raise exception
165-
download_json_responses_parallel(url_to_label_mapping)
170+
download_json_responses_parallel(url_to_label_mapping, http_client)
166171

167172
# Should set empty dict on error
168173
assert label["jsonResponse"] == {}
169174
assert "jsonResponseUrl" not in label
170175

171176
def test_download_processes_in_batches(self):
172177
"""Test that downloads are processed in batches of JSON_RESPONSE_BATCH_SIZE."""
178+
http_client = Mock()
173179
# Create more labels than batch size
174180
num_labels = JSON_RESPONSE_BATCH_SIZE + 5
175181
labels = [
@@ -188,7 +194,7 @@ def test_download_processes_in_batches(self):
188194
mock_download.return_value = {"data": "test"}
189195

190196
# Call the function
191-
download_json_responses_parallel(url_to_label_mapping)
197+
download_json_responses_parallel(url_to_label_mapping, http_client)
192198

193199
# Verify all labels got their JSON response
194200
for label in labels:
@@ -200,6 +206,7 @@ def test_download_processes_in_batches(self):
200206

201207
def test_download_mixed_success_and_failure(self):
202208
"""Test downloading with some successes and some failures."""
209+
http_client = Mock()
203210
label_success = {"id": "success", "jsonResponseUrl": "https://example.com/success.json"}
204211
label_fail = {"id": "fail", "jsonResponseUrl": "https://example.com/fail.json"}
205212

@@ -209,7 +216,7 @@ def test_download_mixed_success_and_failure(self):
209216
]
210217

211218
# Create mock side effect for different URLs
212-
async def mock_download(url):
219+
async def mock_download(url, client):
213220
if "success" in url:
214221
return {"status": "ok"}
215222
return {} # Failure returns empty dict
@@ -218,7 +225,7 @@ async def mock_download(url):
218225
"kili.adapters.kili_api_gateway.asset.formatters._download_json_response",
219226
side_effect=mock_download,
220227
):
221-
download_json_responses_parallel(url_to_label_mapping)
228+
download_json_responses_parallel(url_to_label_mapping, http_client)
222229

223230
# Success should have data
224231
assert label_success["jsonResponse"] == {"status": "ok"}
@@ -228,6 +235,30 @@ async def mock_download(url):
228235
assert label_fail["jsonResponse"] == {}
229236
assert "jsonResponseUrl" not in label_fail
230237

238+
def test_download_in_existing_event_loop(self):
239+
"""Test that downloads work when called from within an existing event loop."""
240+
import asyncio
241+
242+
http_client = Mock()
243+
label = {"id": "label1", "jsonResponseUrl": "https://example.com/label1.json"}
244+
url_to_label_mapping = [("https://example.com/label1.json", label)]
245+
246+
# Mock the http_client.get response
247+
mock_response = Mock()
248+
mock_response.json.return_value = {"data": "test"}
249+
http_client.get.return_value = mock_response
250+
251+
# Run inside an existing event loop (simulating Jupyter/FastAPI)
252+
async def run_in_loop():
253+
download_json_responses_parallel(url_to_label_mapping, http_client)
254+
255+
asyncio.run(run_in_loop())
256+
257+
# Verify the JSON was downloaded (sequentially in this case)
258+
assert label["jsonResponse"] == {"data": "test"}
259+
assert "jsonResponseUrl" not in label
260+
http_client.get.assert_called_once_with("https://example.com/label1.json", timeout=30)
261+
231262

232263
class TestLoadAssetJsonFields:
233264
"""Test load_asset_json_fields integration with download_json_responses_parallel."""
@@ -283,7 +314,7 @@ def test_load_asset_with_labels_json_response_url(self):
283314
fields = ["id", "labels.jsonResponse", "labels.jsonResponseUrl"]
284315

285316
# Create mock side effect for different URLs
286-
async def mock_download(url):
317+
async def mock_download(url, client):
287318
if "label1" in url:
288319
return {"data": "label1"}
289320
elif "label2" in url:
@@ -345,7 +376,7 @@ def test_load_asset_with_both_labels_and_latest_label(self):
345376
]
346377

347378
# Create mock side effect for different URLs
348-
async def mock_download(url):
379+
async def mock_download(url, client):
349380
if "label1" in url:
350381
return {"data": "label1"}
351382
elif "label2" in url:

0 commit comments

Comments
 (0)