Skip to content

Commit 4566e30

Browse files
committed
MOD: Batch download now returns a list of paths
1 parent 7a38465 commit 4566e30

File tree

4 files changed

+58
-14
lines changed

4 files changed

+58
-14
lines changed

databento/historical/api/batch.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ def download(
233233
job_id: str,
234234
filename_to_download: Optional[str] = None,
235235
enable_partial_downloads: bool = True,
236-
) -> None:
236+
) -> List[Path]:
237237
"""
238238
Download a batch job or a specific file to `{output_dir}/{job_id}/`.
239239
@@ -254,6 +254,18 @@ def download(
254254
enable_partial_downloads : bool, default True
255255
Whether partially downloaded files will be resumed using range request(s).
256256
257+
Returns
258+
-------
259+
List[Path]
260+
A list of paths to the downloaded files.
261+
262+
Raises
263+
------
264+
RuntimeError
265+
When no files were found for the batch job.
266+
ValueError
267+
When `filename_to_download` is not one of the files for the batch job.
268+
257269
"""
258270
output_dir = validate_path(output_dir, "output_dir")
259271
self._check_api_key()
@@ -270,7 +282,7 @@ def download(
270282

271283
if not job_files:
272284
logger.error("Cannot download batch job %s (no files found).", job_id)
273-
return
285+
raise RuntimeError(f"no files for batch job {job_id}")
274286

275287
if filename_to_download:
276288
# A specific file is being requested
@@ -287,12 +299,15 @@ def download(
287299
job_id,
288300
filename_to_download,
289301
)
290-
return
302+
raise ValueError(
303+
f"{filename_to_download} is not a file for batch job {job_id}",
304+
)
291305

292306
# Prepare job directory
293307
job_dir = Path(output_dir) / job_id
294308
os.makedirs(job_dir, exist_ok=True)
295309

310+
file_paths = []
296311
for details in job_files:
297312
filename = str(details["filename"])
298313
output_path = job_dir / filename
@@ -320,6 +335,9 @@ def download(
320335
output_path=output_path,
321336
enable_partial_downloads=enable_partial_downloads,
322337
)
338+
file_paths.append(output_path)
339+
340+
return file_paths
323341

324342
def _download_file(
325343
self,
@@ -355,7 +373,7 @@ async def download_async(
355373
job_id: str,
356374
filename_to_download: Optional[str] = None,
357375
enable_partial_downloads: bool = True,
358-
) -> None:
376+
) -> List[Path]:
359377
"""
360378
Asynchronously download a batch job or a specific file to
361379
`{output_dir}/{job_id}/`.
@@ -377,6 +395,18 @@ async def download_async(
377395
enable_partial_downloads : bool, default True
378396
Whether partially downloaded files will be resumed using range request(s).
379397
398+
Returns
399+
-------
400+
List[Path]
401+
A list of paths to the downloaded files.
402+
403+
Raises
404+
------
405+
RuntimeError
406+
When no files were found for the batch job.
407+
ValueError
408+
When `filename_to_download` is not one of the files for the batch job.
409+
380410
"""
381411
output_dir = validate_path(output_dir, "output_dir")
382412
self._check_api_key()
@@ -393,7 +423,7 @@ async def download_async(
393423

394424
if not job_files:
395425
logger.error("Cannot download batch job %s (no files found).", job_id)
396-
return
426+
raise RuntimeError(f"no files for batch job {job_id}")
397427

398428
if filename_to_download:
399429
# A specific file is being requested
@@ -406,16 +436,19 @@ async def download_async(
406436
break
407437
if not is_file_found:
408438
logger.error(
409-
"Cannot download batch job %s file " "(%s not found)",
439+
"Cannot download batch job %s file (%s not found)",
410440
job_id,
411441
filename_to_download,
412442
)
413-
return
443+
raise ValueError(
444+
f"{filename_to_download} is not a file for batch job {job_id}",
445+
)
414446

415447
# Prepare job directory
416448
job_dir = Path(output_dir) / job_id
417449
os.makedirs(job_dir, exist_ok=True)
418450

451+
file_paths = []
419452
for details in job_files:
420453
filename = str(details["filename"])
421454
output_path = job_dir / filename
@@ -443,6 +476,9 @@ async def download_async(
443476
output_path=output_path,
444477
enable_partial_downloads=enable_partial_downloads,
445478
)
479+
file_paths.append(output_path)
480+
481+
return file_paths
446482

447483
async def _download_file_async(
448484
self,

examples/historical_batch_download.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
client = Historical(key=key)
77

88
# Will download all job files to a `my_data/YOUR_JOB_ID/` directory
9-
client.batch.download(
9+
downloaded_files = client.batch.download(
1010
output_dir="my_data",
1111
job_id="YOUR_JOB_ID", # <-- Discover this from `.list_jobs(...)`
1212
)
13+
14+
for file in downloaded_files:
15+
print(f"Downloaded {file.name}")

examples/historical_batch_download_async.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,14 @@ async def example_download_batch_job_async() -> None:
88
client = Historical(key=key)
99

1010
# Will download all job files to a `my_data/YOUR_JOB_ID/` directory
11-
await client.batch.download_async(
11+
downloaded_files = await client.batch.download_async(
1212
output_dir="my_data",
1313
job_id="YOUR_JOB_ID", # <-- Discover this from `.list_jobs(...)`
1414
)
1515

16+
for file in downloaded_files:
17+
print(f"Downloaded {file.name}")
18+
1619

1720
if __name__ == "__main__":
1821
asyncio.run(example_download_batch_job_async())

tests/test_historical_batch.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,11 +170,13 @@ def test_batch_download_single_file_sends_expected_request(
170170
filename = "glbx-mdp3-20220610.mbo.csv.zst"
171171

172172
# Act
173-
self.client.batch.download(
174-
job_id=job_id,
175-
output_dir="my_data",
176-
filename_to_download=filename,
177-
)
173+
with pytest.raises(ValueError):
174+
# We expect this to fail since this is not a real batch job.
175+
self.client.batch.download(
176+
job_id=job_id,
177+
output_dir="my_data",
178+
filename_to_download=filename,
179+
)
178180

179181
# Assert
180182
call = mocked_get.call_args.kwargs

0 commit comments

Comments
 (0)