Skip to content

Commit 4c053d2

Browse files
committed
MOD: Improve Python client path typing+validation
1 parent 549762c commit 4c053d2

File tree

5 files changed

+74
-27
lines changed

5 files changed

+74
-27
lines changed

databento/common/bento.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import abc
22
import datetime as dt
33
from io import BytesIO
4+
from os import PathLike
45
from pathlib import Path
56
from typing import IO, TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
67

@@ -53,14 +54,14 @@ class FileDataSource(DataSource):
5354
The name of the file.
5455
nbytes : int
5556
The size of the data in bytes; equal to the file size.
56-
path : Path
57+
path : PathLike or str
5758
The path of the file.
5859
reader : IO[bytes]
5960
A `BufferedReader` for this file-backed data.
6061
6162
"""
6263

63-
def __init__(self, source: Union[Path, str]):
64+
def __init__(self, source: Union[PathLike[str], str]):
6465
self._path = Path(source)
6566

6667
if not self._path.is_file() or not self._path.exists():
@@ -615,7 +616,7 @@ def symbols(self) -> List[str]:
615616
return self._metadata["symbols"]
616617

617618
@classmethod
618-
def from_file(cls, path: Union[Path, str]) -> "Bento":
619+
def from_file(cls, path: Union[PathLike[str], str]) -> "Bento":
619620
"""
620621
Load the data from a DBN file at the given path.
621622

databento/common/validation.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,39 @@
11
from enum import Enum
2+
from os import PathLike
3+
from pathlib import Path
24
from typing import Optional, Type, TypeVar, Union
35
from urllib.parse import urlsplit, urlunsplit
46

57

68
E = TypeVar("E", bound=Enum)
79

810

11+
def validate_path(value: Union["PathLike[str]", str], param: str) -> Path:
    """
    Validate whether the given value is a valid path.

    Parameters
    ----------
    value : PathLike or str
        The value to validate.
    param : str
        The name of the parameter being validated (for any error message).

    Returns
    -------
    Path
        A valid path.

    Raises
    ------
    TypeError
        If `value` cannot be converted to a `pathlib.Path`.

    """
    # The `PathLike[str]` annotation is quoted so it is never subscripted at
    # import time; `os.PathLike` only supports subscripting on Python 3.9+.
    try:
        return Path(value)
    except TypeError as e:
        # `pathlib.Path` rejects `bytes`, so it is not advertised as accepted.
        raise TypeError(
            f"The `{param}` was not a valid path type. "
            "Use any of [str, os.PathLike].",
        ) from e
) from e
35+
36+
937
def validate_enum(
1038
value: object,
1139
enum: Type[E],
@@ -37,12 +65,12 @@ def validate_enum(
3765
"""
3866
try:
3967
return enum(value)
40-
except ValueError as exc:
68+
except ValueError as e:
4169
valid = list(map(str, enum))
4270
raise ValueError(
4371
f"The `{param}` was not a valid value of {enum}, was '{value}'. "
4472
f"Use any of {valid}.",
45-
) from exc
73+
) from e
4674

4775

4876
def validate_maybe_enum(

databento/historical/api/batch.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
from datetime import date
3+
from os import PathLike
34
from pathlib import Path
45
from typing import Any, Dict, List, Optional, Tuple, Union
56

@@ -23,7 +24,7 @@
2324
optional_symbols_list_to_string,
2425
optional_values_list_to_string,
2526
)
26-
from databento.common.validation import validate_enum
27+
from databento.common.validation import validate_enum, validate_path
2728
from databento.historical.api import API_VERSION
2829
from databento.historical.http import (
2930
BentoHttpAPI,
@@ -212,7 +213,7 @@ def list_files(self, job_id: str) -> List[Dict[str, Any]]:
212213

213214
def download(
214215
self,
215-
output_dir: Union[Path, str],
216+
output_dir: Union[PathLike[str], str],
216217
job_id: str,
217218
filename_to_download: Optional[str] = None,
218219
enable_partial_downloads: bool = True,
@@ -227,7 +228,7 @@ def download(
227228
228229
Parameters
229230
----------
230-
output_dir: Path or str
231+
output_dir : PathLike or str
231232
The directory to download the file(s) to.
232233
job_id : str
233234
The batch job identifier.
@@ -238,6 +239,7 @@ def download(
238239
If partially downloaded files will be resumed using range request(s).
239240
240241
"""
242+
output_dir = validate_path(output_dir, "output_dir")
241243
self._check_api_key()
242244

243245
params: List[Tuple[str, Optional[str]]] = [
@@ -271,12 +273,12 @@ def download(
271273
return
272274

273275
# Prepare job directory
274-
job_dir = os.path.join(output_dir, job_id)
276+
job_dir = Path(output_dir) / job_id
275277
os.makedirs(job_dir, exist_ok=True)
276278

277279
for details in job_files:
278280
filename = str(details["filename"])
279-
output_path = os.path.join(job_dir, filename)
281+
output_path = job_dir / filename
280282
log_info(
281283
f"Downloading batch job file to {output_path} ...",
282284
)
@@ -305,7 +307,7 @@ def _download_file(
305307
self,
306308
url: str,
307309
filesize: int,
308-
output_path: str,
310+
output_path: Path,
309311
enable_partial_downloads: bool,
310312
) -> None:
311313
headers, mode = self._get_file_download_headers_and_mode(
@@ -329,7 +331,7 @@ def _download_file(
329331

330332
async def download_async(
331333
self,
332-
output_dir: Union[Path, str],
334+
output_dir: Union[PathLike[str], str],
333335
job_id: str,
334336
filename_to_download: Optional[str] = None,
335337
enable_partial_downloads: bool = True,
@@ -345,7 +347,7 @@ async def download_async(
345347
346348
Parameters
347349
----------
348-
output_dir: Path or str
350+
output_dir : PathLike or str
349351
The directory to download the file(s) to.
350352
job_id : str
351353
The batch job identifier.
@@ -356,6 +358,7 @@ async def download_async(
356358
If partially downloaded files will be resumed using range request(s).
357359
358360
"""
361+
output_dir = validate_path(output_dir, "output_dir")
359362
self._check_api_key()
360363

361364
params: List[Tuple[str, Optional[str]]] = [
@@ -389,12 +392,12 @@ async def download_async(
389392
return
390393

391394
# Prepare job directory
392-
job_dir = os.path.join(output_dir, job_id)
395+
job_dir = Path(output_dir) / job_id
393396
os.makedirs(job_dir, exist_ok=True)
394397

395398
for details in job_files:
396399
filename = str(details["filename"])
397-
output_path = os.path.join(job_dir, filename)
400+
output_path = job_dir / filename
398401
log_info(
399402
f"Downloading batch job file to {output_path} ...",
400403
)
@@ -423,7 +426,7 @@ async def _download_file_async(
423426
self,
424427
url: str,
425428
filesize: int,
426-
output_path: str,
429+
output_path: Path,
427430
enable_partial_downloads: bool,
428431
) -> None:
429432
headers, mode = self._get_file_download_headers_and_mode(
@@ -449,15 +452,15 @@ async def _download_file_async(
449452
def _get_file_download_headers_and_mode(
450453
self,
451454
filesize: int,
452-
output_path: str,
455+
output_path: Path,
453456
enable_partial_downloads: bool,
454457
) -> Tuple[Dict[str, str], str]:
455458
headers: Dict[str, str] = self._headers.copy()
456459
mode = "wb"
457460

458461
# Check if file already exists in partially downloaded state
459-
if enable_partial_downloads and os.path.isfile(output_path):
460-
existing_size = os.path.getsize(output_path)
462+
if enable_partial_downloads and output_path.is_file():
463+
existing_size = output_path.stat().st_size
461464
if existing_size < filesize:
462465
# Make range request for partial download,
463466
# will be from next byte to end of file.

databento/historical/api/timeseries.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import warnings
22
from datetime import date
33
from io import BufferedIOBase, BytesIO
4-
from pathlib import Path
4+
from os import PathLike
55
from typing import List, Optional, Tuple, Union
66

77
import pandas as pd
@@ -35,7 +35,7 @@ def stream(
3535
stype_in: Union[SType, str] = "native",
3636
stype_out: Union[SType, str] = "product_id",
3737
limit: Optional[int] = None,
38-
path: Optional[Union[Path, str]] = None,
38+
path: Optional[Union[PathLike[str], str]] = None,
3939
) -> Bento:
4040
"""
4141
The `.stream` method is deprecated and will be removed in a future version.
@@ -63,7 +63,7 @@ def get_range(
6363
stype_in: Union[SType, str] = "native",
6464
stype_out: Union[SType, str] = "product_id",
6565
limit: Optional[int] = None,
66-
path: Optional[Union[Path, str]] = None,
66+
path: Optional[Union[PathLike[str], str]] = None,
6767
) -> Bento:
6868
"""
6969
Request a historical time series data stream from Databento.
@@ -98,7 +98,7 @@ def get_range(
9898
The output symbology type to resolve to.
9999
limit : int, optional
100100
The maximum number of records to return. If `None` then no limit.
101-
path : Path or str, optional
101+
path : PathLike or str, optional
102102
The path to stream the data to on disk (will then return a `Bento`).
103103
104104
Returns
@@ -170,7 +170,7 @@ async def stream_async(
170170
stype_in: Union[SType, str] = "native",
171171
stype_out: Union[SType, str] = "product_id",
172172
limit: Optional[int] = None,
173-
path: Optional[Union[Path, str]] = None,
173+
path: Optional[Union[PathLike[str], str]] = None,
174174
) -> Bento:
175175
"""
176176
The `.stream_async` method is deprecated and will be removed in a future
@@ -199,10 +199,10 @@ async def get_range_async(
199199
stype_in: Union[SType, str] = "native",
200200
stype_out: Union[SType, str] = "product_id",
201201
limit: Optional[int] = None,
202-
path: Optional[Union[Path, str]] = None,
202+
path: Optional[Union[PathLike[str], str]] = None,
203203
) -> Bento:
204204
"""
205-
Request a historical time series data stream from Databento asynchronously.
205+
Asynchronously request a historical time series data stream from Databento.
206206
207207
Makes a `GET /timeseries.get_range` HTTP request.
208208
@@ -234,7 +234,7 @@ async def get_range_async(
234234
The output symbology type to resolve to.
235235
limit : int, optional
236236
The maximum number of records to return. If `None` then no limit.
237-
path : Path or str, optional
237+
path : PathLike or str, optional
238238
The path to stream the data to on disk (will then return a `Bento`).
239239
240240
Returns

tests/test_common_validation.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,26 @@
77
validate_enum,
88
validate_gateway,
99
validate_maybe_enum,
10+
validate_path,
1011
validate_smart_symbol,
1112
)
1213

1314

1415
class TestValidation:
16+
@pytest.mark.parametrize(
    "value",
    [
        # Each entry is one test case. The original `[None, 0]` entry was a
        # single list-valued case, so `None` and `0` were never exercised on
        # their own; it is kept below as an additional wrong-typed input.
        None,
        0,
        b"path",
        [None, 0],
    ],
)
def test_validate_path_given_wrong_types_raises_type_error(
    self,
    value: Any,
) -> None:
    """Check that `validate_path` raises `TypeError` for non-path inputs."""
    # Arrange, Act, Assert
    with pytest.raises(TypeError):
        validate_path(value, "param")
29+
1530
@pytest.mark.parametrize(
1631
"value, enum",
1732
[

0 commit comments

Comments
 (0)