Skip to content

Commit 12f0156

Browse files
committed
MOD: Add semantic string validation for dataset
1 parent 5eb4088 commit 12f0156

File tree

6 files changed

+81
-16
lines changed

6 files changed

+81
-16
lines changed

databento/common/validation.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,36 @@ def validate_gateway(
143143
return urlunsplit(components=("https", url_chunks.path, "", "", ""))
144144

145145

146+
def validate_semantic_string(value: str, param: str) -> str:
    """
    Validate whether a string contains a semantic value.

    A string is considered absent of meaning if:
    - It is empty.
    - It contains only whitespace.
    - It contains unprintable characters.

    Parameters
    ----------
    value : str
        The string to validate.
    param : str
        The name of the parameter being validated (for any error message).

    Returns
    -------
    str
        The given `value`, unchanged, once it has passed every check.

    Raises
    ------
    ValueError
        If the string is not meaningful.

    """
    # The checks run from most to least specific so that the error message
    # names the most precise reason the value was rejected.
    if not value:
        raise ValueError(f"The `{param}` cannot be an empty string.")
    if str.isspace(value):
        raise ValueError(f"The `{param}` cannot contain only whitespace.")
    # Embedded control characters (including tabs and newlines) are not
    # printable, so a value mixing text with them is rejected here.
    if not str.isprintable(value):
        raise ValueError(f"The `{param}` cannot contain unprintable characters.")
    return value
174+
175+
146176
def validate_smart_symbol(symbol: str) -> str:
147177
"""
148178
Validate whether symbol has a valid smart symbol format.

databento/historical/api/batch.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@
2626
optional_symbols_list_to_string,
2727
optional_values_list_to_string,
2828
)
29-
from databento.common.validation import validate_enum, validate_path
29+
from databento.common.validation import (
30+
validate_enum,
31+
validate_path,
32+
validate_semantic_string,
33+
)
3034
from databento.historical.api import API_VERSION
3135
from databento.historical.http import (
3236
BentoHttpAPI,
@@ -121,7 +125,7 @@ def submit_job(
121125
stype_in_valid = validate_enum(stype_in, SType, "stype_in")
122126
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
123127
params: List[Tuple[str, Optional[str]]] = [
124-
("dataset", dataset),
128+
("dataset", validate_semantic_string(dataset, "dataset")),
125129
("start", datetime_to_string(start)),
126130
("end", datetime_to_string(end)),
127131
("symbols", str(symbols_list)),

databento/historical/api/metadata.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
optional_datetime_to_string,
1010
optional_symbols_list_to_string,
1111
)
12-
from databento.common.validation import validate_enum, validate_maybe_enum
12+
from databento.common.validation import (
13+
validate_enum,
14+
validate_maybe_enum,
15+
validate_semantic_string,
16+
)
1317
from databento.historical.api import API_VERSION
1418
from databento.historical.http import BentoHttpAPI
1519
from requests import Response
@@ -110,7 +114,7 @@ def list_schemas(
110114
111115
"""
112116
params: List[Tuple[str, Optional[str]]] = [
113-
("dataset", dataset),
117+
("dataset", validate_semantic_string(dataset, "dataset")),
114118
("start_date", optional_date_to_string(start_date)),
115119
("end_date", optional_date_to_string(end_date)),
116120
]
@@ -152,7 +156,7 @@ def list_fields(
152156
153157
"""
154158
params: List[Tuple[str, Optional[str]]] = [
155-
("dataset", dataset),
159+
("dataset", validate_semantic_string(dataset, "dataset")),
156160
("schema", validate_maybe_enum(schema, Schema, "schema")),
157161
("encoding", validate_maybe_enum(encoding, Encoding, "encoding")),
158162
]
@@ -226,7 +230,7 @@ def list_unit_prices(
226230
227231
"""
228232
params: List[Tuple[str, Optional[str]]] = [
229-
("dataset", dataset),
233+
("dataset", validate_semantic_string(dataset, "dataset")),
230234
("mode", validate_maybe_enum(mode, FeedMode, "mode")),
231235
("schema", validate_maybe_enum(schema, Schema, "schema")),
232236
]
@@ -268,7 +272,7 @@ def list_dataset_conditions(
268272
269273
"""
270274
params: List[Tuple[str, Optional[str]]] = [
271-
("dataset", dataset),
275+
("dataset", validate_semantic_string(dataset, "dataset")),
272276
("start_date", optional_date_to_string(start_date)),
273277
("end_date", optional_date_to_string(end_date)),
274278
]
@@ -301,7 +305,7 @@ def get_dataset_range(
301305
302306
"""
303307
params: List[Tuple[str, Optional[str]]] = [
304-
("dataset", dataset),
308+
("dataset", validate_semantic_string(dataset, "dataset")),
305309
]
306310

307311
response: Response = self._get(
@@ -356,7 +360,7 @@ def get_record_count(
356360
stype_in_valid = validate_enum(stype_in, SType, "stype_in")
357361
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
358362
params: List[Tuple[str, Optional[str]]] = [
359-
("dataset", dataset),
363+
("dataset", validate_semantic_string(dataset, "dataset")),
360364
("symbols", symbols_list),
361365
("schema", str(validate_enum(schema, Schema, "schema"))),
362366
("start", optional_datetime_to_string(start)),
@@ -421,7 +425,7 @@ def get_billable_size(
421425
stype_in_valid = validate_enum(stype_in, SType, "stype_in")
422426
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
423427
params: List[Tuple[str, Optional[str]]] = [
424-
("dataset", dataset),
428+
("dataset", validate_semantic_string(dataset, "dataset")),
425429
("start", datetime_to_string(start)),
426430
("end", datetime_to_string(end)),
427431
("symbols", symbols_list),
@@ -489,7 +493,7 @@ def get_cost(
489493
stype_in_valid = validate_enum(stype_in, SType, "stype_in")
490494
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
491495
params: List[Tuple[str, str]] = [
492-
("dataset", dataset),
496+
("dataset", validate_semantic_string(dataset, "dataset")),
493497
("start", datetime_to_string(start)),
494498
("end", datetime_to_string(end)),
495499
("symbols", symbols_list),

databento/historical/api/symbology.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
datetime_to_date_string,
77
optional_symbols_list_to_string,
88
)
9-
from databento.common.validation import validate_enum
9+
from databento.common.validation import validate_enum, validate_semantic_string
1010
from databento.historical.api import API_VERSION
1111
from databento.historical.http import BentoHttpAPI
1212
from requests import Response
@@ -63,7 +63,7 @@ def resolve(
6363
stype_in_valid = validate_enum(stype_in, SType, "stype_in")
6464
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
6565
params: List[Tuple[str, str]] = [
66-
("dataset", dataset),
66+
("dataset", validate_semantic_string(dataset, "dataset")),
6767
("symbols", symbols_list),
6868
("stype_in", str(stype_in_valid)),
6969
("stype_out", str(validate_enum(stype_out, SType, "stype_out"))),

databento/historical/api/timeseries.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from databento.common.deprecated import deprecated
1212
from databento.common.enums import Compression, Dataset, Encoding, Schema, SType
1313
from databento.common.parsing import datetime_to_string, optional_symbols_list_to_string
14-
from databento.common.validation import validate_enum
14+
from databento.common.validation import validate_enum, validate_semantic_string
1515
from databento.historical.api import API_VERSION
1616
from databento.historical.error import BentoWarning
1717
from databento.historical.http import BentoHttpAPI
@@ -120,7 +120,7 @@ def get_range(
120120
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
121121
schema_valid = validate_enum(schema, Schema, "schema")
122122
params: List[Tuple[str, Optional[str]]] = [
123-
("dataset", dataset),
123+
("dataset", validate_semantic_string(dataset, "dataset")),
124124
("start", datetime_to_string(start)),
125125
("end", datetime_to_string(end)),
126126
("symbols", symbols_list),
@@ -256,7 +256,7 @@ async def get_range_async(
256256
symbols_list = optional_symbols_list_to_string(symbols, stype_in_valid)
257257
schema_valid = validate_enum(schema, Schema, "schema")
258258
params: List[Tuple[str, Optional[str]]] = [
259-
("dataset", dataset),
259+
("dataset", validate_semantic_string(dataset, "dataset")),
260260
("start", datetime_to_string(start)),
261261
("end", datetime_to_string(end)),
262262
("symbols", symbols_list),

tests/test_common_validation.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
validate_gateway,
99
validate_maybe_enum,
1010
validate_path,
11+
validate_semantic_string,
1112
validate_smart_symbol,
1213
)
1314

@@ -120,3 +121,29 @@ def test_validate_smart_symbol(
120121
else:
121122
with pytest.raises(expected):
122123
validate_smart_symbol(symbol)
124+
125+
126+
@pytest.mark.parametrize(
    "value,expected",
    [
        pytest.param("nick", "nick"),
        pytest.param("", ValueError, id="empty"),
        pytest.param(" ", ValueError, id="whitespace"),
        pytest.param("foo\x00", ValueError, id="unprintable"),
    ],
)
def test_validate_semantic_string(
    value: str,
    expected: Union[str, Type[Exception]],
) -> None:
    """
    Test that validate_semantic_string rejects strings which are:
    - empty
    - whitespace
    - contain unprintable characters
    """
    if isinstance(expected, str):
        assert validate_semantic_string(value, "unittest") == expected
    else:
        # No `assert` around the call: it is expected to raise, so a return
        # value is never produced for an assertion to evaluate.
        with pytest.raises(expected):
            validate_semantic_string(value, "unittest")

0 commit comments

Comments
 (0)