Skip to content

Commit f4e2a13

Browse files
authored
VER: Release 0.22.1
See release notes.
2 parents 3247f49 + c2814b3 commit f4e2a13

File tree

7 files changed

+313
-121
lines changed

7 files changed

+313
-121
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## 0.22.1 - 2023-10-24
4+
5+
#### Bug fixes
6+
- Fixed an issue where `DBNStore.to_csv` and `DBNStore.to_json` were mapping symbols even when `map_symbols` was set to `False`
7+
- Fixed an issue where empty symbology mappings caused a `ValueError` when loading symbols into the `DBNStore` instrument map
8+
39
## 0.22.0 - 2023-10-23
410

511
#### Enhancements

databento/common/dbnstore.py

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
from databento.common.error import BentoError
3737
from databento.common.iterator import chunk
3838
from databento.common.symbology import InstrumentMap
39-
from databento.common.symbology import SymbolInterval
4039
from databento.common.validation import validate_enum
4140
from databento.common.validation import validate_file_write_path
4241
from databento.common.validation import validate_maybe_enum
@@ -812,23 +811,13 @@ def to_csv(
812811
raise ValueError("a schema must be specified for mixed DBN data")
813812
schema = self.schema
814813

815-
record_type = SCHEMA_STRUCT_MAP[schema]
816-
record_iter = filter(lambda r: isinstance(r, record_type), self)
817-
818-
if map_symbols:
819-
self._instrument_map.insert_metadata(self.metadata)
820-
symbol_map = self._instrument_map._data
821-
else:
822-
symbol_map = None
823-
824814
with open(path, "xb") as output:
825815
self._transcode(
826816
output=output,
827-
records_iter=record_iter,
828817
encoding=Encoding.CSV,
829818
pretty_px=pretty_px,
830819
pretty_ts=pretty_ts,
831-
symbol_map=symbol_map,
820+
map_symbols=map_symbols,
832821
compression=compression,
833822
schema=schema,
834823
)
@@ -1025,23 +1014,13 @@ def to_json(
10251014
raise ValueError("a schema must be specified for mixed DBN data")
10261015
schema = self.schema
10271016

1028-
record_type = SCHEMA_STRUCT_MAP[schema]
1029-
record_iter = filter(lambda r: isinstance(r, record_type), self)
1030-
1031-
if map_symbols:
1032-
self._instrument_map.insert_metadata(self.metadata)
1033-
symbol_map = self._instrument_map._data
1034-
else:
1035-
symbol_map = None
1036-
10371017
with open(path, "xb") as output:
10381018
self._transcode(
10391019
output=output,
1040-
records_iter=record_iter,
10411020
encoding=Encoding.JSON,
10421021
pretty_px=pretty_px,
10431022
pretty_ts=pretty_ts,
1044-
symbol_map=symbol_map,
1023+
map_symbols=map_symbols,
10451024
compression=compression,
10461025
schema=schema,
10471026
)
@@ -1114,27 +1093,33 @@ def to_ndarray(
11141093
def _transcode(
11151094
self,
11161095
output: BinaryIO,
1117-
records_iter: Iterator[DBNRecord],
11181096
encoding: Encoding,
11191097
pretty_px: bool,
11201098
pretty_ts: bool,
1121-
symbol_map: dict[int, list[SymbolInterval]] | None,
1099+
map_symbols: bool,
11221100
compression: Compression,
11231101
schema: Schema,
11241102
) -> None:
1103+
if map_symbols:
1104+
self._instrument_map.insert_metadata(self.metadata)
1105+
symbol_map = self._instrument_map._data
1106+
else:
1107+
symbol_map = None
1108+
11251109
transcoder = Transcoder(
11261110
file=output,
11271111
encoding=encoding,
11281112
compression=compression,
11291113
pretty_px=pretty_px,
11301114
pretty_ts=pretty_ts,
1115+
map_symbols=map_symbols,
11311116
has_metadata=True,
11321117
symbol_map=symbol_map, # type: ignore [arg-type]
11331118
schema=schema,
11341119
)
11351120

11361121
transcoder.write(bytes(self.metadata))
1137-
for records in chunk(records_iter, 2**16):
1122+
for records in chunk(self, 2**16):
11381123
for record in records:
11391124
transcoder.write(bytes(record))
11401125
transcoder.flush()

databento/common/symbology.py

Lines changed: 70 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,23 @@
1919
ALL_SYMBOLS = "ALL_SYMBOLS"
2020

2121

22-
class SymbolInterval(NamedTuple):
22+
class MappingInterval(NamedTuple):
2323
"""
2424
Interval inside which a symbol is defined.
2525
2626
Attributes
2727
----------
28-
start: dt.date
28+
start_date: dt.date
2929
The start time of the interval.
30-
end: dt.date
30+
end_date: dt.date
3131
The end time of the interval (exclusive).
3232
symbol: str
3333
The string symbol.
3434
3535
"""
3636

37-
start: dt.date
38-
end: dt.date
37+
start_date: dt.date
38+
end_date: dt.date
3939
symbol: str
4040

4141

@@ -60,7 +60,7 @@ class InstrumentMap:
6060
)
6161

6262
def __init__(self) -> None:
63-
self._data: dict[int, list[SymbolInterval]] = defaultdict(list)
63+
self._data: dict[int, list[MappingInterval]] = defaultdict(list)
6464

6565
def clear(self) -> None:
6666
"""
@@ -96,7 +96,7 @@ def resolve(
9696
"""
9797
mappings = self._data[instrument_id]
9898
for entry in mappings:
99-
if entry.start <= date < entry.end:
99+
if entry.start_date <= date < entry.end_date:
100100
return entry.symbol
101101
return None
102102

@@ -119,17 +119,14 @@ def insert_metadata(self, metadata: Metadata) -> None:
119119
# Nothing to do
120120
return
121121

122-
if SType(metadata.stype_in) == SType.INSTRUMENT_ID:
123-
inverse = True
124-
elif SType(metadata.stype_out) == SType.INSTRUMENT_ID:
125-
inverse = False
126-
else:
127-
raise ValueError(
128-
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
129-
)
122+
stype_in = SType(metadata.stype_in)
123+
stype_out = SType(metadata.stype_out)
130124

131-
for in_symbol, entries in metadata.mappings.items():
125+
for symbol_in, entries in metadata.mappings.items():
132126
for entry in entries:
127+
if not entry["symbol"]:
128+
continue # skip empty symbol mapping
129+
133130
try:
134131
start_date = pd.Timestamp(entry["start_date"], tz="utc").date()
135132
end_date = pd.Timestamp(entry["end_date"], tz="utc").date()
@@ -138,28 +135,18 @@ def insert_metadata(self, metadata: Metadata) -> None:
138135
f"failed to parse date range from start_date={entry['start_date']} end_date={entry['end_date']}",
139136
)
140137

141-
if inverse:
142-
try:
143-
instrument_id = int(in_symbol)
144-
except TypeError:
145-
raise ValueError(
146-
f"failed to parse `{in_symbol}` as an instrument_id",
147-
)
148-
symbol = entry["symbol"]
149-
else:
150-
try:
151-
instrument_id = int(entry["symbol"])
152-
except TypeError:
153-
raise ValueError(
154-
f"failed to parse `{entry['symbol']}` as an instrument_id",
155-
)
156-
symbol = in_symbol
138+
symbol, instrument_id = _resolve_mapping_tuple(
139+
symbol_in=symbol_in,
140+
stype_in=stype_in,
141+
symbol_out=entry["symbol"],
142+
stype_out=stype_out,
143+
)
157144

158145
self._insert_inverval(
159146
instrument_id,
160-
SymbolInterval(
161-
start=start_date,
162-
end=end_date,
147+
MappingInterval(
148+
start_date=start_date,
149+
end_date=end_date,
163150
symbol=symbol,
164151
),
165152
)
@@ -201,9 +188,9 @@ def insert_symbol_mapping_msg(
201188

202189
self._insert_inverval(
203190
msg.hd.instrument_id,
204-
SymbolInterval(
205-
start=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
206-
end=pd.Timestamp(end_ts, unit="ns", tz="utc").date(),
191+
MappingInterval(
192+
start_date=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
193+
end_date=pd.Timestamp(end_ts, unit="ns", tz="utc").date(),
207194
symbol=symbol,
208195
),
209196
)
@@ -243,25 +230,22 @@ def insert_json(
243230
if not all(k in mapping for k in self.SYMBOLOGY_RESOLVE_KEYS):
244231
raise ValueError("mapping must contain a complete symbology.resolve result")
245232

246-
if SType(mapping["stype_in"]) == SType.INSTRUMENT_ID:
247-
inverse = True
248-
elif SType(mapping["stype_out"]) == SType.INSTRUMENT_ID:
249-
inverse = False
250-
else:
251-
raise ValueError(
252-
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
253-
)
254-
255233
if not isinstance(mapping["result"], dict):
256234
raise ValueError("`result` is not a valid symbology mapping")
257235

258-
for in_symbol, entries in mapping["result"].items():
236+
stype_in = SType(mapping["stype_in"])
237+
stype_out = SType(mapping["stype_out"])
238+
239+
for symbol_in, entries in mapping["result"].items():
259240
for entry in entries:
260241
if not all(k in entry for k in self.SYMBOLOGY_RESULT_KEYS):
261242
raise ValueError(
262243
"`result` contents must contain `d0`, `d1`, and `s` keys",
263244
)
264245

246+
if not entry["s"]:
247+
continue # skip empty symbol mapping
248+
265249
try:
266250
start_date = pd.Timestamp(entry["d0"], tz="utc").date()
267251
end_date = pd.Timestamp(entry["d1"], tz="utc").date()
@@ -270,33 +254,23 @@ def insert_json(
270254
f"failed to parse date range from d0={entry['d0']} d1={entry['d1']}",
271255
)
272256

273-
if inverse:
274-
try:
275-
instrument_id = int(in_symbol)
276-
except TypeError:
277-
raise ValueError(
278-
f"failed to parse `{in_symbol}` as an instrument_id",
279-
)
280-
symbol = entry["s"]
281-
else:
282-
try:
283-
instrument_id = int(entry["s"])
284-
except TypeError:
285-
raise ValueError(
286-
f"failed to parse `{entry['s']}` as an instrument_id",
287-
)
288-
symbol = in_symbol
257+
symbol, instrument_id = _resolve_mapping_tuple(
258+
symbol_in=symbol_in,
259+
stype_in=stype_in,
260+
symbol_out=entry["s"],
261+
stype_out=stype_out,
262+
)
289263

290264
self._insert_inverval(
291265
instrument_id,
292-
SymbolInterval(
293-
start=start_date,
294-
end=end_date,
266+
MappingInterval(
267+
start_date=start_date,
268+
end_date=end_date,
295269
symbol=symbol,
296270
),
297271
)
298272

299-
def _insert_inverval(self, instrument_id: int, interval: SymbolInterval) -> None:
273+
def _insert_inverval(self, instrument_id: int, interval: MappingInterval) -> None:
300274
"""
301275
Insert a SymbolInterval into the map.
302276
@@ -314,3 +288,31 @@ def _insert_inverval(self, instrument_id: int, interval: SymbolInterval) -> None
314288
return # this mapping is already present
315289

316290
mappings.insert(insert_position, interval)
291+
292+
293+
def _resolve_mapping_tuple(
    symbol_in: str | int,
    stype_in: SType,
    symbol_out: str | int,
    stype_out: SType,
) -> tuple[str, int]:
    """
    Orient a symbology mapping as a ``(symbol, instrument_id)`` pair.

    Whichever side of the mapping is typed as `instrument_id` is parsed
    as the integer ID; the opposite side is returned as the string symbol.
    `stype_in` is checked before `stype_out`.

    Parameters
    ----------
    symbol_in : str | int
        The input-side symbol of the mapping.
    stype_in : SType
        The symbology type of `symbol_in`.
    symbol_out : str | int
        The output-side symbol of the mapping.
    stype_out : SType
        The symbology type of `symbol_out`.

    Returns
    -------
    tuple[str, int]
        The string symbol followed by the integer instrument ID.

    Raises
    ------
    ValueError
        If the `instrument_id` side cannot be parsed as an integer, or
        if neither `stype_in` nor `stype_out` is `instrument_id`.

    """
    if stype_in == SType.INSTRUMENT_ID:
        id_candidate, symbol = symbol_in, symbol_out
    elif stype_out == SType.INSTRUMENT_ID:
        id_candidate, symbol = symbol_out, symbol_in
    else:
        raise ValueError(
            "either `stype_out` or `stype_in` must be `instrument_id` to insert",
        )

    try:
        instrument_id = int(id_candidate)
    except (TypeError, ValueError) as exc:
        # Chain explicitly so the traceback shows the parse failure as the cause.
        raise ValueError(
            f"failed to parse `{id_candidate}` as an instrument_id",
        ) from exc

    return str(symbol), instrument_id

databento/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.22.0"
1+
__version__ = "0.22.1"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "databento"
3-
version = "0.22.0"
3+
version = "0.22.1"
44
description = "Official Python client library for Databento"
55
authors = [
66
"Databento <[email protected]>",

0 commit comments

Comments
 (0)