Skip to content

Commit aa77759

Browse files
committed
FIX: Fix empty symbology in InstrumentMap
1 parent e179b11 commit aa77759

File tree

3 files changed

+277
-75
lines changed

3 files changed

+277
-75
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#### Bug fixes
66
- Fixed an issue where `DBNStore.to_csv` and `DBNStore.to_json` were mapping symbols even when `map_symbols` was set to `False`
7+
- Fixed an issue where empty symbology mappings caused a `ValueError` when loading symbols into the `DBNStore` instrument map
78

89
## 0.22.0 - 2023-10-23
910

databento/common/symbology.py

Lines changed: 70 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -19,23 +19,23 @@
1919
ALL_SYMBOLS = "ALL_SYMBOLS"
2020

2121

22-
class SymbolInterval(NamedTuple):
22+
class MappingInterval(NamedTuple):
2323
"""
2424
Interval inside which a symbol is defined.
2525
2626
Attributes
2727
----------
28-
start: dt.date
28+
start_date: dt.date
2929
The start date of the interval.
30-
end: dt.date
30+
end_date: dt.date
3131
The end date of the interval (exclusive).
3232
symbol: str
3333
The string symbol.
3434
3535
"""
3636

37-
start: dt.date
38-
end: dt.date
37+
start_date: dt.date
38+
end_date: dt.date
3939
symbol: str
4040

4141

@@ -60,7 +60,7 @@ class InstrumentMap:
6060
)
6161

6262
def __init__(self) -> None:
63-
self._data: dict[int, list[SymbolInterval]] = defaultdict(list)
63+
self._data: dict[int, list[MappingInterval]] = defaultdict(list)
6464

6565
def clear(self) -> None:
6666
"""
@@ -96,7 +96,7 @@ def resolve(
9696
"""
9797
mappings = self._data[instrument_id]
9898
for entry in mappings:
99-
if entry.start <= date < entry.end:
99+
if entry.start_date <= date < entry.end_date:
100100
return entry.symbol
101101
return None
102102

@@ -119,17 +119,14 @@ def insert_metadata(self, metadata: Metadata) -> None:
119119
# Nothing to do
120120
return
121121

122-
if SType(metadata.stype_in) == SType.INSTRUMENT_ID:
123-
inverse = True
124-
elif SType(metadata.stype_out) == SType.INSTRUMENT_ID:
125-
inverse = False
126-
else:
127-
raise ValueError(
128-
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
129-
)
122+
stype_in = SType(metadata.stype_in)
123+
stype_out = SType(metadata.stype_out)
130124

131-
for in_symbol, entries in metadata.mappings.items():
125+
for symbol_in, entries in metadata.mappings.items():
132126
for entry in entries:
127+
if not entry["symbol"]:
128+
continue # skip empty symbol mapping
129+
133130
try:
134131
start_date = pd.Timestamp(entry["start_date"], tz="utc").date()
135132
end_date = pd.Timestamp(entry["end_date"], tz="utc").date()
@@ -138,28 +135,18 @@ def insert_metadata(self, metadata: Metadata) -> None:
138135
f"failed to parse date range from start_date={entry['start_date']} end_date={entry['end_date']}",
139136
)
140137

141-
if inverse:
142-
try:
143-
instrument_id = int(in_symbol)
144-
except TypeError:
145-
raise ValueError(
146-
f"failed to parse `{in_symbol}` as an instrument_id",
147-
)
148-
symbol = entry["symbol"]
149-
else:
150-
try:
151-
instrument_id = int(entry["symbol"])
152-
except TypeError:
153-
raise ValueError(
154-
f"failed to parse `{entry['symbol']}` as an instrument_id",
155-
)
156-
symbol = in_symbol
138+
symbol, instrument_id = _resolve_mapping_tuple(
139+
symbol_in=symbol_in,
140+
stype_in=stype_in,
141+
symbol_out=entry["symbol"],
142+
stype_out=stype_out,
143+
)
157144

158145
self._insert_inverval(
159146
instrument_id,
160-
SymbolInterval(
161-
start=start_date,
162-
end=end_date,
147+
MappingInterval(
148+
start_date=start_date,
149+
end_date=end_date,
163150
symbol=symbol,
164151
),
165152
)
@@ -201,9 +188,9 @@ def insert_symbol_mapping_msg(
201188

202189
self._insert_inverval(
203190
msg.hd.instrument_id,
204-
SymbolInterval(
205-
start=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
206-
end=pd.Timestamp(end_ts, unit="ns", tz="utc").date(),
191+
MappingInterval(
192+
start_date=pd.Timestamp(start_ts, unit="ns", tz="utc").date(),
193+
end_date=pd.Timestamp(end_ts, unit="ns", tz="utc").date(),
207194
symbol=symbol,
208195
),
209196
)
@@ -243,25 +230,22 @@ def insert_json(
243230
if not all(k in mapping for k in self.SYMBOLOGY_RESOLVE_KEYS):
244231
raise ValueError("mapping must contain a complete symbology.resolve result")
245232

246-
if SType(mapping["stype_in"]) == SType.INSTRUMENT_ID:
247-
inverse = True
248-
elif SType(mapping["stype_out"]) == SType.INSTRUMENT_ID:
249-
inverse = False
250-
else:
251-
raise ValueError(
252-
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
253-
)
254-
255233
if not isinstance(mapping["result"], dict):
256234
raise ValueError("`result` is not a valid symbology mapping")
257235

258-
for in_symbol, entries in mapping["result"].items():
236+
stype_in = SType(mapping["stype_in"])
237+
stype_out = SType(mapping["stype_out"])
238+
239+
for symbol_in, entries in mapping["result"].items():
259240
for entry in entries:
260241
if not all(k in entry for k in self.SYMBOLOGY_RESULT_KEYS):
261242
raise ValueError(
262243
"`result` contents must contain `d0`, `d1`, and `s` keys",
263244
)
264245

246+
if not entry["s"]:
247+
continue # skip empty symbol mapping
248+
265249
try:
266250
start_date = pd.Timestamp(entry["d0"], tz="utc").date()
267251
end_date = pd.Timestamp(entry["d1"], tz="utc").date()
@@ -270,33 +254,23 @@ def insert_json(
270254
f"failed to parse date range from d0={entry['d0']} d1={entry['d1']}",
271255
)
272256

273-
if inverse:
274-
try:
275-
instrument_id = int(in_symbol)
276-
except TypeError:
277-
raise ValueError(
278-
f"failed to parse `{in_symbol}` as an instrument_id",
279-
)
280-
symbol = entry["s"]
281-
else:
282-
try:
283-
instrument_id = int(entry["s"])
284-
except TypeError:
285-
raise ValueError(
286-
f"failed to parse `{entry['s']}` as an instrument_id",
287-
)
288-
symbol = in_symbol
257+
symbol, instrument_id = _resolve_mapping_tuple(
258+
symbol_in=symbol_in,
259+
stype_in=stype_in,
260+
symbol_out=entry["s"],
261+
stype_out=stype_out,
262+
)
289263

290264
self._insert_inverval(
291265
instrument_id,
292-
SymbolInterval(
293-
start=start_date,
294-
end=end_date,
266+
MappingInterval(
267+
start_date=start_date,
268+
end_date=end_date,
295269
symbol=symbol,
296270
),
297271
)
298272

299-
def _insert_inverval(self, instrument_id: int, interval: SymbolInterval) -> None:
273+
def _insert_inverval(self, instrument_id: int, interval: MappingInterval) -> None:
300274
"""
301275
Insert a MappingInterval into the map.
302276
@@ -314,3 +288,31 @@ def _insert_inverval(self, instrument_id: int, interval: SymbolInterval) -> None
314288
return # this mapping is already present
315289

316290
mappings.insert(insert_position, interval)
291+
292+
293+
def _resolve_mapping_tuple(
294+
symbol_in: str | int,
295+
stype_in: SType,
296+
symbol_out: str | int,
297+
stype_out: SType,
298+
) -> tuple[str, int]:
299+
if stype_in == SType.INSTRUMENT_ID:
300+
try:
301+
instrument_id = int(symbol_in)
302+
except (TypeError, ValueError):
303+
raise ValueError(
304+
f"failed to parse `{symbol_in}` as an instrument_id",
305+
)
306+
return str(symbol_out), instrument_id
307+
elif stype_out == SType.INSTRUMENT_ID:
308+
try:
309+
instrument_id = int(symbol_out)
310+
except (TypeError, ValueError):
311+
raise ValueError(
312+
f"failed to parse `{symbol_out}` as an instrument_id",
313+
)
314+
return str(symbol_in), instrument_id
315+
316+
raise ValueError(
317+
"either `stype_out` or `stype_in` must be `instrument_id` to insert",
318+
)

0 commit comments

Comments
 (0)