Skip to content

Commit 8a9f14e

Browse files
committed
FIX: Filter live data by RType in Python client
1 parent d2c0981 commit 8a9f14e

File tree

3 files changed

+13
-23
lines changed

3 files changed

+13
-23
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ This release adds support for DBN v2 as well as Python v3.12.
77
#### Enhancements
88
- Added support for Python 3.12
99
- Improved the performance of stream writes in the `Live` client
10-
- Improved the performance of `DBNStore.to_ndarray` and `DBNStore.to_df` for heterogeneous DBN data
1110
- Upgraded `databento-dbn` to 0.14.2
1211
- Added `databento.common.types` module to hold common type annotations
1312

13+
#### Bug fixes
14+
- Fixed an issue where specifying an OHLCV schema in `DBNStore.to_ndarray` or `DBNStore.to_df` would not properly filter records by their interval
15+
- Fixed an issue where `DBNStore.to_ndarray` and `DBNStore.to_df` with a non-zero count could get stuck in a loop if the DBN data did not contain any records
16+
1417
#### Breaking Changes
1518
- `DBNStore` iteration and `DBNStore.replay` will upgrade DBN version 1 messages to version 2
1619
- `Live` client iteration and callbacks upgrade DBN version 1 messages to version 2

databento/common/dbnstore.py

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
from databento_dbn import SType
3838
from databento_dbn import Transcoder
3939
from databento_dbn import VersionUpgradePolicy
40-
from pandas.io.common import os
4140

4241
from databento.common.constants import DEFINITION_TYPE_MAX_MAP
4342
from databento.common.constants import INT64_NULL
@@ -1083,15 +1082,16 @@ def to_ndarray(
10831082
if schema is None:
10841083
raise ValueError("a schema must be specified for mixed DBN data")
10851084

1086-
schema_struct = self._schema_struct_map[schema]
1087-
schema_rtype = RType.from_schema(schema)
1085+
# Always use the latest struct definitions, since DBNStore iteration upgrades DBN version 1 records to version 2
1086+
schema_struct = SCHEMA_STRUCT_MAP[schema]
10881087
schema_dtype = schema_struct._dtypes
1088+
schema_rtype = RType.from_schema(schema)
1089+
schema_filter = filter(lambda r: r.rtype == schema_rtype, self)
10891090

10901091
reader = self.reader
10911092
reader.seek(self._metadata_length)
10921093
ndarray_iter = NDArrayBytesIterator(
1093-
stream=reader,
1094-
rtype=schema_rtype,
1094+
records=map(bytes, schema_filter),
10951095
dtype=schema_dtype,
10961096
count=count,
10971097
)
@@ -1234,35 +1234,22 @@ class NDArrayBytesIterator(NDArrayIterator):
12341234

12351235
def __init__(
12361236
self,
1237-
stream: IO[bytes],
1238-
rtype: RType,
1237+
records: Iterator[bytes],
12391238
dtype: list[tuple[str, str]],
12401239
count: int | None,
12411240
):
1242-
self._stream = stream
1243-
self._rtype = rtype
1241+
self._records = records
12441242
self._dtype = dtype
12451243
self._count = count
12461244
self._first_next = True
12471245

12481246
def __iter__(self) -> NDArrayIterator:
12491247
return self
12501248

1251-
def __iter_rtype__(self) -> Generator[bytes, None, None]:
1252-
while header := self._stream.read(2):
1253-
length, rtype = header[:2]
1254-
read_size = length * 4 - 2
1255-
if rtype == self._rtype:
1256-
yield header + self._stream.read(read_size)
1257-
else:
1258-
self._stream.seek(read_size, os.SEEK_CUR)
1259-
return
1260-
12611249
def __next__(self) -> np.ndarray[Any, Any]:
12621250
record_bytes = BytesIO()
12631251
num_records = 0
1264-
1265-
for record in itertools.islice(self.__iter_rtype__(), self._count):
1252+
for record in itertools.islice(self._records, self._count):
12661253
num_records += 1
12671254
record_bytes.write(record)
12681255

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def fixture_live_test_data(
142142
live_test_data_path: pathlib.Path,
143143
) -> bytes:
144144
"""
145-
Fixture to retrieve stub test data.
145+
Fixture to retrieve live stub test data.
146146
147147
Returns
148148
-------

0 commit comments

Comments
 (0)