Skip to content

Commit 7a38465

Browse files
committed
FIX: Change Bento iter to not use record_count
1 parent b3ee8fe commit 7a38465

File tree

4 files changed

+15
-84
lines changed

4 files changed

+15
-84
lines changed

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Changelog
22

3-
## 0.9.0 - TBD
3+
## 0.8.2 - 2023-03-10
4+
- Removed `record_count` property from Bento class
5+
- Fixed bug in `Bento` where invalid metadata would prevent iteration
6+
- Improved use of the logging module
47
- Changed `metadata.get_dataset_condition` response to a list of conditions per date
58

69
## 0.8.1 - 2023-03-05

databento/common/bento.py

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,6 @@ class Bento:
269269
The raw compressed data in bytes.
270270
reader : IO[bytes]
271271
A zstd decompression stream.
272-
record_count : int
273-
The record count.
274272
schema : Schema
275273
The data record schema.
276274
start : pd.Timestamp
@@ -347,17 +345,17 @@ def __init__(self, data_source: DataSource) -> None:
347345

348346
def __iter__(self) -> Generator[np.void, None, None]:
349347
reader = self.reader
350-
for _ in range(self.record_count):
348+
while True:
351349
raw = reader.read(self.record_size)
352-
rec = np.frombuffer(raw, dtype=STRUCT_MAP[self.schema])
353-
yield rec[0]
354-
355-
def __len__(self) -> int:
356-
return self.record_count
350+
if raw:
351+
rec = np.frombuffer(raw, dtype=STRUCT_MAP[self.schema])
352+
yield rec[0]
353+
else:
354+
break
357355

358356
def __repr__(self) -> str:
359357
name = self.__class__.__name__
360-
return f"<{name}(schema={self.schema}, record_count={self.record_count})>"
358+
return f"<{name}(schema={self.schema})>"
361359

362360
def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
363361
df.index = pd.to_datetime(df.index, utc=True)
@@ -608,18 +606,6 @@ def reader(self) -> IO[bytes]:
608606
reader.seek(self._metadata_length)
609607
return reader
610608

611-
@property
612-
def record_count(self) -> int:
613-
"""
614-
Return the record count.
615-
616-
Returns
617-
-------
618-
int
619-
620-
"""
621-
return self._metadata["record_count"]
622-
623609
@property
624610
def schema(self) -> Schema:
625611
"""

notebooks/quickstart.ipynb

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -688,32 +688,6 @@
688688
"data.compression"
689689
]
690690
},
691-
{
692-
"cell_type": "code",
693-
"execution_count": null,
694-
"id": "97bc0031-be8b-43cd-b9a4-1c5d2c388a28",
695-
"metadata": {
696-
"pycharm": {
697-
"name": "#%%\n"
698-
},
699-
"tags": []
700-
},
701-
"outputs": [
702-
{
703-
"data": {
704-
"text/plain": [
705-
"1000"
706-
]
707-
},
708-
"execution_count": null,
709-
"metadata": {},
710-
"output_type": "execute_result"
711-
}
712-
],
713-
"source": [
714-
"data.record_count"
715-
]
716-
},
717691
{
718692
"cell_type": "code",
719693
"execution_count": null,

tests/test_historical_bento.py

Lines changed: 4 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def test_bento_given_initial_nbytes_returns_expected_metadata() -> None:
116116
assert bento.start == pd.Timestamp("2020-12-28 13:00:00+0000", tz="UTC")
117117
assert bento.end == pd.Timestamp("2020-12-29 13:00:00+0000", tz="UTC")
118118
assert bento.limit == 2
119-
assert bento.record_count == 2
119+
assert len(bento.to_ndarray()) == 2
120120
assert bento.mappings == {
121121
"ESH1": [
122122
{
@@ -443,7 +443,7 @@ def test_from_dbn_alias() -> None:
443443

444444
# Assert
445445
assert data.schema == Schema.MBO
446-
assert data.record_count == 2
446+
assert len(data.to_ndarray()) == 2
447447

448448

449449
def test_mbo_to_csv_writes_expected_file_to_disk(tmp_path: Path) -> None:
@@ -671,38 +671,6 @@ def test_mbp_1_to_json_with_all_options_writes_expected_file_to_disk(
671671
)
672672

673673

674-
@pytest.mark.parametrize(
675-
"schema",
676-
[
677-
s
678-
for s in Schema
679-
if s
680-
not in (
681-
Schema.OHLCV_1H,
682-
Schema.OHLCV_1D,
683-
Schema.STATUS,
684-
Schema.STATISTICS,
685-
Schema.DEFINITION,
686-
Schema.GATEWAY_ERROR,
687-
Schema.SYMBOL_MAPPING,
688-
)
689-
],
690-
)
691-
def test_bento_len(schema: Schema) -> None:
692-
"""
693-
Check that calling `len()` on a Bento returns
694-
the record count.
695-
"""
696-
# Arrange
697-
stub_data = get_test_data(schema=schema)
698-
699-
# Act
700-
bento = Bento.from_bytes(data=stub_data)
701-
702-
# Assert
703-
assert len(bento) == bento.record_count
704-
705-
706674
@pytest.mark.parametrize(
707675
"schema",
708676
[
@@ -732,7 +700,7 @@ def test_bento_repr(schema: Schema) -> None:
732700
bento = Bento.from_bytes(data=stub_data)
733701

734702
# Assert
735-
assert repr(bento) == f"<Bento(schema={schema}, record_count={bento.record_count})>"
703+
assert repr(bento) == f"<Bento(schema={schema})>"
736704

737705

738706
def test_bento_iterable() -> None:
@@ -820,6 +788,6 @@ def test_bento_compression_equality(schema: Schema) -> None:
820788
zstd_bento = Bento.from_bytes(zstd_stub_data)
821789
dbn_bento = Bento.from_bytes(dbn_stub_data)
822790

823-
assert zstd_bento.record_count == dbn_bento.record_count
791+
assert len(zstd_bento.to_ndarray()) == len(dbn_bento.to_ndarray())
824792
assert zstd_bento.metadata == dbn_bento.metadata
825793
assert zstd_bento.reader.read() == dbn_bento.reader.read()

0 commit comments

Comments
 (0)