Skip to content

Commit 9c32915

Browse files
committed
MOD: Integrate databento-dbn with python client
1 parent 1fb3ddf commit 9c32915

19 files changed

+813
-691
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.7 and
3131
The minimum dependencies as found in the `requirements.txt` are also listed below:
3232
- Python (>=3.7)
3333
- aiohttp (>=3.7.2)
34-
- dbz-python (>=0.2.1)
34+
- databento-dbn (>=0.3.2)
3535
- numpy (>=1.17.0)
3636
- pandas (>=1.1.3)
3737
- requests (>=2.24.0)

databento/common/bento.py

Lines changed: 86 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,48 @@
4040
from databento.historical.client import Historical
4141

4242

43+
def is_zstandard(reader: IO[bytes]) -> bool:
44+
"""
45+
Determine if an `IO[bytes]` reader contains zstandard compressed
46+
data.
47+
48+
Parameters
49+
----------
50+
reader : IO[bytes]
51+
The data to check.
52+
53+
Returns
54+
-------
55+
bool
56+
57+
"""
58+
reader.seek(0) # ensure we read from the beginning
59+
try:
60+
zstandard.get_frame_parameters(reader.read(18))
61+
except zstandard.ZstdError:
62+
return False
63+
else:
64+
return True
65+
66+
67+
def is_dbn(reader: IO[bytes]) -> bool:
68+
"""
69+
Determine if an `IO[bytes]` reader contains dbn data.
70+
71+
Parameters
72+
----------
73+
reader : IO[bytes]
74+
The data to check.
75+
76+
Returns
77+
-------
78+
bool
79+
80+
"""
81+
reader.seek(0) # ensure we read from the beginning
82+
return reader.read(3) == b"DBN"
83+
84+
4385
class DataSource(abc.ABC):
4486
"""Abstract base class for backing Bento classes with data."""
4587

@@ -266,19 +308,35 @@ class Bento:
266308
def __init__(self, data_source: DataSource) -> None:
267309
self._data_source = data_source
268310

269-
# Check for zstd skippable frame
311+
# Check compression
270312
buffer = self._data_source.reader
271-
if not buffer.read(4).startswith(b"P*M\x18"):
272-
raise RuntimeError(f"{self._data_source.name} is not a valid DBN format")
313+
314+
if is_zstandard(buffer):
315+
self._compression = Compression.ZSTD
316+
buffer = zstandard.ZstdDecompressor().stream_reader(data_source.reader)
317+
elif is_dbn(buffer):
318+
self._compression = Compression.NONE
319+
buffer = data_source.reader
320+
else:
321+
# We don't know how to read this file
322+
raise RuntimeError(
323+
f"Could not determine compression format of {self._data_source.name}",
324+
)
325+
326+
# Get metadata length
327+
metadata_bytes = BytesIO(buffer.read(8))
328+
metadata_bytes.seek(4)
273329
metadata_length = int.from_bytes(
274-
buffer.read(4),
330+
metadata_bytes.read(4),
275331
byteorder="little",
276332
)
333+
self._metadata_length = metadata_length + 8
277334

278-
buffer.seek(0) # Rewind to read the entire header
335+
metadata_bytes.write(buffer.read(metadata_length))
279336

280-
self._metadata: Dict[str, Any] = MetadataDecoder.decode_to_json(
281-
raw_metadata=buffer.read(8 + metadata_length),
337+
# Read metadata
338+
self._metadata: Dict[str, Any] = MetadataDecoder().decode_to_json(
339+
metadata_bytes.getvalue(),
282340
)
283341

284342
# This is populated when _map_symbols is called
@@ -408,13 +466,14 @@ def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
408466
def compression(self) -> Compression:
409467
"""
410468
Return the data compression format (if any).
469+
This is determined by inspecting the data.
411470
412471
Returns
413472
-------
414473
Compression
415474
416475
"""
417-
return Compression(self._metadata["compression"])
476+
return self._compression
418477

419478
@property
420479
def dataset(self) -> str:
@@ -513,25 +572,37 @@ def raw(self) -> bytes:
513572
-------
514573
bytes
515574
575+
See Also
576+
--------
577+
Bento.reader
578+
516579
"""
517580
return self._data_source.reader.read()
518581

519582
@property
520583
def reader(self) -> IO[bytes]:
521584
"""
522-
Return an I/O reader for the data.
523-
524-
Parameters
525-
----------
526-
decompress : bool
527-
If data should be decompressed.
585+
Return an I/O reader for the DBN records.
528586
529587
Returns
530588
-------
531589
BinaryIO
532590
591+
See Also
592+
--------
593+
Bento.raw
594+
533595
"""
534-
return zstandard.ZstdDecompressor().stream_reader(self._data_source.reader)
596+
if self.compression == Compression.ZSTD:
597+
reader: IO[bytes] = zstandard.ZstdDecompressor().stream_reader(
598+
self._data_source.reader,
599+
)
600+
else:
601+
reader = self._data_source.reader
602+
603+
# Seek past the metadata to read records
604+
reader.seek(self._metadata_length)
605+
return reader
535606

536607
@property
537608
def record_count(self) -> int:

databento/common/enums.py

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -154,38 +154,6 @@ class Schema(StringyMixin, str, Enum):
154154
GATEWAY_ERROR = "gateway_error"
155155
SYMBOL_MAPPING = "symbol_mapping"
156156

157-
@classmethod
158-
def from_int(cls, value: int) -> "Schema":
159-
if value == 0:
160-
return cls.MBO
161-
if value == 1:
162-
return cls.MBP_1
163-
if value == 2:
164-
return cls.MBP_10
165-
if value == 3:
166-
return cls.TBBO
167-
if value == 4:
168-
return cls.TRADES
169-
if value == 5:
170-
return cls.OHLCV_1S
171-
if value == 6:
172-
return cls.OHLCV_1M
173-
if value == 7:
174-
return cls.OHLCV_1H
175-
if value == 8:
176-
return cls.OHLCV_1D
177-
if value == 9:
178-
return cls.DEFINITION
179-
if value == 10:
180-
return cls.STATISTICS
181-
if value == 11:
182-
return cls.STATUS
183-
if value == 12:
184-
return cls.GATEWAY_ERROR
185-
if value == 13:
186-
return cls.SYMBOL_MAPPING
187-
raise ValueError(f"value `{value}` is not a valid member of {cls.__name__}")
188-
189157

190158
@unique
191159
@coercible
@@ -205,14 +173,6 @@ class Compression(StringyMixin, str, Enum):
205173
NONE = "none"
206174
ZSTD = "zstd"
207175

208-
@classmethod
209-
def from_int(cls, value: int) -> "Compression":
210-
if value == 0:
211-
return cls.NONE
212-
if value == 1:
213-
return cls.ZSTD
214-
raise ValueError(f"value `{value}` is not a valid member of {cls.__name__}")
215-
216176

217177
@unique
218178
@coercible
@@ -254,16 +214,6 @@ class SType(StringyMixin, str, Enum):
254214
NATIVE = "native"
255215
SMART = "smart"
256216

257-
@classmethod
258-
def from_int(cls, value: int) -> "SType":
259-
if value == 0:
260-
return cls.PRODUCT_ID
261-
if value == 1:
262-
return cls.NATIVE
263-
if value == 2:
264-
return cls.SMART
265-
raise ValueError(f"value `{value}` is not a valid member of {cls.__name__}")
266-
267217

268218
@unique
269219
@coercible

databento/common/metadata.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Dict
22

3-
from databento.common.enums import Compression, Schema, SType
4-
from dbz_python import decode_metadata
3+
from databento_dbn import decode_metadata
54

65

76
class MetadataDecoder:
@@ -26,15 +25,17 @@ def decode_to_json(raw_metadata: bytes) -> Dict[str, object]:
2625
"""
2726
metadata = decode_metadata(raw_metadata)
2827
conversion_mapping = {
29-
"compression": lambda c: str(Compression.from_int(c)),
3028
"limit": lambda lim: None if lim == 0 else lim,
31-
"mappings": lambda m: {i["native"]: i["intervals"] for i in m},
32-
"schema": lambda s: str(Schema.from_int(s)),
33-
"stype_in": lambda s_in: str(SType.from_int(s_in)),
34-
"stype_out": lambda s_out: str(SType.from_int(s_out)),
3529
}
3630

37-
for key, conv_fn in conversion_mapping.items():
38-
metadata[key] = conv_fn(metadata[key])
31+
metadata_dict = {}
3932

40-
return metadata
33+
for field in dir(metadata):
34+
if field.startswith("__"):
35+
continue
36+
value = getattr(metadata, field)
37+
if field in conversion_mapping:
38+
value = conversion_mapping[field](value)
39+
metadata_dict[field] = value
40+
41+
return metadata_dict

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
aiohttp>=3.7.2,<4.0.0
2-
dbz-python>=0.2.1
2+
databento-dbn>=0.3.2
33
numpy>=1.17.0
44
pandas>=1.1.3
55
requests>=2.24.0
-87 Bytes
Binary file not shown.

tests/data/test_data.mbo.dbn.zst

-77 Bytes
Binary file not shown.

tests/data/test_data.mbp-1.dbn.zst

-73 Bytes
Binary file not shown.
-87 Bytes
Binary file not shown.
-63 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)