Skip to content

Commit f361be3

Browse files
committed
MOD: Upgrade Python client to databento_dbn 0.14.2
1 parent 17f1a02 commit f361be3

File tree

69 files changed

+799
-466
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+799
-466
lines changed

CHANGELOG.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,28 @@
11
# Changelog
22

3+
## 0.24.0 - TBD
4+
5+
This release adds support for DBN v2.
6+
7+
#### Enhancements
8+
- Improved the performance for stream writes in the `Live` client
9+
- Upgraded `databento-dbn` to 0.14.2
10+
- Added `databento.common.types` module to hold common type annotations
11+
12+
#### Breaking Changes
13+
- `DBNStore` iteration and `DBNStore.replay` will upgrade DBN version 1 messages to version 2
14+
- `Live` client iteration and callbacks upgrade DBN version 1 messages to version 2
15+
- Moved `DBNRecord`, `RecordCallback`, and `ExceptionCallback` types to them `databento.common.types` module
16+
- Moved `AUTH_TIMEOUT_SECONDS` and `CONNECT_TIMEOUT_SECONDS` constants from the `databento.live` module to `databento.live.session`
17+
- Moved `INT64_NULL` from the `databento.common.dbnstore` module to `databento.common.constants`
18+
- Moved `SCHEMA_STRUCT_MAP` from the `databento.common.data` module to `databento.common.constants`
19+
- Removed `schema` parameter from `DataFrameIterator` constructor; `struct_type` is to be used instead
20+
- Removed `NON_SCHEMA_RECORD_TYPES` constant as it is no longer used
21+
- Removed `DERIV_SCHEMAS` constant as it is no longer used
22+
- Removed `SCHEMA_COLUMNS` constant as it is no longer used
23+
- Removed `SCHEMA_DTYPES_MAP` constant as it is no longer used
24+
- Removed empty `databento.common.data` module
25+
326
## 0.23.1 - 2023-11-10
427

528
#### Enhancements

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
3232
The minimum dependencies as found in the `pyproject.toml` are also listed below:
3333
- python = "^3.8"
3434
- aiohttp = "^3.8.3"
35-
- databento-dbn = "0.13.0"
35+
- databento-dbn = "0.14.2"
3636
- numpy = ">=1.23.5"
3737
- pandas = ">=1.5.3"
3838
- requests = ">=2.24.0"

databento/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,9 @@
3737
from databento.common.publishers import Publisher
3838
from databento.common.publishers import Venue
3939
from databento.common.symbology import InstrumentMap
40+
from databento.common.types import DBNRecord
4041
from databento.historical.api import API_VERSION
4142
from databento.historical.client import Historical
42-
from databento.live import DBNRecord
4343
from databento.live.client import Live
4444
from databento.version import __version__ # noqa
4545

databento/common/constants.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,59 @@
1-
ALL_SYMBOLS = "ALL_SYMBOLS"
1+
from __future__ import annotations
2+
3+
from typing import Final
4+
5+
import numpy as np
6+
from databento_dbn import ImbalanceMsg
7+
from databento_dbn import InstrumentDefMsg
8+
from databento_dbn import InstrumentDefMsgV1
9+
from databento_dbn import MBOMsg
10+
from databento_dbn import MBP1Msg
11+
from databento_dbn import MBP10Msg
12+
from databento_dbn import OHLCVMsg
13+
from databento_dbn import Schema
14+
from databento_dbn import StatMsg
15+
from databento_dbn import TradeMsg
16+
17+
from databento.common.types import DBNRecord
18+
19+
20+
ALL_SYMBOLS: Final = "ALL_SYMBOLS"
21+
22+
23+
DEFINITION_TYPE_MAX_MAP: Final = {
24+
x[0]: np.iinfo(x[1]).max
25+
for x in InstrumentDefMsg._dtypes
26+
if not isinstance(x[1], str)
27+
}
28+
29+
INT64_NULL: Final = 9223372036854775807
30+
31+
SCHEMA_STRUCT_MAP: Final[dict[Schema, type[DBNRecord]]] = {
32+
Schema.DEFINITION: InstrumentDefMsg,
33+
Schema.IMBALANCE: ImbalanceMsg,
34+
Schema.MBO: MBOMsg,
35+
Schema.MBP_1: MBP1Msg,
36+
Schema.MBP_10: MBP10Msg,
37+
Schema.OHLCV_1S: OHLCVMsg,
38+
Schema.OHLCV_1M: OHLCVMsg,
39+
Schema.OHLCV_1H: OHLCVMsg,
40+
Schema.OHLCV_1D: OHLCVMsg,
41+
Schema.STATISTICS: StatMsg,
42+
Schema.TBBO: MBP1Msg,
43+
Schema.TRADES: TradeMsg,
44+
}
45+
46+
SCHEMA_STRUCT_MAP_V1: Final[dict[Schema, type[DBNRecord]]] = {
47+
Schema.DEFINITION: InstrumentDefMsgV1,
48+
Schema.IMBALANCE: ImbalanceMsg,
49+
Schema.MBO: MBOMsg,
50+
Schema.MBP_1: MBP1Msg,
51+
Schema.MBP_10: MBP10Msg,
52+
Schema.OHLCV_1S: OHLCVMsg,
53+
Schema.OHLCV_1M: OHLCVMsg,
54+
Schema.OHLCV_1H: OHLCVMsg,
55+
Schema.OHLCV_1D: OHLCVMsg,
56+
Schema.STATISTICS: StatMsg,
57+
Schema.TBBO: MBP1Msg,
58+
Schema.TRADES: TradeMsg,
59+
}

databento/common/data.py

Lines changed: 0 additions & 73 deletions
This file was deleted.

databento/common/dbnstore.py

Lines changed: 55 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -29,36 +29,26 @@
2929
from databento_dbn import Compression
3030
from databento_dbn import DBNDecoder
3131
from databento_dbn import Encoding
32-
from databento_dbn import ErrorMsg
32+
from databento_dbn import InstrumentDefMsg
33+
from databento_dbn import InstrumentDefMsgV1
3334
from databento_dbn import Metadata
3435
from databento_dbn import Schema
3536
from databento_dbn import SType
36-
from databento_dbn import SymbolMappingMsg
37-
from databento_dbn import SystemMsg
3837
from databento_dbn import Transcoder
38+
from databento_dbn import VersionUpgradePolicy
3939

40-
from databento.common.data import DEFINITION_TYPE_MAX_MAP
41-
from databento.common.data import SCHEMA_COLUMNS
42-
from databento.common.data import SCHEMA_DTYPES_MAP
43-
from databento.common.data import SCHEMA_STRUCT_MAP
40+
from databento.common.constants import DEFINITION_TYPE_MAX_MAP
41+
from databento.common.constants import INT64_NULL
42+
from databento.common.constants import SCHEMA_STRUCT_MAP
43+
from databento.common.constants import SCHEMA_STRUCT_MAP_V1
4444
from databento.common.error import BentoError
45-
from databento.common.iterator import chunk
4645
from databento.common.symbology import InstrumentMap
46+
from databento.common.types import DBNRecord
4747
from databento.common.validation import validate_enum
4848
from databento.common.validation import validate_file_write_path
4949
from databento.common.validation import validate_maybe_enum
50-
from databento.live import DBNRecord
5150

5251

53-
NON_SCHEMA_RECORD_TYPES = [
54-
ErrorMsg,
55-
SymbolMappingMsg,
56-
SystemMsg,
57-
Metadata,
58-
]
59-
60-
INT64_NULL = 9223372036854775807
61-
6252
logger = logging.getLogger(__name__)
6353

6454
if TYPE_CHECKING:
@@ -380,7 +370,9 @@ def __init__(self, data_source: DataSource) -> None:
380370

381371
def __iter__(self) -> Generator[DBNRecord, None, None]:
382372
reader = self.reader
383-
decoder = DBNDecoder()
373+
decoder = DBNDecoder(
374+
upgrade_policy=VersionUpgradePolicy.UPGRADE,
375+
)
384376
while True:
385377
raw = reader.read(DBNStore.DBN_READ_SIZE)
386378
if raw:
@@ -936,8 +928,8 @@ def to_df(
936928

937929
df_iter = DataFrameIterator(
938930
records=records,
939-
schema=schema,
940931
count=count,
932+
struct_type=self._schema_struct_map[schema],
941933
instrument_map=self._instrument_map,
942934
price_type=price_type,
943935
pretty_ts=pretty_ts,
@@ -1084,13 +1076,13 @@ def to_ndarray(
10841076
ndarray_iter: NDArrayIterator
10851077

10861078
if self.schema is None:
1087-
# If schema is None, we're handling heterogeneous data from the live client.
1088-
# This is less performant because the records of a given schema are not contiguous in memory.
1079+
# If schema is None, we're handling heterogeneous data from the live client
1080+
# This is less performant because the records of a given schema are not contiguous in memory
10891081
if schema is None:
10901082
raise ValueError("a schema must be specified for mixed DBN data")
10911083

1092-
schema_struct = SCHEMA_STRUCT_MAP[schema]
1093-
schema_dtype = SCHEMA_DTYPES_MAP[schema]
1084+
schema_struct = self._schema_struct_map[schema]
1085+
schema_dtype = schema_struct._dtypes
10941086
schema_filter = filter(lambda r: isinstance(r, schema_struct), self)
10951087

10961088
ndarray_iter = NDArrayBytesIterator(
@@ -1099,8 +1091,9 @@ def to_ndarray(
10991091
count=count,
11001092
)
11011093
else:
1102-
# If schema is set, we're handling homogeneous historical data.
1103-
schema_dtype = SCHEMA_DTYPES_MAP[self.schema]
1094+
# If schema is set, we're handling homogeneous historical data
1095+
schema_struct = self._schema_struct_map[self.schema]
1096+
schema_dtype = schema_struct._dtypes
11041097

11051098
if self._metadata.ts_out:
11061099
schema_dtype.append(("ts_out", "u8"))
@@ -1145,15 +1138,36 @@ def _transcode(
11451138
pretty_ts=pretty_ts,
11461139
has_metadata=True,
11471140
map_symbols=map_symbols,
1148-
symbol_map=symbol_map, # type: ignore [arg-type]
1141+
symbol_interval_map=symbol_map, # type: ignore [arg-type]
11491142
schema=schema,
11501143
)
11511144

1152-
transcoder.write(bytes(self.metadata))
1153-
for records in chunk(self, 2**16):
1154-
for record in records:
1155-
transcoder.write(bytes(record))
1156-
transcoder.flush()
1145+
reader = self.reader
1146+
transcoder.write(reader.read(self._metadata_length))
1147+
while byte_chunk := reader.read(2**16):
1148+
transcoder.write(byte_chunk)
1149+
1150+
if transcoder.buffer():
1151+
raise BentoError(
1152+
"DBN file is truncated or contains an incomplete record",
1153+
)
1154+
1155+
transcoder.flush()
1156+
1157+
@property
1158+
def _schema_struct_map(self) -> dict[Schema, type[DBNRecord]]:
1159+
"""
1160+
Return a mapping of Schema variants to DBNRecord types based on the DBN
1161+
metadata version.
1162+
1163+
Returns
1164+
-------
1165+
dict[Schema, type[DBNRecord]]
1166+
1167+
"""
1168+
if self.metadata.version == 1:
1169+
return SCHEMA_STRUCT_MAP_V1
1170+
return SCHEMA_STRUCT_MAP
11571171

11581172

11591173
class NDArrayIterator(Protocol):
@@ -1263,31 +1277,30 @@ def __init__(
12631277
self,
12641278
records: Iterator[np.ndarray[Any, Any]],
12651279
count: int | None,
1266-
schema: Schema,
1280+
struct_type: type[DBNRecord],
12671281
instrument_map: InstrumentMap,
12681282
price_type: Literal["fixed", "float", "decimal"] = "float",
12691283
pretty_ts: bool = True,
12701284
map_symbols: bool = True,
12711285
):
12721286
self._records = records
1273-
self._schema = schema
12741287
self._count = count
1288+
self._struct_type = struct_type
12751289
self._price_type = price_type
12761290
self._pretty_ts = pretty_ts
12771291
self._map_symbols = map_symbols
12781292
self._instrument_map = instrument_map
1279-
self._struct = SCHEMA_STRUCT_MAP[schema]
12801293

12811294
def __iter__(self) -> DataFrameIterator:
12821295
return self
12831296

12841297
def __next__(self) -> pd.DataFrame:
12851298
df = pd.DataFrame(
12861299
next(self._records),
1287-
columns=SCHEMA_COLUMNS[self._schema],
1300+
columns=self._struct_type._ordered_fields,
12881301
)
12891302

1290-
if self._schema == Schema.DEFINITION:
1303+
if self._struct_type in (InstrumentDefMsg, InstrumentDefMsgV1):
12911304
self._format_definition_fields(df)
12921305

12931306
self._format_hidden_fields(df)
@@ -1310,8 +1323,8 @@ def _format_definition_fields(self, df: pd.DataFrame) -> None:
13101323
df[column] = df[column].where(df[column] != type_max, np.nan)
13111324

13121325
def _format_hidden_fields(self, df: pd.DataFrame) -> None:
1313-
for column, dtype in SCHEMA_DTYPES_MAP[self._schema]:
1314-
hidden_fields = self._struct._hidden_fields
1326+
for column, dtype in self._struct_type._dtypes:
1327+
hidden_fields = self._struct_type._hidden_fields
13151328
if dtype.startswith("S") and column not in hidden_fields:
13161329
df[column] = df[column].str.decode("utf-8")
13171330

@@ -1328,7 +1341,7 @@ def _format_px(
13281341
df: pd.DataFrame,
13291342
price_type: Literal["fixed", "float", "decimal"],
13301343
) -> None:
1331-
px_fields = self._struct._price_fields
1344+
px_fields = self._struct_type._price_fields
13321345

13331346
if price_type == "decimal":
13341347
for field in px_fields:
@@ -1343,11 +1356,9 @@ def _format_px(
13431356
return # do nothing
13441357

13451358
def _format_pretty_ts(self, df: pd.DataFrame) -> None:
1346-
for field in self._struct._timestamp_fields:
1359+
for field in self._struct_type._timestamp_fields:
13471360
df[field] = pd.to_datetime(df[field], utc=True, errors="coerce")
13481361

13491362
def _format_set_index(self, df: pd.DataFrame) -> None:
1350-
index_column = (
1351-
"ts_event" if self._schema.value.startswith("ohlcv") else "ts_recv"
1352-
)
1363+
index_column = self._struct_type._ordered_fields[0]
13531364
df.set_index(index_column, inplace=True)

0 commit comments

Comments
 (0)