
Commit 4f2ee5c

VER: Release 0.13.0

See release notes.

2 parents: c42604a + eb20cd0

19 files changed: +536 −756 lines

CHANGELOG.md

Lines changed: 13 additions & 0 deletions

@@ -1,5 +1,16 @@
 # Changelog
 
+## 0.13.0 - 2023-06-02
+- Added support for `statistics` schema
+- Added batch download support for data files (`condition.json` and `symbology.json`)
+- Upgraded `databento-dbn` to 0.6.1
+- Renamed `booklevel` MBP field to `levels` for brevity and consistent naming
+- Changed `flags` field to an unsigned int
+- Changed default of `ts_out` to `False` for `Live` client
+- Changed `instrument_class` DataFrame representation to be consistent with other `char` types
+- Removed `open_interest_qty` and `cleared_volume` fields, which were always unset, from definition schema
+- Removed sunset `timeseries.stream` method
+
 ## 0.12.0 - 2023-05-01
 - Added `Live` client for connecting to Databento's live service
 - Upgraded `databento-dbn` to 0.5.0
@@ -11,6 +22,8 @@
 - Removed `bad` condition variant from `batch.get_dataset_condition`
 - Added `degraded`, `pending` and `missing` condition variants for `batch.get_dataset_condition`
 - Added `last_modified_date` field to `batch.get_dataset_condition` response
+- Renamed `product_id` field to `instrument_id`
+- Renamed `symbol` field in definitions to `raw_symbol`
 - Deprecated `SType.PRODUCT_ID` to `SType.INSTRUMENT_ID`
 - Deprecated `SType.NATIVE` to `SType.RAW_SYMBOL`
 - Deprecated `SType.SMART` to `SType.PARENT` and `SType.CONTINUOUS`
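
The change of `flags` to an unsigned int is easy to motivate: bit-flag fields routinely set the high bit, and a signed byte renders such values as negative numbers. A minimal numpy illustration (the bit value is an arbitrary example, not an actual DBN flag definition):

```python
import numpy as np

# A flag byte with the high bit set (arbitrary example value)
raw = np.array([0b10000000], dtype=np.uint8)

as_signed = raw.view(np.int8)  # old dtype: the same bits read as a negative number
as_unsigned = raw              # new dtype: reads as the actual bit value

print(int(as_signed[0]), int(as_unsigned[0]))  # -128 128
```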

README.md

Lines changed: 5 additions & 5 deletions

@@ -30,12 +30,12 @@ You can find our full client API reference on the [Historical Reference](https:/
 The library is fully compatible with the latest distribution of Anaconda 3.7 and above.
 The minimum dependencies as found in the `requirements.txt` are also listed below:
 - Python (>=3.7)
-- aiohttp (>=3.7.2)
-- databento-dbn (==0.5.0)
+- aiohttp (>=3.7.2,<4.0.0)
+- databento-dbn (==0.6.1)
 - numpy (>=1.17.0)
 - pandas (>=1.1.3)
 - requests (>=2.24.0)
-- zstandard (>=0.20.0)
+- zstandard (>=0.21.0)
 
 ## Installation
 To install the latest stable version of the package from PyPI:
@@ -57,7 +57,7 @@ client = db.Historical('YOUR_API_KEY')
 data = client.timeseries.get_range(
     dataset='GLBX.MDP3',
     symbols='ES.FUT',
-    stype_in='smart',
+    stype_in='parent',
     start='2022-06-10T14:30',
     end='2022-06-10T14:40',
 )
@@ -72,7 +72,7 @@ and dispatch each data event to an event handler. You can also use
 `.to_df()` or `.to_ndarray()` to cast the data into a Pandas `DataFrame` or numpy `ndarray`:
 
 ```python
-df = data.to_df(pretty_ts=True, pretty_px=True)  # to DataFrame, with pretty formatting
+df = data.to_df()  # to DataFrame
 array = data.to_ndarray()  # to ndarray
 ```
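
As the README snippet shows, `to_df()` and `to_ndarray()` give tabular views of the decoded records. Under the hood the records map onto a numpy structured array, which pandas converts column-for-column; a self-contained sketch using a hypothetical record layout (not the real DBN schema):

```python
import numpy as np
import pandas as pd

# Hypothetical record layout, for illustration only (not the real DBN schema)
dtype = np.dtype([
    ("ts_event", np.uint64),
    ("price", np.int64),
    ("size", np.uint32),
])
records = np.array([(1654871400000000000, 4300250000000, 10)], dtype=dtype)

df = pd.DataFrame(records)  # structured ndarray -> DataFrame, one column per field
print(df.columns.tolist())  # ['ts_event', 'price', 'size']
```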

databento/common/data.py

Lines changed: 39 additions & 9 deletions

@@ -48,7 +48,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     ("order_id", np.uint64),
     ("price", np.int64),
     ("size", np.uint32),
-    ("flags", np.int8),
+    ("flags", np.uint8),
     ("channel_id", np.uint8),
     ("action", "S1"),  # 1 byte chararray
     ("side", "S1"),  # 1 byte chararray
@@ -62,7 +62,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     ("size", np.uint32),
     ("action", "S1"),  # 1 byte chararray
     ("side", "S1"),  # 1 byte chararray
-    ("flags", np.int8),
+    ("flags", np.uint8),
     ("depth", np.uint8),
     ("ts_recv", np.uint64),
     ("ts_in_delta", np.int32),
@@ -93,7 +93,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     ("price_ratio", np.int64),
     ("inst_attrib_value", np.int32),
     ("underlying_id", np.uint32),
-    ("cleared_volume", np.int32),
+    ("_reserved1", "S4"),
     ("market_depth_implied", np.int32),
     ("market_depth", np.int32),
     ("market_segment_id", np.uint32),
@@ -102,11 +102,11 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     ("min_lot_size_block", np.int32),
     ("min_lot_size_round_lot", np.int32),
     ("min_trade_vol", np.uint32),
-    ("open_interest_qty", np.int32),
+    ("_reserved2", "S4"),
     ("contract_multiplier", np.int32),
     ("decay_quantity", np.int32),
     ("original_contract_size", np.int32),
-    ("reserved1", "S4"),
+    ("_reserved3", "S4"),
     ("trading_reference_date", np.uint16),
     ("appl_id", np.int16),
     ("maturity_year", np.uint16),
@@ -125,9 +125,9 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     ("underlying", "S21"),  # 21 byte chararray
     ("strike_price_currency", "S4"),
     ("instrument_class", "S1"),
-    ("reserved2", "S2"),
+    ("_reserved4", "S2"),
     ("strike_price", np.int64),
-    ("reserved3", "S6"),
+    ("_reserved5", "S6"),
     ("match_algorithm", "S1"),  # 1 byte chararray
     ("md_security_trading_status", np.uint8),
     ("main_fraction", np.uint8),
@@ -170,6 +170,20 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     ("dummy", "S1"),
 ]
 
+STATISTICS_MSG: List[Tuple[str, Union[type, str]]] = RECORD_HEADER + [
+    ("ts_recv", np.uint64),
+    ("ts_ref", np.uint64),
+    ("price", np.int64),
+    ("quantity", np.int32),
+    ("sequence", np.uint32),
+    ("ts_in_delta", np.int32),
+    ("stat_type", np.uint16),
+    ("channel_id", np.uint16),
+    ("update_action", np.uint8),
+    ("stat_flags", np.uint8),
+    ("dummy", "S6"),
+]
+
 
 STRUCT_MAP: Dict[Schema, List[Tuple[str, Union[type, str]]]] = {
     Schema.MBO: MBO_MSG,
@@ -193,6 +207,7 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     Schema.OHLCV_1D: OHLCV_MSG,
     Schema.DEFINITION: DEFINITION_MSG,
     Schema.IMBALANCE: IMBALANCE_MSG,
+    Schema.STATISTICS: STATISTICS_MSG,
 }
 
 
@@ -208,20 +223,21 @@ def get_deriv_ba_types(level: int) -> List[Tuple[str, Union[type, str]]]:
     "security_type",
     "unit_of_measure",
     "underlying",
+    "strike_price_currency",
+    "instrument_class",
     "match_algorithm",
     "security_update_action",
     "user_defined_instrument",
-    "strike_price_currency",
 ]
 
 DEFINITION_PRICE_COLUMNS = [
     "min_price_increment",
-    "display_factor",
     "high_limit_price",
     "low_limit_price",
     "max_price_variation",
     "trading_reference_price",
     "min_price_increment_amount",
+    "price_ratio",
     "strike_price",
 ]
 
@@ -288,6 +304,13 @@ def get_deriv_ba_fields(level: int) -> List[str]:
     "dummy",
 ]
 
+STATISTICS_DROP_COLUMNS = [
+    "ts_recv",
+    "length",
+    "rtype",
+    "dummy",
+]
+
 DEFINITION_COLUMNS = [
     x
     for x in (np.dtype(DEFINITION_MSG).names or ())
@@ -298,6 +321,12 @@ def get_deriv_ba_fields(level: int) -> List[str]:
     x for x in (np.dtype(IMBALANCE_MSG).names or ()) if x not in IMBALANCE_DROP_COLUMNS
 ]
 
+STATISTICS_COLUMNS = [
+    x
+    for x in (np.dtype(STATISTICS_MSG).names or ())
+    if x not in STATISTICS_DROP_COLUMNS
+]
+
 COLUMNS = {
     Schema.MBO: [
         "ts_event",
@@ -333,4 +362,5 @@ def get_deriv_ba_fields(level: int) -> List[str]:
     Schema.OHLCV_1D: OHLCV_HEADER_COLUMNS,
     Schema.DEFINITION: DEFINITION_COLUMNS,
     Schema.IMBALANCE: IMBALANCE_COLUMNS,
+    Schema.STATISTICS: STATISTICS_COLUMNS,
 }
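
The new `STATISTICS_MSG` follows the module's existing pattern: a plain list of `(name, type)` pairs that numpy compiles into a structured dtype, with the matching `*_COLUMNS` list derived by filtering the drop columns. A runnable sketch of that pattern; the `RECORD_HEADER` fields below are assumed for illustration, since the actual header is defined elsewhere in the module:

```python
from typing import List, Tuple, Union

import numpy as np

# Assumed DBN-style record header, for illustration only
RECORD_HEADER: List[Tuple[str, Union[type, str]]] = [
    ("length", np.uint8),
    ("rtype", np.uint8),
    ("publisher_id", np.uint16),
    ("instrument_id", np.uint32),
    ("ts_event", np.uint64),
]

STATISTICS_MSG = RECORD_HEADER + [
    ("ts_recv", np.uint64),
    ("ts_ref", np.uint64),
    ("price", np.int64),
    ("quantity", np.int32),
    ("sequence", np.uint32),
    ("ts_in_delta", np.int32),
    ("stat_type", np.uint16),
    ("channel_id", np.uint16),
    ("update_action", np.uint8),
    ("stat_flags", np.uint8),
    ("dummy", "S6"),
]

STATISTICS_DROP_COLUMNS = ["ts_recv", "length", "rtype", "dummy"]

# numpy compiles the (name, type) pairs into a structured dtype ...
dtype = np.dtype(STATISTICS_MSG)
# ... and the DataFrame columns are the dtype's field names minus the drop list
columns = [x for x in (dtype.names or ()) if x not in STATISTICS_DROP_COLUMNS]
print(columns)
```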

databento/common/dbnstore.py

Lines changed: 49 additions & 29 deletions

@@ -3,6 +3,7 @@
 import abc
 import datetime as dt
 import logging
+from collections.abc import Generator
 from io import BytesIO
 from os import PathLike
 from pathlib import Path
@@ -12,7 +13,6 @@
     Any,
     Callable,
     Dict,
-    Generator,
     List,
     Optional,
     Union,
@@ -21,20 +21,25 @@
 import numpy as np
 import pandas as pd
 import zstandard
-from databento.common.data import (
-    COLUMNS,
-    DEFINITION_CHARARRAY_COLUMNS,
-    DEFINITION_PRICE_COLUMNS,
-    DEFINITION_TYPE_MAX_MAP,
-    DERIV_SCHEMAS,
-    STRUCT_MAP,
-)
-from databento.common.enums import Compression, Schema, SType
+from databento_dbn import DBNDecoder
+from databento_dbn import ErrorMsg
+from databento_dbn import Metadata
+from databento_dbn import SymbolMappingMsg
+from databento_dbn import SystemMsg
+
+from databento.common.data import COLUMNS
+from databento.common.data import DEFINITION_CHARARRAY_COLUMNS
+from databento.common.data import DEFINITION_PRICE_COLUMNS
+from databento.common.data import DEFINITION_TYPE_MAX_MAP
+from databento.common.data import DERIV_SCHEMAS
+from databento.common.data import STRUCT_MAP
+from databento.common.enums import Compression
+from databento.common.enums import Schema
+from databento.common.enums import SType
 from databento.common.error import BentoError
 from databento.common.symbology import InstrumentIdMappingInterval
 from databento.common.validation import validate_maybe_enum
 from databento.live.data import DBNStruct
-from databento_dbn import DbnDecoder, ErrorMsg, Metadata, SymbolMappingMsg, SystemMsg
 
 
 NON_SCHEMA_RECORD_TYPES = [
@@ -264,7 +269,7 @@ class DBNStore:
     The data compression format (if any).
 dataset : str
     The dataset code.
-end : pd.Timestamp
+end : pd.Timestamp or None
     The query end for the data.
 limit : int | None
     The query limit for the data.
@@ -282,7 +287,7 @@ class DBNStore:
     The data record schema.
 start : pd.Timestamp
     The query start for the data.
-stype_in : SType
+stype_in : SType or None
     The query input symbology type for the data.
 stype_out : SType
     The query output symbology type for the data.
@@ -354,7 +359,7 @@ def __init__(self, data_source: DataSource) -> None:
 
     def __iter__(self) -> Generator[DBNStruct, None, None]:
         reader = self.reader
-        decoder = DbnDecoder()
+        decoder = DBNDecoder()
         while True:
             raw = reader.read(DBNStore.DBN_READ_SIZE)
             if raw:
@@ -363,7 +368,7 @@ def __iter__(self) -> Generator[DBNStruct, None, None]:
                 records = decoder.decode()
             except ValueError:
                 continue
-            for record, _ in records:
+            for record in records:
                 yield record
         else:
             if len(decoder.buffer()) > 0:
@@ -380,11 +385,19 @@ def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
     df.index = pd.to_datetime(df.index, utc=True)
     for column in df.columns:
         if column.startswith("ts_") and "delta" not in column:
-            df[column] = pd.to_datetime(df[column], utc=True)
+            df[column] = pd.to_datetime(df[column], errors="coerce", utc=True)
 
     if self.schema == Schema.DEFINITION:
-        df["expiration"] = pd.to_datetime(df["expiration"], utc=True)
-        df["activation"] = pd.to_datetime(df["activation"], utc=True)
+        df["expiration"] = pd.to_datetime(
+            df["expiration"],
+            errors="coerce",
+            utc=True,
+        )
+        df["activation"] = pd.to_datetime(
+            df["activation"],
+            errors="coerce",
+            utc=True,
+        )
 
     return df
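
The switch to `errors="coerce"` in `_apply_pretty_ts` matters when a timestamp field carries an "unset" sentinel that overflows `pd.to_datetime`'s nanosecond range; coercion turns such values into `NaT` instead of raising. A small demonstration (using the maximum `uint64` value as the sentinel, an assumption inferred from the change):

```python
import numpy as np
import pandas as pd

UNDEF_TIMESTAMP = np.iinfo(np.uint64).max  # assumed "unset" sentinel

# One valid nanosecond timestamp and one sentinel value
ts = np.array([1654871400000000000, UNDEF_TIMESTAMP], dtype=np.uint64)

# Without coercion the out-of-bounds sentinel would raise; with it we get NaT
coerced = pd.to_datetime(ts, errors="coerce", utc=True)
print(coerced)
```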

@@ -479,8 +492,7 @@ def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
     df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
     dates = [ts.date() for ts in df_index]
     df["symbol"] = [
-        self._instrument_id_index[dates[i]][p]
-        for i, p in enumerate(df["instrument_id"])
+        self._instrument_id_index[dates[i]][p] for i, p in enumerate(df["instrument_id"])
     ]
 
     return df
@@ -511,20 +523,24 @@ def dataset(self) -> str:
     return str(self._metadata.dataset)
 
 @property
-def end(self) -> pd.Timestamp:
+def end(self) -> Optional[pd.Timestamp]:
     """
     Return the query end for the data.
+    If None, the end time was not known when the data was generated.
 
     Returns
     -------
-    pd.Timestamp
+    pd.Timestamp or None
 
     Notes
     -----
     The data timestamps will not occur after `end`.
 
     """
-    return pd.Timestamp(self._metadata.end, tz="UTC")
+    end = self._metadata.end
+    if end:
+        return pd.Timestamp(self._metadata.end, tz="UTC")
+    return None
 
 @property
 def limit(self) -> Optional[int]:
@@ -625,7 +641,7 @@ def schema(self) -> Optional[Schema]:
 
     """
     schema = self._metadata.schema
-    if schema is not None:
+    if schema:
         return Schema(self._metadata.schema)
     return None
 
@@ -646,16 +662,20 @@ def start(self) -> pd.Timestamp:
     return pd.Timestamp(self._metadata.start, tz="UTC")
 
 @property
-def stype_in(self) -> SType:
+def stype_in(self) -> Optional[SType]:
     """
     Return the query input symbology type for the data.
+    If None, the records may contain mixed STypes.
 
     Returns
     -------
-    SType
+    SType or None
 
     """
-    return SType(self._metadata.stype_in)
+    stype = self._metadata.stype_in
+    if stype:
+        return SType(self._metadata.stype_in)
+    return None
 
 @property
 def stype_out(self) -> SType:
@@ -774,7 +794,7 @@ def request_full_definitions(
     """
     Request full instrument definitions based on the metadata properties.
 
-    Makes a `GET /timeseries.stream` HTTP request.
+    Makes a `GET /timeseries.get_range` HTTP request.
 
     Parameters
     ----------
@@ -792,7 +812,7 @@ def request_full_definitions(
     Calling this method will incur a cost.
 
     """
-    return client.timeseries.stream(
+    return client.timeseries.get_range(
         dataset=self.dataset,
         symbols=self.symbols,
         schema=Schema.DEFINITION,
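
The `end` and `stype_in` properties now return `None` when the underlying metadata value is falsy (e.g. an open-ended live session with no fixed end). The pattern can be sketched in isolation, assuming a raw nanosecond value of 0 means "unset":

```python
from typing import Optional

import pandas as pd

def to_optional_timestamp(raw_ns: int) -> Optional[pd.Timestamp]:
    """Convert raw nanoseconds since epoch to a UTC Timestamp, or None if unset (0)."""
    if raw_ns:
        return pd.Timestamp(raw_ns, tz="UTC")
    return None

print(to_optional_timestamp(0))  # None
print(to_optional_timestamp(1654871400000000000))
```

Callers then check for `None` rather than relying on a magic sentinel timestamp, which is the same trade-off the `Optional[...]` annotations in the diff above make.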
