databento
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎databento/common/bento.py‎
Lines changed: 78 additions & 39 deletions b/‎databento/common/bento.py‎
Lines changed: 78 additions & 39 deletions
diff --git a/‎databento/common/data.py‎
Lines changed: 0 additions & 42 deletions b/‎databento/common/data.py‎
Lines changed: 0 additions & 42 deletions
diff --git a/‎databento/common/metadata.py‎
Lines changed: 21 additions & 70 deletions b/‎databento/common/metadata.py‎
Lines changed: 21 additions & 70 deletions
diff --git a/‎databento/historical/api/timeseries.py‎
Lines changed: 1 addition & 2 deletions b/‎databento/historical/api/timeseries.py‎
Lines changed: 1 addition & 2 deletions
@@ -31,6 +31,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.7 and
 The minimum dependencies as found in the `requirements.txt` are also listed below:
 - Python (>=3.7)
 - aiohttp (>=3.7.2)
+- dbz-lib (>=0.1.1)
 - numpy (>=1.17.0)
 - pandas (>=1.1.3)
 - requests (>=2.24.0)
 
@@ -5,12 +5,7 @@
 import numpy as np
 import pandas as pd
 import zstandard
-from databento.common.data import (
-    CSV_HEADERS,
-    DBZ_COLUMNS,
-    DBZ_STRUCT_MAP,
-    DERIV_SCHEMAS,
-)
+from databento.common.data import DBZ_COLUMNS, DBZ_STRUCT_MAP, DERIV_SCHEMAS
 from databento.common.enums import Compression, Encoding, Schema, SType
 from databento.common.logging import log_debug
 from databento.common.metadata import MetadataDecoder
@@ -65,19 +60,18 @@ def source_metadata(self) -> Dict[str, Any]:
         """
         log_debug("Decoding metadata...")
         metadata_initial: bytes = self.reader().read(8)
-
-        if not metadata_initial.startswith(b"Q*M\x18"):
-            return {}
-
         magic_bin = metadata_initial[:4]
         frame_size_bin = metadata_initial[4:]
 
+        if not metadata_initial.startswith(b"P*M\x18"):
+            return {}
+
         metadata_magic = int.from_bytes(bytes=magic_bin, byteorder="little")
         metadata_frame_size = int.from_bytes(bytes=frame_size_bin, byteorder="little")
         log_debug(f"magic={metadata_magic}, frame_size={metadata_frame_size}")
 
         metadata_raw = self.reader().read(8 + metadata_frame_size)
-        return MetadataDecoder.decode_to_json(metadata_raw[8:])
+        return MetadataDecoder.decode_to_json(metadata_raw)
 
     def set_metadata(self, metadata: Dict[str, Any]) -> None:
         """
@@ -322,22 +316,6 @@ def limit(self) -> Optional[int]:
 
         return self._limit
 
-    @property
-    def encoding(self) -> Encoding:
-        """
-        Return the data encoding.
-
-        Returns
-        -------
-        Encoding
-
-        """
-        if self._encoding is None:
-            self._check_metadata()
-            self._encoding = Encoding(self._metadata["encoding"])
-
-        return self._encoding
-
     @property
     def compression(self) -> Compression:
         """
@@ -367,13 +345,9 @@ def shape(self) -> Tuple:
         """
         if self._shape is None:
             self._check_metadata()
-            if self.encoding == Encoding.DBZ:
-                ncols = len(DBZ_STRUCT_MAP[self.schema])
-            else:
-                ncols = len(CSV_HEADERS[self.schema])
             self._shape = (
                 self._metadata["record_count"],
-                ncols,
+                len(DBZ_STRUCT_MAP[self.schema]),
             )
 
         return self._shape
@@ -395,10 +369,7 @@ def mappings(self) -> List[Dict[str, List[Dict[str, str]]]]:
     @property
     def symbology(self) -> Dict[str, Any]:
         """
-        Return the symbology resolution response information for the query.
-
-        This JSON representable object should exactly match a `symbology.resolve`
-        request using the same query parameters.
+        Return the symbology resolution information for the query.
 
         Returns
         -------
@@ -407,10 +378,12 @@ def symbology(self) -> Dict[str, Any]:
         """
         self._check_metadata()
 
-        status = self._metadata["status"]
-        if status == 1:
+        status = 0
+        if self._metadata["partial"]:
+            status = 1
             message = "Partially resolved"
-        elif status == 2:
+        elif self._metadata["not_found"]:
+            status = 2
             message = "Not found"
         else:
             message = "OK"
@@ -603,6 +576,72 @@ def to_json(self, path: str) -> None:
         """
         self.to_df().to_json(path, orient="records", lines=True)
 
+    def request_symbology(self, client) -> Dict[str, Dict[str, Any]]:
+        """
+        Request symbology resolution based on the metadata properties.
+
+        Makes a `GET /symbology.resolve` HTTP request.
+
+        The symbology mappings already held in the metadata are also available
+        through the `.symbology` and `.mappings` properties.
+
+        Parameters
+        ----------
+        client : Historical
+            The historical client to use for the request.
+
+        Returns
+        -------
+        Dict[str, Dict[str, Any]]
+            A map of input symbol to output symbol across the date range.
+
+        """
+        return client.symbology.resolve(
+            dataset=self.dataset,
+            symbols=self.symbols,
+            stype_in=self.stype_in,
+            stype_out=self.stype_out,
+            start_date=self.start.date(),  # assumes datetime-like start/end
+            end_date=self.end.date(),
+        )
+
+    def request_full_definitions(
+        self,
+        client,
+        path: Optional[str] = None,
+    ) -> "Bento":
+        """
+        Request full instrument definitions based on the metadata properties.
+
+        Makes a `GET /timeseries.stream` HTTP request.
+
+        Parameters
+        ----------
+        client : Historical
+            The historical client to use for the request.
+        path : str, optional
+            The file path to write to on disk (if provided).
+
+        Returns
+        -------
+        Bento
+
+        Warnings
+        --------
+        Calling this method will incur a cost.
+
+        """
+        return client.timeseries.stream(
+            dataset=self.dataset,
+            symbols=self.symbols,
+            schema=Schema.DEFINITION,  # fixed; self.schema is not used here
+            start=self.start,
+            end=self.end,
+            stype_in=self.stype_in,
+            stype_out=self.stype_out,
+            path=path,  # forwarded unchanged; None when not provided
+        )
+
 
 class MemoryBento(Bento):
     """
 
@@ -233,45 +233,3 @@ def get_deriv_ba_fields(level: int) -> List[str]:
     Schema.TBBO: DBZ_DERIV_HEADER_FIELDS + get_deriv_ba_fields(0),
     Schema.TRADES: DBZ_DERIV_HEADER_FIELDS,
 }
-
-
-################################################################################
-# CSV headers
-################################################################################
-
-CSV_DERIV_HEADER = b"ts_recv,ts_event,ts_in_delta,publisher_id,product_id,action,side,flags,price,size,sequence"  # noqa
-CSV_OHLCV_HEADER = b"ts_event,publisher_id,product_id,open,high,low,close,volume"
-
-
-CSV_HEADERS = {
-    Schema.MBO: b"ts_recv,ts_event,ts_in_delta,publisher_id,product_id,order_id,action,side,flags,price,size,sequence",  # noqa
-    Schema.MBP_1: CSV_DERIV_HEADER + b"," + ",".join(get_deriv_ba_fields(0)).encode(),
-    Schema.MBP_10: CSV_DERIV_HEADER
-    + b","
-    + ",".join(get_deriv_ba_fields(0)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(1)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(2)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(3)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(4)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(5)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(6)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(7)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(8)).encode()
-    + b","
-    + ",".join(get_deriv_ba_fields(9)).encode(),
-    Schema.TBBO: CSV_DERIV_HEADER + b"," + ",".join(get_deriv_ba_fields(0)).encode(),
-    Schema.TRADES: CSV_DERIV_HEADER,
-    Schema.OHLCV_1S: CSV_OHLCV_HEADER,
-    Schema.OHLCV_1M: CSV_OHLCV_HEADER,
-    Schema.OHLCV_1H: CSV_OHLCV_HEADER,
-    Schema.OHLCV_1D: CSV_OHLCV_HEADER,
-    # TODO(cs) Complete headers
-}
@@ -1,91 +1,42 @@
-import json
-import struct
-from typing import Any, Dict, Optional
+from typing import Any, Dict
 
-import zstandard
-from databento.common.enums import Compression, Encoding, Schema, SType
-from databento.common.parsing import (
-    int_to_compression,
-    int_to_encoding,
-    int_to_schema,
-    int_to_stype,
-)
+from databento.common.parsing import int_to_compression, int_to_schema, int_to_stype
+from dbz_lib import decode_metadata
 
 
 class MetadataDecoder:
     """
-    Provides a decoder for Databento metadata headers.
-
-    References
-    ----------
-    https://github.com/facebook/zstd/wiki
-    https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#skippable-frames
+    Provides a decoder for DBZ metadata headers.
     """
 
-    # 4 Bytes, little-endian ordering. Value : 0x184D2A5?, which means any value
-    # from 0x184D2A50 to 0x184D2A5F. All 16 values are valid to identify a
-    # skippable frame. This specification doesn't detail any specific tagging
-    # for skippable frames.
-    ZSTD_FIRST_MAGIC = 0x184D2A50  # 407710288
-    METADATA_STRUCT_FMT = "<B16sBBBQQQBBQH40x"
-    METADATA_STRUCT_SIZE = struct.calcsize(METADATA_STRUCT_FMT)
-
     @staticmethod
-    def decode_to_json(metadata: bytes) -> Dict[str, Any]:
+    def decode_to_json(raw_metadata: bytes) -> Dict[str, Any]:  # Dict: py3.7-safe
         """
         Decode the given metadata into a JSON object (as a Python dict).
 
         Parameters
         ----------
-        metadata : bytes
+        raw_metadata : bytes
             The metadata to decode.
 
         Returns
         -------
-        Dict[str, Any]
+        dict[str, Any]
 
         """
-        fixed_fmt: str = MetadataDecoder.METADATA_STRUCT_FMT
-        fixed_buffer: bytes = metadata[: MetadataDecoder.METADATA_STRUCT_SIZE]
-        fixed_values = struct.unpack(fixed_fmt, fixed_buffer)
-
-        # Decode fixed values
-        version: int = fixed_values[0]
-        dataset: str = fixed_values[1].replace(b"\x00", b"").decode("ascii")
-        schema: Schema = int_to_schema(fixed_values[2])
-        stype_in: SType = int_to_stype(fixed_values[3])
-        stype_out: SType = int_to_stype(fixed_values[4])
-        start: int = fixed_values[5]  # UNIX nanoseconds
-        end: int = fixed_values[6]  # UNIX nanoseconds
-
-        limit_int: int = fixed_values[7]
-        limit: Optional[int] = None if limit_int == 0 else limit_int
-
-        encoding: Encoding = int_to_encoding(fixed_values[8])
-        compression: Compression = int_to_compression(fixed_values[9])
 
-        nrows: int = fixed_values[10]
-        ncols: int = fixed_values[11]
-
-        var_buffer: bytes = metadata[MetadataDecoder.METADATA_STRUCT_SIZE :]
-        var_decompressed: bytes = zstandard.decompress(var_buffer)
-        var_json: Dict[str, Any] = json.loads(var_decompressed)
-
-        json_obj = {
-            "version": version,
-            "dataset": dataset,
-            "schema": schema.value,
-            "stype_in": stype_in.value,
-            "stype_out": stype_out.value,
-            "start": start,
-            "end": end,
-            "limit": limit,
-            "encoding": encoding.value,
-            "compression": compression.value,
-            "nrows": nrows,
-            "ncols": ncols,
+        def enum_value(fn):  # wrap fn to return the .value of its result
+            return lambda x: fn(x).value
+
+        metadata = decode_metadata(raw_metadata)
+        conversion_mapping = {
+            "compression": enum_value(int_to_compression),
+            "limit": lambda lim: None if lim == 0 else lim,  # 0 becomes None
+            "mappings": lambda m: {i["native"]: i["intervals"] for i in m},
+            "schema": enum_value(int_to_schema),
+            "stype_in": enum_value(int_to_stype),
+            "stype_out": enum_value(int_to_stype),
         }
-
-        json_obj.update(var_json)
-
-        return json_obj
+        for key, conv_fn in conversion_mapping.items():  # each key must exist
+            metadata[key] = conv_fn(metadata[key])
+        return metadata
@@ -4,7 +4,7 @@
 
 import pandas as pd
 from databento.common.bento import Bento
-from databento.common.enums import Compression, Dataset, Encoding, Schema, SType
+from databento.common.enums import Dataset, Encoding, Schema, SType
 from databento.common.validation import validate_enum
 from databento.historical.api import API_VERSION
 from databento.historical.http import BentoHttpAPI
@@ -191,7 +191,6 @@ async def stream_async(
         )
 
         params.append(("encoding", Encoding.DBZ.value))  # Always requests DBZ
-        params.append(("compression", Compression.ZSTD.value))  # Always requests ZSTD
 
         self._pre_check_data_size(
             symbols=symbols,