Commit 11ede3a

VER: Release 0.20.0
See release notes.

2 parents fb08a78 + 09ad868

File tree

14 files changed, +175 -62 lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions

@@ -1,5 +1,23 @@
 # Changelog
 
+## 0.20.0 - 2023-09-21
+
+#### Enhancements
+- Added `ARCX.PILLAR.ARCX` publisher
+- Added `pretty_px` option for `batch.submit_job`, which formats prices to the correct scale using the fixed-precision scalar 1e-9 (available for CSV and JSON text encodings)
+- Added `pretty_ts` option for `batch.submit_job`, which formats timestamps as ISO 8601 strings (available for CSV and JSON text encodings)
+- Added `map_symbols` option for `batch.submit_job`, which appends a symbol field to each text-encoded record (available for CSV and JSON text encodings)
+- Added `split_symbols` option for `batch.submit_job`, which splits files by raw symbol
+- Upgraded `databento-dbn` to 0.10.2
+
+#### Bug fixes
+- Fixed an issue where no disconnection exception was raised when iterating the `Live` client
+- Fixed an issue where calling `DBNStore.to_df`, `DBNStore.to_json`, or `DBNStore.to_csv` with `map_symbols=True` would cause a `TypeError`
+
+#### Breaking changes
+- Removed `default_value` parameter from `Historical.symbology.resolve`
+- Swapped the ordering of the `pretty_px` and `pretty_ts` boolean parameters
+
 ## 0.19.1 - 2023-09-08
 
 #### Bug fixes
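Note on the breaking change above: code that passed `pretty_px` and `pretty_ts` positionally to `DBNStore.to_df`, `DBNStore.to_csv`, or `DBNStore.to_json` now gets the flags in swapped positions. A minimal migration sketch; the input file name is hypothetical:

from databento import DBNStore

store = DBNStore.from_file("example.dbn.zst")

# Before 0.20.0 the flag order was (pretty_ts, pretty_px, ...); it is now
# (pretty_px, pretty_ts, ...), so a positional call like to_df(True, False)
# silently changes meaning. Passing the flags by keyword is safe either way:
df = store.to_df(
    pretty_px=True,  # prices: int -> float via the fixed-precision scalar 1e-9
    pretty_ts=True,  # timestamps: UNIX ns int -> tz-aware UTC pd.Timestamp
)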

README.md

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
 The minimum dependencies as found in the `pyproject.toml` are also listed below:
 - python = "^3.8"
 - aiohttp = "^3.8.3"
-- databento-dbn = "0.8.3"
+- databento-dbn = "0.10.2"
 - numpy = ">=1.23.5"
 - pandas = ">=1.5.3"
 - requests = ">=2.24.0"

databento/common/dbnstore.py

Lines changed: 42 additions & 40 deletions

@@ -9,13 +9,7 @@
 from io import BytesIO
 from os import PathLike
 from pathlib import Path
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    overload,
-)
+from typing import IO, TYPE_CHECKING, Any, Callable, overload
 
 import databento_dbn
 import numpy as np
@@ -113,14 +107,14 @@ def format_dataframe(
         if column in df.columns:
             df[column] = df[column].where(df[column] != type_max, np.nan)
 
-    if pretty_ts:
-        for ts_field in struct._timestamp_fields:
-            df[ts_field] = pd.to_datetime(df[ts_field], errors="coerce", utc=True)
-
     if pretty_px:
         for px_field in struct._price_fields:
             df[px_field] = df[px_field].replace(INT64_NULL, np.nan) / FIXED_PRICE_SCALE
 
+    if pretty_ts:
+        for ts_field in struct._timestamp_fields:
+            df[ts_field] = pd.to_datetime(df[ts_field], errors="coerce", utc=True)
+
     for column, dtype in SCHEMA_DTYPES_MAP[schema]:
         if dtype.startswith("S") and column not in struct._hidden_fields:
             df[column] = df[column].str.decode("utf-8")
@@ -826,9 +820,9 @@ def request_symbology(self, client: Historical) -> dict[str, Any]:
     def to_csv(
         self,
         path: Path | str,
-        pretty_ts: bool = True,
         pretty_px: bool = True,
-        map_symbols: bool = True,
+        pretty_ts: bool = True,
+        map_symbols: bool | None = None,
         schema: Schema | str | None = None,
     ) -> None:
         """
@@ -838,16 +832,16 @@ def to_csv(
         ----------
         path : Path or str
             The file path to write to.
-        pretty_ts : bool, default True
-            If all timestamp columns should be converted from UNIX nanosecond
-            `int` to `pd.Timestamp` tz-aware (UTC).
         pretty_px : bool, default True
             If all price columns should be converted from `int` to `float` at
-            the correct scale (using the fixed precision scalar 1e-9). Null
+            the correct scale (using the fixed-precision scalar 1e-9). Null
             prices are replaced with an empty string.
+        pretty_ts : bool, default True
+            If all timestamp columns should be converted from UNIX nanosecond
+            `int` to tz-aware UTC `pd.Timestamp`.
         map_symbols : bool, default True
             If symbology mappings from the metadata should be used to create
-            a 'symbol' column, mapping the instrument ID to its native symbol for
+            a 'symbol' column, mapping the instrument ID to its requested symbol for
             every record.
         schema : Schema or str, optional
             The schema for the csv.
@@ -864,8 +858,8 @@ def to_csv(
         """
         df_iter = self.to_df(
-            pretty_ts=pretty_ts,
             pretty_px=pretty_px,
+            pretty_ts=pretty_ts,
             map_symbols=map_symbols,
             schema=schema,
             count=2**16,
@@ -881,9 +875,9 @@ def to_csv(
     @overload
     def to_df(
         self,
-        pretty_ts: bool = ...,
         pretty_px: bool = ...,
-        map_symbols: bool = ...,
+        pretty_ts: bool = ...,
+        map_symbols: bool | None = ...,
         schema: Schema | str | None = ...,
         count: None = ...,
     ) -> pd.DataFrame:
@@ -892,19 +886,19 @@ def to_df(
     @overload
     def to_df(
         self,
-        pretty_ts: bool = ...,
         pretty_px: bool = ...,
-        map_symbols: bool = ...,
+        pretty_ts: bool = ...,
+        map_symbols: bool | None = ...,
         schema: Schema | str | None = ...,
         count: int = ...,
     ) -> DataFrameIterator:
         ...
 
     def to_df(
         self,
-        pretty_ts: bool = True,
         pretty_px: bool = True,
-        map_symbols: bool = True,
+        pretty_ts: bool = True,
+        map_symbols: bool | None = None,
         schema: Schema | str | None = None,
         count: int | None = None,
     ) -> pd.DataFrame | DataFrameIterator:
@@ -913,16 +907,16 @@ def to_df(
 
         Parameters
         ----------
-        pretty_ts : bool, default True
-            If all timestamp columns should be converted from UNIX nanosecond
-            `int` to `pd.Timestamp` tz-aware (UTC).
         pretty_px : bool, default True
             If all price columns should be converted from `int` to `float` at
-            the correct scale (using the fixed precision scalar 1e-9). Null
+            the correct scale (using the fixed-precision scalar 1e-9). Null
             prices are replaced with NaN.
+        pretty_ts : bool, default True
+            If all timestamp columns should be converted from UNIX nanosecond
+            `int` to tz-aware UTC `pd.Timestamp`.
         map_symbols : bool, default True
             If symbology mappings from the metadata should be used to create
-            a 'symbol' column, mapping the instrument ID to its native symbol for
+            a 'symbol' column, mapping the instrument ID to its requested symbol for
             every record.
         schema : Schema or str, optional
             The schema for the dataframe.
@@ -951,8 +945,16 @@ def to_df(
             raise ValueError("a schema must be specified for mixed DBN data")
         schema = self.schema
 
-        if not self._instrument_id_index:
-            self._instrument_id_index = self._build_instrument_id_index()
+        if map_symbols is None:
+            map_symbols = self.stype_out == SType.INSTRUMENT_ID
+
+        if map_symbols:
+            if self.stype_out != SType.INSTRUMENT_ID:
+                raise ValueError(
+                    "`map_symbols` is not supported when `stype_out` is not 'instrument_id'",
+                )
+            if not self._instrument_id_index:
+                self._instrument_id_index = self._build_instrument_id_index()
 
         if count is None:
             records = iter([self.to_ndarray(schema)])
@@ -1000,9 +1002,9 @@ def to_file(self, path: Path | str) -> None:
     def to_json(
         self,
         path: Path | str,
-        pretty_ts: bool = True,
         pretty_px: bool = True,
-        map_symbols: bool = True,
+        pretty_ts: bool = True,
+        map_symbols: bool | None = None,
         schema: Schema | str | None = None,
     ) -> None:
         """
@@ -1012,15 +1014,15 @@ def to_json(
         ----------
         path : Path or str
             The file path to write to.
-        pretty_ts : bool, default True
-            If all timestamp columns should be converted from UNIX nanosecond
-            `int` to `pd.Timestamp` tz-aware (UTC).
         pretty_px : bool, default True
             If all price columns should be converted from `int` to `float` at
-            the correct scale (using the fixed precision scalar 1e-9).
+            the correct scale (using the fixed-precision scalar 1e-9).
+        pretty_ts : bool, default True
+            If all timestamp columns should be converted from UNIX nanosecond
+            `int` to tz-aware UTC `pd.Timestamp`.
         map_symbols : bool, default True
             If symbology mappings from the metadata should be used to create
-            a 'symbol' column, mapping the instrument ID to its native symbol for
+            a 'symbol' column, mapping the instrument ID to its requested symbol for
             every record.
         schema : Schema or str, optional
             The schema for the json.
@@ -1037,8 +1039,8 @@ def to_json(
         """
         df_iter = self.to_df(
-            pretty_ts=pretty_ts,
             pretty_px=pretty_px,
+            pretty_ts=pretty_ts,
             map_symbols=map_symbols,
             schema=schema,
             count=2**16,
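The `map_symbols` change above also alters the default: `None` now means "map when possible". A sketch of the resulting behavior, assuming a hypothetical input file; the `ValueError` branch fires when the store's `stype_out` is not `instrument_id`:

from databento import DBNStore

store = DBNStore.from_file("example.dbn.zst")

# map_symbols=None (the new default) resolves to True only when
# stype_out == SType.INSTRUMENT_ID, so the 'symbol' column appears
# automatically whenever an instrument-ID index can be built.
df = store.to_df()

try:
    # Forcing map_symbols=True for any other stype_out now raises a
    # clear ValueError rather than the TypeError fixed in this release.
    df = store.to_df(map_symbols=True)
except ValueError as exc:
    print(exc)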

databento/common/publishers.py

Lines changed: 13 additions & 0 deletions

@@ -692,6 +692,8 @@ class Publisher(StringyMixin, str, Enum):
         DBEQ Basic - IEX.
     DBEQ_BASIC_EPRL
         DBEQ Basic - MIAX Pearl.
+    ARCX_PILLAR_ARCX
+        NYSE Arca Integrated.
 
     """
 
@@ -737,6 +739,7 @@ class Publisher(StringyMixin, str, Enum):
     DBEQ_BASIC_XCIS = "DBEQ.BASIC.XCIS"
     DBEQ_BASIC_IEXG = "DBEQ.BASIC.IEXG"
     DBEQ_BASIC_EPRL = "DBEQ.BASIC.EPRL"
+    ARCX_PILLAR_ARCX = "ARCX.PILLAR.ARCX"
 
     @classmethod
     def from_int(cls, value: int) -> Publisher:
@@ -827,6 +830,8 @@ def from_int(cls, value: int) -> Publisher:
             return Publisher.DBEQ_BASIC_IEXG
         if value == 42:
             return Publisher.DBEQ_BASIC_EPRL
+        if value == 43:
+            return Publisher.ARCX_PILLAR_ARCX
         raise ValueError(f"Integer value {value} does not correspond with any Publisher variant")
 
     def to_int(self) -> int:
@@ -917,6 +922,8 @@ def to_int(self) -> int:
             return 41
         if self == Publisher.DBEQ_BASIC_EPRL:
             return 42
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return 43
         raise ValueError("Invalid Publisher")
 
     @property
     def venue(self) -> Venue:
@@ -1007,6 +1014,8 @@ def venue(self) -> Venue:
             return Venue.IEXG
         if self == Publisher.DBEQ_BASIC_EPRL:
             return Venue.EPRL
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return Venue.ARCX
         raise ValueError("Unexpected Publisher value")
 
     @property
     def dataset(self) -> Dataset:
@@ -1097,6 +1106,8 @@ def dataset(self) -> Dataset:
             return Dataset.DBEQ_BASIC
         if self == Publisher.DBEQ_BASIC_EPRL:
             return Dataset.DBEQ_BASIC
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return Dataset.ARCX_PILLAR
         raise ValueError("Unexpected Publisher value")
 
     @property
@@ -1188,4 +1199,6 @@ def description(self) -> str:
             return "DBEQ Basic - IEX"
         if self == Publisher.DBEQ_BASIC_EPRL:
             return "DBEQ Basic - MIAX Pearl"
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return "NYSE Arca Integrated"
         raise ValueError("Unexpected Publisher value")

databento/historical/api/batch.py

Lines changed: 22 additions & 1 deletion

@@ -54,6 +54,10 @@ def submit_job(
         end: pd.Timestamp | date | str | int | None = None,
         encoding: Encoding | str = "dbn",
         compression: Compression | str = "zstd",
+        pretty_px: bool = False,
+        pretty_ts: bool = False,
+        map_symbols: bool = False,
+        split_symbols: bool = False,
         split_duration: SplitDuration | str = "day",
         split_size: int | None = None,
         packaging: Packaging | str | None = None,
@@ -91,6 +95,17 @@ def submit_job(
             The data encoding.
         compression : Compression or str {'none', 'zstd'}, default 'zstd'
             The data compression format (if any).
+        pretty_px : bool, default False
+            If prices should be formatted to the correct scale (using the
+            fixed-precision scalar 1e-9). Only applicable for 'csv' or 'json' encodings.
+        pretty_ts : bool, default False
+            If timestamps should be formatted as ISO 8601 strings.
+            Only applicable for 'csv' or 'json' encodings.
+        map_symbols : bool, default False
+            If the requested symbol should be appended to every text-encoded record.
+            Only applicable for 'csv' or 'json' encodings.
+        split_symbols : bool, default False
+            If files should be split by raw symbol. Cannot be requested with `'ALL_SYMBOLS'`.
         split_duration : SplitDuration or str {'day', 'week', 'month', 'none'}, default 'day'
             The maximum time duration before batched data is split into multiple files.
             A week starts on Sunday UTC.
@@ -131,7 +146,13 @@ def submit_job(
             "compression": str(validate_enum(compression, Compression, "compression"))
             if compression
             else None,
-            "split_duration": str(validate_enum(split_duration, SplitDuration, "split_duration")),
+            "pretty_px": pretty_px,
+            "pretty_ts": pretty_ts,
+            "map_symbols": map_symbols,
+            "split_symbols": split_symbols,
+            "split_duration": str(
+                validate_enum(split_duration, SplitDuration, "split_duration"),
+            ),
             "packaging": str(validate_enum(packaging, Packaging, "packaging"))
             if packaging
             else None,
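A usage sketch for the new batch flags; the API key, dataset, symbols, and dates are placeholders, and the flags only apply to the text encodings:

import databento as db

client = db.Historical(key="YOUR_API_KEY")

job = client.batch.submit_job(
    dataset="GLBX.MDP3",
    symbols=["ESZ3"],
    schema="trades",
    start="2023-09-01",
    end="2023-09-02",
    encoding="csv",       # 'csv' or 'json'; the flags below do not apply to 'dbn'
    pretty_px=True,       # scale prices with the fixed-precision scalar 1e-9
    pretty_ts=True,       # format timestamps as ISO 8601 strings
    map_symbols=True,     # append the requested symbol to every record
    split_symbols=False,  # per-symbol file splitting; not valid with 'ALL_SYMBOLS'
)
print(job)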

databento/historical/api/symbology.py

Lines changed: 0 additions & 4 deletions

@@ -33,7 +33,6 @@ def resolve(
         stype_out: SType | str,
         start_date: date | str,
         end_date: date | str | None = None,
-        default_value: str = "",
     ) -> dict[str, Any]:
         """
         Request symbology mappings resolution from Databento.
@@ -54,8 +53,6 @@ def resolve(
             The start date (UTC) of the request time range (inclusive).
         end_date : date or str, optional
             The end date (UTC) of the request time range (exclusive).
-        default_value : str, default '' (empty string)
-            The default value to return if a symbol cannot be resolved.
 
         Returns
         -------
@@ -73,7 +70,6 @@ def resolve(
             "stype_out": str(validate_enum(stype_out, SType, "stype_out")),
             "start_date": datetime_to_date_string(start_date),
             "end_date": optional_date_to_string(end_date),
-            "default_value": default_value,
         }
 
         response: Response = self._post(
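With `default_value` removed, any fallback for unresolved symbols must now be applied client-side. A sketch under the assumption that the resolve response carries `result` and `not_found` fields (field names assumed from the symbology API response shape; key, dataset, symbols, and dates are placeholders):

import databento as db

client = db.Historical(key="YOUR_API_KEY")

resolution = client.symbology.resolve(
    dataset="GLBX.MDP3",
    symbols=["ESZ3"],
    stype_in="raw_symbol",
    stype_out="instrument_id",
    start_date="2023-09-01",
)

# Previously default_value="" was substituted for unresolved symbols;
# now choose your own sentinel for anything the service could not resolve.
mappings = dict(resolution["result"])
for symbol in resolution.get("not_found", []):
    mappings[symbol] = ""  # caller-chosen default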

databento/live/client.py

Lines changed: 3 additions & 2 deletions

@@ -145,7 +145,6 @@ def __next__(self) -> DBNRecord:
                 "yielding %s record from next",
                 type(record).__name__,
             )
-            self._dbn_queue.task_done()
             return record
         finally:
             if not self._dbn_queue.half_full() and not self._session.is_reading():
@@ -156,6 +155,7 @@ def __next__(self) -> DBNRecord:
                 self._session.resume_reading()
 
         self._dbn_queue._enabled.clear()
+        self.block_for_close()
         raise StopIteration
 
     def __repr__(self) -> str:
@@ -528,8 +528,9 @@ def block_for_close(
             self.terminate()
             if isinstance(exc, KeyboardInterrupt):
                 raise
+        except BentoError:
+            raise
         except Exception:
-            logger.exception("exception encountered blocking for close")
             raise BentoError("connection lost") from None
 
     async def wait_for_close(
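Net effect of the two hunks above: a lost connection now surfaces as a `BentoError` while iterating, instead of the iterator ending silently. A minimal sketch; the API key, dataset, and symbols are placeholders:

import databento as db
from databento.common.error import BentoError

live = db.Live(key="YOUR_API_KEY")
live.subscribe(dataset="GLBX.MDP3", schema="trades", symbols=["ESZ3"])

try:
    # Iterating starts the session; on disconnect, __next__ now calls
    # block_for_close(), which re-raises BentoError rather than logging it.
    for record in live:
        print(record)
except BentoError:
    print("live session lost connection")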
