Commit 11ede3a

VER: Release 0.20.0
See release notes.

2 parents fb08a78 + 09ad868

File tree

14 files changed, +175 -62 lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions

@@ -1,5 +1,23 @@
 # Changelog
 
+## 0.20.0 - 2023-09-21
+
+#### Enhancements
+- Added `ARCX.PILLAR.ARCX` publisher
+- Added `pretty_px` option for `batch.submit_job`, which formats prices to the correct scale using the fixed-precision scalar 1e-9 (available for CSV and JSON text encodings)
+- Added `pretty_ts` option for `batch.submit_job`, which formats timestamps as ISO 8601 strings (available for CSV and JSON text encodings)
+- Added `map_symbols` option for `batch.submit_job`, which appends a symbol field to each text-encoded record (available for CSV and JSON text encodings)
+- Added `split_symbols` option for `batch.submit_job`, which splits files by raw symbol
+- Upgraded `databento-dbn` to 0.10.2
+
+#### Bug fixes
+- Fixed an issue where no disconnection exception was raised when iterating the `Live` client
+- Fixed an issue where calling `DBNStore.to_df`, `DBNStore.to_json`, or `DBNStore.to_csv` with `map_symbols=True` would cause a `TypeError`
+
+#### Breaking changes
+- Removed `default_value` parameter from `Historical.symbology.resolve`
+- Swapped the ordering of the `pretty_px` and `pretty_ts` boolean parameters
+
 ## 0.19.1 - 2023-09-08
 
 #### Bug fixes
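Note on the breaking change above: code that passed `pretty_px` and `pretty_ts` positionally to `DBNStore.to_df`, `DBNStore.to_csv`, or `DBNStore.to_json` now gets the flags in swapped positions. A minimal migration sketch; the input file name is hypothetical:

from databento import DBNStore

store = DBNStore.from_file("example.dbn.zst")

# Before 0.20.0 the flag order was (pretty_ts, pretty_px, ...); it is now
# (pretty_px, pretty_ts, ...), so a positional call like to_df(True, False)
# silently changes meaning. Passing the flags by keyword is safe either way:
df = store.to_df(
    pretty_px=True,  # prices: int -> float via the fixed-precision scalar 1e-9
    pretty_ts=True,  # timestamps: UNIX ns int -> tz-aware UTC pd.Timestamp
)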

README.md

Lines changed: 1 addition & 1 deletion

@@ -32,7 +32,7 @@ The library is fully compatible with the latest distribution of Anaconda 3.8 and
 The minimum dependencies as found in the `pyproject.toml` are also listed below:
 - python = "^3.8"
 - aiohttp = "^3.8.3"
-- databento-dbn = "0.8.3"
+- databento-dbn = "0.10.2"
 - numpy = ">=1.23.5"
 - pandas = ">=1.5.3"
 - requests = ">=2.24.0"

databento/common/dbnstore.py

Lines changed: 42 additions & 40 deletions

@@ -9,13 +9,7 @@
 from io import BytesIO
 from os import PathLike
 from pathlib import Path
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    overload,
-)
+from typing import IO, TYPE_CHECKING, Any, Callable, overload
 
 import databento_dbn
 import numpy as np
@@ -113,14 +107,14 @@ def format_dataframe(
         if column in df.columns:
             df[column] = df[column].where(df[column] != type_max, np.nan)
 
-    if pretty_ts:
-        for ts_field in struct._timestamp_fields:
-            df[ts_field] = pd.to_datetime(df[ts_field], errors="coerce", utc=True)
-
     if pretty_px:
         for px_field in struct._price_fields:
             df[px_field] = df[px_field].replace(INT64_NULL, np.nan) / FIXED_PRICE_SCALE
 
+    if pretty_ts:
+        for ts_field in struct._timestamp_fields:
+            df[ts_field] = pd.to_datetime(df[ts_field], errors="coerce", utc=True)
+
     for column, dtype in SCHEMA_DTYPES_MAP[schema]:
         if dtype.startswith("S") and column not in struct._hidden_fields:
             df[column] = df[column].str.decode("utf-8")
@@ -826,9 +820,9 @@ def request_symbology(self, client: Historical) -> dict[str, Any]:
     def to_csv(
         self,
         path: Path | str,
-        pretty_ts: bool = True,
         pretty_px: bool = True,
-        map_symbols: bool = True,
+        pretty_ts: bool = True,
+        map_symbols: bool | None = None,
         schema: Schema | str | None = None,
     ) -> None:
         """
@@ -838,16 +832,16 @@ def to_csv(
         ----------
         path : Path or str
             The file path to write to.
-        pretty_ts : bool, default True
-            If all timestamp columns should be converted from UNIX nanosecond
-            `int` to `pd.Timestamp` tz-aware (UTC).
         pretty_px : bool, default True
             If all price columns should be converted from `int` to `float` at
-            the correct scale (using the fixed precision scalar 1e-9). Null
+            the correct scale (using the fixed-precision scalar 1e-9). Null
             prices are replaced with an empty string.
+        pretty_ts : bool, default True
+            If all timestamp columns should be converted from UNIX nanosecond
+            `int` to tz-aware UTC `pd.Timestamp`.
         map_symbols : bool, default True
             If symbology mappings from the metadata should be used to create
-            a 'symbol' column, mapping the instrument ID to its native symbol for
+            a 'symbol' column, mapping the instrument ID to its requested symbol for
             every record.
         schema : Schema or str, optional
             The schema for the csv.
@@ -864,8 +858,8 @@ def to_csv(
         """
         df_iter = self.to_df(
-            pretty_ts=pretty_ts,
             pretty_px=pretty_px,
+            pretty_ts=pretty_ts,
             map_symbols=map_symbols,
             schema=schema,
             count=2**16,
@@ -881,9 +875,9 @@ def to_csv(
     @overload
     def to_df(
         self,
-        pretty_ts: bool = ...,
         pretty_px: bool = ...,
-        map_symbols: bool = ...,
+        pretty_ts: bool = ...,
+        map_symbols: bool | None = ...,
         schema: Schema | str | None = ...,
         count: None = ...,
     ) -> pd.DataFrame:
@@ -892,19 +886,19 @@ def to_df(
     @overload
     def to_df(
         self,
-        pretty_ts: bool = ...,
         pretty_px: bool = ...,
-        map_symbols: bool = ...,
+        pretty_ts: bool = ...,
+        map_symbols: bool | None = ...,
         schema: Schema | str | None = ...,
         count: int = ...,
     ) -> DataFrameIterator:
         ...
 
     def to_df(
         self,
-        pretty_ts: bool = True,
         pretty_px: bool = True,
-        map_symbols: bool = True,
+        pretty_ts: bool = True,
+        map_symbols: bool | None = None,
         schema: Schema | str | None = None,
         count: int | None = None,
     ) -> pd.DataFrame | DataFrameIterator:
@@ -913,16 +907,16 @@ def to_df(
 
         Parameters
         ----------
-        pretty_ts : bool, default True
-            If all timestamp columns should be converted from UNIX nanosecond
-            `int` to `pd.Timestamp` tz-aware (UTC).
         pretty_px : bool, default True
             If all price columns should be converted from `int` to `float` at
-            the correct scale (using the fixed precision scalar 1e-9). Null
+            the correct scale (using the fixed-precision scalar 1e-9). Null
             prices are replaced with NaN.
+        pretty_ts : bool, default True
+            If all timestamp columns should be converted from UNIX nanosecond
+            `int` to tz-aware UTC `pd.Timestamp`.
         map_symbols : bool, default True
             If symbology mappings from the metadata should be used to create
-            a 'symbol' column, mapping the instrument ID to its native symbol for
+            a 'symbol' column, mapping the instrument ID to its requested symbol for
             every record.
         schema : Schema or str, optional
             The schema for the dataframe.
@@ -951,8 +945,16 @@ def to_df(
             raise ValueError("a schema must be specified for mixed DBN data")
         schema = self.schema
 
-        if not self._instrument_id_index:
-            self._instrument_id_index = self._build_instrument_id_index()
+        if map_symbols is None:
+            map_symbols = self.stype_out == SType.INSTRUMENT_ID
+
+        if map_symbols:
+            if self.stype_out != SType.INSTRUMENT_ID:
+                raise ValueError(
+                    "`map_symbols` is not supported when `stype_out` is not 'instrument_id'",
+                )
+            if not self._instrument_id_index:
+                self._instrument_id_index = self._build_instrument_id_index()
 
         if count is None:
             records = iter([self.to_ndarray(schema)])
@@ -1000,9 +1002,9 @@ def to_file(self, path: Path | str) -> None:
     def to_json(
         self,
         path: Path | str,
-        pretty_ts: bool = True,
         pretty_px: bool = True,
-        map_symbols: bool = True,
+        pretty_ts: bool = True,
+        map_symbols: bool | None = None,
         schema: Schema | str | None = None,
     ) -> None:
         """
@@ -1012,15 +1014,15 @@ def to_json(
         ----------
         path : Path or str
             The file path to write to.
-        pretty_ts : bool, default True
-            If all timestamp columns should be converted from UNIX nanosecond
-            `int` to `pd.Timestamp` tz-aware (UTC).
         pretty_px : bool, default True
             If all price columns should be converted from `int` to `float` at
-            the correct scale (using the fixed precision scalar 1e-9).
+            the correct scale (using the fixed-precision scalar 1e-9).
+        pretty_ts : bool, default True
+            If all timestamp columns should be converted from UNIX nanosecond
+            `int` to tz-aware UTC `pd.Timestamp`.
         map_symbols : bool, default True
             If symbology mappings from the metadata should be used to create
-            a 'symbol' column, mapping the instrument ID to its native symbol for
+            a 'symbol' column, mapping the instrument ID to its requested symbol for
             every record.
         schema : Schema or str, optional
             The schema for the json.
@@ -1037,8 +1039,8 @@ def to_json(
         """
         df_iter = self.to_df(
-            pretty_ts=pretty_ts,
             pretty_px=pretty_px,
+            pretty_ts=pretty_ts,
             map_symbols=map_symbols,
             schema=schema,
             count=2**16,
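The `map_symbols` change above also alters the default: `None` now means "map when possible". A sketch of the resulting behavior, assuming a hypothetical input file; the `ValueError` branch fires when the store's `stype_out` is not `instrument_id`:

from databento import DBNStore

store = DBNStore.from_file("example.dbn.zst")

# map_symbols=None (the new default) resolves to True only when
# stype_out == SType.INSTRUMENT_ID, so the 'symbol' column appears
# automatically whenever an instrument-ID index can be built.
df = store.to_df()

try:
    # Forcing map_symbols=True for any other stype_out now raises a
    # clear ValueError rather than the TypeError fixed in this release.
    df = store.to_df(map_symbols=True)
except ValueError as exc:
    print(exc)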

databento/common/publishers.py

Lines changed: 13 additions & 0 deletions

@@ -692,6 +692,8 @@ class Publisher(StringyMixin, str, Enum):
         DBEQ Basic - IEX.
     DBEQ_BASIC_EPRL
         DBEQ Basic - MIAX Pearl.
+    ARCX_PILLAR_ARCX
+        NYSE Arca Integrated.
 
     """
 
@@ -737,6 +739,7 @@ class Publisher(StringyMixin, str, Enum):
     DBEQ_BASIC_XCIS = "DBEQ.BASIC.XCIS"
     DBEQ_BASIC_IEXG = "DBEQ.BASIC.IEXG"
     DBEQ_BASIC_EPRL = "DBEQ.BASIC.EPRL"
+    ARCX_PILLAR_ARCX = "ARCX.PILLAR.ARCX"
 
     @classmethod
     def from_int(cls, value: int) -> Publisher:
@@ -827,6 +830,8 @@ def from_int(cls, value: int) -> Publisher:
             return Publisher.DBEQ_BASIC_IEXG
         if value == 42:
             return Publisher.DBEQ_BASIC_EPRL
+        if value == 43:
+            return Publisher.ARCX_PILLAR_ARCX
         raise ValueError(f"Integer value {value} does not correspond with any Publisher variant")
 
     def to_int(self) -> int:
@@ -917,6 +922,8 @@ def to_int(self) -> int:
             return 41
         if self == Publisher.DBEQ_BASIC_EPRL:
             return 42
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return 43
         raise ValueError("Invalid Publisher")
 
     @property
     def venue(self) -> Venue:
@@ -1007,6 +1014,8 @@ def venue(self) -> Venue:
             return Venue.IEXG
         if self == Publisher.DBEQ_BASIC_EPRL:
             return Venue.EPRL
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return Venue.ARCX
         raise ValueError("Unexpected Publisher value")
 
     @property
     def dataset(self) -> Dataset:
@@ -1097,6 +1106,8 @@ def dataset(self) -> Dataset:
             return Dataset.DBEQ_BASIC
         if self == Publisher.DBEQ_BASIC_EPRL:
             return Dataset.DBEQ_BASIC
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return Dataset.ARCX_PILLAR
         raise ValueError("Unexpected Publisher value")
 
     @property
@@ -1188,4 +1199,6 @@ def description(self) -> str:
             return "DBEQ Basic - IEX"
         if self == Publisher.DBEQ_BASIC_EPRL:
             return "DBEQ Basic - MIAX Pearl"
+        if self == Publisher.ARCX_PILLAR_ARCX:
+            return "NYSE Arca Integrated"
         raise ValueError("Unexpected Publisher value")

databento/historical/api/batch.py

Lines changed: 22 additions & 1 deletion

@@ -54,6 +54,10 @@ def submit_job(
         end: pd.Timestamp | date | str | int | None = None,
         encoding: Encoding | str = "dbn",
         compression: Compression | str = "zstd",
+        pretty_px: bool = False,
+        pretty_ts: bool = False,
+        map_symbols: bool = False,
+        split_symbols: bool = False,
         split_duration: SplitDuration | str = "day",
         split_size: int | None = None,
         packaging: Packaging | str | None = None,
@@ -91,6 +95,17 @@ def submit_job(
             The data encoding.
         compression : Compression or str {'none', 'zstd'}, default 'zstd'
             The data compression format (if any).
+        pretty_px : bool, default False
+            If prices should be formatted to the correct scale (using the
+            fixed-precision scalar 1e-9). Only applicable for 'csv' or 'json' encodings.
+        pretty_ts : bool, default False
+            If timestamps should be formatted as ISO 8601 strings.
+            Only applicable for 'csv' or 'json' encodings.
+        map_symbols : bool, default False
+            If the requested symbol should be appended to every text-encoded record.
+            Only applicable for 'csv' or 'json' encodings.
+        split_symbols : bool, default False
+            If files should be split by raw symbol. Cannot be requested with `'ALL_SYMBOLS'`.
         split_duration : SplitDuration or str {'day', 'week', 'month', 'none'}, default 'day'
             The maximum time duration before batched data is split into multiple files.
             A week starts on Sunday UTC.
@@ -131,7 +146,13 @@ def submit_job(
             "compression": str(validate_enum(compression, Compression, "compression"))
             if compression
             else None,
-            "split_duration": str(validate_enum(split_duration, SplitDuration, "split_duration")),
+            "pretty_px": pretty_px,
+            "pretty_ts": pretty_ts,
+            "map_symbols": map_symbols,
+            "split_symbols": split_symbols,
+            "split_duration": str(
+                validate_enum(split_duration, SplitDuration, "split_duration"),
+            ),
             "packaging": str(validate_enum(packaging, Packaging, "packaging"))
             if packaging
             else None,
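A usage sketch for the new batch flags; the API key, dataset, symbols, and dates are placeholders, and the flags only apply to the text encodings:

import databento as db

client = db.Historical(key="YOUR_API_KEY")

job = client.batch.submit_job(
    dataset="GLBX.MDP3",
    symbols=["ESZ3"],
    schema="trades",
    start="2023-09-01",
    end="2023-09-02",
    encoding="csv",       # 'csv' or 'json'; the flags below do not apply to 'dbn'
    pretty_px=True,       # scale prices with the fixed-precision scalar 1e-9
    pretty_ts=True,       # format timestamps as ISO 8601 strings
    map_symbols=True,     # append the requested symbol to every record
    split_symbols=False,  # per-symbol file splitting; not valid with 'ALL_SYMBOLS'
)
print(job)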

databento/historical/api/symbology.py

Lines changed: 0 additions & 4 deletions

@@ -33,7 +33,6 @@ def resolve(
         stype_out: SType | str,
         start_date: date | str,
         end_date: date | str | None = None,
-        default_value: str = "",
     ) -> dict[str, Any]:
         """
         Request symbology mappings resolution from Databento.
@@ -54,8 +53,6 @@ def resolve(
             The start date (UTC) of the request time range (inclusive).
         end_date : date or str, optional
             The end date (UTC) of the request time range (exclusive).
-        default_value : str, default '' (empty string)
-            The default value to return if a symbol cannot be resolved.
 
         Returns
         -------
@@ -73,7 +70,6 @@ def resolve(
             "stype_out": str(validate_enum(stype_out, SType, "stype_out")),
             "start_date": datetime_to_date_string(start_date),
             "end_date": optional_date_to_string(end_date),
-            "default_value": default_value,
         }
 
         response: Response = self._post(
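With `default_value` removed, any fallback for unresolved symbols must now be applied client-side. A sketch under the assumption that the resolve response carries `result` and `not_found` fields (field names assumed from the symbology API response shape; key, dataset, symbols, and dates are placeholders):

import databento as db

client = db.Historical(key="YOUR_API_KEY")

resolution = client.symbology.resolve(
    dataset="GLBX.MDP3",
    symbols=["ESZ3"],
    stype_in="raw_symbol",
    stype_out="instrument_id",
    start_date="2023-09-01",
)

# Previously default_value="" was substituted for unresolved symbols;
# now choose your own sentinel for anything the service could not resolve.
mappings = dict(resolution["result"])
for symbol in resolution.get("not_found", []):
    mappings[symbol] = ""  # caller-chosen default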

databento/live/client.py

Lines changed: 3 additions & 2 deletions

@@ -145,7 +145,6 @@ def __next__(self) -> DBNRecord:
                 "yielding %s record from next",
                 type(record).__name__,
             )
-            self._dbn_queue.task_done()
             return record
         finally:
             if not self._dbn_queue.half_full() and not self._session.is_reading():
@@ -156,6 +155,7 @@ def __next__(self) -> DBNRecord:
                 self._session.resume_reading()
 
         self._dbn_queue._enabled.clear()
+        self.block_for_close()
         raise StopIteration
 
     def __repr__(self) -> str:
@@ -528,8 +528,9 @@ def block_for_close(
             self.terminate()
             if isinstance(exc, KeyboardInterrupt):
                 raise
+        except BentoError:
+            raise
         except Exception:
-            logger.exception("exception encountered blocking for close")
             raise BentoError("connection lost") from None
 
     async def wait_for_close(
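Net effect of the two hunks above: a lost connection now surfaces as a `BentoError` while iterating, instead of the iterator ending silently. A minimal sketch; the API key, dataset, and symbols are placeholders:

import databento as db
from databento.common.error import BentoError

live = db.Live(key="YOUR_API_KEY")
live.subscribe(dataset="GLBX.MDP3", schema="trades", symbols=["ESZ3"])

try:
    # Iterating starts the session; on disconnect, __next__ now calls
    # block_for_close(), which re-raises BentoError rather than logging it.
    for record in live:
        print(record)
except BentoError:
    print("live session lost connection")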
