Skip to content

Commit e4135a9

Browse files
committed
ADD: Add pretty_px, pretty_ts and split_symbols customizations
1 parent 6a5e73e commit e4135a9

File tree

3 files changed

+42
-25
lines changed

3 files changed

+42
-25
lines changed

databento/common/dbnstore.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -113,14 +113,14 @@ def format_dataframe(
113113
if column in df.columns:
114114
df[column] = df[column].where(df[column] != type_max, np.nan)
115115

116-
if pretty_ts:
117-
for ts_field in struct._timestamp_fields:
118-
df[ts_field] = pd.to_datetime(df[ts_field], errors="coerce", utc=True)
119-
120116
if pretty_px:
121117
for px_field in struct._price_fields:
122118
df[px_field] = df[px_field].replace(INT64_NULL, np.nan) / FIXED_PRICE_SCALE
123119

120+
if pretty_ts:
121+
for ts_field in struct._timestamp_fields:
122+
df[ts_field] = pd.to_datetime(df[ts_field], errors="coerce", utc=True)
123+
124124
for column, dtype in SCHEMA_DTYPES_MAP[schema]:
125125
if dtype.startswith("S") and column not in struct._hidden_fields:
126126
df[column] = df[column].str.decode("utf-8")
@@ -826,8 +826,8 @@ def request_symbology(self, client: Historical) -> dict[str, Any]:
826826
def to_csv(
827827
self,
828828
path: Path | str,
829-
pretty_ts: bool = True,
830829
pretty_px: bool = True,
830+
pretty_ts: bool = True,
831831
map_symbols: bool = True,
832832
schema: Schema | str | None = None,
833833
) -> None:
@@ -838,13 +838,13 @@ def to_csv(
838838
----------
839839
path : Path or str
840840
The file path to write to.
841-
pretty_ts : bool, default True
842-
If all timestamp columns should be converted from UNIX nanosecond
843-
`int` to `pd.Timestamp` tz-aware (UTC).
844841
pretty_px : bool, default True
845842
If all price columns should be converted from `int` to `float` at
846-
the correct scale (using the fixed precision scalar 1e-9). Null
843+
the correct scale (using the fixed-precision scalar 1e-9). Null
847844
prices are replaced with an empty string.
845+
pretty_ts : bool, default True
846+
If all timestamp columns should be converted from UNIX nanosecond
847+
`int` to tz-aware UTC `pd.Timestamp`.
848848
map_symbols : bool, default True
849849
If symbology mappings from the metadata should be used to create
850850
a 'symbol' column, mapping the instrument ID to its requested symbol for
@@ -864,8 +864,8 @@ def to_csv(
864864
865865
"""
866866
df_iter = self.to_df(
867-
pretty_ts=pretty_ts,
868867
pretty_px=pretty_px,
868+
pretty_ts=pretty_ts,
869869
map_symbols=map_symbols,
870870
schema=schema,
871871
count=2**16,
@@ -881,8 +881,8 @@ def to_csv(
881881
@overload
882882
def to_df(
883883
self,
884-
pretty_ts: bool = ...,
885884
pretty_px: bool = ...,
885+
pretty_ts: bool = ...,
886886
map_symbols: bool = ...,
887887
schema: Schema | str | None = ...,
888888
count: None = ...,
@@ -892,8 +892,8 @@ def to_df(
892892
@overload
893893
def to_df(
894894
self,
895-
pretty_ts: bool = ...,
896895
pretty_px: bool = ...,
896+
pretty_ts: bool = ...,
897897
map_symbols: bool = ...,
898898
schema: Schema | str | None = ...,
899899
count: int = ...,
@@ -902,8 +902,8 @@ def to_df(
902902

903903
def to_df(
904904
self,
905-
pretty_ts: bool = True,
906905
pretty_px: bool = True,
906+
pretty_ts: bool = True,
907907
map_symbols: bool = True,
908908
schema: Schema | str | None = None,
909909
count: int | None = None,
@@ -913,13 +913,13 @@ def to_df(
913913
914914
Parameters
915915
----------
916-
pretty_ts : bool, default True
917-
If all timestamp columns should be converted from UNIX nanosecond
918-
`int` to `pd.Timestamp` tz-aware (UTC).
919916
pretty_px : bool, default True
920917
If all price columns should be converted from `int` to `float` at
921-
the correct scale (using the fixed precision scalar 1e-9). Null
918+
the correct scale (using the fixed-precision scalar 1e-9). Null
922919
prices are replaced with NaN.
920+
pretty_ts : bool, default True
921+
If all timestamp columns should be converted from UNIX nanosecond
922+
`int` to tz-aware UTC `pd.Timestamp`.
923923
map_symbols : bool, default True
924924
If symbology mappings from the metadata should be used to create
925925
a 'symbol' column, mapping the instrument ID to its requested symbol for
@@ -1000,8 +1000,8 @@ def to_file(self, path: Path | str) -> None:
10001000
def to_json(
10011001
self,
10021002
path: Path | str,
1003-
pretty_ts: bool = True,
10041003
pretty_px: bool = True,
1004+
pretty_ts: bool = True,
10051005
map_symbols: bool = True,
10061006
schema: Schema | str | None = None,
10071007
) -> None:
@@ -1012,12 +1012,12 @@ def to_json(
10121012
----------
10131013
path : Path or str
10141014
The file path to write to.
1015-
pretty_ts : bool, default True
1016-
If all timestamp columns should be converted from UNIX nanosecond
1017-
`int` to `pd.Timestamp` tz-aware (UTC).
10181015
pretty_px : bool, default True
10191016
If all price columns should be converted from `int` to `float` at
1020-
the correct scale (using the fixed precision scalar 1e-9).
1017+
the correct scale (using the fixed-precision scalar 1e-9).
1018+
pretty_ts : bool, default True
1019+
If all timestamp columns should be converted from UNIX nanosecond
1020+
`int` to tz-aware UTC `pd.Timestamp`.
10211021
map_symbols : bool, default True
10221022
If symbology mappings from the metadata should be used to create
10231023
a 'symbol' column, mapping the instrument ID to its requested symbol for
@@ -1037,8 +1037,8 @@ def to_json(
10371037
10381038
"""
10391039
df_iter = self.to_df(
1040-
pretty_ts=pretty_ts,
10411040
pretty_px=pretty_px,
1041+
pretty_ts=pretty_ts,
10421042
map_symbols=map_symbols,
10431043
schema=schema,
10441044
count=2**16,

databento/historical/api/batch.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ def submit_job(
5454
end: pd.Timestamp | date | str | int | None = None,
5555
encoding: Encoding | str = "dbn",
5656
compression: Compression | str = "zstd",
57+
pretty_px: bool = False,
58+
pretty_ts: bool = False,
5759
map_symbols: bool = False,
60+
split_symbols: bool = False,
5861
split_duration: SplitDuration | str = "day",
5962
split_size: int | None = None,
6063
packaging: Packaging | str | None = None,
@@ -92,9 +95,17 @@ def submit_job(
9295
The data encoding.
9396
compression : Compression or str {'none', 'zstd'}, default 'zstd'
9497
The data compression format (if any).
98+
pretty_px : bool, default False
99+
If prices should be formatted to the correct scale (using the fixed-precision scalar 1e-9).
100+
Only applicable for 'csv' or 'json' encodings.
101+
pretty_ts : bool, default False
102+
If timestamps should be formatted as ISO 8601 strings.
103+
Only applicable for 'csv' or 'json' encodings.
95104
map_symbols : bool, default False
96-
If the raw symbol should be appended to every text encoded record.
97-
Must be requested with either 'csv' or 'json' encoding.
105+
If the requested symbol should be appended to every text-encoded record.
106+
Only applicable for 'csv' or 'json' encodings.
107+
split_symbols : bool, default False
108+
If files should be split by raw symbol. Cannot be requested with `'ALL_SYMBOLS'`.
98109
split_duration : SplitDuration or str {'day', 'week', 'month', 'none'}, default 'day'
99110
The maximum time duration before batched data is split into multiple files.
100111
A week starts on Sunday UTC.
@@ -135,7 +146,10 @@ def submit_job(
135146
"compression": str(validate_enum(compression, Compression, "compression"))
136147
if compression
137148
else None,
149+
"pretty_px": pretty_px,
150+
"pretty_ts": pretty_ts,
138151
"map_symbols": map_symbols,
152+
"split_symbols": split_symbols,
139153
"split_duration": str(
140154
validate_enum(split_duration, SplitDuration, "split_duration"),
141155
),

tests/test_historical_batch.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,10 @@ def test_batch_submit_job_sends_expected_request(
9393
"stype_out": "instrument_id",
9494
"encoding": "csv",
9595
"compression": "zstd",
96+
"pretty_px": False,
97+
"pretty_ts": False,
9698
"map_symbols": False,
99+
"split_symbols": False,
97100
"split_duration": "day",
98101
"packaging": "none",
99102
"delivery": "download",

0 commit comments

Comments
 (0)