99from io import BytesIO
1010from os import PathLike
1111from pathlib import Path
12- from typing import (
13- IO ,
14- TYPE_CHECKING ,
15- Any ,
16- Callable ,
17- overload ,
18- )
12+ from typing import IO , TYPE_CHECKING , Any , Callable , overload
1913
2014import databento_dbn
2115import numpy as np
@@ -113,14 +107,14 @@ def format_dataframe(
113107 if column in df .columns :
114108 df [column ] = df [column ].where (df [column ] != type_max , np .nan )
115109
116- if pretty_ts :
117- for ts_field in struct ._timestamp_fields :
118- df [ts_field ] = pd .to_datetime (df [ts_field ], errors = "coerce" , utc = True )
119-
120110 if pretty_px :
121111 for px_field in struct ._price_fields :
122112 df [px_field ] = df [px_field ].replace (INT64_NULL , np .nan ) / FIXED_PRICE_SCALE
123113
114+ if pretty_ts :
115+ for ts_field in struct ._timestamp_fields :
116+ df [ts_field ] = pd .to_datetime (df [ts_field ], errors = "coerce" , utc = True )
117+
124118 for column , dtype in SCHEMA_DTYPES_MAP [schema ]:
125119 if dtype .startswith ("S" ) and column not in struct ._hidden_fields :
126120 df [column ] = df [column ].str .decode ("utf-8" )
@@ -826,9 +820,9 @@ def request_symbology(self, client: Historical) -> dict[str, Any]:
826820 def to_csv (
827821 self ,
828822 path : Path | str ,
829- pretty_ts : bool = True ,
830823 pretty_px : bool = True ,
831- map_symbols : bool = True ,
824+ pretty_ts : bool = True ,
825+ map_symbols : bool | None = None ,
832826 schema : Schema | str | None = None ,
833827 ) -> None :
834828 """
@@ -838,16 +832,16 @@ def to_csv(
838832 ----------
839833 path : Path or str
840834 The file path to write to.
841- pretty_ts : bool, default True
842- If all timestamp columns should be converted from UNIX nanosecond
843- `int` to `pd.Timestamp` tz-aware (UTC).
844835 pretty_px : bool, default True
845836 If all price columns should be converted from `int` to `float` at
846- the correct scale (using the fixed precision scalar 1e-9). Null
837+ the correct scale (using the fixed-precision scalar 1e-9). Null
847838 prices are replaced with an empty string.
839+ pretty_ts : bool, default True
840+ If all timestamp columns should be converted from UNIX nanosecond
841+ `int` to tz-aware UTC `pd.Timestamp`.
848842 map_symbols : bool, optional
849843 If symbology mappings from the metadata should be used to create
850- a 'symbol' column, mapping the instrument ID to its native symbol for
844+ a 'symbol' column, mapping the instrument ID to its requested symbol for
851845 every record. If None, symbols are mapped only when `stype_out` is 'instrument_id'.
852846 schema : Schema or str, optional
853847 The schema for the csv.
@@ -864,8 +858,8 @@ def to_csv(
864858
865859 """
866860 df_iter = self .to_df (
867- pretty_ts = pretty_ts ,
868861 pretty_px = pretty_px ,
862+ pretty_ts = pretty_ts ,
869863 map_symbols = map_symbols ,
870864 schema = schema ,
871865 count = 2 ** 16 ,
@@ -881,9 +875,9 @@ def to_csv(
881875 @overload
882876 def to_df (
883877 self ,
884- pretty_ts : bool = ...,
885878 pretty_px : bool = ...,
886- map_symbols : bool = ...,
879+ pretty_ts : bool = ...,
880+ map_symbols : bool | None = ...,
887881 schema : Schema | str | None = ...,
888882 count : None = ...,
889883 ) -> pd .DataFrame :
@@ -892,19 +886,19 @@ def to_df(
892886 @overload
893887 def to_df (
894888 self ,
895- pretty_ts : bool = ...,
896889 pretty_px : bool = ...,
897- map_symbols : bool = ...,
890+ pretty_ts : bool = ...,
891+ map_symbols : bool | None = ...,
898892 schema : Schema | str | None = ...,
899893 count : int = ...,
900894 ) -> DataFrameIterator :
901895 ...
902896
903897 def to_df (
904898 self ,
905- pretty_ts : bool = True ,
906899 pretty_px : bool = True ,
907- map_symbols : bool = True ,
900+ pretty_ts : bool = True ,
901+ map_symbols : bool | None = None ,
908902 schema : Schema | str | None = None ,
909903 count : int | None = None ,
910904 ) -> pd .DataFrame | DataFrameIterator :
@@ -913,16 +907,16 @@ def to_df(
913907
914908 Parameters
915909 ----------
916- pretty_ts : bool, default True
917- If all timestamp columns should be converted from UNIX nanosecond
918- `int` to `pd.Timestamp` tz-aware (UTC).
919910 pretty_px : bool, default True
920911 If all price columns should be converted from `int` to `float` at
921- the correct scale (using the fixed precision scalar 1e-9). Null
912+ the correct scale (using the fixed-precision scalar 1e-9). Null
922913 prices are replaced with NaN.
914+ pretty_ts : bool, default True
915+ If all timestamp columns should be converted from UNIX nanosecond
916+ `int` to tz-aware UTC `pd.Timestamp`.
923917 map_symbols : bool, optional
924918 If symbology mappings from the metadata should be used to create
925- a 'symbol' column, mapping the instrument ID to its native symbol for
919+ a 'symbol' column, mapping the instrument ID to its requested symbol for
926920 every record. If None, symbols are mapped only when `stype_out` is 'instrument_id'.
927921 schema : Schema or str, optional
928922 The schema for the dataframe.
@@ -951,8 +945,16 @@ def to_df(
951945 raise ValueError ("a schema must be specified for mixed DBN data" )
952946 schema = self .schema
953947
954- if not self ._instrument_id_index :
955- self ._instrument_id_index = self ._build_instrument_id_index ()
948+ if map_symbols is None :
949+ map_symbols = self .stype_out == SType .INSTRUMENT_ID
950+
951+ if map_symbols :
952+ if self .stype_out != SType .INSTRUMENT_ID :
953+ raise ValueError (
954+ "`map_symbols` is not supported when `stype_out` is not 'instrument_id'" ,
955+ )
956+ if not self ._instrument_id_index :
957+ self ._instrument_id_index = self ._build_instrument_id_index ()
956958
957959 if count is None :
958960 records = iter ([self .to_ndarray (schema )])
@@ -1000,9 +1002,9 @@ def to_file(self, path: Path | str) -> None:
10001002 def to_json (
10011003 self ,
10021004 path : Path | str ,
1003- pretty_ts : bool = True ,
10041005 pretty_px : bool = True ,
1005- map_symbols : bool = True ,
1006+ pretty_ts : bool = True ,
1007+ map_symbols : bool | None = None ,
10061008 schema : Schema | str | None = None ,
10071009 ) -> None :
10081010 """
@@ -1012,15 +1014,15 @@ def to_json(
10121014 ----------
10131015 path : Path or str
10141016 The file path to write to.
1015- pretty_ts : bool, default True
1016- If all timestamp columns should be converted from UNIX nanosecond
1017- `int` to `pd.Timestamp` tz-aware (UTC).
10181017 pretty_px : bool, default True
10191018 If all price columns should be converted from `int` to `float` at
1020- the correct scale (using the fixed precision scalar 1e-9).
1019+ the correct scale (using the fixed-precision scalar 1e-9).
1020+ pretty_ts : bool, default True
1021+ If all timestamp columns should be converted from UNIX nanosecond
1022+ `int` to tz-aware UTC `pd.Timestamp`.
10211023 map_symbols : bool, optional
10221024 If symbology mappings from the metadata should be used to create
1023- a 'symbol' column, mapping the instrument ID to its native symbol for
1025+ a 'symbol' column, mapping the instrument ID to its requested symbol for
10241026 every record. If None, symbols are mapped only when `stype_out` is 'instrument_id'.
10251027 schema : Schema or str, optional
10261028 The schema for the json.
@@ -1037,8 +1039,8 @@ def to_json(
10371039
10381040 """
10391041 df_iter = self .to_df (
1040- pretty_ts = pretty_ts ,
10411042 pretty_px = pretty_px ,
1043+ pretty_ts = pretty_ts ,
10421044 map_symbols = map_symbols ,
10431045 schema = schema ,
10441046 count = 2 ** 16 ,
0 commit comments