1111from io import BytesIO
1212from os import PathLike
1313from pathlib import Path
14- from typing import IO , TYPE_CHECKING , Any , Callable , Literal , overload
14+ from typing import IO , TYPE_CHECKING , Any , BinaryIO , Callable , Literal , overload
1515
1616import databento_dbn
1717import numpy as np
2020from databento_dbn import FIXED_PRICE_SCALE
2121from databento_dbn import Compression
2222from databento_dbn import DBNDecoder
23+ from databento_dbn import Encoding
2324from databento_dbn import ErrorMsg
2425from databento_dbn import Metadata
2526from databento_dbn import Schema
2627from databento_dbn import SType
2728from databento_dbn import SymbolMappingMsg
2829from databento_dbn import SystemMsg
30+ from databento_dbn import Transcoder
2931
3032from databento .common .data import DEFINITION_TYPE_MAX_MAP
3133from databento .common .data import SCHEMA_COLUMNS
3234from databento .common .data import SCHEMA_DTYPES_MAP
3335from databento .common .data import SCHEMA_STRUCT_MAP
3436from databento .common .error import BentoError
37+ from databento .common .iterator import chunk
3538from databento .common .symbology import InstrumentMap
39+ from databento .common .symbology import SymbolInterval
40+ from databento .common .validation import validate_enum
3641from databento .common .validation import validate_file_write_path
3742from databento .common .validation import validate_maybe_enum
3843from databento .live import DBNRecord
@@ -763,6 +768,7 @@ def to_csv(
763768 pretty_px : bool = True ,
764769 pretty_ts : bool = True ,
765770 map_symbols : bool = True ,
771+ compression : Compression | str = Compression .NONE ,
766772 schema : Schema | str | None = None ,
767773 ) -> None :
768774 """
@@ -783,6 +789,8 @@ def to_csv(
783789 If symbology mappings from the metadata should be used to create
784790 a 'symbol' column, mapping the instrument ID to its requested symbol for
785791 every record.
792+ compression : Compression or str, default `Compression.NONE`
793+ The output compression for writing.
786794 schema : Schema or str, optional
787795 The schema for the csv.
788796 This is only required when reading a DBN stream with mixed record types.
@@ -797,24 +805,33 @@ def to_csv(
797805 Requires all the data to be brought up into memory to then be written.
798806
799807 """
800- price_type : Literal ["fixed" , "float" ] = "fixed"
801- if pretty_px is True :
802- price_type = "float"
808+ compression = validate_enum (compression , Compression , "compression" )
809+ schema = validate_maybe_enum (schema , Schema , "schema" )
810+ if schema is None :
811+ if self .schema is None :
812+ raise ValueError ("a schema must be specified for mixed DBN data" )
813+ schema = self .schema
803814
804- df_iter = self .to_df (
805- price_type = price_type ,
806- pretty_ts = pretty_ts ,
807- map_symbols = map_symbols ,
808- schema = schema ,
809- count = 2 ** 16 ,
810- )
815+ record_type = SCHEMA_STRUCT_MAP [schema ]
816+ record_iter = filter (lambda r : isinstance (r , record_type ), self )
811817
812- with open (path , "x" , newline = "" ) as csv_file :
813- for i , frame in enumerate (df_iter ):
814- frame .to_csv (
815- csv_file ,
816- header = (i == 0 ),
817- )
818+ if map_symbols :
819+ self ._instrument_map .insert_metadata (self .metadata )
820+ symbol_map = self ._instrument_map ._data
821+ else :
822+ symbol_map = None
823+
824+ with open (path , "xb" ) as output :
825+ self ._transcode (
826+ output = output ,
827+ records_iter = record_iter ,
828+ encoding = Encoding .CSV ,
829+ pretty_px = pretty_px ,
830+ pretty_ts = pretty_ts ,
831+ symbol_map = symbol_map ,
832+ compression = compression ,
833+ schema = schema ,
834+ )
818835
819836 @overload
820837 def to_df (
@@ -965,6 +982,7 @@ def to_json(
965982 pretty_px : bool = True ,
966983 pretty_ts : bool = True ,
967984 map_symbols : bool = True ,
985+ compression : Compression | str = Compression .NONE ,
968986 schema : Schema | str | None = None ,
969987 ) -> None :
970988 """
@@ -984,6 +1002,8 @@ def to_json(
9841002 If symbology mappings from the metadata should be used to create
9851003 a 'symbol' column, mapping the instrument ID to its requested symbol for
9861004 every record.
1005+ compression : Compression or str, default `Compression.NONE`
1006+ The output compression for writing.
9871007 schema : Schema or str, optional
9881008 The schema for the json.
9891009 This is only required when reading a DBN stream with mixed record types.
@@ -998,27 +1018,33 @@ def to_json(
9981018 Requires all the data to be brought up into memory to then be written.
9991019
10001020 """
1001- price_type : Literal ["fixed" , "float" ] = "fixed"
1002- if pretty_px is True :
1003- price_type = "float"
1021+ compression = validate_enum (compression , Compression , "compression" )
1022+ schema = validate_maybe_enum (schema , Schema , "schema" )
1023+ if schema is None :
1024+ if self .schema is None :
1025+ raise ValueError ("a schema must be specified for mixed DBN data" )
1026+ schema = self .schema
10041027
1005- df_iter = self .to_df (
1006- price_type = price_type ,
1007- pretty_ts = pretty_ts ,
1008- map_symbols = map_symbols ,
1009- schema = schema ,
1010- count = 2 ** 16 ,
1011- )
1028+ record_type = SCHEMA_STRUCT_MAP [schema ]
1029+ record_iter = filter (lambda r : isinstance (r , record_type ), self )
10121030
1013- with open (path , "x" ) as json_path :
1014- for frame in df_iter :
1015- frame .reset_index (inplace = True )
1016- frame .to_json (
1017- json_path ,
1018- orient = "records" ,
1019- date_unit = "ns" ,
1020- lines = True ,
1021- )
1031+ if map_symbols :
1032+ self ._instrument_map .insert_metadata (self .metadata )
1033+ symbol_map = self ._instrument_map ._data
1034+ else :
1035+ symbol_map = None
1036+
1037+ with open (path , "xb" ) as output :
1038+ self ._transcode (
1039+ output = output ,
1040+ records_iter = record_iter ,
1041+ encoding = Encoding .JSON ,
1042+ pretty_px = pretty_px ,
1043+ pretty_ts = pretty_ts ,
1044+ symbol_map = symbol_map ,
1045+ compression = compression ,
1046+ schema = schema ,
1047+ )
10221048
10231049 @overload
10241050 def to_ndarray ( # type: ignore [misc]
@@ -1085,6 +1111,35 @@ def to_ndarray(
10851111
10861112 return ndarray_iter
10871113
def _transcode(
    self,
    output: BinaryIO,
    records_iter: Iterator[DBNRecord],
    encoding: Encoding,
    pretty_px: bool,
    pretty_ts: bool,
    symbol_map: dict[int, list[SymbolInterval]] | None,
    compression: Compression,
    schema: Schema,
) -> None:
    """
    Feed this store's metadata and the given records through a `Transcoder`
    that writes to `output`.

    Parameters
    ----------
    output : BinaryIO
        The binary file object handed to the transcoder as its destination.
    records_iter : Iterator[DBNRecord]
        The records to transcode; each one is serialized with `bytes()`.
    encoding : Encoding
        The output encoding passed to the transcoder.
    pretty_px : bool
        Forwarded to the transcoder unchanged.
    pretty_ts : bool
        Forwarded to the transcoder unchanged.
    symbol_map : dict[int, list[SymbolInterval]], optional
        Forwarded to the transcoder unchanged; may be None.
    compression : Compression
        The output compression passed to the transcoder.
    schema : Schema
        Forwarded to the transcoder unchanged.

    """
    batch_size = 2**16

    # The transcoder is told to expect a metadata header on uncompressed
    # input, which matches what is written to it below.
    options = {
        "file": output,
        "encoding": encoding,
        "compression": compression,
        "pretty_px": pretty_px,
        "pretty_ts": pretty_ts,
        "has_metadata": True,
        "input_compression": Compression.NONE,
        "symbol_map": symbol_map,
        "schema": schema,
    }
    transcoder = Transcoder(**options)  # type: ignore [arg-type]

    # Metadata goes first, then the records in batches.
    transcoder.write(bytes(self.metadata))
    for batch in chunk(records_iter, batch_size):
        for item in batch:
            transcoder.write(bytes(item))
        # Flush once per batch rather than once per record.
        transcoder.flush()
1142+
10881143
10891144class NDArrayIterator :
10901145 def __init__ (
0 commit comments