99from databento .common .data import (
1010 COLUMNS ,
1111 DEFINITION_CHARARRAY_COLUMNS ,
12+ DEFINITION_PRICE_COLUMNS ,
13+ DEFINITION_TYPE_MAX_MAP ,
1214 DERIV_SCHEMAS ,
1315 STRUCT_MAP ,
1416)
@@ -442,8 +444,20 @@ def to_df(
442444 """
443445 df = pd .DataFrame (self .to_ndarray ())
444446 df .set_index (self ._get_index_column (), inplace = True )
447+ df = self ._cleanup_dataframe (df )
445448
446- # Cleanup dataframe
449+ if pretty_ts :
450+ df = self ._apply_pretty_ts (df )
451+
452+ if pretty_px :
453+ df = self ._apply_pretty_px (df )
454+
455+ if map_symbols and self .schema != Schema .DEFINITION :
456+ df = self ._map_symbols (df , pretty_ts )
457+
458+ return df
459+
460+ def _cleanup_dataframe (self , df : pd .DataFrame ) -> pd .DataFrame :
447461 df .drop (["length" , "rtype" ], axis = 1 , inplace = True )
448462 if self .schema == Schema .MBO or self .schema in DERIV_SCHEMAS :
449463 df = df .reindex (columns = COLUMNS [self .schema ])
@@ -453,39 +467,52 @@ def to_df(
453467 elif self .schema == Schema .DEFINITION :
454468 for column in DEFINITION_CHARARRAY_COLUMNS :
455469 df [column ] = df [column ].str .decode ("utf-8" )
470+ for column , type_max in DEFINITION_TYPE_MAX_MAP .items ():
471+ if column in df .columns :
472+ df [column ] = df [column ].where (df [column ] != type_max , np .nan )
456473
457- if pretty_ts :
458- df .index = pd .to_datetime (df .index , utc = True )
459- for column in df .columns :
460- if column .startswith ("ts_" ) and "delta" not in column :
461- df [column ] = pd .to_datetime (df [column ], utc = True )
474+ return df
462475
463- if self .schema == Schema .DEFINITION :
464- df ["expiration" ] = pd .to_datetime (df ["expiration" ], utc = True )
465- df ["activation" ] = pd .to_datetime (df ["activation" ], utc = True )
def _apply_pretty_ts(self, df: pd.DataFrame) -> pd.DataFrame:
    """Convert the index and timestamp columns of ``df`` to UTC datetimes.

    The index is always converted. Every column whose name starts with
    ``"ts_"`` and does not contain ``"delta"`` is converted as well. When
    ``self.schema`` is ``Schema.DEFINITION`` the ``"expiration"`` and
    ``"activation"`` columns are converted too.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to convert; it is modified in place.

    Returns
    -------
    pd.DataFrame
        The same frame, returned for call chaining.

    Raises
    ------
    KeyError
        If the schema is ``Schema.DEFINITION`` and ``"expiration"`` or
        ``"activation"`` is missing from ``df``.

    """
    df.index = pd.to_datetime(df.index, utc=True)

    # Columns with "delta" in the name are skipped even when prefixed "ts_"
    # (presumably they hold durations rather than absolute timestamps).
    ts_columns = [
        column
        for column in df.columns
        if column.startswith("ts_") and "delta" not in column
    ]

    # Definition records carry two extra timestamp fields that do not follow
    # the "ts_" naming rule.
    if self.schema == Schema.DEFINITION:
        ts_columns += ["expiration", "activation"]

    for column in ts_columns:
        df[column] = pd.to_datetime(df[column], utc=True)

    return df
487+
def _apply_pretty_px(self, df: pd.DataFrame) -> pd.DataFrame:
    """Scale the price columns of ``df`` by ``1e-9``.

    A column is scaled when it is named ``price``, ``open``, ``high``,
    ``low`` or ``close``, or when its name starts with ``bid_px`` or
    ``ask_px``. When ``self.schema`` is ``Schema.DEFINITION`` every column
    listed in ``DEFINITION_PRICE_COLUMNS`` is scaled as well.

    Each column is scaled exactly once, even if it is selected by both the
    generic rule and ``DEFINITION_PRICE_COLUMNS``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to convert; it is modified in place.

    Returns
    -------
    pd.DataFrame
        The same frame, returned for call chaining.

    Raises
    ------
    KeyError
        If the schema is ``Schema.DEFINITION`` and a column named in
        ``DEFINITION_PRICE_COLUMNS`` is missing from ``df``.

    """
    exact_names = ("price", "open", "high", "low", "close")
    px_prefixes = ("bid_px", "ask_px")  # MBP book-level columns

    px_columns = [
        column
        for column in df.columns
        if column in exact_names or column.startswith(px_prefixes)
    ]

    if self.schema == Schema.DEFINITION:
        # Skip anything already selected so it is not scaled a second time.
        px_columns.extend(
            column
            for column in DEFINITION_PRICE_COLUMNS
            if column not in px_columns
        )

    for column in px_columns:
        df[column] = df[column] * 1e-9

    return df
502+
def _map_symbols(self, df: pd.DataFrame, pretty_ts: bool) -> pd.DataFrame:
    """Add a ``"symbol"`` column looked up from each row's date and product ID.

    The lookup table ``self._product_id_index`` is built on first use via
    ``self._build_product_id_index()`` and is accessed as
    ``index[date][product_id]``. If the table is still empty after
    building, ``df`` is returned without a ``"symbol"`` column.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing a ``"product_id"`` column; it is modified in place.
    pretty_ts : bool
        If True, the index of ``df`` is used as-is (it is expected to have
        been converted to datetimes already); otherwise a UTC datetime view
        of the index is computed for the date lookup, leaving ``df.index``
        itself untouched.

    Returns
    -------
    pd.DataFrame
        The same frame, returned for call chaining.

    Raises
    ------
    KeyError
        If a row's date or product ID is absent from the lookup table.

    """
    # Build product ID index lazily on first use
    if not self._product_id_index:
        self._product_id_index = self._build_product_id_index()

    product_id_index = self._product_id_index
    if not product_id_index:
        # Nothing to map against; leave the frame unchanged.
        return df

    # Map product IDs to native symbols
    df_index = df.index if pretty_ts else pd.to_datetime(df.index, utc=True)
    df["symbol"] = [
        product_id_index[ts.date()][product_id]
        for ts, product_id in zip(df_index, df["product_id"])
    ]

    return df
491518
0 commit comments