@@ -301,14 +301,13 @@ def generate_categorized_rows() -> Generator[Tuple, None, None]:
301301 np .fromiter (generate_categorized_rows (), dtype = types ),
302302 )
303303
304- experiment_dtype = pd .CategoricalDtype (categories = label_mapping )
305- df [index_column_name ] = pd .Categorical .from_codes (df [index_column_name ], dtype = experiment_dtype )
306304 if timestamp_column_name :
307305 df [timestamp_column_name ] = pd .to_datetime (df [timestamp_column_name ], unit = "ms" , origin = "unix" , utc = True )
308306
309- df = _pivot_and_reindex_df (df , include_point_previews , index_column_name , timestamp_column_name )
307+ df = _pivot_df (df , include_point_previews , index_column_name , timestamp_column_name )
308+ df = _restore_labels_in_index (df , index_column_name , label_mapping )
310309 df = _restore_path_column_names (df , path_mapping , "float_series" if type_suffix_in_column_names else None )
311- df = _sort_indices (df )
310+ df = _sort_index_and_columns (df , index_column_name )
312311
313312 return df
314313
@@ -390,14 +389,13 @@ def generate_categorized_rows() -> Generator[Tuple, None, None]:
390389 np .fromiter (generate_categorized_rows (), dtype = types ),
391390 )
392391
393- experiment_dtype = pd .CategoricalDtype (categories = label_mapping )
394- df [index_column_name ] = pd .Categorical .from_codes (df [index_column_name ], dtype = experiment_dtype )
395392 if timestamp_column_name :
396393 df [timestamp_column_name ] = pd .to_datetime (df [timestamp_column_name ], unit = "ms" , origin = "unix" , utc = True )
397394
398- df = _pivot_and_reindex_df (df , False , index_column_name , timestamp_column_name )
395+ df = _pivot_df (df , False , index_column_name , timestamp_column_name )
396+ df = _restore_labels_in_index (df , index_column_name , label_mapping )
399397 df = _restore_path_column_names (df , path_mapping , None )
400- df = _sort_indices (df )
398+ df = _sort_index_and_columns (df , index_column_name )
401399
402400 return df
403401
@@ -461,8 +459,6 @@ def generate_categorized_rows() -> Generator[Tuple, None, None]:
461459 df = pd .DataFrame (
462460 np .fromiter (generate_categorized_rows (), dtype = types ),
463461 )
464- experiment_dtype = pd .CategoricalDtype (categories = label_mapping )
465- df [container_column_name ] = pd .Categorical .from_codes (df [container_column_name ], dtype = experiment_dtype )
466462
467463 df ["bucket" ] = pd .IntervalIndex .from_arrays (df ["from_x" ], df ["to_x" ], closed = "right" )
468464 df = df .drop (columns = ["from_x" , "to_x" ])
@@ -475,11 +471,8 @@ def generate_categorized_rows() -> Generator[Tuple, None, None]:
475471 dropna = False ,
476472 sort = False ,
477473 )
478- df .columns = df .columns .set_levels (
479- df .columns .get_level_values (container_column_name ).unique ().astype (str ),
480- level = container_column_name ,
481- )
482474
475+ df = _restore_labels_in_columns (df , container_column_name , label_mapping )
483476 df = _restore_path_column_names (df , path_mapping , None )
484477
485478 # Clear out any columns that were not requested, but got added because of dropna=False
@@ -546,7 +539,7 @@ def _collapse_open_buckets(df: pd.DataFrame) -> pd.DataFrame:
546539 return df
547540
548541
549- def _pivot_and_reindex_df (
542+ def _pivot_df (
550543 df : pd .DataFrame ,
551544 include_point_previews : bool ,
552545 index_column_name : str ,
@@ -559,7 +552,7 @@ def _pivot_and_reindex_df(
559552 df [[index_column_name , "step" ]]
560553 .astype (
561554 {
562- index_column_name : "category " ,
555+ index_column_name : "uint32 " ,
563556 "step" : "float64" ,
564557 }
565558 )
@@ -588,15 +581,31 @@ def _pivot_and_reindex_df(
588581 )
589582
590583 # Include only observed (experiment, step) pairs
591- df = df .filter (observed_idx , axis = "index" )
584+ return df .filter (observed_idx , axis = "index" )
592585
593- # Replace categorical codes in `index_column_name` with strings
594- df .index = df .index .set_levels (
595- df .index .get_level_values (index_column_name ).unique ().astype (str ),
596- level = index_column_name ,
597- )
598586
599- return df .sort_index (level = [index_column_name , "step" ])
587+ def _restore_labels_in_index (
588+ df : pd .DataFrame ,
589+ column_name : str ,
590+ label_mapping : list [str ],
591+ ) -> pd .DataFrame :
592+ if df .index .empty :
593+ df .index = df .index .set_levels (df .index .get_level_values (column_name ).astype (str ), level = column_name )
594+ return df
595+
596+ return df .rename (index = {i : label for i , label in enumerate (label_mapping )}, level = column_name )
597+
598+
599+ def _restore_labels_in_columns (
600+ df : pd .DataFrame ,
601+ column_name : str ,
602+ label_mapping : list [str ],
603+ ) -> pd .DataFrame :
604+ if df .index .empty :
605+ df .columns = df .columns .set_levels (df .columns .get_level_values (column_name ).astype (str ), level = column_name )
606+ return df
607+
608+ return df .rename (columns = {i : label for i , label in enumerate (label_mapping )}, level = column_name )
600609
601610
602611def _restore_path_column_names (
@@ -622,16 +631,18 @@ def _restore_path_column_names(
622631 return df .rename (columns = reverse_mapping )
623632
624633
625- def _sort_indices (df : pd .DataFrame ) -> pd .DataFrame :
634+ def _sort_index_and_columns (df : pd .DataFrame , index_column_name : str ) -> pd .DataFrame :
626635 # MultiIndex DFs need to have column index order swapped: value/metric_name -> metric_name/value.
627636 # We also sort columns, but only after the original names have been restored.
628637 if isinstance (df .columns , pd .MultiIndex ):
629638 df .columns .names = (None , None )
630639 df = df .swaplevel (axis = "columns" )
631- return df .sort_index (axis = "columns" , level = 0 )
640+ df = df .sort_index (axis = "columns" , level = 0 )
632641 else :
633642 df .columns .name = None
634- return df .sort_index (axis = "columns" )
643+ df = df .sort_index (axis = "columns" )
644+
645+ return df .sort_index (axis = "index" , level = [index_column_name , "step" ])
635646
636647
637648def create_files_dataframe (
0 commit comments