@@ -372,7 +372,7 @@ def to_images(
372372 if merge :
373373 nn = len (df .columns ) // 2
374374 nn += nn % 2
375- fig , axs = plt .subplots (nn , 2 , figsize = (12 , 3 * nn ))
375+ fig , axs = plt .subplots (nn , 2 , figsize = (12 , 3 * nn * df . shape [ 0 ] / 12 ))
376376 pos = 0
377377 for c in loop :
378378 ax = axs [pos // 2 , pos % 2 ]
@@ -455,6 +455,7 @@ def __init__(
455455 ]
456456 ] = None ,
457457 fill_missing : Optional [Sequence [Tuple [str , Any ]]] = None ,
458+ keep_last_date : bool = False ,
458459 ):
459460 self ._data = data
460461 self ._time = time
@@ -464,6 +465,7 @@ def __init__(
464465 self .recent = recent
465466 self ._formulas = formulas
466467 self .fill_missing = fill_missing
468+ self .keep_last_date = keep_last_date
467469
468470 def post_load_process_piece (
469471 self , df : pandas .DataFrame , unique : bool = False
@@ -613,6 +615,16 @@ def load(self, verbose: int = 0):
613615 if self .keys_with_nans :
614616 print (f"[CubeLogs.load] keys_with_nans={ self .keys_with_nans } " )
615617 self .data [self .time ] = pandas .to_datetime (self .data [self .time ])
618+
619+ if self .keep_last_date :
620+ times = self .data [self .time ].dropna ()
621+ mi , mx = times .min (), times .max ()
622+ if mi != mx :
623+ print (f"[CubeLogs.load] setting all dates in column { self .time } to { mx !r} " )
624+ self .data .loc [~ self .data [self .time ].isna (), self .time ] = mx
625+ self .values_for_key [self .time ] = {mx }
626+ if self .data [self .time ].isna ().max ():
627+ self .values_for_key [self .time ].add (np .nan )
616628 if verbose :
617629 print (f"[CubeLogs.load] done, shape={ self .shape } " )
618630 return self
@@ -821,11 +833,6 @@ def view(
821833 unique = set ()
822834
823835 _md = lambda s : {k : v for k , v in self .values_for_key .items () if k in s } # noqa: E731
824- assert key_index , (
825- f"view_def.name={ view_def .name !r} , "
826- f"key_index should not be empty, got initially { key_index0 !r} , "
827- f"unique={ _md (key_index0 )} "
828- )
829836 all_cols = set (key_columns ) | set (key_index ) | set (key_agg ) | unique
830837 assert all_cols == set (self .keys_time ), (
831838 f"view_def.name={ view_def .name !r} , "
@@ -870,12 +877,6 @@ def view(
870877 key_columns = [c for c in key_columns if c not in seti ]
871878 values = [c for c in values if c not in seti ]
872879
873- assert key_index , (
874- f"view_def.name={ view_def .name !r} , view_def={ view_def } , "
875- f"key_index is empty, key_columns={ key_columns } , value={ values } , "
876- f"columns={ data .columns } ,shape={ data .shape } "
877- )
878-
879880 # final verification
880881 if verbose :
881882 print (f"[CubeLogs.view] key_index={ key_index } " )
@@ -896,7 +897,14 @@ def view(
896897 # pivot
897898 if verbose :
898899 print (f"[CubeLogs.view] values={ values } " )
899- piv = data .pivot (index = key_index [::- 1 ], columns = key_columns , values = values )
900+ if key_index :
901+ piv = data .pivot (index = key_index [::- 1 ], columns = key_columns , values = values )
902+ else :
903+ # pivot does return the same rank when it is empty.
904+ # Let's artificially add one.
905+ data = data .copy ()
906+ data ["ALL" ] = "ALL"
907+ piv = data .pivot (index = ["ALL" ], columns = key_columns , values = values )
900908 if isinstance (piv , pandas .Series ):
901909 piv = piv .to_frame (name = "series" )
902910 names = list (piv .columns .names )
@@ -1106,7 +1114,7 @@ def to_excel(
11061114 if memory > 2 ** 22 :
11071115 msg = (
11081116 f"[CubeLogs.to_excel] skipping { name !r} , "
1109- f"too big for excel { memory } bytes"
1117+ f"too big for excel with { memory } bytes"
11101118 )
11111119 if verbose :
11121120 print (msg )
@@ -1123,13 +1131,26 @@ def to_excel(
11231131 plots .append (CubePlot (df , kind = "barh" , orientation = "row" , split = True ))
11241132 if raw :
11251133 assert main not in views , f"{ main !r} is duplicated in views { sorted (views )} "
1126- if verbose :
1127- print (f"[CubeLogs.to_excel] add sheet { raw !r} with shape { self .shape } " )
1128- self .data .to_excel (writer , sheet_name = raw , freeze_panes = (1 , 1 ), index = True )
11291134 # Too long.
11301135 # self._apply_excel_style(raw, writer, self.data)
11311136 if csv and "raw" in csv :
11321137 df .reset_index (drop = False ).to_csv (f"{ output } .raw.csv" , index = False )
1138+ memory = df .memory_usage (deep = True ).sum ()
1139+ if memory > 2 ** 22 :
1140+ msg = (
1141+ f"[CubeLogs.to_excel] skipping 'raw', "
1142+ f"too big for excel with { memory } bytes"
1143+ )
1144+ if verbose :
1145+ print (msg )
1146+ else :
1147+ warnings .warn (msg , category = RuntimeWarning , stacklevel = 0 )
1148+ else :
1149+ if verbose :
1150+ print (f"[CubeLogs.to_excel] add sheet 'raw' with shape { self .shape } " )
1151+ self .data .to_excel (
1152+ writer , sheet_name = "raw" , freeze_panes = (1 , 1 ), index = True
1153+ )
11331154
11341155 if plots :
11351156 from openpyxl .drawing .image import Image
@@ -1236,6 +1257,7 @@ def __init__(
12361257 "time_export_unbiased" ,
12371258 ),
12381259 fill_missing : Optional [Sequence [Tuple [str , Any ]]] = (("model_attn_impl" , "eager" ),),
1260+ keep_last_date : bool = False ,
12391261 ):
12401262 super ().__init__ (
12411263 data = data ,
@@ -1246,6 +1268,7 @@ def __init__(
12461268 recent = recent ,
12471269 formulas = formulas ,
12481270 fill_missing = fill_missing ,
1271+ keep_last_date = keep_last_date ,
12491272 )
12501273
12511274 def _process_formula (
@@ -1577,6 +1600,34 @@ def mean_geo(gr):
15771600 keep_columns_in_index = ["suite" ],
15781601 name = "agg-suite" ,
15791602 order = order ,
1603+ ),
1604+ "agg-all" : lambda : CubeViewDef (
1605+ key_index = index_cols ,
1606+ values = self ._filter_column (
1607+ [
1608+ "TIME_ITER" ,
1609+ "speedup" ,
1610+ "time_latency" ,
1611+ "time_latency_eager" ,
1612+ "time_export_success" ,
1613+ "time_export_unbiased" ,
1614+ "^n_.*" ,
1615+ "target_opset" ,
1616+ "onnx_filesize" ,
1617+ "onnx_weight_size_torch" ,
1618+ "onnx_weight_size_proto" ,
1619+ "onnx_n_nodes" ,
1620+ "peak_gpu_torch" ,
1621+ "peak_gpu_nvidia" ,
1622+ ],
1623+ self .values ,
1624+ ),
1625+ ignore_unique = True ,
1626+ key_agg = ["model_name" , "task" , "model_task" , "suite" ],
1627+ agg_args = lambda column_name : "sum" if column_name .startswith ("n_" ) else "mean" ,
1628+ agg_multi = {"speedup_weighted" : mean_weight , "speedup_geo" : mean_geo },
1629+ name = "agg-all" ,
1630+ order = order ,
15801631 plots = True ,
15811632 ),
15821633 "disc" : lambda : CubeViewDef (
0 commit comments