@@ -932,6 +932,17 @@ def view(
         else:
             piv.sort_index(inplace=True, axis=1)

+        # final step, force columns with numerical values to be float
+        for c in list(piv.columns):
+            s = piv[c]
+            if not pandas.api.types.is_object_dtype(s):
+                continue
+            try:
+                sf = s.astype(float)
+            except (ValueError, TypeError):
+                continue
+            piv[c] = sf
+
         if verbose:
             print(f"[CubeLogs.view] levels {piv.index.names}, {piv.columns.names}")
             print(f"[CubeLogs.view] -- done view {view_def.name!r}")
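For context, a minimal standalone sketch of the coercion pattern added above; the toy DataFrame is invented, only the loop mirrors the new code:

import pandas

piv = pandas.DataFrame({"a": ["1.5", "2", None], "b": ["x", "y", "z"]})
for c in list(piv.columns):
    s = piv[c]
    if not pandas.api.types.is_object_dtype(s):
        continue
    try:
        sf = s.astype(float)
    except (ValueError, TypeError):
        continue  # non-numerical object columns such as "b" are left untouched
    piv[c] = sf
print(piv.dtypes)  # "a" becomes float64, "b" stays object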
@@ -974,7 +985,9 @@ def _dropna(
         for c in set(key_index) | set(key_columns):
             s = new_data[c]
             if s.isna().max():
-                if pandas.api.types.is_numeric_dtype(s):
+                if pandas.api.types.is_numeric_dtype(
+                    s
+                ) and not pandas.api.types.is_object_dtype(s):
                     min_v = s.dropna().min()
                     assert (
                         min_v >= 0
@@ -1011,7 +1024,7 @@ def describe(self) -> pandas.DataFrame:
             )
             if len(nonan) > 0:
                 obs.update(dict(count=len(nonan)))
-                if is_numeric_dtype(nonan):
+                if is_numeric_dtype(nonan) and not pandas.api.types.is_object_dtype(nonan):
                     obs.update(
                         dict(
                             min=nonan.min(),
@@ -1048,6 +1061,7 @@ def to_excel(
         verbose: int = 0,
         csv: Optional[Sequence[str]] = None,
         time_mask: bool = False,
+        sbs: Optional[Dict[str, Dict[str, Any]]] = None,
     ):
         """
         Creates an Excel file with a list of views.
@@ -1061,6 +1075,9 @@ def to_excel(
         :param time_mask: color the background of the cells if one
             of the values for the last date is unexpected,
             assuming they should remain stale
+        :param sbs: configurations to compare side by side; this adds two tabs,
+            one gathering the raw data of the compared configurations, the other
+            aggregated by metrics
         """
         if verbose:
             print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
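A sketch of how the new argument might be called; only the ``Dict[str, Dict[str, Any]]`` shape comes from the signature above, the key names and values below are invented:

cube.to_excel(
    output="report.xlsx",
    sbs={
        "eager": {"exporter": "eager", "opt": "none"},
        "custom": {"exporter": "custom", "opt": "default"},
    },
)

Each entry maps a configuration name to the column/value pairs selecting that configuration among the cube keys; ``sbs()`` checks that every entry uses the same keys.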
@@ -1175,6 +1192,36 @@ def to_excel(
             writer, sheet_name="raw", freeze_panes=(1, 1), index=True
         )

+        if sbs:
+            if verbose:
+                for k, v in sbs.items():
+                    print(f"[CubeLogs.to_excel] sbs {k}: {v}")
+            sbs_raw, sbs_agg = self.sbs(sbs)
+            name = "∧".join(sbs)
+            if verbose:
+                print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
+                print(
+                    f"[CubeLogs.to_excel] add sheet '{name}-AGG' "
+                    f"with shape {sbs_agg.shape}"
+                )
+            sbs_raw = sbs_raw.reset_index(drop=False)
+            sbs_raw.to_excel(
+                writer,
+                sheet_name=name,
+                freeze_panes=(
+                    sbs_raw.columns.nlevels + sbs_raw.index.nlevels,
+                    sbs_raw.index.nlevels,
+                ),
+            )
+            sbs_agg.to_excel(
+                writer,
+                sheet_name=f"{name}-AGG",
+                freeze_panes=(
+                    sbs_agg.columns.nlevels + sbs_agg.index.nlevels,
+                    sbs_agg.index.nlevels,
+                ),
+            )
+
         if plots:
             from openpyxl.drawing.image import Image

@@ -1206,7 +1253,9 @@ def to_excel(

         if verbose:
             print(f"[CubeLogs.to_excel] applies style to {output!r}")
-        apply_excel_style(writer, f_highlights, time_mask_view=time_mask_view)  # type: ignore[arg-type]
+        apply_excel_style(  # type: ignore[arg-type]
+            writer, f_highlights, time_mask_view=time_mask_view, verbose=verbose
+        )
         if verbose:
             print(f"[CubeLogs.to_excel] done with {len(views)} views")

@@ -1265,15 +1314,19 @@ def sbs(
         :param column_name: column to add with the name of the configuration
         :return: data and aggregated data
         """
+        assert (
+            len(configs) >= 2
+        ), f"A side by side needs at least two configs but configs={configs}"
         set_keys_time = set(self.keys_time)
         columns_index = None
         data_list = []
         for name_conf, conf in configs.items():
             if columns_index is None:
                 columns_index = list(conf.keys())
-                assert (
-                    set(columns_index) <= set_keys_time
-                ), f"Configuration {conf} includes columns outside the keys."
+                assert set(columns_index) <= set_keys_time, (
+                    f"Configuration {conf} includes columns outside the keys "
+                    f"{', '.join(sorted(set_keys_time))}"
+                )
             else:
                 assert set(columns_index) == set(conf), (
                     f"Every conf should share the same keys but conf={conf}"
@@ -1294,57 +1347,94 @@ def sbs(
         cube = self.clone(new_data, keys=[*self.keys_no_time, column_name])
         key_index = set(self.keys_time) - {*columns_index, column_name}  # type: ignore[misc]
         view = CubeViewDef(key_index=set(key_index), name="sbs", values=cube.values)  # type: ignore[arg-type]
-        res = cube.view(view)
-        res = res.stack("METRICS", future_stack=True)  # type: ignore[union-attr]
-        res = res.reorder_levels(
-            [res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
-        ).sort_index()
+        view_res = cube.view(view)

         # add metrics
-        index = list(res.columns.names).index(column_name)
+        index_column_name = list(view_res.columns.names).index(column_name)
+        index_metrics = list(view_res.columns.names).index("METRICS")

-        def _mkc(s, index=index):
-            c = ["" for c in res.columns.names]
-            c[index] = s
+        def _mkc(m, s):
+            c = ["" for c in view_res.columns.names]
+            c[index_column_name] = s
+            c[index_metrics] = m
             return tuple(c)

-        n_conf = res.shape[1]
-        mean_columns = list(res.columns)
+        list_configs = list(configs.items())
+        mean_columns = [
+            c
+            for c in view_res.columns
+            if pandas.api.types.is_numeric_dtype(view_res[c])
+            and not pandas.api.types.is_object_dtype(view_res[c])
+        ]
+        assert mean_columns, f"No numerical columns in {view_res.dtypes}"
+        view_res = view_res[mean_columns].copy()
+        metrics = sorted(set(c[index_metrics] for c in view_res.columns))
+        assert metrics, (
+            f"No numerical metrics detected in "
+            f"view_res.columns.names={view_res.columns.names}, "
+            f"columns={view_res.dtypes}"
+        )
         sum_columns = []
-        for i in range(n_conf):
-            c1 = res.columns[i]
-            n1 = c1[index]
-            if not pandas.api.types.is_numeric_dtype(res[c1].dtype):
-                continue
-            for j in range(i + 1, n_conf):
-                c2 = res.columns[j]
-                n2 = c2[index]
-                if not pandas.api.types.is_numeric_dtype(res[c2].dtype):
-                    continue
-                res[_mkc(f"∅{n1}∧∅{n2}")] = (res[c1].isna() & res[c2].isna()).astype(int)
-                res[_mkc(f"∅{n1}∧{n2}")] = (res[c1].isna() & ~res[c2].isna()).astype(int)
-                res[_mkc(f"{n1}∧∅{n2}")] = (~res[c1].isna() & res[c2].isna()).astype(int)
-                res[_mkc(f"{n1}∧{n2}")] = (~res[c1].isna() & ~res[c2].isna()).astype(int)
-                res[_mkc(f"{n1}<{n2}")] = (res[c1] < res[c2]).astype(int)
-                res[_mkc(f"{n1}>{n2}")] = (res[c1] > res[c2]).astype(int)
-                sum_columns.extend(
-                    [
-                        _mkc(f"∅{n1}∧∅{n2}"),
-                        _mkc(f"∅{n1}∧{n2}"),
-                        _mkc(f"{n1}∧∅{n2}"),
-                        _mkc(f"{n1}∧{n2}"),
-                        _mkc(f"{n1}<{n2}"),
-                        _mkc(f"{n1}>{n2}"),
-                    ]
-                )
+        columns_to_add = []
+        for i in range(len(list_configs)):
+            for j in range(i + 1, len(list_configs)):
+                for m in metrics:
+                    iname, ci = list_configs[i]
+                    jname, cj = list_configs[j]
+                    ci = ci.copy()
+                    cj = cj.copy()
+                    ci["METRICS"] = m
+                    cj["METRICS"] = m
+                    ci[column_name] = iname
+                    cj[column_name] = jname
+
+                    ci_name = tuple(ci[n] for n in view_res.columns.names)
+                    cj_name = tuple(cj[n] for n in view_res.columns.names)
+                    assert ci_name in view_res.columns or cj_name in view_res.columns, (
+                        f"Unable to find column {ci_name} or {cj_name} "
+                        f"in columns {view_res.columns}, metrics={metrics}"
+                    )
+                    if ci_name not in view_res.columns or cj_name not in view_res.columns:
+                        # One of the configurations does not have this metric.
+                        continue
+
+                    si = view_res[ci_name]
+                    sj = view_res[cj_name]
+
+                    sinan = si.isna()
+                    sjnan = sj.isna()
+                    n1 = iname
+                    n2 = jname
+                    nas = pandas.DataFrame(
+                        {
+                            _mkc(m, f"∅{n1}∧∅{n2}"): (sinan & sjnan).astype(int),
+                            _mkc(m, f"∅{n1}∧{n2}"): (sinan & ~sjnan).astype(int),
+                            _mkc(m, f"{n1}∧∅{n2}"): (~sinan & sjnan).astype(int),
+                            _mkc(m, f"{n1}∧{n2}"): (~sinan & ~sjnan).astype(int),
+                            _mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
+                            _mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
+                            _mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
+                        }
+                    )
+                    nas.columns.names = view_res.columns.names
+                    columns_to_add.append(nas)
+                    sum_columns.extend(nas.columns)

         # aggregated metrics
         aggs = {
             **{k: "mean" for k in mean_columns},  # noqa: C420
             **{k: "sum" for k in sum_columns},  # noqa: C420
         }
-        agg = res.reset_index(level="METRICS").groupby("METRICS").agg(aggs)
-        return res, agg
+        view_res = pandas.concat([view_res, *columns_to_add], axis=1)
+        res = view_res.stack("METRICS", future_stack=True)  # type: ignore[union-attr]
+        res = res.reorder_levels(
+            [res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
+        ).sort_index()
+
+        view_res["GROUPBY"] = "A"
+        flat = view_res.groupby("GROUPBY").agg(aggs).reset_index(drop=True)
+        flat = flat.stack("METRICS", future_stack=True).droplevel(None, axis=0)
+        return res, flat


 class CubeLogsPerformance(CubeLogs):
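To make the new per-pair counters concrete, a standalone sketch with invented values for two configurations "A" and "B" on a single metric (plain column labels instead of the multi-level ones produced by ``_mkc``):

import pandas

si = pandas.Series([1.0, None, 3.0, 5.0])  # metric values for configuration A
sj = pandas.Series([2.0, 4.0, None, 5.0])  # metric values for configuration B
sinan, sjnan = si.isna(), sj.isna()
flags = pandas.DataFrame(
    {
        "∅A∧∅B": (sinan & sjnan).astype(int),    # both missing
        "∅A∧B": (sinan & ~sjnan).astype(int),    # only A missing
        "A∧∅B": (~sinan & sjnan).astype(int),    # only B missing
        "A∧B": (~sinan & ~sjnan).astype(int),    # both present
        "A<B": (si < sj).astype(int),
        "A==B": (si == sj).astype(int),
        "A>B": (si > sj).astype(int),
    }
)
print(flags.sum())  # these columns are aggregated with "sum", the raw metrics with "mean"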