@@ -425,15 +425,17 @@ def __init__(
425425 self .fill_missing = fill_missing
426426 self .keep_last_date = keep_last_date
427427
428- def clone (self , data : Optional [pandas .DataFrame ] = None ) -> "CubeLogs" :
428+ def clone (
429+ self , data : Optional [pandas .DataFrame ] = None , keys : Optional [Sequence [str ]] = None
430+ ) -> "CubeLogs" :
429431 """
430432 Makes a copy of the dataframe.
431433 It copies the processed data not the original one.
432434 """
433435 cube = self .__class__ (
434436 data if data is not None else self .data .copy (),
435437 time = self .time ,
436- keys = self .keys_no_time ,
438+ keys = keys or self .keys_no_time ,
437439 values = self .values ,
438440 )
439441 cube .load ()
@@ -1248,16 +1250,25 @@ def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "
12481250 )
12491251 return self .clone (data = dgr .reset_index (drop = False ))
12501252
1251- def sbs (self , configs : Sequence [Dict [str , Any ]]) -> pandas .DataFrame :
1253+ def sbs (
1254+ self , configs : Dict [str , Dict [str , Any ]], column_name : str = "CONF"
1255+ ) -> Tuple [pandas .DataFrame , pandas .DataFrame ]:
12521256 """
12531257 Creates a side-by-side for two configurations.
12541258 Every configuration is a dictionary column :value which filters
12551259 the rows to keep in order to compute the side by side.
1260+ Every configuration is given a name (the key in configs),
1261+ which is added in column column_name.
1262+
1263+ :param configs: example
1264+ ``dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
1265+ :param column_name: column to add with the name of the configuration
1266+ :return : data and aggregated data
12561267 """
12571268 set_keys_time = set (self .keys_time )
12581269 columns_index = None
12591270 datas = []
1260- for conf in configs :
1271+ for name_conf , conf in configs . items () :
12611272 if columns_index is None :
12621273 columns_index = list (conf .keys ())
12631274 assert (
@@ -1272,18 +1283,68 @@ def sbs(self, configs: Sequence[Dict[str, Any]]) -> pandas.DataFrame:
12721283 for k , v in conf .items ():
12731284 data = data [data [k ] == v ]
12741285 assert data .shape [0 ] > 0 , f"No rows found for conf={ conf } "
1275- datas .append ((conf , data ))
1286+ assert (
1287+ column_name not in data .columns
1288+ ), f"column_name={ column_name !r} is already in { data .columns } "
1289+ data = data .copy ()
1290+ data [column_name ] = name_conf
1291+ datas .append (data )
12761292
1277- new_data = pandas .concat ([ d [ 1 ] for d in datas ] , axis = 0 )
1278- cube = self .clone (new_data )
1279- key_index = {c for c in self .keys_time if c not in set ( columns_index ) }
1293+ new_data = pandas .concat (datas , axis = 0 )
1294+ cube = self .clone (new_data , keys = [ * self . keys_no_time , column_name ] )
1295+ key_index = {c for c in self .keys_time if c not in { * columns_index , column_name } }
12801296 view = CubeViewDef (key_index = key_index , name = "sbs" , values = cube .values )
12811297 res = cube .view (view )
12821298 res = res .stack ("METRICS" , future_stack = True ) # type: ignore[union-attr]
12831299 res = res .reorder_levels (
12841300 [res .index .nlevels - 1 , * list (range (res .index .nlevels - 1 ))]
12851301 ).sort_index ()
1286- return res
1302+
1303+ # add metrics
1304+ index = list (res .columns .names ).index (column_name )
1305+
1306+ def _mkc (s , index = index ):
1307+ c = ["" for c in res .columns .names ]
1308+ c [index ] = s
1309+ return tuple (c )
1310+
1311+ n_conf = res .shape [1 ]
1312+ mean_columns = list (res .columns )
1313+ sum_columns = []
1314+ for i in range (n_conf ):
1315+ c1 = res .columns [i ]
1316+ n1 = c1 [index ]
1317+ if not pandas .api .types .is_numeric_dtype (res [c1 ].dtype ):
1318+ continue
1319+ for j in range (i + 1 , n_conf ):
1320+ c2 = res .columns [j ]
1321+ n2 = c2 [index ]
1322+ if not pandas .api .types .is_numeric_dtype (res [c2 ].dtype ):
1323+ continue
1324+ res [_mkc (f"∅{ n1 } ∧∅{ n2 } " )] = (res [c1 ].isna () & res [c2 ].isna ()).astype (int )
1325+ res [_mkc (f"∅{ n1 } ∧{ n2 } " )] = (res [c1 ].isna () & ~ res [c2 ].isna ()).astype (int )
1326+ res [_mkc (f"{ n1 } ∧∅{ n2 } " )] = (~ res [c1 ].isna () & res [c2 ].isna ()).astype (int )
1327+ res [_mkc (f"{ n1 } ∧{ n2 } " )] = (~ res [c1 ].isna () & ~ res [c2 ].isna ()).astype (int )
1328+ res [_mkc (f"{ n1 } <{ n2 } " )] = (res [c1 ] < res [c2 ]).astype (int )
1329+ res [_mkc (f"{ n1 } >{ n2 } " )] = (res [c1 ] > res [c2 ]).astype (int )
1330+ sum_columns .extend (
1331+ [
1332+ _mkc (f"∅{ n1 } ∧∅{ n2 } " ),
1333+ _mkc (f"∅{ n1 } ∧{ n2 } " ),
1334+ _mkc (f"{ n1 } ∧∅{ n2 } " ),
1335+ _mkc (f"{ n1 } ∧{ n2 } " ),
1336+ _mkc (f"{ n1 } <{ n2 } " ),
1337+ _mkc (f"{ n1 } >{ n2 } " ),
1338+ ]
1339+ )
1340+
1341+ # aggregated metrics
1342+ aggs = {
1343+ ** {k : "mean" for k in mean_columns }, # noqa: C420
1344+ ** {k : "sum" for k in sum_columns }, # noqa: C420
1345+ }
1346+ agg = res .reset_index (level = "METRICS" ).groupby ("METRICS" ).agg (aggs )
1347+ return res , agg
12871348
12881349
12891350class CubeLogsPerformance (CubeLogs ):
@@ -1371,15 +1432,18 @@ def __init__(
13711432 keep_last_date = keep_last_date ,
13721433 )
13731434
1374- def clone (self , data : Optional [pandas .DataFrame ] = None ) -> "CubeLogs" :
1435+ def clone (
1436+ self , data : Optional [pandas .DataFrame ] = None , keys : Optional [Sequence [str ]] = None
1437+ ) -> "CubeLogs" :
13751438 """
13761439 Makes a copy of the dataframe.
13771440 It copies the processed data not the original one.
1441+ keys can be changed as well.
13781442 """
13791443 cube = self .__class__ (
13801444 data if data is not None else self .data .copy (),
13811445 time = self .time ,
1382- keys = self .keys_no_time ,
1446+ keys = keys or self .keys_no_time ,
13831447 values = self .values ,
13841448 recent = False ,
13851449 )
0 commit comments