Skip to content

Commit 4fe1d91

Browse files
committed
better agg
1 parent 145f5aa commit 4fe1d91

File tree

2 files changed

+83
-14
lines changed

2 files changed

+83
-14
lines changed

_unittests/ut_helpers/test_log_helper.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -514,10 +514,15 @@ def test_cube_sbs(self):
514514
cube = CubeLogs(
515515
df, keys=["^m_*", "exporter", "opt"], values=["time_p", "perf"], time="date"
516516
).load()
517-
sbs = cube.sbs([dict(exporter="E1", opt="O"), dict(exporter="E2", opt="O")])
518-
self.assertEqual(sbs.shape, (4, 2))
517+
sbs, sbs_agg = cube.sbs(
518+
dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))
519+
)
520+
self.assertEqual(sbs.shape, (4, 8))
519521
self.assertEqual(sbs.index.names, ["METRICS", "m_name"])
520-
self.assertEqual(sbs.columns.names, ["exporter"])
522+
self.assertEqual(sorted(sbs.columns.names), ["CONF", "exporter"])
523+
self.assertEqual(sbs_agg.shape, (2, 8))
524+
self.assertEqual(sbs_agg.index.names, ["METRICS"])
525+
self.assertEqual(sorted(sbs_agg.columns.names), ["CONF", "exporter"])
521526

522527

523528
if __name__ == "__main__":

onnx_diagnostic/helpers/log_helper.py

Lines changed: 75 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -425,15 +425,17 @@ def __init__(
425425
self.fill_missing = fill_missing
426426
self.keep_last_date = keep_last_date
427427

428-
def clone(self, data: Optional[pandas.DataFrame] = None) -> "CubeLogs":
428+
def clone(
429+
self, data: Optional[pandas.DataFrame] = None, keys: Optional[Sequence[str]] = None
430+
) -> "CubeLogs":
429431
"""
430432
Makes a copy of the dataframe.
431433
It copies the processed data not the original one.
432434
"""
433435
cube = self.__class__(
434436
data if data is not None else self.data.copy(),
435437
time=self.time,
436-
keys=self.keys_no_time,
438+
keys=keys or self.keys_no_time,
437439
values=self.values,
438440
)
439441
cube.load()
@@ -1248,16 +1250,25 @@ def cube_time(self, fill_other_dates: bool = False, threshold: float = 1.2) -> "
12481250
)
12491251
return self.clone(data=dgr.reset_index(drop=False))
12501252

1251-
def sbs(self, configs: Sequence[Dict[str, Any]]) -> pandas.DataFrame:
1253+
def sbs(
1254+
self, configs: Dict[str, Dict[str, Any]], column_name: str = "CONF"
1255+
) -> Tuple[pandas.DataFrame, pandas.DataFrame]:
12521256
"""
12531257
Creates a side-by-side for two configurations.
12541258
Every configuration is a dictionary column:value which filters in
12551259
the rows to keep in order to compute the side by side.
1260+
Every configuration is given a name (the key in configs),
1261+
which is added in the column ``column_name``.
1262+
1263+
:param configs: example
1264+
``dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))``
1265+
:param column_name: column to add with the name of the configuration
1266+
:return: data and the aggregated data
12561267
"""
12571268
set_keys_time = set(self.keys_time)
12581269
columns_index = None
12591270
datas = []
1260-
for conf in configs:
1271+
for name_conf, conf in configs.items():
12611272
if columns_index is None:
12621273
columns_index = list(conf.keys())
12631274
assert (
@@ -1272,18 +1283,68 @@ def sbs(self, configs: Sequence[Dict[str, Any]]) -> pandas.DataFrame:
12721283
for k, v in conf.items():
12731284
data = data[data[k] == v]
12741285
assert data.shape[0] > 0, f"No rows found for conf={conf}"
1275-
datas.append((conf, data))
1286+
assert (
1287+
column_name not in data.columns
1288+
), f"column_name={column_name!r} is already in {data.columns}"
1289+
data = data.copy()
1290+
data[column_name] = name_conf
1291+
datas.append(data)
12761292

1277-
new_data = pandas.concat([d[1] for d in datas], axis=0)
1278-
cube = self.clone(new_data)
1279-
key_index = {c for c in self.keys_time if c not in set(columns_index)}
1293+
new_data = pandas.concat(datas, axis=0)
1294+
cube = self.clone(new_data, keys=[*self.keys_no_time, column_name])
1295+
key_index = {c for c in self.keys_time if c not in {*columns_index, column_name}}
12801296
view = CubeViewDef(key_index=key_index, name="sbs", values=cube.values)
12811297
res = cube.view(view)
12821298
res = res.stack("METRICS", future_stack=True) # type: ignore[union-attr]
12831299
res = res.reorder_levels(
12841300
[res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
12851301
).sort_index()
1286-
return res
1302+
1303+
# add metrics
1304+
index = list(res.columns.names).index(column_name)
1305+
1306+
def _mkc(s, index=index):
1307+
c = ["" for c in res.columns.names]
1308+
c[index] = s
1309+
return tuple(c)
1310+
1311+
n_conf = res.shape[1]
1312+
mean_columns = list(res.columns)
1313+
sum_columns = []
1314+
for i in range(n_conf):
1315+
c1 = res.columns[i]
1316+
n1 = c1[index]
1317+
if not pandas.api.types.is_numeric_dtype(res[c1].dtype):
1318+
continue
1319+
for j in range(i + 1, n_conf):
1320+
c2 = res.columns[j]
1321+
n2 = c2[index]
1322+
if not pandas.api.types.is_numeric_dtype(res[c2].dtype):
1323+
continue
1324+
res[_mkc(f"∅{n1}∧∅{n2}")] = (res[c1].isna() & res[c2].isna()).astype(int)
1325+
res[_mkc(f"∅{n1}{n2}")] = (res[c1].isna() & ~res[c2].isna()).astype(int)
1326+
res[_mkc(f"{n1}∧∅{n2}")] = (~res[c1].isna() & res[c2].isna()).astype(int)
1327+
res[_mkc(f"{n1}{n2}")] = (~res[c1].isna() & ~res[c2].isna()).astype(int)
1328+
res[_mkc(f"{n1}<{n2}")] = (res[c1] < res[c2]).astype(int)
1329+
res[_mkc(f"{n1}>{n2}")] = (res[c1] > res[c2]).astype(int)
1330+
sum_columns.extend(
1331+
[
1332+
_mkc(f"∅{n1}∧∅{n2}"),
1333+
_mkc(f"∅{n1}{n2}"),
1334+
_mkc(f"{n1}∧∅{n2}"),
1335+
_mkc(f"{n1}{n2}"),
1336+
_mkc(f"{n1}<{n2}"),
1337+
_mkc(f"{n1}>{n2}"),
1338+
]
1339+
)
1340+
1341+
# aggregated metrics
1342+
aggs = {
1343+
**{k: "mean" for k in mean_columns}, # noqa: C420
1344+
**{k: "sum" for k in sum_columns}, # noqa: C420
1345+
}
1346+
agg = res.reset_index(level="METRICS").groupby("METRICS").agg(aggs)
1347+
return res, agg
12871348

12881349

12891350
class CubeLogsPerformance(CubeLogs):
@@ -1371,15 +1432,18 @@ def __init__(
13711432
keep_last_date=keep_last_date,
13721433
)
13731434

1374-
def clone(self, data: Optional[pandas.DataFrame] = None) -> "CubeLogs":
1435+
def clone(
1436+
self, data: Optional[pandas.DataFrame] = None, keys: Optional[Sequence[str]] = None
1437+
) -> "CubeLogs":
13751438
"""
13761439
Makes a copy of the dataframe.
13771440
It copies the processed data not the original one.
1441+
keys can be changed as well.
13781442
"""
13791443
cube = self.__class__(
13801444
data if data is not None else self.data.copy(),
13811445
time=self.time,
1382-
keys=self.keys_no_time,
1446+
keys=keys or self.keys_no_time,
13831447
values=self.values,
13841448
recent=False,
13851449
)

0 commit comments

Comments
 (0)