
Commit b1f3aeb: "sbs"
4 files changed: +192 -50 lines

_unittests/ut_helpers/test_log_helper.py

Lines changed: 2 additions & 2 deletions

@@ -517,10 +517,10 @@ def test_cube_sbs(self):
         sbs, sbs_agg = cube.sbs(
             dict(CFA=dict(exporter="E1", opt="O"), CFB=dict(exporter="E2", opt="O"))
         )
-        self.assertEqual(sbs.shape, (4, 8))
+        self.assertEqual(sbs.shape, (4, 9))
         self.assertEqual(sbs.index.names, ["METRICS", "m_name"])
         self.assertEqual(sorted(sbs.columns.names), ["CONF", "exporter"])
-        self.assertEqual(sbs_agg.shape, (2, 8))
+        self.assertEqual(sbs_agg.shape, (2, 9))
         self.assertEqual(sbs_agg.index.names, ["METRICS"])
         self.assertEqual(sorted(sbs_agg.columns.names), ["CONF", "exporter"])

onnx_diagnostic/_command_lines_parser.py

Lines changed: 46 additions & 1 deletion

@@ -645,6 +645,27 @@ def _cmd_stats(argv: List[Any]):
     print("done.")


+class _ParseNamedDict(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        assert ":" in values, f"':' missing from {values!r}"
+        namespace_key, rest = values.split(":", 1)
+        pairs = rest.split(",")
+        inner_dict = {}
+
+        for pair in pairs:
+            if "=" not in pair:
+                raise argparse.ArgumentError(self, f"Expected '=' in pair '{pair}'")
+            key, value = pair.split("=", 1)
+            inner_dict[key] = value
+        assert inner_dict, f"Unable to parse {rest!r} into a dictionary"
+        if not hasattr(namespace, self.dest) or getattr(namespace, self.dest) is None:
+            setattr(namespace, self.dest, {})
+        assert isinstance(
+            getattr(namespace, self.dest), dict
+        ), f"Unexpected type for namespace.{self.dest}={getattr(namespace, self.dest)}"
+        getattr(namespace, self.dest).update({namespace_key: inner_dict})
+
+
 def get_parser_agg() -> ArgumentParser:
     parser = ArgumentParser(
         prog="agg",
@@ -653,6 +674,9 @@ def get_parser_agg() -> ArgumentParser:
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
+           The data can come from any CSV files produced by benchmarks;
+           it can concatenate many CSV files, or CSV files inside zip files.
+           It produces an Excel file with many tabs, one per view.
            """
        ),
        epilog=textwrap.dedent(
@@ -744,7 +768,15 @@ def get_parser_agg() -> ArgumentParser:
        "--views",
        default="agg-suite,agg-all,disc,speedup,time,time_export,err,cmd,"
        "bucket-speedup,raw-short,counts,peak-gpu,onnx",
-       help="Views to add to the output files.",
+       help=textwrap.dedent(
+           """
+           Views to add to the output files. Each view becomes a tab.
+           A view is defined by its name, among
+           agg-suite, agg-all, disc, speedup, time, time_export, err,
+           cmd, bucket-speedup, raw-short, counts, peak-gpu, onnx.
+           Their definition is part of class CubeLogsPerformance.
+           """
+       ),
    )
    parser.add_argument(
        "--csv",
@@ -764,6 +796,18 @@ def get_parser_agg() -> ArgumentParser:
        help="adds a filter to filter out data, syntax is\n"
        '``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
    )
+   parser.add_argument(
+       "--sbs",
+       help=textwrap.dedent(
+           """
+           Defines an exporter to compare to another; there must be at least
+           two configurations defined with --sbs. Example:
+           --sbs dynamo:exporter=onnx-dynamo,opt=ir,attn_impl=eager
+           --sbs custom:exporter=custom,opt=default,attn_impl=eager
+           """
+       ),
+       action=_ParseNamedDict,
+   )
    return parser


@@ -816,6 +860,7 @@ def _cmd_agg(argv: List[Any]):
        csv=args.csv.split(","),
        raw=args.raw,
        time_mask=True,
+       sbs=args.sbs,
    )
    if args.verbose:
        print(f"Wrote {args.output!r}")

onnx_diagnostic/helpers/_log_helper.py

Lines changed: 9 additions & 2 deletions

@@ -320,6 +320,7 @@ def apply_excel_style(
        Dict[str, Callable[[Any], "CubeViewDef.HighLightKind"]]  # noqa: F821
    ] = None,
    time_mask_view: Optional[Dict[str, pandas.DataFrame]] = None,
+   verbose: int = 0,
 ):
    """
    Applies styles on all sheets in a file unless the sheet is too big.
@@ -329,6 +330,7 @@ def apply_excel_style(
    :param time_mask_view: if specified, it contains dataframes with the same shape
        and values in {-1, 0, +1} indicating whether a value is unexpectedly
        lower (-1) or higher (+1); the background color changes accordingly
+   :param verbose: if positive, shows a progress loop
    """
    from openpyxl import load_workbook
    from openpyxl.styles import Alignment
@@ -353,8 +355,13 @@ def apply_excel_style(
        CubeViewDef.HighLightKind.GREEN: Font(color="00AA00"),
        CubeViewDef.HighLightKind.RED: Font(color="FF0000"),
    }
+   if verbose:
+       from tqdm import tqdm

-   for name in workbook.sheetnames:
+       sheet_names = tqdm(list(workbook.sheetnames))
+   else:
+       sheet_names = workbook.sheetnames
+   for name in sheet_names:
        if time_mask_view and name in time_mask_view:
            mask = time_mask_view[name]
            with pandas.ExcelWriter(io.BytesIO(), engine="openpyxl") as mask_writer:
@@ -367,7 +374,7 @@ def apply_excel_style(
        sheet = workbook[name]
        n_rows = sheet.max_row
        n_cols = sheet.max_column
-       if n_rows * n_cols > 2**18:
+       if n_rows * n_cols > 2**16 or n_rows > 2**13:
            # Too big.
            continue
        co: Dict[int, int] = {}
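The tightened guard now skips styling once a sheet exceeds 2**16 = 65,536 cells or 2**13 = 8,192 rows; the old limit was 2**18 = 262,144 cells with no row cap. A small illustration of the new rule, with hypothetical sheet sizes:

def too_big(n_rows: int, n_cols: int) -> bool:
    # new guard from the diff above
    return n_rows * n_cols > 2**16 or n_rows > 2**13

print(too_big(1_000, 20))   # False: 20,000 cells, 1,000 rows
print(too_big(500, 200))    # True: 100,000 cells exceed 2**16
print(too_big(10_000, 5))   # True: only 50,000 cells, but rows exceed 2**13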

onnx_diagnostic/helpers/log_helper.py

Lines changed: 135 additions & 45 deletions

@@ -932,6 +932,17 @@ def view(
        else:
            piv.sort_index(inplace=True, axis=1)

+       # final step, force columns with numerical values to be float
+       for c in list(piv.columns):
+           s = piv[c]
+           if not pandas.api.types.is_object_dtype(s):
+               continue
+           try:
+               sf = s.astype(float)
+           except (ValueError, TypeError):
+               continue
+           piv[c] = sf
+
        if verbose:
            print(f"[CubeLogs.view] levels {piv.index.names}, {piv.columns.names}")
            print(f"[CubeLogs.view] -- done view {view_def.name!r}")
@@ -974,7 +985,9 @@ def _dropna(
        for c in set(key_index) | set(key_columns):
            s = new_data[c]
            if s.isna().max():
-               if pandas.api.types.is_numeric_dtype(s):
+               if pandas.api.types.is_numeric_dtype(
+                   s
+               ) and not pandas.api.types.is_object_dtype(s):
                    min_v = s.dropna().min()
                    assert (
                        min_v >= 0
@@ -1011,7 +1024,7 @@ def describe(self) -> pandas.DataFrame:
            )
            if len(nonan) > 0:
                obs.update(dict(count=len(nonan)))
-               if is_numeric_dtype(nonan):
+               if is_numeric_dtype(nonan) and not pandas.api.types.is_object_dtype(nonan):
                    obs.update(
                        dict(
                            min=nonan.min(),
@@ -1048,6 +1061,7 @@ def to_excel(
        verbose: int = 0,
        csv: Optional[Sequence[str]] = None,
        time_mask: bool = False,
+       sbs: Optional[Dict[str, Dict[str, Any]]] = None,
    ):
        """
        Creates an excel file with a list of views.
@@ -1061,6 +1075,9 @@ def to_excel(
        :param time_mask: color the background of the cells if one
            of the values for the last date is unexpected,
            assuming they should remain stable
+       :param sbs: configurations to compare side-by-side, this adds two tabs,
+           one gathering raw data about the two configurations, the other one
+           aggregated by metrics
        """
        if verbose:
            print(f"[CubeLogs.to_excel] create Excel file {output}, shape={self.shape}")
@@ -1175,6 +1192,36 @@ def to_excel(
                writer, sheet_name="raw", freeze_panes=(1, 1), index=True
            )

+       if sbs:
+           if verbose:
+               for k, v in sbs.items():
+                   print(f"[CubeLogs.to_excel] sbs {k}: {v}")
+           sbs_raw, sbs_agg = self.sbs(sbs)
+           name = "∧".join(sbs)
+           if verbose:
+               print(f"[CubeLogs.to_excel] add sheet {name!r} with shape {sbs_raw.shape}")
+               print(
+                   f"[CubeLogs.to_excel] add sheet '{name}-AGG' "
+                   f"with shape {sbs_agg.shape}"
+               )
+           sbs_raw = sbs_raw.reset_index(drop=False)
+           sbs_raw.to_excel(
+               writer,
+               sheet_name=name,
+               freeze_panes=(
+                   sbs_raw.columns.nlevels + sbs_raw.index.nlevels,
+                   sbs_raw.index.nlevels,
+               ),
+           )
+           sbs_agg.to_excel(
+               writer,
+               sheet_name=f"{name}-AGG",
+               freeze_panes=(
+                   sbs_agg.columns.nlevels + sbs_agg.index.nlevels,
+                   sbs_agg.index.nlevels,
+               ),
+           )
+
        if plots:
            from openpyxl.drawing.image import Image

@@ -1206,7 +1253,9 @@ def to_excel(

        if verbose:
            print(f"[CubeLogs.to_excel] applies style to {output!r}")
-       apply_excel_style(writer, f_highlights, time_mask_view=time_mask_view)  # type: ignore[arg-type]
+       apply_excel_style(  # type: ignore[arg-type]
+           writer, f_highlights, time_mask_view=time_mask_view, verbose=verbose
+       )
        if verbose:
            print(f"[CubeLogs.to_excel] done with {len(views)} views")

@@ -1265,15 +1314,19 @@ def sbs(
        :param column_name: column to add with the name of the configuration
        :return: data and aggregated data
        """
+       assert (
+           len(configs) >= 2
+       ), f"A side by side needs at least two configs but configs={configs}"
        set_keys_time = set(self.keys_time)
        columns_index = None
        data_list = []
        for name_conf, conf in configs.items():
            if columns_index is None:
                columns_index = list(conf.keys())
-               assert (
-                   set(columns_index) <= set_keys_time
-               ), f"Configuration {conf} includes columns outside the keys."
+               assert set(columns_index) <= set_keys_time, (
+                   f"Configuration {conf} includes columns outside the keys "
+                   f"{', '.join(sorted(set_keys_time))}"
+               )
            else:
                assert set(columns_index) == set(conf), (
                    f"Every conf should share the same keys but conf={conf} "
@@ -1294,57 +1347,94 @@ def sbs(
        cube = self.clone(new_data, keys=[*self.keys_no_time, column_name])
        key_index = set(self.keys_time) - {*columns_index, column_name}  # type: ignore[misc]
        view = CubeViewDef(key_index=set(key_index), name="sbs", values=cube.values)  # type: ignore[arg-type]
-       res = cube.view(view)
-       res = res.stack("METRICS", future_stack=True)  # type: ignore[union-attr]
-       res = res.reorder_levels(
-           [res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
-       ).sort_index()
+       view_res = cube.view(view)

        # add metrics
-       index = list(res.columns.names).index(column_name)
+       index_column_name = list(view_res.columns.names).index(column_name)
+       index_metrics = list(view_res.columns.names).index("METRICS")

-       def _mkc(s, index=index):
-           c = ["" for c in res.columns.names]
-           c[index] = s
+       def _mkc(m, s):
+           c = ["" for c in view_res.columns.names]
+           c[index_column_name] = s
+           c[index_metrics] = m
            return tuple(c)

-       n_conf = res.shape[1]
-       mean_columns = list(res.columns)
+       list_configs = list(configs.items())
+       mean_columns = [
+           c
+           for c in view_res.columns
+           if pandas.api.types.is_numeric_dtype(view_res[c])
+           and not pandas.api.types.is_object_dtype(view_res[c])
+       ]
+       assert mean_columns, f"No numerical columns in {view_res.dtypes}"
+       view_res = view_res[mean_columns].copy()
+       metrics = sorted(set(c[index_metrics] for c in view_res.columns))
+       assert metrics, (
+           f"No numerical metrics detected in "
+           f"view_res.columns.names={view_res.columns.names}, "
+           f"columns={view_res.dtypes}"
+       )
        sum_columns = []
-       for i in range(n_conf):
-           c1 = res.columns[i]
-           n1 = c1[index]
-           if not pandas.api.types.is_numeric_dtype(res[c1].dtype):
-               continue
-           for j in range(i + 1, n_conf):
-               c2 = res.columns[j]
-               n2 = c2[index]
-               if not pandas.api.types.is_numeric_dtype(res[c2].dtype):
-                   continue
-               res[_mkc(f"∅{n1}∧∅{n2}")] = (res[c1].isna() & res[c2].isna()).astype(int)
-               res[_mkc(f"∅{n1}∧{n2}")] = (res[c1].isna() & ~res[c2].isna()).astype(int)
-               res[_mkc(f"{n1}∧∅{n2}")] = (~res[c1].isna() & res[c2].isna()).astype(int)
-               res[_mkc(f"{n1}∧{n2}")] = (~res[c1].isna() & ~res[c2].isna()).astype(int)
-               res[_mkc(f"{n1}<{n2}")] = (res[c1] < res[c2]).astype(int)
-               res[_mkc(f"{n1}>{n2}")] = (res[c1] > res[c2]).astype(int)
-               sum_columns.extend(
-                   [
-                       _mkc(f"∅{n1}∧∅{n2}"),
-                       _mkc(f"∅{n1}∧{n2}"),
-                       _mkc(f"{n1}∧∅{n2}"),
-                       _mkc(f"{n1}∧{n2}"),
-                       _mkc(f"{n1}<{n2}"),
-                       _mkc(f"{n1}>{n2}"),
-                   ]
-               )
+       columns_to_add = []
+       for i in range(len(list_configs)):
+           for j in range(i + 1, len(list_configs)):
+               for m in metrics:
+                   iname, ci = list_configs[i]
+                   jname, cj = list_configs[j]
+                   ci = ci.copy()
+                   cj = cj.copy()
+                   ci["METRICS"] = m
+                   cj["METRICS"] = m
+                   ci["CONF"] = iname
+                   cj["CONF"] = jname
+
+                   ci_name = tuple(ci[n] for n in view_res.columns.names)
+                   cj_name = tuple(cj[n] for n in view_res.columns.names)
+                   assert ci_name in view_res.columns or cj_name in view_res.columns, (
+                       f"Unable to find column {ci_name} or {cj_name} "
+                       f"in columns {view_res.columns}, metrics={metrics}"
+                   )
+                   if ci_name not in view_res.columns or cj_name not in view_res.columns:
+                       # One config does not have such a metric.
+                       continue
+
+                   si = view_res[ci_name]
+                   sj = view_res[cj_name]
+
+                   sinan = si.isna()
+                   sjnan = sj.isna()
+                   n1 = iname
+                   n2 = jname
+                   nas = pandas.DataFrame(
+                       {
+                           _mkc(m, f"∅{n1}∧∅{n2}"): (sinan & sjnan).astype(int),
+                           _mkc(m, f"∅{n1}∧{n2}"): (sinan & ~sjnan).astype(int),
+                           _mkc(m, f"{n1}∧∅{n2}"): (~sinan & sjnan).astype(int),
+                           _mkc(m, f"{n1}∧{n2}"): (~sinan & ~sjnan).astype(int),
+                           _mkc(m, f"{n1}<{n2}"): (si < sj).astype(int),
+                           _mkc(m, f"{n1}=={n2}"): (si == sj).astype(int),
+                           _mkc(m, f"{n1}>{n2}"): (si > sj).astype(int),
+                       }
+                   )
+                   nas.columns.names = view_res.columns.names
+                   columns_to_add.append(nas)
+                   sum_columns.extend(nas.columns)

        # aggregated metrics
        aggs = {
            **{k: "mean" for k in mean_columns},  # noqa: C420
            **{k: "sum" for k in sum_columns},  # noqa: C420
        }
-       agg = res.reset_index(level="METRICS").groupby("METRICS").agg(aggs)
-       return res, agg
+       view_res = pandas.concat([view_res, *columns_to_add], axis=1)
+       res = view_res.stack("METRICS", future_stack=True)  # type: ignore[union-attr]
+       res = res.reorder_levels(
+           [res.index.nlevels - 1, *list(range(res.index.nlevels - 1))]
+       ).sort_index()
+
+       view_res["GROUPBY"] = "A"
+       flat = view_res.groupby("GROUPBY").agg(aggs).reset_index(drop=True)
+       flat = flat.stack("METRICS", future_stack=True).droplevel(None, axis=0)
+       return res, flat


 class CubeLogsPerformance(CubeLogs):
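To make the new comparison concrete, here is a minimal standalone sketch of the indicator columns built above, on made-up metric values for two configurations A and B (names and data are illustrative, not from the commit):

import pandas as pd

si = pd.Series([1.0, None, 3.0, 4.0])  # metric values for config A
sj = pd.Series([2.0, 2.0, None, 4.0])  # metric values for config B
sinan, sjnan = si.isna(), sj.isna()
nas = pd.DataFrame(
    {
        "∅A∧∅B": (sinan & sjnan).astype(int),   # both values missing
        "∅A∧B": (sinan & ~sjnan).astype(int),   # only A missing
        "A∧∅B": (~sinan & sjnan).astype(int),   # only B missing
        "A∧B": (~sinan & ~sjnan).astype(int),   # both present
        "A<B": (si < sj).astype(int),
        "A==B": (si == sj).astype(int),
        "A>B": (si > sj).astype(int),
    }
)
# the aggregated tab sums these indicators, matching the "sum" entries of aggs
print(nas.sum())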
