Skip to content

Commit ebecb67

Browse files
authored
Add functions to compare side-by-side (#184)
* refactoring * mypy * fix * first sbs * mypy * better agg * sbs * fix * sbs * fix sbs * code change
1 parent d3db6cb commit ebecb67

File tree

5 files changed

+404
-10
lines changed

5 files changed

+404
-10
lines changed

CHANGELOGS.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ Change Logs
44
0.7.5
55
+++++
66

7+
* :pr:`184`: implements side-by-side
8+
79
0.7.4
810
+++++
911

_unittests/ut_helpers/test_log_helper.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,150 @@ def test_historical_cube_time_mask(self):
470470
cube = CubeLogs(df, keys=["^m_*", "exporter"], time="date").load()
471471
cube.to_excel(output, views=["time_p"], time_mask=True, verbose=1)
472472

473+
def test_cube_sbs_no_time(self):
    # Side-by-side comparison of two configurations when every row
    # carries the same date.
    columns = ("date", "time_p", "exporter", "opt", "perf", "m_name", "m_cls")
    records = [
        ("2025/01/01", 0.51, "E1", "O", 3.7, "A", "CA"),
        ("2025/01/01", 0.51, "E2", "O", 3.4, "A", "CA"),
        ("2025/01/01", 0.71, "E2", "O", 3.5, "B", "CA"),
        ("2025/01/01", 0.71, "E2", "K", 3.6, "B", "CA"),
    ]
    df = pandas.DataFrame([dict(zip(columns, record)) for record in records])
    cube = CubeLogs(
        df,
        keys=["^m_*", "exporter", "opt"],
        values=["time_p", "perf"],
        time="date",
    ).load()

    # Two named configurations to compare, the last row (opt="K")
    # belongs to neither of them.
    configurations = {
        "CFA": {"exporter": "E1", "opt": "O"},
        "CFB": {"exporter": "E2", "opt": "O"},
    }
    sbs, sbs_agg = cube.sbs(configurations)

    expectations = (
        (sbs, (4, 9), ["METRICS", "m_name", "date"]),
        (sbs_agg, (2, 9), ["date", "METRICS"]),
    )
    for frame, expected_shape, expected_index_names in expectations:
        self.assertEqual(frame.shape, expected_shape)
        self.assertEqual(frame.index.names, expected_index_names)
        self.assertEqual(sorted(frame.columns.names), ["CONF", "exporter"])
526+
527+
def test_cube_sbs_with_time(self):
    # Side-by-side comparison of two configurations when the same
    # measures are repeated on two different dates.
    columns = ("time_p", "exporter", "opt", "perf", "m_name", "m_cls")
    daily_records = [
        (0.51, "E1", "O", 3.7, "A", "CA"),
        (0.51, "E2", "O", 3.4, "A", "CA"),
        (0.71, "E2", "O", 3.5, "B", "CA"),
        (0.71, "E2", "K", 3.6, "B", "CA"),
    ]
    # ``date`` stays the first key of every row, all the rows of the first
    # day come before the rows of the second day.
    rows = [
        {"date": day, **dict(zip(columns, record))}
        for day in ("2025/01/01", "2025/01/02")
        for record in daily_records
    ]
    df = pandas.DataFrame(rows)
    cube = CubeLogs(
        df,
        keys=["^m_*", "exporter", "opt"],
        values=["time_p", "perf"],
        time="date",
    ).load()

    # Two named configurations to compare, the rows with opt="K"
    # belong to neither of them.
    configurations = {
        "CFA": {"exporter": "E1", "opt": "O"},
        "CFB": {"exporter": "E2", "opt": "O"},
    }
    sbs, sbs_agg = cube.sbs(configurations)

    expectations = (
        (sbs, (8, 9), ["METRICS", "m_name", "date"]),
        (sbs_agg, (4, 9), ["date", "METRICS"]),
    )
    for frame, expected_shape, expected_index_names in expectations:
        self.assertEqual(frame.shape, expected_shape)
        self.assertEqual(frame.index.names, expected_index_names)
        self.assertEqual(sorted(frame.columns.names), ["CONF", "exporter"])
616+
473617

474618
if __name__ == "__main__":
475619
unittest.main(verbosity=2)

onnx_diagnostic/_command_lines_parser.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,27 @@ def _cmd_stats(argv: List[Any]):
645645
print("done.")
646646

647647

648+
class _ParseNamedDict(argparse.Action):
    """
    Argparse action parsing ``<name>:<key1>=<value1>,<key2>=<value2>``.

    Every occurrence of the option adds one entry
    ``{name: {key1: value1, key2: value2}}`` to a dictionary stored in the
    namespace under ``self.dest``. The option can therefore be repeated
    to define several named dictionaries. Keys and values are kept as strings,
    only the first ``':'`` and the first ``'='`` of every pair are separators.

    :raises argparse.ArgumentError: if ``':'`` is missing from the value
        or if a pair does not contain ``'='``
    :raises TypeError: if the destination already holds something
        which is not a dictionary
    """

    def __call__(self, parser, namespace, values, option_string=None):
        # Malformed user input is reported with argparse.ArgumentError and not
        # with assert: assertions are removed by ``python -O`` and
        # ``parse_args`` turns ArgumentError into a regular usage message.
        if ":" not in values:
            raise argparse.ArgumentError(self, f"':' missing from {values!r}")
        namespace_key, rest = values.split(":", 1)

        inner_dict = {}
        # ``rest.split(",")`` always returns at least one item and every item
        # either raises or adds a key: inner_dict cannot be empty after the loop.
        for pair in rest.split(","):
            if "=" not in pair:
                raise argparse.ArgumentError(self, f"Expected '=' in pair '{pair}'")
            key, value = pair.split("=", 1)
            inner_dict[key] = value

        # The first occurrence creates the dictionary, the next ones extend it.
        current = getattr(namespace, self.dest, None)
        if current is None:
            current = {}
            setattr(namespace, self.dest, current)
        if not isinstance(current, dict):
            raise TypeError(f"Unexpected type for namespace.{self.dest}={current}")
        current[namespace_key] = inner_dict
667+
668+
648669
def get_parser_agg() -> ArgumentParser:
649670
parser = ArgumentParser(
650671
prog="agg",
@@ -653,6 +674,9 @@ def get_parser_agg() -> ArgumentParser:
653674
Aggregates statistics coming from benchmarks.
654675
Every run is a row. Every row is indexed by some keys,
655676
and produces values. Every row has a date.
677+
The data can come any csv files produces by benchmarks,
678+
it can concatenates many csv files, or csv files inside zip files.
679+
It produces an excel file with many tabs, one per view.
656680
"""
657681
),
658682
epilog=textwrap.dedent(
@@ -744,7 +768,15 @@ def get_parser_agg() -> ArgumentParser:
744768
"--views",
745769
default="agg-suite,agg-all,disc,speedup,time,time_export,err,cmd,"
746770
"bucket-speedup,raw-short,counts,peak-gpu,onnx",
747-
help="Views to add to the output files.",
771+
help=textwrap.dedent(
772+
"""
773+
Views to add to the output files. Each view becomes a tab.
774+
A view is defined by its name, among
775+
agg-suite, agg-all, disc, speedup, time, time_export, err,
776+
cmd, bucket-speedup, raw-short, counts, peak-gpu, onnx.
777+
Their definition is part of class CubeLogsPerformance.
778+
"""
779+
),
748780
)
749781
parser.add_argument(
750782
"--csv",
@@ -764,6 +796,18 @@ def get_parser_agg() -> ArgumentParser:
764796
help="adds a filter to filter out data, syntax is\n"
765797
'``"<column1>:<value1>;<value2>/<column2>:<value3>"`` ...',
766798
)
799+
parser.add_argument(
800+
"--sbs",
801+
help=textwrap.dedent(
802+
"""
803+
Defines an exporter to compare to another, there must be at least
804+
two arguments defined with --sbs. Example:
805+
--sbs dynamo:exporter=onnx-dynamo,opt=ir,attn_impl=eager
806+
--sbs custom:exporter=custom,opt=default,attn_impl=eager
807+
"""
808+
),
809+
action=_ParseNamedDict,
810+
)
767811
return parser
768812

769813

@@ -816,6 +860,7 @@ def _cmd_agg(argv: List[Any]):
816860
csv=args.csv.split(","),
817861
raw=args.raw,
818862
time_mask=True,
863+
sbs=args.sbs,
819864
)
820865
if args.verbose:
821866
print(f"Wrote {args.output!r}")

onnx_diagnostic/helpers/_log_helper.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ def apply_excel_style(
320320
Dict[str, Callable[[Any], "CubeViewDef.HighLightKind"]] # noqa: F821
321321
] = None,
322322
time_mask_view: Optional[Dict[str, pandas.DataFrame]] = None,
323+
verbose: int = 0,
323324
):
324325
"""
325326
Applies styles on all sheets in a file unless the sheet is too big.
@@ -329,6 +330,7 @@ def apply_excel_style(
329330
:param time_mask_view: if specified, it contains dataframe with the same shape
330331
and values in {-1, 0, +1} which indicates if a value is unexpectedly lower (-1)
331332
or higher (+1), it changes the color of the background then.
333+
:param verbose: if not null, shows a progress bar (tqdm) while looping over the sheets
332334
"""
333335
from openpyxl import load_workbook
334336
from openpyxl.styles import Alignment
@@ -353,8 +355,13 @@ def apply_excel_style(
353355
CubeViewDef.HighLightKind.GREEN: Font(color="00AA00"),
354356
CubeViewDef.HighLightKind.RED: Font(color="FF0000"),
355357
}
358+
if verbose:
359+
from tqdm import tqdm
356360

357-
for name in workbook.sheetnames:
361+
sheet_names = tqdm(list(workbook.sheetnames))
362+
else:
363+
sheet_names = workbook.sheetnames
364+
for name in sheet_names:
358365
if time_mask_view and name in time_mask_view:
359366
mask = time_mask_view[name]
360367
with pandas.ExcelWriter(io.BytesIO(), engine="openpyxl") as mask_writer:
@@ -367,7 +374,7 @@ def apply_excel_style(
367374
sheet = workbook[name]
368375
n_rows = sheet.max_row
369376
n_cols = sheet.max_column
370-
if n_rows * n_cols > 2**18:
377+
if n_rows * n_cols > 2**16 or n_rows > 2**13:
371378
# Too big.
372379
continue
373380
co: Dict[int, int] = {}

0 commit comments

Comments
 (0)