Skip to content

Commit d2a78c4

Browse files
authored
Extends command line to aggregate data (#151)
* extend command line * doc * lint * fix * fix agg * mypy * fix log * better * mypy
1 parent cae7a79 commit d2a78c4

File tree

7 files changed

+785
-93
lines changed

7 files changed

+785
-93
lines changed

CHANGELOGS.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Change Logs
44
0.7.1
55
+++++
66

7+
* :pr:`151`: adds command line ``agg``, class CubeLogsPerformance to produce timeseries
78
* :pr:`152`: add a function to compute fully dynamic shapes given any inputs
89

910
0.7.0
28 KB
Binary file not shown.

_unittests/ut_helpers/test_log_helper.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
88
from onnx_diagnostic.helpers.log_helper import (
99
CubeLogs,
10+
CubeLogsPerformance,
1011
CubeViewDef,
1112
enumerate_csv_files,
1213
open_dataframe,
@@ -21,10 +22,10 @@ def df1(cls):
2122
textwrap.dedent(
2223
"""
2324
date,version_python,version_transformers,model_name,model_exporter,time_load,time_latency,time_baseline,disc_ort,disc_ort2
24-
2025/01/01,3.13.3,4.52.4,phi3,export,0.5,0.1,0.1,1e-5,1e-5
25-
2025/01/02,3.13.3,4.52.4,phi3,export,0.6,0.11,0.1,1e-5,1e-5
26-
2025/01/01,3.13.3,4.52.4,phi4,export,0.5,0.1,0.105,1e-5,1e-5
27-
2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.5,0.1,0.999,1e-5,1e-5
25+
2025/01/01,3.13.3,4.52.4,phi3,export,0.51,0.1,0.1,1e-5,1e-5
26+
2025/01/02,3.13.3,4.52.4,phi3,export,0.62,0.11,0.11,1e-5,1e-5
27+
2025/01/01,3.13.3,4.52.4,phi4,export,0.53,0.1,0.105,1e-5,1e-5
28+
2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.54,0.14,0.999,1e-5,1e-5
2829
"""
2930
)
3031
)
@@ -98,7 +99,11 @@ def test_cube_logs_view_repr(self):
9899
def test_cube_logs_view(self):
99100
cube = self.cube1(verbose=1)
100101
view = cube.view(
101-
CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
102+
CubeViewDef(
103+
["version.*", "model_name"],
104+
["time_latency", "time_baseline"],
105+
ignore_columns=["date"],
106+
)
102107
)
103108
self.assertEqual((3, 4), view.shape)
104109
self.assertEqual(
@@ -116,7 +121,10 @@ def test_cube_logs_view(self):
116121

117122
view = cube.view(
118123
CubeViewDef(
119-
["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
124+
["version.*"],
125+
["time_latency", "time_baseline"],
126+
order=["model_exporter"],
127+
ignore_columns=["date"],
120128
)
121129
)
122130
self.assertEqual((2, 6), view.shape)
@@ -139,12 +147,13 @@ def test_cube_logs_view_agg(self):
139147
CubeViewDef(
140148
["version.*", "model.*"],
141149
["time_latency", "time_baseline"],
142-
key_agg=["model_name"],
150+
key_agg=["model_name", "date"],
151+
ignore_columns=["version_python"],
143152
)
144153
)
145154
self.assertEqual((2, 2), view.shape)
146155
self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
147-
self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))
156+
self.assertEqual([("export",), ("onnx-dynamo",)], list(view.index))
148157

149158
@hide_stdout()
150159
def test_cube_logs_excel(self):
@@ -166,6 +175,7 @@ def test_cube_logs_excel(self):
166175
)
167176
self.assertExists(output)
168177

178+
@hide_stdout()
169179
def test_enumerate_csv_files(self):
170180
df = self.df1()
171181
filename = self.get_dump_file("test_enumerate_csv_files.csv")
@@ -186,6 +196,30 @@ def test_enumerate_csv_files(self):
186196
self.assertEqual((3, 11), cube.shape)
187197
self.assertIn("RAWFILENAME", cube.data.columns)
188198

199+
def test_cube_logs_performance(self):
200+
output = self.get_dump_file("test_cube_logs_performance.xlsx")
201+
filename = os.path.join(os.path.dirname(__file__), "data", "data-agg.zip")
202+
assert list(enumerate_csv_files(filename))
203+
dfs = [open_dataframe(df) for df in enumerate_csv_files(filename)]
204+
assert dfs, f"{filename!r} empty"
205+
cube = CubeLogsPerformance(dfs)
206+
cube.load()
207+
cube.to_excel(
208+
output,
209+
views=[
210+
"agg-suite",
211+
"disc",
212+
"speedup",
213+
"time",
214+
"time_export",
215+
"err",
216+
# "cmd",
217+
"bucket-speedup",
218+
"raw-short",
219+
],
220+
)
221+
self.assertExists(output)
222+
189223

190224
if __name__ == "__main__":
191225
unittest.main(verbosity=2)

_unittests/ut_xrun_doc/test_command_lines.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from onnx_diagnostic.ext_test_case import ExtTestCase
55
from onnx_diagnostic._command_lines_parser import (
66
get_main_parser,
7+
get_parser_agg,
78
get_parser_config,
89
get_parser_find,
910
get_parser_lighten,
@@ -71,6 +72,13 @@ def test_parser_stats(self):
7172
text = st.getvalue()
7273
self.assertIn("input", text)
7374

75+
def test_parser_agg(self):
    """Checks that the help printed by the ``agg`` parser mentions ``--recent``."""
    buffer = StringIO()
    with redirect_stdout(buffer):
        agg_parser = get_parser_agg()
        agg_parser.print_help()
    self.assertIn("--recent", buffer.getvalue())
81+
7482

7583
if __name__ == "__main__":
7684
unittest.main(verbosity=2)

onnx_diagnostic/_command_lines_parser.py

Lines changed: 137 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,128 @@ def _cmd_stats(argv: List[Any]):
609609
print("done.")
610610

611611

612+
def get_parser_agg() -> ArgumentParser:
    """
    Builds the parser of command line ``agg``.

    The parser takes one output file followed by one or more inputs
    (positional arguments) and options selecting the time column, the key
    columns, the value columns, the ignored columns, the formulas,
    the views and the views dumped as csv.

    :return: the configured :class:`argparse.ArgumentParser`
    """
    # Default key columns, every item may be a regular expression.
    default_keys = ",".join(
        [
            "^version_.*",
            "^model_.*",
            "device",
            "opt_patterns",
            "suite",
            "memory_peak",
            "machine",
            "exporter",
            "dynamic",
            "rtopt",
            "dtype",
            "architecture",
        ]
    )
    parser = ArgumentParser(
        prog="agg",
        description=textwrap.dedent(
            """
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
            """
        ),
        epilog="example\n python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("output", help="output excel file")
    parser.add_argument(
        "inputs",
        nargs="+",
        help="input csv or zip files, at least 1, it can be a name, or search path",
    )
    parser.add_argument(
        "--filter", default="rawdata_.*.csv", help="filter for input files inside zip files"
    )
    parser.add_argument(
        "--recent",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps only the most recent experiment for the same set of keys.",
    )
    parser.add_argument(
        "--raw",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps the raw data in a sheet.",
    )
    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
    parser.add_argument(
        "-k",
        "--keys",
        default=default_keys,
        help="List of columns to consider as keys, "
        "multiple values are separated by `,`\n"
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-w",
        "--values",
        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*",
        help="List of columns to consider as values, "
        "multiple values are separated by `,`\n"
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-i", "--ignored", default="version_python", help="List of columns to ignore"
    )
    parser.add_argument(
        "-f",
        "--formula",
        default="speedup,bucket[speedup],ERR1",
        help="Columns to compute after the aggregation was done.",
    )
    parser.add_argument(
        "--views",
        default="agg-suite,disc,speedup,time,time_export,err,cmd,bucket-speedup,raw-short",
        help="Views to add to the output files.",
    )
    parser.add_argument(
        "--csv",
        default="raw-short",
        help="Views to dump as csv files.",
    )
    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
    return parser
684+
685+
686+
def _cmd_agg(argv: List[Any]):
    """
    Implements command line ``agg``.

    The function collects the csv files found in the inputs, loads every one
    of them into a dataframe, checks the time column is present, aggregates
    them with :class:`CubeLogsPerformance` and writes the requested views
    into the output excel file.

    :param argv: command line arguments, the first item is skipped
        and the remaining ones are parsed by :func:`get_parser_agg`
    """
    from .helpers.log_helper import CubeLogsPerformance, open_dataframe, enumerate_csv_files

    def _split(text: str) -> List[str]:
        # Splits a comma separated option and drops the empty items.
        return [a for a in text.split(",") if a]

    parser = get_parser_agg()
    args = parser.parse_args(argv[1:])
    reg = re.compile(args.filter)

    csv = list(
        enumerate_csv_files(
            args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
        )
    )
    assert csv, f"No csv files in {args.inputs}"
    if args.verbose:
        # tqdm is only imported when a progress bar is displayed
        from tqdm import tqdm

        loop = tqdm(csv)
    else:
        loop = csv
    dfs = []
    for c in loop:
        df = open_dataframe(c)
        # ``c`` identifies the input, ``df`` is the loaded data: the message
        # must describe the dataframe, ``c`` has no method ``head``
        # when it is a file name.
        assert args.time in df.columns, (
            f"Missing time column {args.time!r} in {c!r}, "
            f"columns={list(df.columns)!r}"
        )
        dfs.append(df)

    cube = CubeLogsPerformance(
        dfs,
        time=args.time,
        keys=_split(args.keys),
        values=_split(args.values),
        ignored=_split(args.ignored),
        recent=args.recent,
        formulas={k: k for k in _split(args.formula)},
    )
    cube.load(verbose=max(args.verbose - 1, 0))
    if args.verbose:
        print(f"Dumps final file into {args.output!r}")
    cube.to_excel(
        args.output,
        {k: k for k in _split(args.views)},
        verbose=args.verbose,
        csv=_split(args.csv),
        raw=args.raw,
    )
    if args.verbose:
        print(f"Wrote {args.output!r}")
732+
733+
612734
def get_main_parser() -> ArgumentParser:
613735
parser = ArgumentParser(
614736
prog="onnx_diagnostic",
@@ -619,19 +741,29 @@ def get_main_parser() -> ArgumentParser:
619741
Type 'python -m onnx_diagnostic <cmd> --help'
620742
to get help for a specific command.
621743
744+
agg - aggregates statistics from multiple files
622745
config - prints a configuration for a model id
623746
find - find node consuming or producing a result
624747
lighten - makes an onnx model lighter by removing the weights,
625-
unlighten - restores an onnx model produces by the previous experiment
626748
print - prints the model on standard output
627-
validate - validate a model
628749
stats - produces statistics on a model
750+
unlighten - restores an onnx model produces by the previous experiment
751+
validate - validate a model
629752
"""
630753
),
631754
)
632755
parser.add_argument(
633756
"cmd",
634-
choices=["config", "find", "lighten", "print", "stats", "unlighten", "validate"],
757+
choices=[
758+
"agg",
759+
"config",
760+
"find",
761+
"lighten",
762+
"print",
763+
"stats",
764+
"unlighten",
765+
"validate",
766+
],
635767
help="Selects a command.",
636768
)
637769
return parser
@@ -646,6 +778,7 @@ def main(argv: Optional[List[Any]] = None):
646778
config=_cmd_config,
647779
validate=_cmd_validate,
648780
stats=_cmd_stats,
781+
agg=_cmd_agg,
649782
)
650783

651784
if argv is None:
@@ -667,6 +800,7 @@ def main(argv: Optional[List[Any]] = None):
667800
config=get_parser_config,
668801
validate=get_parser_validate,
669802
stats=get_parser_stats,
803+
agg=get_parser_agg,
670804
)
671805
cmd = argv[0]
672806
if cmd not in parsers:

0 commit comments

Comments
 (0)