Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Change Logs
0.7.1
+++++

* :pr:`151`: adds command line ``agg``, class CubeLogsPerformance to produce timeseries
* :pr:`152`: adds a function to compute fully dynamic shapes given any inputs

0.7.0
Expand Down
Binary file added _unittests/ut_helpers/data/data-agg.zip
Binary file not shown.
50 changes: 42 additions & 8 deletions _unittests/ut_helpers/test_log_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
from onnx_diagnostic.helpers.log_helper import (
CubeLogs,
CubeLogsPerformance,
CubeViewDef,
enumerate_csv_files,
open_dataframe,
Expand All @@ -21,10 +22,10 @@ def df1(cls):
textwrap.dedent(
"""
date,version_python,version_transformers,model_name,model_exporter,time_load,time_latency,time_baseline,disc_ort,disc_ort2
2025/01/01,3.13.3,4.52.4,phi3,export,0.5,0.1,0.1,1e-5,1e-5
2025/01/02,3.13.3,4.52.4,phi3,export,0.6,0.11,0.1,1e-5,1e-5
2025/01/01,3.13.3,4.52.4,phi4,export,0.5,0.1,0.105,1e-5,1e-5
2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.5,0.1,0.999,1e-5,1e-5
2025/01/01,3.13.3,4.52.4,phi3,export,0.51,0.1,0.1,1e-5,1e-5
2025/01/02,3.13.3,4.52.4,phi3,export,0.62,0.11,0.11,1e-5,1e-5
2025/01/01,3.13.3,4.52.4,phi4,export,0.53,0.1,0.105,1e-5,1e-5
2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.54,0.14,0.999,1e-5,1e-5
"""
)
)
Expand Down Expand Up @@ -98,7 +99,11 @@ def test_cube_logs_view_repr(self):
def test_cube_logs_view(self):
cube = self.cube1(verbose=1)
view = cube.view(
CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
CubeViewDef(
["version.*", "model_name"],
["time_latency", "time_baseline"],
ignore_columns=["date"],
)
)
self.assertEqual((3, 4), view.shape)
self.assertEqual(
Expand All @@ -116,7 +121,10 @@ def test_cube_logs_view(self):

view = cube.view(
CubeViewDef(
["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
["version.*"],
["time_latency", "time_baseline"],
order=["model_exporter"],
ignore_columns=["date"],
)
)
self.assertEqual((2, 6), view.shape)
Expand All @@ -139,12 +147,13 @@ def test_cube_logs_view_agg(self):
CubeViewDef(
["version.*", "model.*"],
["time_latency", "time_baseline"],
key_agg=["model_name"],
key_agg=["model_name", "date"],
ignore_columns=["version_python"],
)
)
self.assertEqual((2, 2), view.shape)
self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))
self.assertEqual([("export",), ("onnx-dynamo",)], list(view.index))

@hide_stdout()
def test_cube_logs_excel(self):
Expand All @@ -166,6 +175,7 @@ def test_cube_logs_excel(self):
)
self.assertExists(output)

@hide_stdout()
def test_enumerate_csv_files(self):
df = self.df1()
filename = self.get_dump_file("test_enumerate_csv_files.csv")
Expand All @@ -186,6 +196,30 @@ def test_enumerate_csv_files(self):
self.assertEqual((3, 11), cube.shape)
self.assertIn("RAWFILENAME", cube.data.columns)

def test_cube_logs_performance(self):
    # End-to-end check: aggregates the bundled data-agg.zip into an Excel
    # report containing the standard performance views.
    out_file = self.get_dump_file("test_cube_logs_performance.xlsx")
    zip_path = os.path.join(os.path.dirname(__file__), "data", "data-agg.zip")
    assert list(enumerate_csv_files(zip_path))
    frames = [open_dataframe(item) for item in enumerate_csv_files(zip_path)]
    assert frames, f"{zip_path!r} empty"
    cube = CubeLogsPerformance(frames)
    cube.load()
    requested_views = [
        "agg-suite",
        "disc",
        "speedup",
        "time",
        "time_export",
        "err",
        # "cmd",
        "bucket-speedup",
        "raw-short",
    ]
    cube.to_excel(out_file, views=requested_views)
    self.assertExists(out_file)


if __name__ == "__main__":
unittest.main(verbosity=2)
8 changes: 8 additions & 0 deletions _unittests/ut_xrun_doc/test_command_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from onnx_diagnostic.ext_test_case import ExtTestCase
from onnx_diagnostic._command_lines_parser import (
get_main_parser,
get_parser_agg,
get_parser_config,
get_parser_find,
get_parser_lighten,
Expand Down Expand Up @@ -71,6 +72,13 @@ def test_parser_stats(self):
text = st.getvalue()
self.assertIn("input", text)

def test_parser_agg(self):
    # The ``agg`` parser must advertise its ``--recent`` flag in --help output.
    buffer = StringIO()
    with redirect_stdout(buffer):
        get_parser_agg().print_help()
    self.assertIn("--recent", buffer.getvalue())


if __name__ == "__main__":
unittest.main(verbosity=2)
140 changes: 137 additions & 3 deletions onnx_diagnostic/_command_lines_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,128 @@ def _cmd_stats(argv: List[Any]):
print("done.")


def get_parser_agg() -> ArgumentParser:
    """Builds the argument parser for command line ``agg``.

    The command aggregates statistics coming from benchmark runs
    (csv files, possibly inside zip archives) into an Excel file.

    :return: the configured :class:`argparse.ArgumentParser`
    """
    parser = ArgumentParser(
        prog="agg",
        description=textwrap.dedent(
            """
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
            """
        ),
        epilog="example\n python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("output", help="output excel file")
    parser.add_argument(
        "inputs",
        nargs="+",
        help="input csv or zip files, at least 1, it can be a name, or search path",
    )
    # NOTE(review): the default is a regular expression, so the dot before
    # ``csv`` matches any character — presumably intended as ``\\.csv``; kept
    # as-is to preserve behavior.
    parser.add_argument(
        "--filter", default="rawdata_.*.csv", help="filter for input files inside zip files"
    )
    parser.add_argument(
        "--recent",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps only the most recent experiment for the same set of keys.",
    )
    parser.add_argument(
        "--raw",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps the raw data in a sheet.",
    )
    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
    # ``device`` appeared twice in the original default; the duplicate is removed.
    parser.add_argument(
        "-k",
        "--keys",
        default="^version_.*,^model_.*,device,opt_patterns,suite,memory_peak,machine,exporter,dynamic,rtopt,dtype,architecture",
        help="List of columns to consider as keys, "
        "multiple values are separated by `,`\n"
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-w",
        "--values",
        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*",
        help="List of columns to consider as values, "
        "multiple values are separated by `,`\n"
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-i", "--ignored", default="version_python", help="List of columns to ignore"
    )
    parser.add_argument(
        "-f",
        "--formula",
        default="speedup,bucket[speedup],ERR1",
        help="Columns to compute after the aggregation was done.",
    )
    parser.add_argument(
        "--views",
        default="agg-suite,disc,speedup,time,time_export,err,cmd,bucket-speedup,raw-short",
        help="Views to add to the output files.",
    )
    parser.add_argument(
        "--csv",
        default="raw-short",
        help="Views to dump as csv files.",
    )
    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
    return parser


def _cmd_agg(argv: List[Any]):
    """Implements command line ``agg``.

    Collects the csv files designated by the command line, loads them,
    aggregates them with :class:`CubeLogsPerformance` and dumps the
    requested views into an Excel file.

    :param argv: command line arguments, ``argv[0]`` is the command name itself
    """
    from .helpers.log_helper import CubeLogsPerformance, open_dataframe, enumerate_csv_files

    parser = get_parser_agg()
    args = parser.parse_args(argv[1:])
    reg = re.compile(args.filter)

    # ``csv_files`` does not shadow the stdlib ``csv`` module (the original name did).
    csv_files = list(
        enumerate_csv_files(
            args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
        )
    )
    assert csv_files, f"No csv files in {args.inputs}"
    if args.verbose:
        from tqdm import tqdm

        loop = tqdm(csv_files)
    else:
        loop = csv_files
    dfs = []
    for c in loop:
        df = open_dataframe(c)
        # Bug fix: ``c`` is the enumerated csv descriptor, not a DataFrame,
        # so the previous message ``{c.head()!r}`` raised AttributeError
        # instead of reporting the missing column. Report the descriptor and
        # the columns actually found.
        assert (
            args.time in df.columns
        ), f"Missing time column {args.time!r} in {c!r}, found columns {sorted(df.columns)}"
        dfs.append(df)

    cube = CubeLogsPerformance(
        dfs,
        time=args.time,
        keys=[a for a in args.keys.split(",") if a],
        values=[a for a in args.values.split(",") if a],
        ignored=[a for a in args.ignored.split(",") if a],
        recent=args.recent,
        # Filter out empty entries as done for keys/values/ignored.
        formulas={k: k for k in args.formula.split(",") if k},
    )
    cube.load(verbose=max(args.verbose - 1, 0))
    if args.verbose:
        print(f"Dumps final file into {args.output!r}")
    cube.to_excel(
        args.output,
        {k: k for k in args.views.split(",") if k},
        verbose=args.verbose,
        csv=args.csv.split(","),
        raw=args.raw,
    )
    if args.verbose:
        print(f"Wrote {args.output!r}")


def get_main_parser() -> ArgumentParser:
parser = ArgumentParser(
prog="onnx_diagnostic",
Expand All @@ -619,19 +741,29 @@ def get_main_parser() -> ArgumentParser:
Type 'python -m onnx_diagnostic <cmd> --help'
to get help for a specific command.

agg - aggregates statistics from multiple files
config - prints a configuration for a model id
find - find node consuming or producing a result
lighten - makes an onnx model lighter by removing the weights,
unlighten - restores an onnx model produces by the previous experiment
print - prints the model on standard output
validate - validate a model
stats - produces statistics on a model
unlighten - restores an onnx model produces by the previous experiment
validate - validate a model
"""
),
)
parser.add_argument(
"cmd",
choices=["config", "find", "lighten", "print", "stats", "unlighten", "validate"],
choices=[
"agg",
"config",
"find",
"lighten",
"print",
"stats",
"unlighten",
"validate",
],
help="Selects a command.",
)
return parser
Expand All @@ -646,6 +778,7 @@ def main(argv: Optional[List[Any]] = None):
config=_cmd_config,
validate=_cmd_validate,
stats=_cmd_stats,
agg=_cmd_agg,
)

if argv is None:
Expand All @@ -667,6 +800,7 @@ def main(argv: Optional[List[Any]] = None):
config=get_parser_config,
validate=get_parser_validate,
stats=get_parser_stats,
agg=get_parser_agg,
)
cmd = argv[0]
if cmd not in parsers:
Expand Down
Loading
Loading