Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOGS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Change Logs
0.7.1
+++++

* :pr:`151`: adds command line ``agg``, class CubeLogsPerformance to produce timeseries
* :pr:`152`: add a function to compute fully dynamic shapes given any inputs

0.7.0
Expand Down
Binary file added _unittests/ut_helpers/data/data-agg.zip
Binary file not shown.
50 changes: 42 additions & 8 deletions _unittests/ut_helpers/test_log_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
from onnx_diagnostic.helpers.log_helper import (
CubeLogs,
CubeLogsPerformance,
CubeViewDef,
enumerate_csv_files,
open_dataframe,
Expand All @@ -21,10 +22,10 @@ def df1(cls):
textwrap.dedent(
"""
date,version_python,version_transformers,model_name,model_exporter,time_load,time_latency,time_baseline,disc_ort,disc_ort2
2025/01/01,3.13.3,4.52.4,phi3,export,0.5,0.1,0.1,1e-5,1e-5
2025/01/02,3.13.3,4.52.4,phi3,export,0.6,0.11,0.1,1e-5,1e-5
2025/01/01,3.13.3,4.52.4,phi4,export,0.5,0.1,0.105,1e-5,1e-5
2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.5,0.1,0.999,1e-5,1e-5
2025/01/01,3.13.3,4.52.4,phi3,export,0.51,0.1,0.1,1e-5,1e-5
2025/01/02,3.13.3,4.52.4,phi3,export,0.62,0.11,0.11,1e-5,1e-5
2025/01/01,3.13.3,4.52.4,phi4,export,0.53,0.1,0.105,1e-5,1e-5
2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.54,0.14,0.999,1e-5,1e-5
"""
)
)
Expand Down Expand Up @@ -98,7 +99,11 @@ def test_cube_logs_view_repr(self):
def test_cube_logs_view(self):
cube = self.cube1(verbose=1)
view = cube.view(
CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
CubeViewDef(
["version.*", "model_name"],
["time_latency", "time_baseline"],
ignore_columns=["date"],
)
)
self.assertEqual((3, 4), view.shape)
self.assertEqual(
Expand All @@ -116,7 +121,10 @@ def test_cube_logs_view(self):

view = cube.view(
CubeViewDef(
["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
["version.*"],
["time_latency", "time_baseline"],
order=["model_exporter"],
ignore_columns=["date"],
)
)
self.assertEqual((2, 6), view.shape)
Expand All @@ -139,12 +147,13 @@ def test_cube_logs_view_agg(self):
CubeViewDef(
["version.*", "model.*"],
["time_latency", "time_baseline"],
key_agg=["model_name"],
key_agg=["model_name", "date"],
ignore_columns=["version_python"],
)
)
self.assertEqual((2, 2), view.shape)
self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))
self.assertEqual([("export",), ("onnx-dynamo",)], list(view.index))

@hide_stdout()
def test_cube_logs_excel(self):
Expand All @@ -166,6 +175,7 @@ def test_cube_logs_excel(self):
)
self.assertExists(output)

@hide_stdout()
def test_enumerate_csv_files(self):
df = self.df1()
filename = self.get_dump_file("test_enumerate_csv_files.csv")
Expand All @@ -186,6 +196,30 @@ def test_enumerate_csv_files(self):
self.assertEqual((3, 11), cube.shape)
self.assertIn("RAWFILENAME", cube.data.columns)

def test_cube_logs_performance(self):
output = self.get_dump_file("test_cube_logs_performance.xlsx")
filename = os.path.join(os.path.dirname(__file__), "data", "data-agg.zip")
assert list(enumerate_csv_files(filename))
dfs = [open_dataframe(df) for df in enumerate_csv_files(filename)]
assert dfs, f"{filename!r} empty"
cube = CubeLogsPerformance(dfs)
cube.load()
cube.to_excel(
output,
views=[
"agg-suite",
"disc",
"speedup",
"time",
"time_export",
"err",
# "cmd",
"bucket-speedup",
"raw-short",
],
)
self.assertExists(output)


# Allows running this test file directly with ``python test_log_helper.py``.
if __name__ == "__main__":
    unittest.main(verbosity=2)
8 changes: 8 additions & 0 deletions _unittests/ut_xrun_doc/test_command_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from onnx_diagnostic.ext_test_case import ExtTestCase
from onnx_diagnostic._command_lines_parser import (
get_main_parser,
get_parser_agg,
get_parser_config,
get_parser_find,
get_parser_lighten,
Expand Down Expand Up @@ -71,6 +72,13 @@ def test_parser_stats(self):
text = st.getvalue()
self.assertIn("input", text)

def test_parser_agg(self):
st = StringIO()
with redirect_stdout(st):
get_parser_agg().print_help()
text = st.getvalue()
self.assertIn("--recent", text)


# Allows running this test file directly with ``python test_command_lines.py``.
if __name__ == "__main__":
    unittest.main(verbosity=2)
140 changes: 137 additions & 3 deletions onnx_diagnostic/_command_lines_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,128 @@ def _cmd_stats(argv: List[Any]):
print("done.")


def get_parser_agg() -> ArgumentParser:
    """Builds the argument parser for command line ``agg``.

    The command aggregates statistics coming from benchmark csv files
    (possibly inside zip archives) and dumps views into an excel file.
    See :func:`_cmd_agg` for the implementation.
    """
    parser = ArgumentParser(
        prog="agg",
        description=textwrap.dedent(
            """
            Aggregates statistics coming from benchmarks.
            Every run is a row. Every row is indexed by some keys,
            and produces values. Every row has a date.
            """
        ),
        epilog="example\n python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
        formatter_class=RawTextHelpFormatter,
    )
    parser.add_argument("output", help="output excel file")
    parser.add_argument(
        "inputs",
        nargs="+",
        help="input csv or zip files, at least 1, it can be a name, or search path",
    )
    parser.add_argument(
        "--filter", default="rawdata_.*.csv", help="filter for input files inside zip files"
    )
    # --recent / --no-recent: when several runs share the same keys,
    # only the latest one (according to the time column) is kept.
    parser.add_argument(
        "--recent",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps only the most recent experiment for the same set of keys.",
    )
    parser.add_argument(
        "--raw",
        default=True,
        action=BooleanOptionalAction,
        help="Keeps the raw data in a sheet.",
    )
    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
    parser.add_argument(
        "-k",
        "--keys",
        default="^version_.*,^model_.*,device,opt_patterns,suite,memory_peak,machine,exporter,dynamic,rtopt,dtype,device,architecture",
        help="List of columns to consider as keys, "
        "multiple values are separated by `,`\n"
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-w",
        "--values",
        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*",
        help="List of columns to consider as values, "
        "multiple values are separated by `,`\n"
        "regular expressions are allowed",
    )
    parser.add_argument(
        "-i", "--ignored", default="version_python", help="List of columns to ignore"
    )
    # Derived metrics computed once the aggregation is done.
    parser.add_argument(
        "-f",
        "--formula",
        default="speedup,bucket[speedup],ERR1",
        help="Columns to compute after the aggregation was done.",
    )
    parser.add_argument(
        "--views",
        default="agg-suite,disc,speedup,time,time_export,err,cmd,bucket-speedup,raw-short",
        help="Views to add to the output files.",
    )
    parser.add_argument(
        "--csv",
        default="raw-short",
        help="Views to dump as csv files.",
    )
    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
    return parser


def _cmd_agg(argv: List[Any]):
    """Implements command line ``agg``.

    Collects the csv files matching the filter, loads them into dataframes,
    builds a :class:`CubeLogsPerformance` and dumps the requested views
    into an excel file.
    """
    from .helpers.log_helper import CubeLogsPerformance, open_dataframe, enumerate_csv_files

    parser = get_parser_agg()
    args = parser.parse_args(argv[1:])
    reg = re.compile(args.filter)

    csv = list(
        enumerate_csv_files(
            args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
        )
    )
    assert csv, f"No csv files in {args.inputs}"
    if args.verbose:
        from tqdm import tqdm

        loop = tqdm(csv)
    else:
        loop = csv
    dfs = []
    for c in loop:
        df = open_dataframe(c)
        # ``c`` is the file specification yielded by enumerate_csv_files, not a
        # dataframe: report the file and the columns actually found instead of
        # calling ``c.head()`` which would itself raise AttributeError.
        assert args.time in df.columns, (
            f"Missing time column {args.time!r} in {c!r}, columns are {list(df.columns)}"
        )
        dfs.append(df)

    cube = CubeLogsPerformance(
        dfs,
        time=args.time,
        keys=[a for a in args.keys.split(",") if a],
        values=[a for a in args.values.split(",") if a],
        ignored=[a for a in args.ignored.split(",") if a],
        recent=args.recent,
        formulas={k: k for k in args.formula.split(",")},
    )
    cube.load(verbose=max(args.verbose - 1, 0))
    if args.verbose:
        print(f"Dumps final file into {args.output!r}")
    cube.to_excel(
        args.output,
        {k: k for k in args.views.split(",")},
        verbose=args.verbose,
        csv=args.csv.split(","),
        raw=args.raw,
    )
    if args.verbose:
        print(f"Wrote {args.output!r}")


def get_main_parser() -> ArgumentParser:
parser = ArgumentParser(
prog="onnx_diagnostic",
Expand All @@ -619,19 +741,29 @@ def get_main_parser() -> ArgumentParser:
Type 'python -m onnx_diagnostic <cmd> --help'
to get help for a specific command.

agg - aggregates statistics from multiple files
config - prints a configuration for a model id
find - find node consuming or producing a result
lighten - makes an onnx model lighter by removing the weights,
unlighten - restores an onnx model produces by the previous experiment
print - prints the model on standard output
validate - validate a model
stats - produces statistics on a model
unlighten - restores an onnx model produces by the previous experiment
validate - validate a model
"""
),
)
parser.add_argument(
"cmd",
choices=["config", "find", "lighten", "print", "stats", "unlighten", "validate"],
choices=[
"agg",
"config",
"find",
"lighten",
"print",
"stats",
"unlighten",
"validate",
],
help="Selects a command.",
)
return parser
Expand All @@ -646,6 +778,7 @@ def main(argv: Optional[List[Any]] = None):
config=_cmd_config,
validate=_cmd_validate,
stats=_cmd_stats,
agg=_cmd_agg,
)

if argv is None:
Expand All @@ -667,6 +800,7 @@ def main(argv: Optional[List[Any]] = None):
config=get_parser_config,
validate=get_parser_validate,
stats=get_parser_stats,
agg=get_parser_agg,
)
cmd = argv[0]
if cmd not in parsers:
Expand Down
Loading
Loading