3 changes: 2 additions & 1 deletion CHANGELOGS.rst
@@ -4,7 +4,8 @@ Change Logs
 0.7.1
 +++++
 
-* :pr:`151`: adds command line ``agg``, class CubeLogsPerformance to produce timeseries
+* :pr:`155`: better aggregation of historical data
+* :pr:`151`, :pr:`153`: adds command line ``agg``, class CubeLogsPerformance to produce timeseries
 * :pr:`152`: add a function to compute fully dynamic shapes given any inputs
 
 0.7.0
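Note on the changelog entry: the new ``agg`` command wraps ``CubeLogsPerformance``, and the underlying ``CubeLogs``/``CubeViewDef`` API is exercised by the tests below. A minimal sketch of that usage, based only on the calls visible in this diff (column names are illustrative):

    import pandas
    from onnx_diagnostic.helpers.log_helper import CubeLogs, CubeViewDef

    # one row per measurement: key columns identify the experiment,
    # "time_*" columns hold the measured values
    df = pandas.DataFrame(
        [
            dict(date="2025/01/01", time_p=0.51, exporter="E1", m_name="A"),
            dict(date="2025/01/02", time_p=0.62, exporter="E1", m_name="A"),
        ]
    )
    cube = CubeLogs(df, keys=["^m_.*", "exporter"]).load()
    # pivot the keys into rows and (metric, exporter, date) into columns
    view = cube.view(CubeViewDef(["^m_.*"], ["^time_.*"]))
    print(view)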
63 changes: 56 additions & 7 deletions _unittests/ut_helpers/test_log_helper.py
@@ -3,6 +3,7 @@
 import textwrap
 import unittest
 import zipfile
+import numpy as np
 import pandas
 from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
 from onnx_diagnostic.helpers.log_helper import (
@@ -46,7 +47,6 @@ def test_cube_logs_load_df(self):
         cube = CubeLogs(df)
         text = str(cube)
         self.assertIsInstance(text, str)
-        self.assertRaise(lambda: cube.load(verbose=1), AssertionError)
         cube = CubeLogs(
             self.df1(),
             recent=True,
@@ -130,12 +130,12 @@ def test_cube_logs_view(self):
         self.assertEqual((2, 6), view.shape)
         self.assertEqual(
             [
-                ("time_baseline", "export", "phi3"),
-                ("time_baseline", "export", "phi4"),
-                ("time_baseline", "onnx-dynamo", "phi4"),
-                ("time_latency", "export", "phi3"),
-                ("time_latency", "export", "phi4"),
-                ("time_latency", "onnx-dynamo", "phi4"),
+                ("time_baseline", "phi3", "export"),
+                ("time_baseline", "phi4", "export"),
+                ("time_baseline", "phi4", "onnx-dynamo"),
+                ("time_latency", "phi3", "export"),
+                ("time_latency", "phi4", "export"),
+                ("time_latency", "phi4", "onnx-dynamo"),
             ],
             list(view.columns),
         )
@@ -229,6 +229,55 @@ def test_cube_logs_performance(self):
         )
         self.assertExists(output)
 
+    def test_duplicate(self):
+        df = pandas.DataFrame(
+            [
+                dict(date="2025/01/01", time_engine=0.5, model_name="A", version_engine="0.5"),
+                dict(date="2025/01/01", time_engine=0.5, model_name="A", version_engine="0.5"),
+            ]
+        )
+        cube = CubeLogs(df)
+        self.assertRaise(lambda: cube.load(), AssertionError)
+        CubeLogs(df, recent=True).load()
+
+    def test_historical(self):
+        # case 1
+        df = pandas.DataFrame(
+            [
+                dict(date="2025/01/01", time_p=0.51, exporter="E1", m_name="A", m_cls="CA"),
+                dict(date="2025/01/02", time_p=0.62, exporter="E1", m_name="A", m_cls="CA"),
+                dict(date="2025/01/01", time_p=0.53, exporter="E2", m_name="A", m_cls="CA"),
+                dict(date="2025/01/02", time_p=0.64, exporter="E2", m_name="A", m_cls="CA"),
+                dict(date="2025/01/01", time_p=0.55, exporter="E2", m_name="B", m_cls="CA"),
+                dict(date="2025/01/02", time_p=0.66, exporter="E2", m_name="B", m_cls="CA"),
+            ]
+        )
+        cube = CubeLogs(df, keys=["^m_*", "exporter"]).load()
+        view, view_def = cube.view(CubeViewDef(["^m_.*"], ["^time_.*"]), return_view_def=True)
+        self.assertEqual(
+            "CubeViewDef(key_index=['^m_.*'], values=['^time_.*'])", repr(view_def)
+        )
+        self.assertEqual(["METRICS", "exporter", "date"], view.columns.names)
+        got = view.values.ravel()
+        self.assertEqual(
+            sorted([0.51, 0.62, 0.53, 0.64, -1, -1, 0.55, 0.66]),
+            sorted(np.where(np.isnan(got), -1, got).tolist()),
+        )
+
+        # case 2
+        df = pandas.DataFrame(
+            [
+                dict(date="2025/01/02", time_p=0.62, exporter="E1", m_name="A", m_cls="CA"),
+                dict(date="2025/01/02", time_p=0.64, exporter="E2", m_name="A", m_cls="CA"),
+                dict(date="2025/01/01", time_p=0.51, exporter="E1", m_name="B", m_cls="CA"),
+                dict(date="2025/01/02", time_p=0.66, exporter="E2", m_name="B", m_cls="CA"),
+            ]
+        )
+        cube = CubeLogs(df, keys=["^m_*", "exporter"]).load()
+        view, view_def = cube.view(CubeViewDef(["^m_.*"], ["^time_.*"]), return_view_def=True)
+        self.assertEqual((2, 3), view.shape)
+        self.assertEqual(["METRICS", "exporter", "date"], view.columns.names)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
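The two ``test_historical`` cases above pin down the new view layout: dates become a third column level, so the same metric can be compared across runs, with ``NaN`` where a (key, exporter, date) combination was never measured. A sketch of slicing such a view with plain pandas, assuming only the ``["METRICS", "exporter", "date"]`` column-level names asserted in the tests:

    # view.columns is a MultiIndex with levels (METRICS, exporter, date);
    # drop the metric level to get one column per (exporter, date) pair
    time_p = view.xs("time_p", axis=1, level="METRICS")
    # keep the latest date only (illustrative; assumes dates sort lexicographically)
    latest = time_p.xs(time_p.columns.get_level_values("date").max(), axis=1, level="date")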
26 changes: 15 additions & 11 deletions onnx_diagnostic/_command_lines_parser.py
@@ -656,29 +656,31 @@ def get_parser_agg() -> ArgumentParser:
     parser.add_argument(
         "-w",
         "--values",
-        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*,^onnx_.*,^op_onnx_.*",
+        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*,^onnx_.*,^op_onnx_.*,^peak_gpu_.*",
         help="List of columns to consider as values, "
         "multiple values are separated by `,`\n"
         "regular expressions are allowed",
     )
     parser.add_argument(
-        "-i", "--ignored", default="version_python", help="List of columns to ignore"
+        "-i", "--ignored", default="^version_.*", help="List of columns to ignore"
     )
     parser.add_argument(
         "-f",
         "--formula",
-        default="speedup,bucket[speedup],ERR1,n_models,n_eager,"
-        "n_running,n_acc01,n_acc001,n_dynamic,n_pass,n_faster,"
-        "n_faster2x,n_faster3x,n_faster4x,n_attention,"
-        "peak_gpu_torch,peak_gpu_nvidia,n_control_flow,"
-        "n_constant,n_shape,n_expand,"
-        "n_function,n_initializer,n_scatter,time_export_unbiased",
+        default="speedup,bucket[speedup],ERR1,n_models,n_model_eager,"
+        "n_model_running,n_model_acc01,n_model_acc001,n_model_dynamic,"
+        "n_model_pass,n_model_faster,"
+        "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
+        "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
+        "n_node_constant,n_node_shape,n_node_expand,"
+        "n_node_function,n_node_initializer,n_node_scatter,"
+        "time_export_unbiased",
         help="Columns to compute after the aggregation was done.",
     )
     parser.add_argument(
         "--views",
         default="agg-suite,disc,speedup,time,time_export,err,cmd,"
-        "bucket-speedup,raw-short,counts",
+        "bucket-speedup,raw-short,counts,peak-gpu",
         help="Views to add to the output files.",
     )
     parser.add_argument(
@@ -702,7 +704,7 @@ def _cmd_agg(argv: List[Any]):
             args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
         )
     )
-    assert csv, f"No csv files in {args.inputs}"
+    assert csv, f"No csv files in {args.inputs}, csv={csv}"
     if args.verbose:
         from tqdm import tqdm
 
@@ -712,7 +714,9 @@
     dfs = []
     for c in loop:
         df = open_dataframe(c)
-        assert args.time in df.columns, f"Missing time column {args.time!r} in {c.head()!r}"
+        assert (
+            args.time in df.columns
+        ), f"Missing time column {args.time!r} in {c!r}\n{df.head()}\n{sorted(df.columns)}"
         dfs.append(df)
 
     cube = CubeLogsPerformance(
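The new defaults for ``--values`` and ``--ignored`` rely on each comma-separated element being usable as a regular expression matched against column names. A small sketch of that selection logic, assuming ``re.search`` semantics (the matching code itself is not part of this diff):

    import re

    def select_columns(columns, patterns):
        # keep a column if at least one pattern matches it
        regs = [re.compile(p) for p in patterns.split(",")]
        return [c for c in columns if any(r.search(c) for r in regs)]

    cols = ["time_latency", "peak_gpu_torch", "version_python", "model_name"]
    print(select_columns(cols, "^time_.*,^peak_gpu_.*"))
    # -> ['time_latency', 'peak_gpu_torch']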