sdpython
diff --git a/‎CHANGELOGS.rst‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOGS.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎_unittests/ut_helpers/data/data-agg.zip‎
28 KB b/‎_unittests/ut_helpers/data/data-agg.zip‎
28 KB
diff --git a/‎_unittests/ut_helpers/test_log_helper.py‎
Lines changed: 42 additions & 8 deletions b/‎_unittests/ut_helpers/test_log_helper.py‎
Lines changed: 42 additions & 8 deletions
diff --git a/‎_unittests/ut_xrun_doc/test_command_lines.py‎
Lines changed: 8 additions & 0 deletions b/‎_unittests/ut_xrun_doc/test_command_lines.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎onnx_diagnostic/_command_lines_parser.py‎
Lines changed: 137 additions & 3 deletions b/‎onnx_diagnostic/_command_lines_parser.py‎
Lines changed: 137 additions & 3 deletions
@@ -4,6 +4,7 @@ Change Logs
 0.7.1
 +++++
 
+* :pr:`151`: adds command line ``agg``, class CubeLogsPerformance to produce timeseries
 * :pr:`152`: add a function to compute fully dynamic shapes given any inputs
 
 0.7.0
 
@@ -7,6 +7,7 @@
 from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
 from onnx_diagnostic.helpers.log_helper import (
     CubeLogs,
+    CubeLogsPerformance,
     CubeViewDef,
     enumerate_csv_files,
     open_dataframe,
@@ -21,10 +22,10 @@ def df1(cls):
                 textwrap.dedent(
                     """
                     date,version_python,version_transformers,model_name,model_exporter,time_load,time_latency,time_baseline,disc_ort,disc_ort2
-                    2025/01/01,3.13.3,4.52.4,phi3,export,0.5,0.1,0.1,1e-5,1e-5
-                    2025/01/02,3.13.3,4.52.4,phi3,export,0.6,0.11,0.1,1e-5,1e-5
-                    2025/01/01,3.13.3,4.52.4,phi4,export,0.5,0.1,0.105,1e-5,1e-5
-                    2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.5,0.1,0.999,1e-5,1e-5
+                    2025/01/01,3.13.3,4.52.4,phi3,export,0.51,0.1,0.1,1e-5,1e-5
+                    2025/01/02,3.13.3,4.52.4,phi3,export,0.62,0.11,0.11,1e-5,1e-5
+                    2025/01/01,3.13.3,4.52.4,phi4,export,0.53,0.1,0.105,1e-5,1e-5
+                    2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.54,0.14,0.999,1e-5,1e-5
                     """
                 )
             )
@@ -98,7 +99,11 @@ def test_cube_logs_view_repr(self):
     def test_cube_logs_view(self):
         cube = self.cube1(verbose=1)
         view = cube.view(
-            CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
+            CubeViewDef(
+                ["version.*", "model_name"],
+                ["time_latency", "time_baseline"],
+                ignore_columns=["date"],
+            )
         )
         self.assertEqual((3, 4), view.shape)
         self.assertEqual(
@@ -116,7 +121,10 @@ def test_cube_logs_view(self):
 
         view = cube.view(
             CubeViewDef(
-                ["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
+                ["version.*"],
+                ["time_latency", "time_baseline"],
+                order=["model_exporter"],
+                ignore_columns=["date"],
             )
         )
         self.assertEqual((2, 6), view.shape)
@@ -139,12 +147,13 @@ def test_cube_logs_view_agg(self):
             CubeViewDef(
                 ["version.*", "model.*"],
                 ["time_latency", "time_baseline"],
-                key_agg=["model_name"],
+                key_agg=["model_name", "date"],
+                ignore_columns=["version_python"],
             )
         )
         self.assertEqual((2, 2), view.shape)
         self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
-        self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))
+        self.assertEqual([("export",), ("onnx-dynamo",)], list(view.index))
 
     @hide_stdout()
     def test_cube_logs_excel(self):
@@ -166,6 +175,7 @@ def test_cube_logs_excel(self):
         )
         self.assertExists(output)
 
+    @hide_stdout()
     def test_enumerate_csv_files(self):
         df = self.df1()
         filename = self.get_dump_file("test_enumerate_csv_files.csv")
@@ -186,6 +196,30 @@ def test_enumerate_csv_files(self):
         self.assertEqual((3, 11), cube.shape)
         self.assertIn("RAWFILENAME", cube.data.columns)
 
+    def test_cube_logs_performance(self):
+        """Loads the csv files stored in ``data/data-agg.zip`` into a
+        :class:`CubeLogsPerformance` and dumps a selection of views into excel."""
+        output = self.get_dump_file("test_cube_logs_performance.xlsx")
+        data_dir = os.path.join(os.path.dirname(__file__), "data")
+        filename = os.path.join(data_dir, "data-agg.zip")
+        # the archive must expose at least one csv file
+        assert list(enumerate_csv_files(filename))
+        dfs = []
+        for entry in enumerate_csv_files(filename):
+            dfs.append(open_dataframe(entry))
+        assert dfs, f"{filename!r} empty"
+        # view "cmd" is left out of this test
+        selected_views = [
+            "agg-suite",
+            "disc",
+            "speedup",
+            "time",
+            "time_export",
+            "err",
+            "bucket-speedup",
+            "raw-short",
+        ]
+        cube = CubeLogsPerformance(dfs)
+        cube.load()
+        cube.to_excel(output, views=selected_views)
+        self.assertExists(output)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
@@ -4,6 +4,7 @@
 from onnx_diagnostic.ext_test_case import ExtTestCase
 from onnx_diagnostic._command_lines_parser import (
     get_main_parser,
+    get_parser_agg,
     get_parser_config,
     get_parser_find,
     get_parser_lighten,
@@ -71,6 +72,13 @@ def test_parser_stats(self):
         text = st.getvalue()
         self.assertIn("input", text)
 
+    def test_parser_agg(self):
+        """Checks the help of command ``agg`` mentions option ``--recent``."""
+        captured = StringIO()
+        agg_parser = get_parser_agg()
+        with redirect_stdout(captured):
+            agg_parser.print_help()
+        self.assertIn("--recent", captured.getvalue())
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
@@ -609,6 +609,128 @@ def _cmd_stats(argv: List[Any]):
         print("done.")
 
 
+def get_parser_agg() -> ArgumentParser:
+    """
+    Builds the argument parser of command ``agg``.
+
+    The parser takes an output excel file followed by at least one input
+    (csv or zip file, a name or a search path). The options select the time
+    column, the key, value and ignored columns, the formulas,
+    the views written to the output and the views dumped as csv.
+    Options holding several items expect them separated by ``,``.
+
+    :return: the parser
+    """
+    parser = ArgumentParser(
+        prog="agg",
+        description=textwrap.dedent(
+            """
+            Aggregates statistics coming from benchmarks.
+            Every run is a row. Every row is indexed by some keys,
+            and produces values. Every row has a date.
+            """
+        ),
+        epilog="example\n  python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
+        formatter_class=RawTextHelpFormatter,
+    )
+    parser.add_argument("output", help="output excel file")
+    parser.add_argument(
+        "inputs",
+        nargs="+",
+        help="input csv or zip files, at least 1, it can be a name, or search path",
+    )
+    parser.add_argument(
+        "--filter", default="rawdata_.*.csv", help="filter for input files inside zip files"
+    )
+    # BooleanOptionalAction also registers --no-recent and --no-raw
+    parser.add_argument(
+        "--recent",
+        default=True,
+        action=BooleanOptionalAction,
+        help="Keeps only the most recent experiment for the same set of keys.",
+    )
+    parser.add_argument(
+        "--raw",
+        default=True,
+        action=BooleanOptionalAction,
+        help="Keeps the raw data in a sheet.",
+    )
+    parser.add_argument("-t", "--time", default="DATE", help="Date or time column")
+    parser.add_argument(
+        "-k",
+        "--keys",
+        # every key is listed once, 'device' used to appear twice
+        default="^version_.*,^model_.*,device,opt_patterns,suite,memory_peak,machine,exporter,dynamic,rtopt,dtype,architecture",
+        help="List of columns to consider as keys, "
+        "multiple values are separated by `,`\n"
+        "regular expressions are allowed",
+    )
+    parser.add_argument(
+        "-w",
+        "--values",
+        default="^time_.*,^disc.*,^ERR_.*,CMD,^ITER.*",
+        help="List of columns to consider as values, "
+        "multiple values are separated by `,`\n"
+        "regular expressions are allowed",
+    )
+    parser.add_argument(
+        "-i", "--ignored", default="version_python", help="List of columns to ignore"
+    )
+    parser.add_argument(
+        "-f",
+        "--formula",
+        default="speedup,bucket[speedup],ERR1",
+        help="Columns to compute after the aggregation was done.",
+    )
+    parser.add_argument(
+        "--views",
+        default="agg-suite,disc,speedup,time,time_export,err,cmd,bucket-speedup,raw-short",
+        help="Views to add to the output files.",
+    )
+    parser.add_argument(
+        "--csv",
+        default="raw-short",
+        help="Views to dump as csv files.",
+    )
+    parser.add_argument("-v", "--verbose", type=int, default=0, help="verbosity")
+    return parser
+
+
+def _cmd_agg(argv: List[Any]):
+    """
+    Implements command ``agg``.
+
+    The function enumerates the csv files found in the inputs (filtered with
+    the regular expression given by ``--filter``), opens every one of them
+    as a dataframe, builds a ``CubeLogsPerformance`` from them
+    and writes the requested views into the output file.
+
+    :param argv: command line, the first element is skipped
+    """
+    from .helpers.log_helper import CubeLogsPerformance, open_dataframe, enumerate_csv_files
+
+    def _split(text: str) -> List[str]:
+        # drops empty items, an empty option or a trailing comma is harmless
+        return [a for a in text.split(",") if a]
+
+    parser = get_parser_agg()
+    args = parser.parse_args(argv[1:])
+    reg = re.compile(args.filter)
+
+    csv = list(
+        enumerate_csv_files(
+            args.inputs, verbose=args.verbose, filtering=lambda name: bool(reg.search(name))
+        )
+    )
+    assert csv, f"No csv files in {args.inputs}"
+    if args.verbose:
+        # tqdm is only needed to show a progress bar
+        from tqdm import tqdm
+
+        loop = tqdm(csv)
+    else:
+        loop = csv
+    dfs = []
+    for c in loop:
+        df = open_dataframe(c)
+        # the message shows the loaded frame, *c* is only the entry it was read from
+        assert (
+            args.time in df.columns
+        ), f"Missing time column {args.time!r} in {c!r}\n{df.head()!r}"
+        dfs.append(df)
+
+    cube = CubeLogsPerformance(
+        dfs,
+        time=args.time,
+        keys=_split(args.keys),
+        values=_split(args.values),
+        ignored=_split(args.ignored),
+        recent=args.recent,
+        formulas={k: k for k in _split(args.formula)},
+    )
+    # the cube is one level less verbose than the command itself
+    cube.load(verbose=max(args.verbose - 1, 0))
+    if args.verbose:
+        print(f"Dumps final file into {args.output!r}")
+    cube.to_excel(
+        args.output,
+        {k: k for k in _split(args.views)},
+        verbose=args.verbose,
+        csv=_split(args.csv),
+        raw=args.raw,
+    )
+    if args.verbose:
+        print(f"Wrote {args.output!r}")
+
 def get_main_parser() -> ArgumentParser:
     parser = ArgumentParser(
         prog="onnx_diagnostic",
@@ -619,19 +741,29 @@ def get_main_parser() -> ArgumentParser:
             Type 'python -m onnx_diagnostic <cmd> --help'
             to get help for a specific command.
 
+            agg        - aggregates statistics from multiple files
             config     - prints a configuration for a model id
             find       - find node consuming or producing a result
             lighten    - makes an onnx model lighter by removing the weights,
-            unlighten  - restores an onnx model produces by the previous experiment
             print      - prints the model on standard output
-            validate   - validate a model
             stats      - produces statistics on a model
+            unlighten  - restores an onnx model produces by the previous experiment
+            validate   - validate a model
             """
         ),
     )
     parser.add_argument(
         "cmd",
-        choices=["config", "find", "lighten", "print", "stats", "unlighten", "validate"],
+        choices=[
+            "agg",
+            "config",
+            "find",
+            "lighten",
+            "print",
+            "stats",
+            "unlighten",
+            "validate",
+        ],
         help="Selects a command.",
     )
     return parser
@@ -646,6 +778,7 @@ def main(argv: Optional[List[Any]] = None):
         config=_cmd_config,
         validate=_cmd_validate,
         stats=_cmd_stats,
+        agg=_cmd_agg,
     )
 
     if argv is None:
@@ -667,6 +800,7 @@ def main(argv: Optional[List[Any]] = None):
                 config=get_parser_config,
                 validate=get_parser_validate,
                 stats=get_parser_stats,
+                agg=get_parser_agg,
             )
             cmd = argv[0]
             if cmd not in parsers: