|
| 1 | +import io |
| 2 | +import os |
| 3 | +import textwrap |
| 4 | +import unittest |
| 5 | +import zipfile |
| 6 | +import pandas |
| 7 | +from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout |
| 8 | +from onnx_diagnostic.helpers.log_helper import ( |
| 9 | + CubeLogs, |
| 10 | + CubeViewDef, |
| 11 | + enumerate_csv_files, |
| 12 | + open_dataframe, |
| 13 | +) |
| 14 | + |
| 15 | + |
class TestLogHelper(ExtTestCase):
    """Tests for ``CubeLogs``, ``CubeViewDef``, ``enumerate_csv_files`` and ``open_dataframe``."""

    @staticmethod
    def _speedup(df):
        """Formula of the ``speedup`` column: baseline time divided by measured latency."""
        return df["time_baseline"] / df["time_latency"]

    @classmethod
    def df1(cls):
        """Return the reference log used by the tests.

        Four runs described by a date, two versions, a model name, an exporter,
        three timings and two discrepancy columns (10 columns in total).
        """
        return pandas.read_csv(
            io.StringIO(
                textwrap.dedent(
                    """
                    date,version_python,version_transformers,model_name,model_exporter,time_load,time_latency,time_baseline,disc_ort,disc_ort2
                    2025/01/01,3.13.3,4.52.4,phi3,export,0.5,0.1,0.1,1e-5,1e-5
                    2025/01/02,3.13.3,4.52.4,phi3,export,0.6,0.11,0.1,1e-5,1e-5
                    2025/01/01,3.13.3,4.52.4,phi4,export,0.5,0.1,0.105,1e-5,1e-5
                    2025/01/01,3.12.3,4.52.4,phi4,onnx-dynamo,0.5,0.1,0.999,1e-5,1e-5
                    """
                )
            )
        )

    @classmethod
    def cube1(cls, verbose=0):
        """Build a cube over :meth:`df1` with a ``speedup`` formula and return ``load()``'s result.

        :param verbose: verbosity forwarded to ``CubeLogs.load``
        """
        cube = CubeLogs(
            cls.df1(),
            recent=True,
            formulas={"speedup": cls._speedup},
        )
        return cube.load(verbose=verbose)

    @hide_stdout()
    def test_cube_logs_load_df(self):
        """Loading one dataframe: fails without ``recent=True``, then adds ``speedup``."""
        df = self.df1()
        cube = CubeLogs(df)
        text = str(cube)
        self.assertIsInstance(text, str)
        # Without recent=True the load is expected to be rejected
        # (presumably because phi3/export appears on two dates).
        self.assertRaise(lambda: cube.load(verbose=1), AssertionError)
        cube = CubeLogs(
            self.df1(),
            recent=True,
            formulas={"speedup": self._speedup},
        )
        cube.load(verbose=1)
        text = str(cube)
        self.assertIsInstance(text, str)
        # Three rows are expected out of four, plus one column for the formula.
        self.assertEqual((3, df.shape[1] + 1), cube.shape)
        self.assertEqual(set(cube.columns), {*df.columns, "speedup"})

    @hide_stdout()
    def test_cube_logs_load_dfdf(self):
        """Loading the same dataframe twice must not duplicate rows."""
        df = self.df1()
        cube = CubeLogs([df, df], recent=True)
        cube.load(verbose=1)
        self.assertEqual((3, 10), cube.shape)

    @hide_stdout()
    def test_cube_logs_load_list(self):
        """A cube can be built from a list of dictionaries, one per run."""
        cube = CubeLogs(
            [
                dict(
                    date="1/1/2001",
                    version_python="3.13",
                    model_exporter="A",
                    time_latency=5.6,
                ),
                dict(
                    date="1/1/2001",
                    version_python="3.13",
                    model_exporter="B",
                    time_latency=5.7,
                ),
            ]
        )
        cube.load(verbose=1)
        self.assertEqual((2, 4), cube.shape)

    def test_cube_logs_view_repr(self):
        """``repr`` of a view definition shows its key index and its values."""
        v = CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
        r = repr(v)
        self.assertEqual(
            "CubeViewDef(key_index=['version.*', 'model_name'], "
            "values=['time_latency', 'time_baseline'])",
            r,
        )

    @hide_stdout()
    def test_cube_logs_view(self):
        """Views: check shape, column tuples and index for two view definitions."""
        cube = self.cube1(verbose=1)

        # First view: versions and model name in the index.
        view = cube.view(
            CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
        )
        self.assertEqual((3, 4), view.shape)
        self.assertEqual(
            [
                ("time_baseline", "export"),
                ("time_baseline", "onnx-dynamo"),
                ("time_latency", "export"),
                ("time_latency", "onnx-dynamo"),
            ],
            list(view.columns),
        )
        self.assertEqual(
            [("3.12.3", "phi4"), ("3.13.3", "phi3"), ("3.13.3", "phi4")], list(view.index)
        )

        # Second view: only versions in the index, with an explicit ``order``.
        view = cube.view(
            CubeViewDef(
                ["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
            )
        )
        self.assertEqual((2, 6), view.shape)
        self.assertEqual(
            [
                ("time_baseline", "export", "phi3"),
                ("time_baseline", "export", "phi4"),
                ("time_baseline", "onnx-dynamo", "phi4"),
                ("time_latency", "export", "phi3"),
                ("time_latency", "export", "phi4"),
                ("time_latency", "onnx-dynamo", "phi4"),
            ],
            list(view.columns),
        )
        self.assertEqual(["3.12.3", "3.13.3"], list(view.index))

    def test_cube_logs_view_agg(self):
        """A view with ``key_agg=["model_name"]`` aggregates that key away."""
        cube = self.cube1(verbose=0)
        view = cube.view(
            CubeViewDef(
                ["version.*", "model.*"],
                ["time_latency", "time_baseline"],
                key_agg=["model_name"],
            )
        )
        self.assertEqual((2, 2), view.shape)
        self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
        self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))

    @hide_stdout()
    def test_cube_logs_excel(self):
        """Export a plain view and an aggregated view to one Excel file."""
        output = self.get_dump_file("test_cube_logs_excel.xlsx")
        cube = self.cube1(verbose=0)
        cube.to_excel(
            output,
            {
                "example": CubeViewDef(
                    ["version.*", "model_name"], ["time_latency", "time_baseline"]
                ),
                "agg": CubeViewDef(
                    ["version.*", "model.*"],
                    ["time_latency", "time_baseline"],
                    key_agg=["model_name"],
                ),
            },
            verbose=1,
        )
        self.assertExists(output)

    @hide_stdout()
    def test_enumerate_csv_files(self):
        """CSV and zipped CSV files found through glob patterns can be opened and loaded."""
        frame = self.df1()
        csv_file = self.get_dump_file("test_enumerate_csv_files.csv")
        frame.to_csv(csv_file, index=False)
        zip_file = self.get_dump_file("test_enumerate_csv_files.zip")
        with zipfile.ZipFile(zip_file, "w", zipfile.ZIP_DEFLATED) as zipf:
            zipf.write(csv_file)

        # Still glob patterns, but restricted to the files written above so that
        # unrelated files left in the dump folder cannot change the cube shape.
        csv_stem, csv_ext = os.path.splitext(csv_file)
        zip_stem, zip_ext = os.path.splitext(zip_file)
        data = [f"{csv_stem}*{csv_ext}", f"{zip_stem}*{zip_ext}"]

        found = list(enumerate_csv_files(data, verbose=1))
        self.assertNotEmpty(found)
        for item in found:
            # Every enumerated entry must be readable.
            open_dataframe(item)

        cube = CubeLogs(data, recent=True)
        cube.load(verbose=1)
        # 10 original columns plus RAWFILENAME.
        self.assertEqual((3, 11), cube.shape)
        self.assertIn("RAWFILENAME", cube.data.columns)
| 189 | + |
if __name__ == "__main__":
    # Run the tests of this module when the file is executed directly;
    # verbosity=2 prints one line per test.
    unittest.main(verbosity=2)
0 commit comments