Skip to content

Commit 90a15c6

Browse files
committed
add view agg
1 parent 7c046a7 commit 90a15c6

File tree

2 files changed: +150 additions, -32 deletions

_unittests/ut_helpers/test_log_helper.py

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
import unittest
44
import pandas
55
from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
6-
from onnx_diagnostic.helpers.log_helper import CubeLogs
6+
from onnx_diagnostic.helpers.log_helper import CubeLogs, CubeViewDef
77

88

99
class TestLogHelper(ExtTestCase):
10-
@hide_stdout()
11-
def test_cube_logs(self):
12-
df = pandas.read_csv(
10+
@classmethod
11+
def df1(cls):
12+
return pandas.read_csv(
1313
io.StringIO(
1414
textwrap.dedent(
1515
"""
@@ -22,12 +22,25 @@ def test_cube_logs(self):
2222
)
2323
)
2424
)
25+
26+
@classmethod
27+
def cube1(cls, verbose=0):
28+
cube = CubeLogs(
29+
cls.df1(),
30+
recent=True,
31+
formulas={"speedup": lambda df: df["time_baseline"] / df["time_baseline"]},
32+
)
33+
return cube.load(verbose=verbose)
34+
35+
@hide_stdout()
36+
def test_cube_logs_load_df(self):
37+
df = self.df1()
2538
cube = CubeLogs(df)
2639
text = str(cube)
2740
self.assertIsInstance(text, str)
2841
self.assertRaise(lambda: cube.load(verbose=1), AssertionError)
2942
cube = CubeLogs(
30-
df,
43+
self.df1(),
3144
recent=True,
3245
formulas={"speedup": lambda df: df["time_baseline"] / df["time_baseline"]},
3346
)
@@ -36,7 +49,43 @@ def test_cube_logs(self):
3649
self.assertIsInstance(text, str)
3750
self.assertEqual((3, df.shape[1] + 1), cube.shape)
3851
self.assertEqual(set(cube.columns), {*df.columns, "speedup"})
39-
view = cube.view(["version.*", "model_name"], ["time_latency", "time_baseline"])
52+
53+
@hide_stdout()
54+
def test_cube_logs_load_list(self):
55+
cube = CubeLogs(
56+
[
57+
dict(
58+
date="1/1/2001",
59+
version_python="3.13",
60+
model_exporter="A",
61+
time_latency=5.6,
62+
),
63+
dict(
64+
date="1/1/2001",
65+
version_python="3.13",
66+
model_exporter="B",
67+
time_latency=5.7,
68+
),
69+
]
70+
)
71+
cube.load(verbose=1)
72+
self.assertEqual((2, 4), cube.shape)
73+
74+
def test_cube_logs_view_repr(self):
75+
v = CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
76+
r = repr(v)
77+
self.assertEqual(
78+
"CubeViewDef(key_index=['version.*', 'model_name'], "
79+
"values=['time_latency', 'time_baseline'])",
80+
r,
81+
)
82+
83+
@hide_stdout()
84+
def test_cube_logs_view(self):
85+
cube = self.cube1(verbose=1)
86+
view = cube.view(
87+
CubeViewDef(["version.*", "model_name"], ["time_latency", "time_baseline"])
88+
)
4089
self.assertEqual((3, 4), view.shape)
4190
self.assertEqual(
4291
[
@@ -52,7 +101,9 @@ def test_cube_logs(self):
52101
)
53102

54103
view = cube.view(
55-
["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
104+
CubeViewDef(
105+
["version.*"], ["time_latency", "time_baseline"], order=["model_exporter"]
106+
)
56107
)
57108
self.assertEqual((2, 6), view.shape)
58109
self.assertEqual(
@@ -68,6 +119,19 @@ def test_cube_logs(self):
68119
)
69120
self.assertEqual(["3.12.3", "3.13.3"], list(view.index))
70121

122+
def test_cube_logs_view_agg(self):
123+
cube = self.cube1(verbose=0)
124+
view = cube.view(
125+
CubeViewDef(
126+
["version.*", "model.*"],
127+
["time_latency", "time_baseline"],
128+
key_agg=["model_name"],
129+
)
130+
)
131+
self.assertEqual((2, 2), view.shape)
132+
self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
133+
self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))
134+
71135

72136
if __name__ == "__main__":
73137
unittest.main(verbosity=2)

onnx_diagnostic/helpers/log_helper.py

Lines changed: 79 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,45 @@
11
import re
22
from typing import Any, Callable, Dict, Optional, Sequence, Tuple
3+
from .helper import string_sig
34
import pandas
45

56

67
class CubeViewDef:
78
"""
89
Defines how to compute a view.
10+
11+
:param key_index: keys to put in the row index
12+
:param values: values to show
13+
:param ignore_unique: ignore keys with a unique value
14+
:param order: to reorder key in columns index
15+
:param key_agg: aggregate according to these columns before
16+
creating the view
17+
:param agg_args: see :meth:`pandas.core.groupby.DataFrameGroupBy.agg`
18+
:param agg_kwargs: see :meth:`pandas.core.groupby.DataFrameGroupBy.agg`
919
"""
10-
def __init__(self, )
20+
21+
def __init__(
22+
self,
1123
key_index: Sequence[str],
1224
values: Sequence[str],
1325
ignore_unique: bool = True,
1426
order: Optional[Sequence[str]] = None,
27+
key_agg: Optional[Sequence[str]] = None,
28+
agg_args: Sequence[Any] = ("sum",),
29+
agg_kwargs: Optional[Dict[str, Any]] = None,
30+
):
31+
self.key_index = key_index
32+
self.values = values
33+
self.ignore_unique = ignore_unique
34+
self.order = order
35+
self.key_agg = key_agg
36+
self.agg_args = agg_args
37+
self.agg_kwargs = agg_kwargs
38+
39+
def __repr__(self) -> str:
40+
"usual"
41+
return string_sig(self)
42+
1543

1644
class CubeLogs:
1745
"""
@@ -37,7 +65,7 @@ def __init__(
3765
self._formulas = formulas
3866

3967
def load(self, verbose: int = 0):
40-
"""Loads and preprocesses the data."""
68+
"""Loads and preprocesses the data. Returns self."""
4169
if isinstance(self._data, pandas.DataFrame):
4270
if verbose:
4371
print(f"[CubeLogs.load] load from dataframe, shape={self._data.shape}")
@@ -103,10 +131,16 @@ def load(self, verbose: int = 0):
103131
print(f"[CubeLogs.load] apply formula {k!r}")
104132
self.data[k] = f(self.data)
105133
self.values_for_key = {k: set(self.data[k]) for k in self.keys}
106-
nans = [c for c in self.keys if self.data[c].isna().astype(int).sum() > 0]
134+
nans = [
135+
c for c in [self.time, *self.keys] if self.data[c].isna().astype(int).sum() > 0
136+
]
107137
assert not nans, f"The following keys {nans} have nan values. This is not allowed."
138+
if verbose:
139+
print(f"[CubeLogs.load] convert column {self.time!r} into date")
140+
self.data[self.time] = pandas.to_datetime(self.data[self.time])
108141
if verbose:
109142
print(f"[CubeLogs.load] done, shape={self.shape}")
143+
return self
110144

111145
@property
112146
def shape(self) -> Tuple[int, int]:
@@ -171,43 +205,63 @@ def __str__(self) -> str:
171205
"usual"
172206
return str(self.data) if hasattr(self, "data") else str(self._data)
173207

174-
def view(
175-
self,
176-
key_index: Sequence[str],
177-
values: Sequence[str],
178-
ignore_unique: bool = True,
179-
order: Optional[Sequence[str]] = None,
180-
) -> pandas.DataFrame:
208+
def view(self, view_def: CubeViewDef) -> pandas.DataFrame:
181209
"""
182210
Returns a dataframe, a pivot view.
183211
`key_index` determines the index, the other key columns determines
184212
the columns. If `ignore_unique` is True, every columns with a unique value
185213
is removed.
186214
187-
:param key_index: keys to put in the row index
188-
:param values: values to show
189-
:param ignore_unique: ignore keys with a unique value
190-
:param order: to reorder key in columns index
215+
:param view_def: view definition
191216
:return: dataframe
192217
"""
193-
key_index = self._filter_column(key_index, self.keys)
194-
values = self._filter_column(values, self.values)
195-
assert set(key_index) <= set(
218+
key_agg = self._filter_column(view_def.key_agg, self.keys) if view_def.key_agg else []
219+
set_key_agg = set(key_agg)
220+
assert set_key_agg <= set(
196221
self.keys
197-
), f"Non existing columns in key_index {set(key_index) - set(self.keys)}"
222+
), f"Non existing keys in key_agg {set_key_agg - set(self.keys)}"
223+
224+
values = self._filter_column(view_def.values, self.values)
198225
assert set(values) <= set(
199226
self.values
200227
), f"Non existing columns in values {set(values) - set(self.values)}"
201-
set_key_columns = {c for c in self.keys if c not in key_index}
202-
if ignore_unique:
228+
229+
if key_agg:
230+
key_index = [
231+
c
232+
for c in self._filter_column(view_def.key_index, self.keys)
233+
if c not in set_key_agg
234+
]
235+
keys_no_agg = [c for c in self.keys if c not in set_key_agg]
236+
data = (
237+
self.data[[*keys_no_agg, *values]]
238+
.groupby(key_index, as_index=False)
239+
.agg(*view_def.agg_args, **(view_def.agg_kwargs or {}))
240+
)
241+
else:
242+
key_index = self._filter_column(view_def.key_index, self.keys)
243+
data = self.data[[*self.keys, *values]]
244+
245+
assert set(key_index) <= set(
246+
self.keys
247+
), f"Non existing keys in key_index {set(key_index) - set(self.keys)}"
248+
249+
set_key_columns = {
250+
c for c in self.keys if c not in key_index and c not in set(key_agg)
251+
}
252+
if view_def.ignore_unique:
203253
key_index = [k for k in key_index if len(self.values_for_key[k]) > 1]
204254
key_columns = [k for k in set_key_columns if len(self.values_for_key[k]) > 1]
205255
else:
206256
key_columns = sorted(set_key_columns)
207-
if order:
208-
assert set(order) <= set_key_columns, (
257+
258+
if view_def.order:
259+
assert set(view_def.order) <= set_key_columns, (
209260
f"Non existing columns from order in key_columns "
210-
f"{set(order) - set_key_columns}"
261+
f"{set(view_def.order) - set_key_columns}"
211262
)
212-
key_columns = [*order, *[c for c in key_columns if c not in order]]
213-
return self.data.pivot(index=key_index[::-1], columns=key_columns, values=values)
263+
key_columns = [
264+
*view_def.order,
265+
*[c for c in key_columns if c not in view_def.order],
266+
]
267+
return data.pivot(index=key_index[::-1], columns=key_columns, values=values)

0 commit comments