Skip to content

Commit f842ac5

Browse files
committed
status
1 parent 13fd59d commit f842ac5

File tree

2 files changed

+183
-0
lines changed

2 files changed

+183
-0
lines changed

_unittests/ut_helpers/test_log_helper.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,26 @@ def test_cube_logs_view_agg(self):
132132
self.assertEqual(["time_baseline", "time_latency"], list(view.columns))
133133
self.assertEqual([("3.13.3", "export"), ("3.12.3", "onnx-dynamo")], list(view.index))
134134

135+
@hide_stdout()
def test_cube_logs_excel(self):
    """Dumps two views of the cube into an Excel file and checks the file exists."""
    filename = self.get_dump_file("test_cube_logs_excel.xlsx")
    logs = self.cube1(verbose=0)
    # one sheet per entry, the key is the sheet name
    sheets = {
        "example": CubeViewDef(
            ["version.*", "model_name"], ["time_latency", "time_baseline"]
        ),
        "agg": CubeViewDef(
            ["version.*", "model.*"],
            ["time_latency", "time_baseline"],
            key_agg=["model_name"],
        ),
    }
    logs.to_excel(filename, sheets, verbose=1)
    self.assertExists(filename)
154+
135155

136156
if __name__ == "__main__":
137157
unittest.main(verbosity=2)

onnx_diagnostic/helpers/log_helper.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Callable, Dict, Optional, Sequence, Tuple
33
from .helper import string_sig
44
import pandas
5+
from pandas.api.types import is_numeric_dtype
56

67

78
class CubeViewDef:
@@ -265,3 +266,165 @@ def view(self, view_def: CubeViewDef) -> pandas.DataFrame:
265266
*[c for c in key_columns if c not in view_def.order],
266267
]
267268
return data.pivot(index=key_index[::-1], columns=key_columns, values=values)
269+
270+
def describe(self) -> pandas.DataFrame:
    """
    Basic description of all variables.

    One row is produced for every column of ``self.data``.
    Every row contains the column ``dtype`` and the number of ``missing``
    values. When the column has at least one non missing value,
    the row also contains ``count`` and, if the values can be compared,
    ``min`` and ``max``. Numeric columns receive ``mean`` and ``sum``,
    the other columns receive ``n_values`` (number of distinct values)
    and ``values`` (the distinct values joined with a comma) if there are
    fewer than 20 of them.

    :return: a dataframe indexed by the column ``name``,
        it is empty if ``self.data`` has no column
    """
    rows = []
    for name in self.data.columns:
        values = self.data[name]
        nonan = values.dropna()
        obs = dict(
            name=name,
            dtype=str(values.dtype),
            missing=len(values) - len(nonan),
        )
        if len(nonan) > 0:
            try:
                obs.update(dict(min=nonan.min(), max=nonan.max()))
            except TypeError:
                # An object column may mix values which cannot be compared
                # (str and int for example), min and max are left empty.
                pass
            obs["count"] = len(nonan)
            if is_numeric_dtype(nonan):
                obs.update(dict(mean=nonan.mean(), sum=nonan.sum()))
            else:
                unique = set(nonan)
                obs["n_values"] = len(unique)
                if len(unique) < 20:
                    try:
                        ordered = sorted(unique)
                    except TypeError:
                        # same reason as above, falls back on the string order
                        ordered = sorted(unique, key=str)
                    obs["values"] = ",".join(map(str, ordered))
        rows.append(obs)
    if not rows:
        # Without any row, the dataframe has no column 'name'
        # and set_index would raise KeyError.
        return pandas.DataFrame(index=pandas.Index([], name="name"))
    return pandas.DataFrame(rows).set_index("name")
304+
305+
def to_excel(
    self,
    output: str,
    views: Dict[str, CubeViewDef],
    main: Optional[str] = "main",
    raw: Optional[str] = "raw",
    verbose: int = 0,
):
    """
    Creates an excel file with a list of views.

    :param output: output file to create
    :param views: views to append, every key is the name of a sheet,
        every value is the definition of the view written in that sheet
    :param main: name of a sheet with statistics on all variables
        (see :meth:`describe`), the sheet is skipped if None or empty
    :param raw: name of a sheet with the raw data,
        the sheet is skipped if None or empty
    :param verbose: verbosity
    """
    with pandas.ExcelWriter(output, engine="openpyxl") as writer:
        if main:
            assert main not in views, f"{main!r} is duplicated in views {sorted(views)}"
            df = self.describe()
            if verbose:
                print(f"[CubeLogs.to_excel] add sheet {main!r} with shape {df.shape}")
            df.to_excel(writer, sheet_name=main, freeze_panes=(1, 1))
            self._apply_excel_style(main, writer, df)
        if raw:
            # The name to check here is the name of the raw sheet.
            assert raw not in views, f"{raw!r} is duplicated in views {sorted(views)}"
            if verbose:
                print(f"[CubeLogs.to_excel] add sheet {raw!r} with shape {self.shape}")
            self.data.to_excel(writer, sheet_name=raw, freeze_panes=(1, 1), index=True)
            self._apply_excel_style(raw, writer, self.data)

        for name, view in views.items():
            df = self.view(view)
            if verbose:
                print(
                    f"[CubeLogs.to_excel] add sheet {name!r} with shape "
                    f"{df.shape}, index={df.index.names}, columns={df.columns.names}"
                )
            # freeze_panes is (rows, columns): the header rows come from
            # the levels of the columns, the frozen columns from the levels
            # of the index.
            # NOTE(review): pandas seems to add one more header row for the
            # index names when the columns are a MultiIndex, to confirm.
            df.to_excel(
                writer,
                sheet_name=name,
                freeze_panes=(df.columns.nlevels, df.index.nlevels),
            )
            self._apply_excel_style(name, writer, df)
        if verbose:
            print(f"[CubeLogs.to_excel] done with {len(views)} views")
353+
354+
def _apply_excel_style(self, name: str, writer: pandas.ExcelWriter, df: pandas.DataFrame):
    """
    Sets the column widths, the alignment and the number formats
    of a sheet already written by pandas.

    :param name: name of the sheet, a key of ``writer.sheets``
    :param writer: writer holding the sheet
    :param df: dataframe written in the sheet, it is only used
        to bound the area of the sheet to process
    """
    from openpyxl.styles import Alignment
    from openpyxl.utils import get_column_letter

    left = Alignment(horizontal="left")
    right = Alignment(horizontal="right")

    sheet = writer.sheets[name]
    # Upper bounds for the area to style: data + header rows, data + index columns.
    n_rows = df.shape[0] + df.columns.nlevels + df.index.nlevels
    n_cols = df.shape[1] + df.index.nlevels
    sizes = {}  # longest string seen in every column
    cols = set()  # every visited column

    for i in range(1, n_rows):
        for j, cell in enumerate(sheet[i]):
            # j starts at 0, the valid positions are 0..n_cols-1
            if j >= n_cols:
                break
            cols.add(cell.column)
            value = cell.value
            if isinstance(value, pandas.Timestamp):
                cell.alignment = right
                dt = value.to_pydatetime()
                cell.value = dt
                # The time is only displayed if the timestamp is not a plain date.
                has_time = dt.hour or dt.minute or dt.second or dt.microsecond
                cell.number_format = "YYYY-MM-DD HH:MM:SS" if has_time else "YYYY-MM-DD"
            elif isinstance(value, (float, int)):
                cell.alignment = right
                x = abs(value)
                # is_integer never raises, int(x) does for nan or inf
                if isinstance(x, int) or x.is_integer():
                    cell.number_format = "0"
                elif x > 5000:
                    cell.number_format = "# ##0"
                elif x >= 500:
                    cell.number_format = "0.0"
                elif x >= 50:
                    cell.number_format = "0.00"
                elif x >= 5:
                    cell.number_format = "0.000"
                elif x > 0.5:
                    cell.number_format = "0.0000"
                elif x > 0.005:
                    cell.number_format = "0.00000"
                else:
                    cell.number_format = "0.000E+00"
            else:
                cell.alignment = left
                if isinstance(value, str):
                    sizes[cell.column] = max(sizes.get(cell.column, 0), len(value))

    # Every visited column is at least 15 characters wide,
    # wider if it contains a longer string.
    for k in cols:
        sheet.column_dimensions[get_column_letter(k)].width = max(15, sizes.get(k, 0))

0 commit comments

Comments
 (0)