Skip to content

Commit ba96266

Browse files
committed
better dco
1 parent 375a622 commit ba96266

File tree

2 files changed

+94
-1
lines changed

2 files changed

+94
-1
lines changed

onnx_diagnostic/_command_lines_parser.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -657,9 +657,16 @@ def get_parser_agg() -> ArgumentParser:
657657
),
658658
epilog=textwrap.dedent(
659659
"""
660-
examples:\n
660+
examples:
661661
662662
python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1
663+
python -m onnx_diagnostic agg agg.xlsx raw/*.zip raw/*.csv -v 1 \\
664+
--no-raw --keep-last-date --filter-out "exporter:test-exporter"
665+
666+
Another to create timeseries:
667+
668+
python -m onnx_diagnostic agg history.xlsx raw/*.csv -v 1 --no-raw \\
669+
--no-recent
663670
"""
664671
),
665672
formatter_class=RawTextHelpFormatter,

onnx_diagnostic/helpers/log_helper.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,46 @@ class CubeViewDef:
226226
:param name: name of the view, used mostly to debug
227227
:param plots: adds plot to the Excel sheet
228228
:param no_index: remove the index (but keeps the columns)
229+
230+
Some examples of views. First example is an aggregated view
231+
for many metrics.
232+
233+
.. code-block:: python
234+
235+
cube = CubeLogs(...)
236+
237+
CubeViewDef(
238+
key_index=cube._filter_column(fs, cube.keys_time),
239+
values=cube._filter_column(
240+
["TIME_ITER", "speedup", "time_latency.*", "onnx_n_nodes"],
241+
cube.values,
242+
),
243+
ignore_unique=True,
244+
key_agg=["model_name", "task", "model_task", "suite"],
245+
agg_args=lambda column_name: "sum" if column_name.startswith("n_") else "mean",
246+
agg_multi={"speedup_weighted": mean_weight, "speedup_geo": mean_geo},
247+
name="agg-all",
248+
plots=True,
249+
)
250+
251+
Next one focuses on a couple of metrics.
252+
253+
.. code-block:: python
254+
255+
cube = CubeLogs(...)
256+
257+
CubeViewDef(
258+
key_index=cube._filter_column(fs, cube.keys_time),
259+
values=cube._filter_column(["speedup"], cube.values),
260+
ignore_unique=True,
261+
keep_columns_in_index=["suite"],
262+
name="speedup",
263+
)
229264
"""
230265

231266
class HighLightKind(enum.IntEnum):
267+
"Codes to highlight values."
268+
232269
NONE = 0
233270
RED = 1
234271
GREEN = 2
@@ -397,6 +434,26 @@ class CubePlot:
397434
:param split: draw a graph per line in the dataframe
398435
:param timeseries: this assumes the time is one level of the columns,
399436
this argument indicates the level name
437+
438+
It defines a graph. Usually *bar* or *barh* is used to
439+
compare experiments for every metric, with one subplot per metric.
440+
441+
.. code-block:: python
442+
443+
CubePlot(df, kind="barh", orientation="row", split=True)
444+
445+
*line* is usually used to plot timeseries showing the
446+
evolution of metrics over time.
447+
448+
.. code-block:: python
449+
450+
CubePlot(
451+
df,
452+
kind="line",
453+
orientation="row",
454+
split=True,
455+
timeseries="time",
456+
)
400457
"""
401458

402459
KINDS = {"bar", "barh", "line"}
@@ -607,6 +664,35 @@ def rotate_align(ax, angle=15, align="right"):
607664
class CubeLogs:
608665
"""
609666
Processes logs coming from experiments.
667+
A cube is basically a database with certain columns
668+
playing specific roles.
669+
670+
* time: only one column, it is not mandatory but it is recommended
671+
to have one
672+
* keys: they are somehow coordinates, they cannot be aggregated,
673+
they are not numbers, more like categories, `(time, *keys)`
674+
identifies an element of the database in a unique way,
675+
there cannot be more than one row sharing the same key and time
676+
values
677+
* values: they are not necessarily numerical, but if they are,
678+
they can be aggregated
679+
680+
Every other column is ignored. More columns can be added
681+
by using formulas.
682+
683+
:param data: the raw data
684+
:param time: the time column
685+
:param keys: the keys, can include regular expressions
686+
:param values: the values, can include regular expressions
687+
:param ignored: ignores some columns, acts as negative regular
688+
expressions for the other two
689+
:param recent: if more than one row shares the same keys,
690+
the cube only keeps the most recent one
691+
:param formulas: columns to add, defined with formulas
692+
:param fill_missing: a dictionary, defines values replacing missing ones
693+
for some columns
694+
:param keep_last_date: overwrites all the times with the most recent
695+
one, it makes things easier for timeseries
610696
"""
611697

612698
def __init__(

0 commit comments

Comments
 (0)