@@ -226,9 +226,46 @@ class CubeViewDef:
226226 :param name: name of the view, used mostly to debug
227227 :param plots: adds plots to the Excel sheet
228228 :param no_index: removes the index (but keeps the columns)
229+
230+ Some examples of views. First example is an aggregated view
231+ for many metrics.
232+
233+ .. code-block:: python
234+
235+ cube = CubeLogs(...)
236+
237+ CubeViewDef(
238+ key_index=cube._filter_column(fs, cube.keys_time),
239+ values=cube._filter_column(
240+ ["TIME_ITER", "speedup", "time_latency.*", "onnx_n_nodes"],
241+ cube.values,
242+ ),
243+ ignore_unique=True,
244+ key_agg=["model_name", "task", "model_task", "suite"],
245+ agg_args=lambda column_name: "sum" if column_name.startswith("n_") else "mean",
246+ agg_multi={"speedup_weighted": mean_weight, "speedup_geo": mean_geo},
247+ name="agg-all",
248+ plots=True,
249+ )
250+
251+ Next one focuses on a couple of metrics.
252+
253+ .. code-block:: python
254+
255+ cube = CubeLogs(...)
256+
257+ CubeViewDef(
258+ key_index=cube._filter_column(fs, cube.keys_time),
259+ values=cube._filter_column(["speedup"], cube.values),
260+ ignore_unique=True,
261+ keep_columns_in_index=["suite"],
262+ name="speedup",
263+ )
229264 """
230265
231266 class HighLightKind (enum .IntEnum ):
267+ "Codes to highlight values."
268+
232269 NONE = 0
233270 RED = 1
234271 GREEN = 2
@@ -397,6 +434,26 @@ class CubePlot:
397434 :param split: draw a graph per line in the dataframe
398435 :param timeseries: this assumes the time is one level of the columns,
399436 this argument indicates the level name
437+
438+ It defines a graph. Usually *bar* or *barh* is used to
439+ compare experiments for every metric, with one subplot per metric.
440+
441+ .. code-block:: python
442+
443+ CubePlot(df, kind="barh", orientation="row", split=True)
444+
445+ *line* is usually used to plot timeseries showing the
446+ evolution of metrics over time.
447+
448+ .. code-block:: python
449+
450+ CubePlot(
451+ df,
452+ kind="line",
453+ orientation="row",
454+ split=True,
455+ timeseries="time",
456+ )
400457 """
401458
402459 KINDS = {"bar" , "barh" , "line" }
@@ -607,6 +664,35 @@ def rotate_align(ax, angle=15, align="right"):
607664class CubeLogs :
608665 """
609666 Processes logs coming from experiments.
667+ A cube is basically a database with certain columns
668+ playing specific roles.
669+
670+ * time: only one column, it is not mandatory but it is recommended
671+ to have one
672+ * keys: they act as coordinates, they cannot be aggregated,
673+ they are not numbers, more like categories, `(time, *keys)`
674+ identifies an element of the database in a unique way,
675+ there cannot be more than one row sharing the same key and time
676+ values
677+ * values: they are not necessarily numerical, but if they are,
678+ they can be aggregated
679+
680+ Every other column is ignored. More columns can be added
681+ by using formulas.
682+
683+ :param data: the raw data
684+ :param time: the time column
685+ :param keys: the keys, can include regular expressions
686+ :param values: the values, can include regular expressions
687+ :param ignored: ignores some columns, acts as negative regular
688+ expressions for the other two
689+ :param recent: if more than one row shares the same keys,
690+ the cube only keeps the most recent one
691+ :param formulas: columns to add, defined with formulas
692+ :param fill_missing: a dictionary, defines values replacing missing ones
693+ for some columns
694+ :param keep_last_date: overwrites all the times with the most recent
695+ one, it makes things easier for timeseries
610696 """
611697
612698 def __init__ (
0 commit comments