From 41436fbe7efa3c37d805aef34b833e12493a09b9 Mon Sep 17 00:00:00 2001
From: xadupre
Date: Thu, 25 Sep 2025 12:12:28 +0200
Subject: [PATCH 1/5] Adds more precise statistics about fusion

---
 onnx_diagnostic/_command_lines_parser.py |  2 +
 onnx_diagnostic/helpers/log_helper.py    | 56 ++++++++++++++++++++++--
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py
index 1c98a90c..2eefda5d 100644
--- a/onnx_diagnostic/_command_lines_parser.py
+++ b/onnx_diagnostic/_command_lines_parser.py
@@ -827,6 +827,8 @@ def get_parser_agg() -> ArgumentParser:
         "n_model_running,n_model_acc01,n_model_acc001,n_model_dynamic,"
         "n_model_pass,n_model_faster,"
         "n_model_faster2x,n_model_faster3x,n_model_faster4x,n_node_attention,"
+        "n_node_attention23,n_node_rotary_embedding,n_node_rotary_embedding23,"
+        "n_node_layer_normalization,n_node_layer_normalization23,"
         "peak_gpu_torch,peak_gpu_nvidia,n_node_control_flow,"
         "n_node_constant,n_node_shape,n_node_expand,"
         "n_node_function,n_node_initializer,n_node_scatter,"
diff --git a/onnx_diagnostic/helpers/log_helper.py b/onnx_diagnostic/helpers/log_helper.py
index 0453580e..8d74a2fc 100644
--- a/onnx_diagnostic/helpers/log_helper.py
+++ b/onnx_diagnostic/helpers/log_helper.py
@@ -877,7 +877,11 @@ def view(
             print(f"[CubeLogs.view] key_columns={key_columns}")
         g = data[[*key_index, *key_columns]].copy()
         g["count"] = 1
-        r = g.groupby([*key_index, *key_columns], dropna=False).sum()
+        r = (
+            g.copy()
+            if not key_index and not key_columns
+            else g.groupby([*key_index, *key_columns], dropna=False).sum()
+        )
         not_unique = r[r["count"] > 1]
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
@@ -1505,6 +1509,11 @@ def __init__(
             "n_model_faster3x",
             "n_model_faster4x",
             "n_node_attention",
+            "n_node_attention23",
+            "n_node_rotary_embedding",
+            "n_node_rotary_embedding23",
+            "n_node_layer_normalization",
+            "n_node_layer_normalization23",
             "n_node_control_flow",
             "n_node_scatter",
             "n_node_function",
@@ -1676,11 +1685,50 @@ def first_err(df: pandas.DataFrame) -> pandas.Series:
                 "time_latency",
                 gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98,
             ),
+            n_node_attention23=lambda df: gpreserve(
+                df, "op_onnx__Attention", gdf(df, "op_onnx__Attention")
+            ),
+            n_node_rotary_embedding23=lambda df: gpreserve(
+                df, "op_onnx__RotaryEmbedding", gdf(df, "op_onnx__RotaryEmbedding")
+            ),
+            n_node_layer_normalization23=lambda df: gpreserve(
+                df,
+                "time_latency",
+                gdf(df, "op_onnx__LayerNormalization", 0)
+                + gdf(df, "op_onnx__RMSNormalization", 0)
+                + gdf(df, "op_onnx__BatchNormalization", 0)
+                + gdf(df, "op_onnx__InstanceNormalization", 0)
+                + gdf(df, "op_onnx__GroupNormalization", 0),
+            ),
             n_node_attention=lambda df: gpreserve(
                 df,
-                "op_onnx_com.microsoft_Attention",
-                gdf(df, "op_onnx_com.microsoft_Attention")
-                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention"),
+                "time_latency",
+                gdf(df, "op_onnx_com.microsoft_Attention", 0)
+                + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PackedMultiHeadAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_GroupQueryAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_PagedAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_LongformerAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedSelfAttention", 0)
+                + gdf(df, "op_onnx_com.microsoft_DecoderMaskedMultiHeadAttention", 0)
+ + gdf(df, "op_onnx_com.microsoft_SparseAttention", 0), + ), + n_node_layer_normalization=lambda df: gpreserve( + df, + "time_latency", + gdf(df, "op_onnx_com.microsoft_EmbedLayerNormalization", 0) + + gdf(df, "op_onnx_com.microsoft_SkipLayerNormalization", 0) + + gdf(df, "op_onnx_com.microsoft_LayerNormalization", 0) + + gdf(df, "op_onnx_com.microsoft_SkipSimplifiedLayerNormalization", 0) + + gdf(df, "op_onnx_com.microsoft_SimplifiedLayerNormalization", 0), + ), + n_node_rotary_embedding=lambda df: gpreserve( + df, + "time_latency", + gdf(df, "op_onnx_com.microsoft_GemmaRotaryEmbedding", 0) + + gdf(df, "op_onnx_com.microsoft_RotaryEmbedding", 0), ), n_node_control_flow=lambda df: gpreserve( df, From 3df7d9e8a1d883d10f9e5becd3a30d732a437072 Mon Sep 17 00:00:00 2001 From: xadupre Date: Thu, 25 Sep 2025 13:01:18 +0200 Subject: [PATCH 2/5] ut stat --- _unittests/ut_helpers/test_log_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_unittests/ut_helpers/test_log_helper.py b/_unittests/ut_helpers/test_log_helper.py index 63ee6392..afa16498 100644 --- a/_unittests/ut_helpers/test_log_helper.py +++ b/_unittests/ut_helpers/test_log_helper.py @@ -268,7 +268,7 @@ def test_cube_logs_performance_cube_time(self): cube = CubeLogsPerformance(dfs, keep_last_date=True) cube.load() ct = cube.clone() - self.assertEqual((52, 106), ct.shape) + self.assertEqual((52, 111), ct.shape) def test_duplicate(self): df = pandas.DataFrame( From 1be0d261de18af7f5231173126662c5bee93f27d Mon Sep 17 00:00:00 2001 From: xadupre Date: Thu, 25 Sep 2025 14:01:03 +0200 Subject: [PATCH 3/5] fix aggregation --- onnx_diagnostic/helpers/log_helper.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/onnx_diagnostic/helpers/log_helper.py b/onnx_diagnostic/helpers/log_helper.py index 8d74a2fc..99c7631e 100644 --- a/onnx_diagnostic/helpers/log_helper.py +++ b/onnx_diagnostic/helpers/log_helper.py @@ -1577,7 +1577,9 @@ def _process_formula( def gdf(df, cname, default_value=np.nan): if cname in df.columns: - return df[cname] + if np.isnan(default_value): + return df[cname] + return df[cname].fillna(default_value) return pandas.Series(default_value, index=df.index) def ghas_value(df, cname): @@ -1686,14 +1688,14 @@ def first_err(df: pandas.DataFrame) -> pandas.Series: gdf(df, "time_latency_eager") > gdf(df, "time_latency", np.inf) * 3.98, ), n_node_attention23=lambda df: gpreserve( - df, "op_onnx__Attention", gdf(df, "op_onnx__Attention") + df, "time_latency_eager", gdf(df, "op_onnx__Attention") ), n_node_rotary_embedding23=lambda df: gpreserve( - df, "op_onnx__RotaryEmbedding", gdf(df, "op_onnx__RotaryEmbedding") + df, "time_latency_eager", gdf(df, "op_onnx__RotaryEmbedding") ), n_node_layer_normalization23=lambda df: gpreserve( df, - "time_latency", + "time_latency_eager", gdf(df, "op_onnx__LayerNormalization", 0) + gdf(df, "op_onnx__RMSNormalization", 0) + gdf(df, "op_onnx__BatchNormlization", 0) @@ -1702,7 +1704,7 @@ def first_err(df: pandas.DataFrame) -> pandas.Series: ), n_node_attention=lambda df: gpreserve( df, - "time_latency", + "time_latency_eager", gdf(df, "op_onnx_com.microsoft_Attention", 0) + gdf(df, "op_onnx_com.microsoft_MultiHeadAttention", 0) + gdf(df, "op_onnx_com.microsoft_PackedAttention", 0) @@ -1717,7 +1719,7 @@ def first_err(df: pandas.DataFrame) -> pandas.Series: ), n_node_layer_normalization=lambda df: gpreserve( df, - "time_latency", + "time_latency_eager", gdf(df, "op_onnx_com.microsoft_EmbedLayerNormalization", 0) + gdf(df, 
"op_onnx_com.microsoft_SkipLayerNormalization", 0) + gdf(df, "op_onnx_com.microsoft_LayerNormalization", 0) @@ -1726,13 +1728,13 @@ def first_err(df: pandas.DataFrame) -> pandas.Series: ), n_node_rotary_embedding=lambda df: gpreserve( df, - "time_latency", + "time_latency_eager", gdf(df, "op_onnx_com.microsoft_GemmaRotaryEmbedding", 0) + gdf(df, "op_onnx_com.microsoft_RotaryEmbedding", 0), ), n_node_control_flow=lambda df: gpreserve( df, - "op_onnx__If", + "time_latency_eager", ( gdf(df, "op_onnx__If", 0) + gdf(df, "op_onnx__Scan", 0) @@ -1741,7 +1743,7 @@ def first_err(df: pandas.DataFrame) -> pandas.Series: ), n_node_scatter=lambda df: gpreserve( df, - "op_onnx__ScatterND", + "time_latency_eager", gdf(df, "op_onnx__ScatterND", 0) + gdf(df, "op_onnx__ScatterElements", 0), ), n_node_function=lambda df: gpreserve( @@ -1754,13 +1756,13 @@ def first_err(df: pandas.DataFrame) -> pandas.Series: df, "onnx_n_initializer", gdf(df, "onnx_n_initializer") ), n_node_constant=lambda df: gpreserve( - df, "op_onnx__Constant", gdf(df, "op_onnx__Constant") + df, "time_latency_eager", gdf(df, "op_onnx__Constant") ), n_node_shape=lambda df: gpreserve( - df, "op_onnx__Shape", gdf(df, "op_onnx__Shape") + df, "time_latency_eager", gdf(df, "op_onnx__Shape") ), n_node_expand=lambda df: gpreserve( - df, "op_onnx__Expand", gdf(df, "op_onnx__Expand") + df, "time_latency_eager", gdf(df, "op_onnx__Expand") ), ) assert ( From 857ef10ea0fee1dc1003eb48d9f2bea26a2cc6dd Mon Sep 17 00:00:00 2001 From: xadupre Date: Thu, 25 Sep 2025 14:52:35 +0200 Subject: [PATCH 4/5] fix api for huggingface hub --- _unittests/ut_torch_models/test_hghub_api.py | 1 - onnx_diagnostic/helpers/log_helper.py | 7 +++++-- onnx_diagnostic/torch_models/hghub/hub_api.py | 14 ++++---------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/_unittests/ut_torch_models/test_hghub_api.py b/_unittests/ut_torch_models/test_hghub_api.py index 10a9689e..627e5e40 100644 --- a/_unittests/ut_torch_models/test_hghub_api.py +++ b/_unittests/ut_torch_models/test_hghub_api.py @@ -39,7 +39,6 @@ def test_enumerate_model_list(self): verbose=1, dump="test_enumerate_model_list.csv", filter="image-classification", - library="transformers", ) ) self.assertEqual(len(models), 2) diff --git a/onnx_diagnostic/helpers/log_helper.py b/onnx_diagnostic/helpers/log_helper.py index 99c7631e..5993af0b 100644 --- a/onnx_diagnostic/helpers/log_helper.py +++ b/onnx_diagnostic/helpers/log_helper.py @@ -285,7 +285,8 @@ def _to_images_bar( nn = df.shape[1] // n_cols nn += int(df.shape[1] % n_cols != 0) ratio = float(os.environ.get("FIGSIZEH", "1")) - fig, axs = plt.subplots(nn, n_cols, figsize=(6 * n_cols, nn * df.shape[0] / 3 * ratio)) + figsize = (6 * n_cols, nn * (2 + df.shape[0] / 15) * ratio) + fig, axs = plt.subplots(nn, n_cols, figsize=figsize) pos = 0 imgs = [] for c in self._make_loop(df.columns, verbose): @@ -332,10 +333,12 @@ def rotate_align(ax, angle=15, align="right"): n_cols = len(groups) title_suffix = f"\n{title_suffix}" if title_suffix else "" + ratio = float(os.environ.get("FIGSIZEH", "1")) + figsize = (5 * n_cols, max(len(g) for g in groups) * (2 + df.shape[1] / 2) * ratio) fig, axs = plt.subplots( df.shape[1], n_cols, - figsize=(5 * n_cols, max(len(g) for g in groups) * df.shape[1] / 2), + figsize=figsize, sharex=True, sharey="row" if n_cols > 1 else False, ) diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py index 94f0aa7e..445f6c02 100644 --- a/onnx_diagnostic/torch_models/hghub/hub_api.py +++ 
+++ b/onnx_diagnostic/torch_models/hghub/hub_api.py
@@ -289,21 +289,17 @@ def task_from_tags(tags: Union[str, List[str]]) -> str:

 def enumerate_model_list(
     n: int = 50,
-    task: Optional[str] = None,
-    library: Optional[str] = None,
-    tags: Optional[Union[str, List[str]]] = None,
+    pipeline_tag: Optional[str] = None,
     search: Optional[str] = None,
     dump: Optional[str] = None,
-    filter: Optional[str] = None,
+    filter: Optional[Union[str, List[str]]] = None,
     verbose: int = 0,
 ):
     """
     Enumerates models coming from :epkg:`huggingface_hub`.

     :param n: number of models to retrieve (-1 for all)
-    :param task: see :meth:`huggingface_hub.HfApi.list_models`
-    :param tags: see :meth:`huggingface_hub.HfApi.list_models`
-    :param library: see :meth:`huggingface_hub.HfApi.list_models`
+    :param pipeline_tag: see :meth:`huggingface_hub.HfApi.list_models`
     :param search: see :meth:`huggingface_hub.HfApi.list_models`
     :param filter: see :meth:`huggingface_hub.HfApi.list_models`
     :param dump: dumps the result in this csv file
@@ -311,9 +307,7 @@ def enumerate_model_list(
     api = HfApi()
     models = api.list_models(
-        task=task,
-        library=library,
-        tags=tags,
+        pipeline_tag=pipeline_tag,
         search=search,
         full=True,
         filter=filter,

From fd5c1f764912e6d089814e3744d91e678cf40440 Mon Sep 17 00:00:00 2001
From: xadupre
Date: Thu, 25 Sep 2025 15:10:03 +0200
Subject: [PATCH 5/5] increase figsize

---
 onnx_diagnostic/helpers/log_helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnx_diagnostic/helpers/log_helper.py b/onnx_diagnostic/helpers/log_helper.py
index 5993af0b..48014deb 100644
--- a/onnx_diagnostic/helpers/log_helper.py
+++ b/onnx_diagnostic/helpers/log_helper.py
@@ -285,7 +285,7 @@ def _to_images_bar(
         nn = df.shape[1] // n_cols
         nn += int(df.shape[1] % n_cols != 0)
         ratio = float(os.environ.get("FIGSIZEH", "1"))
-        figsize = (6 * n_cols, nn * (2 + df.shape[0] / 15) * ratio)
+        figsize = (6 * n_cols, nn * (2.5 + df.shape[0] / 15) * ratio)
         fig, axs = plt.subplots(nn, n_cols, figsize=figsize)
         pos = 0
         imgs = []
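
Usage sketch for the reworked enumerate_model_list signature introduced in PATCH 4/5. This is illustrative only and not part of the patches; it mirrors the updated unit test, and the value n=2 is an assumption.

    from onnx_diagnostic.torch_models.hghub.hub_api import enumerate_model_list

    # pipeline_tag replaces the former task/library/tags arguments;
    # filter now accepts a str or a list of str.
    models = list(
        enumerate_model_list(
            n=2,                                   # assumption: retrieve only two models
            filter="image-classification",         # same filter as the updated unit test
            dump="test_enumerate_model_list.csv",  # dumps the listing to a csv file
            verbose=1,
        )
    )
    print(len(models))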