From 16479fc0970a0d45f5ebd4b623d03ab1cf11ca45 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Sun, 3 Nov 2024 22:57:38 +0100
Subject: [PATCH 01/14] Add only_subset input to run only core benchmarks

---
 .github/workflows/triton-benchmarks.yml | 32 ++++++++++++++-----------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 94e419646b..e9cfb7c2b8 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -24,6 +24,10 @@ on:
         description: Run name
         type: string
         default: "Triton benchmarks"
+      only_subset:
+        description: Run only core subset of benchmarks
+        type: boolean
+        default: false
   schedule:
     - cron: "5 23 * * *"
   pull_request:
@@ -132,7 +136,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Default path:
@@ -148,7 +152,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Advanced path:
@@ -164,7 +168,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A@B^t) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
@@ -175,7 +179,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A^t@B) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
@@ -186,7 +190,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (stream-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_streamk_benchmark.py --reports $REPORTS
@@ -194,7 +198,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (split-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_splitk_benchmark.py --reports $REPORTS
@@ -202,7 +206,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PreOp (exp) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_preop_exp_benchmark.py --reports $REPORTS
@@ -210,7 +214,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_gelu_benchmark.py --reports $REPORTS
@@ -218,7 +222,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
@@ -226,7 +230,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python flash_attention_fwd_benchmark.py --reports $REPORTS
@@ -236,7 +240,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=0 \
@@ -249,7 +253,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=1 \
@@ -262,7 +266,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Prefix Sums kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python prefix_sums.py --reports $REPORTS
@@ -270,7 +274,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run micro benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
         run: |
           cd benchmarks/micro_benchmarks
           python run_benchmarks.py --reports $REPORTS

From 488f57c1d4eab65c55a5d79fa7a53b6529b21eb4 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:27:03 +0100
Subject: [PATCH 02/14] Replace only_subset input with skip_benchmarks list

---
 .github/workflows/triton-benchmarks.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index e9cfb7c2b8..5bc4c1dea1 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -24,10 +24,10 @@ on:
         description: Run name
         type: string
         default: "Triton benchmarks"
-      only_subset:
-        description: Run only core subset of benchmarks
-        type: boolean
-        default: false
+      skip_benchmarks:
+        description: List of benchmarks to skip
+        type: string
+        default: ""
   schedule:
     - cron: "5 23 * * *"
   pull_request:
@@ -116,7 +116,7 @@ jobs:
           python setup.py install
 
       - name: Run Triton Softmax kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && search(inputs.skip_benchmarks, "fused_softmax.py") }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python fused_softmax.py --reports $REPORTS

From 284d898656449b1176600c7604bc358584d494b4 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:28:49 +0100
Subject: [PATCH 03/14] Use contains() instead of search() in Softmax skip check

---
 .github/workflows/triton-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 5bc4c1dea1..c9f2622bd2 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -116,7 +116,7 @@ jobs:
           python setup.py install
 
       - name: Run Triton Softmax kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && search(inputs.skip_benchmarks, "fused_softmax.py") }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, "fused_softmax.py") }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python fused_softmax.py --reports $REPORTS

From 6ccc24392a5008fdb2f2944470a0e1d8bc5d3dd1 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:30:40 +0100
Subject: [PATCH 04/14] Single-quote string literal in Softmax skip check

---
 .github/workflows/triton-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index c9f2622bd2..3bc56f68e1 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -116,7 +116,7 @@ jobs:
           python setup.py install
 
       - name: Run Triton Softmax kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, "fused_softmax.py") }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'fused_softmax.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python fused_softmax.py --reports $REPORTS

From c67ca36330761ad6eef2bdfbd1f76cf0cbf58169 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:35:14 +0100
Subject: [PATCH 05/14] Apply skiplist check to GEMM base, default and advanced steps

---
 .github/workflows/triton-benchmarks.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 3bc56f68e1..f3fd2ddc72 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -125,7 +125,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_benchmark.py --reports $REPORTS
@@ -136,7 +136,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Default path:
@@ -152,7 +152,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Advanced path:

From 0fbe25b554ebe53e02b8ad59beb97a0b6007222c Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:36:49 +0100
Subject: [PATCH 06/14] Apply skiplist check to GEMM A@B^t and A^t@B steps

---
 .github/workflows/triton-benchmarks.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index f3fd2ddc72..211acba872 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -168,7 +168,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A@B^t) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
@@ -179,7 +179,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A^t@B) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS

From 12aa9ef314f9cfad0b13a29f7cf4e69d36c7c5d3 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:38:08 +0100
Subject: [PATCH 07/14] Negate skiplist checks so listed benchmarks are skipped

---
 .github/workflows/triton-benchmarks.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 211acba872..e3588ee453 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -116,7 +116,7 @@ jobs:
           python setup.py install
 
       - name: Run Triton Softmax kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'fused_softmax.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'fused_softmax.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python fused_softmax.py --reports $REPORTS
@@ -125,7 +125,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_benchmark.py --reports $REPORTS
@@ -136,7 +136,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Default path:
@@ -152,7 +152,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Advanced path:
@@ -168,7 +168,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A@B^t) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
@@ -179,7 +179,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A^t@B) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS

From d10efe003b8c31f6132c7a1282db858c801fb281 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 01:49:25 +0100
Subject: [PATCH 08/14] Apply skiplist check to remaining benchmark steps

---
 .github/workflows/triton-benchmarks.yml | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index e3588ee453..b69a66c832 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -190,7 +190,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (stream-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_streamk_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_streamk_benchmark.py --reports $REPORTS
@@ -198,7 +198,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (split-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_splitk_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_splitk_benchmark.py --reports $REPORTS
@@ -206,7 +206,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PreOp (exp) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_preop_exp_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_preop_exp_benchmark.py --reports $REPORTS
@@ -214,7 +214,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_gelu_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_gelu_benchmark.py --reports $REPORTS
@@ -222,7 +222,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_addmatrix_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
@@ -230,7 +230,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python flash_attention_fwd_benchmark.py --reports $REPORTS
@@ -240,7 +240,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=0 \
@@ -253,7 +253,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=1 \
@@ -266,7 +266,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Prefix Sums kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'prefix_sums.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python prefix_sums.py --reports $REPORTS
@@ -274,7 +274,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run micro benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'micro_benchmarks') }}
         run: |
           cd benchmarks/micro_benchmarks
           python run_benchmarks.py --reports $REPORTS

From df852e292235e04e9fc8a345382d5d94063c21b3 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 19:24:03 +0100
Subject: [PATCH 09/14] Parse input as JSON

---
 .github/workflows/triton-benchmarks.yml | 32 ++++++++++++-------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index b69a66c832..183ea3decc 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -116,7 +116,7 @@ jobs:
           python setup.py install
 
       - name: Run Triton Softmax kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'fused_softmax.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'fused_softmax.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python fused_softmax.py --reports $REPORTS
@@ -125,7 +125,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_benchmark.py --reports $REPORTS
@@ -136,7 +136,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Default path:
@@ -152,7 +152,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Advanced path:
@@ -168,7 +168,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A@B^t) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_abt') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
@@ -179,7 +179,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A^t@B) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_atb') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
@@ -190,7 +190,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (stream-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_streamk_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_streamk_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_streamk_benchmark.py --reports $REPORTS
@@ -198,7 +198,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (split-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_splitk_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_splitk_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_splitk_benchmark.py --reports $REPORTS
@@ -206,7 +206,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PreOp (exp) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_preop_exp_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_preop_exp_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_preop_exp_benchmark.py --reports $REPORTS
@@ -214,7 +214,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_gelu_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_gelu_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_gelu_benchmark.py --reports $REPORTS
@@ -222,7 +222,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_addmatrix_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_addmatrix_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
@@ -230,7 +230,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python flash_attention_fwd_benchmark.py --reports $REPORTS
@@ -240,7 +240,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_default') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=0 \
@@ -253,7 +253,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_advanced') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=1 \
@@ -266,7 +266,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Prefix Sums kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'prefix_sums.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'prefix_sums.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python prefix_sums.py --reports $REPORTS
@@ -274,7 +274,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run micro benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'micro_benchmarks') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'micro_benchmarks') }}
         run: |
           cd benchmarks/micro_benchmarks
           python run_benchmarks.py --reports $REPORTS

From 8bc9c48cd9c78bf35fdc28c96c1cee8d3337095a Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 19:52:42 +0100
Subject: [PATCH 10/14] Update .github/workflows/triton-benchmarks.yml

Co-authored-by: Pavel Chekin <pavel.chekin@intel.com>
---
 .github/workflows/triton-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 183ea3decc..a5cf3ab845 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -25,7 +25,7 @@ on:
         type: string
         default: "Triton benchmarks"
       skip_benchmarks:
-        description: List of benchmarks to skip
+        description: JSON list of benchmarks to skip
         type: string
         default: ""
   schedule:

From d895bc479ee144391a343e471d16d15998c4124b Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 19:52:51 +0100
Subject: [PATCH 11/14] Update .github/workflows/triton-benchmarks.yml

Co-authored-by: Pavel Chekin <pavel.chekin@intel.com>
---
 .github/workflows/triton-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index a5cf3ab845..4323c49a3a 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -27,7 +27,7 @@ on:
       skip_benchmarks:
         description: JSON list of benchmarks to skip
         type: string
-        default: ""
+        default: "[]"
   schedule:
     - cron: "5 23 * * *"
   pull_request:

From fcdfae4b4e30c96cf4f7e4cc254f4f729a43e67c Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 19:53:09 +0100
Subject: [PATCH 12/14] Update .github/workflows/triton-benchmarks.yml

Co-authored-by: Pavel Chekin <pavel.chekin@intel.com>
---
 .github/workflows/triton-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 4323c49a3a..5a9d5e83b2 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -116,7 +116,7 @@ jobs:
           python setup.py install
 
       - name: Run Triton Softmax kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'fused_softmax.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python fused_softmax.py --reports $REPORTS

From a2854ff2448c738593d1e112100de1533b7d822e Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 19:55:25 +0100
Subject: [PATCH 13/14] Parse input as JSON

---
 .github/workflows/triton-benchmarks.yml | 30 ++++++++++++-------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 5a9d5e83b2..24b45c5fd8 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -125,7 +125,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_benchmark.py --reports $REPORTS
@@ -136,7 +136,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_default') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'f), 'gemm_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Default path:
@@ -152,7 +152,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_advanced') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Advanced path:
@@ -168,7 +168,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A@B^t) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_abt') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
@@ -179,7 +179,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (A^t@B) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_atb') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
@@ -190,7 +190,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (stream-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_streamk_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_streamk_benchmark.py --reports $REPORTS
@@ -198,7 +198,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM (split-k) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_splitk_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_splitk_benchmark.py --reports $REPORTS
@@ -206,7 +206,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PreOp (exp) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_preop_exp_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_preop_exp_benchmark.py --reports $REPORTS
@@ -214,7 +214,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_gelu_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_gelu_benchmark.py --reports $REPORTS
@@ -222,7 +222,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_addmatrix_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
@@ -230,7 +230,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python flash_attention_fwd_benchmark.py --reports $REPORTS
@@ -240,7 +240,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_default') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=0 \
@@ -253,7 +253,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Triton FA kernel benchmark - advanced path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_advanced') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           TRITON_INTEL_ADVANCED_PATH=1 \
@@ -266,7 +266,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run Prefix Sums kernel benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'prefix_sums.py') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           python prefix_sums.py --reports $REPORTS
@@ -274,7 +274,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
 
       - name: Run micro benchmark
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'micro_benchmarks') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
         run: |
           cd benchmarks/micro_benchmarks
           python run_benchmarks.py --reports $REPORTS

From e6d02f1bb101a6354d19a5cf7241abf1ef129658 Mon Sep 17 00:00:00 2001
From: Vadim Musin <vadim.musin@intel.com>
Date: Tue, 12 Nov 2024 19:57:41 +0100
Subject: [PATCH 14/14] fix typo

---
 .github/workflows/triton-benchmarks.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml
index 24b45c5fd8..2a5f9937f6 100644
--- a/.github/workflows/triton-benchmarks.yml
+++ b/.github/workflows/triton-benchmarks.yml
@@ -136,7 +136,7 @@ jobs:
           python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
 
       - name: Run Triton GEMM kernel benchmark - default path
-        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'f), 'gemm_benchmark.py_default') }}
+        if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
         run: |
           cd benchmarks/triton_kernels_benchmark
           # Default path: