From 16479fc0970a0d45f5ebd4b623d03ab1cf11ca45 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Sun, 3 Nov 2024 22:57:38 +0100 Subject: [PATCH 01/14] Benchmarks core subset --- .github/workflows/triton-benchmarks.yml | 32 ++++++++++++++----------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 94e419646b..e9cfb7c2b8 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -24,6 +24,10 @@ on: description: Run name type: string default: "Triton benchmarks" + only_subset: + description: Run only core subset of benchmarks + type: boolean + default: false schedule: - cron: "5 23 * * *" pull_request: @@ -132,7 +136,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark # Default path: @@ -148,7 +152,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark # Advanced path: @@ -164,7 +168,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm 
--compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (A@B^t) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS @@ -175,7 +179,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (A^t@B) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS @@ -186,7 +190,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (stream-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python gemm_streamk_benchmark.py --reports $REPORTS @@ -194,7 +198,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (split-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && 
!cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python gemm_splitk_benchmark.py --reports $REPORTS @@ -202,7 +206,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PreOp (exp) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python gemm_preop_exp_benchmark.py --reports $REPORTS @@ -210,7 +214,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_gelu_benchmark.py --reports $REPORTS @@ -218,7 +222,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_addmatrix_benchmark.py --reports $REPORTS @@ -226,7 +230,7 @@ jobs: python 
../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python flash_attention_fwd_benchmark.py --reports $REPORTS @@ -236,7 +240,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=0 \ @@ -249,7 +253,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=1 \ @@ -262,7 +266,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Prefix Sums kernel benchmark - 
if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/triton_kernels_benchmark python prefix_sums.py --reports $REPORTS @@ -270,7 +274,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run micro benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} run: | cd benchmarks/micro_benchmarks python run_benchmarks.py --reports $REPORTS From 488f57c1d4eab65c55a5d79fa7a53b6529b21eb4 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:27:03 +0100 Subject: [PATCH 02/14] Test skiplist approach --- .github/workflows/triton-benchmarks.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index e9cfb7c2b8..5bc4c1dea1 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -24,10 +24,10 @@ on: description: Run name type: string default: "Triton benchmarks" - only_subset: - description: Run only core subset of benchmarks - type: boolean - default: false + skip_benchmarks: + description: List of benchmarks to skip + type: string + default: "" schedule: - cron: "5 23 * * *" pull_request: @@ -116,7 +116,7 @@ jobs: python setup.py install - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && search(inputs.skip_benchmarks, "fused_softmax.py") }} run: | cd benchmarks/triton_kernels_benchmark python fused_softmax.py --reports $REPORTS From 
284d898656449b1176600c7604bc358584d494b4 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:28:49 +0100 Subject: [PATCH 03/14] Test skiplist approach --- .github/workflows/triton-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 5bc4c1dea1..c9f2622bd2 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -116,7 +116,7 @@ jobs: python setup.py install - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && search(inputs.skip_benchmarks, "fused_softmax.py") }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, "fused_softmax.py") }} run: | cd benchmarks/triton_kernels_benchmark python fused_softmax.py --reports $REPORTS From 6ccc24392a5008fdb2f2944470a0e1d8bc5d3dd1 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:30:40 +0100 Subject: [PATCH 04/14] Test skiplist approach --- .github/workflows/triton-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index c9f2622bd2..3bc56f68e1 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -116,7 +116,7 @@ jobs: python setup.py install - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, "fused_softmax.py") }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'fused_softmax.py') }} run: | cd benchmarks/triton_kernels_benchmark python fused_softmax.py --reports $REPORTS From c67ca36330761ad6eef2bdfbd1f76cf0cbf58169 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:35:14 +0100 Subject: [PATCH 05/14] Test skiplist approach 
--- .github/workflows/triton-benchmarks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 3bc56f68e1..f3fd2ddc72 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -125,7 +125,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_benchmark.py --reports $REPORTS @@ -136,7 +136,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark # Default path: @@ -152,7 +152,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 
'gemm_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark # Advanced path: From 0fbe25b554ebe53e02b8ad59beb97a0b6007222c Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:36:49 +0100 Subject: [PATCH 06/14] Test skiplist approach --- .github/workflows/triton-benchmarks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index f3fd2ddc72..211acba872 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -168,7 +168,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (A@B^t) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS @@ -179,7 +179,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (A^t@B) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS From 12aa9ef314f9cfad0b13a29f7cf4e69d36c7c5d3 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:38:08 +0100 
Subject: [PATCH 07/14] Test skiplist approach --- .github/workflows/triton-benchmarks.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 211acba872..e3588ee453 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -116,7 +116,7 @@ jobs: python setup.py install - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'fused_softmax.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'fused_softmax.py') }} run: | cd benchmarks/triton_kernels_benchmark python fused_softmax.py --reports $REPORTS @@ -125,7 +125,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_benchmark.py --reports $REPORTS @@ -136,7 +136,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }} run: | cd 
benchmarks/triton_kernels_benchmark # Default path: @@ -152,7 +152,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark # Advanced path: @@ -168,7 +168,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (A@B^t) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS @@ -179,7 +179,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (A^t@B) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_A=1 python 
gemm_benchmark.py --reports $REPORTS From d10efe003b8c31f6132c7a1282db858c801fb281 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 01:49:25 +0100 Subject: [PATCH 08/14] Test skiplist approach --- .github/workflows/triton-benchmarks.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index e3588ee453..b69a66c832 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -190,7 +190,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (stream-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_streamk_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_streamk_benchmark.py --reports $REPORTS @@ -198,7 +198,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (split-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_splitk_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_splitk_benchmark.py --reports $REPORTS @@ -206,7 +206,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk 
--compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PreOp (exp) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_preop_exp_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_preop_exp_benchmark.py --reports $REPORTS @@ -214,7 +214,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_gelu_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_gelu_benchmark.py --reports $REPORTS @@ -222,7 +222,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_addmatrix_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_addmatrix_benchmark.py --reports $REPORTS @@ -230,7 +230,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv 
$REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python flash_attention_fwd_benchmark.py --reports $REPORTS @@ -240,7 +240,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=0 \ @@ -253,7 +253,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=1 \ @@ -266,7 +266,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn 
--compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Prefix Sums kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'prefix_sums.py') }} run: | cd benchmarks/triton_kernels_benchmark python prefix_sums.py --reports $REPORTS @@ -274,7 +274,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run micro benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !(inputs.only_subset || false) }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'micro_benchmarks') }} run: | cd benchmarks/micro_benchmarks python run_benchmarks.py --reports $REPORTS From df852e292235e04e9fc8a345382d5d94063c21b3 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 19:24:03 +0100 Subject: [PATCH 09/14] Parse input as JSON --- .github/workflows/triton-benchmarks.yml | 32 ++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index b69a66c832..183ea3decc 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -116,7 +116,7 @@ jobs: python setup.py install - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'fused_softmax.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'fused_softmax.py') }} run: | cd benchmarks/triton_kernels_benchmark python fused_softmax.py --reports $REPORTS @@ 
-125,7 +125,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_benchmark.py --reports $REPORTS @@ -136,7 +136,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_default') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark # Default path: @@ -152,7 +152,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_advanced') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark # Advanced path: @@ -168,7 +168,7 @@ jobs: python ../../scripts/build_report.py 
$REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (A@B^t) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_abt') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_abt') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS @@ -179,7 +179,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (A^t@B) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_benchmark.py_atb') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_atb') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS @@ -190,7 +190,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (stream-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_streamk_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_streamk_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_streamk_benchmark.py --reports $REPORTS @@ -198,7 +198,7 @@ jobs: python 
../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (split-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_splitk_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_splitk_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_splitk_benchmark.py --reports $REPORTS @@ -206,7 +206,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PreOp (exp) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_preop_exp_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_preop_exp_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_preop_exp_benchmark.py --reports $REPORTS @@ -214,7 +214,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_gelu_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_gelu_benchmark.py') }} run: | cd 
benchmarks/triton_kernels_benchmark python gemm_postop_gelu_benchmark.py --reports $REPORTS @@ -222,7 +222,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'gemm_postop_addmatrix_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_addmatrix_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_addmatrix_benchmark.py --reports $REPORTS @@ -230,7 +230,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python flash_attention_fwd_benchmark.py --reports $REPORTS @@ -240,7 +240,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 
'flash_attention_fwd_benchmark.py_default') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=0 \ @@ -253,7 +253,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'flash_attention_fwd_benchmark.py_advanced') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=1 \ @@ -266,7 +266,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Prefix Sums kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(inputs.skip_benchmarks, 'prefix_sums.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'prefix_sums.py') }} run: | cd benchmarks/triton_kernels_benchmark python prefix_sums.py --reports $REPORTS @@ -274,7 +274,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run micro benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && 
!contains(inputs.skip_benchmarks, 'micro_benchmarks') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'micro_benchmarks') }} run: | cd benchmarks/micro_benchmarks python run_benchmarks.py --reports $REPORTS From 8bc9c48cd9c78bf35fdc28c96c1cee8d3337095a Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 19:52:42 +0100 Subject: [PATCH 10/14] Update .github/workflows/triton-benchmarks.yml Co-authored-by: Pavel Chekin --- .github/workflows/triton-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 183ea3decc..a5cf3ab845 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -25,7 +25,7 @@ on: type: string default: "Triton benchmarks" skip_benchmarks: - description: List of benchmarks to skip + description: JSON list of benchmarks to skip type: string default: "" schedule: From d895bc479ee144391a343e471d16d15998c4124b Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 19:52:51 +0100 Subject: [PATCH 11/14] Update .github/workflows/triton-benchmarks.yml Co-authored-by: Pavel Chekin --- .github/workflows/triton-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index a5cf3ab845..4323c49a3a 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -27,7 +27,7 @@ on: skip_benchmarks: description: JSON list of benchmarks to skip type: string - default: "" + default: "[]" schedule: - cron: "5 23 * * *" pull_request: From fcdfae4b4e30c96cf4f7e4cc254f4f729a43e67c Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 19:53:09 +0100 Subject: [PATCH 12/14] Update .github/workflows/triton-benchmarks.yml Co-authored-by: Pavel Chekin --- 
.github/workflows/triton-benchmarks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 4323c49a3a..5a9d5e83b2 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -116,7 +116,7 @@ jobs: python setup.py install - name: Run Triton Softmax kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'fused_softmax.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }} run: | cd benchmarks/triton_kernels_benchmark python fused_softmax.py --reports $REPORTS From a2854ff2448c738593d1e112100de1533b7d822e Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 19:55:25 +0100 Subject: [PATCH 13/14] Parse input as JSON --- .github/workflows/triton-benchmarks.yml | 30 ++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 5a9d5e83b2..24b45c5fd8 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -125,7 +125,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_benchmark.py --reports $REPORTS @@ -136,7 +136,7 @@ jobs: python ../../scripts/build_report.py 
$REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_default') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'f), 'gemm_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark # Default path: @@ -152,7 +152,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_advanced') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark # Advanced path: @@ -168,7 +168,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (A@B^t) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_abt') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS @@ -179,7 +179,7 @@ jobs: 
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (A^t@B) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_benchmark.py_atb') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }} run: | cd benchmarks/triton_kernels_benchmark TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS @@ -190,7 +190,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG - name: Run Triton GEMM (stream-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_streamk_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_streamk_benchmark.py --reports $REPORTS @@ -198,7 +198,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM (split-k) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_splitk_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }} run: | cd 
benchmarks/triton_kernels_benchmark python gemm_splitk_benchmark.py --reports $REPORTS @@ -206,7 +206,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PreOp (exp) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_preop_exp_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_preop_exp_benchmark.py --reports $REPORTS @@ -214,7 +214,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (Gelu) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'gemm_postop_gelu_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_gelu_benchmark.py --reports $REPORTS @@ -222,7 +222,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton GEMM + PostOp (add matrix) kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 
'gemm_postop_addmatrix_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python gemm_postop_addmatrix_benchmark.py --reports $REPORTS @@ -230,7 +230,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }} run: | cd benchmarks/triton_kernels_benchmark python flash_attention_fwd_benchmark.py --reports $REPORTS @@ -240,7 +240,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_default') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=0 \ @@ -253,7 +253,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col
"Triton-GB/s" --tag $TAG - name: Run Triton FA kernel benchmark - advanced path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'flash_attention_fwd_benchmark.py_advanced') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }} run: | cd benchmarks/triton_kernels_benchmark TRITON_INTEL_ADVANCED_PATH=1 \ @@ -266,7 +266,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run Prefix Sums kernel benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'prefix_sums.py') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }} run: | cd benchmarks/triton_kernels_benchmark python prefix_sums.py --reports $REPORTS @@ -274,7 +274,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG - name: Run micro benchmark - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks), 'micro_benchmarks') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }} run: | cd benchmarks/micro_benchmarks python run_benchmarks.py --reports $REPORTS From e6d02f1bb101a6354d19a5cf7241abf1ef129658 Mon Sep 17 00:00:00 2001 From: Vadim Musin Date: Tue, 12 Nov 2024 19:57:41 +0100 Subject: [PATCH 14/14] fix typo --- .github/workflows/triton-benchmarks.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/triton-benchmarks.yml b/.github/workflows/triton-benchmarks.yml index 24b45c5fd8..2a5f9937f6 100644 --- a/.github/workflows/triton-benchmarks.yml +++ b/.github/workflows/triton-benchmarks.yml @@ -136,7 +136,7 @@ jobs: python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG - name: Run Triton GEMM kernel benchmark - default path - if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'f), 'gemm_benchmark.py_default') }} + if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }} run: | cd benchmarks/triton_kernels_benchmark # Default path: