3434 PYTHON_VERSION : "3.10"
3535 BENCHMARKING_METHOD : ${{ inputs.benchmarking_method || 'PYTORCH_LEGACY_PROFILER_USING_IPEX' }}
3636 USE_IPEX : ${{ github.event_name != 'workflow_dispatch' && '1' || inputs.benchmarking_method == 'PYTORCH_LEGACY_PROFILER_USING_IPEX' && '1' || '0' }}
37+ TAG : ${{ inputs.tag || (github.event_name == 'pull_request' && 'pr') || (github.event_name == 'schedule' && 'ci') || 'test' }}
3738
3839jobs :
3940 build :
@@ -111,7 +112,6 @@ jobs:
111112 cd benchmarks/triton_kernels_benchmark
112113 python fused_softmax.py --reports $REPORTS
113114 source ../../scripts/capture-hw-details.sh
114- TAG=${{ inputs.tag || 'ci' }}
115115 python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
116116 python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
117117
@@ -123,7 +123,6 @@ jobs:
123123 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
124124
125125 source ../../scripts/capture-hw-details.sh
126- TAG=${{ inputs.tag || 'ci' }}
127126 python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
128127 python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
129128
@@ -140,7 +139,7 @@ jobs:
140139 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
141140
142141 source ../../scripts/capture-hw-details.sh
143- TAG=${{ inputs.tag || 'ci' }}-dflt
142+ TAG="${TAG}-dflt"
144143 python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
145144
146145 - name : Run Triton GEMM kernel benchmark - advanced path
@@ -156,7 +155,7 @@ jobs:
156155 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
157156
158157 source ../../scripts/capture-hw-details.sh
159- TAG=${{ inputs.tag || 'ci' }}-adv
158+ TAG="${TAG}-adv"
160159 python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
161160
162161 - name : Run Triton GEMM (A@B^t) kernel benchmark
@@ -167,7 +166,6 @@ jobs:
167166 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
168167 source ../../scripts/capture-hw-details.sh
169168
170- TAG=${{ inputs.tag || 'ci' }}
171169 python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
172170
173171 - name : Run Triton GEMM (A^t@B) kernel benchmark
@@ -178,7 +176,6 @@ jobs:
178176 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
179177 source ../../scripts/capture-hw-details.sh
180178
181- TAG=${{ inputs.tag || 'ci' }}
182179 python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
183180
184181 - name : Run Triton GEMM (stream-k) kernel benchmark
@@ -187,7 +184,6 @@ jobs:
187184 cd benchmarks/triton_kernels_benchmark
188185 python gemm_streamk_benchmark.py --reports $REPORTS
189186 source ../../scripts/capture-hw-details.sh
190- TAG=${{ inputs.tag || 'ci' }}
191187 python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
192188
193189 - name : Run Triton GEMM (split-k) kernel benchmark
@@ -196,7 +192,6 @@ jobs:
196192 cd benchmarks/triton_kernels_benchmark
197193 python gemm_splitk_benchmark.py --reports $REPORTS
198194 source ../../scripts/capture-hw-details.sh
199- TAG=${{ inputs.tag || 'ci' }}
200195 python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
201196
202197 - name : Run Triton GEMM + PreOp (exp) kernel benchmark
@@ -205,7 +200,6 @@ jobs:
205200 cd benchmarks/triton_kernels_benchmark
206201 python gemm_preop_exp_benchmark.py --reports $REPORTS
207202 source ../../scripts/capture-hw-details.sh
208- TAG=${{ inputs.tag || 'ci' }}
209203 python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
210204
211205 - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
@@ -214,7 +208,6 @@ jobs:
214208 cd benchmarks/triton_kernels_benchmark
215209 python gemm_postop_gelu_benchmark.py --reports $REPORTS
216210 source ../../scripts/capture-hw-details.sh
217- TAG=${{ inputs.tag || 'ci' }}
218211 python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
219212
220213 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark
@@ -223,7 +216,6 @@ jobs:
223216 cd benchmarks/triton_kernels_benchmark
224217 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
225218 source ../../scripts/capture-hw-details.sh
226- TAG=${{ inputs.tag || 'ci' }}
227219 python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
228220
229221 - name : Run Triton FA kernel benchmark
@@ -232,7 +224,6 @@ jobs:
232224 cd benchmarks/triton_kernels_benchmark
233225 python flash_attention_fwd_benchmark.py --reports $REPORTS
234226
235- TAG=${{ inputs.tag || 'ci' }}
236227 source ../../scripts/capture-hw-details.sh
237228 python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
238229 python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
@@ -246,7 +237,7 @@ jobs:
246237 IGC_VISAOptions=" -enableBCR" \
247238 python flash_attention_fwd_benchmark.py --reports $REPORTS
248239
249- TAG=${{ inputs.tag || 'ci' }}-dflt
240+ TAG="${TAG}-dflt"
250241 source ../../scripts/capture-hw-details.sh
251242 python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
252243
@@ -260,7 +251,7 @@ jobs:
260251 IGC_VISAOptions=" -enableBCR" \
261252 python flash_attention_fwd_benchmark.py --reports $REPORTS
262253
263- TAG=${{ inputs.tag || 'ci' }}-adv
254+ TAG="${TAG}-adv"
264255 source ../../scripts/capture-hw-details.sh
265256 python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
266257
@@ -270,7 +261,6 @@ jobs:
270261 cd benchmarks/triton_kernels_benchmark
271262 python prefix_sums.py --reports $REPORTS
272263 source ../../scripts/capture-hw-details.sh
273- TAG=${{ inputs.tag || 'ci' }}
274264 python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
275265
276266 - name : Run micro benchmark
0 commit comments