@@ -132,26 +132,6 @@ jobs:
132132 cd benchmarks
133133 python setup.py install
134134
135- - name : Run Triton Softmax kernel benchmark
136- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
137- run : |
138- cd benchmarks/triton_kernels_benchmark
139- python fused_softmax.py --reports $REPORTS
140- source ../../scripts/capture-hw-details.sh
141- python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
142- python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
143-
144- - name : Run Triton GEMM kernel benchmark
145- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
146- run : |
147- cd benchmarks/triton_kernels_benchmark
148- python gemm_benchmark.py --reports $REPORTS
149- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
150-
151- source ../../scripts/capture-hw-details.sh
152- python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
153- python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
154-
155135 - name : Run Triton GEMM kernel benchmark - default path
156136 if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
157137 run : |
@@ -182,117 +162,61 @@ jobs:
182162 TAG="${TAG}-adv"
183163 python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
184164
185- - name : Run Triton GEMM (A@B^t) kernel benchmark
186- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
187- run : |
188- cd benchmarks/triton_kernels_benchmark
189- TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
190- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
191- source ../../scripts/capture-hw-details.sh
192-
193- python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
194- python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
195-
196- - name : Run Triton GEMM (A^t@B) kernel benchmark
197- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
198- run : |
199- cd benchmarks/triton_kernels_benchmark
200- TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
201- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
202- source ../../scripts/capture-hw-details.sh
203-
204- python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
205- python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
206-
207- - name : Run Triton GEMM (stream-k) kernel benchmark
208- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
209- run : |
210- cd benchmarks/triton_kernels_benchmark
211- python gemm_streamk_benchmark.py --reports $REPORTS
212- source ../../scripts/capture-hw-details.sh
213- python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
214-
215- - name : Run Triton GEMM (split-k) kernel benchmark
216- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
217- run : |
218- cd benchmarks/triton_kernels_benchmark
219- python gemm_splitk_benchmark.py --reports $REPORTS
220- source ../../scripts/capture-hw-details.sh
221- python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
222-
223- - name : Run Triton GEMM + PreOp (exp) kernel benchmark
224- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
225- run : |
226- cd benchmarks/triton_kernels_benchmark
227- python gemm_preop_exp_benchmark.py --reports $REPORTS
228- source ../../scripts/capture-hw-details.sh
229- python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
230-
231- - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
232- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
165+ - name : Run Triton GEMM kernel benchmark - default path (remove DisableLoopUnroll)
166+ if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
233167 run : |
234168 cd benchmarks/triton_kernels_benchmark
235- python gemm_postop_gelu_benchmark.py --reports $REPORTS
236- source ../../scripts/capture-hw-details.sh
237- python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
169+ # Default path:
170+ TRITON_INTEL_ADVANCED_PATH=0 \
171+ IGC_VISAOptions=" -enableBCR -nolocalra" \
172+ python gemm_benchmark.py --reports $REPORTS
173+ mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
238174
239- - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark
240- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark.py') }}
241- run : |
242- cd benchmarks/triton_kernels_benchmark
243- python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
244175 source ../../scripts/capture-hw-details.sh
245- python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
176+ TAG="${TAG}-dflt"
177+ python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
246178
247- - name : Run Triton FA kernel benchmark
248- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
179+ - name : Run Triton GEMM kernel benchmark - advanced path (remove DisableLoopUnroll)
180+ if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
249181 run : |
250182 cd benchmarks/triton_kernels_benchmark
251- python flash_attention_fwd_benchmark.py --reports $REPORTS
183+ # Advanced path:
184+ TRITON_INTEL_ADVANCED_PATH=1 \
185+ IGC_VISAOptions=" -enableBCR -nolocalra" \
186+ python gemm_benchmark.py --reports $REPORTS
187+ mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
252188
253189 source ../../scripts/capture-hw-details.sh
254- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
255- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
190+ TAG="${TAG}-adv"
191+ python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
256192
257- - name : Run Triton FA kernel benchmark - default path
258- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmark || '[]'), 'flash_attention_fwd_benchmark.py_default') }}
193+ - name : Run Triton GEMM kernel benchmark - default path (remove VISAOptions)
194+ if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
259195 run : |
260196 cd benchmarks/triton_kernels_benchmark
197+ # Default path:
261198 TRITON_INTEL_ADVANCED_PATH=0 \
262- TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
263- IGC_VISAOptions=" -enableBCR" \
264- python flash_attention_fwd_benchmark.py --reports $REPORTS
199+ IGC_DisableLoopUnroll=1 \
200+ python gemm_benchmark.py --reports $REPORTS
201+ mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
265202
266- TAG="${TAG}-dflt"
267203 source ../../scripts/capture-hw-details.sh
268- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
204+ TAG="${TAG}-dflt"
205+ python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
269206
270- - name : Run Triton FA kernel benchmark - advanced path
271- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
207+ - name : Run Triton GEMM kernel benchmark - advanced path (remove VISAOptions)
208+ if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
272209 run : |
273210 cd benchmarks/triton_kernels_benchmark
211+ # Advanced path:
274212 TRITON_INTEL_ADVANCED_PATH=1 \
275- TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
276- IGC_VISAOptions=" -enableBCR" \
277- python flash_attention_fwd_benchmark.py --reports $REPORTS
278-
279- TAG="${TAG}-adv"
280- source ../../scripts/capture-hw-details.sh
281- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
213+ IGC_DisableLoopUnroll=1 \
214+ python gemm_benchmark.py --reports $REPORTS
215+ mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
282216
283- - name : Run Prefix Sums kernel benchmark
284- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
285- run : |
286- cd benchmarks/triton_kernels_benchmark
287- python prefix_sums.py --reports $REPORTS
288217 source ../../scripts/capture-hw-details.sh
289- python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
290-
291- - name : Run micro benchmark
292- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
293- run : |
294- cd benchmarks/micro_benchmarks
295- python run_benchmarks.py --reports $REPORTS
218+ TAG="${TAG}-adv"
219+ python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
296220
297221 - name : Save pip cache
298222 if : ${{ steps.pip-cache.outputs.status == 'miss' }}
0 commit comments