@@ -138,139 +138,23 @@ jobs:
138138 cd benchmarks
139139 python setup.py install
140140
141- - name : Run Triton Softmax kernel benchmark
142- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
143- run : |
144- cd benchmarks/triton_kernels_benchmark
145- python fused_softmax.py --reports $REPORTS
146- source ../../scripts/capture-hw-details.sh
147- python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
148- python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
149-
150- - name : Run Triton GEMM kernel benchmark
151- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
152- run : |
153- cd benchmarks/triton_kernels_benchmark
154- python gemm_benchmark.py --reports $REPORTS
155- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
156-
157- source ../../scripts/capture-hw-details.sh
158- python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
159- python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
160-
161- - name : Run Triton GEMM kernel benchmark - advanced path
162- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
141+ - name : Run Triton FA kernel benchmark - advanced path (w/ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT)
142+ if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
163143 run : |
164144 cd benchmarks/triton_kernels_benchmark
165- # Advanced path:
166145 TRITON_INTEL_ADVANCED_PATH=1 \
167- IGC_VISAOptions=" -enableBCR -nolocalra" \
168- IGC_DisableLoopUnroll=1 \
169- python gemm_benchmark.py --reports $REPORTS
170- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
171-
172- source ../../scripts/capture-hw-details.sh
173- TAG="${TAG}-adv"
174- python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
175-
176- - name : Run Triton GEMM (A@B^t) kernel benchmark
177- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
178- run : |
179- cd benchmarks/triton_kernels_benchmark
180- TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
181- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
182- source ../../scripts/capture-hw-details.sh
183-
184- python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
185- python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
186-
187- - name : Run Triton GEMM (A^t@B) kernel benchmark
188- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
189- run : |
190- cd benchmarks/triton_kernels_benchmark
191- TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
192- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
193- source ../../scripts/capture-hw-details.sh
194-
195- python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
196- python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
197-
198- - name : Run Triton GEMM (stream-k) kernel benchmark
199- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
200- run : |
201- cd benchmarks/triton_kernels_benchmark
202- python gemm_streamk_benchmark.py --reports $REPORTS
203- source ../../scripts/capture-hw-details.sh
204- python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
205-
206- - name : Run Triton GEMM (split-k) kernel benchmark
207- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
208- run : |
209- cd benchmarks/triton_kernels_benchmark
210- python gemm_splitk_benchmark.py --reports $REPORTS
211- source ../../scripts/capture-hw-details.sh
212- python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
213-
214- - name : Run Triton GEMM + PreOp (exp) kernel benchmark
215- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
216- run : |
217- cd benchmarks/triton_kernels_benchmark
218- python gemm_preop_exp_benchmark.py --reports $REPORTS
219- source ../../scripts/capture-hw-details.sh
220- python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
221-
222- - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
223- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
224- run : |
225- cd benchmarks/triton_kernels_benchmark
226- python gemm_postop_gelu_benchmark.py --reports $REPORTS
227- source ../../scripts/capture-hw-details.sh
228- python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
229-
230- - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark
231- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark.py') }}
232- run : |
233- cd benchmarks/triton_kernels_benchmark
234- python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
235- source ../../scripts/capture-hw-details.sh
236- python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
237-
238- - name : Run Triton FA kernel benchmark
239- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
240- run : |
241- cd benchmarks/triton_kernels_benchmark
146+ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
147+ IGC_VISAOptions=" -enableBCR" \
242148 python flash_attention_fwd_benchmark.py --reports $REPORTS
243149
244- source ../../scripts/capture-hw-details.sh
245- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
246- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
247-
248- - name : Run Triton FA kernel benchmark - advanced path
150+ - name : Run Triton FA kernel benchmark - advanced path (w/o TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT)
249151 if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
250152 run : |
251153 cd benchmarks/triton_kernels_benchmark
252154 TRITON_INTEL_ADVANCED_PATH=1 \
253155 IGC_VISAOptions=" -enableBCR" \
254156 python flash_attention_fwd_benchmark.py --reports $REPORTS
255157
256- TAG="${TAG}-adv"
257- source ../../scripts/capture-hw-details.sh
258- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
259-
260- - name : Run Prefix Sums kernel benchmark
261- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
262- run : |
263- cd benchmarks/triton_kernels_benchmark
264- python prefix_sums.py --reports $REPORTS
265- source ../../scripts/capture-hw-details.sh
266- python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
267-
268- - name : Run micro benchmark
269- if : ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
270- run : |
271- cd benchmarks/micro_benchmarks
272- python run_benchmarks.py --reports $REPORTS
273-
274158 - name : Save pip cache
275159 if : ${{ steps.pip-cache.outputs.status == 'miss' }}
276160 uses : ./.github/actions/save
0 commit comments