@@ -106,25 +106,19 @@ jobs:
106106 cd benchmarks
107107 python setup.py install
108108
109- - name : Run Triton Softmax kernel benchmark
110- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
111- run : |
112- cd benchmarks/triton_kernels_benchmark
113- python fused_softmax.py --reports $REPORTS
114- source ../../scripts/capture-hw-details.sh
115- python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
116- python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
117-
118- - name : Run Triton GEMM kernel benchmark
109+ - name : Run Triton GEMM kernel benchmark - default path (before)
119110 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
120111 run : |
121112 cd benchmarks/triton_kernels_benchmark
113+ # Default path:
114+ TRITON_INTEL_ADVANCED_PATH=0 \
115+ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
122116 python gemm_benchmark.py --reports $REPORTS
123- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
117+ mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
124118
125119 source ../../scripts/capture-hw-details.sh
126- python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
127- python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
120+ TAG="${TAG}-dflt"
121+ python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
128122
129123 - name : Run Triton GEMM kernel benchmark - default path
130124 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
@@ -142,7 +136,7 @@ jobs:
142136 TAG="${TAG}-dflt"
143137 python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
144138
145- - name : Run Triton GEMM kernel benchmark - advanced path
139+ - name : Run Triton GEMM kernel benchmark - advanced path (before)
146140 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
147141 run : |
148142 cd benchmarks/triton_kernels_benchmark
@@ -158,92 +152,46 @@ jobs:
158152 TAG="${TAG}-adv"
159153 python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
160154
161- - name : Run Triton GEMM (A@B^t) kernel benchmark
162- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
163- run : |
164- cd benchmarks/triton_kernels_benchmark
165- TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
166- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
167- source ../../scripts/capture-hw-details.sh
168-
169- python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
170- python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
171-
172- - name : Run Triton GEMM (A^t@B) kernel benchmark
173- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
174- run : |
175- cd benchmarks/triton_kernels_benchmark
176- TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
177- mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
178- source ../../scripts/capture-hw-details.sh
179-
180- python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
181- python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
182-
183- - name : Run Triton GEMM (stream-k) kernel benchmark
184- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
185- run : |
186- cd benchmarks/triton_kernels_benchmark
187- python gemm_streamk_benchmark.py --reports $REPORTS
188- source ../../scripts/capture-hw-details.sh
189- python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
190-
191- - name : Run Triton GEMM (split-k) kernel benchmark
192- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
193- run : |
194- cd benchmarks/triton_kernels_benchmark
195- python gemm_splitk_benchmark.py --reports $REPORTS
196- source ../../scripts/capture-hw-details.sh
197- python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
198-
199- - name : Run Triton GEMM + PreOp (exp) kernel benchmark
200- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
201- run : |
202- cd benchmarks/triton_kernels_benchmark
203- python gemm_preop_exp_benchmark.py --reports $REPORTS
204- source ../../scripts/capture-hw-details.sh
205- python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
206-
207- - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
155+ - name : Run Triton GEMM kernel benchmark - advanced path
208156 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
209157 run : |
210158 cd benchmarks/triton_kernels_benchmark
211- python gemm_postop_gelu_benchmark.py --reports $REPORTS
212- source ../../scripts/capture-hw-details.sh
213- python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
159+ # Advanced path:
160+ TRITON_INTEL_ADVANCED_PATH=1 \
161+ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
162+ python gemm_benchmark.py --reports $REPORTS
163+ mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
214164
215- - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark
216- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
217- run : |
218- cd benchmarks/triton_kernels_benchmark
219- python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
220165 source ../../scripts/capture-hw-details.sh
221- python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
166+ TAG="${TAG}-adv"
167+ python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
222168
223- - name : Run Triton FA kernel benchmark
169+ - name : Run Triton FA kernel benchmark - default path (before)
224170 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
225171 run : |
226172 cd benchmarks/triton_kernels_benchmark
173+ TRITON_INTEL_ADVANCED_PATH=0 \
174+ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
175+ IGC_VISAOptions=" -enableBCR" \
227176 python flash_attention_fwd_benchmark.py --reports $REPORTS
228177
178+ TAG="${TAG}-dflt"
229179 source ../../scripts/capture-hw-details.sh
230- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
231- python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
180+ python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
232181
233182 - name : Run Triton FA kernel benchmark - default path
234183 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
235184 run : |
236185 cd benchmarks/triton_kernels_benchmark
237186 TRITON_INTEL_ADVANCED_PATH=0 \
238187 TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
239- IGC_VISAOptions=" -enableBCR" \
240188 python flash_attention_fwd_benchmark.py --reports $REPORTS
241189
242190 TAG="${TAG}-dflt"
243191 source ../../scripts/capture-hw-details.sh
244192 python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
245193
246- - name : Run Triton FA kernel benchmark - advanced path
194+ - name : Run Triton FA kernel benchmark - advanced path (before)
247195 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
248196 run : |
249197 cd benchmarks/triton_kernels_benchmark
@@ -257,19 +205,18 @@ jobs:
257205 source ../../scripts/capture-hw-details.sh
258206 python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
259207
260- - name : Run Prefix Sums kernel benchmark
208+ - name : Run Triton FA kernel benchmark - advanced path
261209 if : ${{ steps.install.outcome == 'success' && !cancelled() }}
262210 run : |
263211 cd benchmarks/triton_kernels_benchmark
264- python prefix_sums.py --reports $REPORTS
265- source ../../scripts/capture-hw-details.sh
266- python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
212+ TRITON_INTEL_ADVANCED_PATH=1 \
213+ TRITON_INTEL_ENABLE_INSTR_SCHED=1 \
214+ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
215+ python flash_attention_fwd_benchmark.py --reports $REPORTS
267216
268- - name : Run micro benchmark
269- if : ${{ steps.install.outcome == 'success' && !cancelled() }}
270- run : |
271- cd benchmarks/micro_benchmarks
272- python run_benchmarks.py --reports $REPORTS
217+ TAG="${TAG}-adv"
218+ source ../../scripts/capture-hw-details.sh
219+ python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
273220
274221 - name : Save pip cache
275222 if : ${{ steps.pip-cache.outputs.status == 'miss' }}
0 commit comments