@@ -117,17 +117,9 @@ jobs:
117117 cd benchmarks
118118 pip install .
119119
120- - name : Build PTI
121- run : |
122- ./scripts/install-pti.sh --build-level-zero
123- PTI_LIBS_DIR=$(python ./scripts/pti_lib.py)
124- ls $PTI_LIBS_DIR
125- echo "PTI_LIBS_DIR=$PTI_LIBS_DIR" >> $GITHUB_ENV
126-
127120 - name : Run Triton Softmax kernel benchmark
128121 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'fused_softmax.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
129122 run : |
130- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
131123 cd benchmarks/triton_kernels_benchmark
132124 python fused_softmax.py --reports $REPORTS --n_runs $N_RUNS
133125 source ../../scripts/capture-hw-details.sh
@@ -138,15 +130,13 @@ jobs:
138130 - name : Run Triton Softmax kernel benchmark with Proton
139131 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'fused_softmax.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
140132 run : |
141- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
142133 cd benchmarks/triton_kernels_benchmark
143134 BENCHMARKING_METHOD=PROTON_PROFILER python fused_softmax.py
144135 source ../../scripts/capture-hw-details.sh
145136
146137 - name : Run Triton GEMM kernel benchmark
147138 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
148139 run : |
149- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
150140 cd benchmarks/triton_kernels_benchmark
151141 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
152142 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
@@ -160,7 +150,6 @@ jobs:
160150 - name : Run Triton GEMM kernel benchmark - with tensor of pointer
161151 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py') }}
162152 run : |
163- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
164153 cd benchmarks/triton_kernels_benchmark
165154 python gemm_tensor_of_ptr_benchmark.py --reports $REPORTS --n_runs $N_RUNS
166155 source ../../scripts/capture-hw-details.sh
@@ -173,7 +162,6 @@ jobs:
173162 - name : Run Triton GEMM kernel benchmark - with tensor descriptor
174163 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py') }}
175164 run : |
176- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
177165 cd benchmarks/triton_kernels_benchmark
178166 python gemm_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
179167 source ../../scripts/capture-hw-details.sh
@@ -186,7 +174,6 @@ jobs:
186174 - name : Run Triton GEMM (A@B^t) kernel benchmark
187175 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_abt')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
188176 run : |
189- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
190177 cd benchmarks/triton_kernels_benchmark
191178 TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
192179 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
@@ -198,7 +185,6 @@ jobs:
198185 - name : Run Triton GEMM (A^t@B) kernel benchmark
199186 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_atb')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
200187 run : |
201- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
202188 cd benchmarks/triton_kernels_benchmark
203189 TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
204190 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
@@ -210,7 +196,6 @@ jobs:
210196 - name : Run Triton GEMM (stream-k) kernel benchmark
211197 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_streamk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
212198 run : |
213- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
214199 cd benchmarks/triton_kernels_benchmark
215200 python gemm_streamk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
216201 source ../../scripts/capture-hw-details.sh
@@ -220,7 +205,6 @@ jobs:
220205 - name : Run Triton GEMM (split-k) kernel benchmark
221206 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_splitk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
222207 run : |
223- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
224208 cd benchmarks/triton_kernels_benchmark
225209 python gemm_splitk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
226210 source ../../scripts/capture-hw-details.sh
@@ -230,7 +214,6 @@ jobs:
230214 - name : Run Triton GEMM + PreOp (exp) kernel benchmark
231215 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_preop_exp_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
232216 run : |
233- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
234217 cd benchmarks/triton_kernels_benchmark
235218 python gemm_preop_exp_benchmark.py --reports $REPORTS --n_runs $N_RUNS
236219 source ../../scripts/capture-hw-details.sh
@@ -239,7 +222,6 @@ jobs:
239222 - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
240223 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
241224 run : |
242- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
243225 cd benchmarks/triton_kernels_benchmark
244226 python gemm_postop_gelu_benchmark.py --reports $REPORTS --n_runs $N_RUNS
245227 source ../../scripts/capture-hw-details.sh
@@ -248,7 +230,6 @@ jobs:
248230 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark bfloat16
249231 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py') }}
250232 run : |
251- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
252233 cd benchmarks/triton_kernels_benchmark
253234 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
254235 source ../../scripts/capture-hw-details.sh
@@ -258,7 +239,6 @@ jobs:
258239 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark int8
259240 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py') }}
260241 run : |
261- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
262242 cd benchmarks/triton_kernels_benchmark
263243 INT8_ONLY=1 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
264244 source ../../scripts/capture-hw-details.sh
@@ -268,7 +248,6 @@ jobs:
268248 - name : Run Triton FA fwd kernel benchmark
269249 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_benchmark.py') }}
270250 run : |
271- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
272251 cd benchmarks/triton_kernels_benchmark
273252 python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
274253
@@ -279,7 +258,6 @@ jobs:
279258 - name : Run Triton FA bwd kernel benchmark
280259 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_bwd_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_bwd_benchmark.py') }}
281260 run : |
282- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
283261 cd benchmarks/triton_kernels_benchmark
284262 FA_KERNEL_MODE="bwd" \
285263 python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
@@ -292,7 +270,6 @@ jobs:
292270 - name : Run Triton FA fwd kernel benchmark - with tensor descriptors
293271 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py') }}
294272 run : |
295- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
296273 cd benchmarks/triton_kernels_benchmark
297274 python flash_attention_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
298275 mv $REPORTS/attn-performance.csv $REPORTS/attn-tensor-desc-performance.csv
@@ -304,7 +281,6 @@ jobs:
304281 - name : Run Triton FlexAttention Causal Mask fwd kernel benchmark
305282 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py') }}
306283 run : |
307- # export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
308284 cd benchmarks/triton_kernels_benchmark
309285 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
310286
@@ -315,7 +291,6 @@ jobs:
315291 - name : Run Triton FlexAttention (batch_size=4) Causal Mask fwd kernel benchmark
316292 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py') }}
317293 run : |
318- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
319294 cd benchmarks/triton_kernels_benchmark
320295 BATCH_SIZE=4 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
321296
@@ -326,7 +301,6 @@ jobs:
326301 - name : Run Triton FlexAttention (batch_size=16) Causal Mask fwd kernel benchmark
327302 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py') }}
328303 run : |
329- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
330304 cd benchmarks/triton_kernels_benchmark
331305 BATCH_SIZE=16 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
332306
@@ -337,7 +311,6 @@ jobs:
337311 - name : Run Triton FlexAttention Causal Mask bwd kernel benchmark
338312 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_bwd_benchmark_causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_bwd_benchmark_causal_mask.py') }}
339313 run : |
340- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
341314 cd benchmarks/triton_kernels_benchmark
342315 FA_KERNEL_MODE='bwd' \
343316 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
@@ -349,7 +322,6 @@ jobs:
349322 - name : Run Triton FlexAttention Custom Masks fwd kernel benchmark
350323 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py') }}
351324 run : |
352- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
353325 cd benchmarks/triton_kernels_benchmark
354326 python flex_attention_benchmark_custom_masks.py --reports $REPORTS --n_runs $N_RUNS
355327
@@ -363,7 +335,6 @@ jobs:
363335 - name : Run Prefix Sums kernel benchmark
364336 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'prefix_sums.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
365337 run : |
366- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
367338 cd benchmarks/triton_kernels_benchmark
368339 python prefix_sums.py --reports $REPORTS --n_runs $N_RUNS
369340 source ../../scripts/capture-hw-details.sh
@@ -372,7 +343,6 @@ jobs:
372343 - name : Run micro benchmark
373344 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'micro_benchmarks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
374345 run : |
375- export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
376346 cd benchmarks/micro_benchmarks
377347 python run_benchmarks.py --reports $REPORTS
378348
0 commit comments