@@ -144,6 +144,7 @@ jobs:
144144 - name : Run Triton GEMM kernel benchmark
145145 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
146146 run : |
147+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
147148 cd benchmarks/triton_kernels_benchmark
148149 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
149150 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
@@ -157,6 +158,7 @@ jobs:
157158 - name : Run Triton GEMM kernel benchmark - with tensor of pointer
158159 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py') }}
159160 run : |
161+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
160162 cd benchmarks/triton_kernels_benchmark
161163 python gemm_tensor_of_ptr_benchmark.py --reports $REPORTS --n_runs $N_RUNS
162164 source ../../scripts/capture-hw-details.sh
@@ -169,6 +171,7 @@ jobs:
169171 - name : Run Triton GEMM kernel benchmark - with tensor descriptor
170172 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py') }}
171173 run : |
174+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
172175 cd benchmarks/triton_kernels_benchmark
173176 python gemm_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
174177 source ../../scripts/capture-hw-details.sh
@@ -181,6 +184,7 @@ jobs:
181184 - name : Run Triton GEMM (A@B^t) kernel benchmark
182185 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_abt')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
183186 run : |
187+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
184188 cd benchmarks/triton_kernels_benchmark
185189 TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
186190 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
@@ -192,6 +196,7 @@ jobs:
192196 - name : Run Triton GEMM (A^t@B) kernel benchmark
193197 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_atb')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
194198 run : |
199+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
195200 cd benchmarks/triton_kernels_benchmark
196201 TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
197202 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
@@ -203,6 +208,7 @@ jobs:
203208 - name : Run Triton GEMM (stream-k) kernel benchmark
204209 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_streamk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
205210 run : |
211+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
206212 cd benchmarks/triton_kernels_benchmark
207213 python gemm_streamk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
208214 source ../../scripts/capture-hw-details.sh
@@ -212,6 +218,7 @@ jobs:
212218 - name : Run Triton GEMM (split-k) kernel benchmark
213219 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_splitk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
214220 run : |
221+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
215222 cd benchmarks/triton_kernels_benchmark
216223 python gemm_splitk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
217224 source ../../scripts/capture-hw-details.sh
@@ -221,6 +228,7 @@ jobs:
221228 - name : Run Triton GEMM + PreOp (exp) kernel benchmark
222229 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_preop_exp_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
223230 run : |
231+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
224232 cd benchmarks/triton_kernels_benchmark
225233 python gemm_preop_exp_benchmark.py --reports $REPORTS --n_runs $N_RUNS
226234 source ../../scripts/capture-hw-details.sh
@@ -229,6 +237,7 @@ jobs:
229237 - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
230238 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
231239 run : |
240+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
232241 cd benchmarks/triton_kernels_benchmark
233242 python gemm_postop_gelu_benchmark.py --reports $REPORTS --n_runs $N_RUNS
234243 source ../../scripts/capture-hw-details.sh
@@ -237,6 +246,7 @@ jobs:
237246 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark bfloat16
238247 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py') }}
239248 run : |
249+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
240250 cd benchmarks/triton_kernels_benchmark
241251 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
242252 source ../../scripts/capture-hw-details.sh
@@ -246,6 +256,7 @@ jobs:
246256 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark int8
247257 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py') }}
248258 run : |
259+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
249260 cd benchmarks/triton_kernels_benchmark
250261 INT8_ONLY=1 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
251262 source ../../scripts/capture-hw-details.sh
@@ -255,6 +266,7 @@ jobs:
255266 - name : Run Triton FA fwd kernel benchmark
256267 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_benchmark.py') }}
257268 run : |
269+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
258270 cd benchmarks/triton_kernels_benchmark
259271 python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
260272
@@ -265,6 +277,7 @@ jobs:
265277 - name : Run Triton FA bwd kernel benchmark
266278 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_bwd_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_bwd_benchmark.py') }}
267279 run : |
280+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
268281 cd benchmarks/triton_kernels_benchmark
269282 FA_KERNEL_MODE="bwd" \
270283 python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
@@ -277,6 +290,7 @@ jobs:
277290 - name : Run Triton FA fwd kernel benchmark - with tensor descriptors
278291 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py') }}
279292 run : |
293+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
280294 cd benchmarks/triton_kernels_benchmark
281295 python flash_attention_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
282296 mv $REPORTS/attn-performance.csv $REPORTS/attn-tensor-desc-performance.csv
@@ -288,6 +302,7 @@ jobs:
288302 - name : Run Triton FlexAttention Causal Mask fwd kernel benchmark
289303 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py') }}
290304 run : |
305+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
291306 cd benchmarks/triton_kernels_benchmark
292307 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
293308
@@ -298,6 +313,7 @@ jobs:
298313 - name : Run Triton FlexAttention (batch_size=4) Causal Mask fwd kernel benchmark
299314 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py') }}
300315 run : |
316+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
301317 cd benchmarks/triton_kernels_benchmark
302318 BATCH_SIZE=4 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
303319
@@ -308,6 +324,7 @@ jobs:
308324 - name : Run Triton FlexAttention (batch_size=16) Causal Mask fwd kernel benchmark
309325 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py') }}
310326 run : |
327+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
311328 cd benchmarks/triton_kernels_benchmark
312329 BATCH_SIZE=16 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
313330
@@ -318,6 +335,7 @@ jobs:
318335 - name : Run Triton FlexAttention Custom Masks fwd kernel benchmark
319336 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py') }}
320337 run : |
338+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
321339 cd benchmarks/triton_kernels_benchmark
322340 python flex_attention_benchmark_custom_masks.py --reports $REPORTS --n_runs $N_RUNS
323341
@@ -331,6 +349,7 @@ jobs:
331349 - name : Run Prefix Sums kernel benchmark
332350 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'prefix_sums.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
333351 run : |
352+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
334353 cd benchmarks/triton_kernels_benchmark
335354 python prefix_sums.py --reports $REPORTS --n_runs $N_RUNS
336355 source ../../scripts/capture-hw-details.sh
@@ -339,6 +358,7 @@ jobs:
339358 - name : Run micro benchmark
340359 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'micro_benchmarks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks.py') && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}  # skip list honours 'micro_benchmarks.py' (the key the include list uses) as well as the older 'micro_benchmarks' spelling
341360 run : |
361+ export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"  # prepend $PTI_LIBS_DIR; ':' is added only when LD_LIBRARY_PATH is non-empty (an empty entry would make ld.so search the cwd)
342362 cd benchmarks/micro_benchmarks
343363 python run_benchmarks.py --reports $REPORTS
344364
0 commit comments