@@ -116,9 +116,24 @@ jobs:
116116 cd benchmarks
117117 pip install .
118118
# Clone intel/pti-gpu at the commit pinned in .github/pins/pti.txt, build its
# SDK with the linux-icpx-release CMake preset, and publish the resulting
# library directory to later steps as PTI_LIBS_DIR (via GITHUB_ENV).
- name: Build PTI from source
  run: |
    PTI_COMMIT_ID="$(<.github/pins/pti.txt)"
    git clone https://github.com/intel/pti-gpu.git
    cd pti-gpu
    # Quoted on purpose: an empty pin must make the checkout fail instead of
    # degrading to a bare `git checkout` that keeps the default branch.
    git checkout "$PTI_COMMIT_ID"
    cd sdk
    cmake --preset linux-icpx-release
    # NOTE(review): BUILD_TESTING / PTI_BUILD_SAMPLES are passed here as
    # environment variables of the build invocation. If they are meant to be
    # CMake options they would normally be given as -D flags at configure
    # time; confirm the intent before relying on them.
    BUILD_TESTING=1 PTI_BUILD_SAMPLES=1 cmake --build --preset linux-icpx-release

    PTI_LIBS_DIR="$(pwd)/build-linux-icpx-release/lib/"
    # Print the built libraries into the job log.
    ls "$PTI_LIBS_DIR"
    echo "PTI_LIBS_DIR=$PTI_LIBS_DIR" >> "$GITHUB_ENV"
132+
119133 - name : Run Triton Softmax kernel benchmark
120134 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'fused_softmax.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
121135 run : |
136+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
122137 cd benchmarks/triton_kernels_benchmark
123138 python fused_softmax.py --reports $REPORTS --n_runs $N_RUNS
124139 source ../../scripts/capture-hw-details.sh
@@ -129,6 +144,7 @@ jobs:
129144 - name : Run Triton GEMM kernel benchmark
130145 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
131146 run : |
147+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
132148 cd benchmarks/triton_kernels_benchmark
133149 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
134150 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
@@ -142,6 +158,7 @@ jobs:
142158 - name : Run Triton GEMM kernel benchmark - with tensor of pointer
143159 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py') }}
144160 run : |
161+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
145162 cd benchmarks/triton_kernels_benchmark
146163 python gemm_tensor_of_ptr_benchmark.py --reports $REPORTS --n_runs $N_RUNS
147164 source ../../scripts/capture-hw-details.sh
@@ -154,6 +171,7 @@ jobs:
154171 - name : Run Triton GEMM kernel benchmark - with tensor descriptor
155172 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py') }}
156173 run : |
174+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
157175 cd benchmarks/triton_kernels_benchmark
158176 python gemm_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
159177 source ../../scripts/capture-hw-details.sh
@@ -166,6 +184,7 @@ jobs:
166184 - name : Run Triton GEMM (A@B^t) kernel benchmark
167185 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_abt')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
168186 run : |
187+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
169188 cd benchmarks/triton_kernels_benchmark
170189 TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
171190 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
@@ -177,6 +196,7 @@ jobs:
177196 - name : Run Triton GEMM (A^t@B) kernel benchmark
178197 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_atb')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
179198 run : |
199+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
180200 cd benchmarks/triton_kernels_benchmark
181201 TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
182202 mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
@@ -188,6 +208,7 @@ jobs:
188208 - name : Run Triton GEMM (stream-k) kernel benchmark
189209 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_streamk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
190210 run : |
211+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
191212 cd benchmarks/triton_kernels_benchmark
192213 python gemm_streamk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
193214 source ../../scripts/capture-hw-details.sh
@@ -197,6 +218,7 @@ jobs:
197218 - name : Run Triton GEMM (split-k) kernel benchmark
198219 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_splitk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
199220 run : |
221+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
200222 cd benchmarks/triton_kernels_benchmark
201223 python gemm_splitk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
202224 source ../../scripts/capture-hw-details.sh
@@ -206,6 +228,7 @@ jobs:
206228 - name : Run Triton GEMM + PreOp (exp) kernel benchmark
207229 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_preop_exp_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
208230 run : |
231+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
209232 cd benchmarks/triton_kernels_benchmark
210233 python gemm_preop_exp_benchmark.py --reports $REPORTS --n_runs $N_RUNS
211234 source ../../scripts/capture-hw-details.sh
@@ -214,6 +237,7 @@ jobs:
214237 - name : Run Triton GEMM + PostOp (Gelu) kernel benchmark
215238 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
216239 run : |
240+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
217241 cd benchmarks/triton_kernels_benchmark
218242 python gemm_postop_gelu_benchmark.py --reports $REPORTS --n_runs $N_RUNS
219243 source ../../scripts/capture-hw-details.sh
@@ -222,6 +246,7 @@ jobs:
222246 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark bfloat16
223247 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py') }}
224248 run : |
249+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
225250 cd benchmarks/triton_kernels_benchmark
226251 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
227252 source ../../scripts/capture-hw-details.sh
@@ -231,6 +256,7 @@ jobs:
231256 - name : Run Triton GEMM + PostOp (add matrix) kernel benchmark int8
232257 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py') }}
233258 run : |
259+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
234260 cd benchmarks/triton_kernels_benchmark
235261 INT8_ONLY=1 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
236262 source ../../scripts/capture-hw-details.sh
@@ -240,6 +266,7 @@ jobs:
240266 - name : Run Triton FA fwd kernel benchmark
241267 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_benchmark.py') }}
242268 run : |
269+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
243270 cd benchmarks/triton_kernels_benchmark
244271 python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
245272
@@ -250,6 +277,7 @@ jobs:
250277 - name : Run Triton FA bwd kernel benchmark
251278 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_bwd_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_bwd_benchmark.py') }}
252279 run : |
280+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
253281 cd benchmarks/triton_kernels_benchmark
254282 FA_KERNEL_MODE="bwd" \
255283 python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
@@ -262,6 +290,7 @@ jobs:
262290 - name : Run Triton FA fwd kernel benchmark - with tensor descriptors
263291 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py') }}
264292 run : |
293+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
265294 cd benchmarks/triton_kernels_benchmark
266295 python flash_attention_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
267296 mv $REPORTS/attn-performance.csv $REPORTS/attn-tensor-desc-performance.csv
@@ -273,6 +302,7 @@ jobs:
273302 - name : Run Triton FlexAttention Causal Mask fwd kernel benchmark
274303 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py') }}
275304 run : |
305+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
276306 cd benchmarks/triton_kernels_benchmark
277307 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
278308
@@ -283,6 +313,7 @@ jobs:
283313 - name : Run Triton FlexAttention (batch_size=4) Causal Mask fwd kernel benchmark
284314 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py') }}
285315 run : |
316+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
286317 cd benchmarks/triton_kernels_benchmark
287318 BATCH_SIZE=4 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
288319
@@ -293,6 +324,7 @@ jobs:
293324 - name : Run Triton FlexAttention (batch_size=16) Causal Mask fwd kernel benchmark
294325 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py') }}
295326 run : |
327+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
296328 cd benchmarks/triton_kernels_benchmark
297329 BATCH_SIZE=16 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
298330
@@ -303,6 +335,7 @@ jobs:
303335 - name : Run Triton FlexAttention Custom Masks fwd kernel benchmark
304336 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py') }}
305337 run : |
338+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
306339 cd benchmarks/triton_kernels_benchmark
307340 python flex_attention_benchmark_custom_masks.py --reports $REPORTS --n_runs $N_RUNS
308341
@@ -316,6 +349,7 @@ jobs:
316349 - name : Run Prefix Sums kernel benchmark
317350 if : ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'prefix_sums.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
318351 run : |
352+ export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
319353 cd benchmarks/triton_kernels_benchmark
320354 python prefix_sums.py --reports $REPORTS --n_runs $N_RUNS
321355 source ../../scripts/capture-hw-details.sh
@@ -324,6 +358,7 @@ jobs:
# Runs benchmarks/micro_benchmarks/run_benchmarks.py with the PTI libraries
# prepended to the loader search path.
# The step is selected with 'micro_benchmarks.py' in inputs.benchmarks; the
# skip list accepts both 'micro_benchmarks.py' (matching the selection name)
# and the legacy 'micro_benchmarks' spelling.
- name: Run micro benchmark
  if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'micro_benchmarks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks.py') && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
  run: |
    # Append the previous value only when it is non-empty, so no trailing ':'
    # (an empty entry, i.e. the current directory) ends up in the search path.
    export LD_LIBRARY_PATH="$PTI_LIBS_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    cd benchmarks/micro_benchmarks
    python run_benchmarks.py --reports "$REPORTS"
329364
0 commit comments