Skip to content

Commit 1642cd3

Browse files
committed
fix UTs
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent 2ea9563 commit 1642cd3

File tree

2 files changed: +32 -0 lines changed

.github/workflows/build-test-reusable.yml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,17 @@ jobs:
123123
run: |
124124
echo TRITON_BUILD_PROTON_XPU=1 | tee -a $GITHUB_ENV
125125
126+
git clone https://github.com/intel/pti-gpu.git
127+
cd pti-gpu
128+
git checkout 15a201d25e5659692613b98ee33513263b689101
129+
cd sdk
130+
cmake --preset linux-icpx-release
131+
BUILD_TESTING=1 PTI_BUILD_SAMPLES=1 cmake --build --preset linux-icpx-release
132+
133+
PTI_LIBS_DIR="$(pwd)/build-linux-icpx-release/lib/"
134+
ls $PTI_LIBS_DIR
135+
echo "PTI_LIBS_DIR=$PTI_LIBS_DIR" >> $GITHUB_ENV
136+
126137
- name: Build Triton
127138
uses: ./.github/actions/setup-triton
128139
with:
@@ -288,6 +299,7 @@ jobs:
288299
- name: Run Proton tests
289300
if: matrix.suite == 'rest' && inputs.driver_version == 'rolling' && inputs.device == 'max1100'
290301
run: |
302+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
291303
cd third_party/proton/test
292304
# FIXME: enable 'test_record.py' back
293305
pytest test_api.py test_lib.py test_profile.py test_viewer.py -s -v

.github/workflows/triton-benchmarks.yml

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,7 @@ jobs:
144144
- name: Run Triton GEMM kernel benchmark
145145
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
146146
run: |
147+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
147148
cd benchmarks/triton_kernels_benchmark
148149
python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
149150
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
@@ -157,6 +158,7 @@ jobs:
157158
- name: Run Triton GEMM kernel benchmark - with tensor of pointer
158159
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_of_ptr_benchmark.py') }}
159160
run: |
161+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
160162
cd benchmarks/triton_kernels_benchmark
161163
python gemm_tensor_of_ptr_benchmark.py --reports $REPORTS --n_runs $N_RUNS
162164
source ../../scripts/capture-hw-details.sh
@@ -169,6 +171,7 @@ jobs:
169171
- name: Run Triton GEMM kernel benchmark - with tensor descriptor
170172
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_tensor_desc_benchmark.py') }}
171173
run: |
174+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
172175
cd benchmarks/triton_kernels_benchmark
173176
python gemm_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
174177
source ../../scripts/capture-hw-details.sh
@@ -181,6 +184,7 @@ jobs:
181184
- name: Run Triton GEMM (A@B^t) kernel benchmark
182185
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_abt')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
183186
run: |
187+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
184188
cd benchmarks/triton_kernels_benchmark
185189
TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
186190
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
@@ -192,6 +196,7 @@ jobs:
192196
- name: Run Triton GEMM (A^t@B) kernel benchmark
193197
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_benchmark.py_atb')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
194198
run: |
199+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
195200
cd benchmarks/triton_kernels_benchmark
196201
TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS --n_runs $N_RUNS
197202
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
@@ -203,6 +208,7 @@ jobs:
203208
- name: Run Triton GEMM (stream-k) kernel benchmark
204209
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_streamk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
205210
run: |
211+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
206212
cd benchmarks/triton_kernels_benchmark
207213
python gemm_streamk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
208214
source ../../scripts/capture-hw-details.sh
@@ -212,6 +218,7 @@ jobs:
212218
- name: Run Triton GEMM (split-k) kernel benchmark
213219
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_splitk_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
214220
run: |
221+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
215222
cd benchmarks/triton_kernels_benchmark
216223
python gemm_splitk_benchmark.py --reports $REPORTS --n_runs $N_RUNS
217224
source ../../scripts/capture-hw-details.sh
@@ -221,6 +228,7 @@ jobs:
221228
- name: Run Triton GEMM + PreOp (exp) kernel benchmark
222229
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_preop_exp_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
223230
run: |
231+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
224232
cd benchmarks/triton_kernels_benchmark
225233
python gemm_preop_exp_benchmark.py --reports $REPORTS --n_runs $N_RUNS
226234
source ../../scripts/capture-hw-details.sh
@@ -229,6 +237,7 @@ jobs:
229237
- name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
230238
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
231239
run: |
240+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
232241
cd benchmarks/triton_kernels_benchmark
233242
python gemm_postop_gelu_benchmark.py --reports $REPORTS --n_runs $N_RUNS
234243
source ../../scripts/capture-hw-details.sh
@@ -237,6 +246,7 @@ jobs:
237246
- name: Run Triton GEMM + PostOp (add matrix) kernel benchmark bfloat16
238247
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_bfloat16.py') }}
239248
run: |
249+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
240250
cd benchmarks/triton_kernels_benchmark
241251
python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
242252
source ../../scripts/capture-hw-details.sh
@@ -246,6 +256,7 @@ jobs:
246256
- name: Run Triton GEMM + PostOp (add matrix) kernel benchmark int8
247257
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark_int8.py') }}
248258
run: |
259+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
249260
cd benchmarks/triton_kernels_benchmark
250261
INT8_ONLY=1 python gemm_postop_addmatrix_benchmark.py --reports $REPORTS --n_runs $N_RUNS
251262
source ../../scripts/capture-hw-details.sh
@@ -255,6 +266,7 @@ jobs:
255266
- name: Run Triton FA fwd kernel benchmark
256267
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_benchmark.py') }}
257268
run: |
269+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
258270
cd benchmarks/triton_kernels_benchmark
259271
python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
260272
@@ -265,6 +277,7 @@ jobs:
265277
- name: Run Triton FA bwd kernel benchmark
266278
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_bwd_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_bwd_benchmark.py') }}
267279
run: |
280+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
268281
cd benchmarks/triton_kernels_benchmark
269282
FA_KERNEL_MODE="bwd" \
270283
python flash_attention_benchmark.py --reports $REPORTS --n_runs $N_RUNS
@@ -277,6 +290,7 @@ jobs:
277290
- name: Run Triton FA fwd kernel benchmark - with tensor descriptors
278291
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_tensor_desc_benchmark.py') }}
279292
run: |
293+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
280294
cd benchmarks/triton_kernels_benchmark
281295
python flash_attention_tensor_desc_benchmark.py --reports $REPORTS --n_runs $N_RUNS
282296
mv $REPORTS/attn-performance.csv $REPORTS/attn-tensor-desc-performance.csv
@@ -288,6 +302,7 @@ jobs:
288302
- name: Run Triton FlexAttention Causal Mask fwd kernel benchmark
289303
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_causal_mask.py') }}
290304
run: |
305+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
291306
cd benchmarks/triton_kernels_benchmark
292307
python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
293308
@@ -298,6 +313,7 @@ jobs:
298313
- name: Run Triton FlexAttention (batch_size=4) Causal Mask fwd kernel benchmark
299314
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch4-causal_mask.py') }}
300315
run: |
316+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
301317
cd benchmarks/triton_kernels_benchmark
302318
BATCH_SIZE=4 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
303319
@@ -308,6 +324,7 @@ jobs:
308324
- name: Run Triton FlexAttention (batch_size=16) Causal Mask fwd kernel benchmark
309325
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_batch16-causal_mask.py') }}
310326
run: |
327+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
311328
cd benchmarks/triton_kernels_benchmark
312329
BATCH_SIZE=16 python flex_attention_benchmark_causal_mask.py --reports $REPORTS --n_runs $N_RUNS
313330
@@ -318,6 +335,7 @@ jobs:
318335
- name: Run Triton FlexAttention Custom Masks fwd kernel benchmark
319336
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flex_attention_benchmark_custom_masks.py') }}
320337
run: |
338+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
321339
cd benchmarks/triton_kernels_benchmark
322340
python flex_attention_benchmark_custom_masks.py --reports $REPORTS --n_runs $N_RUNS
323341
@@ -331,6 +349,7 @@ jobs:
331349
- name: Run Prefix Sums kernel benchmark
332350
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'prefix_sums.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
333351
run: |
352+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
334353
cd benchmarks/triton_kernels_benchmark
335354
python prefix_sums.py --reports $REPORTS --n_runs $N_RUNS
336355
source ../../scripts/capture-hw-details.sh
@@ -339,6 +358,7 @@ jobs:
339358
- name: Run micro benchmark
340359
if: ${{ steps.install.outcome == 'success' && !cancelled() && (inputs.benchmarks == '' || contains(fromJson(inputs.benchmarks || '[]'), 'micro_benchmarks.py')) && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
341360
run: |
361+
export LD_LIBRARY_PATH=$PTI_LIBS_DIR:$LD_LIBRARY_PATH
342362
cd benchmarks/micro_benchmarks
343363
python run_benchmarks.py --reports $REPORTS
344364

0 commit comments

Comments (0)