Skip to content

Commit 55095c8

Browse files
Revert "Test removal of TRITON_INTEL_ENABLE_INSTR_SCHED"
This reverts commit 35c3e09.
1 parent 268ef04 commit 55095c8

File tree

1 file changed

+143 -7 lines changed

.github/workflows/triton-benchmarks.yml

Lines changed: 143 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -111,27 +111,149 @@ jobs:
111111
cd benchmarks
112112
python setup.py install
113113
114-
- name: Run Triton FA kernel benchmark - advanced path (w/ TRITON_INTEL_ENABLE_INSTR_SCHED)
114+
- name: Run Triton Softmax kernel benchmark
115115
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
116116
run: |
117117
cd benchmarks/triton_kernels_benchmark
118-
rm -rf ~/.triton/cache
118+
python fused_softmax.py --reports $REPORTS
119+
source ../../scripts/capture-hw-details.sh
120+
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
121+
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
122+
123+
- name: Run Triton GEMM kernel benchmark
124+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
125+
run: |
126+
cd benchmarks/triton_kernels_benchmark
127+
python gemm_benchmark.py --reports $REPORTS
128+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
129+
130+
source ../../scripts/capture-hw-details.sh
131+
python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
132+
python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
133+
134+
- name: Run Triton GEMM kernel benchmark - default path
135+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
136+
run: |
137+
cd benchmarks/triton_kernels_benchmark
138+
# Default path:
139+
TRITON_INTEL_ADVANCED_PATH=0 \
140+
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
141+
IGC_VISAOptions=" -enableBCR -nolocalra" \
142+
IGC_DisableLoopUnroll=1 \
143+
python gemm_benchmark.py --reports $REPORTS
144+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
145+
146+
source ../../scripts/capture-hw-details.sh
147+
TAG="${TAG}-dflt"
148+
python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
149+
150+
- name: Run Triton GEMM kernel benchmark - advanced path
151+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
152+
run: |
153+
cd benchmarks/triton_kernels_benchmark
154+
# Advanced path:
119155
TRITON_INTEL_ADVANCED_PATH=1 \
120-
TRITON_INTEL_ENABLE_INSTR_SCHED=1 \
156+
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
157+
IGC_VISAOptions=" -enableBCR -nolocalra" \
158+
IGC_DisableLoopUnroll=1 \
159+
python gemm_benchmark.py --reports $REPORTS
160+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
161+
162+
source ../../scripts/capture-hw-details.sh
163+
TAG="${TAG}-adv"
164+
python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
165+
166+
- name: Run Triton GEMM (A@B^t) kernel benchmark
167+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
168+
run: |
169+
cd benchmarks/triton_kernels_benchmark
170+
TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
171+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
172+
source ../../scripts/capture-hw-details.sh
173+
174+
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
175+
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
176+
177+
- name: Run Triton GEMM (A^t@B) kernel benchmark
178+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
179+
run: |
180+
cd benchmarks/triton_kernels_benchmark
181+
TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
182+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
183+
source ../../scripts/capture-hw-details.sh
184+
185+
python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
186+
python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
187+
188+
- name: Run Triton GEMM (stream-k) kernel benchmark
189+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
190+
run: |
191+
cd benchmarks/triton_kernels_benchmark
192+
python gemm_streamk_benchmark.py --reports $REPORTS
193+
source ../../scripts/capture-hw-details.sh
194+
python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
195+
196+
- name: Run Triton GEMM (split-k) kernel benchmark
197+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
198+
run: |
199+
cd benchmarks/triton_kernels_benchmark
200+
python gemm_splitk_benchmark.py --reports $REPORTS
201+
source ../../scripts/capture-hw-details.sh
202+
python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
203+
204+
- name: Run Triton GEMM + PreOp (exp) kernel benchmark
205+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
206+
run: |
207+
cd benchmarks/triton_kernels_benchmark
208+
python gemm_preop_exp_benchmark.py --reports $REPORTS
209+
source ../../scripts/capture-hw-details.sh
210+
python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
211+
212+
- name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
213+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
214+
run: |
215+
cd benchmarks/triton_kernels_benchmark
216+
python gemm_postop_gelu_benchmark.py --reports $REPORTS
217+
source ../../scripts/capture-hw-details.sh
218+
python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
219+
220+
- name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
221+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
222+
run: |
223+
cd benchmarks/triton_kernels_benchmark
224+
python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
225+
source ../../scripts/capture-hw-details.sh
226+
python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
227+
228+
- name: Run Triton FA kernel benchmark
229+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
230+
run: |
231+
cd benchmarks/triton_kernels_benchmark
232+
python flash_attention_fwd_benchmark.py --reports $REPORTS
233+
234+
source ../../scripts/capture-hw-details.sh
235+
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
236+
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
237+
238+
- name: Run Triton FA kernel benchmark - default path
239+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
240+
run: |
241+
cd benchmarks/triton_kernels_benchmark
242+
TRITON_INTEL_ADVANCED_PATH=0 \
121243
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
122244
IGC_VISAOptions=" -enableBCR" \
123245
python flash_attention_fwd_benchmark.py --reports $REPORTS
124246
125-
TAG="${TAG}-adv"
247+
TAG="${TAG}-dflt"
126248
source ../../scripts/capture-hw-details.sh
127-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
249+
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
128250
129-
- name: Run Triton FA kernel benchmark - advanced path (w/o TRITON_INTEL_ENABLE_INSTR_SCHED)
251+
- name: Run Triton FA kernel benchmark - advanced path
130252
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
131253
run: |
132254
cd benchmarks/triton_kernels_benchmark
133-
rm -rf ~/.triton/cache
134255
TRITON_INTEL_ADVANCED_PATH=1 \
256+
TRITON_INTEL_ENABLE_INSTR_SCHED=1 \
135257
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
136258
IGC_VISAOptions=" -enableBCR" \
137259
python flash_attention_fwd_benchmark.py --reports $REPORTS
@@ -140,6 +262,20 @@ jobs:
140262
source ../../scripts/capture-hw-details.sh
141263
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
142264
265+
- name: Run Prefix Sums kernel benchmark
266+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
267+
run: |
268+
cd benchmarks/triton_kernels_benchmark
269+
python prefix_sums.py --reports $REPORTS
270+
source ../../scripts/capture-hw-details.sh
271+
python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
272+
273+
- name: Run micro benchmark
274+
if: ${{ steps.install.outcome == 'success' && !cancelled() }}
275+
run: |
276+
cd benchmarks/micro_benchmarks
277+
python run_benchmarks.py --reports $REPORTS
278+
143279
- name: Save pip cache
144280
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
145281
uses: ./.github/actions/save

0 commit comments

Comments (0)