Skip to content

Commit 38a1cc6

Browse files
[GEMM] Reduce env
Signed-off-by: Whitney Tsang <[email protected]>
1 parent 4f6f088 commit 38a1cc6

File tree

1 file changed

+34
-110
lines changed

1 file changed

+34
-110
lines changed

.github/workflows/triton-benchmarks.yml

Lines changed: 34 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -132,26 +132,6 @@ jobs:
132132
cd benchmarks
133133
python setup.py install
134134
135-
- name: Run Triton Softmax kernel benchmark
136-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
137-
run: |
138-
cd benchmarks/triton_kernels_benchmark
139-
python fused_softmax.py --reports $REPORTS
140-
source ../../scripts/capture-hw-details.sh
141-
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
142-
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
143-
144-
- name: Run Triton GEMM kernel benchmark
145-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
146-
run: |
147-
cd benchmarks/triton_kernels_benchmark
148-
python gemm_benchmark.py --reports $REPORTS
149-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
150-
151-
source ../../scripts/capture-hw-details.sh
152-
python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
153-
python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
154-
155135
- name: Run Triton GEMM kernel benchmark - default path
156136
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
157137
run: |
@@ -182,117 +162,61 @@ jobs:
182162
TAG="${TAG}-adv"
183163
python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
184164
185-
- name: Run Triton GEMM (A@B^t) kernel benchmark
186-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
187-
run: |
188-
cd benchmarks/triton_kernels_benchmark
189-
TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
190-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
191-
source ../../scripts/capture-hw-details.sh
192-
193-
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
194-
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
195-
196-
- name: Run Triton GEMM (A^t@B) kernel benchmark
197-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
198-
run: |
199-
cd benchmarks/triton_kernels_benchmark
200-
TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
201-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
202-
source ../../scripts/capture-hw-details.sh
203-
204-
python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
205-
python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
206-
207-
- name: Run Triton GEMM (stream-k) kernel benchmark
208-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
209-
run: |
210-
cd benchmarks/triton_kernels_benchmark
211-
python gemm_streamk_benchmark.py --reports $REPORTS
212-
source ../../scripts/capture-hw-details.sh
213-
python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
214-
215-
- name: Run Triton GEMM (split-k) kernel benchmark
216-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
217-
run: |
218-
cd benchmarks/triton_kernels_benchmark
219-
python gemm_splitk_benchmark.py --reports $REPORTS
220-
source ../../scripts/capture-hw-details.sh
221-
python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
222-
223-
- name: Run Triton GEMM + PreOp (exp) kernel benchmark
224-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
225-
run: |
226-
cd benchmarks/triton_kernels_benchmark
227-
python gemm_preop_exp_benchmark.py --reports $REPORTS
228-
source ../../scripts/capture-hw-details.sh
229-
python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
230-
231-
- name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
232-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
165+
- name: Run Triton GEMM kernel benchmark - default path (remove DisableLoopUnroll)
166+
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
233167
run: |
234168
cd benchmarks/triton_kernels_benchmark
235-
python gemm_postop_gelu_benchmark.py --reports $REPORTS
236-
source ../../scripts/capture-hw-details.sh
237-
python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
169+
# Default path:
170+
TRITON_INTEL_ADVANCED_PATH=0 \
171+
IGC_VISAOptions=" -enableBCR -nolocalra" \
172+
python gemm_benchmark.py --reports $REPORTS
173+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
238174
239-
- name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
240-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark.py') }}
241-
run: |
242-
cd benchmarks/triton_kernels_benchmark
243-
python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
244175
source ../../scripts/capture-hw-details.sh
245-
python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
176+
TAG="${TAG}-dflt"
177+
python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
246178
247-
- name: Run Triton FA kernel benchmark
248-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
179+
- name: Run Triton GEMM kernel benchmark - advanced path (remove DisableLoopUnroll)
180+
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
249181
run: |
250182
cd benchmarks/triton_kernels_benchmark
251-
python flash_attention_fwd_benchmark.py --reports $REPORTS
183+
# Advanced path:
184+
TRITON_INTEL_ADVANCED_PATH=1 \
185+
IGC_VISAOptions=" -enableBCR -nolocalra" \
186+
python gemm_benchmark.py --reports $REPORTS
187+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
252188
253189
source ../../scripts/capture-hw-details.sh
254-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
255-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
190+
TAG="${TAG}-adv"
191+
python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
256192
257-
- name: Run Triton FA kernel benchmark - default path
258-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmark || '[]'), 'flash_attention_fwd_benchmark.py_default') }}
193+
- name: Run Triton GEMM kernel benchmark - default path (remove VISAOptions)
194+
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_default') }}
259195
run: |
260196
cd benchmarks/triton_kernels_benchmark
197+
# Default path:
261198
TRITON_INTEL_ADVANCED_PATH=0 \
262-
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
263-
IGC_VISAOptions=" -enableBCR" \
264-
python flash_attention_fwd_benchmark.py --reports $REPORTS
199+
IGC_DisableLoopUnroll=1 \
200+
python gemm_benchmark.py --reports $REPORTS
201+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-default-path.csv
265202
266-
TAG="${TAG}-dflt"
267203
source ../../scripts/capture-hw-details.sh
268-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-default-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
204+
TAG="${TAG}-dflt"
205+
python ../../scripts/build_report.py $REPORTS/matmul-performance-default-path.csv $REPORTS/gemm-triton-default-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
269206
270-
- name: Run Triton FA kernel benchmark - advanced path
271-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
207+
- name: Run Triton GEMM kernel benchmark - advanced path (remove VISAOptions)
208+
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
272209
run: |
273210
cd benchmarks/triton_kernels_benchmark
211+
# Advanced path:
274212
TRITON_INTEL_ADVANCED_PATH=1 \
275-
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
276-
IGC_VISAOptions=" -enableBCR" \
277-
python flash_attention_fwd_benchmark.py --reports $REPORTS
278-
279-
TAG="${TAG}-adv"
280-
source ../../scripts/capture-hw-details.sh
281-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
213+
IGC_DisableLoopUnroll=1 \
214+
python gemm_benchmark.py --reports $REPORTS
215+
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
282216
283-
- name: Run Prefix Sums kernel benchmark
284-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
285-
run: |
286-
cd benchmarks/triton_kernels_benchmark
287-
python prefix_sums.py --reports $REPORTS
288217
source ../../scripts/capture-hw-details.sh
289-
python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
290-
291-
- name: Run micro benchmark
292-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
293-
run: |
294-
cd benchmarks/micro_benchmarks
295-
python run_benchmarks.py --reports $REPORTS
218+
TAG="${TAG}-adv"
219+
python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
296220
297221
- name: Save pip cache
298222
if: ${{ steps.pip-cache.outputs.status == 'miss' }}

0 commit comments

Comments (0)