Skip to content

Commit 932d9ed

Browse files
Temp change
Signed-off-by: Whitney Tsang <[email protected]>
1 parent defa7b3 commit 932d9ed

File tree

1 file changed

+5
-121
lines changed

1 file changed

+5
-121
lines changed

.github/workflows/triton-benchmarks.yml

Lines changed: 5 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -138,139 +138,23 @@ jobs:
138138
cd benchmarks
139139
python setup.py install
140140
141-
- name: Run Triton Softmax kernel benchmark
142-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'fused_softmax.py') }}
143-
run: |
144-
cd benchmarks/triton_kernels_benchmark
145-
python fused_softmax.py --reports $REPORTS
146-
source ../../scripts/capture-hw-details.sh
147-
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-triton-report.csv --benchmark softmax --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
148-
python ../../scripts/build_report.py $REPORTS/softmax-performance.csv $REPORTS/softmax-xetla-report.csv --benchmark softmax --compiler xetla --param_cols "N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
149-
150-
- name: Run Triton GEMM kernel benchmark
151-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py') }}
152-
run: |
153-
cd benchmarks/triton_kernels_benchmark
154-
python gemm_benchmark.py --reports $REPORTS
155-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-base.csv
156-
157-
source ../../scripts/capture-hw-details.sh
158-
python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-triton-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
159-
python ../../scripts/build_report.py $REPORTS/matmul-performance-base.csv $REPORTS/gemm-xetla-report.csv --benchmark gemm --compiler xetla --param_cols "B,M,K,N" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
160-
161-
- name: Run Triton GEMM kernel benchmark - advanced path
162-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_advanced') }}
141+
- name: Run Triton FA kernel benchmark - advanced path (w/ TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT)
142+
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
163143
run: |
164144
cd benchmarks/triton_kernels_benchmark
165-
# Advanced path:
166145
TRITON_INTEL_ADVANCED_PATH=1 \
167-
IGC_VISAOptions=" -enableBCR -nolocalra" \
168-
IGC_DisableLoopUnroll=1 \
169-
python gemm_benchmark.py --reports $REPORTS
170-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-adv-path.csv
171-
172-
source ../../scripts/capture-hw-details.sh
173-
TAG="${TAG}-adv"
174-
python ../../scripts/build_report.py $REPORTS/matmul-performance-adv-path.csv $REPORTS/gemm-triton-advanced-report.csv --benchmark gemm --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
175-
176-
- name: Run Triton GEMM (A@B^t) kernel benchmark
177-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_abt') }}
178-
run: |
179-
cd benchmarks/triton_kernels_benchmark
180-
TRANSPOSE_B=1 python gemm_benchmark.py --reports $REPORTS
181-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-bt.csv
182-
source ../../scripts/capture-hw-details.sh
183-
184-
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-triton-report.csv --benchmark gemm-bt --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
185-
python ../../scripts/build_report.py $REPORTS/matmul-performance-bt.csv $REPORTS/gemm-bt-onednn-report.csv --benchmark gemm-bt --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
186-
187-
- name: Run Triton GEMM (A^t@B) kernel benchmark
188-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_benchmark.py_atb') }}
189-
run: |
190-
cd benchmarks/triton_kernels_benchmark
191-
TRANSPOSE_A=1 python gemm_benchmark.py --reports $REPORTS
192-
mv $REPORTS/matmul-performance.csv $REPORTS/matmul-performance-at.csv
193-
source ../../scripts/capture-hw-details.sh
194-
195-
python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-triton-report.csv --benchmark gemm-at --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
196-
python ../../scripts/build_report.py $REPORTS/matmul-performance-at.csv $REPORTS/gemm-at-onednn-report.csv --benchmark gemm-at --compiler onednn --param_cols "B,M,K,N" --tflops_col onednn-TFlops --hbm_col "onednn-GB/s" --tag $TAG
197-
198-
- name: Run Triton GEMM (stream-k) kernel benchmark
199-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_streamk_benchmark.py') }}
200-
run: |
201-
cd benchmarks/triton_kernels_benchmark
202-
python gemm_streamk_benchmark.py --reports $REPORTS
203-
source ../../scripts/capture-hw-details.sh
204-
python ../../scripts/build_report.py $REPORTS/matmul-streamk-performance.csv $REPORTS/gemm-streamk-triton-report.csv --benchmark gemm-streamk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
205-
206-
- name: Run Triton GEMM (split-k) kernel benchmark
207-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_splitk_benchmark.py') }}
208-
run: |
209-
cd benchmarks/triton_kernels_benchmark
210-
python gemm_splitk_benchmark.py --reports $REPORTS
211-
source ../../scripts/capture-hw-details.sh
212-
python ../../scripts/build_report.py $REPORTS/matmul-splitk-performance.csv $REPORTS/gemm-splitk-triton-report.csv --benchmark gemm-splitk --compiler triton --param_cols "M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
213-
214-
- name: Run Triton GEMM + PreOp (exp) kernel benchmark
215-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_preop_exp_benchmark.py') }}
216-
run: |
217-
cd benchmarks/triton_kernels_benchmark
218-
python gemm_preop_exp_benchmark.py --reports $REPORTS
219-
source ../../scripts/capture-hw-details.sh
220-
python ../../scripts/build_report.py $REPORTS/matmul-performance-preop-exp.csv $REPORTS/gemm-preop-exp-triton-report.csv --benchmark gemm-preop-exp --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
221-
222-
- name: Run Triton GEMM + PostOp (Gelu) kernel benchmark
223-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_gelu_benchmark.py') }}
224-
run: |
225-
cd benchmarks/triton_kernels_benchmark
226-
python gemm_postop_gelu_benchmark.py --reports $REPORTS
227-
source ../../scripts/capture-hw-details.sh
228-
python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-gelu.csv $REPORTS/gemm-postop-gelu-triton-report.csv --benchmark gemm-postop-gelu --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
229-
230-
- name: Run Triton GEMM + PostOp (add matrix) kernel benchmark
231-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'gemm_postop_addmatrix_benchmark.py') }}
232-
run: |
233-
cd benchmarks/triton_kernels_benchmark
234-
python gemm_postop_addmatrix_benchmark.py --reports $REPORTS
235-
source ../../scripts/capture-hw-details.sh
236-
python ../../scripts/build_report.py $REPORTS/matmul-performance-postop-addmatrix.csv $REPORTS/gemm-postop-addmatrix-triton-report.csv --benchmark gemm-postop-addmatrix --compiler triton --param_cols "B,M,K,N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
237-
238-
- name: Run Triton FA kernel benchmark
239-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
240-
run: |
241-
cd benchmarks/triton_kernels_benchmark
146+
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
147+
IGC_VISAOptions=" -enableBCR" \
242148
python flash_attention_fwd_benchmark.py --reports $REPORTS
243149
244-
source ../../scripts/capture-hw-details.sh
245-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
246-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
247-
248-
- name: Run Triton FA kernel benchmark - advanced path
150+
- name: Run Triton FA kernel benchmark - advanced path (w/o TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT)
249151
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py_advanced') }}
250152
run: |
251153
cd benchmarks/triton_kernels_benchmark
252154
TRITON_INTEL_ADVANCED_PATH=1 \
253155
IGC_VISAOptions=" -enableBCR" \
254156
python flash_attention_fwd_benchmark.py --reports $REPORTS
255157
256-
TAG="${TAG}-adv"
257-
source ../../scripts/capture-hw-details.sh
258-
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-advanced-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
259-
260-
- name: Run Prefix Sums kernel benchmark
261-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'prefix_sums.py') }}
262-
run: |
263-
cd benchmarks/triton_kernels_benchmark
264-
python prefix_sums.py --reports $REPORTS
265-
source ../../scripts/capture-hw-details.sh
266-
python ../../scripts/build_report.py $REPORTS/prefix-sums.csv $REPORTS/prefix_sums-triton-report.csv --benchmark prefix_sums --compiler triton --param_cols "N" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
267-
268-
- name: Run micro benchmark
269-
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'micro_benchmarks') }}
270-
run: |
271-
cd benchmarks/micro_benchmarks
272-
python run_benchmarks.py --reports $REPORTS
273-
274158
- name: Save pip cache
275159
if: ${{ steps.pip-cache.outputs.status == 'miss' }}
276160
uses: ./.github/actions/save

0 commit comments

Comments
 (0)