Skip to content

Commit 7422d02

Browse files
Update triton-benchmarks.yml
Signed-off-by: Whitney Tsang <[email protected]>
1 parent 4d3a94d commit 7422d02

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

.github/workflows/triton-benchmarks.yml

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,19 @@ jobs:
250250
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
251251
run: |
252252
cd benchmarks/triton_kernels_benchmark
253+
TRITON_INTEL_DISABLE_LARGE_BLOCK_SIZE_IO_FOR_TRANS_DOT_B=1 \
254+
python flash_attention_fwd_benchmark.py --reports $REPORTS
255+
256+
source ../../scripts/capture-hw-details.sh
257+
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-triton-report.csv --benchmark attn --compiler triton --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col Triton-TFlops --hbm_col "Triton-GB/s" --tag $TAG
258+
python ../../scripts/build_report.py $REPORTS/attn-performance.csv $REPORTS/attn-xetla-report.csv --benchmark attn --compiler xetla --param_cols "Z,H,N_CTX,D_HEAD,CAUSAL" --tflops_col XeTLA-TFlops --hbm_col "XeTLA-GB/s" --tag $TAG
259+
260+
- name: Run Triton FA kernel benchmark (+ reduction)
261+
if: ${{ steps.install.outcome == 'success' && !cancelled() && !contains(fromJson(inputs.skip_benchmarks || '[]'), 'flash_attention_fwd_benchmark.py') }}
262+
run: |
263+
cd benchmarks/triton_kernels_benchmark
264+
TRITON_INTEL_DISABLE_LARGE_BLOCK_SIZE_IO_FOR_TRANS_DOT_B=1 \
265+
TRITON_INTEL_OPTIMIZE_REDUCTION_LOCALITY=1 \
253266
python flash_attention_fwd_benchmark.py --reports $REPORTS
254267
255268
source ../../scripts/capture-hw-details.sh
@@ -261,6 +274,7 @@ jobs:
261274
run: |
262275
cd benchmarks/triton_kernels_benchmark
263276
TRITON_INTEL_ADVANCED_PATH=0 \
277+
TRITON_INTEL_DISABLE_LARGE_BLOCK_SIZE_IO_FOR_TRANS_DOT_B=1 \
264278
TRITON_INTEL_ENABLE_ADDRESS_PAYLOAD_OPT=1 \
265279
IGC_VISAOptions=" -enableBCR" \
266280
python flash_attention_fwd_benchmark.py --reports $REPORTS

third_party/intel/lib/TritonIntelGPUToLLVM/DotOpToLLVM/DPAS.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,8 @@ class DotOpDPASConversionHelper {
186186
auto RC = IntegerAttr::get(rewriter.getIntegerType(32),
187187
dpasEncoding.getRepeatCount());
188188
fc.at({b, m, n}) = rewriter.create<TritonGEN::MatrixDPASOp>(
189-
loc, dTy, valc, valA, valB, pA, pB, RC);
189+
loc, dTy, bitcast(valc, cTy), bitcast(valA, aTy), bitcast(valB, bTy),
190+
pA, pB, RC);
190191
};
191192

192193
ArrayRef<unsigned> repCluster = dpasEncoding.getRepCluster();
@@ -345,8 +346,7 @@ class DotOpDPASConversionHelper {
345346
i32_val(k));
346347
}
347348
vals[{b, i * repClusterOuter + repOuter,
348-
j * repClusterInner + repInner}] =
349-
bitcast(matVal, dotOperandType);
349+
j * repClusterInner + repInner}] = matVal;
350350
}
351351
}
352352
}

0 commit comments

Comments
 (0)