
Commit 7597ef1

Add FlexAttention to test script (#4950)
Signed-off-by: Ettore Tiotto <[email protected]>
Parent: 84fd610

1 file changed: scripts/test-triton.sh (+29, -8)
@@ -17,7 +17,8 @@ TEST:
     --benchmarks
     --softmax
     --gemm
-    --attention
+    --flash-attention
+    --flex-attention
     --instrumentation
     --inductor
     --sglang
@@ -55,7 +56,8 @@ TEST_MICRO_BENCHMARKS=false
 TEST_BENCHMARKS=false
 TEST_BENCHMARK_SOFTMAX=false
 TEST_BENCHMARK_GEMM=false
-TEST_BENCHMARK_ATTENTION=false
+TEST_BENCHMARK_FLASH_ATTENTION=false
+TEST_BENCHMARK_FLEX_ATTENTION=false
 TEST_INSTRUMENTATION=false
 TEST_INDUCTOR=false
 TEST_SGLANG=false
@@ -128,8 +130,13 @@ while (( $# != 0 )); do
       TEST_DEFAULT=false
       shift
       ;;
-    --attention)
-      TEST_BENCHMARK_ATTENTION=true
+    --flash-attention)
+      TEST_BENCHMARK_FLASH_ATTENTION=true
+      TEST_DEFAULT=false
+      shift
+      ;;
+    --flex-attention)
+      TEST_BENCHMARK_FLEX_ATTENTION=true
       TEST_DEFAULT=false
       shift
       ;;
@@ -410,9 +417,9 @@ run_benchmark_gemm() {
   python $TRITON_PROJ/benchmarks/triton_kernels_benchmark/gemm_tensor_desc_benchmark.py
 }
 
-run_benchmark_attention() {
+run_benchmark_flash_attention() {
   echo "****************************************************"
-  echo "***** Running ATTENTION *****"
+  echo "***** Running FlashAttention *****"
   echo "****************************************************"
   cd $TRITON_PROJ/benchmarks
   pip install .
@@ -433,6 +440,17 @@ run_benchmark_attention() {
   python $TRITON_PROJ/benchmarks/triton_kernels_benchmark/flash_attention_benchmark.py
 }
 
+run_benchmark_flex_attention() {
+  echo "****************************************************"
+  echo "***** Running FlexAttention *****"
+  echo "****************************************************"
+  cd $TRITON_PROJ/benchmarks
+  pip install .
+
+  echo "FlexAttention - causal mask:"
+  python $TRITON_PROJ/benchmarks/triton_kernels_benchmark/flex_attention_benchmark_causal_mask.py
+}
+
 run_benchmarks() {
   cd $TRITON_PROJ/benchmarks
   pip install .
@@ -538,8 +556,11 @@ test_triton() {
   if [ "$TEST_BENCHMARK_GEMM" = true ]; then
     run_benchmark_gemm
   fi
-  if [ "$TEST_BENCHMARK_ATTENTION" = true ]; then
-    run_benchmark_attention
+  if [ "$TEST_BENCHMARK_FLASH_ATTENTION" = true ]; then
+    run_benchmark_flash_attention
+  fi
+  if [ "$TEST_BENCHMARK_FLEX_ATTENTION" = true ]; then
+    run_benchmark_flex_attention
   fi
   if [ "$TEST_INSTRUMENTATION" == true ]; then
     run_instrumentation_tests
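With this change applied, the renamed and new options can be exercised as follows (a minimal usage sketch inferred from the diff; it assumes the script is invoked from the repository root and that the environment the script expects, e.g. TRITON_PROJ, is already set up):

    # FlashAttention benchmark only (this option was previously named --attention):
    scripts/test-triton.sh --flash-attention

    # FlexAttention benchmark only (runs the causal-mask variant):
    scripts/test-triton.sh --flex-attention

    # Options can be combined; passing either flag sets TEST_DEFAULT=false,
    # so only the requested benchmarks are run:
    scripts/test-triton.sh --flash-attention --flex-attention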
