Commit e427f29

Do not run benchmark for batch_size >= 16 on b580 (#5423)

Authored by Ettore Tiotto
Signed-off-by: Ettore Tiotto <[email protected]>

1 parent: da48f1a

File tree

1 file changed: +6 -0 lines changed

benchmarks/triton_kernels_benchmark/flex_attention_benchmark_causal_mask.py

Lines changed: 6 additions & 0 deletions
@@ -74,6 +74,12 @@ def causal_mask(_, __, q_idx, kv_idx):
 batch_sizes = [16, 32, 64] if throughput_test else [batch_size]
 fa_kernel_mode = os.getenv('FA_KERNEL_MODE', 'fwd')
 
+if torch.xpu.get_device_name() == '580':
+    old_count = len(batch_sizes)
+    batch_sizes = [size for size in batch_sizes if size < 16]
+    if len(batch_sizes) != old_count:
+        print('Skipping running batch_sizes >= 16 on b580')
+
 
 # Kernel profiling for Backward mode is not working as expected:
 # For details: https://github.com/pytorch/pytorch/issues/144778
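
For context, here is a minimal, self-contained sketch of the logic this commit adds. The `throughput_test` and `batch_size` values below are stand-ins, not values taken from the benchmark file, and the `torch.xpu.is_available()` guard is an addition here so the sketch also runs on machines without an XPU device:

import torch

# Stand-ins for the benchmark's configuration (assumed values).
throughput_test = True
batch_size = 4
batch_sizes = [16, 32, 64] if throughput_test else [batch_size]

# The commit's guard: on a b580 device, keep only batch sizes < 16
# and report that the larger ones were skipped.
if torch.xpu.is_available() and torch.xpu.get_device_name() == '580':
    old_count = len(batch_sizes)
    batch_sizes = [size for size in batch_sizes if size < 16]
    if len(batch_sizes) != old_count:
        print('Skipping running batch_sizes >= 16 on b580')

print('Benchmarking batch sizes:', batch_sizes)

Note that the diff compares the device name for equality with '580'; the exact string `torch.xpu.get_device_name()` returns for a B580 depends on the device and driver, so that comparison is taken verbatim from the commit rather than verified here.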
