Commit 7ce3fa9

[CI][benchmarks] Fixed warmup type for flash attention and gemm-preop-exp (#5344)
This is a follow-up to #5293. That PR did not change the warmup type, so we currently do too much warmup and CI is too slow.
1 parent c795abd commit 7ce3fa9
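
For context: the `do_bench` helper used by these benchmarks supports two warmup strategies, selected by its `time_warmup` flag, and the fix below simply stops forcing `time_warmup=False` so the call sites pick up the default intended by #5293. The sketch below illustrates how such a switch typically works; it is an assumption, not the repo's actual implementation, and the names `do_bench_sketch`, `n_warmup`, `warmup_time_ms`, and `rep` are hypothetical.

import time
import statistics

def do_bench_sketch(fn, n_warmup=10, warmup_time_ms=100, rep=100,
                    grad_to_none=None, time_warmup=True):
    # Hypothetical stand-in for the repo's do_bench; the signature and
    # defaults are assumptions based only on how it is called in this diff.
    if time_warmup:
        # Time-based warmup: run fn until a wall-clock budget is spent.
        deadline = time.perf_counter() + warmup_time_ms / 1000.0
        while time.perf_counter() < deadline:
            fn()
    else:
        # Iteration-based warmup: run fn a fixed number of times, which
        # can cost far more wall-clock time for slow kernels.
        for _ in range(n_warmup):
            fn()

    times_ms = []
    for _ in range(rep):
        if grad_to_none is not None:
            # Reset gradients so repeated backward passes don't accumulate.
            for tensor in grad_to_none:
                tensor.grad = None
        start = time.perf_counter()
        fn()
        times_ms.append((time.perf_counter() - start) * 1000.0)

    mean = statistics.mean(times_ms)
    cv = statistics.stdev(times_ms) / mean if rep > 1 else 0.0
    # Five return values, matching the unpacking at the call sites below.
    return times_ms, min(times_ms), max(times_ms), mean, cv

Per the commit message, leaving `time_warmup=False` behind forced the more expensive warmup path for these kernels, so dropping the argument is enough to restore the intended CI runtime.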

File tree: 2 files changed, +4 −4 lines changed

benchmarks/triton_kernels_benchmark/flash_attention_benchmark.py

Lines changed: 3 additions & 3 deletions
@@ -614,7 +614,7 @@ def benchmark(Z, H, N_CTX, D_HEAD, CAUSAL, MODE, provider):
                                          err_msg=f'Error comparing {name} between triton and torch')
             triton_fn = lambda: triton_o.backward(dout, retain_graph=True)
 
-        _, min_ms, max_ms, mean, cv = do_bench(triton_fn, grad_to_none=(q, k, v), time_warmup=False)
+        _, min_ms, max_ms, mean, cv = do_bench(triton_fn, grad_to_none=(q, k, v))
 
     elif provider == 'xetla':
         if MODE == 'bwd':
@@ -644,7 +644,7 @@ def xetla_bwd_fn():
                               bias_strideN, bias_strideF, attn_mask_padding)
                 return out
 
-            _, min_ms, max_ms, mean, cv = do_bench(xetla_bwd_fn, time_warmup=False)
+            _, min_ms, max_ms, mean, cv = do_bench(xetla_bwd_fn)
 
         else:
             min_ms = float('nan')
@@ -664,7 +664,7 @@ def cutlass_fwd_fn():
 
         benchmark_suite.assert_close(cutlass_fwd_fn, torch_fn, atol=atol, rtol=1e-3, err_msg='cutlass to torch')
 
-        _, min_ms, max_ms, mean, cv = do_bench(cutlass_fwd_fn, time_warmup=False)
+        _, min_ms, max_ms, mean, cv = do_bench(cutlass_fwd_fn)
 
     else:
         min_ms = float('nan')

benchmarks/triton_kernels_benchmark/gemm_preop_exp_benchmark.py

Lines changed: 1 addition & 1 deletion
@@ -277,7 +277,7 @@ def benchmark(B, M, N, K, provider):
         torch_fn = lambda: torch.matmul(torch.exp(a), b).to(torch.float32)
         rtol = 1e-2 if a.dtype == torch.bfloat16 else 1e-3
         benchmark_suite.assert_close(triton_fn, torch_fn, atol=1e-4, rtol=rtol, err_msg='triton to torch')
-        _, min_ms, max_ms, mean_ms, cv = do_bench(triton_fn, time_warmup=False)
+        _, min_ms, max_ms, mean_ms, cv = do_bench(triton_fn)
     else:
         raise NotImplementedError(f'Unsupported provider {provider}')
 
