Commit 6b8ee6d

Increase the dynamo recompile limit number for the flex attention benchmark testing (#3819)
Torch Dynamo's default recompile limit is 8. When a benchmark test case triggers more recompilations than that, Dynamo falls back to the eager-mode kernel, so the benchmark no longer measures the Triton kernel. Raise the limit so that all flex attention kernels run as Triton kernels.
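The fallback behavior described above can be illustrated with a minimal, self-contained sketch. The `RecompileCache` class and its names are hypothetical stand-ins for Dynamo's internal recompile tracking, not real torch APIs; they only model the idea that once the number of distinct compiled variants exceeds the limit, every further call runs eagerly.

```python
# Hypothetical sketch of recompile-limit fallback; RecompileCache is
# illustrative only and is NOT part of torch._dynamo.

class RecompileCache:
    """Caches one compiled variant per input shape; permanently falls
    back to eager once the recompile limit is exhausted."""

    def __init__(self, recompile_limit=8):
        self.recompile_limit = recompile_limit
        self.compiled = {}       # shape -> compiled-variant marker
        self.fell_back = False

    def __call__(self, shape):
        if self.fell_back:
            return "eager"
        if shape not in self.compiled:
            if len(self.compiled) >= self.recompile_limit:
                # Too many recompilations: give up and run eagerly.
                self.fell_back = True
                return "eager"
            self.compiled[shape] = True   # simulate a fresh compile
        return "compiled"

# With the default limit of 8, the ninth distinct shape falls back:
low = RecompileCache(recompile_limit=8)
modes_low = [low((n,)) for n in range(9)]

# With a raised limit (the commit uses 100), every variant stays compiled:
high = RecompileCache(recompile_limit=100)
modes_high = [high((n,)) for n in range(9)]
```

Benchmarks that sweep many input shapes hit exactly this pattern: each new shape is one more recompilation, so a low limit silently turns later measurements into eager-mode numbers.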
1 parent cfe7c98 commit 6b8ee6d

File tree

2 files changed: +4 −0 lines


benchmarks/triton_kernels_benchmark/flex_attention_benchmark_causal_mask.py

Lines changed: 2 additions & 0 deletions
@@ -11,6 +11,8 @@
 import triton_kernels_benchmark as benchmark_suit
 from triton_kernels_benchmark import xetla_kernel
 
+torch._dynamo.config.recompile_limit = 100  # pylint: disable=protected-access
+
 # Compile the flex_attention function
 flex_attention = torch.compile(flex_attention, dynamic=False)

benchmarks/triton_kernels_benchmark/flex_attention_benchmark_custom_masks.py

Lines changed: 2 additions & 0 deletions
@@ -12,6 +12,8 @@
 
 import triton_kernels_benchmark as benchmark_suit
 
+torch._dynamo.config.recompile_limit = 100  # pylint: disable=protected-access
+
 # Compile the flex_attention function
 flex_attention = torch.compile(flex_attention, dynamic=False)
