
Commit eb99fa1

[release/2.7] Fix for flex attention tuning (#2589)
Bug fix after #2392 landed. The issue was caused by a bad merge-conflict resolution that left the code calling an outdated API:

> torch._inductor.exc.LoweringException: NameError: name '_get_default_config_bwd' is not defined
> target: flex_attention_backward

Models now run to completion.
1 parent 59925f5

File tree: 1 file changed (+0 −14 lines)


torch/_inductor/kernel/flex_attention.py

Lines changed: 0 additions & 14 deletions
@@ -2476,20 +2476,6 @@ def flex_attention_backward(*args, **kwargs):
     SPARSE_KV_BLOCK_SIZE = V.graph.sizevars.evaluate_static_shape(SPARSE_KV_BLOCK_SIZE)
 
     choices: list[Any] = []
-    configs: list[tuple[int, int, int, int]] = []
-    configs.append(_get_default_config_bwd(query))
-    if config.max_autotune:
-        num_stages_list = [1, 3, 4, 5] if torch.version.hip is None else [1]
-        configs.extend(
-            [
-                (BLOCK1, BLOCK2, w, s)
-                for BLOCK1 in [32, 64]
-                for BLOCK2 in [32, 64, 128]
-                for w in ([4, 8] if BLOCK1 >= 128 or BLOCK2 >= 128 else [4])
-                for s in num_stages_list
-                if BLOCK2 % BLOCK1 == 0
-            ]
-        )
 
     dtype = query.get_dtype()
     head_dim = V.graph.sizevars.evaluate_static_shape(query.get_size()[-1])
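For reference, the deleted lines enumerated backward-kernel tuning configs with a filtered list comprehension. A minimal standalone sketch of that enumeration, with `config.max_autotune` and `torch.version.hip` replaced by plain booleans and `_get_default_config_bwd(query)` stubbed as a fixed tuple (both substitutions are assumptions for illustration, not the inductor API):

```python
def enumerate_bwd_configs(max_autotune: bool, is_hip: bool) -> list[tuple[int, int, int, int]]:
    """Sketch of the removed config enumeration from flex_attention_backward.

    Each config is (BLOCK1, BLOCK2, num_warps, num_stages). The real code
    seeded the list with _get_default_config_bwd(query); here a fixed tuple
    stands in for that default.
    """
    configs: list[tuple[int, int, int, int]] = []
    configs.append((32, 32, 4, 1))  # stub for _get_default_config_bwd(query)
    if max_autotune:
        # ROCm (HIP) builds were restricted to a single pipeline stage.
        num_stages_list = [1, 3, 4, 5] if not is_hip else [1]
        configs.extend(
            [
                (BLOCK1, BLOCK2, w, s)
                for BLOCK1 in [32, 64]
                for BLOCK2 in [32, 64, 128]
                # larger tiles get the 8-warp variant as well
                for w in ([4, 8] if BLOCK1 >= 128 or BLOCK2 >= 128 else [4])
                for s in num_stages_list
                # BLOCK2 must tile evenly by BLOCK1
                if BLOCK2 % BLOCK1 == 0
            ]
        )
    return configs
```

Without `max_autotune` only the default config survives; with it, the comprehension yields 7 (block, warp) combinations per stage count, so the divisibility filter keeps the search space small.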
