From f442955f1145671060c538405f172cefda3dfc6e Mon Sep 17 00:00:00 2001
From: AmdSampsa
Date: Wed, 29 Oct 2025 14:01:05 +0000
Subject: [PATCH] red kernel config heuristics

number of args fixed for 2D red kernels
---
 torch/_inductor/runtime/triton_heuristics.py | 32 ++++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py
index a79d6cd41b7cc..fd689b5962c83 100644
--- a/torch/_inductor/runtime/triton_heuristics.py
+++ b/torch/_inductor/runtime/triton_heuristics.py
@@ -2586,15 +2586,6 @@ def _persistent_reduction_configs(
            )
 
     # defer to more autotuning, initially
-    if "y" in size_hints:
-        pass
-    # TODO(jansel): we should be able to improve these heuristics
-    if not max_autotune_enabled:  # Don't filter if tuning enabled
-        if reduction_hint == ReductionHint.INNER and rnumel >= 256:
-            configs = configs[:1]
-        elif reduction_hint == ReductionHint.OUTER:
-            configs = configs[-1:]
-
     tiny_configs = [
         triton_config_reduction(
             size_hints,
@@ -2603,13 +2594,22 @@
         )
     ]
 
-    if max_autotune_enabled:
-        for conf in tiny_configs:
-            if conf not in configs:
-                configs.append(conf)
-    elif reduction_hint == ReductionHint.OUTER_TINY:
-        configs = tiny_configs
-
+    if "y" in size_hints:
+        pass
+    # TODO(jansel): we should be able to improve these heuristics
+    elif not max_autotune_enabled:  # Don't filter if tuning enabled
+        if reduction_hint == ReductionHint.INNER and rnumel >= 256:
+            configs = configs[:1]
+        elif reduction_hint == ReductionHint.OUTER:
+            configs = configs[-1:]
+        elif reduction_hint == ReductionHint.OUTER_TINY:
+            configs = tiny_configs
+    else:
+        if max_autotune_enabled:
+            for conf in tiny_configs:
+                if conf not in configs:
+                    configs.append(conf)
+
     for c in configs:
         # we don't need Rn_BLOCK for persistent reduction
         for prefix in size_hints:
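
Note (reviewer aid, not part of the patch): below is a minimal standalone sketch of the control flow this change introduces. The function name pick_configs, the Hint enum, and the plain-dict config stand-ins are hypothetical and are not the torch._inductor API; only the branch ordering mirrors the diff, and the diff's redundant inner "if max_autotune_enabled" check is folded into the final else. The intent, as read from the subject line, is that 2D reductions ("y" in size_hints) keep their initial configs untouched (avoiding a wrong number of kernel args), hint-based filtering (now including OUTER_TINY) applies only when autotuning is off, and tiny configs are appended only for 1D reductions when autotuning is on.

# Standalone sketch of the reordered heuristic; names are illustrative,
# not the actual torch._inductor implementation.
from enum import Enum


class Hint(Enum):
    DEFAULT = 0
    INNER = 1
    OUTER = 2
    OUTER_TINY = 3


def pick_configs(size_hints, rnumel, hint, max_autotune, configs, tiny_configs):
    if "y" in size_hints:
        # 2D reduction: keep the initial configs as-is; per the commit
        # subject, this is what fixes the number of args for 2D kernels.
        pass
    elif not max_autotune:
        # 1D reduction, no autotuning: narrow the candidates by hint.
        if hint is Hint.INNER and rnumel >= 256:
            configs = configs[:1]
        elif hint is Hint.OUTER:
            configs = configs[-1:]
        elif hint is Hint.OUTER_TINY:
            configs = tiny_configs
    else:
        # 1D reduction with autotuning: also try the tiny configs.
        for conf in tiny_configs:
            if conf not in configs:
                configs.append(conf)
    return configs


if __name__ == "__main__":
    # 2D reduction ("y" present): configs pass through unchanged.
    print(pick_configs({"x": 64, "y": 32, "r0_": 512}, 512, Hint.OUTER_TINY,
                       max_autotune=False,
                       configs=[{"XBLOCK": 1, "YBLOCK": 32}],
                       tiny_configs=[{"XBLOCK": 2}]))
    # 1D reduction without autotuning: OUTER_TINY selects the tiny config.
    print(pick_configs({"x": 64, "r0_": 512}, 512, Hint.OUTER_TINY,
                       max_autotune=False,
                       configs=[{"XBLOCK": 1}, {"XBLOCK": 8}],
                       tiny_configs=[{"XBLOCK": 2}]))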