From fdf61977af3e234d006f65a8415472cfd70b69a4 Mon Sep 17 00:00:00 2001 From: Jack Taylor <108682042+jataylo@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:38:26 +0100 Subject: [PATCH] Bug fix and optimisation for persistent reduction kernel tuning The original PR had incorrect indentation. This updates the logic so that, when max-autotune is enabled, the tiny configs are always added to the candidate configs; otherwise only the hinted configs are used. --- torch/_inductor/runtime/triton_heuristics.py | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/torch/_inductor/runtime/triton_heuristics.py b/torch/_inductor/runtime/triton_heuristics.py index 2ea6a2d467a67..a79d6cd41b7cc 100644 --- a/torch/_inductor/runtime/triton_heuristics.py +++ b/torch/_inductor/runtime/triton_heuristics.py @@ -2595,20 +2595,20 @@ def _persistent_reduction_configs( elif reduction_hint == ReductionHint.OUTER: configs = configs[-1:] - if reduction_hint == ReductionHint.OUTER_TINY: - tiny_configs = [ - triton_config_reduction( - size_hints, - 2 * (256 // rnumel) if rnumel <= 256 else 1, - rnumel, - ) - ] - if max_autotune_enabled: - for tconfig in tiny_configs: - if tconfig not in configs: - configs.append(tconfig) - else: - configs = tiny_configs + tiny_configs = [ + triton_config_reduction( + size_hints, + 2 * (256 // rnumel) if rnumel <= 256 else 1, + rnumel, + ) + ] + + if max_autotune_enabled: + for conf in tiny_configs: + if conf not in configs: + configs.append(conf) + elif reduction_hint == ReductionHint.OUTER_TINY: + configs = tiny_configs for c in configs: # we don't need Rn_BLOCK for persistent reduction