[Inductor][Triton] Support TMA before strict 3.4 cutoff (pytorch#159777)

njriasan · pytorchmergebot · commit bbc0df1094b5 · 2025-08-05T03:29:13.000Z
Summary: Inductor's 3.4 Triton release is the most common used variant of Triton, but if someone is working with an alternative version of Triton this may not match. This moves the version check from 3.4 Triton to any variant that has support for the TMA APIs. Test Plan: Relying on CI. Should be a NFC. Rollback Plan: Reviewed By: davidberard98 Differential Revision: D79378792 Pull Request resolved: pytorch#159777 Approved by: https://github.com/davidberard98
diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py
@@ -26,7 +26,7 @@
 from torch._prims_common import is_integer_dtype
 from torch.utils._ordered_set import OrderedSet
 from torch.utils._sympy.functions import CeilDiv, FloorDiv, ModularIndexing
-from torch.utils._triton import has_triton_package
+from torch.utils._triton import has_triton_package, has_triton_stable_tma_api
 
 from ...utils._sympy.symbol import free_symbol_is_type, prefix_str, symbol_is_type, SymT
 from ...utils._sympy.value_ranges import ValueRanges
@@ -1692,14 +1692,12 @@ def __post_init__(self):
     def can_use_tma(
         self,
     ) -> bool:
-        import triton
-
         if not (
             V.graph.get_current_device_or_throw().type == "cuda"
             and torch.cuda.get_device_capability()[0] >= 9
             and config.triton.use_tensor_descriptor
             and config.assume_aligned_inputs
-            and triton.__version__ >= "3.4.0"
+            and has_triton_stable_tma_api()
             # For CUDA The base ptr needs to be aligned
         ):
             log.debug(