[release/2.8][ROCm][inductor] Improved fast_tanh code generation (#2803)

naromero77amd · web-flow · commit cba8b9d296cf · 2025-11-17T12:31:02.000-06:00
In the ROCm fork of PyTorch 2.8, Inductor currently has codegen support for fast_tanhf. However, the original Triton implementation of fast_tanhf had some NaN issues. Upstream Triton now has an improved fast_tanhf in which these NaN issues are fixed. This upstream commit has been backported to the ROCm fork of Triton (see code comments). A bump of the pinned Triton commit is also needed.

Other notes:
- In support of [SWDEV-560271](https://ontrack-internal.amd.com/browse/SWDEV-560271)
- Triton 3.4 backport of upstream Triton commit ROCm/triton#900
- Similar to #2802, #2804
- Related to pytorch#162052
diff --git a/.ci/docker/ci_commit_pins/triton.txt b/.ci/docker/ci_commit_pins/triton.txt
@@ -1 +1 @@
-21876a4bbaf371bcb83df8e6ee4f43a92f524dfe
+0cace8d2336a9dc399effbb11522eea7f7b8c0b2
diff --git a/torch/_inductor/codegen/triton.py b/torch/_inductor/codegen/triton.py
@@ -26,7 +26,7 @@
 from torch._prims_common import is_integer_dtype
 from torch.utils._ordered_set import OrderedSet
 from torch.utils._sympy.functions import CeilDiv, FloorDiv, ModularIndexing
-from torch.utils._triton import has_triton_package
+from torch.utils._triton import has_triton_package, get_triton_version
 
 from ...utils._sympy.symbol import free_symbol_is_type, prefix_str, symbol_is_type, SymT
 from ...utils._sympy.value_ranges import ValueRanges
@@ -1232,7 +1232,12 @@ def tan(x):
     @staticmethod
     @maybe_upcast_float32()
     def tanh(x):
-        return f"libdevice.fast_tanhf({x})"
+        if torch.version.hip and get_triton_version() > (3, 2):
+            # On ROCm, use fast_tanhf depending on Triton version
+            # Requires ROCm fork of Triton 3.3, 3.4, 3.5 or upstream Triton 3.6+
+            return f"libdevice.fast_tanhf({x})"
+        else:
+            return f"libdevice.tanh({x})"
 
     @staticmethod
     @maybe_upcast_float32()

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-21876a4bbaf371bcb83df8e6ee4f43a92f524dfe`
	`1`	`+0cace8d2336a9dc399effbb11522eea7f7b8c0b2`