ROCm: Enable tf32 testing on test_nn (#55)

jagadish-amd · pragupta · commit c1c3a9125089 · 2025-10-15T02:01:06.000Z
* Add trailing comma for consistency in gfx architecture list Signed-off-by: Jagadish Krishnamoorthy <jagadish.krishnamoorthy@amd.com> * ROCm: Enable tf32 testing on test_nn Signed-off-by: Jagadish Krishnamoorthy <jagadish.krishnamoorthy@amd.com> --------- Signed-off-by: Jagadish Krishnamoorthy <jagadish.krishnamoorthy@amd.com> (cherry picked from commit c113e14)
diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py
@@ -180,6 +180,9 @@ def tf32_off():
 
 @contextlib.contextmanager
 def tf32_on(self, tf32_precision=1e-5):
+    if torch.version.hip:
+        hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
+        os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
     old_allow_tf32_matmul = torch.backends.cuda.matmul.allow_tf32
     old_precision = self.precision
     try:
@@ -188,6 +191,11 @@ def tf32_on(self, tf32_precision=1e-5):
         with torch.backends.cudnn.flags(enabled=None, benchmark=None, deterministic=None, allow_tf32=True):
             yield
     finally:
+        if torch.version.hip:
+            if hip_allow_tf32 is not None:
+                os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
+            else:
+                del os.environ["HIPBLASLT_ALLOW_TF32"]
         torch.backends.cuda.matmul.allow_tf32 = old_allow_tf32_matmul
         self.precision = old_precision