
Commit a0b4f86

jeffdaily authored and slojosic-amd committed
remove HIPBLASLT_ALLOW_TF32 os.environ manipulation from tests
1 parent 20e4bc9 commit a0b4f86
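
Each affected test previously defined its own `_hip_allow_tf32` context manager that exported HIPBLASLT_ALLOW_TF32=1 around TF32-sensitive code and restored the previous value afterwards. Below is a minimal sketch of the removed helper, assembled from the deleted hunks in this commit (shown here in module-level form; several files defined it as a test-class method, and the call site shown in comments is illustrative):

    import contextlib
    import os

    @contextlib.contextmanager
    def _hip_allow_tf32():
        # For HIP/AMDGPU, TF32 sits behind an env flag because support is new
        # and limited to MI300+; the flag is expected to go away eventually.
        prior = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
        os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
        try:
            yield
        finally:
            # Put back whatever was there before (or remove the variable again).
            if prior is not None:
                os.environ["HIPBLASLT_ALLOW_TF32"] = prior
            else:
                del os.environ["HIPBLASLT_ALLOW_TF32"]

    # Typical call site in the removed test code (illustrative):
    #   tf32_ctx = _hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
    #   with tf32_ctx():
    #       ...  # TF32-sensitive assertions

After this change the tests no longer touch os.environ and instead rely on the PyTorch-level switches that remain in the diffs below, e.g. torch.backends.cuda.matmul.allow_tf32 and torch.set_float32_matmul_precision("highest").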

File tree

6 files changed (+42, -162 lines)

test/dynamo/test_graph_region_tracker.py

Lines changed: 22 additions & 39 deletions
@@ -196,21 +196,6 @@ def fn(x, y, z):
         )
 
     def test_mismatched_global_state(self):
-        @contextlib.contextmanager
-        def _hip_allow_tf32():
-            # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new
-            # and only for MI300+
-            hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
-            os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
-
-            try:
-                yield
-            finally:
-                if hip_allow_tf32 is not None:
-                    os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
-                else:
-                    del os.environ["HIPBLASLT_ALLOW_TF32"]
-
         def inner_fn(x, y):
             x1 = x * 1
             y1 = y + 1
@@ -251,31 +236,29 @@ def set_default_dtype_bfloat16():
         def reset_default_dtype():
             torch.set_default_dtype(old_dtype)
 
-        tf32_ctx = _hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
-        with tf32_ctx():
-            for ctx in [
-                lambda: torch.set_grad_enabled(False),
-                torch.autograd.grad_mode.inference_mode,
-                lambda: torch.autograd.graph.disable_saved_tensors_hooks(
-                    "This is not supported"
-                ),
-                # lambda: torch.set_num_threads(2), : Unsupported
-                (set_default_dtype_bfloat16, reset_default_dtype),
-                (
-                    lambda: torch.use_deterministic_algorithms(True),
-                    lambda: torch.use_deterministic_algorithms(False),
-                ),
-                # (lambda: torch.use_deterministic_algorithms(True, warn_only=True),
-                #  lambda: torch.use_deterministic_algorithms(False)), : Unsupported
-                create_toggle_fns("allow_bf16_reduced_precision_reduction"),
-                create_toggle_fns("allow_fp16_reduced_precision_reduction"),
-                create_toggle_fns("allow_tf32"),
-            ]:
-                self.assertExpectedInline(
-                    self.get_result(fn, torch.rand(10, 10), torch.ones(10, 20), ctx),
-                    """[[['x1_2', 'y1_2', 'sum_3', 'o0'], ['x1_3', 'y1_3', 'sum_4', 'o2']], \
+        for ctx in [
+            lambda: torch.set_grad_enabled(False),
+            torch.autograd.grad_mode.inference_mode,
+            lambda: torch.autograd.graph.disable_saved_tensors_hooks(
+                "This is not supported"
+            ),
+            # lambda: torch.set_num_threads(2), : Unsupported
+            (set_default_dtype_bfloat16, reset_default_dtype),
+            (
+                lambda: torch.use_deterministic_algorithms(True),
+                lambda: torch.use_deterministic_algorithms(False),
+            ),
+            # (lambda: torch.use_deterministic_algorithms(True, warn_only=True),
+            #  lambda: torch.use_deterministic_algorithms(False)), : Unsupported
+            create_toggle_fns("allow_bf16_reduced_precision_reduction"),
+            create_toggle_fns("allow_fp16_reduced_precision_reduction"),
+            create_toggle_fns("allow_tf32"),
+        ]:
+            self.assertExpectedInline(
+                self.get_result(fn, torch.rand(10, 10), torch.ones(10, 20), ctx),
+                """[[['x1_2', 'y1_2', 'sum_3', 'o0'], ['x1_3', 'y1_3', 'sum_4', 'o2']], \
 [['x1', 'y1', 'sum_1', 'o4'], ['x1_1', 'y1_1', 'sum_2', 'o5']]]""",
-                )
+            )
 
     def test_mutation_tracking_simple(self):
         def fn(x, y, z):

test/dynamo/test_misc.py

Lines changed: 18 additions & 37 deletions
@@ -8421,43 +8421,24 @@ def write_state(state):
         def fn(x):
             return x + 1
 
-        import contextlib
-
-        @contextlib.contextmanager
-        def _hip_allow_tf32():
-            # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new
-            # and only for MI300+
-            hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
-            os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
-
-            try:
-                yield
-            finally:
-                if hip_allow_tf32 is not None:
-                    os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
-                else:
-                    del os.environ["HIPBLASLT_ALLOW_TF32"]
-
-        tf32_ctx = _hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
-        with tf32_ctx():
-            initial_state = read_state()
-            y = torch.randn(10)
-            try:
-                for round in range(3):
-                    for i in range(len(initial_state)):
-                        new_state = [False] * len(initial_state)
-                        new_state[i] = True
-                        write_state(new_state)
-                        assert read_state() == new_state
-                        last_state.clear()
-                        fn(y)
-                        assert last_state == new_state
-                        if round == 0:
-                            assert cnt == i + 1
-                        else:
-                            assert cnt == len(initial_state)
-            finally:
-                write_state(initial_state)
+        initial_state = read_state()
+        y = torch.randn(10)
+        try:
+            for round in range(3):
+                for i in range(len(initial_state)):
+                    new_state = [False] * len(initial_state)
+                    new_state[i] = True
+                    write_state(new_state)
+                    assert read_state() == new_state
+                    last_state.clear()
+                    fn(y)
+                    assert last_state == new_state
+                    if round == 0:
+                        assert cnt == i + 1
+                    else:
+                        assert cnt == len(initial_state)
+        finally:
+            write_state(initial_state)
 
     def test_grad_state_mutated(self):
         prior = torch.is_grad_enabled()

test/inductor/test_flex_decoding.py

Lines changed: 0 additions & 3 deletions
@@ -43,9 +43,6 @@
 
 
 Tolerances = namedtuple("Tolerances", ["atol", "rtol"])
-# In MI300, HIPBLASLT_ALLOW_TF32=1 is used to enable tf32 for matmul.
-# In the current test, HIPBLASLT_ALLOW_TF32 is not set, according to the
-# logic of allowTF32CuBLAS(), set float32_matmul_precision to highest.
 if torch.version.hip:
     torch.set_float32_matmul_precision("highest")
 else:

test/inductor/test_padding.py

Lines changed: 0 additions & 3 deletions
@@ -109,9 +109,6 @@ def setUpClass(cls):
         if HAS_GPU:
             cls.prior_float32_matmul_precision = torch.get_float32_matmul_precision()
             cls.prior_default_device = torch.get_default_device()
-            # In MI300, HIPBLASLT_ALLOW_TF32=1 is used to enable tf32 for matmul.
-            # In the current test, HIPBLASLT_ALLOW_TF32 is not set, according to the
-            # logic of allowTF32CuBLAS(), set float32_matmul_precision to highest.
             if torch.version.hip:
                 torch.set_float32_matmul_precision("highest")
             else:

test/test_cuda.py

Lines changed: 0 additions & 52 deletions
@@ -759,53 +759,7 @@ def check_workspace_size(inp):
 
         torch._C._cuda_clearCublasWorkspaces()
 
-    @contextlib.contextmanager
-    def _hip_allow_tf32(self):
-        # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new
-        # and only for MI300+
-        hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
-        os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
-
-        try:
-            yield
-        finally:
-            if hip_allow_tf32 is not None:
-                os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
-            else:
-                del os.environ["HIPBLASLT_ALLOW_TF32"]
-
-    @unittest.skipIf(not TEST_WITH_ROCM, "not relevant for CUDA testing")
-    def test_hipblaslt_allow_tf32(self):
-        tf32_ctx = self._hip_allow_tf32
-        with tf32_ctx():
-            os.environ["HIPBLASLT_ALLOW_TF32"] = "0"
-            # Save original value of allow_tf32
-            orig = torch.backends.cuda.matmul.allow_tf32
-            # If allow_tf32 variable is declared as static in aten/src/ATen/Context.cpp
-            # then matmul.allow_tf32 will return False after this point even if
-            # HIP_BLASLT_ALLOW_TF32 is set to 1 and matmul.allow_tf32 is changed.
-            os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
-            # Toggle torch.backends.cuda.matmul.allow_tf32 couple of times.
-            torch.backends.cuda.matmul.allow_tf32 = not orig
-            test1 = torch.backends.cuda.matmul.allow_tf32
-            torch.backends.cuda.matmul.allow_tf32 = orig
-            test2 = torch.backends.cuda.matmul.allow_tf32
-            self.assertNotEqual(test1, test2)
-            # Restore original value of allow_tf32
-            torch.backends.cuda.matmul.allow_tf32 = orig
-
     def test_cublas_allow_tf32_get_set(self):
-        """
-        We only turn on TF32 for MI300 with a special env var. This is because TF32
-        is only available in MI300+ and is in experimental mode (hipblaslt support
-        is current WIP)
-        """
-        tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
-
-        with tf32_ctx():
-            self._test_cublas_allow_tf32_get_set_inner()
-
-    def _test_cublas_allow_tf32_get_set_inner(self):
         skip_tf32_cublas = "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE" in os.environ and int(
             os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"]
         )
@@ -820,12 +774,6 @@ def _test_cublas_allow_tf32_get_set_inner(self):
         torch.backends.cuda.matmul.allow_tf32 = orig
 
     def test_float32_matmul_precision_get_set(self):
-        tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
-
-        with tf32_ctx():
-            self._test_float32_matmul_precision_get_set_inner()
-
-    def _test_float32_matmul_precision_get_set_inner(self):
         orig = torch.get_float32_matmul_precision()
         skip_tf32_cublas = "TORCH_ALLOW_TF32_CUBLAS_OVERRIDE" in os.environ and int(
             os.environ["TORCH_ALLOW_TF32_CUBLAS_OVERRIDE"]

test/test_linalg.py

Lines changed: 2 additions & 28 deletions
@@ -109,22 +109,6 @@ def get_tunableop_untuned_filename():
     return untuned_filename
 
 class TestLinalg(TestCase):
-    @contextlib.contextmanager
-    def _hip_allow_tf32(self):
-        # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new
-        # and only for MI300+. Environment variable will be removed in the future.
-        import os
-        hip_allow_tf32 = os.environ.get("HIPBLASLT_ALLOW_TF32", None)
-        os.environ["HIPBLASLT_ALLOW_TF32"] = "1"
-
-        try:
-            yield
-        finally:
-            if hip_allow_tf32 is not None:
-                os.environ["HIPBLASLT_ALLOW_TF32"] = hip_allow_tf32
-            else:
-                del os.environ["HIPBLASLT_ALLOW_TF32"]
-
     def setUp(self):
         super().setUp()
         torch.backends.cuda.matmul.allow_tf32 = False
@@ -5542,13 +5526,8 @@ def test_scaled_gemm_tunableop(self, device, dtype):
     @runOnRocmArch(MI300_ARCH)
     @dtypes(torch.float)
     def test_tf32_tunableop(self, device, dtype):
-        # Test TunableOp with TF32. Supported by hipblasLT on MI300+.
-        # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new
-        # and only for MI300+. Eventually this flag will go away.
-        tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
-
         try:
-            with self._tunableop_ctx(), tf32_ctx():
+            with self._tunableop_ctx():
                 torch.backends.cuda.matmul.allow_tf32 = True
                 torch.cuda.tunable.set_rotating_buffer_size(0)
 
@@ -5611,13 +5590,8 @@ def test_tf32_offline_tunableop(self, device, dtype):
         # This test is the offline version of test_tf32_tunableop
         import os
 
-        # Test TunableOp with TF32. Supported by hipblasLT on MI300+.
-        # for HIP/AMDGPU, tf32 is behind a flag because the TF32 support is new
-        # and only for MI300+. Eventually this flag will go away.
-        tf32_ctx = self._hip_allow_tf32 if torch.version.hip else contextlib.nullcontext
-
         try:
-            with self._tunableop_ctx(), tf32_ctx():
+            with self._tunableop_ctx():
                 torch.backends.cuda.matmul.allow_tf32 = True
                 ordinal = torch.cuda.current_device()
                 torch.cuda.tunable.set_rotating_buffer_size(0)

0 commit comments
