
Commit e05d677

iupaikov-amd authored and jeffdaily committed
Unskipped multiple inductor tests for ROCm (pytorch#143581)
All of them should be fine to run now after the triton fix.

Pull Request resolved: pytorch#143581
Approved by: https://github.com/jataylo, https://github.com/jeffdaily
Co-authored-by: Jeff Daily <[email protected]>
1 parent 28b4992 commit e05d677
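
For context, the change is mechanical: every hunk below either deletes a @skipIfRocm decorator or drops the now-unused skipIfRocm import, so the affected inductor tests run on ROCm builds again. As a rough sketch of what such a guard does, the snippet below shows a simplified stand-in for a ROCm skip decorator; it is not PyTorch's actual skipIfRocm from torch.testing._internal.common_utils, and the helper name skip_if_rocm is invented for this illustration.

import unittest

import torch

# Simplified stand-in for a ROCm skip guard. PyTorch's real helper,
# torch.testing._internal.common_utils.skipIfRocm, also accepts a custom message.
TEST_WITH_ROCM = torch.version.hip is not None


def skip_if_rocm(fn):
    # Skip the decorated test whenever this is a ROCm (HIP) build of torch.
    return unittest.skipIf(TEST_WITH_ROCM, "test skipped on ROCm")(fn)


class ExampleTest(unittest.TestCase):
    @skip_if_rocm  # deleting a line like this is what "unskipping" means in this commit
    def test_something(self):
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()

Once unskipped, the tests can be exercised on a ROCm machine with the usual runners, for example pytest test/inductor/test_max_autotune.py -k test_precompilation_threads (shown only as an illustrative invocation).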

7 files changed: +2 / -32 lines changed

test/inductor/test_flex_decoding.py

Lines changed: 0 additions & 1 deletion
@@ -1350,7 +1350,6 @@ def bias_mod(score, batch, head, token_q, token_kv):
         self.run_test(bias_mod)
         self.run_test_with_paged_attention(bias_mod)
 
-    @skipIfRocm
     @supported_platform
     def test_fully_masked_out_rows_0_check_gqa(self):
         # Ensure fully masked out rows won't cause NaNs.

test/inductor/test_inductor_freezing.py

Lines changed: 0 additions & 1 deletion
@@ -712,7 +712,6 @@ def foo(mod, inp):
         self.assertEqual(eager, compiled)
         self.assertTrue(weight_ref() is None)
 
-    @skipIfRocm
     def test_conv_with_as_strided(self):
         class Model(nn.Module):
             def __init__(self, groups):

test/inductor/test_max_autotune.py

Lines changed: 0 additions & 6 deletions
@@ -305,7 +305,6 @@ def mm(a, b):
         with config.patch({"max_autotune": True}):
             torch.compile(mm, dynamic=dynamic)(a, b)
 
-    @skipIfRocm
     def test_precompilation_threads(self):
         import threading
         from typing import Any, Dict
@@ -481,7 +480,6 @@ def addmm(x, a, b):
         with config.patch({"max_autotune": True}):
             torch.compile(addmm, dynamic=dynamic)(x, a, b)
 
-    @skipIfRocm
     def test_autotune_conv1x1(self):
         # Assuming input has 3 channels and we want to produce 16 channels as output
         conv1x1 = (
@@ -512,7 +510,6 @@ def foo(mod, x):
         FileCheck().check_not("extern_kernels.convolution").run(code[0])
         self.assertEqual(conv1x1(input_tensor), out, atol=1e-2, rtol=0)
 
-    @skipIfRocm
     def test_filled_cache_precompile(self):
         def fn(a, b, c):
             a = (a @ b) @ c
@@ -531,7 +528,6 @@ def fn(a, b, c):
         fn_c = torch.compile(mode="max-autotune-no-cudagraphs")(fn)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
-    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(search_autotune_cache=True)
     def test_search_autotune_cache(self):
@@ -547,7 +543,6 @@ def fn(a, b, c):
         self.assertEqual(fn(*inputs), fn_c(*inputs), atol=1e-2, rtol=1e-2)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
-    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(max_autotune=True, max_fusion_size=2)
     def test_jit_fusion_matches_aot_fusion(self):
@@ -990,7 +985,6 @@ def tearDown(self):
         super().tearDown()
         PatchCaches.tearDown()
 
-    @skipIfRocm
     @parametrize("dynamic", (False, True))
     def test_max_autotune_remote_caching(self, dynamic: bool):
         from unittest.mock import patch

test/inductor/test_memory_planning.py

Lines changed: 1 addition & 7 deletions
@@ -3,12 +3,7 @@
 import sys
 import unittest
 
-from torch.testing._internal.common_utils import (
-    IS_CI,
-    IS_WINDOWS,
-    skipIfRocm,
-    skipIfXpu,
-)
+from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, requires_gpu
 
 
@@ -83,7 +78,6 @@ def test_cpp_wrapper(self):
         )
         self.assertTrue(same(f(*args), result))
 
-    @skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
     @skipIfXpu(msg="aoti doesn't work on XPU")
     def test_aoti(self):
         try:

test/inductor/test_pattern_matcher.py

Lines changed: 0 additions & 2 deletions
@@ -143,7 +143,6 @@ def _test_fused_int_mm_mul_impl(self, fn, args, fused_int_mm_mul_expected=True):
                 ref[indices], test[indices]
             )  # also checks that dtype is correct
 
-    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -237,7 +236,6 @@ def f_replaced(x: torch.Tensor) -> torch.Tensor:
         self.assertEqual(f(inp), f_replaced(inp))
         self.assertEqual(count, 2)
 
-    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)

test/inductor/test_select_algorithm.py

Lines changed: 0 additions & 3 deletions
@@ -112,8 +112,6 @@ def foo(a, b):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
-    # FIXME: Investigate why _int_mm_out_cuda is not compiled on ROCm
-    @skipIfRocm
     @patches
     def test__int_mm(self):
         @torch.compile
@@ -296,7 +294,6 @@ def fn(x1, x2, seed):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
-    @skipIfRocm
     @patches
     @torch._inductor.config.patch(conv_1x1_as_mm=False)
     def test_convolution2(self):

test/inductor/test_triton_kernels.py

Lines changed: 1 addition & 12 deletions
@@ -19,12 +19,7 @@
 from torch._library import capture_triton
 from torch.testing import FileCheck
 from torch.testing._internal import common_utils
-from torch.testing._internal.common_utils import (
-    parametrize,
-    skipIfRocm,
-    skipIfXpu,
-    TEST_WITH_ROCM,
-)
+from torch.testing._internal.common_utils import parametrize, skipIfXpu, TEST_WITH_ROCM
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU, HAS_XPU
 from torch.testing._internal.logging_utils import logs_to_string
 
@@ -550,7 +545,6 @@ def call_triton(output):
             call_triton(output)
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_dependancies(self):
         def call_triton(
             x: torch.Tensor,
@@ -669,7 +663,6 @@ def call_triton(
 
     @requires_gpu
     @skipIfXpu
-    @skipIfRocm
     def test_triton_kernel_constants(self):
         @triton.jit
         def mulC_kernel(
@@ -754,7 +747,6 @@ def grid_fn(meta):
         self.assertEqual(compiled_func(t1, t2, output2), torch_add)
 
     @requires_gpu
-    @skipIfRocm  # https://github.com/pytorch/pytorch/actions/runs/10051552819/job/27782048305?pr=131431
     @common_utils.parametrize("backend", ["eager", "aot_eager", "inductor"])
     @patch.object(
         torch._inductor.config, "unsafe_ignore_unsupported_triton_autotune_args", True
@@ -1304,7 +1296,6 @@ def f(x, y):
         self.assertEqual(compiled_out, eager_out)
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_with_imported_symbol(self):
         @triton.jit
         def add_kernel_with_imported_symbol(
@@ -1336,7 +1327,6 @@ def f(x):
         self.assertEqual(compiled_out, eager_out)
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_with_imported_symbol_with_custom_name(self):
         @triton.jit
         def add_kernel_with_imported_symbol(
@@ -2434,7 +2424,6 @@ def argmax_kernel(a_ptr, c_ptr, stride_am, stride_an):
         )
 
     @requires_gpu
-    @skipIfRocm
     def test_triton_kernel_inference_mode(self):
         def f(x, y, out):
             n_elements = x.numel()
