
Commit 7d9f26d

Revert "Unskipped multiple inductor tests for ROCm (pytorch#143581)"
This reverts commit e05d677. Reverted pytorch#143581 on behalf of https://github.com/huydhn due to some tests failing on ROCm jobs in trunk ([comment](pytorch#143581 (comment)))
1 parent aaf5615 commit 7d9f26d
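
For context, each hunk in this commit re-applies the skipIfRocm decorator from torch.testing._internal.common_utils to an individual test. Below is a minimal sketch of the two decorator forms used in the diffs, bare and with an explanatory message; the ExampleRocmSkips class and the test bodies are illustrative placeholders, not code from this commit.

# Illustrative sketch only: class name and test bodies are placeholders;
# the decorator usage mirrors the hunks in this commit.
from torch.testing._internal.common_utils import TestCase, run_tests, skipIfRocm


class ExampleRocmSkips(TestCase):
    # Bare form, as re-added in test_max_autotune.py and test_pattern_matcher.py.
    @skipIfRocm
    def test_skipped_on_rocm(self):
        self.assertTrue(True)

    # Form with a reason, as re-added in test_memory_planning.py.
    @skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
    def test_skipped_on_rocm_with_reason(self):
        self.assertTrue(True)


if __name__ == "__main__":
    run_tests()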

File tree: 7 files changed, +32 −2 lines

test/inductor/test_flex_decoding.py (1 addition, 0 deletions)

@@ -1350,6 +1350,7 @@ def bias_mod(score, batch, head, token_q, token_kv):
         self.run_test(bias_mod)
         self.run_test_with_paged_attention(bias_mod)
 
+    @skipIfRocm
     @supported_platform
     def test_fully_masked_out_rows_0_check_gqa(self):
         # Ensure fully masked out rows won't cause NaNs.

test/inductor/test_inductor_freezing.py (1 addition, 0 deletions)

@@ -712,6 +712,7 @@ def foo(mod, inp):
         self.assertEqual(eager, compiled)
         self.assertTrue(weight_ref() is None)
 
+    @skipIfRocm
     def test_conv_with_as_strided(self):
         class Model(nn.Module):
             def __init__(self, groups):

test/inductor/test_max_autotune.py (6 additions, 0 deletions)

@@ -305,6 +305,7 @@ def mm(a, b):
         with config.patch({"max_autotune": True}):
             torch.compile(mm, dynamic=dynamic)(a, b)
 
+    @skipIfRocm
     def test_precompilation_threads(self):
         import threading
         from typing import Any, Dict
@@ -480,6 +481,7 @@ def addmm(x, a, b):
         with config.patch({"max_autotune": True}):
             torch.compile(addmm, dynamic=dynamic)(x, a, b)
 
+    @skipIfRocm
     def test_autotune_conv1x1(self):
         # Assuming input has 3 channels and we want to produce 16 channels as output
         conv1x1 = (
@@ -510,6 +512,7 @@ def foo(mod, x):
             FileCheck().check_not("extern_kernels.convolution").run(code[0])
             self.assertEqual(conv1x1(input_tensor), out, atol=1e-2, rtol=0)
 
+    @skipIfRocm
     def test_filled_cache_precompile(self):
         def fn(a, b, c):
             a = (a @ b) @ c
@@ -528,6 +531,7 @@ def fn(a, b, c):
         fn_c = torch.compile(mode="max-autotune-no-cudagraphs")(fn)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
+    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(search_autotune_cache=True)
     def test_search_autotune_cache(self):
@@ -543,6 +547,7 @@ def fn(a, b, c):
         self.assertEqual(fn(*inputs), fn_c(*inputs), atol=1e-2, rtol=1e-2)
         self.assertEqual(counters["inductor"]["select_algorithm_precompile"], 0)
 
+    @skipIfRocm
     @fresh_inductor_cache()
     @config.patch(max_autotune=True, max_fusion_size=2)
     def test_jit_fusion_matches_aot_fusion(self):
@@ -985,6 +990,7 @@ def tearDown(self):
         super().tearDown()
         PatchCaches.tearDown()
 
+    @skipIfRocm
     @parametrize("dynamic", (False, True))
     def test_max_autotune_remote_caching(self, dynamic: bool):
         from unittest.mock import patch

test/inductor/test_memory_planning.py (7 additions, 1 deletion)

@@ -3,7 +3,12 @@
 import sys
 import unittest
 
-from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfXpu
+from torch.testing._internal.common_utils import (
+    IS_CI,
+    IS_WINDOWS,
+    skipIfRocm,
+    skipIfXpu,
+)
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU, requires_gpu
 
 
@@ -78,6 +83,7 @@ def test_cpp_wrapper(self):
         )
         self.assertTrue(same(f(*args), result))
 
+    @skipIfRocm(msg="test_aot_inductor doesn't work on ROCm")
     @skipIfXpu(msg="aoti doesn't work on XPU")
     def test_aoti(self):
         try:

test/inductor/test_pattern_matcher.py (2 additions, 0 deletions)

@@ -143,6 +143,7 @@ def _test_fused_int_mm_mul_impl(self, fn, args, fused_int_mm_mul_expected=True):
                 ref[indices], test[indices]
             )  # also checks that dtype is correct
 
+    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)
@@ -236,6 +237,7 @@ def f_replaced(x: torch.Tensor) -> torch.Tensor:
         self.assertEqual(f(inp), f_replaced(inp))
         self.assertEqual(count, 2)
 
+    @skipIfRocm
     @skipIfXpu
     @skipCUDAIf(not SM80OrLater, "need sm_80")
     @inductor_config.patch(force_fuse_int_mm_with_mul=True)

test/inductor/test_select_algorithm.py (3 additions, 0 deletions)

@@ -112,6 +112,8 @@ def foo(a, b):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
+    # FIXME: Investigate why _int_mm_out_cuda is not compiled on ROCm
+    @skipIfRocm
     @patches
     def test__int_mm(self):
         @torch.compile
@@ -294,6 +296,7 @@ def fn(x1, x2, seed):
         )
         self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1)
 
+    @skipIfRocm
     @patches
     @torch._inductor.config.patch(conv_1x1_as_mm=False)
     def test_convolution2(self):

test/inductor/test_triton_kernels.py (12 additions, 1 deletion)

@@ -19,7 +19,12 @@
 from torch._library import capture_triton
 from torch.testing import FileCheck
 from torch.testing._internal import common_utils
-from torch.testing._internal.common_utils import parametrize, skipIfXpu, TEST_WITH_ROCM
+from torch.testing._internal.common_utils import (
+    parametrize,
+    skipIfRocm,
+    skipIfXpu,
+    TEST_WITH_ROCM,
+)
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU, HAS_XPU
 from torch.testing._internal.logging_utils import logs_to_string
 
@@ -545,6 +550,7 @@ def call_triton(output):
         call_triton(output)
 
     @requires_gpu
+    @skipIfRocm
     def test_triton_kernel_dependancies(self):
         def call_triton(
             x: torch.Tensor,
@@ -663,6 +669,7 @@ def call_triton(
 
     @requires_gpu
     @skipIfXpu
+    @skipIfRocm
     def test_triton_kernel_constants(self):
         @triton.jit
         def mulC_kernel(
@@ -747,6 +754,7 @@ def grid_fn(meta):
         self.assertEqual(compiled_func(t1, t2, output2), torch_add)
 
     @requires_gpu
+    @skipIfRocm  # https://github.com/pytorch/pytorch/actions/runs/10051552819/job/27782048305?pr=131431
     @common_utils.parametrize("backend", ["eager", "aot_eager", "inductor"])
     @patch.object(
         torch._inductor.config, "unsafe_ignore_unsupported_triton_autotune_args", True
@@ -1296,6 +1304,7 @@ def f(x, y):
         self.assertEqual(compiled_out, eager_out)
 
     @requires_gpu
+    @skipIfRocm
     def test_triton_kernel_with_imported_symbol(self):
         @triton.jit
         def add_kernel_with_imported_symbol(
@@ -1327,6 +1336,7 @@ def f(x):
         self.assertEqual(compiled_out, eager_out)
 
     @requires_gpu
+    @skipIfRocm
     def test_triton_kernel_with_imported_symbol_with_custom_name(self):
         @triton.jit
         def add_kernel_with_imported_symbol(
@@ -2424,6 +2434,7 @@ def argmax_kernel(a_ptr, c_ptr, stride_am, stride_an):
         )
 
     @requires_gpu
+    @skipIfRocm
     def test_triton_kernel_inference_mode(self):
         def f(x, y, out):
             n_elements = x.numel()
