@@ -305,6 +305,7 @@ def mm(a, b):
305305 with config .patch ({"max_autotune" : True }):
306306 torch .compile (mm , dynamic = dynamic )(a , b )
307307
308+ @skipIfRocm
308309 def test_precompilation_threads (self ):
309310 import threading
310311 from typing import Any , Dict
@@ -480,6 +481,7 @@ def addmm(x, a, b):
480481 with config .patch ({"max_autotune" : True }):
481482 torch .compile (addmm , dynamic = dynamic )(x , a , b )
482483
484+ @skipIfRocm
483485 def test_autotune_conv1x1 (self ):
484486 # Assuming input has 3 channels and we want to produce 16 channels as output
485487 conv1x1 = (
@@ -510,6 +512,7 @@ def foo(mod, x):
510512 FileCheck ().check_not ("extern_kernels.convolution" ).run (code [0 ])
511513 self .assertEqual (conv1x1 (input_tensor ), out , atol = 1e-2 , rtol = 0 )
512514
515+ @skipIfRocm
513516 def test_filled_cache_precompile (self ):
514517 def fn (a , b , c ):
515518 a = (a @ b ) @ c
@@ -528,6 +531,7 @@ def fn(a, b, c):
528531 fn_c = torch .compile (mode = "max-autotune-no-cudagraphs" )(fn )
529532 self .assertEqual (counters ["inductor" ]["select_algorithm_precompile" ], 0 )
530533
534+ @skipIfRocm
531535 @fresh_inductor_cache ()
532536 @config .patch (search_autotune_cache = True )
533537 def test_search_autotune_cache (self ):
@@ -543,6 +547,7 @@ def fn(a, b, c):
543547 self .assertEqual (fn (* inputs ), fn_c (* inputs ), atol = 1e-2 , rtol = 1e-2 )
544548 self .assertEqual (counters ["inductor" ]["select_algorithm_precompile" ], 0 )
545549
550+ @skipIfRocm
546551 @fresh_inductor_cache ()
547552 @config .patch (max_autotune = True , max_fusion_size = 2 )
548553 def test_jit_fusion_matches_aot_fusion (self ):
@@ -985,6 +990,7 @@ def tearDown(self):
985990 super ().tearDown ()
986991 PatchCaches .tearDown ()
987992
993+ @skipIfRocm
988994 @parametrize ("dynamic" , (False , True ))
989995 def test_max_autotune_remote_caching (self , dynamic : bool ):
990996 from unittest .mock import patch
0 commit comments