@@ -30,7 +30,6 @@
     skipCUDAIfMiopen,
     skipCUDAIfNoCudnn,
     skipCUDAIfNoMiopen,
-    skipCUDAIfNotMiopenSuggestNHWC,
     skipCUDAIfRocm,
     skipMeta,
     skipMPS,
@@ -51,8 +50,6 @@
     parametrize as parametrize_test,
     run_tests,
     set_default_dtype,
-    skipIfNotMiopenSuggestNHWC,
-    skipIfRocmVersionLessThan,
     subtest,
     TEST_SCIPY,
     TEST_WITH_ROCM,
@@ -64,6 +61,7 @@

 if TEST_WITH_ROCM:
     os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC"] = "1"
+    os.environ["PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM"] = "1"


 if TEST_SCIPY:
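
For orientation (not part of the patch): the two environment variables above opt the MIOpen backend into suggesting NHWC / channels_last layouts on ROCm builds, and the second one, added here, extends that to batch norm. Below is a minimal sketch of setting the same toggles in a standalone script; the setdefault pattern and the layer sizes are illustrative assumptions, not taken from the patch.

import os

# Enable MIOpen's NHWC (channels_last) suggestions before any convolution or
# batchnorm work, mirroring what the test file does at import time.
os.environ.setdefault("PYTORCH_MIOPEN_SUGGEST_NHWC", "1")
os.environ.setdefault("PYTORCH_MIOPEN_SUGGEST_NHWC_BATCHNORM", "1")

import torch
import torch.nn as nn

if torch.cuda.is_available() and torch.version.hip is not None:
    # Illustrative channels_last convolution on a ROCm build.
    conv = nn.Conv2d(8, 8, 3, device="cuda").to(memory_format=torch.channels_last)
    x = torch.randn(2, 8, 32, 32, device="cuda").to(memory_format=torch.channels_last)
    print(conv(x).is_contiguous(memory_format=torch.channels_last))
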
@@ -715,7 +713,6 @@ def test_ConvTranspose2d_half_cublas_gemm(self):
     # Almost identical to the above `test_Conv2d_naive_groups`
     @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False)
     @tf32_on_and_off(0.001)
-    @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
     def test_Conv2d_groups_nobias(self):
         dev_dtypes = [("cpu", torch.float)]
         if TEST_CUDA:
@@ -761,7 +758,6 @@ def test_Conv2d_groups_nobias(self):
     # and https://github.com/pytorch/pytorch/pull/18463#issuecomment-477001024
     @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False)
     @tf32_on_and_off(0.001)
-    @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
     def test_Conv2d_groups_nobias_v2(self):
         torch.manual_seed(123)
         dev_dtypes = [("cpu", torch.float)]
@@ -896,7 +892,6 @@ def test_conv_tbc(self):

     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
     @unittest.skipIf(not TEST_CUDNN, "needs cudnn")
-    @skipIfNotMiopenSuggestNHWC
     def test_grouped_conv_cudnn_nhwc_support(self):
         # in order to catch the holes in grouped convolution in nhwc support for earlier cudnn versions
         input = torch.randn((16, 16, 8, 8), dtype=torch.float16, device="cuda").to(
@@ -3145,7 +3140,6 @@ def test_conv_noncontig_weights_and_bias(self, device):

     @onlyCUDA
     @largeTensorTest("12GB")
-    @skipIfRocmVersionLessThan((6, 0))
     def test_conv_transposed_large(self, device):
         dtype = torch.half if self.device_type == "cuda" else torch.float
         conv = nn.ConvTranspose2d(1, 1, 1, 1, bias=False).to(device).to(dtype)
@@ -3189,7 +3183,6 @@ def test_conv_transposed_large(self, device):
         self.assertEqual(maxdiff3, 0)

     @onlyCUDA
-    @skipCUDAIfRocm
     @largeTensorTest("12GB")
     def test_conv_large(self, device):
         dtype = torch.half if self.device_type == "cuda" else torch.float
@@ -3222,7 +3215,6 @@ def test_conv_large(self, device):
         self.assertEqual(grad1, grad2, atol=5e-2, rtol=5e-3)

     @onlyCUDA
-    @skipCUDAIfRocm
     @largeTensorTest("20GB", "cpu")
     @largeTensorTest("60GB", "cuda")
     def test_conv_large_batch_1(self, device):
@@ -3370,7 +3362,6 @@ def test_ConvTranspose3d_size_1_kernel(self, device):
     @dtypes(torch.float)
     @torch.backends.cudnn.flags(enabled=True, deterministic=True, benchmark=False)
     @tf32_on_and_off(0.001)
-    @unittest.skipIf(TEST_WITH_ROCM, "Skipped on ROCm, since it is failing on ROCm 5.7")
     def test_Conv2d_naive_groups(self, device, dtype):
         # Check that grouped convolutions match two half convolutions
         m = nn.Conv2d(4, 4, kernel_size=3, groups=2).to(device, dtype)
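
The comment in this hunk states the property under test: a grouped convolution should match two independent half convolutions applied to the two halves of the input channels. Below is a small self-contained sketch of that equivalence on CPU; the tensor sizes and tolerance are illustrative and not taken from the test.

import torch
import torch.nn as nn

torch.manual_seed(0)

# One grouped convolution over 4 channels (groups=2) ...
m = nn.Conv2d(4, 4, kernel_size=3, groups=2)

# ... and two independent 2->2 convolutions sharing its parameters.
m1 = nn.Conv2d(2, 2, kernel_size=3)
m2 = nn.Conv2d(2, 2, kernel_size=3)
m1.weight.data.copy_(m.weight.data[:2])
m1.bias.data.copy_(m.bias.data[:2])
m2.weight.data.copy_(m.weight.data[2:])
m2.bias.data.copy_(m.bias.data[2:])

x = torch.randn(2, 4, 6, 6)
full = m(x)
halves = torch.cat([m1(x[:, :2]), m2(x[:, 2:])], dim=1)
print(torch.allclose(full, halves, atol=1e-6))  # expected: True
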
@@ -3639,19 +3630,21 @@ def helper(
         )

     @onlyCUDA
-    @skipCUDAIfNotMiopenSuggestNHWC
     @dtypes(torch.half, torch.float, torch.cfloat)
     def test_conv_cudnn_nhwc(self, device, dtype):
         def helper(n, c, h, w, out_channels, kernel_size, groups):
-            input = torch.randint(-3, 3, (n, c, h, w), dtype=dtype, device=device).to(
-                memory_format=torch.channels_last
-            )
+            # randint with dtype=torch.cfloat fails with
+            # RuntimeError: check_random_bounds handles only integral, floating-point and boolean types
+            # so create randint and randint_like tensors with the default int64 dtype, then cast to the desired dtype
+            input = torch.randint(
+                -3, 3, (n, c, h, w), dtype=torch.int64, device=device
+            ).to(dtype, memory_format=torch.channels_last)
             input.requires_grad_()
             conv = nn.Conv2d(c, out_channels, kernel_size, groups=groups).to(
                 device="cuda", dtype=dtype, memory_format=torch.channels_last
             )
             for p in conv.parameters():
-                p.data = torch.randint_like(p, -3, 3)
+                p.data = torch.randint_like(p, -3, 3, dtype=torch.int64).to(p.dtype)

             # use FP64 channels-first conv as reference
             ref_input = input.detach().clone().contiguous().double().requires_grad_()
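
The added comments above describe the workaround this hunk applies: torch.randint rejects complex dtypes, so integer-valued test tensors are drawn as int64 and then cast to the target dtype, keeping the values exactly representable. A minimal standalone illustration of the same pattern follows; the shapes are arbitrary examples.

import torch

shape = (2, 4, 8, 8)  # illustrative sizes only

# Draw integers as int64, then cast to the dtypes under test.
ints = torch.randint(-3, 3, shape, dtype=torch.int64)
cplx = ints.to(torch.cfloat)  # real parts in {-3..2}, imaginary parts zero
half = ints.to(torch.half)

# randint_like follows the same pattern when the template tensor is float/complex.
template = torch.empty(shape, dtype=torch.cfloat)
like = torch.randint_like(template, -3, 3, dtype=torch.int64).to(template.dtype)
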
@@ -3665,7 +3658,7 @@ def helper(n, c, h, w, out_channels, kernel_size, groups):
             out = conv(input)
             ref_out = ref_conv(ref_input)

-            grad = torch.randint_like(out, -3, 3)
+            grad = torch.randint_like(out, -3, 3, dtype=torch.int64).to(out.dtype)
             ref_grad = grad.detach().clone().double().contiguous()

             out.backward(grad)
@@ -3692,7 +3685,6 @@ def helper(n, c, h, w, out_channels, kernel_size, groups):
         helper(1, 16, 56, 56, out_channels=16, kernel_size=3, groups=16)

     @onlyCUDA
-    @skipCUDAIfRocm
     @dtypes(torch.half, torch.float)
     def test_conv_cudnn_ndhwc(self, device, dtype):
         def helper(n, c, d, h, w, out_channels, kernel_size, groups):
@@ -3822,7 +3814,6 @@ def _test_conv_cudnn_nhwc_nchw(self, layer, n, c, h, w, k, filter_size, device):
         )

     @onlyCUDA
-    @skipCUDAIfNotMiopenSuggestNHWC
     @tf32_on_and_off(0.05)
     def test_conv_cudnn_mismatch_memory_format(self, device):
         configs = [
@@ -3955,7 +3946,6 @@ def test_cudnn_convolution_add_relu(self, device, dtype):
         self.assertEqual(F.relu(conv2d_out + alpha * z), cudnn_out)

     @onlyCUDA
-    @skipCUDAIfRocm
     def test_convert_conv2d_weight_memory_format(self, device):
         input = torch.randint(1, 10, (2, 8, 4, 4), dtype=torch.float32, device=device)
         model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4)).to(device).float()
@@ -3975,7 +3965,6 @@ def test_convert_conv2d_weight_memory_format(self, device):
         self.assertTrue(out.is_contiguous(memory_format=memory_format))

     @onlyCUDA
-    @skipCUDAIfRocm
     def test_convert_conv3d_weight_memory_format(self, device):
         input = torch.randint(
             1, 10, (2, 8, 4, 4, 4), dtype=torch.float32, device=device
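
For context on the two tests whose ROCm skips are lifted here: they exercise torch.nn.utils.convert_conv2d_weight_memory_format and its 3d counterpart, which rewrite only the convolution weights of a module to the requested memory format. A hedged CPU-only sketch of the 2d utility follows; the module shapes are illustrative assumptions.

import torch
import torch.nn as nn

# Convert just the Conv2d weights of a small model to channels_last; other
# parameters (e.g. BatchNorm) are left untouched.
model = nn.Sequential(nn.Conv2d(8, 4, 3), nn.BatchNorm2d(4)).float()
model = torch.nn.utils.convert_conv2d_weight_memory_format(model, torch.channels_last)

print(model[0].weight.is_contiguous(memory_format=torch.channels_last))  # expected: True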