@@ -132,6 +132,7 @@ def test_horovod_cpu_implicit(tmpdir):
132132 _run_horovod (trainer_options )
133133
134134
135+ @pytest .mark .xfail (raises = AssertionError , reason = "unhandled cuda error" )
135136@RunIf (min_cuda_gpus = 2 , horovod_nccl = True , skip_windows = True )
136137def test_horovod_multi_gpu (tmpdir ):
137138 """Test Horovod with multi-GPU support."""
@@ -149,6 +150,7 @@ def test_horovod_multi_gpu(tmpdir):
149150 _run_horovod (trainer_options )
150151
151152
153+ @pytest .mark .xfail (raises = AssertionError , reason = "unhandled cuda error" )
152154@RunIf (min_cuda_gpus = 2 , horovod_nccl = True , skip_windows = True )
153155def test_horovod_multi_gpu_accumulate_grad_batches (tmpdir ):
154156 trainer_options = dict (
@@ -165,10 +167,12 @@ def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
165167 _run_horovod (trainer_options )
166168
167169
170+ @pytest .mark .xfail (reason = "unhandled cuda error" )
168171@RunIf (horovod = True , skip_windows = True , min_cuda_gpus = 1 )
169172def test_horovod_raises_unsupported_accumulate_grad_batches (tmpdir ):
170173 """Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod
171174 Strategy on multi-gpus."""
175+
172176 model = BoringModel ()
173177 with pytest .deprecated_call (match = r"horovod'\)` has been deprecated in v1.9" ):
174178 trainer = Trainer (
@@ -183,6 +187,7 @@ def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
183187 trainer .fit (model )
184188
185189
190+ @pytest .mark .xfail (raises = AssertionError , reason = "unhandled cuda error" )
186191@RunIf (min_cuda_gpus = 2 , horovod_nccl = True , skip_windows = True )
187192def test_horovod_multi_gpu_grad_by_value (tmpdir ):
188193 """Test Horovod with multi-GPU support."""
@@ -201,6 +206,7 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir):
201206 _run_horovod (trainer_options )
202207
203208
209+ @pytest .mark .xfail (raises = AssertionError , reason = "unhandled cuda error" )
204210@RunIf (min_cuda_gpus = 2 , horovod_nccl = True , skip_windows = True )
205211def test_horovod_amp (tmpdir ):
206212 """Test Horovod with multi-GPU support using native amp."""
@@ -220,6 +226,7 @@ def test_horovod_amp(tmpdir):
220226 _run_horovod (trainer_options )
221227
222228
229+ @pytest .mark .xfail (raises = AssertionError , reason = "unhandled cuda error" )
223230@RunIf (min_cuda_gpus = 2 , horovod_nccl = True , skip_windows = True )
224231def test_horovod_gather (tmpdir ):
225232 """Test Horovod with multi-GPU support using native amp."""
@@ -237,6 +244,7 @@ def test_horovod_gather(tmpdir):
237244 _run_horovod (trainer_options )
238245
239246
247+ @pytest .mark .xfail (reason = "unhandled cuda error" )
240248@RunIf (min_cuda_gpus = 2 , skip_windows = True , horovod = True , horovod_nccl = True )
241249def test_horovod_transfer_batch_to_gpu (tmpdir ):
242250 class TestTrainingStepModel (BoringModel ):
0 commit comments