Skip to content

Commit 04379c2

Browse files
authored
test: addressing flaky spawn in subprocesses (#20940)
1 parent 242d80f commit 04379c2

File tree

3 files changed

+6
-6
lines changed

3 files changed

+6
-6
lines changed

tests/tests_fabric/utilities/test_distributed.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -105,8 +105,8 @@ def _test_all_reduce(strategy):
105105
assert result is tensor # inplace
106106

107107

108-
# flaky with "process 0 terminated with signal SIGABRT" (GLOO)
109-
@pytest.mark.flaky(reruns=3, only_rerun="torch.multiprocessing.spawn.ProcessExitedException")
108+
# flaky with "torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with signal SIGABRT" (GLOO)
109+
@pytest.mark.flaky(reruns=3)
110110
@RunIf(skip_windows=True)
111111
@pytest.mark.parametrize(
112112
"process",

tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -354,8 +354,8 @@ def test_swa_resume_training_from_checkpoint_custom_scheduler(tmp_path, crash_on
354354

355355

356356
@RunIf(skip_windows=True)
357-
# flaky with "process 0 terminated with signal SIGABRT" (GLOO)
358-
@pytest.mark.flaky(reruns=3, only_rerun="torch.multiprocessing.spawn.ProcessExitedException")
357+
# flaky with "torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with signal SIGABRT" (GLOO)
358+
@pytest.mark.flaky(reruns=3)
359359
def test_swa_resume_training_from_checkpoint_ddp(tmp_path):
360360
model = SwaTestModel(crash_on_epoch=3)
361361
resume_model = SwaTestModel()

tests/tests_pytorch/core/test_results.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@ def result_reduce_ddp_fn(strategy):
4949
assert actual.item() == dist.get_world_size()
5050

5151

52-
# flaky with "process 0 terminated with signal SIGABRT"
53-
@pytest.mark.flaky(reruns=3, only_rerun="torch.multiprocessing.spawn.ProcessExitedException")
52+
# flaky with "torch.multiprocessing.spawn.ProcessExitedException: process 0 terminated with signal SIGABRT"
53+
@pytest.mark.flaky(reruns=3)
5454
@RunIf(skip_windows=True)
5555
def test_result_reduce_ddp():
5656
spawn_launch(result_reduce_ddp_fn, [torch.device("cpu")] * 2)

0 commit comments

Comments
 (0)