@@ -607,7 +607,7 @@ def select_training_type_plugin(self) -> TrainingTypePlugin:
         use_torchelastic_ddp = self.use_ddp and TorchElasticEnvironment.is_using_torchelastic()
         use_kubeflow_ddp = self.use_ddp and KubeflowEnvironment.is_using_kubeflow()
         use_ddp_spawn = self._distrib_type == DistributedType.DDP_SPAWN
-        use_ddp_cpu_spawn = self.use_ddp and self.use_cpu
+        use_ddp_cpu_spawn = use_ddp_spawn and self.use_cpu
         use_tpu_spawn = self.use_tpu and self._distrib_type == DistributedType.TPU_SPAWN
         use_ddp_cpu_torch_elastic = use_ddp_cpu_spawn and TorchElasticEnvironment.is_using_torchelastic()
         use_ddp_cpu_kubeflow = use_ddp_cpu_spawn and KubeflowEnvironment.is_using_kubeflow()
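To make the effect of the first hunk concrete: `use_ddp` is true for several DDP-family modes, while the patched check keys off the spawn distrib type specifically, so a CPU run resolved to plain DDP no longer takes the CPU-spawn path. The snippet below is a self-contained sketch of that narrowing using a local stand-in enum and plain booleans, not PyTorch Lightning's actual connector code:

```python
from enum import Enum


# Local stand-in for pytorch_lightning's DistributedType enum, defined here
# only so the sketch runs on its own.
class DistributedType(str, Enum):
    DDP = "ddp"
    DDP_SPAWN = "ddp_spawn"


def cpu_spawn_old(use_ddp: bool, use_cpu: bool) -> bool:
    # Pre-patch condition: any DDP-family mode on CPU counted as CPU spawn.
    return use_ddp and use_cpu


def cpu_spawn_new(distrib_type: DistributedType, use_cpu: bool) -> bool:
    # Patched condition: only the spawn distrib type on CPU counts.
    return distrib_type == DistributedType.DDP_SPAWN and use_cpu


# A CPU run resolved to plain DDP: the old check selects the spawn path,
# the patched check does not.
print(cpu_spawn_old(use_ddp=True, use_cpu=True))               # True
print(cpu_spawn_new(DistributedType.DDP, use_cpu=True))        # False
print(cpu_spawn_new(DistributedType.DDP_SPAWN, use_cpu=True))  # True
```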
@@ -738,14 +738,16 @@ def set_distributed_mode(self, distributed_backend: Optional[str] = None):
         if self.distributed_backend is None:
             if self.has_horovodrun():
                 self._set_horovod_backend()
-            elif self.num_gpus == 0 and (self.num_nodes > 1 or self.num_processes > 1):
+            elif self.num_gpus == 0 and self.num_nodes > 1:
                 self._distrib_type = DistributedType.DDP
+            elif self.num_gpus == 0 and self.num_processes > 1:
+                self.distributed_backend = DistributedType.DDP_SPAWN
             elif self.num_gpus > 1 and not _use_cpu:
                 rank_zero_warn(
                     "You requested multiple GPUs but did not specify a backend, e.g."
                     ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`. Setting `accelerator="ddp_spawn"` for you.'
                 )
-                self.distributed_backend = "ddp_spawn"
+                self.distributed_backend = DistributedType.DDP_SPAWN

         # special case with DDP on CPUs
         if self.distributed_backend == "ddp_cpu":
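The second hunk changes the default backend resolution: with no backend given, no GPUs, and more than one process on a single node, the connector now falls back to `ddp_spawn` rather than setting the `DDP` distrib type, and the multi-GPU fallback assigns the `DistributedType.DDP_SPAWN` enum member instead of the bare string. A minimal usage sketch of the call that exercises this path, assuming the PyTorch Lightning 1.x `Trainer` signature with `num_processes`/`gpus` arguments:

```python
import pytorch_lightning as pl

# CPU-only, multi-process, single node, no explicit accelerator/backend:
# with this patch the connector should resolve the backend to
# DistributedType.DDP_SPAWN instead of setting plain DDP.
trainer = pl.Trainer(num_processes=2, gpus=0, max_epochs=1)
```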