|
31 | 31 | from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, rank_zero_warn |
32 | 32 | from pytorch_lightning.utilities.cloud_io import atomic_save |
33 | 33 | from pytorch_lightning.utilities.cloud_io import load as pl_load |
34 | | -from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp, sync_ddp_if_available |
| 34 | +from pytorch_lightning.utilities.distributed import rank_zero_info, rank_zero_only, ReduceOp, sync_ddp_if_available |
35 | 35 | from pytorch_lightning.utilities.seed import reset_seed |
36 | 36 |
|
37 | 37 | if _TORCH_GREATER_EQUAL_1_8: |
@@ -148,13 +148,6 @@ def new_process(self, process_idx, trainer, mp_queue): |
148 | 148 | # ... need to double check that it is the correct place |
149 | 149 | # self.trainer.call_setup_hook(self.model) |
150 | 150 |
|
151 | | - # on world_size=0 let everyone know training is starting |
152 | | - if self.is_global_zero and not torch.distributed.is_initialized(): |
153 | | - log.info("-" * 100) |
154 | | - log.info(f"distributed_backend={self.distributed_backend}") |
155 | | - log.info(f"All DDP processes registered. Starting ddp with {self.world_size} processes") |
156 | | - log.info("-" * 100) |
157 | | - |
158 | 151 | # set the ranks and devices |
159 | 152 | self.dist.rank = self.global_rank |
160 | 153 | self.dist.device = self.root_device |
@@ -230,6 +223,14 @@ def init_ddp_connection(self, global_rank: Optional[int], world_size: Optional[i |
230 | 223 | log.info(f"initializing ddp: GLOBAL_RANK: {global_rank}, MEMBER: {global_rank + 1}/{world_size}") |
231 | 224 | torch_distrib.init_process_group(self.torch_distributed_backend, rank=global_rank, world_size=world_size) |
232 | 225 |
|
| 226 | + # on rank=0 let everyone know training is starting |
| 227 | + rank_zero_info( |
| 228 | + f"{'-' * 100}\n" |
| 229 | + f"distributed_backend={self.torch_distributed_backend}\n" |
| 230 | + f"All DDP processes registered. Starting ddp with {self.world_size} processes\n" |
| 231 | + f"{'-' * 100}\n" |
| 232 | + ) |
| 233 | + |
233 | 234 | def determine_ddp_device_ids(self): |
234 | 235 | if self.root_device.type == "cpu": |
235 | 236 | return None |
|
0 commit comments