Skip to content

Commit 7c58c4d

Browse files
awaelchli and Borda
authored and committed
Use DistributedSampler when running with custom accelerator (#7814)
Co-authored-by: Jirka Borovec <[email protected]>
Co-authored-by: Adrian Wälchli <[email protected]>
1 parent dc44a53 commit 7c58c4d

File tree

3 files changed

+30
-2
lines changed

3 files changed

+30
-2
lines changed

pytorch_lightning/plugins/training_type/ddp.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,10 @@ def __init__(
8484
self._ddp_comm_wrapper = ddp_comm_wrapper
8585
self.set_world_ranks()
8686

87+
@property
88+
def is_distributed(self) -> bool:
89+
return True
90+
8791
@property
8892
def root_device(self):
8993
return self.parallel_devices[self.local_rank]

pytorch_lightning/trainer/connectors/accelerator_connector.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -382,7 +382,11 @@ def select_precision_plugin(self) -> PrecisionPlugin:
382382
raise NotImplementedError("We only support precisions 64, 32 and 16!")
383383

384384
def select_training_type_plugin(self) -> TrainingTypePlugin:
385-
if self.use_ddp2:
385+
if isinstance(
386+
self.distributed_backend, Accelerator
387+
) and self.distributed_backend.training_type_plugin is not None:
388+
plugin = self.distributed_backend.training_type_plugin
389+
elif self.use_ddp2:
386390
plugin = DDP2Plugin(
387391
parallel_devices=self.parallel_devices,
388392
num_nodes=self.num_nodes,

tests/accelerators/test_accelerator_connector.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -383,8 +383,9 @@ class Prec(PrecisionPlugin):
383383
class TrainTypePlugin(SingleDevicePlugin):
384384
pass
385385

386+
ttp = TrainTypePlugin(device=torch.device("cpu"))
386387
accelerator = Accel(
387-
training_type_plugin=TrainTypePlugin(device=torch.device("cpu")),
388+
training_type_plugin=ttp,
388389
precision_plugin=Prec(),
389390
)
390391
trainer = Trainer(
@@ -395,6 +396,25 @@ class TrainTypePlugin(SingleDevicePlugin):
395396
assert isinstance(trainer.accelerator, Accel)
396397
assert isinstance(trainer.training_type_plugin, TrainTypePlugin)
397398
assert isinstance(trainer.precision_plugin, Prec)
399+
assert trainer.accelerator_connector.training_type_plugin is ttp
400+
401+
class DistributedPlugin(DDPPlugin):
402+
pass
403+
404+
ttp = DistributedPlugin()
405+
accelerator = Accel(
406+
training_type_plugin=ttp,
407+
precision_plugin=Prec(),
408+
)
409+
trainer = Trainer(
410+
accelerator=accelerator,
411+
fast_dev_run=True,
412+
num_processes=2,
413+
)
414+
assert isinstance(trainer.accelerator, Accel)
415+
assert isinstance(trainer.training_type_plugin, DistributedPlugin)
416+
assert isinstance(trainer.precision_plugin, Prec)
417+
assert trainer.accelerator_connector.training_type_plugin is ttp
398418

399419

400420
@mock.patch.dict(

0 commit comments

Comments
 (0)