
Commit 0bd69c9

rohitgr7 authored and ananthsub committed
Fix to avoid moving batch to device for DataParallel (#11780)
Co-authored-by: ananthsub <[email protected]>
1 parent e17dd29 commit 0bd69c9
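For context: `torch.nn.DataParallel` scatters the input batch across its `device_ids` inside `forward`, so moving the whole batch to a device beforehand is redundant work. A minimal sketch of that behavior, separate from this commit and assuming a machine with at least two CUDA devices:

```python
import torch
import torch.nn as nn

# The wrapped module lives on the first device; DataParallel replicates it.
model = nn.DataParallel(nn.Linear(32, 4).cuda(0), device_ids=[0, 1])

batch = torch.randn(8, 32)  # the batch can stay on the CPU
assert not batch.is_cuda

# forward() scatters chunks of the batch onto each GPU, runs the replicas
# in parallel, and gathers the outputs back onto the output device.
out = model(batch)
assert out.is_cuda
```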

File tree

3 files changed: +30 -2 lines changed


CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - The Rich progress bar now correctly shows the `on_epoch` logged values on train epoch end ([#11689](https://github.com/PyTorchLightning/pytorch-lightning/pull/11689))
 - Fixed an issue to make the `step` argument in `WandbLogger.log_image` work ([#11716](https://github.com/PyTorchLightning/pytorch-lightning/pull/11716))
 - Fixed `restore_optimizers` for mapping states ([#11757](https://github.com/PyTorchLightning/pytorch-lightning/pull/11757))
+- With `DPStrategy`, the batch is not explicitly moved to the device ([#11780](https://github.com/PyTorchLightning/pytorch-lightning/pull/11780))
+
 
 
 ## [1.5.9] - 2022-01-18

pytorch_lightning/accelerators/accelerator.py

Lines changed: 5 additions & 2 deletions
@@ -201,8 +201,11 @@ def batch_to_device(self, batch: Any, device: Optional[torch.device] = None, dat
         model = self.lightning_module
         device = device or self.root_device
 
-        if model is not None and not isinstance(self.training_type_plugin, DataParallelPlugin):
-            # no need to transfer batch to device in DP mode
+        # no need to transfer batch to device in DP mode
+        if isinstance(self.training_type_plugin, DataParallelPlugin):
+            return batch
+
+        if model is not None:
             return model._apply_batch_transfer_handler(batch, device=device, dataloader_idx=dataloader_idx)
 
         return move_data_to_device(batch, device)
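Before this change, DP batches failed the combined `model is not None and not isinstance(...)` check and fell through to `move_data_to_device(batch, device)`, which moved them to the root device anyway; the early return now short-circuits both paths. A hypothetical standalone rendering of the fixed decision order (the real logic is a method on `Accelerator` and consults the training type plugin):

```python
import torch

def batch_to_device(batch: torch.Tensor, device: torch.device, is_dp: bool) -> torch.Tensor:
    """Sketch only: decide whether the batch needs an explicit transfer."""
    if is_dp:
        # DP mode: nn.DataParallel scatters the batch itself during forward().
        return batch
    # Every other case: move the batch explicitly.
    return batch.to(device)

cpu_batch = torch.randn(4, 32)
# Under DP the batch comes back untouched, still on the CPU.
assert not batch_to_device(cpu_batch, torch.device("cuda:0"), is_dp=True).is_cuda
```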

tests/accelerators/test_dp.py

Lines changed: 23 additions & 0 deletions
@@ -196,3 +196,26 @@ def test_dp_training_step_dict(tmpdir):
         strategy="dp",
     )
     trainer.fit(model)
+    trainer.test(model)
+
+
+@RunIf(min_gpus=2)
+def test_dp_batch_not_moved_to_device_explicitly(tmpdir):
+    """Test that with DP, the batch is not explicitly moved to the device."""
+
+    class CustomModel(BoringModel):
+        def on_train_batch_start(self, batch, *args, **kwargs):
+            assert not batch.is_cuda
+
+        def training_step(self, batch, batch_idx):
+            assert batch.is_cuda
+            return super().training_step(batch, batch_idx)
+
+    trainer = pl.Trainer(
+        default_root_dir=tmpdir,
+        fast_dev_run=True,
+        gpus=2,
+        strategy="dp",
+    )
+
+    trainer.fit(CustomModel())
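The two assertions differ on purpose: `on_train_batch_start` fires before the model's forward pass, so under DP it sees the untouched CPU batch, while `training_step` runs inside the `DataParallel` replicas after the batch has been scattered, so each replica sees a CUDA chunk.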
