
Commit c05077f

Enable non-blocking for gpu device transfer (#1843)
* Update distrib_parts.py
* Update CHANGELOG.md

1 parent: bee0392

File tree: 2 files changed (+8, -2 lines)

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -28,6 +28,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Changed
 
+- Enable `non-blocking` for device transfers to GPU ([#1843](https://github.com/PyTorchLightning/pytorch-lightning/pull/1843))
+
 - Replace mata_tags.csv with hparams.yaml ([#1271](https://github.com/PyTorchLightning/pytorch-lightning/pull/1271))
 
 - Reduction when `batch_size < num_gpus` ([#1609](https://github.com/PyTorchLightning/pytorch-lightning/pull/1609))

pytorch_lightning/trainer/distrib_parts.py

Lines changed: 6 additions & 2 deletions
@@ -449,10 +449,14 @@ def __transfer_data_to_device(self, batch, device, gpu_id=None):
         if device == 'gpu':
             # base case: object can be directly moved using `cuda` or `to`
             if callable(getattr(batch, 'cuda', None)):
-                return batch.cuda(gpu_id)
+                # non_blocking will be ignored if tensor is not pinned.
+                # so we can always set it to True
+                return batch.cuda(gpu_id, non_blocking=True)
 
             if callable(getattr(batch, 'to', None)):
-                return batch.to(torch.device('cuda', gpu_id))
+                # non_blocking will be ignored if tensor is not pinned.
+                # so we can always set it to True
+                return batch.to(torch.device('cuda', gpu_id), non_blocking=True)
 
             # when list
             if isinstance(batch, list):
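Background on the change: `non_blocking=True` only makes a host-to-device copy asynchronous when the source tensor lives in pinned (page-locked) memory; for ordinary pageable tensors PyTorch falls back to a plain synchronous copy, which is why the flag can be set unconditionally here. A minimal sketch of the behavior this commit enables (the dataset, loader, and `pin_memory=True` setting are illustrative assumptions, not part of the commit, and a CUDA device is assumed):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

# pin_memory=True makes the DataLoader return batches in page-locked host
# memory; only such tensors can be copied to the GPU asynchronously.
dataset = TensorDataset(torch.randn(1024, 32))
loader = DataLoader(dataset, batch_size=64, pin_memory=True)

device = torch.device('cuda', 0)  # assumes a CUDA device is available

for (batch,) in loader:
    # Asynchronous copy: the call returns immediately and the transfer is
    # queued on a CUDA stream. With an unpinned source tensor the same call
    # silently degrades to a normal blocking copy, so passing
    # non_blocking=True is always safe.
    batch = batch.to(device, non_blocking=True)
    # ... forward/backward pass here; kernels queue behind the pending copy
```

Without `pin_memory=True` on the loader, this commit changes nothing observable: transfers simply stay synchronous, exactly as before.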
