
Commit 5fe9e93

Update dataloader docstrings (#17061)
1 parent 9cd131c commit 5fe9e93

File tree: 2 files changed, +48 −129 lines


src/lightning/pytorch/core/hooks.py

Lines changed: 15 additions & 111 deletions
@@ -388,11 +388,9 @@ def teardown(self, stage: str) -> None:
        """

    def train_dataloader(self) -> TRAIN_DATALOADERS:
-        """Implement one or more PyTorch DataLoaders for training.
+        """An iterable or collection of iterables specifying training samples.

-        Return:
-            A collection of :class:`torch.utils.data.DataLoader` specifying training samples.
-            In the case of multiple dataloaders, please see this :ref:`section <multiple-dataloaders>`.
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.

        The dataloader you return will not be reloaded unless you set
        :paramref:`~lightning.pytorch.trainer.Trainer.reload_dataloaders_every_n_epochs` to
@@ -412,55 +410,15 @@ def train_dataloader(self) -> TRAIN_DATALOADERS:
        - :meth:`setup`

        Note:
-            Lightning adds the correct sampler for distributed and arbitrary hardware.
+            Lightning tries to add the correct sampler for distributed and arbitrary hardware.
            There is no need to set it yourself.
-
-        Example::
-
-            # single dataloader
-            def train_dataloader(self):
-                transform = transforms.Compose([transforms.ToTensor(),
-                                                transforms.Normalize((0.5,), (1.0,))])
-                dataset = MNIST(root='/path/to/mnist/', train=True, transform=transform,
-                                download=True)
-                loader = torch.utils.data.DataLoader(
-                    dataset=dataset,
-                    batch_size=self.batch_size,
-                    shuffle=True
-                )
-                return loader
-
-            # multiple dataloaders, return as list
-            def train_dataloader(self):
-                mnist = MNIST(...)
-                cifar = CIFAR(...)
-                mnist_loader = torch.utils.data.DataLoader(
-                    dataset=mnist, batch_size=self.batch_size, shuffle=True
-                )
-                cifar_loader = torch.utils.data.DataLoader(
-                    dataset=cifar, batch_size=self.batch_size, shuffle=True
-                )
-                # each batch will be a list of tensors: [batch_mnist, batch_cifar]
-                return [mnist_loader, cifar_loader]
-
-            # multiple dataloader, return as dict
-            def train_dataloader(self):
-                mnist = MNIST(...)
-                cifar = CIFAR(...)
-                mnist_loader = torch.utils.data.DataLoader(
-                    dataset=mnist, batch_size=self.batch_size, shuffle=True
-                )
-                cifar_loader = torch.utils.data.DataLoader(
-                    dataset=cifar, batch_size=self.batch_size, shuffle=True
-                )
-                # each batch will be a dict of tensors: {'mnist': batch_mnist, 'cifar': batch_cifar}
-                return {'mnist': mnist_loader, 'cifar': cifar_loader}
        """
        raise MisconfigurationException("`train_dataloader` must be implemented to be used with the Lightning Trainer")

    def test_dataloader(self) -> EVAL_DATALOADERS:
-        r"""
-        Implement one or multiple PyTorch DataLoaders for testing.
+        r"""An iterable or collection of iterables specifying test samples.
+
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.

        For data processing use the following pattern:

@@ -477,44 +435,19 @@ def test_dataloader(self) -> EVAL_DATALOADERS:
        - :meth:`setup`

        Note:
-            Lightning adds the correct sampler for distributed and arbitrary hardware.
+            Lightning tries to add the correct sampler for distributed and arbitrary hardware.
            There is no need to set it yourself.

-        Return:
-            A :class:`torch.utils.data.DataLoader` or a sequence of them specifying testing samples.
-
-        Example::
-
-            def test_dataloader(self):
-                transform = transforms.Compose([transforms.ToTensor(),
-                                                transforms.Normalize((0.5,), (1.0,))])
-                dataset = MNIST(root='/path/to/mnist/', train=False, transform=transform,
-                                download=True)
-                loader = torch.utils.data.DataLoader(
-                    dataset=dataset,
-                    batch_size=self.batch_size,
-                    shuffle=False
-                )
-
-                return loader
-
-            # can also return multiple dataloaders
-            def test_dataloader(self):
-                return [loader_a, loader_b, ..., loader_n]
-
        Note:
            If you don't need a test dataset and a :meth:`test_step`, you don't need to implement
            this method.
-
-        Note:
-            In the case where you return multiple test dataloaders, the :meth:`test_step`
-            will have an argument ``dataloader_idx`` which matches the order here.
        """
        raise MisconfigurationException("`test_dataloader` must be implemented to be used with the Lightning Trainer")

    def val_dataloader(self) -> EVAL_DATALOADERS:
-        r"""
-        Implement one or multiple PyTorch DataLoaders for validation.
+        r"""An iterable or collection of iterables specifying validation samples.
+
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.

        The dataloader you return will not be reloaded unless you set
        :paramref:`~lightning.pytorch.trainer.Trainer.reload_dataloaders_every_n_epochs` to
@@ -528,44 +461,19 @@ def val_dataloader(self) -> EVAL_DATALOADERS:
        - :meth:`setup`

        Note:
-            Lightning adds the correct sampler for distributed and arbitrary hardware
+            Lightning tries to add the correct sampler for distributed and arbitrary hardware
            There is no need to set it yourself.

-        Return:
-            A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples.
-
-        Examples::
-
-            def val_dataloader(self):
-                transform = transforms.Compose([transforms.ToTensor(),
-                                                transforms.Normalize((0.5,), (1.0,))])
-                dataset = MNIST(root='/path/to/mnist/', train=False,
-                                transform=transform, download=True)
-                loader = torch.utils.data.DataLoader(
-                    dataset=dataset,
-                    batch_size=self.batch_size,
-                    shuffle=False
-                )
-
-                return loader
-
-            # can also return multiple dataloaders
-            def val_dataloader(self):
-                return [loader_a, loader_b, ..., loader_n]
-
        Note:
            If you don't need a validation dataset and a :meth:`validation_step`, you don't need to
            implement this method.
-
-        Note:
-            In the case where you return multiple validation dataloaders, the :meth:`validation_step`
-            will have an argument ``dataloader_idx`` which matches the order here.
        """
        raise MisconfigurationException("`val_dataloader` must be implemented to be used with the Lightning Trainer")

    def predict_dataloader(self) -> EVAL_DATALOADERS:
-        r"""
-        Implement one or multiple PyTorch DataLoaders for prediction.
+        r"""An iterable or collection of iterables specifying prediction samples.
+
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.

        It's recommended that all data downloads and preparation happen in :meth:`prepare_data`.

@@ -574,15 +482,11 @@ def predict_dataloader(self) -> EVAL_DATALOADERS:
        - :meth:`setup`

        Note:
-            Lightning adds the correct sampler for distributed and arbitrary hardware
+            Lightning tries to add the correct sampler for distributed and arbitrary hardware
            There is no need to set it yourself.

        Return:
            A :class:`torch.utils.data.DataLoader` or a sequence of them specifying prediction samples.
-
-        Note:
-            In the case where you return multiple prediction dataloaders, the :meth:`predict_step`
-            will have an argument ``dataloader_idx`` which matches the order here.
        """
        raise MisconfigurationException(
            "`predict_dataloader` must be implemented to be used with the Lightning Trainer"

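The hooks above now describe their return value as "an iterable or collection of iterables" rather than strictly :class:`torch.utils.data.DataLoader` objects, and they point to the :ref:`multiple-dataloaders` docs instead of carrying inline examples. The dict pattern shown in the deleted examples still works; here is a minimal sketch of it, assuming torchvision is installed (the module name ``MultiLoaderModel``, the data root, and the batch size are illustrative placeholders, not part of this commit)::

    import lightning.pytorch as pl
    from torch.utils.data import DataLoader
    from torchvision import transforms
    from torchvision.datasets import CIFAR10, MNIST

    class MultiLoaderModel(pl.LightningModule):
        # Only the dataloader hook is sketched; training_step,
        # configure_optimizers, etc. are omitted for brevity.
        batch_size = 32

        def train_dataloader(self):
            # Any iterable or collection of iterables is accepted; returning a
            # dict makes each training batch arrive shaped as
            # {'mnist': batch_mnist, 'cifar': batch_cifar}.
            transform = transforms.ToTensor()
            mnist = MNIST(root="./data", train=True, transform=transform, download=True)
            cifar = CIFAR10(root="./data", train=True, transform=transform, download=True)
            return {
                "mnist": DataLoader(mnist, batch_size=self.batch_size, shuffle=True),
                "cifar": DataLoader(cifar, batch_size=self.batch_size, shuffle=True),
            }
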
src/lightning/pytorch/trainer/trainer.py

Lines changed: 33 additions & 18 deletions
@@ -500,17 +500,20 @@ def fit(
        Args:
            model: Model to fit.

-            train_dataloaders: A collection of :class:`torch.utils.data.DataLoader` or a
-                :class:`~lightning.pytorch.core.datamodule.LightningDataModule` specifying training samples.
-                In the case of multiple dataloaders, please see this :ref:`section <multiple-dataloaders>`.
+            train_dataloaders: An iterable or collection of iterables specifying training samples.
+                Alternatively, a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.train_dataloader` hook.

-            val_dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them specifying validation samples.
+            val_dataloaders: An iterable or collection of iterables specifying validation samples.

            ckpt_path: Path/URL of the checkpoint from which training is resumed. Could also be one of two special
                keywords ``"last"`` and ``"hpc"``. If there is no checkpoint file at the path, an exception is raised.
                If resuming from mid-epoch checkpoint, training will start from the beginning of the next epoch.

-            datamodule: An instance of :class:`~lightning.pytorch.core.datamodule.LightningDataModule`.
+            datamodule: A :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.train_dataloader` hook.
+
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.
        """
        model = _maybe_unwrap_optimized(model)
        self.strategy._lightning_module = model
@@ -573,8 +576,9 @@ def validate(
        Args:
            model: The model to validate.

-            dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them,
-                or a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` specifying validation samples.
+            dataloaders: An iterable or collection of iterables specifying validation samples.
+                Alternatively, a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.val_dataloader` hook.

            ckpt_path: Either ``"best"``, ``"last"``, ``"hpc"`` or path to the checkpoint you wish to validate.
                If ``None`` and the model instance was passed, use the current weights.
@@ -583,7 +587,10 @@ def validate(

            verbose: If True, prints the validation results.

-            datamodule: An instance of :class:`~lightning.pytorch.core.datamodule.LightningDataModule`.
+            datamodule: A :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.val_dataloader` hook.
+
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.

        Returns:
            List of dictionaries with metrics logged during the validation phase, e.g., in model- or callback hooks
666673
Args:
667674
model: The model to test.
668675
669-
dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them,
670-
or a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` specifying test samples.
676+
dataloaders: An iterable or collection of iterables specifying test samples.
677+
Alternatively, a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
678+
the `:class:`~lightning.pytorch.core.hooks.DataHooks.test_dataloader` hook.
671679
672680
ckpt_path: Either ``"best"``, ``"last"``, ``"hpc"`` or path to the checkpoint you wish to test.
673681
If ``None`` and the model instance was passed, use the current weights.
@@ -676,7 +684,10 @@ def test(

            verbose: If True, prints the test results.

-            datamodule: An instance of :class:`~lightning.pytorch.core.datamodule.LightningDataModule`.
+            datamodule: A :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.test_dataloader` hook.
+
+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.

        Returns:
            List of dictionaries with metrics logged during the test phase, e.g., in model- or callback hooks
@@ -760,10 +771,12 @@ def predict(
        Args:
            model: The model to predict with.

-            dataloaders: A :class:`torch.utils.data.DataLoader` or a sequence of them,
-                or a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` specifying prediction samples.
+            dataloaders: An iterable or collection of iterables specifying predict samples.
+                Alternatively, a :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.predict_dataloader` hook.

-            datamodule: The datamodule with a predict_dataloader method that returns one or more dataloaders.
+            datamodule: A :class:`~lightning.pytorch.core.datamodule.LightningDataModule` that defines
+                the :class:`~lightning.pytorch.core.hooks.DataHooks.predict_dataloader` hook.

            return_predictions: Whether to return predictions.
                ``True`` by default except when an accelerator that spawns processes is used (not supported).
@@ -773,6 +786,8 @@ def predict(
                Otherwise, the best model checkpoint from the previous ``trainer.fit`` call will be loaded
                if a checkpoint callback is configured.

+        For more information about multiple dataloaders, see this :ref:`section <multiple-dataloaders>`.
+
        Returns:
            Returns a list of dictionaries, one for each provided dataloader containing their respective predictions.

@@ -1360,27 +1375,27 @@ def is_last_batch(self) -> bool:
        return self.fit_loop.epoch_loop.batch_progress.is_last_batch

    @property
-    def train_dataloader(self) -> TRAIN_DATALOADERS:
+    def train_dataloader(self) -> Optional[TRAIN_DATALOADERS]:
        """The training dataloader(s) used during ``trainer.fit()``."""
        if (combined_loader := self.fit_loop._combined_loader) is not None:
            return combined_loader.iterables

    @property
-    def val_dataloaders(self) -> EVAL_DATALOADERS:
+    def val_dataloaders(self) -> Optional[EVAL_DATALOADERS]:
        """The validation dataloader(s) used during ``trainer.fit()`` or ``trainer.validate()``."""
        if (combined_loader := self.fit_loop.epoch_loop.val_loop._combined_loader) is not None:
            return combined_loader.iterables
        elif (combined_loader := self.validate_loop._combined_loader) is not None:
            return combined_loader.iterables

    @property
-    def test_dataloaders(self) -> EVAL_DATALOADERS:
+    def test_dataloaders(self) -> Optional[EVAL_DATALOADERS]:
        """The test dataloader(s) used during ``trainer.test()``."""
        if (combined_loader := self.test_loop._combined_loader) is not None:
            return combined_loader.iterables

    @property
-    def predict_dataloaders(self) -> EVAL_DATALOADERS:
+    def predict_dataloaders(self) -> Optional[EVAL_DATALOADERS]:
        """The prediction dataloader(s) used during ``trainer.predict()``."""
        if (combined_loader := self.predict_loop._combined_loader) is not None:
            return combined_loader.iterables
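
The trainer-side docstrings mirror the hook wording, and the four dataloader properties gain ``Optional[...]`` return types because each returns ``None`` until the corresponding loop has populated its combined loader. A usage sketch under the same assumptions as above (``MultiLoaderModel`` is the illustrative placeholder module from the previous sketch)::

    import lightning.pytorch as pl

    model = MultiLoaderModel()
    trainer = pl.Trainer(max_epochs=1, limit_train_batches=2)

    # An iterable or collection of iterables may also be passed directly, e.g.
    # trainer.fit(model, train_dataloaders={"mnist": loader_a, "cifar": loader_b})
    trainer.fit(model)

    # Per the new Optional annotations: None before fit() has run, the
    # collection of iterables afterwards.
    assert trainer.train_dataloader is not None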
