Skip to content

Commit c014dd1

Browse files
rohitgr7 authored and lexierule committed
Fix support for CombinedLoader while checking for warning raised with eval dataloaders (#10994)
1 parent 82d7d50 commit c014dd1

File tree

4 files changed

+50
-7
lines changed

4 files changed

+50
-7
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1818
- Fixed support for logging within callbacks returned from `LightningModule` ([#10991](https://github.com/PyTorchLightning/pytorch-lightning/pull/10991))
1919

2020

21+
- Fixed support for `CombinedLoader` while checking for warning raised with eval dataloaders ([#10994](https://github.com/PyTorchLightning/pytorch-lightning/pull/10994))
22+
23+
2124
-
2225

2326

pytorch_lightning/trainer/data_loading.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -455,9 +455,11 @@ def _reset_eval_dataloader(
455455
loader, SequentialSampler(loader.dataset), mode=mode
456456
)
457457
else:
458-
rank_zero_warn(
459-
f"Your `{mode.dataloader_prefix}_dataloader` has `shuffle=True`,"
460-
"it is strongly recommended that you turn this off for val/test/predict dataloaders."
458+
apply_to_collection(
459+
loader.loaders if isinstance(loader, CombinedLoader) else loader,
460+
DataLoader,
461+
self._check_eval_shuffling,
462+
mode=mode,
461463
)
462464

463465
if any(dl is None for dl in dataloaders):
@@ -620,3 +622,16 @@ def replace_sampler(dataloader: DataLoader) -> DataLoader:
620622
dataloader = apply_to_collection(dataloader, DataLoader, replace_sampler)
621623

622624
return dataloader
625+
626+
@staticmethod
627+
def _check_eval_shuffling(dataloader, mode):
628+
if (
629+
hasattr(dataloader, "sampler")
630+
and not isinstance(dataloader.sampler, SequentialSampler)
631+
and not isinstance(dataloader.dataset, IterableDataset)
632+
):
633+
rank_zero_warn(
634+
f"Your `{mode.dataloader_prefix}_dataloader` has `shuffle=True`,"
635+
" it is strongly recommended that you turn this off for val/test/predict dataloaders.",
636+
category=UserWarning,
637+
)

pytorch_lightning/trainer/supporters.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -304,10 +304,10 @@ def __len__(self) -> int:
304304

305305

306306
class CombinedLoader:
307-
"""Combines different dataloaders and allows sampling in parallel. Supported modes are 'min_size', which raises
308-
StopIteration after the shortest loader (the one with the lowest number of batches) is done, and
309-
'max_size_cycle` which raises StopIteration after the longest loader (the one with most batches) is done, while
310-
cycling through the shorter loaders.
307+
"""Combines different dataloaders and allows sampling in parallel. Supported modes are ``"min_size"``, which
308+
raises StopIteration after the shortest loader (the one with the lowest number of batches) is done, and
309+
``"max_size_cycle"`` which raises StopIteration after the longest loader (the one with most batches) is done,
310+
while cycling through the shorter loaders.
311311
312312
Examples:
313313
>>> loaders = {'a': torch.utils.data.DataLoader(range(6), batch_size=4),

tests/trainer/test_data_loading.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
from pytorch_lightning import Trainer
2323
from pytorch_lightning.trainer.states import RunningStage
24+
from pytorch_lightning.trainer.supporters import CombinedLoader
2425
from pytorch_lightning.utilities.enums import DistributedType
2526
from pytorch_lightning.utilities.exceptions import MisconfigurationException
2627
from tests.helpers import BoringModel, RandomDataset
@@ -364,3 +365,27 @@ def test_error_raised_with_float_limited_eval_batches():
364365
match=fr"{limit_val_batches} \* {dl_size} < 1. Please increase the `limit_val_batches`",
365366
):
366367
trainer._reset_eval_dataloader(RunningStage.VALIDATING, model)
368+
369+
370+
@pytest.mark.parametrize(
371+
"val_dl",
372+
[
373+
DataLoader(dataset=RandomDataset(32, 64), shuffle=True),
374+
CombinedLoader(DataLoader(dataset=RandomDataset(32, 64), shuffle=True)),
375+
CombinedLoader(
376+
[DataLoader(dataset=RandomDataset(32, 64)), DataLoader(dataset=RandomDataset(32, 64), shuffle=True)]
377+
),
378+
CombinedLoader(
379+
{
380+
"dl1": DataLoader(dataset=RandomDataset(32, 64)),
381+
"dl2": DataLoader(dataset=RandomDataset(32, 64), shuffle=True),
382+
}
383+
),
384+
],
385+
)
386+
def test_non_sequential_sampler_warning_is_raised_for_eval_dataloader(val_dl):
387+
trainer = Trainer()
388+
model = BoringModel()
389+
trainer._data_connector.attach_data(model, val_dataloaders=val_dl)
390+
with pytest.warns(UserWarning, match="recommended .* turn this off for val/test/predict"):
391+
trainer._reset_eval_dataloader(RunningStage.VALIDATING, model)

0 commit comments

Comments (0)