Better error message when dataloader and datamodule are None (V2) (#14637)

awaelchli · lexierule · commit 582b8cc70adb · 2022-09-13T15:17:59.000-04:00
diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ## [1.7.6] - 2022-09-13
 
+### Changed
+
+- When using multiple loggers, by default checkpoints and profiler output now get saved to the log dir of the first logger in the list ([#14325](https://github.com/Lightning-AI/lightning/pull/14325))
+- Improved the error messaging when passing `Trainer.method(model, x_dataloader=None)` with no module-method implementations available ([#14614](https://github.com/Lightning-AI/lightning/pull/14614))
+
 ### Fixed
 
 - Reset the dataloaders on OOM failure in batch size finder to use the last successful batch size ([#14372](https://github.com/Lightning-AI/lightning/pull/14372))
diff --git a/src/pytorch_lightning/trainer/configuration_validator.py b/src/pytorch_lightning/trainer/configuration_validator.py
@@ -71,16 +71,6 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh
             " `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined."
         )
 
-    # -----------------------------------
-    # verify model has a train dataloader
-    # -----------------------------------
-    has_train_dataloader = trainer._data_connector._train_dataloader_source.is_defined()
-    if not has_train_dataloader:
-        raise MisconfigurationException(
-            "No `train_dataloader()` method defined. Lightning `Trainer` expects as minimum a"
-            " `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined."
-        )
-
     # -----------------------------------
     # verify model has optimizer
     # -----------------------------------
@@ -121,19 +111,11 @@ def __verify_train_val_loop_configuration(trainer: "pl.Trainer", model: "pl.Ligh
 
 
 def __verify_eval_loop_configuration(trainer: "pl.Trainer", model: "pl.LightningModule", stage: str) -> None:
-    loader_name = f"{stage}_dataloader"
     step_name = "validation_step" if stage == "val" else f"{stage}_step"
     trainer_method = "validate" if stage == "val" else stage
 
-    has_loader = getattr(trainer._data_connector, f"_{stage}_dataloader_source").is_defined()
     has_step = is_overridden(step_name, model)
 
-    # -----------------------------------
-    # verify model has an eval_dataloader
-    # -----------------------------------
-    if not has_loader:
-        raise MisconfigurationException(f"No `{loader_name}()` method defined to run `Trainer.{trainer_method}`.")
-
     # predict_step is not required to be overridden
     if stage == "predict":
         if model.predict_step is None:
diff --git a/src/pytorch_lightning/trainer/connectors/data_connector.py b/src/pytorch_lightning/trainer/connectors/data_connector.py
@@ -144,6 +144,17 @@ def attach_data(
             predict_dataloaders=predict_dataloaders,
         )
         self.attach_datamodule(model, datamodule=datamodule)
+
+        # Validate that the required data sources are available
+        if self.trainer.state.fn == TrainerFn.FITTING:
+            _check_dataloader_none(train_dataloaders, self._train_dataloader_source, self.trainer.state.fn)
+        elif self.trainer.state.fn == TrainerFn.VALIDATING:
+            _check_dataloader_none(val_dataloaders, self._val_dataloader_source, self.trainer.state.fn)
+        elif self.trainer.state.fn == TrainerFn.TESTING:
+            _check_dataloader_none(test_dataloaders, self._test_dataloader_source, self.trainer.state.fn)
+        elif self.trainer.state.fn == TrainerFn.PREDICTING:
+            _check_dataloader_none(predict_dataloaders, self._predict_dataloader_source, self.trainer.state.fn)
+
         # set local properties on the model
         self._copy_trainer_model_properties(model)
 
@@ -581,3 +592,18 @@ def get_instance(self, hook_name: str) -> Union["pl.LightningModule", "pl.Lightn
                 " `LightningDataModule`. It will use the implementation from `LightningModule` instance."
             )
         return self.model
+
+
+def _check_dataloader_none(
+    dataloader: Optional[Union[TRAIN_DATALOADERS, EVAL_DATALOADERS]],
+    dataloader_source: _DataLoaderSource,
+    trainer_fn: TrainerFn,
+) -> None:
+    """Raise `ValueError` when `dataloader` is `None` and `dataloader_source.is_defined()` is false."""
+    prefix = "train_" if trainer_fn == TrainerFn.FITTING else ""  # `.fit()` also accepts `val_dataloaders`
+    if dataloader is None and not dataloader_source.is_defined():
+        raise ValueError(
+            f"An invalid dataloader was passed to `Trainer.{trainer_fn.value}({prefix}dataloaders=...)`."
+            f" Either pass the dataloader to the `.{trainer_fn.value}()` method OR implement"
+            f" `def {dataloader_source.name}(self):` in your LightningModule/LightningDataModule."
+        )
diff --git a/tests/tests_pytorch/trainer/connectors/test_data_connector.py b/tests/tests_pytorch/trainer/connectors/test_data_connector.py
@@ -570,3 +570,38 @@ def test_error_raised_with_insufficient_float_limit_train_dataloader():
         match="Please increase the `limit_train_batches` argument. Try at least",
     ):
         trainer.reset_train_dataloader(model)
+
+
+@pytest.mark.parametrize(
+    "trainer_fn_name, dataloader_name",
+    [
+        ("fit", "train_dataloaders"),
+        ("validate", "dataloaders"),
+        ("test", "dataloaders"),
+        ("predict", "dataloaders"),
+    ],
+)
+def test_attach_data_input_validation_with_none_dataloader(trainer_fn_name, dataloader_name, tmpdir):
+    """Test that calling a `Trainer` entry point with its dataloader argument set to `None` raises a `ValueError` when
+    neither the model nor the datamodule implements the dataloader hooks."""
+    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
+    model = BoringModel()
+    datamodule = BoringDataModule()
+    trainer_fn = getattr(trainer, trainer_fn_name)
+
+    # Simulate a model and datamodule that implement none of the dataloader hooks
+    model.train_dataloader = None
+    model.val_dataloader = None
+    model.test_dataloader = None
+    model.predict_dataloader = None
+
+    datamodule.train_dataloader = None
+    datamodule.val_dataloader = None
+    datamodule.test_dataloader = None
+    datamodule.predict_dataloader = None
+
+    with pytest.raises(ValueError, match=f"An invalid .*dataloader was passed to `Trainer.{trainer_fn_name}"):
+        trainer_fn(model, **{dataloader_name: None}, datamodule=datamodule)
+
+    with pytest.raises(ValueError, match=f"An invalid .*dataloader was passed to `Trainer.{trainer_fn_name}"):
+        trainer_fn(model, **{dataloader_name: None}, datamodule=None)
diff --git a/tests/tests_pytorch/trainer/test_config_validator.py b/tests/tests_pytorch/trainer/test_config_validator.py
@@ -22,17 +22,9 @@
 
 
 def test_wrong_train_setting(tmpdir):
-    """
-    * Test that an error is thrown when no `train_dataloader()` is defined
-    * Test that an error is thrown when no `training_step()` is defined
-    """
+    """Test that an error is raised when no `training_step()` is defined."""
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)
 
-    with pytest.raises(MisconfigurationException, match=r"No `train_dataloader\(\)` method defined."):
-        model = BoringModel()
-        model.train_dataloader = None
-        trainer.fit(model)
-
     with pytest.raises(MisconfigurationException, match=r"No `training_step\(\)` method defined."):
         model = BoringModel()
         model.training_step = None
@@ -70,36 +62,18 @@ def test_eval_loop_config(tmpdir):
     """When either eval step or eval data is missing."""
     trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)
 
-    # has val step but no val data
-    model = BoringModel()
-    model.val_dataloader = None
-    with pytest.raises(MisconfigurationException, match=r"No `val_dataloader\(\)` method defined"):
-        trainer.validate(model)
-
     # has test data but no val step
     model = BoringModel()
     model.validation_step = None
     with pytest.raises(MisconfigurationException, match=r"No `validation_step\(\)` method defined"):
         trainer.validate(model)
 
-    # has test loop but no test data
-    model = BoringModel()
-    model.test_dataloader = None
-    with pytest.raises(MisconfigurationException, match=r"No `test_dataloader\(\)` method defined"):
-        trainer.test(model)
-
     # has test data but no test step
     model = BoringModel()
     model.test_step = None
     with pytest.raises(MisconfigurationException, match=r"No `test_step\(\)` method defined"):
         trainer.test(model)
 
-    # has predict step but no predict data
-    model = BoringModel()
-    model.predict_dataloader = None
-    with pytest.raises(MisconfigurationException, match=r"No `predict_dataloader\(\)` method defined"):
-        trainer.predict(model)
-
     # has predict data but no predict_step
     model = BoringModel()
     model.predict_step = None