Commit d90cb7f

Authored by maxoppelt, rohitgr7, carmocca, tchaton and kaushikb11
Bugfix: Scheduler monitor for manual optimization (#7643)
Co-authored-by: Rohit Gupta <[email protected]>
Co-authored-by: Carlos Mocholí <[email protected]>
Co-authored-by: thomas chaton <[email protected]>
Co-authored-by: Kaushik B <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Kaushik B <[email protected]>
Co-authored-by: Jirka Borovec <[email protected]>
1 parent eaa16c7 commit d90cb7f

File tree: 4 files changed (+102, -37 lines)


CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -233,6 +233,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed `accumulate_grad_batches` not been recomputed during model reload ([#5334](https://github.com/PyTorchLightning/pytorch-lightning/pull/5334))
 - Fixed a `TypeError` when wrapping optimizers in the `HorovodPlugin` and running `Trainer.test` ([#7840](https://github.com/PyTorchLightning/pytorch-lightning/pull/7840))
 - Fixed `BackboneFinetuning` restoration ([#8501](https://github.com/PyTorchLightning/pytorch-lightning/pull/8501))
+- Fixed `lr_scheduler` with metric (e.g. `torch.optim.lr_scheduler.ReduceLROnPlateau`) when using `automatic_optimization = False` ([#7643](https://github.com/PyTorchLightning/pytorch-lightning/pull/7643))
 
 
 ## [1.3.8] - 2021-07-01

docs/source/common/optimizers.rst

Lines changed: 15 additions & 0 deletions
@@ -230,6 +230,21 @@ If you want to call ``lr_scheduler.step()`` every ``n`` steps/epochs, do the following:
         if self.trainer.is_last_batch and (self.trainer.current_epoch + 1) % n == 0:
             sch.step()
 
+If you want to call schedulers that require a metric value after each epoch, consider doing the following:
+
+.. testcode::
+
+    def __init__(self):
+        super().__init__()
+        self.automatic_optimization = False
+
+    def training_epoch_end(self, outputs):
+        sch = self.lr_schedulers()
+
+        # If the selected scheduler is a ReduceLROnPlateau scheduler.
+        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau):
+            sch.step(self.trainer.callback_metrics["loss"])
+
 -----
 
 Use closure for LBFGS-like optimizers
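For reference, the docs snippet above shows only the two hooks this commit touches. A complete, self-contained module using the same pattern could look like the sketch below; it is an illustration of the documented API only, and the module name, layer sizes and the logged metric name "loss" are assumptions rather than part of the commit:

import torch
import pytorch_lightning as pl


class ManualPlateauModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False
        self.layer = torch.nn.Linear(32, 1)

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        x, y = batch
        loss = torch.nn.functional.mse_loss(self.layer(x), y)

        # Manual optimization: backward/step are driven from the LightningModule.
        opt.zero_grad()
        self.manual_backward(loss)
        opt.step()

        # Log the value that the scheduler will consume at epoch end.
        self.log("loss", loss)
        return loss

    def training_epoch_end(self, outputs):
        sch = self.lr_schedulers()
        # ReduceLROnPlateau needs the monitored metric; other schedulers do not.
        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau):
            sch.step(self.trainer.callback_metrics["loss"])

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
        # With this fix, a bare ReduceLROnPlateau no longer requires a `monitor`
        # when `automatic_optimization` is False.
        return [optimizer], [scheduler]

Training then runs as usual via `Trainer(...).fit(model, train_dataloader)` with any `(x, y)` dataloader of matching shapes.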

pytorch_lightning/trainer/optimizers.py

Lines changed: 43 additions & 37 deletions
@@ -119,21 +119,8 @@ def configure_schedulers(
         lr_schedulers = []
         default_config = _get_default_scheduler_config()
         for scheduler in schedulers:
-            if isinstance(scheduler, dict):
-                # check provided keys
-                extra_keys = [k for k in scheduler.keys() if k not in default_config.keys()]
-                if extra_keys:
-                    rank_zero_warn(f"Found unsupported keys in the lr scheduler dict: {extra_keys}", RuntimeWarning)
-                if "scheduler" not in scheduler:
-                    raise MisconfigurationException(
-                        'The lr scheduler dict must have the key "scheduler" with its item being an lr scheduler'
-                    )
-                if "interval" in scheduler and scheduler["interval"] not in ("step", "epoch"):
-                    raise MisconfigurationException(
-                        f'The "interval" key in lr scheduler dict must be "step" or "epoch"'
-                        f' but is "{scheduler["interval"]}"'
-                    )
-                if is_manual_optimization:
+            if is_manual_optimization:
+                if isinstance(scheduler, dict):
                     invalid_keys = {"interval", "frequency", "reduce_on_plateau", "monitor", "strict"}
                     keys_to_warn = [k for k in scheduler.keys() if k in invalid_keys]
 
@@ -144,30 +131,49 @@ def configure_schedulers(
                             RuntimeWarning,
                         )
 
-                scheduler["reduce_on_plateau"] = isinstance(
-                    scheduler["scheduler"], optim.lr_scheduler.ReduceLROnPlateau
-                )
-                if scheduler["reduce_on_plateau"] and scheduler.get("monitor", None) is None:
-                    raise MisconfigurationException(
-                        "The lr scheduler dict must include a monitor when a `ReduceLROnPlateau` scheduler is used."
-                        ' For example: {"optimizer": optimizer, "lr_scheduler":'
-                        ' {"scheduler": scheduler, "monitor": "your_loss"}}'
+                    scheduler = {key: scheduler[key] for key in scheduler if key not in invalid_keys}
+                    lr_schedulers.append({**default_config, **scheduler})
+                else:
+                    lr_schedulers.append({**default_config, "scheduler": scheduler})
+            else:
+                if isinstance(scheduler, dict):
+                    # check provided keys
+                    extra_keys = [k for k in scheduler.keys() if k not in default_config.keys()]
+                    if extra_keys:
+                        rank_zero_warn(f"Found unsupported keys in the lr scheduler dict: {extra_keys}", RuntimeWarning)
+                    if "scheduler" not in scheduler:
+                        raise MisconfigurationException(
+                            'The lr scheduler dict must have the key "scheduler" with its item being an lr scheduler'
+                        )
+                    if "interval" in scheduler and scheduler["interval"] not in ("step", "epoch"):
+                        raise MisconfigurationException(
+                            'The "interval" key in lr scheduler dict must be "step" or "epoch"'
+                            f' but is "{scheduler["interval"]}"'
+                        )
+                    scheduler["reduce_on_plateau"] = isinstance(
+                        scheduler["scheduler"], optim.lr_scheduler.ReduceLROnPlateau
                     )
-                lr_schedulers.append({**default_config, **scheduler})
-            elif isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
-                if monitor is None:
-                    raise MisconfigurationException(
-                        "`configure_optimizers` must include a monitor when a `ReduceLROnPlateau` scheduler is used."
-                        " For example:"
-                        ' {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "metric_to_track"}'
+                    if scheduler["reduce_on_plateau"] and scheduler.get("monitor", None) is None:
+                        raise MisconfigurationException(
+                            "The lr scheduler dict must include a monitor when a `ReduceLROnPlateau` scheduler is used."
+                            ' For example: {"optimizer": optimizer, "lr_scheduler":'
+                            ' {"scheduler": scheduler, "monitor": "your_loss"}}'
+                        )
+                    lr_schedulers.append({**default_config, **scheduler})
+                elif isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
+                    if monitor is None:
+                        raise MisconfigurationException(
+                            "`configure_optimizers` must include a monitor when a `ReduceLROnPlateau`"
+                            " scheduler is used. For example:"
+                            ' {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "metric_to_track"}'
+                        )
+                    lr_schedulers.append(
+                        {**default_config, "scheduler": scheduler, "reduce_on_plateau": True, "monitor": monitor}
                     )
-                lr_schedulers.append(
-                    {**default_config, "scheduler": scheduler, "reduce_on_plateau": True, "monitor": monitor}
-                )
-            elif isinstance(scheduler, optim.lr_scheduler._LRScheduler):
-                lr_schedulers.append({**default_config, "scheduler": scheduler})
-            else:
-                raise ValueError(f'The provided lr scheduler "{scheduler}" is invalid')
+                elif isinstance(scheduler, optim.lr_scheduler._LRScheduler):
+                    lr_schedulers.append({**default_config, "scheduler": scheduler})
+                else:
+                    raise ValueError(f'The provided lr scheduler "{scheduler}" is invalid')
         return lr_schedulers
 
 
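To make the control flow above easier to follow, here is a minimal standalone sketch of what the new manual-optimization branch does with a user-provided scheduler entry. It is not Lightning's actual implementation: the `default_config` values and the exact warning wording are assumptions (the test below only asserts the "but the keys will be ignored" substring).

import warnings

# Assumed stand-in for what Lightning's _get_default_scheduler_config() returns.
default_config = {
    "scheduler": None,
    "interval": "epoch",
    "frequency": 1,
    "reduce_on_plateau": False,
    "monitor": None,
    "strict": True,
}


def normalize_manual_scheduler(scheduler):
    """Mimic the manual-optimization branch: warn on and drop unsupported keys."""
    invalid_keys = {"interval", "frequency", "reduce_on_plateau", "monitor", "strict"}
    if isinstance(scheduler, dict):
        keys_to_warn = [k for k in scheduler if k in invalid_keys]
        if keys_to_warn:
            # Wording abridged; the real message ends with "but the keys will be ignored."
            warnings.warn(
                f"The lr scheduler dict contains {keys_to_warn}, but the keys will be ignored.",
                RuntimeWarning,
            )
        scheduler = {key: scheduler[key] for key in scheduler if key not in invalid_keys}
        return {**default_config, **scheduler}
    # A bare scheduler object is simply wrapped in the default config.
    return {**default_config, "scheduler": scheduler}

For example, `normalize_manual_scheduler({"scheduler": sched, "monitor": "train_loss"})` emits the RuntimeWarning and returns a config whose `monitor` falls back to the default `None`, which is the behaviour the new test below exercises.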
tests/trainer/optimization/test_manual_optimization.py

Lines changed: 43 additions & 0 deletions
@@ -957,6 +957,49 @@ def configure_optimizers(self):
     trainer.fit(model)
 
 
+@pytest.mark.parametrize("scheduler_as_dict", [True, False])
+def test_lr_schedulers_reduce_lr_on_plateau(tmpdir, scheduler_as_dict):
+    class TestModel(BoringModel):
+        def __init__(self, scheduler_as_dict):
+            super().__init__()
+            self.scheduler_as_dict = scheduler_as_dict
+            self.automatic_optimization = False
+
+        def training_step(self, batch, batch_idx):
+            return {"train_loss": torch.tensor([0.0])}
+
+        def training_epoch_end(self, outputs):
+            scheduler = self.lr_schedulers()
+
+            loss = torch.stack([x["train_loss"] for x in outputs]).mean()
+            scheduler.step(loss)
+
+        def configure_optimizers(self):
+            optimizer = torch.optim.SGD(self.parameters(), lr=0.1)
+
+            if self.scheduler_as_dict:
+                scheduler = {
+                    "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer),
+                    "monitor": "train_loss",
+                }
+            else:
+                scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
+
+            return [optimizer], [scheduler]
+
+    model = TestModel(scheduler_as_dict=scheduler_as_dict)
+
+    trainer = Trainer(
+        default_root_dir=tmpdir, max_epochs=1, limit_train_batches=1, limit_val_batches=1, limit_test_batches=1
+    )
+
+    if scheduler_as_dict:
+        with pytest.warns(RuntimeWarning, match="but the keys will be ignored"):
+            trainer.fit(model)
+    else:
+        trainer.fit(model)
+
+
 def test_lr_scheduler_step_not_called(tmpdir):
     """
     Test `lr_scheduler.step()` is not called in manual optimization.