
Commit 08292b1

Clarification of docs on schedulers (#21061)
1 parent f6a9151 commit 08292b1

File tree: 2 files changed (+22, -5 lines)


docs/source-pytorch/accelerators/gpu_intermediate.rst

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ variables:
     MASTER_ADDR=localhost MASTER_PORT=random() WORLD_SIZE=3 NODE_RANK=0 LOCAL_RANK=1 python my_file.py --accelerator 'gpu' --devices 3 --etc
     MASTER_ADDR=localhost MASTER_PORT=random() WORLD_SIZE=3 NODE_RANK=0 LOCAL_RANK=2 python my_file.py --accelerator 'gpu' --devices 3 --etc

-Using DDP this way has a few disadvantages over ``torch.multiprocessing.spawn()``:
+Using DDP this way has a few advantages over ``torch.multiprocessing.spawn()``:

 1. All processes (including the main process) participate in training and have the updated state of the model and Trainer state.
 2. No multiprocessing pickle errors
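
A sketch for context, not part of the diff above: the launch commands in this hunk assume a script whose Trainer is configured for DDP without spawn. The model and datamodule names below are hypothetical placeholders.

    # my_file.py, minimal sketch matching the flags in the launch commands above
    import lightning.pytorch as pl

    if __name__ == "__main__":
        trainer = pl.Trainer(accelerator="gpu", devices=3, strategy="ddp")
        # trainer.fit(model, datamodule=dm)  # model and dm are hypothetical placeholders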

docs/source-pytorch/model/manual_optimization.rst

Lines changed: 21 additions & 4 deletions
@@ -204,7 +204,6 @@ Here is an example training a simple GAN with multiple optimizers using manual o
         d_opt = torch.optim.Adam(self.D.parameters(), lr=1e-5)
         return g_opt, d_opt

-
 Learning Rate Scheduling
 ========================

@@ -230,6 +229,10 @@ Here is an example calling ``lr_scheduler.step()`` every step.
         super().__init__()
         self.automatic_optimization = False

+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
+        return [optimizer], [scheduler]

     def training_step(self, batch, batch_idx):
         # do forward, backward, and optimization
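
A sketch, not part of the diff: the ``training_step`` that this hunk's context lines abbreviate, stepping the scheduler on every optimization step under manual optimization. The loss computation is a placeholder; the other calls are the standard LightningModule manual-optimization methods.

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        sch = self.lr_schedulers()

        loss = self.compute_loss(batch)  # placeholder loss helper, not from the docs
        opt.zero_grad()
        self.manual_backward(loss)
        opt.step()

        # a single scheduler was returned from configure_optimizers,
        # so self.lr_schedulers() hands it back directly
        sch.step()
        return loss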
@@ -252,6 +255,11 @@ If you want to call ``lr_scheduler.step()`` every ``N`` steps/epochs, do the fol
         super().__init__()
         self.automatic_optimization = False

+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
+        return [optimizer], [scheduler]
+

     def training_step(self, batch, batch_idx):
         # do forward, backward, and optimization
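
A sketch, not part of the diff: stepping the scheduler only every ``N`` batches under manual optimization. ``N`` and the loss helper are illustrative, not taken from the docs.

    N = 100  # illustrative interval

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        sch = self.lr_schedulers()

        loss = self.compute_loss(batch)  # placeholder loss helper
        opt.zero_grad()
        self.manual_backward(loss)
        opt.step()

        # step the scheduler once every N optimization steps
        if (batch_idx + 1) % self.N == 0:
            sch.step()
        return loss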
@@ -275,13 +283,22 @@ If you want to call schedulers that require a metric value after each epoch, con
         super().__init__()
         self.automatic_optimization = False

+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
+        return [optimizer], [scheduler]

     def on_train_epoch_end(self):
         sch = self.lr_schedulers()

-        # If the selected scheduler is a ReduceLROnPlateau scheduler.
-        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau):
-            sch.step(self.trainer.callback_metrics["loss"])
+        sch.step(self.trainer.callback_metrics["loss"])
+
+.. note::
+    :meth:`~lightning.pytorch.core.LightningModule.configure_optimizers` supports 6 different ways to define and return
+    optimizers and learning rate schedulers. Regardless of the way you define them, `self.optimizers()` will always return
+    either a single optimizer if you defined a single optimizer, or a list of optimizers if you defined multiple
+    optimizers. The same applies to the `self.lr_schedulers()` method, which will return a single scheduler
+    if you defined a single scheduler, or a list of schedulers if you defined multiple schedulers.


 Optimizer Steps at Different Frequencies
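
A sketch, not part of the diff, of the behaviour the new note describes: with multiple optimizers and schedulers, both ``self.optimizers()`` and ``self.lr_schedulers()`` return lists. The ``self.G`` and ``self.D`` submodules are borrowed from the GAN example earlier in the same file.

    def configure_optimizers(self):
        g_opt = torch.optim.Adam(self.G.parameters(), lr=1e-5)
        d_opt = torch.optim.Adam(self.D.parameters(), lr=1e-5)
        g_sch = torch.optim.lr_scheduler.StepLR(g_opt, step_size=10, gamma=0.1)
        d_sch = torch.optim.lr_scheduler.StepLR(d_opt, step_size=10, gamma=0.1)
        return [g_opt, d_opt], [g_sch, d_sch]

    def on_train_epoch_end(self):
        # two schedulers were defined, so a list comes back
        g_sch, d_sch = self.lr_schedulers()
        g_sch.step()
        d_sch.step()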
