
Commit 7eff003

update docs (#9903)
1 parent f2b0db6 commit 7eff003

File tree

4 files changed: 10 additions & 5 deletions

docs/source/advanced/multi_gpu.rst

Lines changed: 8 additions & 2 deletions
@@ -611,28 +611,34 @@ Let's say you have a batch size of 7 in your dataloader.
     def train_dataloader(self):
         return Dataset(..., batch_size=7)
 
-In DDP or Horovod your effective batch size will be 7 * gpus * num_nodes.
+In DDP, DDP_SPAWN, Deepspeed, DDP_SHARDED, or Horovod, your effective batch size will be 7 * gpus * num_nodes.
 
 .. code-block:: python
 
     # effective batch size = 7 * 8
     Trainer(gpus=8, accelerator="ddp")
+    Trainer(gpus=8, accelerator="ddp_spawn")
+    Trainer(gpus=8, accelerator="ddp_sharded")
     Trainer(gpus=8, accelerator="horovod")
 
     # effective batch size = 7 * 8 * 10
     Trainer(gpus=8, num_nodes=10, accelerator="ddp")
+    Trainer(gpus=8, num_nodes=10, accelerator="ddp_spawn")
+    Trainer(gpus=8, num_nodes=10, accelerator="ddp_sharded")
     Trainer(gpus=8, num_nodes=10, accelerator="horovod")
 
-In DDP2, your effective batch size will be 7 * num_nodes.
+In DDP2 or DP, your effective batch size will be 7 * num_nodes.
 The reason is that the full batch is visible to all GPUs on the node when using DDP2.
 
 .. code-block:: python
 
     # effective batch size = 7
     Trainer(gpus=8, accelerator="ddp2")
+    Trainer(gpus=8, accelerator="dp")
 
     # effective batch size = 7 * 10
     Trainer(gpus=8, num_nodes=10, accelerator="ddp2")
+    Trainer(gpus=8, num_nodes=10, accelerator="dp")
 
 
 .. note:: Huge batch sizes are actually really bad for convergence. Check out:
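
To make the arithmetic above concrete, here is a small self-contained sketch (not part of this commit); effective_batch_size is a hypothetical helper that simply restates the rules from the updated docs text:

    # Hypothetical helper, not part of the commit: it restates the
    # effective-batch-size rules from the docs text above.
    def effective_batch_size(batch_size: int, gpus: int, num_nodes: int, accelerator: str) -> int:
        per_process = {"ddp", "ddp_spawn", "ddp_sharded", "deepspeed", "horovod"}
        per_node = {"ddp2", "dp"}
        if accelerator in per_process:
            # every GPU on every node loads its own batch of `batch_size`
            return batch_size * gpus * num_nodes
        if accelerator in per_node:
            # the full batch is split across the GPUs of a single node
            return batch_size * num_nodes
        raise ValueError(f"unknown accelerator: {accelerator}")

    # the numbers from the example: batch_size=7, gpus=8, num_nodes=10
    assert effective_batch_size(7, gpus=8, num_nodes=1, accelerator="ddp") == 7 * 8
    assert effective_batch_size(7, gpus=8, num_nodes=10, accelerator="ddp_sharded") == 7 * 8 * 10
    assert effective_batch_size(7, gpus=8, num_nodes=10, accelerator="ddp2") == 7 * 10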

pytorch_lightning/callbacks/base.py

Lines changed: 1 addition & 1 deletion
@@ -327,5 +327,5 @@ def on_before_optimizer_step(
         pass
 
     def on_before_zero_grad(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", optimizer: Optimizer) -> None:
-        """Called after ``optimizer.step()`` and before ``optimizer.zero_grad()``."""
+        """Called before ``optimizer.zero_grad()``."""
        pass
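
For context on where this hook sits in the loop, here is a minimal sketch (not part of this commit) of a user callback overriding it; the class name and the gradient-norm printout are illustrative:

    import pytorch_lightning as pl
    from torch.optim import Optimizer

    class GradNormBeforeZeroGrad(pl.Callback):
        # Illustrative callback: the hook name and signature come from the
        # diff above; the printout is made up for the example.
        def on_before_zero_grad(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", optimizer: Optimizer) -> None:
            # Gradients from the last backward pass are still populated here,
            # because this hook fires before ``optimizer.zero_grad()``.
            total = sum(p.grad.norm().item() for p in pl_module.parameters() if p.grad is not None)
            print(f"grad norm before zero_grad: {total:.4f}")

    # trainer = pl.Trainer(callbacks=[GradNormBeforeZeroGrad()])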

pytorch_lightning/callbacks/model_checkpoint.py

Lines changed: 0 additions & 1 deletion
@@ -130,7 +130,6 @@ class ModelCheckpoint(Callback):
 
                 Use ``every_n_epochs`` instead.
 
-
     Note:
         For extra customization, ModelCheckpoint includes the following attributes:
 
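
As a quick usage illustration (not part of this commit), the ``every_n_epochs`` argument that the cleaned-up note refers to can be configured like this; the checkpoint directory and monitored metric are placeholders:

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import ModelCheckpoint

    # Write a checkpoint every 5 epochs, keeping the 3 best by validation loss.
    checkpoint_callback = ModelCheckpoint(
        dirpath="checkpoints/",   # placeholder path
        monitor="val_loss",       # assumes the LightningModule logs ``val_loss``
        save_top_k=3,
        every_n_epochs=5,
    )
    trainer = Trainer(callbacks=[checkpoint_callback])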

pytorch_lightning/loggers/wandb.py

Lines changed: 1 addition & 1 deletion
@@ -282,7 +282,7 @@ def __init__(
             rank_zero_warn(
                 f"Providing log_model={log_model} requires wandb version >= 0.10.22"
                 " for logging associated model metadata.\n"
-                "Hint: Upgrade with `pip install --ugrade wandb`."
+                "Hint: Upgrade with `pip install --upgrade wandb`."
             )
 
         super().__init__()
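
For context, a short sketch (not part of this commit) of the logger setting that triggers this warning on older wandb versions; the project name is a placeholder:

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import WandbLogger

    # log_model=True uploads checkpoints as W&B artifacts; with wandb < 0.10.22
    # the constructor emits the warning whose hint text this commit fixes.
    wandb_logger = WandbLogger(project="my-project", log_model=True)  # placeholder project name
    trainer = Trainer(logger=wandb_logger)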
