
Commit 2dcfaa6

Merge branch 'master' into docs_profiler_linking
2 parents 057e9b6 + f58a176

File tree

7 files changed: +28 / -10 lines changed

docs/source-pytorch/cli/lightning_cli_intermediate_2.rst

Lines changed: 3 additions & 3 deletions
@@ -201,9 +201,10 @@ If the scheduler you want needs other arguments, add them via the CLI (no need t

 .. code:: bash

-    python main.py fit --optimizer=Adam --lr_scheduler=ReduceLROnPlateau --lr_scheduler.monitor=epoch
+    python main.py fit --optimizer=Adam --lr_scheduler=ReduceLROnPlateau --lr_scheduler.monitor=train_loss

-Furthermore, any custom subclass of ``torch.optim.lr_scheduler.LRScheduler`` can be used as learning rate scheduler:
+(assuming you have a ``train_loss`` metric logged). Furthermore, any custom subclass of
+``torch.optim.lr_scheduler.LRScheduler`` can be used as learning rate scheduler:

 .. code:: python

@@ -212,7 +213,6 @@ Furthermore, any custom subclass of ``torch.optim.lr_scheduler.LRScheduler`` can

     from lightning.pytorch.cli import LightningCLI
     from lightning.pytorch.demos.boring_classes import DemoModel, BoringDataModule

-
     class LitLRScheduler(torch.optim.lr_scheduler.CosineAnnealingLR):
         def step(self):
             print("", "using LitLRScheduler", "")

docs/source-pytorch/common/precision_basic.rst

Lines changed: 8 additions & 0 deletions
@@ -39,6 +39,14 @@ However, this setting can sometimes lead to unstable training.

     Trainer(precision="16-true")

+.. warning::
+
+    Float16 cannot represent values smaller than ~6e-5. Values like Adam's default ``eps=1e-8`` become zero, which can cause
+    NaN during training. Increase ``eps`` to 1e-4 or higher, and avoid extremely small values in your model weights and data.
+
+.. note::
+
+    BFloat16 (``"bf16-mixed"`` or ``"bf16-true"``) has better numerical stability with a wider dynamic range.

 ----
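A quick way to verify the warning's claim: Adam's default ``eps`` underflows to exactly zero in float16, while 1e-4 survives and bfloat16 keeps a nonzero (if coarse) value. An illustrative check, with approximate printed values:

    import torch

    print(torch.tensor(1e-8, dtype=torch.float16).item())   # 0.0 -- Adam's default eps vanishes
    print(torch.tensor(1e-4, dtype=torch.float16).item())   # ~1.0002e-04 -- representable
    print(torch.tensor(1e-8, dtype=torch.bfloat16).item())  # ~1.0012e-08 -- nonzero in bfloat16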

requirements/doctests.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 pytest ==8.4.2
-pytest-doctestplus ==1.4.0
+pytest-doctestplus ==1.5.0

requirements/pytorch/extra.txt

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 matplotlib>3.1, <3.11.0
 omegaconf >=2.2.3, <2.4.0
 hydra-core >=1.2.0, <1.4.0
-jsonargparse[signatures,jsonnet] >=4.39.0, <4.42.0
+jsonargparse[signatures,jsonnet] >=4.39.0, <4.43.0
 rich >=12.3.0, <14.2.0
 tensorboardX >=2.2, <2.7.0  # min version is set by torch.onnx missing attribute
 bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"

requirements/pytorch/test.txt

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ numpy >1.20.0, <1.27.0
 onnx >1.12.0, <1.20.0
 onnxruntime >=1.12.0, <1.24.0
 onnxscript >= 0.1.0, < 0.5.0
-psutil <7.1.1  # for `DeviceStatsMonitor`
+psutil <7.1.2  # for `DeviceStatsMonitor`
 pandas >2.0, <2.4.0  # needed in benchmarks
 fastapi  # for `ServableModuleValidator` # not setting version as re-defined in App
 uvicorn  # for `ServableModuleValidator` # not setting version as re-defined in App

src/lightning/pytorch/cli.py

Lines changed: 7 additions & 0 deletions
@@ -66,6 +66,13 @@


 class ReduceLROnPlateau(torch.optim.lr_scheduler.ReduceLROnPlateau):
+    """Custom ReduceLROnPlateau scheduler that extends PyTorch's ReduceLROnPlateau.
+
+    This class adds a `monitor` attribute to the standard PyTorch ReduceLROnPlateau to specify which metric should be
+    tracked for learning rate adjustment.
+
+    """
+
     def __init__(self, optimizer: Optimizer, monitor: str, *args: Any, **kwargs: Any) -> None:
         super().__init__(optimizer, *args, **kwargs)
         self.monitor = monitor
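Per the signature above, the subclass is instantiated like the stock PyTorch scheduler plus a metric name. A hypothetical instantiation (the optimizer, model, and metric name are assumptions for illustration, not from this commit):

    import torch
    from lightning.pytorch.cli import ReduceLROnPlateau

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # `monitor` names the logged metric to track; remaining kwargs pass
    # through to torch.optim.lr_scheduler.ReduceLROnPlateau.
    scheduler = ReduceLROnPlateau(optimizer, monitor="train_loss", patience=3)
    print(scheduler.monitor)  # train_loss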

tests/tests_pytorch/utilities/test_model_summary.py

Lines changed: 7 additions & 4 deletions
@@ -319,13 +319,16 @@ def test_empty_model_size(max_depth):


 @pytest.mark.parametrize(
-    "accelerator",
+    ("accelerator", "precision"),
     [
-        pytest.param("gpu", marks=RunIf(min_cuda_gpus=1)),
-        pytest.param("mps", marks=RunIf(mps=True)),
+        pytest.param("gpu", "16-true", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("gpu", "32-true", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("gpu", "64-true", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("mps", "16-true", marks=RunIf(mps=True)),
+        pytest.param("mps", "32-true", marks=RunIf(mps=True)),
+        # Note: "64-true" with "mps" is skipped because MPS does not support float64
     ],
 )
-@pytest.mark.parametrize("precision", ["16-true", "32-true", "64-true"])
 def test_model_size_precision(tmp_path, accelerator, precision):
     """Test model size for different precision types."""
     model = PreCalculatedModel(precision=int(precision.split("-")[0]))
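The skipped mps/64-true combination reflects a backend limitation: MPS tensors cannot be float64. A small check, meaningful only on Apple-silicon machines (the exact exception type raised is an assumption and may vary by torch version):

    import torch

    if torch.backends.mps.is_available():
        try:
            torch.zeros(1, dtype=torch.float64, device="mps")
        except (TypeError, RuntimeError) as err:  # exception type may vary by torch version
            print(f"float64 unsupported on MPS: {err}")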
