
Commit 2dcfaa6

Merge branch 'master' into docs_profiler_linking
2 parents 057e9b6 + f58a176

File tree

7 files changed: +28 / -10 lines changed

docs/source-pytorch/cli/lightning_cli_intermediate_2.rst

Lines changed: 3 additions & 3 deletions
@@ -201,9 +201,10 @@ If the scheduler you want needs other arguments, add them via the CLI (no need t

 .. code:: bash

-    python main.py fit --optimizer=Adam --lr_scheduler=ReduceLROnPlateau --lr_scheduler.monitor=epoch
+    python main.py fit --optimizer=Adam --lr_scheduler=ReduceLROnPlateau --lr_scheduler.monitor=train_loss

-Furthermore, any custom subclass of ``torch.optim.lr_scheduler.LRScheduler`` can be used as learning rate scheduler:
+(assuming you have a ``train_loss`` metric logged). Furthermore, any custom subclass of
+``torch.optim.lr_scheduler.LRScheduler`` can be used as learning rate scheduler:

 .. code:: python

@@ -212,7 +213,6 @@ Furthermore, any custom subclass of ``torch.optim.lr_scheduler.LRScheduler`` can

     from lightning.pytorch.cli import LightningCLI
     from lightning.pytorch.demos.boring_classes import DemoModel, BoringDataModule

-
     class LitLRScheduler(torch.optim.lr_scheduler.CosineAnnealingLR):
         def step(self):
             print("", "using LitLRScheduler", "")

docs/source-pytorch/common/precision_basic.rst

Lines changed: 8 additions & 0 deletions
@@ -39,6 +39,14 @@ However, this setting can sometimes lead to unstable training.

     Trainer(precision="16-true")

+.. warning::
+
+    Float16 cannot represent values smaller than ~6e-5. Values like Adam's default ``eps=1e-8`` become zero, which can cause
+    NaN during training. Increase ``eps`` to 1e-4 or higher, and avoid extremely small values in your model weights and data.
+
+.. note::
+
+    BFloat16 (``"bf16-mixed"`` or ``"bf16-true"``) has better numerical stability with a wider dynamic range.

 ----
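A quick way to verify the warning's claim: Adam's default ``eps`` underflows to exactly zero in float16, while 1e-4 survives and bfloat16 keeps a nonzero (if coarse) value. An illustrative check, with approximate printed values:

    import torch

    print(torch.tensor(1e-8, dtype=torch.float16).item())   # 0.0 -- Adam's default eps vanishes
    print(torch.tensor(1e-4, dtype=torch.float16).item())   # ~1.0002e-04 -- representable
    print(torch.tensor(1e-8, dtype=torch.bfloat16).item())  # ~1.0012e-08 -- nonzero in bfloat16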

requirements/doctests.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 pytest ==8.4.2
-pytest-doctestplus ==1.4.0
+pytest-doctestplus ==1.5.0

requirements/pytorch/extra.txt

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 matplotlib>3.1, <3.11.0
 omegaconf >=2.2.3, <2.4.0
 hydra-core >=1.2.0, <1.4.0
-jsonargparse[signatures,jsonnet] >=4.39.0, <4.42.0
+jsonargparse[signatures,jsonnet] >=4.39.0, <4.43.0
 rich >=12.3.0, <14.2.0
 tensorboardX >=2.2, <2.7.0  # min version is set by torch.onnx missing attribute
 bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"

requirements/pytorch/test.txt

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ numpy >1.20.0, <1.27.0
 onnx >1.12.0, <1.20.0
 onnxruntime >=1.12.0, <1.24.0
 onnxscript >= 0.1.0, < 0.5.0
-psutil <7.1.1  # for `DeviceStatsMonitor`
+psutil <7.1.2  # for `DeviceStatsMonitor`
 pandas >2.0, <2.4.0  # needed in benchmarks
 fastapi  # for `ServableModuleValidator` # not setting version as re-defined in App
 uvicorn  # for `ServableModuleValidator` # not setting version as re-defined in App

src/lightning/pytorch/cli.py

Lines changed: 7 additions & 0 deletions
@@ -66,6 +66,13 @@


 class ReduceLROnPlateau(torch.optim.lr_scheduler.ReduceLROnPlateau):
+    """Custom ReduceLROnPlateau scheduler that extends PyTorch's ReduceLROnPlateau.
+
+    This class adds a `monitor` attribute to the standard PyTorch ReduceLROnPlateau to specify which metric should be
+    tracked for learning rate adjustment.
+
+    """
+
     def __init__(self, optimizer: Optimizer, monitor: str, *args: Any, **kwargs: Any) -> None:
         super().__init__(optimizer, *args, **kwargs)
         self.monitor = monitor
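Per the signature above, the subclass is instantiated like the stock PyTorch scheduler plus a metric name. A hypothetical instantiation (the optimizer, model, and metric name are assumptions for illustration, not from this commit):

    import torch
    from lightning.pytorch.cli import ReduceLROnPlateau

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # `monitor` names the logged metric to track; remaining kwargs pass
    # through to torch.optim.lr_scheduler.ReduceLROnPlateau.
    scheduler = ReduceLROnPlateau(optimizer, monitor="train_loss", patience=3)
    print(scheduler.monitor)  # train_loss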

tests/tests_pytorch/utilities/test_model_summary.py

Lines changed: 7 additions & 4 deletions
@@ -319,13 +319,16 @@ def test_empty_model_size(max_depth):


 @pytest.mark.parametrize(
-    "accelerator",
+    ("accelerator", "precision"),
     [
-        pytest.param("gpu", marks=RunIf(min_cuda_gpus=1)),
-        pytest.param("mps", marks=RunIf(mps=True)),
+        pytest.param("gpu", "16-true", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("gpu", "32-true", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("gpu", "64-true", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("mps", "16-true", marks=RunIf(mps=True)),
+        pytest.param("mps", "32-true", marks=RunIf(mps=True)),
+        # Note: "64-true" with "mps" is skipped because MPS does not support float64
     ],
 )
-@pytest.mark.parametrize("precision", ["16-true", "32-true", "64-true"])
 def test_model_size_precision(tmp_path, accelerator, precision):
     """Test model size for different precision types."""
     model = PreCalculatedModel(precision=int(precision.split("-")[0]))
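The skipped mps/64-true combination reflects a backend limitation: MPS tensors cannot be float64. A small check, meaningful only on Apple-silicon machines (the exact exception type raised is an assumption and may vary by torch version):

    import torch

    if torch.backends.mps.is_available():
        try:
            torch.zeros(1, dtype=torch.float64, device="mps")
        except (TypeError, RuntimeError) as err:  # exception type may vary by torch version
            print(f"float64 unsupported on MPS: {err}")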
