
Commit e2eb955

Merge branch 'master' into issue-20311-cli-save-hyper-instatiation-links
2 parents: ce96c0a + d195d2b

File tree

12 files changed: +128 -8 lines


docs/source-pytorch/conf.py

Lines changed: 1 addition & 0 deletions
@@ -487,6 +487,7 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     ("py:meth", "setup"),
     ("py:meth", "test_step"),
     ("py:meth", "toggle_optimizer"),
+    ("py:meth", "toggled_optimizer"),
     ("py:class", "torch.ScriptModule"),
     ("py:class", "torch.distributed.fsdp.fully_sharded_data_parallel.CPUOffload"),
     ("py:class", "torch.distributed.fsdp.fully_sharded_data_parallel.MixedPrecision"),

docs/source-pytorch/model/manual_optimization.rst

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ To manually optimize, do the following:
 * ``optimizer.zero_grad()`` to clear the gradients from the previous training step
 * ``self.manual_backward(loss)`` instead of ``loss.backward()``
 * ``optimizer.step()`` to update your model parameters
-* ``self.toggle_optimizer()`` and ``self.untoggle_optimizer()`` if needed
+* ``self.toggle_optimizer()`` and ``self.untoggle_optimizer()``, or ``self.toggled_optimizer()`` if needed

 Here is a minimal example of manual optimization.
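
To see what the updated bullet refers to, here is a hedged sketch of a manual-optimization training_step that uses the new context manager; the module name, layer, and loss below are illustrative and not taken from this commit:

import lightning.pytorch as pl
import torch


class ManualOptModel(pl.LightningModule):  # illustrative module, not from this commit
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)
        self.automatic_optimization = False  # manual optimization mode

    def training_step(self, batch, batch_idx):
        opt = self.optimizers()
        # toggled_optimizer() toggles the optimizer on entry and untoggles it on exit,
        # replacing an explicit toggle_optimizer()/untoggle_optimizer() pair.
        with self.toggled_optimizer(opt):
            loss = self.layer(batch).sum()
            opt.zero_grad()
            self.manual_backward(loss)
            opt.step()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)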

requirements/doctests.txt

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
-pytest ==8.3.5
+pytest ==8.4.0
 pytest-doctestplus ==1.4.0

requirements/fabric/test.txt

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 coverage ==7.8.2
 numpy >=1.17.2, <1.27.0
-pytest ==8.3.5
+pytest ==8.4.0
 pytest-cov ==6.1.1
 pytest-timeout ==2.4.0
 pytest-rerunfailures ==15.1

requirements/pytorch/test.txt

Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 coverage ==7.8.2
-pytest ==8.3.5
+pytest ==8.4.0
 pytest-cov ==6.1.1
 pytest-timeout ==2.4.0
 pytest-rerunfailures ==15.1
@@ -12,7 +12,7 @@ numpy >=1.17.2, <1.27.0
 onnx >=1.12.0, <1.19.0
 onnxruntime >=1.12.0, <1.21.0
 psutil <7.0.1 # for `DeviceStatsMonitor`
-pandas >1.0, <2.3.0 # needed in benchmarks
+pandas >2.0, <2.4.0 # needed in benchmarks
 fastapi # for `ServableModuleValidator` # not setting version as re-defined in App
 uvicorn # for `ServableModuleValidator` # not setting version as re-defined in App

setup.py

Lines changed: 2 additions & 1 deletion
@@ -110,7 +110,8 @@ def _set_manifest_path(manifest_dir: str, aggregate: bool = False, mapping: Mapp
     assert os.path.exists(manifest_path)
     # avoid error: setup script specifies an absolute path
     manifest_path = os.path.relpath(manifest_path, _PATH_ROOT)
-    logging.info("Set manifest path to", manifest_path)
+    # Use lazy logging formatting
+    logging.info("Set manifest path to %s", manifest_path)
     setuptools.command.egg_info.manifest_maker.template = manifest_path
     yield
     # cleanup
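
As a side note on the setup.py fix, a minimal sketch of why the %s form is preferable (the path value is illustrative): the logging module interpolates arguments lazily, only when the record is actually emitted, whereas the replaced call passed an extra positional argument with no placeholder, so the path was never formatted into the message (and logging reports a formatting error if the record is emitted).

import logging

manifest_path = "relative/MANIFEST.in"  # illustrative value

# Lazy formatting: %s is filled in by the logging framework when the record is emitted.
logging.info("Set manifest path to %s", manifest_path)

# The old call had no placeholder, so the extra argument was never interpolated:
# logging.info("Set manifest path to", manifest_path)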

src/lightning/pytorch/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Add enable_autolog_hparams argument to Trainer ([#20593](https://github.com/Lightning-AI/pytorch-lightning/pull/20593))
 
 
+- Add `toggled_optimizer(optimizer)` method to the LightningModule, which is a context manager version of `toggle_optimizer` and `untoggle_optimizer` ([#20771](https://github.com/Lightning-AI/pytorch-lightning/pull/20771))
+
+
 - For cross-device local checkpoints, instruct users to install `fsspec>=2025.5.0` if unavailable ([#20780](https://github.com/Lightning-AI/pytorch-lightning/pull/20780))

src/lightning/pytorch/callbacks/progress/progress_bar.py

Lines changed: 3 additions & 0 deletions
@@ -85,6 +85,9 @@ def total_train_batches(self) -> Union[int, float]:
         dataloader is of infinite size.
 
         """
+        if self.trainer.max_epochs == -1 and self.trainer.max_steps is not None and self.trainer.max_steps > 0:
+            remaining_steps = self.trainer.max_steps - self.trainer.global_step
+            return min(self.trainer.num_training_batches, remaining_steps)
         return self.trainer.num_training_batches
 
     @property
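
To make the new branch concrete, a small worked sketch of the cap under assumed numbers (the values are illustrative, not from the commit): when a run is bounded only by max_steps (max_epochs == -1), the progress bar's total for the final epoch is clipped to the steps actually remaining rather than the full epoch length.

# Illustrative values for a run bounded only by max_steps.
max_steps = 1000              # trainer.max_steps
global_step = 950             # optimizer steps already taken when the epoch starts
num_training_batches = 100    # batches in one pass over the dataloader

remaining_steps = max_steps - global_step                             # 50
total_for_progress_bar = min(num_training_batches, remaining_steps)   # 50, not 100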

src/lightning/pytorch/core/module.py

Lines changed: 26 additions & 0 deletions
@@ -1141,6 +1141,32 @@ def untoggle_optimizer(self, optimizer: Union[Optimizer, LightningOptimizer]) ->
         # save memory
         self._param_requires_grad_state = {}
 
+    @contextmanager
+    def toggled_optimizer(self, optimizer: Union[Optimizer, LightningOptimizer]) -> Generator:
+        """Makes sure only the gradients of the current optimizer's parameters are calculated in the training step to
+        prevent dangling gradients in a multiple-optimizer setup. Combines :meth:`toggle_optimizer` and
+        :meth:`untoggle_optimizer` into a context manager.
+
+        Args:
+            optimizer: The optimizer to toggle.
+
+        Example::
+
+            def training_step(...):
+                opt = self.optimizers()
+                with self.toggled_optimizer(opt):
+                    loss = ...
+                    opt.zero_grad()
+                    self.manual_backward(loss)
+                    opt.step()
+
+        """
+        self.toggle_optimizer(optimizer)
+        try:
+            yield
+        finally:
+            self.untoggle_optimizer(optimizer)
+
     def clip_gradients(
         self,
         optimizer: Optimizer,
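
Since the method exists to guard multi-optimizer training, here is a hedged sketch of how it might be used with two optimizers; the GAN-style module, layers, and losses below are illustrative and not part of this commit:

import lightning.pytorch as pl
import torch


class TwoOptimizerModule(pl.LightningModule):  # illustrative, not from this commit
    def __init__(self):
        super().__init__()
        self.generator = torch.nn.Linear(32, 32)
        self.discriminator = torch.nn.Linear(32, 1)
        self.automatic_optimization = False

    def training_step(self, batch, batch_idx):
        opt_g, opt_d = self.optimizers()

        # Only the generator's parameters keep requires_grad=True inside this block,
        # so the backward pass cannot leave dangling gradients on the discriminator.
        with self.toggled_optimizer(opt_g):
            g_loss = -self.discriminator(self.generator(batch)).mean()
            opt_g.zero_grad()
            self.manual_backward(g_loss)
            opt_g.step()

        # And vice versa for the discriminator update.
        with self.toggled_optimizer(opt_d):
            d_loss = self.discriminator(self.generator(batch)).mean()
            opt_d.zero_grad()
            self.manual_backward(d_loss)
            opt_d.step()

    def configure_optimizers(self):
        return (
            torch.optim.SGD(self.generator.parameters(), lr=0.1),
            torch.optim.SGD(self.discriminator.parameters(), lr=0.1),
        )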

tests/tests_pytorch/core/test_lightning_module.py

Lines changed: 16 additions & 0 deletions
@@ -119,6 +119,22 @@ def test_1_optimizer_toggle_model():
     assert not model._param_requires_grad_state
 
 
+def test_optimizer_toggle_model_context_manager():
+    """Test toggle_model runs via the context manager when only one optimizer is used."""
+    model = BoringModel()
+    trainer = Mock()
+    model.trainer = trainer
+    params = model.parameters()
+    optimizer = torch.optim.SGD(params, lr=0.1)
+    trainer.optimizers = [optimizer]
+
+    assert not model._param_requires_grad_state
+    # toggle optimizer was failing with a single optimizer
+    with model.toggled_optimizer(optimizer):
+        assert model._param_requires_grad_state
+    assert not model._param_requires_grad_state
+
+
 def test_toggle_untoggle_2_optimizers_no_shared_parameters(tmp_path):
     class TestModel(BoringModel):
         def __init__(self):
