
Commit a404940

Merge branch 'master' into fix/20972-mlflow-logger-windows-uri-from-master
2 parents d248b61 + 5a2b678

11 files changed: +53 additions, -30 deletions


.azure/gpu-benchmarks.yml

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@ jobs:
   variables:
     DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
   container:
-    image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0"
+    image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
     options: "--gpus=all --shm-size=32g"
   strategy:
     matrix:

.azure/gpu-tests-fabric.yml

Lines changed: 4 additions & 4 deletions
@@ -57,16 +57,16 @@ jobs:
   strategy:
     matrix:
       "Fabric | oldest":
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
+        image: "pytorchlightning/pytorch_lightning:base-cuda12.1.1-py3.10-torch2.1"
         PACKAGE_NAME: "fabric"
       "Fabric | latest":
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
+        image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
         PACKAGE_NAME: "fabric"
       #"Fabric | future":
-      #  image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+      #  image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
       #  PACKAGE_NAME: "fabric"
       "Lightning | latest":
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
+        image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
         PACKAGE_NAME: "lightning"
   workspace:
     clean: all

.azure/gpu-tests-pytorch.yml

Lines changed: 4 additions & 4 deletions
@@ -50,16 +50,16 @@ jobs:
   strategy:
     matrix:
       "PyTorch | oldest":
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
+        image: "pytorchlightning/pytorch_lightning:base-cuda12.1.1-py3.10-torch2.1"
         PACKAGE_NAME: "pytorch"
       "PyTorch | latest":
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
+        image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
         PACKAGE_NAME: "pytorch"
       #"PyTorch | future":
-      #  image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+      #  image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
      #  PACKAGE_NAME: "pytorch"
       "Lightning | latest":
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
+        image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
         PACKAGE_NAME: "lightning"
   pool: lit-rtx-3090
   variables:

.lightning/workflows/fabric.yml

Lines changed: 4 additions & 4 deletions
@@ -10,13 +10,13 @@ parametrize:
   matrix: {}
   include:
     # note that this is setting also all oldest requirements which is linked to Torch == 2.0
-    - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
+    - image: "pytorchlightning/pytorch_lightning:base-cuda12.1.1-py3.10-torch2.1"
       PACKAGE_NAME: "fabric"
-    - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+    - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
       PACKAGE_NAME: "fabric"
-    # - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+    # - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
     #   PACKAGE_NAME: "fabric"
-    - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+    - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
       PACKAGE_NAME: "lightning"
   exclude: []

.lightning/workflows/pytorch.yml

Lines changed: 4 additions & 4 deletions
@@ -10,13 +10,13 @@ parametrize:
   matrix: {}
   include:
     # note that this is setting also all oldest requirements which is linked to Torch == 2.0
-    - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
+    - image: "pytorchlightning/pytorch_lightning:base-cuda12.1.1-py3.10-torch2.1"
       PACKAGE_NAME: "pytorch"
-    - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+    - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
       PACKAGE_NAME: "pytorch"
-    # - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+    # - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
    #   PACKAGE_NAME: "pytorch"
-    - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
+    - image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
       PACKAGE_NAME: "lightning"
   exclude: []

dockers/README.md

Lines changed: 10 additions & 4 deletions
@@ -11,7 +11,13 @@ git clone https://github.com/Lightning-AI/lightning.git
 docker image build -t pytorch-lightning:latest -f dockers/base-cuda/Dockerfile .

 # build with specific arguments
-docker image build -t pytorch-lightning:base-cuda-py3.9-torch1.13-cuda11.7.1 -f dockers/base-cuda/Dockerfile --build-arg PYTHON_VERSION=3.9 --build-arg PYTORCH_VERSION=1.13 --build-arg CUDA_VERSION=11.7.1 .
+docker image build \
+  -t pytorch-lightning:base-cuda12.6.3-py3.10-torch2.8 \
+  -f dockers/base-cuda/Dockerfile \
+  --build-arg PYTHON_VERSION=3.10 \
+  --build-arg PYTORCH_VERSION=2.8 \
+  --build-arg CUDA_VERSION=12.6.3 \
+  .
 ```

 To run your docker use

@@ -45,18 +51,18 @@ sudo systemctl restart docker
 and later run the docker image with `--gpus all`. For example,

 ```
-docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1
+docker run --rm -it --gpus all pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.10-torch2.8
 ```

 ## Run Jupyter server

 1. Build the docker image:
    ```bash
-   docker image build -t pytorch-lightning:v1.6.5 -f dockers/nvidia/Dockerfile --build-arg LIGHTNING_VERSION=1.6.5 .
+   docker image build -t pytorch-lightning:v2.5.1 -f dockers/nvidia/Dockerfile --build-arg LIGHTNING_VERSION=2.5.1 .
    ```
 1. start the server and map ports:
    ```bash
-   docker run --rm -it --gpus=all -p 8888:8888 pytorch-lightning:v1.6.5
+   docker run --rm -it --gpus=all -p 8888:8888 pytorch-lightning:v2.5.1
    ```
 1. Connect in local browser:
    - copy the generated path e.g. `http://hostname:8888/?token=0719fa7e1729778b0cec363541a608d5003e26d4910983c6`

dockers/release/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@ ARG PYTHON_VERSION=3.10
 ARG PYTORCH_VERSION=2.8
 ARG CUDA_VERSION=12.6.3

-FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}-cuda${CUDA_VERSION}
+FROM pytorchlightning/pytorch_lightning:base-cuda${CUDA_VERSION}-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}

 LABEL maintainer="Lightning-AI <https://github.com/Lightning-AI>"
docs/source-pytorch/accelerators/gpu_intermediate.rst

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ variables:
 MASTER_ADDR=localhost MASTER_PORT=random() WORLD_SIZE=3 NODE_RANK=0 LOCAL_RANK=1 python my_file.py --accelerator 'gpu' --devices 3 --etc
 MASTER_ADDR=localhost MASTER_PORT=random() WORLD_SIZE=3 NODE_RANK=0 LOCAL_RANK=2 python my_file.py --accelerator 'gpu' --devices 3 --etc

-Using DDP this way has a few disadvantages over ``torch.multiprocessing.spawn()``:
+Using DDP this way has a few advantages over ``torch.multiprocessing.spawn()``:

 1. All processes (including the main process) participate in training and have the updated state of the model and Trainer state.
 2. No multiprocessing pickle errors
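For context on the doc fix above: the passage describes launching one training process per GPU by hand instead of via ``torch.multiprocessing.spawn()``. A minimal sketch of what the referenced `my_file.py` might contain (the toy model and dataloader are illustrative assumptions, not part of this commit):

```python
# Hypothetical stand-in for the `my_file.py` mentioned in the docs above.
# Each manually launched process runs this same script; Lightning reads
# MASTER_ADDR, MASTER_PORT, WORLD_SIZE, NODE_RANK, and LOCAL_RANK from the
# environment variables set by the launcher.
import torch
from torch.utils.data import DataLoader, TensorDataset
import lightning.pytorch as pl


class ToyModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        (x,) = batch
        return self.layer(x).sum()

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)


if __name__ == "__main__":
    data = DataLoader(TensorDataset(torch.randn(64, 32)), batch_size=8)
    # matches the CLI flags in the docs: 3 GPU processes, DDP strategy
    trainer = pl.Trainer(accelerator="gpu", devices=3, strategy="ddp")
    trainer.fit(ToyModel(), data)
```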

docs/source-pytorch/model/manual_optimization.rst

Lines changed: 21 additions & 4 deletions
@@ -204,7 +204,6 @@ Here is an example training a simple GAN with multiple optimizers using manual o
         d_opt = torch.optim.Adam(self.D.parameters(), lr=1e-5)
         return g_opt, d_opt

-
 Learning Rate Scheduling
 ========================

@@ -230,6 +229,10 @@ Here is an example calling ``lr_scheduler.step()`` every step.
         super().__init__()
         self.automatic_optimization = False

+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
+        return [optimizer], [scheduler]

     def training_step(self, batch, batch_idx):
         # do forward, backward, and optimization

@@ -252,6 +255,11 @@ If you want to call ``lr_scheduler.step()`` every ``N`` steps/epochs, do the fol
         super().__init__()
         self.automatic_optimization = False

+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
+        return [optimizer], [scheduler]
+

     def training_step(self, batch, batch_idx):
         # do forward, backward, and optimization

@@ -275,13 +283,22 @@ If you want to call schedulers that require a metric value after each epoch, con
         super().__init__()
         self.automatic_optimization = False

+    def configure_optimizers(self):
+        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
+        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
+        return [optimizer], [scheduler]

     def on_train_epoch_end(self):
         sch = self.lr_schedulers()

-        # If the selected scheduler is a ReduceLROnPlateau scheduler.
-        if isinstance(sch, torch.optim.lr_scheduler.ReduceLROnPlateau):
-            sch.step(self.trainer.callback_metrics["loss"])
+        sch.step(self.trainer.callback_metrics["loss"])
+
+.. note::
+    :meth:`~lightning.pytorch.core.LightningModule.configure_optimizers` supports 6 different ways to define and return
+    optimizers and learning rate schedulers. Regardless of the way you define them, `self.optimizers()` will always return
+    either a single optimizer if you defined a single optimizer, or a list of optimizers if you defined multiple
+    optimizers. The same applies to the `self.lr_schedulers()` method, which will return a single scheduler
+    if you defined a single scheduler, or a list of schedulers if you defined multiple schedulers


 Optimizer Steps at Different Frequencies
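The note added above says ``self.optimizers()`` and ``self.lr_schedulers()`` return lists when multiple optimizers and schedulers are configured. A minimal sketch of that multi-optimizer case under manual optimization (the module and layer names are illustrative assumptions, not from the diff):

```python
import torch
import lightning.pytorch as pl


class TwoOptimizerModule(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.automatic_optimization = False
        self.a = torch.nn.Linear(32, 2)
        self.b = torch.nn.Linear(32, 2)

    def configure_optimizers(self):
        opt_a = torch.optim.Adam(self.a.parameters(), lr=1e-3)
        opt_b = torch.optim.Adam(self.b.parameters(), lr=1e-3)
        sch_a = torch.optim.lr_scheduler.StepLR(opt_a, step_size=10, gamma=0.1)
        sch_b = torch.optim.lr_scheduler.StepLR(opt_b, step_size=10, gamma=0.1)
        return [opt_a, opt_b], [sch_a, sch_b]

    def training_step(self, batch, batch_idx):
        # with two optimizers defined, self.optimizers() returns a list
        opt_a, opt_b = self.optimizers()
        loss = self.a(batch).sum() + self.b(batch).sum()
        opt_a.zero_grad()
        opt_b.zero_grad()
        self.manual_backward(loss)
        opt_a.step()
        opt_b.step()

    def on_train_epoch_end(self):
        # likewise, self.lr_schedulers() returns a list here
        sch_a, sch_b = self.lr_schedulers()
        sch_a.step()
        sch_b.step()
```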

tests/legacy/back-compatible-versions.txt

Lines changed: 1 addition & 0 deletions
@@ -106,3 +106,4 @@
 2.3.3
 2.5.1
 2.5.2
+2.5.3
