Skip to content

Commit b31b81b

Browse files
authored
Merge branch 'master' into lr_finder/spacing_issue
2 parents 0c4451c + b7ec502 commit b31b81b

File tree

14 files changed

+321
-24
lines changed

14 files changed

+321
-24
lines changed

.azure/gpu-benchmarks.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ jobs:
9696
- bash: python -m pytest parity_$(PACKAGE_NAME) -v --durations=0
9797
env:
9898
PL_RUNNING_BENCHMARKS: "1"
99-
PL_RUN_CUDA_TESTS: "1"
99+
RUN_ONLY_CUDA_TESTS: "1"
100100
workingDirectory: tests/
101101
displayName: "Testing: benchmarks"
102102

@@ -105,7 +105,7 @@ jobs:
105105
# without succeeded this could run even if the job has already failed
106106
condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'fabric'))
107107
env:
108-
PL_RUN_CUDA_TESTS: "1"
108+
RUN_ONLY_CUDA_TESTS: "1"
109109
PL_RUN_STANDALONE_TESTS: "1"
110110
displayName: "Testing: fabric standalone tasks"
111111
timeoutInMinutes: "10"

.azure/gpu-tests-fabric.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ jobs:
4848
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
4949
FREEZE_REQUIREMENTS: "1"
5050
PIP_CACHE_DIR: "/var/tmp/pip"
51-
PL_RUN_CUDA_TESTS: "1"
51+
RUN_ONLY_CUDA_TESTS: "1"
5252
container:
5353
image: $(image)
5454
# default shm size is 64m. Increase it to avoid:
@@ -78,8 +78,6 @@ jobs:
7878
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${cuda_ver}/torch_stable.html"
7979
scope=$(python -c 'n = "$(PACKAGE_NAME)" ; print(dict(fabric="lightning_fabric").get(n, n))')
8080
echo "##vso[task.setvariable variable=COVERAGE_SOURCE]$scope"
81-
python_ver=$(python -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')")
82-
echo "##vso[task.setvariable variable=PYTHON_VERSION_MM]$python_ver"
8381
displayName: "set env. vars"
8482
- bash: |
8583
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}"

.azure/gpu-tests-pytorch.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ jobs:
6666
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
6767
FREEZE_REQUIREMENTS: "1"
6868
PIP_CACHE_DIR: "/var/tmp/pip"
69-
PL_RUN_CUDA_TESTS: "1"
69+
RUN_ONLY_CUDA_TESTS: "1"
7070
container:
7171
image: $(image)
7272
# default shm size is 64m. Increase it to avoid:
@@ -82,8 +82,6 @@ jobs:
8282
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/cu${cuda_ver}/torch_stable.html"
8383
scope=$(python -c 'n = "$(PACKAGE_NAME)" ; print(dict(pytorch="pytorch_lightning").get(n, n))')
8484
echo "##vso[task.setvariable variable=COVERAGE_SOURCE]$scope"
85-
python_ver=$(python -c "import sys; print(f'{sys.version_info.major}{sys.version_info.minor}')")
86-
echo "##vso[task.setvariable variable=PYTHON_VERSION_MM]$python_ver"
8785
displayName: "set env. vars"
8886
- bash: |
8987
echo "##vso[task.setvariable variable=TORCH_URL]https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}"

.lightning/workflows/fabric.yml

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
trigger:
2+
push:
3+
branches: ["master"]
4+
pull_request:
5+
branches: ["master"]
6+
7+
timeout: "75" # minutes
8+
machine: "L4_X_2"
9+
parametrize:
10+
matrix: {}
11+
include:
12+
# note that this also sets all the oldest requirements, which are linked to Torch == 2.1
13+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
14+
PACKAGE_NAME: "fabric"
15+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
16+
PACKAGE_NAME: "fabric"
17+
# - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
18+
# PACKAGE_NAME: "fabric"
19+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
20+
PACKAGE_NAME: "lightning"
21+
exclude: []
22+
23+
env:
24+
FREEZE_REQUIREMENTS: "1"
25+
RUN_ONLY_CUDA_TESTS: "1"
26+
27+
run: |
28+
whereis nvidia
29+
nvidia-smi
30+
python --version
31+
pip --version
32+
pip install -q fire wget packaging
33+
set -ex
34+
35+
CUDA_VERSION="${image##*cuda}" # Remove everything up to and including "cuda"
36+
echo "Using CUDA version: ${CUDA_VERSION}"
37+
CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # major.minor, e.g. 12.1.1 -> 12.1 (drop the last dot and everything after)
38+
CUDA_VERSION_MM="${CUDA_VERSION_M_M//'.'/''}"
39+
TORCH_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html"
40+
echo "Torch URL: ${TORCH_URL}"
41+
COVERAGE_SOURCE=$(python -c "n = '${PACKAGE_NAME}' ; print(dict(fabric='lightning_fabric').get(n, n))") # map the matrix package name to its import name
42+
echo "collecting coverage for: ${COVERAGE_SOURCE}"
43+
44+
if [ "${TORCH_VER}" == "2.1" ]; then
45+
echo "Set oldest versions"
46+
python .actions/assistant.py replace_oldest_ver
47+
pip install "cython<3.0" wheel # for compatibility
48+
fi
49+
50+
echo "Adjust torch versions in requirements files"
51+
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
52+
pip install -q wget packaging
53+
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
54+
for fpath in `ls requirements/**/*.txt`; do \
55+
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
56+
done
57+
58+
if [ "${PACKAGE_NAME}" == "fabric" ]; then
59+
echo "Replaced PL imports"
60+
pip install -U -q -r .actions/requirements.txt
61+
python .actions/assistant.py copy_replace_imports --source_dir="./tests/tests_fabric" \
62+
--source_import="lightning.fabric" \
63+
--target_import="lightning_fabric"
64+
python .actions/assistant.py copy_replace_imports --source_dir="./examples/fabric" \
65+
--source_import="lightning.fabric" \
66+
--target_import="lightning_fabric"
67+
fi
68+
69+
extra=$(python -c "print({'lightning': 'fabric-'}.get('${PACKAGE_NAME}', ''))") # extras prefix is only used for the unified "lightning" package
70+
pip install -e ".[${extra}dev]" -U --upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
71+
72+
python requirements/collect_env_details.py
73+
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
74+
python requirements/pytorch/check-avail-extras.py
75+
python -c "import bitsandbytes"
76+
77+
echo "Testing: Fabric doctests"
78+
if [ "${PACKAGE_NAME}" == "fabric" ]; then
79+
cd src/
80+
python -m pytest lightning_fabric
81+
cd ..
82+
fi
83+
84+
cd tests/
85+
echo "Testing: fabric standard"
86+
python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_fabric/ -v --durations=50
87+
88+
echo "Testing: fabric standalone"
89+
export PL_RUN_STANDALONE_TESTS=1
90+
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
91+
bash ./run_standalone_tests.sh "tests_fabric"
92+
93+
# echo "Reporting coverage" # todo
94+
# python -m coverage report
95+
# python -m coverage xml
96+
# python -m coverage html
97+
98+
# TODO: enable coverage
99+
# # https://docs.codecov.com/docs/codecov-uploader
100+
# curl -Os https://uploader.codecov.io/latest/linux/codecov
101+
# chmod +x codecov
102+
# ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
103+
# --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
104+
# ls -l
105+
cd ..
106+
107+
echo "Testing: fabric examples"
108+
cd examples/
109+
bash run_fabric_examples.sh --accelerator=cuda --devices=1
110+
bash run_fabric_examples.sh --accelerator=cuda --devices=2 --strategy ddp

.lightning/workflows/pytorch.yml

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
trigger:
2+
push:
3+
branches: ["master"]
4+
pull_request:
5+
branches: ["master"]
6+
7+
timeout: "75" # minutes
8+
machine: "L4_X_2"
9+
parametrize:
10+
matrix: {}
11+
include:
12+
# note that this also sets all the oldest requirements, which are linked to Torch == 2.1
13+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
14+
PACKAGE_NAME: "pytorch"
15+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
16+
PACKAGE_NAME: "pytorch"
17+
# - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
18+
# PACKAGE_NAME: "pytorch"
19+
- image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
20+
PACKAGE_NAME: "lightning"
21+
exclude: []
22+
23+
env:
24+
FREEZE_REQUIREMENTS: "1"
25+
RUN_ONLY_CUDA_TESTS: "1"
26+
27+
run: |
28+
whereis nvidia
29+
nvidia-smi
30+
python --version
31+
pip --version
32+
pip install -q fire wget packaging
33+
set -ex
34+
35+
CUDA_VERSION="${image##*cuda}" # Remove everything up to and including "cuda"
36+
echo "Using CUDA version: ${CUDA_VERSION}"
37+
CUDA_VERSION_M_M="${CUDA_VERSION%.*}" # major.minor, e.g. 12.1.1 -> 12.1 (drop the last dot and everything after)
38+
CUDA_VERSION_MM="${CUDA_VERSION_M_M//'.'/''}"
39+
TORCH_URL="https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html"
40+
echo "Torch URL: ${TORCH_URL}"
41+
COVERAGE_SOURCE=$(python -c "n = '${PACKAGE_NAME}' ; print(dict(pytorch='pytorch_lightning').get(n, n))") # map the matrix package name to its import name
42+
echo "collecting coverage for: ${COVERAGE_SOURCE}"
43+
44+
if [ "${TORCH_VER}" == "2.1" ]; then
45+
echo "Set oldest versions"
46+
python .actions/assistant.py replace_oldest_ver
47+
pip install "cython<3.0" wheel # for compatibility
48+
fi
49+
50+
echo "Adjust torch versions in requirements files"
51+
PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
52+
pip install -q wget packaging
53+
python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/adjust-torch-versions.py
54+
for fpath in `ls requirements/**/*.txt`; do \
55+
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
56+
done
57+
58+
if [ "${PACKAGE_NAME}" == "pytorch" ]; then
59+
echo "Adjust PL imports"
60+
pip install -U -q -r .actions/requirements.txt
61+
python .actions/assistant.py copy_replace_imports --source_dir="./tests/tests_pytorch" \
62+
--source_import="lightning.fabric,lightning.pytorch" \
63+
--target_import="lightning_fabric,pytorch_lightning"
64+
python .actions/assistant.py copy_replace_imports --source_dir="./examples/pytorch/basics" \
65+
--source_import="lightning.fabric,lightning.pytorch" \
66+
--target_import="lightning_fabric,pytorch_lightning"
67+
fi
68+
69+
extra=$(python -c "print({'lightning': 'pytorch-'}.get('${PACKAGE_NAME}', ''))") # extras prefix is only used for the unified "lightning" package
70+
pip install -e ".[${extra}dev]" -U --upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
71+
72+
if [ "${PACKAGE_NAME}" == "pytorch" ]; then
73+
echo "uninstall lightning to have just single package"
74+
pip uninstall -y lightning
75+
elif [ "${PACKAGE_NAME}" == "lightning" ]; then
76+
echo "uninstall PL to have just single package"
77+
pip uninstall -y pytorch-lightning
78+
fi
79+
80+
python requirements/collect_env_details.py
81+
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
82+
python requirements/pytorch/check-avail-extras.py
83+
python -c "import bitsandbytes"
84+
85+
echo "Testing: Pytorch doctests"
86+
if [ "${PACKAGE_NAME}" == "pytorch" ]; then
87+
cd src/
88+
python -m pytest pytorch_lightning
89+
cd ..
90+
fi
91+
92+
echo "Get legacy checkpoints"
93+
bash .actions/pull_legacy_checkpoints.sh
94+
cd tests/legacy
95+
# bash generate_checkpoints.sh
96+
ls -lh checkpoints/
97+
cd ../..
98+
99+
cd tests/
100+
echo "Testing: PyTorch standard"
101+
python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_pytorch/ -v --durations=50
102+
103+
echo "Testing: PyTorch standalone"
104+
export PL_USE_MOCKED_MNIST=1
105+
export PL_RUN_STANDALONE_TESTS=1
106+
wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/run_standalone_tests.sh
107+
bash ./run_standalone_tests.sh "tests_pytorch"
108+
109+
echo "Testing: PyTorch standalone tasks"
110+
cd tests_pytorch/
111+
bash run_standalone_tasks.sh
112+
113+
# echo "Reporting coverage" # todo
114+
# python -m coverage report
115+
# python -m coverage xml
116+
# python -m coverage html
117+
118+
# TODO: enable coverage
119+
# # https://docs.codecov.com/docs/codecov-uploader
120+
# curl -Os https://uploader.codecov.io/latest/linux/codecov
121+
# chmod +x codecov
122+
# ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
123+
# --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
124+
# ls -l
125+
cd ../..
126+
127+
echo "Testing: PyTorch examples"
128+
cd examples/
129+
bash run_pl_examples.sh --trainer.accelerator=gpu --trainer.devices=1
130+
bash run_pl_examples.sh --trainer.accelerator=gpu --trainer.devices=2 --trainer.strategy=ddp
131+
bash run_pl_examples.sh --trainer.accelerator=gpu --trainer.devices=2 --trainer.strategy=ddp --trainer.precision=16

src/lightning/fabric/utilities/testing/_runif.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def _runif_reasons(
4444
"""Construct reasons for pytest skipif.
4545
4646
Args:
47-
min_cuda_gpus: Require this number of gpus and that the ``PL_RUN_CUDA_TESTS=1`` environment variable is set.
47+
min_cuda_gpus: Require this number of gpus and that the ``RUN_ONLY_CUDA_TESTS=1`` environment variable is set.
4848
min_torch: Require that PyTorch is greater or equal than this version.
4949
max_torch: Require that PyTorch is less than this version.
5050
min_python: Require that Python is greater or equal than this version.

src/lightning/pytorch/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
4444
- Fix gradient calculation in `lr_finder` for `mode="exponential"` ([#21055](https://github.com/Lightning-AI/pytorch-lightning/pull/21055))
4545

4646

47+
- Fixed `save_hyperparameters` crashing with `dataclasses` using `init=False` fields ([#21051](https://github.com/Lightning-AI/pytorch-lightning/pull/21051))
48+
49+
4750
---
4851

4952
## [2.5.2] - 2025-06-20

src/lightning/pytorch/core/module.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,22 @@ def validation_step(self, batch, batch_idx):
808808
# CASE 2: multiple validation dataloaders
809809
def validation_step(self, batch, batch_idx, dataloader_idx=0):
810810
# dataloader_idx tells you which dataset this is.
811-
...
811+
x, y = batch
812+
813+
# implement your own
814+
out = self(x)
815+
816+
if dataloader_idx == 0:
817+
loss = self.loss0(out, y)
818+
else:
819+
loss = self.loss1(out, y)
820+
821+
# calculate acc
822+
labels_hat = torch.argmax(out, dim=1)
823+
acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
824+
825+
# log the outputs separately for each dataloader
826+
self.log_dict({f"val_loss_{dataloader_idx}": loss, f"val_acc_{dataloader_idx}": acc})
812827
813828
Note:
814829
If you don't need to validate you don't need to implement this method.
@@ -875,7 +890,22 @@ def test_step(self, batch, batch_idx):
875890
# CASE 2: multiple test dataloaders
876891
def test_step(self, batch, batch_idx, dataloader_idx=0):
877892
# dataloader_idx tells you which dataset this is.
878-
...
893+
x, y = batch
894+
895+
# implement your own
896+
out = self(x)
897+
898+
if dataloader_idx == 0:
899+
loss = self.loss0(out, y)
900+
else:
901+
loss = self.loss1(out, y)
902+
903+
# calculate acc
904+
labels_hat = torch.argmax(out, dim=1)
905+
acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
906+
907+
# log the outputs separately for each dataloader
908+
self.log_dict({f"test_loss_{dataloader_idx}": loss, f"test_acc_{dataloader_idx}": acc})
879909
880910
Note:
881911
If you don't need to test you don't need to implement this method.

src/lightning/pytorch/utilities/parsing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,8 @@ def save_hyperparameters(
167167
if given_hparams is not None:
168168
init_args = given_hparams
169169
elif is_dataclass(obj):
170-
init_args = {f.name: getattr(obj, f.name) for f in fields(obj)}
170+
obj_fields = fields(obj)
171+
init_args = {f.name: getattr(obj, f.name) for f in obj_fields if f.init}
171172
else:
172173
init_args = {}
173174

src/lightning/pytorch/utilities/testing/_runif.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def _runif_reasons(
4646
"""Construct reasons for pytest skipif.
4747
4848
Args:
49-
min_cuda_gpus: Require this number of gpus and that the ``PL_RUN_CUDA_TESTS=1`` environment variable is set.
49+
min_cuda_gpus: Require this number of gpus and that the ``RUN_ONLY_CUDA_TESTS=1`` environment variable is set.
5050
min_torch: Require that PyTorch is greater or equal than this version.
5151
max_torch: Require that PyTorch is less than this version.
5252
min_python: Require that Python is greater or equal than this version.

0 commit comments

Comments
 (0)