Skip to content

Commit 77eef8a

Browse files
awaelchli, carmocca, Borda, and pre-commit-ci[bot]
authored
Update GPU CI and docker images for PyTorch 2.1 (#18719)
Co-authored-by: Carlos Mocholí <[email protected]>
Co-authored-by: Jirka <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 5d819c9 commit 77eef8a

File tree

5 files changed, with 10 additions and 44 deletions.

.azure/gpu-tests-fabric.yml

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,10 @@ jobs:
5959
strategy:
6060
matrix:
6161
"Fabric | latest":
62-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
63-
IS_NIGHTLY: "false"
62+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
6463
PACKAGE_NAME: "fabric"
6564
"Lightning | latest":
66-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
67-
IS_NIGHTLY: "false"
68-
PACKAGE_NAME: "lightning"
69-
"Lightning | RC":
70-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
71-
IS_NIGHTLY: "true"
65+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
7266
PACKAGE_NAME: "lightning"
7367
workspace:
7468
clean: all
@@ -87,7 +81,6 @@ jobs:
8781
echo $CUDA_VISIBLE_DEVICES
8882
echo $CUDA_VERSION_MM
8983
echo $TORCH_URL
90-
echo $(IS_NIGHTLY)
9184
echo $COVERAGE_SOURCE
9285
whereis nvidia
9386
nvidia-smi
@@ -104,23 +97,13 @@ jobs:
104 97
for fpath in `ls requirements/**/*.txt`; do \
105 98
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
106 99
done
107-
# without succeeded this could run even if the job has already failed
108-
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false'))
109100
displayName: "Adjust dependencies"
110101
111102
- bash: |
112103
extra=$(python -c "print({'lightning': 'fabric-'}.get('$(PACKAGE_NAME)', ''))")
113104
pip install -e ".[${extra}dev]" pytest-timeout -U --find-links ${TORCH_URL}
114105
displayName: "Install package & dependencies"
115106
116-
- bash: |
117-
pip uninstall -y torch torchvision
118-
pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%}
119-
python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver"
120-
# without succeeded this could run even if the job has already failed
121-
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true'))
122-
displayName: "Bump to RC"
123-
124107
- bash: |
125108
set -e
126109
python requirements/collect_env_details.py

.azure/gpu-tests-pytorch.yml

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,16 +51,10 @@ jobs:
5151
strategy:
5252
matrix:
5353
"PyTorch | latest":
54-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
55-
IS_NIGHTLY: "false"
54+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
5655
PACKAGE_NAME: "pytorch"
5756
"Lightning | latest":
58-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
59-
IS_NIGHTLY: "false"
60-
PACKAGE_NAME: "lightning"
61-
"Lightning | RC":
62-
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
63-
IS_NIGHTLY: "true"
57+
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
6458
PACKAGE_NAME: "lightning"
6559
pool: lit-rtx-3090
6660
variables:
@@ -89,7 +83,6 @@ jobs:
8983
echo $CUDA_VISIBLE_DEVICES
9084
echo $CUDA_VERSION_MM
9185
echo $TORCH_URL
92-
echo $(IS_NIGHTLY)
9386
echo $COVERAGE_SOURCE
9487
whereis nvidia
9588
nvidia-smi
@@ -106,8 +99,6 @@ jobs:
106 99
for fpath in `ls requirements/**/*.txt`; do \
107100
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
108101
done
109-
# without succeeded this could run even if the job has already failed
110-
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false'))
111102
displayName: "Adjust dependencies"
112103
113104
- bash: |
@@ -122,14 +113,6 @@ jobs:
122113
pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL}
123114
displayName: "Install package & dependencies"
124115
125-
- bash: |
126-
pip uninstall -y torch torchvision
127-
pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%}
128-
python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver"
129-
# without succeeded this could run even if the job has already failed
130-
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true'))
131-
displayName: "Bump to RC"
132-
133116
- bash: pip uninstall -y lightning
134117
# without succeeded this could run even if the job has already failed
135118
condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'pytorch'))

.github/checkgroup.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,13 @@ subprojects:
149149
- "build-cuda (3.9, 1.13, 11.8.0)"
150150
- "build-cuda (3.9, 1.13, 12.0.1)"
151151
- "build-cuda (3.10, 2.0, 11.8.0)"
152-
- "build-cuda (3.10, 2.0, 12.0.1)"
152+
- "build-cuda (3.10, 2.1, 12.1.0)"
153153
#- "build-NGC"
154154
- "build-pl (3.9, 1.12, 11.7.1)"
155155
- "build-pl (3.9, 1.13, 11.8.0)"
156156
- "build-pl (3.9, 1.13, 12.0.1)"
157157
- "build-pl (3.10, 2.0, 11.8.0)"
158-
- "build-pl (3.10, 2.0, 12.0.1)"
158+
- "build-pl (3.10, 2.1, 12.1.0)"
159159

160160
# SECTIONS: lightning_data
161161

.github/workflows/docker-build.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
4848
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
4949
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
50-
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" }
50+
- { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
5151
steps:
5252
- uses: actions/checkout@v4
5353
with:
@@ -108,7 +108,7 @@ jobs:
108108
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
109109
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
110110
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
111-
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" }
111+
- { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
112112
steps:
113113
- uses: actions/checkout@v4
114114
- uses: docker/setup-buildx-action@v3

dockers/base-cuda/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ ARG CUDA_VERSION=11.7.1
1919
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
2020

2121
ARG PYTHON_VERSION=3.10
22-
ARG PYTORCH_VERSION=2.0
23-
ARG MAX_ALLOWED_NCCL=2.16.2
22+
ARG PYTORCH_VERSION=2.1
23+
ARG MAX_ALLOWED_NCCL=2.17.1
2424

2525
SHELL ["/bin/bash", "-c"]
2626
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/

0 commit comments

Comments
 (0)