Skip to content

Commit 29ebf54

Browse files
authored
Updated CI gpu image version (#3097)
* Updated CI GPU image version: pytorch/conda-builder:cuda11.8 -> cuda12.1 * Fixed Horovod installation to work around the C++14 build issue
1 parent 624eb63 commit 29ebf54

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

.github/workflows/gpu-hvd-tests.yml

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
pytorch-channel: [pytorch, ]
2626
fail-fast: false
2727
env:
28-
DOCKER_IMAGE: "pytorch/conda-builder:cuda11.8"
28+
DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1"
2929
REPOSITORY: ${{ github.repository }}
3030
PR_NUMBER: ${{ github.event.pull_request.number }}
3131
runs-on: linux.8xlarge.nvidia.gpu
@@ -102,9 +102,9 @@ jobs:
102102
103103
# Install PyTorch
104104
if [ "${{ matrix.pytorch-channel }}" == "pytorch" ]; then
105-
pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu118
105+
pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu121
106106
else
107-
pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu118
107+
pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
108108
fi
109109
110110
python -c "import torch; print(torch.__version__, ', CUDA is available: ', torch.cuda.is_available()); exit(not torch.cuda.is_available())"
@@ -125,7 +125,17 @@ jobs:
125125
126126
set -xe
127127
128-
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]
128+
# Can't build Horovod against recent PyTorch: PyTorch requires the C++17 standard
129+
# while Horovod still builds with C++14
130+
# HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]
131+
# Using a similar hack as described here:
132+
# https://github.com/horovod/horovod/issues/3941#issuecomment-1732505345
133+
git clone --recursive https://github.com/horovod/horovod.git /horovod
134+
cd /horovod
135+
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" CMakeLists.txt
136+
sed -i "s/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g" horovod/torch/CMakeLists.txt
137+
HOROVOD_GPU_OPERATIONS=NCCL HOROVOD_WITH_PYTORCH=1 python setup.py install
138+
129139
horovodrun --check-build
130140
pip list
131141

.github/workflows/gpu-tests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
pytorch-channel: [pytorch, pytorch-nightly]
2626
fail-fast: false
2727
env:
28-
DOCKER_IMAGE: "pytorch/conda-builder:cuda11.8"
28+
DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1"
2929
REPOSITORY: ${{ github.repository }}
3030
PR_NUMBER: ${{ github.event.pull_request.number }}
3131
runs-on: linux.8xlarge.nvidia.gpu
@@ -102,9 +102,9 @@ jobs:
102102
103103
# Install PyTorch
104104
if [ "${{ matrix.pytorch-channel }}" == "pytorch" ]; then
105-
pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu118
105+
pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu121
106106
else
107-
pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu118
107+
pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
108108
fi
109109
110110
python -c "import torch; print(torch.__version__, ', CUDA is available: ', torch.cuda.is_available()); exit(not torch.cuda.is_available())"

0 commit comments

Comments
 (0)