Skip to content

Commit ba1e869

Browse files
Borda authored and lexierule committed
CI: check docker requires (#12677)
* check docker requires * ci update * bagua * conda * cuda
1 parent dfd9730 commit ba1e869

File tree

5 files changed

+48
-15
lines changed

5 files changed

+48
-15
lines changed

.azure-pipelines/gpu-tests.yml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,9 +52,10 @@ jobs:
5252
5353
- bash: |
5454
python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
55-
pip install fairscale==0.4.5
56-
pip install deepspeed==0.5.7
57-
pip install bagua-cuda102==0.9.0
55+
pip install "fairscale>=0.4.5"  # quoted: an unquoted '>' is a shell redirect, so the version bound would be dropped
56+
pip install "deepspeed>=0.6.0"  # quoted for the same reason; otherwise output lands in a file named '=0.6.0'
57+
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
58+
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
5859
pip install . --requirement requirements/devel.txt
5960
pip list
6061
displayName: 'Install dependencies'

.github/workflows/ci_dockers.yml

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,9 +73,10 @@ jobs:
7373
strategy:
7474
fail-fast: false
7575
matrix:
76-
# the config used in '.azure-pipelines/gpu-tests.yml'
77-
python_version: ["3.7"]
78-
pytorch_version: ["1.8"]
76+
include:
77+
# the config used in '.azure-pipelines/gpu-tests.yml'
78+
- {python_version: "3.7", pytorch_version: "1.8"}
79+
- {python_version: "3.9", pytorch_version: "1.10"}
7980
steps:
8081
- name: Checkout
8182
uses: actions/checkout@v2

dockers/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ or with specific arguments
1414
```bash
1515
git clone <git-repository>
1616
docker image build \
17-
-t pytorch-lightning:base-cuda-py3.7-pt1.8 \
17+
-t pytorch-lightning:base-cuda-py3.9-pt1.10 \
1818
-f dockers/base-cuda/Dockerfile \
19-
--build-arg PYTHON_VERSION=3.7 \
20-
--build-arg PYTORCH_VERSION=1.8 \
19+
--build-arg PYTHON_VERSION=3.9 \
20+
--build-arg PYTORCH_VERSION=1.10 \
2121
.
2222
```
2323

dockers/base-conda/Dockerfile

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,33 @@ RUN \
147147
pip install --no-cache-dir --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip && \
148148
python -c "from apex import amp"
149149

150+
RUN \
151+
# install FairScale
152+
pip install fairscale==0.4.5 && \
153+
python -c "import fairscale; print(fairscale.__version__)"
154+
155+
RUN \
156+
# install DeepSpeed
157+
pip install deepspeed==0.6.0 && \
158+
python -c "import deepspeed; print(deepspeed.__version__)"
159+
160+
RUN \
161+
# install Bagua
162+
CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
163+
pip install "bagua-cuda$CUDA_VERSION_MM==0.9.0" && \
164+
python -c "import bagua_core; bagua_core.install_deps()" && \
165+
python -c "import bagua; print(bagua.__version__)"
166+
167+
COPY requirements/check-avail-extras.py check-avail-extras.py
168+
COPY requirements/check-avail-strategies.py check-avail-strategies.py
169+
150170
RUN \
151171
# Show what we have
152172
pip --version && \
153173
conda info && \
154174
pip list && \
155175
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
156176
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
157-
python -c "import horovod.torch" && \
158-
python -c "from horovod.torch import nccl_built; nccl_built()"
177+
python check-avail-extras.py && \
178+
python check-avail-strategies.py && \
179+
rm check-avail-*.py

dockers/base-cuda/Dockerfile

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ RUN \
7676
pip install -q fire && \
7777
# Disable cache \
7878
CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
79-
export BAGUA_CUDA_VERSION=$CUDA_VERSION_MM && \
8079
pip config set global.cache-dir false && \
8180
# set particular PyTorch version
8281
python ./requirements/adjust-versions.py requirements.txt ${PYTORCH_VERSION} && \
@@ -138,14 +137,25 @@ RUN \
138137

139138
RUN \
140139
# install DeepSpeed
141-
pip install deepspeed==0.5.7 && \
140+
pip install deepspeed==0.6.0 && \
142141
python -c "import deepspeed; print(deepspeed.__version__)"
143142

143+
RUN \
144+
# install Bagua
145+
CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
146+
pip install "bagua-cuda$CUDA_VERSION_MM==0.9.0" && \
147+
python -c "import bagua_core; bagua_core.install_deps()" && \
148+
python -c "import bagua; print(bagua.__version__)"
149+
150+
COPY requirements/check-avail-extras.py check-avail-extras.py
151+
COPY requirements/check-avail-strategies.py check-avail-strategies.py
152+
144153
RUN \
145154
# Show what we have
146155
pip --version && \
147156
pip list && \
148157
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
149158
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
150-
python -c "import horovod.torch" && \
151-
python -c "from horovod.torch import nccl_built; nccl_built()"
159+
python check-avail-extras.py && \
160+
python check-avail-strategies.py && \
161+
rm check-avail-*.py

0 commit comments

Comments
 (0)