5 files changed: +48 -15 lines changed
Original file line number Diff line number Diff line change 5252
5353 - bash : |
5454 python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)"
55- pip install fairscale==0.4.5
56- pip install deepspeed==0.5.7
57- pip install bagua-cuda102==0.9.0
55+ pip install fairscale>=0.4.5
56+ pip install deepspeed>=0.6.0
57+ CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
58+ pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
5859 pip install . --requirement requirements/devel.txt
5960 pip list
6061 displayName: 'Install dependencies'
Original file line number Diff line number Diff line change 7373 strategy :
7474 fail-fast : false
7575 matrix :
76- # the config used in '.azure-pipelines/gpu-tests.yml'
77- python_version : ["3.7"]
78- pytorch_version : ["1.8"]
76+ include :
77+ # the config used in '.azure-pipelines/gpu-tests.yml'
78+ - {python_version: "3.7", pytorch_version: "1.8"}
79+ - {python_version: "3.9", pytorch_version: "1.10"}
7980 steps :
8081 - name : Checkout
8182 uses : actions/checkout@v2
Original file line number Diff line number Diff line change @@ -14,10 +14,10 @@ or with specific arguments
1414```bash
1515git clone <git-repository>
1616docker image build \
17- -t pytorch-lightning:base-cuda-py3.7-pt1.8 \
17+ -t pytorch-lightning:base-cuda-py3.9-pt1.10 \
1818 -f dockers/base-cuda/Dockerfile \
19- --build-arg PYTHON_VERSION=3.7 \
20- --build-arg PYTORCH_VERSION=1.8 \
19+ --build-arg PYTHON_VERSION=3.9 \
20+ --build-arg PYTORCH_VERSION=1.10 \
2121 .
2222```
2323
Original file line number Diff line number Diff line change @@ -147,12 +147,33 @@ RUN \
147147 pip install --no-cache-dir --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip && \
148148 python -c "from apex import amp"
149149
150+ RUN \
151+ # install FairScale
152+ pip install fairscale==0.4.5 && \
153+ python -c "import fairscale; print(fairscale.__version__)"
154+
155+ RUN \
156+ # install DeepSpeed
157+ pip install deepspeed==0.6.0 && \
158+ python -c "import deepspeed; print(deepspeed.__version__)"
159+
160+ RUN \
161+ # install Bagua
162+ CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))" ) && \
163+ pip install "bagua-cuda$CUDA_VERSION_MM==0.9.0" && \
164+ python -c "import bagua_core; bagua_core.install_deps()" && \
165+ python -c "import bagua; print(bagua.__version__)"
166+
167+ COPY requirements/check-avail-extras.py check-avail-extras.py
168+ COPY requirements/check-avail-strategies.py check-avail-strategies.py
169+
150170RUN \
151171 # Show what we have
152172 pip --version && \
153173 conda info && \
154174 pip list && \
155175 python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
156176 python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
157- python -c "import horovod.torch" && \
158- python -c "from horovod.torch import nccl_built; nccl_built()"
177+ python check-avail-extras.py && \
178+ python check-avail-strategies.py && \
179+ rm check-avail-*.py
Original file line number Diff line number Diff line change 7676 pip install -q fire && \
7777 # Disable cache \
7878 CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))" ) && \
79- export BAGUA_CUDA_VERSION=$CUDA_VERSION_MM && \
8079 pip config set global.cache-dir false && \
8180 # set particular PyTorch version
8281 python ./requirements/adjust-versions.py requirements.txt ${PYTORCH_VERSION} && \
@@ -138,14 +137,25 @@ RUN \
138137
139138RUN \
140139 # install DeepSpeed
141- pip install deepspeed==0.5.7 && \
140+ pip install deepspeed==0.6.0 && \
142141 python -c "import deepspeed; print(deepspeed.__version__)"
143142
143+ RUN \
144+ # install Bagua
145+ CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))" ) && \
146+ pip install "bagua-cuda$CUDA_VERSION_MM==0.9.0" && \
147+ python -c "import bagua_core; bagua_core.install_deps()" && \
148+ python -c "import bagua; print(bagua.__version__)"
149+
150+ COPY requirements/check-avail-extras.py check-avail-extras.py
151+ COPY requirements/check-avail-strategies.py check-avail-strategies.py
152+
144153RUN \
145154 # Show what we have
146155 pip --version && \
147156 pip list && \
148157 python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
149158 python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
150- python -c "import horovod.torch" && \
151- python -c "from horovod.torch import nccl_built; nccl_built()"
159+ python check-avail-extras.py && \
160+ python check-avail-strategies.py && \
161+ rm check-avail-*.py
You can’t perform that action at this time.
0 commit comments