Skip to content

Commit 1d11c82

Browse files
committed
requirements for NGC PyTorch
1 parent 4114e9f commit 1d11c82

File tree

2 files changed

+112
-113
lines changed

2 files changed

+112
-113
lines changed

vendor/ngc-pytorch/Dockerfile.22.03-py3

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,11 +128,6 @@ RUN git clone -q --branch=v0.3.20 https://github.com/xianyi/OpenBLAS.git && \
128128

129129
RUN /opt/conda/bin/conda install opencv ffmpeg spacy
130130

131-
WORKDIR /tmp
132-
COPY ./requirements.22.03.txt /tmp/requirements.txt
133-
RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt && \
134-
rm -f /tmp/*.whl /tmp/requirements.txt
135-
136131
# install git-lfs
137132
WORKDIR /tmp
138133
RUN curl -sLO https://github.com/git-lfs/git-lfs/releases/download/v3.0.2/git-lfs-linux-amd64-v3.0.2.tar.gz && \
@@ -166,17 +161,32 @@ RUN mkdir /tmp/openmpi && \
166161
echo '#!/bin/bash' > /usr/local/bin/mpirun && \
167162
echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
168163
chmod a+x /usr/local/bin/mpirun && \
169-
170164
# Configure OpenMPI to run with good defaults:
171165
echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
172166

167+
WORKDIR /tmp
168+
COPY ./requirements.22.04.txt /tmp/requirements.txt
169+
RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt && \
170+
rm -f /tmp/*.whl /tmp/requirements.txt
171+
COPY ./requirements.22.04.1.txt /tmp/requirements.txt
172+
RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt && \
173+
rm -f /tmp/*.whl /tmp/requirements.txt
174+
173175
# Install Horovod, temporarily using CUDA stubs
174176
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
175-
HOROVOD_CUDA_HOME=$CONDA_PREFIX HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL HOROVOD_NCCL_LINK=SHARED \
177+
LD_LIBRARY_PATH="/usr/local/cuda/compat/lib:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64:/usr/local/nvidia/lib64:/usr/local/cuda-11.6/include:/usr/include/x86_64-linux-gnu:/usr/include:$LD_LIBRARY_PATH" \
178+
HOROVOD_NCCL_LINK=SHARED \
179+
HOROVOD_WITH_MPI=1 \
180+
HOROVOD_GPU_ALLREDUCE=NCCL \
181+
NCCL_LIBRARY=/usr/include \
182+
HOROVOD_NCCL_INCLUDE=/usr/include \
183+
HOROVOD_NCCL_LIB=/lib/x86_64-linux-gnu \
184+
HOROVOD_GPU_BROADCAST=NCCL \
176185
HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
177186
pip install --no-cache-dir horovod==0.24.2 && \
178187
ldconfig
179188

189+
180190
RUN python3 -m pip install --no-cache-dir \
181191
mpi4py==3.1.2 \
182192
nni==2.5 \
@@ -190,7 +200,6 @@ RUN jupyter nbextensions_configurator enable && \
190200
jupyter labextension install --no-build @jupyter-widgets/jupyterlab-manager && \
191201
jupyter labextension install --no-build @krassowski/jupyterlab-lsp && \
192202
jupyter serverextension enable --py jupyter_lsp && \
193-
jupyter labextension install --no-build @jupyterlab/toc && \
194203
jupyter nbextension enable execute_time/ExecuteTime && \
195204
jupyter nbextension enable toc2/main && \
196205
jupyter lab build
@@ -204,7 +213,7 @@ RUN apt autoclean && \
204213

205214
RUN /opt/conda/bin/python3 -m ipykernel install \
206215
--prefix=/opt/conda/ \
207-
--display-name "PyTorch 1.11 (NGC 22.03/Python 3.8 Conda) on Backend.AI" && \
216+
--display-name "PyTorch 1.12 (NGC 22.03/Python 3.8 Conda) on Backend.AI" && \
208217
cat /opt/conda/share/jupyter/kernels/python3/kernel.json
209218

210219
# Backend.AI specifics

0 commit comments

Comments
 (0)