# Base image: CUDA 10.0 development toolchain on Ubuntu 18.04.
FROM nvidia/cuda:10.0-devel-ubuntu18.04

# Version pins for the frameworks and GPU libraries installed below.
# TensorFlow version is tightly coupled to CUDA and cuDNN so it should be
# selected carefully.
ENV TENSORFLOW_VERSION=2.1.0 \
    PYTORCH_VERSION=1.4.0 \
    TORCHVISION_VERSION=0.5.0 \
    TENSORBOARDX_VERSION=1.9 \
    MXNET_VERSION=1.5.1 \
    CUDNN_VERSION=7.6.0.64-1+cuda10.0 \
    NCCL_VERSION=2.4.8-1+cuda10.0

# Python 2.7 or 3.7 is supported by Ubuntu Bionic out of the box.
# Override at build time with: --build-arg python=<version>
ARG python=3.7
ENV PYTHON_VERSION=${python}

# Runtime environment: unbuffered Python output, CUDA/NVIDIA library and
# binary search paths, and a UTF-8 locale.
ENV LANG=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/nvidia/lib64" \
    PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# Set default shell to /bin/bash.
# `-o pipefail` makes a pipeline fail when any stage fails, not just the last
# one (several RUN steps in this file pipe a download into another command).
# `-u` aborts on references to unset variables.
SHELL ["/bin/bash", "-o", "pipefail", "-cu"]

# System packages: build toolchain, SSH/networking utilities, pinned cuDNN and
# NCCL, imaging/geo development libraries and the selected Python interpreter.
# - Package names are sorted and listed once each.
# - DEBIAN_FRONTEND is set inline so no package can block the build on an
#   interactive prompt, without leaking the setting into the runtime env.
# - The apt package lists are intentionally kept: later RUN steps in this file
#   install additional packages.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        --allow-downgrades --allow-change-held-packages --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        dkms \
        g++-4.8 \
        gir1.2-gtk-3.0 \
        git \
        graphviz \
        iputils-ping \
        libcairo2-dev \
        libcudnn7=${CUDNN_VERSION} \
        libgeos++-dev \
        libgeos-dev \
        libgirepository1.0-dev \
        libjpeg-dev \
        libnccl-dev=${NCCL_VERSION} \
        libnccl2=${NCCL_VERSION} \
        libnuma-dev \
        libpng-dev \
        libproj-dev \
        libsm6 \
        libxext6 \
        libxrender-dev \
        net-tools \
        openssh-client \
        openssh-server \
        pdsh \
        pkg-config \
        proj-bin \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        software-properties-common \
        sudo \
        vim \
        wget

# Point the generic python3 / python3m entry points at the interpreter chosen
# through PYTHON_VERSION instead of a hard-coded 3.7.
# `ln -sf` replaces an existing link and also works when none exists, so the
# step no longer aborts if /usr/bin/python3m is absent.
# NOTE(review): only meaningful for 3.x values that ship an "m" ABI binary
# (e.g. the default 3.7) - confirm before building with another version.
RUN ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
    ln -sf /usr/bin/python${PYTHON_VERSION}m /usr/bin/python3m

# Node.js 10.x from the NodeSource apt repository.
# `curl -f` turns an HTTP error into a failure instead of piping an error page
# into bash, and `set -o pipefail` makes that failure abort the step.
RUN set -o pipefail && \
    curl -fsSL https://deb.nodesource.com/setup_10.x | bash - && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends nodejs

# OFED
# STAGE_DIR is the scratch directory used for source builds in this file.
ENV STAGE_DIR=/tmp \
    MLNX_OFED_VERSION=4.6-1.0.1.1

# Download, install and delete the Mellanox OFED user-space stack in a single
# layer. Removing the unpacked installer in a later RUN would not shrink the
# image, because the files would still be stored in the earlier layer.
RUN mkdir -p ${STAGE_DIR} && \
    cd ${STAGE_DIR} && \
    wget -q -O - http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu18.04-x86_64.tgz | tar xzf - && \
    cd ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu18.04-x86_64 && \
    ./mlnxofedinstall --user-space-only --without-fw-update --all -q && \
    cd ${STAGE_DIR} && \
    rm -rf ${STAGE_DIR}/MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu18.04-x86_64*

# nvidia-peer
# Build and install the nvidia-peer-memory Debian package from source.
# The clone follows the repository's default branch, so the version embedded in
# the generated tarball, source directory and .deb names can change; they are
# matched with globs instead of a hard-coded 1.0 / 1.0-8.
# Sources and build products are removed in the same layer to keep them out of
# the image.
RUN mkdir -p ${STAGE_DIR} && \
    git clone https://github.com/Mellanox/nv_peer_memory.git ${STAGE_DIR}/nv_peer_memory && \
    cd ${STAGE_DIR}/nv_peer_memory && \
    ./build_module.sh && \
    cd ${STAGE_DIR} && \
    tar xzf ${STAGE_DIR}/nvidia-peer-memory_*.orig.tar.gz && \
    cd ${STAGE_DIR}/nvidia-peer-memory-*/ && \
    dpkg-buildpackage -us -uc && \
    dpkg -i ${STAGE_DIR}/nvidia-peer-memory_*_all.deb && \
    cd ${STAGE_DIR} && \
    rm -rf ${STAGE_DIR}/nv_peer_memory ${STAGE_DIR}/nvidia-peer-memory*

# Install CUDA-10.0 + cuDNN 7.6.0
# `-n` treats an existing /usr/local/cuda symlink as the link itself rather
# than as a directory; without it, ln creates a stray cuda-10.0 link inside
# the directory the symlink points to. `-f` makes both links safe to re-create.
RUN ln -sfn /usr/local/cuda-10.0 /usr/local/cuda && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.0 /usr/local/cuda/lib64/libcudnn.so && \
    ldconfig

# Python 3.7 needs its distutils package installed separately.
# `apt-get update` runs in the same layer as the install so a cached, stale
# package index from an earlier layer cannot cause 404s.
RUN if [[ "${PYTHON_VERSION}" == "3.7" ]]; then \
        apt-get update && \
        apt-get install -y python${PYTHON_VERSION}-distutils python-apt; \
    fi

# Make the bare `python` command resolve to python3 (alternative priority 2).
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 2

WORKDIR /tmp
# Bootstrap pip with the get-pip.py published for this interpreter version:
# the unversioned script only supports currently maintained Python releases
# and refuses to run on the 2.7 / 3.7 interpreters this image is built for.
# The script is saved to disk first so a failed download stops the build
# instead of feeding empty input to python3.
RUN curl -fsSL -o get-pip.py https://bootstrap.pypa.io/pip/${PYTHON_VERSION}/get-pip.py && \
    python3 get-pip.py --no-cache-dir && \
    rm -f get-pip.py && \
    python3 -m pip install --no-cache-dir -U setuptools pip

# Install TensorFlow, Keras, PyTorch and MXNet
# This layer holds the pinned scientific stack plus TensorFlow.
# -I reinstalls each listed package even if a version is already present;
# -v gives verbose build output.
# The stray `pip` requirement that used to follow `install` was dropped: under
# -I it force-reinstalled an unpinned pip right after the previous step had
# already upgraded it.
RUN python3 -m pip install --no-cache-dir -Iv \
        Cython==0.29.13 \
        numpy==1.16.5 \
        scipy==1.3.1 \
        scikit-image==0.16.2 \
        scikit-learn==0.21.3 \
        matplotlib==3.1.3 \
        tensorflow-gpu==${TENSORFLOW_VERSION}

# PyTorch and torchvision come from the cu100 wheel index as cp37 wheels;
# tensorboardX is installed in the same layer afterwards.
RUN python3 -m pip install --no-cache-dir \
        "https://download.pytorch.org/whl/cu100/torch-${PYTORCH_VERSION}%2Bcu100-cp37-cp37m-linux_x86_64.whl" \
        "https://download.pytorch.org/whl/cu100/torchvision-${TORCHVISION_VERSION}%2Bcu100-cp37-cp37m-linux_x86_64.whl" \
 && python3 -m pip install --no-cache-dir "tensorboardX==${TENSORBOARDX_VERSION}"

# MXNet build for CUDA 10.0.
RUN python3 -m pip install --no-cache-dir "mxnet-cu100==${MXNET_VERSION}"

# NVIDIA DALI and its TensorFlow plugin, resolved through NVIDIA's extra index.
RUN python3 -m pip install --no-cache-dir \
        --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 \
        nvidia-dali \
        nvidia-dali-tf-plugin

# Install the project requirements list copied from the build context.
WORKDIR /tmp
COPY ./requirements.py37.20.03.txt /tmp/
RUN python3 -m pip install --no-cache-dir -Ir requirements.py37.20.03.txt

# Install tensorwatch, then clear pip's cache and leftover install inputs.
# The cleanup now names the requirements file that is actually copied above;
# it previously targeted requirements.19.09.txt, which this file never
# creates, so the real file was never removed.
RUN python3 -m pip install --no-cache-dir -Iv \
        tensorwatch==0.8.10 \
        && \
    rm -rf /root/.cache && \
    rm -f /tmp/*.whl /tmp/requirements.py37.20.03.txt

# Jupyter notebook extension
# WORKDIR creates the directory when it is missing, so no separate mkdir step
# is needed.
WORKDIR /home/work/.jupyter/nbextension

# Enable the notebook/lab extensions and install the vim key bindings.
# `jupyter contrib nbextension install` is run once; the original chain ran
# the identical command a second time.
RUN jupyter nbextensions_configurator enable && \
    jupyter contrib nbextension install && \
    jupyter nbextension enable --py --sys-prefix widgetsnbextension && \
    jupyter serverextension enable --py jupyterlab --sys-prefix && \
    jupyter labextension install @jupyter-widgets/jupyterlab-manager && \
    git clone https://github.com/lambdalisue/jupyter-vim-binding vim_binding && \
    jupyter nbextension enable /home/work/.jupyter/nbextension/vim_binding/vim_binding

# Install Open MPI
# Build Open MPI 4.0.1 from the release tarball in a throwaway directory,
# install it, refresh the linker cache and delete the build tree - all within
# one layer.
RUN mkdir /tmp/openmpi \
 && cd /tmp/openmpi \
 && wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.1.tar.gz \
 && tar zxf openmpi-4.0.1.tar.gz \
 && cd openmpi-4.0.1 \
 && ./configure --enable-orterun-prefix-by-default \
 && make -j $(nproc) all \
 && make install \
 && ldconfig \
 && rm -rf /tmp/openmpi

# Install Horovod, temporarily using CUDA stubs
# The first ldconfig adds the CUDA stub libraries to the linker cache for the
# build; the trailing ldconfig rebuilds the cache without them.
# Horovod is installed through `python3 -m pip`, like every other Python
# package in this file, so it is tied to the python3 interpreter rather than
# to whichever `pip` script is first on PATH.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
    HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL \
    HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_MXNET=1 \
    python3 -m pip install --no-cache-dir horovod==0.19.0 && \
    ldconfig

# MPI bindings for Python (pinned) and NNI (unpinned).
RUN python3 -m pip install --no-cache-dir mpi4py==3.0.3 nni

# Install OpenSSH for MPI to communicate between containers
# The package index is refreshed in the same layer as the install, and removed
# afterwards since this is the last apt step in the file.
RUN apt-get update && \
    apt-get install -y --no-install-recommends openssh-client openssh-server && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir -p /var/run/sshd

# Allow OpenSSH to talk to containers without asking for confirmation
# Any existing StrictHostKeyChecking line is dropped and a single "no" entry is
# appended. grep reads the file directly; no `cat` pipeline is needed.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new && \
    echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
    mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config

WORKDIR /home/work
0 commit comments