Skip to content

Commit e028c61

Browse files
committed
Lablup base kernel 20.03 for Python 3.7 & CUDA 10
1 parent 5bf6f83 commit e028c61

File tree

1 file changed

+172
-0
lines changed

1 file changed

+172
-0
lines changed
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
# Base image: NVIDIA's CUDA 10.0 "devel" image on Ubuntu 18.04, as named by the tag.
FROM nvidia/cuda:10.0-devel-ubuntu18.04
2+
3+
# TensorFlow version is tightly coupled to CUDA and cuDNN so it should be selected carefully
4+
# Version pins consumed by the apt and pip install steps further down.
# Grouped in one ENV instruction so the whole matrix is visible at a glance.
ENV TENSORFLOW_VERSION=2.1.0 \
    PYTORCH_VERSION=1.4.0 \
    TORCHVISION_VERSION=0.5.0 \
    TENSORBOARDX_VERSION=1.9 \
    MXNET_VERSION=1.5.1 \
    CUDNN_VERSION=7.6.0.64-1+cuda10.0 \
    NCCL_VERSION=2.4.8-1+cuda10.0
11+
12+
# Python 2.7 or 3.7 is supported by Ubuntu Bionic out of the box
13+
# `python` is a build argument (override with `--build-arg python=<version>`);
# its value is exported as PYTHON_VERSION so RUN steps can read it.
ARG python=3.7
ENV PYTHON_VERSION=$python
15+
16+
# Runtime environment: UTF-8 locale, unbuffered Python output, and the CUDA /
# NVIDIA directories on the executable and shared-library search paths.
ENV LANG=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" \
    LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/nvidia/lib64"
20+
21+
# Set default shell to /bin/bash
22+
# All following RUN steps execute under bash with:
#   -o pipefail  a failure anywhere in a pipeline fails the step; this file
#                pipes downloads straight into bash, tar and python3, and
#                without pipefail a failed download would be masked
#   -u           expanding an unset variable is an error
SHELL ["/bin/bash", "-o", "pipefail", "-cu"]
23+
24+
# Base OS packages: build toolchain, networking/SSH utilities, the pinned
# cuDNN/NCCL libraries, image/geo/graphics development headers and the selected
# Python interpreter.
# The list is de-duplicated (curl, vim and wget were each listed twice) and
# sorted so that future diffs stay small.
# DEBIAN_FRONTEND is set inline only, so it does not leak into the runtime
# environment; it keeps package configuration from waiting on a prompt.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y \
        --allow-downgrades --allow-change-held-packages --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        dkms \
        g++-4.8 \
        gir1.2-gtk-3.0 \
        git \
        graphviz \
        iputils-ping \
        libcairo2-dev \
        libcudnn7=${CUDNN_VERSION} \
        libgeos++-dev \
        libgeos-dev \
        libgirepository1.0-dev \
        libjpeg-dev \
        libnccl-dev=${NCCL_VERSION} \
        libnccl2=${NCCL_VERSION} \
        libnuma-dev \
        libpng-dev \
        libproj-dev \
        libsm6 \
        libxext6 \
        libxrender-dev \
        net-tools \
        openssh-client \
        openssh-server \
        pdsh \
        pkg-config \
        proj-bin \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        software-properties-common \
        sudo \
        vim \
        wget
51+
52+
# Point the generic python3 / python3m entry points at the interpreter selected
# by PYTHON_VERSION. The target used to be hard-coded to python3.7, which
# silently ignored the `python` build argument.
# `ln -sfn` replaces an existing link in one step and, unlike a bare `rm`,
# does not fail when the link is not there yet.
# NOTE(review): the trailing-`m` binary is assumed to exist for the selected
# version (it does for the 3.7 default used here) - confirm before overriding.
RUN ln -sfn /usr/bin/python${PYTHON_VERSION} /usr/bin/python3 && \
    ln -sfn /usr/bin/python${PYTHON_VERSION}m /usr/bin/python3m
55+
56+
# Node.js 10.x from the NodeSource apt repository.
# `-f` makes curl exit non-zero on an HTTP error instead of handing the error
# page to bash; `-S` still prints the reason despite `-s`.
# NOTE(review): the setup script is executed unverified; consider vendoring it
# or checking a checksum.
RUN curl -fsSL https://deb.nodesource.com/setup_10.x | bash - && \
    apt-get update -y && \
    apt-get install -y nodejs
59+
60+
# OFED
61+
# Staging directory for out-of-tree installers and the Mellanox OFED release to use.
ENV STAGE_DIR=/tmp
ENV MLNX_OFED_VERSION=4.6-1.0.1.1

# Download, install (user-space components only, no firmware update) and clean
# up Mellanox OFED in a single layer. Previously the archive was unpacked in one
# RUN and deleted in a later one, which left the whole unpacked tree in the
# earlier image layer.
# The tarball is saved to disk first so that a failed download stops the chain
# on its own, independent of the shell's pipeline settings; the trailing glob
# removes both the unpacked tree and the tarball.
# NOTE(review): the download uses plain HTTP and is not checksum-verified.
RUN mkdir -p ${STAGE_DIR} && \
    cd ${STAGE_DIR} && \
    ofed_name=MLNX_OFED_LINUX-${MLNX_OFED_VERSION}-ubuntu18.04-x86_64 && \
    wget -q -O ${ofed_name}.tgz http://www.mellanox.com/downloads/ofed/MLNX_OFED-${MLNX_OFED_VERSION}/${ofed_name}.tgz && \
    tar xzf ${ofed_name}.tgz && \
    cd ${STAGE_DIR}/${ofed_name} && \
    ./mlnxofedinstall --user-space-only --without-fw-update --all -q && \
    cd ${STAGE_DIR} && \
    rm -rf ${STAGE_DIR}/${ofed_name}*
70+
71+
# nvidia-peer
72+
# Build the nvidia-peer-memory Debian package from the Mellanox sources and
# install it, then delete the clone, the unpacked source tree, the tarball and
# the generated package files in the same layer so they do not stay in the image.
# NOTE(review): the clone is not pinned to a tag or commit while the tarball and
# .deb names below hard-code version 1.0 / 1.0-8; an upstream version bump would
# break this step. Consider pinning the checkout.
RUN mkdir -p ${STAGE_DIR} && \
    git clone https://github.com/Mellanox/nv_peer_memory.git ${STAGE_DIR}/nv_peer_memory && \
    cd ${STAGE_DIR}/nv_peer_memory && \
    ./build_module.sh && \
    cd ${STAGE_DIR} && \
    tar xzf ${STAGE_DIR}/nvidia-peer-memory_1.0.orig.tar.gz && \
    cd ${STAGE_DIR}/nvidia-peer-memory-1.0 && \
    dpkg-buildpackage -us -uc && \
    dpkg -i ${STAGE_DIR}/nvidia-peer-memory_1.0-8_all.deb && \
    cd ${STAGE_DIR} && \
    rm -rf ${STAGE_DIR}/nv_peer_memory ${STAGE_DIR}/nvidia-peer-memory*
81+
82+
# Install CUDA-10.0 + cuDNN 7.6.0
83+
# Make /usr/local/cuda resolve to the CUDA 10.0 toolkit and expose the cuDNN
# runtime under the unversioned name inside the toolkit's lib64, then refresh
# the linker cache.
# `-f`/`-n` make the step idempotent: if /usr/local/cuda is already a symlink to
# a directory, plain `ln -s` would drop a stray `cuda-10.0` link inside that
# directory instead of replacing the link itself, and plain `ln -s` onto an
# existing libcudnn.so would fail.
RUN ln -sfn /usr/local/cuda-10.0 /usr/local/cuda && \
    ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.0 /usr/local/cuda/lib64/libcudnn.so && \
    ldconfig
86+
87+
# Only for Python 3.7: add the matching distutils package and python-apt.
# Any other PYTHON_VERSION falls through the case statement and the step succeeds
# without installing anything.
RUN case "${PYTHON_VERSION}" in \
        3.7) apt-get install -y python${PYTHON_VERSION}-distutils python-apt ;; \
    esac
90+
91+
# Register /usr/bin/python3 as an alternative for the `python` command (priority 2).
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 2
92+
93+
WORKDIR /tmp
# Bootstrap pip for the selected interpreter, then upgrade setuptools and pip.
# The version-specific get-pip.py is used because the unversioned script only
# supports currently maintained Python releases and refuses to run on older
# interpreters such as 3.7.
# `-f` makes curl fail on an HTTP error instead of piping the error page into python3.
RUN curl -fsSL https://bootstrap.pypa.io/pip/${PYTHON_VERSION}/get-pip.py | python3 && \
    python3 -m pip install --no-cache-dir -U setuptools pip
96+
97+
# Install TensorFlow, Keras, PyTorch and MXNet
98+
# Pinned scientific stack plus the GPU build of TensorFlow.
# -I (--ignore-installed) forces a fresh install of every listed package,
# -v prints verbose output.
# A stray `pip` argument used to follow `install`, which made this step
# force-reinstall pip itself as one of the requested packages; it is removed.
RUN python3 -m pip install --no-cache-dir -Iv \
        Cython==0.29.13 \
        numpy==1.16.5 \
        scipy==1.3.1 \
        scikit-image==0.16.2 \
        scikit-learn==0.21.3 \
        matplotlib==3.1.3 \
        tensorflow-gpu==${TENSORFLOW_VERSION}
106+
107+
# PyTorch and torchvision from the cu100 wheel index (cp37 wheels), followed by
# tensorboardX at its pinned version.
RUN python3 -m pip install --no-cache-dir \
        "https://download.pytorch.org/whl/cu100/torch-${PYTORCH_VERSION}%2Bcu100-cp37-cp37m-linux_x86_64.whl" \
        "https://download.pytorch.org/whl/cu100/torchvision-${TORCHVISION_VERSION}%2Bcu100-cp37-cp37m-linux_x86_64.whl" \
 && python3 -m pip install --no-cache-dir "tensorboardX==${TENSORBOARDX_VERSION}"

# MXNet built against CUDA 10.0.
RUN python3 -m pip install --no-cache-dir "mxnet-cu100==${MXNET_VERSION}"

# NVIDIA DALI and its TensorFlow plugin, resolved through NVIDIA's extra index.
RUN python3 -m pip install --no-cache-dir \
        --extra-index-url https://developer.download.nvidia.com/compute/redist/cuda/10.0 \
        nvidia-dali \
        nvidia-dali-tf-plugin
116+
117+
# Install the project's pinned requirements. The file is copied into /tmp and
# referenced by relative path, so the WORKDIR must stay /tmp for this step.
WORKDIR /tmp
COPY requirements.py37.20.03.txt /tmp/
RUN python3 -m pip install --no-cache-dir --ignore-installed \
        --requirement requirements.py37.20.03.txt
120+
121+
# Install tensorwatch, then remove pip's cache directory, any wheel files in
# /tmp and the requirements file that was copied into /tmp.
# The cleanup used to name requirements.19.09.txt, a file this Dockerfile never
# copies, so the actual requirements.py37.20.03.txt was left behind.
RUN python3 -m pip install --no-cache-dir -Iv \
        tensorwatch==0.8.10 \
    && \
    rm -rf /root/.cache && \
    rm -f /tmp/*.whl /tmp/requirements.py37.20.03.txt
126+
127+
# Jupyter notebook extension
128+
# WORKDIR creates the directory when it is missing, so no separate `mkdir -p`
# layer is needed.
WORKDIR /home/work/.jupyter/nbextension

# Enable the notebook and JupyterLab extensions and clone the vim key bindings
# into the current WORKDIR before enabling them.
# `jupyter contrib nbextension install` used to appear twice in this chain;
# NOTE(review): the second run is assumed to be a redundant repeat - confirm.
RUN jupyter nbextensions_configurator enable && \
    jupyter contrib nbextension install && \
    jupyter nbextension enable --py --sys-prefix widgetsnbextension && \
    jupyter serverextension enable --py jupyterlab --sys-prefix && \
    jupyter labextension install @jupyter-widgets/jupyterlab-manager && \
    git clone https://github.com/lambdalisue/jupyter-vim-binding vim_binding && \
    jupyter nbextension enable /home/work/.jupyter/nbextension/vim_binding/vim_binding
139+
140+
# Install Open MPI
141+
# Build Open MPI 4.0.1 from the upstream tarball, install it, refresh the linker
# cache, and delete the build directory inside the same layer.
RUN mkdir /tmp/openmpi \
 && cd /tmp/openmpi \
 && wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-4.0.1.tar.gz \
 && tar zxf openmpi-4.0.1.tar.gz \
 && cd openmpi-4.0.1 \
 && ./configure --enable-orterun-prefix-by-default \
 && make -j $(nproc) all \
 && make install \
 && ldconfig \
 && rm -rf /tmp/openmpi
151+
152+
# Install Horovod, temporarily using CUDA stubs
153+
# Build Horovod with NCCL allreduce/broadcast and TensorFlow, PyTorch and MXNet
# support. The CUDA stub libraries are added to the linker cache for the build
# only; the final plain `ldconfig` rebuilds the cache without that extra directory.
# `python3 -m pip` is used, like every other install step in this file, so the
# package is installed for the python3 interpreter regardless of which `pip`
# executable comes first on PATH.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
    HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL \
    HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITH_MXNET=1 \
    python3 -m pip install --no-cache-dir horovod==0.19.0 && \
    ldconfig
158+
159+
# MPI bindings for Python (pinned) and NNI (unpinned).
RUN python3 -m pip install --no-cache-dir mpi4py==3.0.3 nni
162+
163+
# Install OpenSSH for MPI to communicate between containers
164+
# OpenSSH client and server, plus the /var/run/sshd directory.
RUN apt-get install --yes --no-install-recommends \
        openssh-client \
        openssh-server \
 && mkdir --parents /var/run/sshd
166+
167+
# Allow OpenSSH to talk to containers without asking for confirmation
168+
# Rewrite the client config: drop every existing StrictHostKeyChecking line,
# append a single "no" setting, then swap the new file into place.
RUN grep -v StrictHostKeyChecking /etc/ssh/ssh_config > /etc/ssh/ssh_config.new \
 && echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new \
 && mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
171+
172+
# Default working directory of the final image.
# NOTE(review): no USER instruction is visible in this file, so the image
# presumably runs as root - confirm whether that is intended.
WORKDIR /home/work

0 commit comments

Comments
 (0)