@@ -59,24 +59,55 @@ RUN apt-get update && \
59
59
pv \
60
60
pdsh \
61
61
protobuf-compiler \
62
- v4l-utils \
62
+ v4l-utils \
63
+ cmake \
63
64
x264
64
65
65
66
# Install CUDA + cuDNN
66
67
RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.8.3.2 /usr/local/cuda/lib64/libcudnn.so && \
67
68
rm -rf /var/lib/apt/lists/* && \
68
69
ldconfig
69
70
70
- # Install cmake
71
- RUN wget https://github.com/Kitware/CMake/releases/download/v3.25.1/cmake-3.25.1-Linux-x86_64.sh \
72
- -q -O /tmp/cmake-install.sh && \
73
- chmod u+x /tmp/cmake-install.sh && \
74
- mkdir /usr/bin/cmake && \
75
- /tmp/cmake-install.sh --skip-license --prefix=/usr/bin/cmake && \
76
- rm /tmp/cmake-install.sh
77
-
78
71
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 2
79
72
73
+ # Install Open MPI
74
+ RUN mkdir /tmp/openmpi && \
75
+ cd /tmp/openmpi && \
76
+ wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.4.tar.gz && \
77
+ tar zxf openmpi-4.1.4.tar.gz && \
78
+ cd openmpi-4.1.4 && \
79
+ ./configure --enable-orterun-prefix-by-default && \
80
+ make -j $(nproc) all && \
81
+ make install && \
82
+ ldconfig && \
83
+ rm -rf /tmp/openmpi* && \
84
+ # Create a wrapper for OpenMPI to allow running as root by default
85
+ mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
86
+ echo '#!/bin/bash' > /usr/local/bin/mpirun && \
87
+ echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
88
+ chmod a+x /usr/local/bin/mpirun && \
89
+ # Configure OpenMPI to run good defaults:
90
+ echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
91
+
92
+
93
+ # Install Horovod, temporarily using CUDA stubs
94
+ RUN \
95
+ #ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
96
+ HOROVOD_NCCL_LINK=SHARED \
97
+ HOROVOD_WITH_MPI=1 \
98
+ HOROVOD_GPU_ALLREDUCE=NCCL \
99
+ NCCL_LIBRARY=/usr/include \
100
+ HOROVOD_NCCL_INCLUDE=/usr/include \
101
+ HOROVOD_NCCL_LIB=/lib/x86_64-linux-gnu \
102
+ HOROVOD_GPU_BROADCAST=NCCL \
103
105
+ HOROVOD_GPU=CUDA \
106
+ HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
107
+ pip install --no-cache-dir horovod==0.26.1 && \
108
+ ldconfig
109
+
110
+
80
111
# install NLP packages *mecab-ko & khai*
81
112
WORKDIR /tmp
82
113
RUN curl -LO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz && \
@@ -170,59 +201,36 @@ RUN curl -fL https://github.com/cdr/code-server/releases/download/v4.9.1/code-se
170
201
mv /usr/local/lib/code-server-4.9.1-linux-amd64 /usr/local/lib/code-server-4.9.1 && \
171
202
ln -s /usr/local/lib/code-server-4.9.1/bin/code-server /usr/local/bin/code-server
172
203
204
+
173
205
WORKDIR /tmp
174
- COPY ./requirements.22.09.1.txt /tmp/requirements.txt
175
- RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt
176
- COPY ./requirements.22.09.2.txt /tmp/requirements.txt
177
- RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt
178
- COPY ./requirements.22.09.3.txt /tmp/requirements.txt
179
- RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt
180
- COPY ./requirements.22.09.4.txt /tmp/requirements.txt
181
- RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt
182
- COPY ./requirements.22.09.5.txt /tmp/requirements.txt
183
- RUN /opt/conda/bin/python3 -m pip install --no-cache-dir -r requirements.txt && \
206
+ COPY ./requirements.22.12.1.txt /tmp/requirements.txt
207
+ RUN python3 -m pip install --no-cache-dir -r requirements.txt
208
+ COPY ./requirements.22.12.2.txt /tmp/requirements.txt
209
+ RUN python3 -m pip install --no-cache-dir -r requirements.txt
210
+ COPY ./requirements.22.12.3.txt /tmp/requirements.txt
211
+ RUN python3 -m pip install --no-cache-dir -r requirements.txt
212
+ COPY ./requirements.22.12.4.txt /tmp/requirements.txt
213
+ RUN python3 -m pip install --no-cache-dir -r requirements.txt
214
+ COPY ./requirements.22.12.5.txt /tmp/requirements.txt
215
+ RUN python3 -m pip install --no-cache-dir -r requirements.txt && \
184
216
rm -f /tmp/*.whl /tmp/requirem*
185
217
186
- # Install Open MPI
187
- RUN mkdir /tmp/openmpi && \
188
- cd /tmp/openmpi && \
189
- wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.4.tar.gz && \
190
- tar zxf openmpi-4.1.4.tar.gz && \
191
- cd openmpi-4.1.4 && \
192
- ./configure --enable-orterun-prefix-by-default && \
193
- make -j $(nproc) all && \
194
- make install && \
195
- ldconfig && \
196
- rm -rf /tmp/openmpi* && \
197
- # Create a wrapper for OpenMPI to allow running as root by default
198
- mv /usr/local/bin/mpirun /usr/local/bin/mpirun.real && \
199
- echo '#!/bin/bash' > /usr/local/bin/mpirun && \
200
- echo 'mpirun.real --allow-run-as-root "$@"' >> /usr/local/bin/mpirun && \
201
- chmod a+x /usr/local/bin/mpirun && \
202
- # Configure OpenMPI to run good defaults:
203
- echo "btl_tcp_if_exclude = lo,docker0" >> /usr/local/etc/openmpi-mca-params.conf
204
-
205
- # Install Horovod, temporarily using CUDA stubs
206
- RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
207
- LD_LIBRARY_PATH="/usr/local/cuda/compat/lib:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/lib:/usr/local/cuda/lib64:/usr/local/nvidia/lib64:/usr/local/cuda-11.8/include:/usr/include/x86_64-linux-gnu:/usr/include:$LD_LIBRARY_PATH" \
208
- HOROVOD_NCCL_LINK=SHARED \
209
- HOROVOD_WITH_MPI=1 \
210
- HOROVOD_GPU_ALLREDUCE=NCCL \
211
- NCCL_LIBRARY=/usr/include \
212
- HOROVOD_NCCL_INCLUDE=/usr/include \
213
- HOROVOD_NCCL_LIB=/lib/x86_64-linux-gnu \
214
- HOROVOD_GPU_BROADCAST=NCCL \
215
- HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
216
- pip install --no-cache-dir horovod==0.24.2 && \
217
- ldconfig
218
-
219
218
RUN python3 -m pip install --no-cache-dir \
220
219
mpi4py==3.1.3 \
221
220
nni==2.9 \
222
221
mlflow==1.29.0 \
223
222
scikit-nni==0.2.1
224
223
225
- RUN jupyter nbextensions_configurator enable && \
224
+ RUN python3 -m pip install pytorch-lightning && \
225
+ python3 -m pip install --no-cache-dir \
226
+ torch-scatter \
227
+ torch-sparse \
228
+ torch-cluster \
229
+ torch-spline-conv \
230
+ torch-geometric \
231
+ -f https://pytorch-geometric.com/whl/torch-1.13.1%2Bcu117.html
232
+ RUN pip install jupyter_contrib_nbextensions markupsafe==2.0.1 && \
233
+ jupyter nbextensions_configurator enable && \
226
234
jupyter contrib nbextension install && \
227
235
jupyter nbextension enable --py --sys-prefix widgetsnbextension && \
228
236
jupyter serverextension enable --py jupyterlab --sys-prefix && \
@@ -240,11 +248,8 @@ RUN apt autoclean && \
240
248
rm -rf /root/.cache && \
241
249
rm -rf /tmp/*
242
250
243
- RUN /opt/conda/bin/python3 -m ipykernel install \
244
- --prefix=/opt/conda/ \
245
- --display-name "PyTorch 1.13 (NGC 22.09/Python 3.8 Conda) on Backend.AI" && \
246
- cat /opt/conda/share/jupyter/kernels/python3/kernel.json
247
- ENV JUPYTER_PATH="/opt/conda/share/jupyter"
251
+ RUN python3 -m ipykernel install --display-name "PyTorch 1.14 (NGC 22.12/Python 3.8) on Backend.AI" && \
252
+ cat /usr/local/share/jupyter/kernels/python3/kernel.json
248
253
249
254
# Backend.AI specifics
250
255
COPY ./service-defs /etc/backend.ai/service-defs
@@ -259,9 +264,8 @@ LABEL ai.backend.kernelspec="1" \
259
264
ai.backend.resource.min.mem="1g" \
260
265
ai.backend.resource.min.cuda.device=1 \
261
266
ai.backend.resource.min.cuda.shares=0 \
262
- ai.backend.base-distro="ubuntu22.04" \
263
267
ai.backend.runtime-type="python" \
264
- ai.backend.runtime-path="/opt/conda/bin/python3" \
268
+ ai.backend.runtime-path="/usr/bin/python" \
265
269
ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
266
270
267
271
WORKDIR /home/work
0 commit comments