- FROM lablup/common-base:20.07-py36-cuda10.1
+ FROM lablup/common-base:py36-cuda10.1

ARG TF_BUILD_VERSION=r2.3

# Install the most recent bazel release.
@@ -80,13 +80,6 @@ RUN cp .bazelrc /root/.bazelrc

ENV CI_BUILD_PYTHON ${PYTHON}
ENV WHL_DIR=/tmp/pip3
- # Set bazel build parameters in .bazelrc in parameterized_docker_build.sh
- # Use --copt=-march values to get optimized builds appropriate for the hardware
- # platform of your choice.
- # For ivy-bridge or sandy-bridge
- #     --copt=-march="avx" \
- # For haswell, broadwell, or skylake
- #     --copt=-march="avx2" \
COPY .bazelrc /root/.mkl.bazelrc
RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc

@@ -132,33 +125,27 @@ RUN python3 -m pip --no-cache-dir install \

# Clean up Bazel cache when done.

+ RUN python3 -m pip install --extra-index-url \
+     https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 \
+     && \
+     python3 -m pip install --extra-index-url \
+     https://developer.download.nvidia.com/compute/redist nvidia-dali-tf-plugin-cuda100
+
# Install Horovod, temporarily using CUDA stubs
- RUN HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
-     pip install --no-cache-dir horovod==0.19.5 && \
+ RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
+     HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL HOROVOD_NCCL_LINK=SHARED \
+     HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITHOUT_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
+     HOROVOD_GPU=CUDA \
+     python3 -m pip install --no-cache-dir horovod==0.21.1 && \
      ldconfig

RUN python3 -m pip install --no-cache-dir \
-     mpi4py==3.0.3 \
-     nni==1.6 \
-     scikit-nni==0.2.1
-
- RUN python3 -m pip install --no-cache-dir --extra-index-url \
-     https://developer.download.nvidia.com/compute/redist \
-     nvidia-dali-cuda110
- # RUN python3 -m pip install --no-cache-dir \
- #     --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-tf-plugin-cuda110
- # WORKDIR /tmp
- # RUN git clone --recursive https://github.com/NVIDIA/DALI && \
- #     cd DALI && \
- #     mkdir build && \
- #     cd build && \
- #     cmake -D CMAKE_BUILD_TYPE=Release .. && \
- #     make -j"$(nproc)" && \
- #     cd .. && \
- #     python3 -m pip install --no-cache-dir dali/python
-
-
-
+     mpi4py==3.0.3 \
+     nni==1.9 \
+     mlflow==1.12.1 \
+     scikit-nni==0.2.1
+
+ COPY ./service-defs /etc/backend.ai/service-defs
# Install ipython kernelspec
RUN python3 -m ipykernel install --display-name "TensorFlow 2.3 on Python 3.6 & CUDA 10.1" && \
    cat /usr/local/share/jupyter/kernels/python3/kernel.json
@@ -174,7 +161,7 @@ LABEL ai.backend.kernelspec="1" \
      ai.backend.resource.min.cuda.shares=0 \
      ai.backend.runtime-type="python" \
      ai.backend.runtime-path="/usr/bin/python3" \
-     ai.backend.service-ports="ipython:pty:3000,jupyter:http:8080,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006"
+     ai.backend.service-ports="ipython:pty:3000,jupyter:http:8081,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"

WORKDIR /home/work
# vim: ft=dockerfile
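The commit above swaps the CPU-only Horovod build for an NCCL-enabled one and pins the DALI wheels for this CUDA variant. Not part of the commit, but a minimal post-build sanity check could look like the following sketch, assuming the built image is tagged tf2.3-py36-cuda10.1 (a hypothetical tag) and the host has the NVIDIA container runtime:

# Hypothetical smoke test; the image tag is an assumption, not from the commit.
# horovodrun --check-build lists the frameworks and communication backends Horovod
# was compiled with, so TensorFlow and NCCL should both appear after this change.
docker run --rm --gpus all tf2.3-py36-cuda10.1 horovodrun --check-build
# Confirm the DALI wheel and its TensorFlow plugin import cleanly.
docker run --rm --gpus all tf2.3-py36-cuda10.1 \
    python3 -c "import nvidia.dali; import nvidia.dali.plugin.tf; print(nvidia.dali.__version__)"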