# Image that builds TensorFlow from source on top of the Backend.AI common
# base image (Python 3.6 / CUDA 10.1 according to the base image tag).
FROM lablup/common-base:20.12-py36-cuda10.1

# Branch or tag of tensorflow/tensorflow to build; override with --build-arg.
ARG TF_BUILD_VERSION=r2.4
# Bazel release used for the build. This is a fixed version, not "the most
# recent release".
# NOTE(review): presumably the version the selected TensorFlow branch
# expects -- confirm against its .bazelversion when changing either value.
ENV BAZEL_VERSION=3.1.0

# System-wide Bazel configuration, appended to /etc/bazel.bazelrc.
#
# * "startup --batch": running bazel inside `docker build` causes trouble
#   (https://github.com/bazelbuild/bazel/issues/134); forcing --batch is the
#   easiest workaround.
# * standalone spawn/genrule strategies: workaround for sandboxing issues
#   (https://github.com/bazelbuild/bazel/issues/418).
RUN printf '%s\n' \
        'startup --batch' \
        'build --spawn_strategy=standalone --genrule_strategy=standalone' \
        >>/etc/bazel.bazelrc

# Install Bazel ${BAZEL_VERSION}.
#
# The installer is downloaded, executed and removed inside one RUN so that it
# does not remain in any image layer. The Bazel LICENSE is kept at
# /bazel/LICENSE.txt and is fetched from the matching release tag (not from
# the moving master branch) so the build stays reproducible.
# Both downloads send a browser-like User-Agent header.
WORKDIR /
RUN ua='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36' && \
    installer="bazel-${BAZEL_VERSION}-installer-linux-x86_64.sh" && \
    mkdir /bazel && \
    cd /bazel && \
    curl -H "User-Agent: ${ua}" -fSsL -O \
        "https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/${installer}" && \
    curl -H "User-Agent: ${ua}" -fSsL -o /bazel/LICENSE.txt \
        "https://raw.githubusercontent.com/bazelbuild/bazel/${BAZEL_VERSION}/LICENSE" && \
    chmod +x "${installer}" && \
    "./${installer}" && \
    rm -f "/bazel/${installer}"

# TensorFlow ecosystem packages, installed in three separate layers.
# Package order and version pins are kept exactly as they were; names are
# written in their normalized (hyphenated) form, which pip treats as
# equivalent to the underscore spelling.
# NOTE(review): tensorflow-constrained-optimization is the only unpinned
# package here -- consider pinning it for reproducible builds.
RUN python3 -m pip install --no-cache-dir --upgrade \
        mesh-tensorflow==0.1.16 \
        cachetools==3.1.0 \
        tensorflow-transform==0.26.0 \
        tensorflow-model-analysis==0.22.2 \
        ngraph-tensorflow-bridge==0.18.0 \
        tensorflow-metadata==0.26.0 \
        tensorflow-datasets==3.2.0 \
        tensorflow-probability==0.10.1 \
        tensorwatch==0.9.1 \
        tensorflow-hub==0.8.0 \
        tensorflow-addons==0.10.0 \
        tensorflow-text==2.3.0 \
        neural-structured-learning==1.1.0 \
        tensorflow-constrained-optimization \
        tensorflow-graphics-gpu==1.0.0 \
        tensorflow-gan==2.0.0 \
        tensorflow-data-validation==0.26.0 \
        tensorflow-model-optimization==0.3.0

# Second batch, also installed with --upgrade.
RUN python3 -m pip install --no-cache-dir --upgrade \
        jupyter-tensorboard==0.2.0 \
        tf-agents==0.5.0 \
        tf-slim==1.1.0 \
        tensorflow-plot==0.3.2 \
        tensorflow-lattice==2.0.5 \
        tensorflow-io==0.15.0 \
        tfx==0.22.1 \
        tfx-bsl==0.22.1

# Third batch, installed without --upgrade.
RUN python3 -m pip install --no-cache-dir \
        tensorflow-ranking==0.3.1 \
        tensorflow-compression==1.3 \
        tensor2tensor==1.15.7 \
        jupyterlab-nvdashboard==0.3.1

# Fetch the TensorFlow sources into /tensorflow.
WORKDIR /tensorflow

# TF_BUILD_VERSION may name either a branch (e.g. r2.4) or a tag (v-prefixed).
# `git clone --branch` accepts both -- a tag is checked out with a detached
# HEAD -- so a single shallow clone replaces the former tag/branch conditional
# and its extra `git fetch --tags`.
RUN git clone --depth=1 --branch="${TF_BUILD_VERSION}" \
        https://github.com/tensorflow/tensorflow.git .

# Run TensorFlow's configure script non-interactively: `yes ""` answers every
# prompt with an empty line.
RUN yes "" | python3 configure.py
# Start root's user-level bazelrc from the .bazelrc in the TensorFlow checkout.
RUN cp .bazelrc /root/.bazelrc

# CI_BUILD_PYTHON: NOTE(review): PYTHON is not defined anywhere in this file,
#   so this is empty unless the base image exports it -- confirm.
# WHL_DIR: directory that receives the built TensorFlow wheel.
ENV CI_BUILD_PYTHON="${PYTHON}" \
    WHL_DIR=/tmp/pip3

# Additional Bazel settings come from the .bazelrc in the Docker build
# context; it is copied next to root's bazelrc and imported from it.
# Use --copt=-march values to get optimized builds appropriate for the
# hardware platform of your choice:
#   ivy-bridge or sandy-bridge:      --copt=-march="avx"
#   haswell, broadwell, or skylake:  --copt=-march="avx2"
COPY .bazelrc /root/.mkl.bazelrc
RUN echo "import /root/.mkl.bazelrc" >>/root/.bazelrc

# TensorRT support is currently disabled:
#ENV TF_NEED_TENSORRT=1
# CUDA compute capabilities to build for.
ENV TF_CUDA_COMPUTE_CAPABILITIES=sm_35,sm_37,sm_52,sm_60,sm_61,sm_70,sm_75,compute_70,compute_75

# Build the TensorFlow pip package through the ci_build `configured` script
# (invoked with GPU), write the wheel to ${WHL_DIR} and install it.
#
# * --config=ngraph is currently disabled and therefore not in the flag list.
# * /root/.cache is removed in this same RUN: deleting it in a later layer
#   would leave the build cache stored in this layer and would not shrink the
#   image. (Presumably this is where Bazel keeps its output base -- confirm.)
# * The wheel is left in ${WHL_DIR}.
#   NOTE(review): remove it here as well if nothing downstream needs it.
RUN tensorflow/tools/ci_build/builds/configured GPU \
    bazel --bazelrc=/root/.bazelrc build \
        -c opt \
        --copt=-msse4.1 \
        --copt=-msse4.2 \
        --copt=-mavx \
        --copt=-mavx2 \
        --copt=-mfma \
        --copt=-mfpmath=both \
        --copt=-O3 \
        --copt=-Wformat \
        --copt=-Wformat-security \
        --copt=-fstack-protector \
        --copt=-fPIC \
        --copt=-fpic \
        --config=opt \
        --config=cuda \
        --config=mkl \
        --config=monolithic \
        --config=gdr \
        --config=verbs \
        --config=numa \
        --config=v2 \
        --linkopt=-znoexecstack \
        --linkopt=-zrelro \
        --linkopt=-znow \
        --linkopt=-fstack-protector \
        --linkopt=-ldl \
        --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
        -k //tensorflow/tools/pip_package:build_pip_package && \
    bazel-bin/tensorflow/tools/pip_package/build_pip_package "${WHL_DIR}" && \
    python3 -m pip --no-cache-dir install --upgrade "${WHL_DIR}"/tensorflow-*.whl && \
    rm -rf /root/.cache

# Install the pinned TensorBoard release after the freshly built wheel.
RUN python3 -m pip --no-cache-dir install \
        tensorboard==2.4 && \
    rm -rf /root/.cache

# Install Horovod with NCCL-based GPU operations and TensorFlow support only
# (PyTorch and MXNet are explicitly excluded).
# The CUDA stub libraries are added to the dynamic linker cache just for this
# build step; the trailing plain `ldconfig` rebuilds the cache without them.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
    HOROVOD_GPU=CUDA \
    HOROVOD_GPU_ALLREDUCE=NCCL \
    HOROVOD_GPU_BROADCAST=NCCL \
    HOROVOD_NCCL_LINK=SHARED \
    HOROVOD_WITH_TENSORFLOW=1 \
    HOROVOD_WITHOUT_PYTORCH=1 \
    HOROVOD_WITHOUT_MXNET=1 \
    python3 -m pip install --no-cache-dir horovod==0.21.0 && \
    ldconfig

# Additional pinned Python tooling (MPI bindings, NNI, MLflow, scikit-nni).
RUN python3 -m pip install --no-cache-dir \
        mpi4py==3.0.3 \
        nni==1.9 \
        mlflow==1.12.1 \
        scikit-nni==0.2.1

# NVIDIA DALI from NVIDIA's package index.
# The CUDA 10 wheel (nvidia-dali-cuda100) is used because the base image is
# CUDA 10.1; the nvidia-dali-cuda110 wheel is built for CUDA 11.0.
# NOTE(review): the package is unpinned -- consider pinning a known-good
# version for reproducible builds.
RUN python3 -m pip install --no-cache-dir --extra-index-url \
        https://developer.download.nvidia.com/compute/redist \
        nvidia-dali-cuda100

# Disabled alternatives, kept for reference: the DALI TensorFlow plugin and a
# from-source build of DALI.
#RUN python3 -m pip install --no-cache-dir \
#    --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-tf-plugin-cuda100
#WORKDIR /tmp
#RUN git clone --recursive https://github.com/NVIDIA/DALI && \
#    cd DALI && \
#    mkdir build && \
#    cd build && \
#    cmake -D CMAKE_BUILD_TYPE=Release .. && \
#    make -j"$(nproc)" && \
#    cd .. && \
#    python3 -m pip install --no-cache-dir dali/python

# Install the IPython kernelspec under the display name below, then print the
# generated kernel.json to the build log.
RUN python3 -m ipykernel install --display-name "TensorFlow 2.4 on Python 3.6 & CUDA 10.1" && \
    cat /usr/local/share/jupyter/kernels/python3/kernel.json

# Backend.AI specifics: image metadata labels, grouped by topic.

# Kernel description and runtime.
LABEL ai.backend.kernelspec="1" \
      ai.backend.features="batch query uid-match user-input" \
      ai.backend.base-distro="ubuntu16.04" \
      ai.backend.runtime-type="python" \
      ai.backend.runtime-path="/usr/bin/python3"

# Resource settings.
LABEL ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
      ai.backend.resource.min.cpu="1" \
      ai.backend.resource.min.mem="1g" \
      ai.backend.resource.min.cuda.device="0" \
      ai.backend.resource.min.cuda.shares="0"

# Service ports, as name:protocol:port entries.
LABEL ai.backend.service-ports="ipython:pty:3000,jupyter:http:8080,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006"

# Default working directory of the final image.
WORKDIR /home/work
# vim: ft=dockerfile