|
# Versions that pick the base image tag. They are declared before FROM so that
# the FROM line below can interpolate them.
ARG CUDA_VERSION=11.1
ARG CUDNN_VERSION=8

# Alternative bases without cuDNN, kept for reference:
# FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
# FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04
FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04
| 7 | + |
# Build-time versions used by the conda/pip steps further down.
ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.7
ARG CONDA_VERSION=4.9.2

# Run every RUN step through bash. pipefail makes a pipeline fail when any stage
# fails (e.g. a broken download piped into tar), not only when the last one does.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Make user-level installs under /root/.local/bin reachable.
ENV PATH="$PATH:/root/.local/bin"
| 15 | + |
WORKDIR /opt

# One layer for the system toolchain, Miniconda (installed to /opt/conda) and
# Node.js 14.x, so the caches removed at the end never reach the image.
# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libopenmpi-dev \
        unzip \
        wget \
    && \
    # Install conda and python, then hook conda into login and interactive shells.
    wget --quiet "https://repo.anaconda.com/miniconda/Miniconda3-py38_${CONDA_VERSION}-Linux-x86_64.sh" -O miniconda.sh && \
    sh miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh && \
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    # Trim static libraries and JS source maps from the conda tree.
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy && \
    update-alternatives --install /opt/conda/bin/python python /opt/conda/bin/python3 2 && \
    # Node.js 14.x from NodeSource. The setup script is downloaded to a file first
    # (with --fail) so that a failed download aborts the build instead of feeding
    # an empty or error page into bash and silently continuing.
    curl -fsSL https://deb.nodesource.com/setup_14.x -o nodesource_setup.sh && \
    bash nodesource_setup.sh && \
    rm nodesource_setup.sh && \
    apt-get update -y && \
    apt-get install -y nodejs && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /root/.cache && \
    rm -rf /var/lib/apt/lists/*
| 55 | + |
# Search paths for CUDA, the NVIDIA driver mounts and conda.
# NOTE(review): the /opt/miniconda3 entries do not match the /opt/conda prefix
# that Miniconda is installed to in this file - confirm whether they are stale.
ENV LD_LIBRARY_PATH="/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/include/x86_64-linux-gnu:/opt/miniconda3/lib" \
    PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/conda/bin:/usr/local/sbin:/usr/bin/cmake/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/tensorrt/bin:/opt/miniconda3/bin:$PATH" \
    LIBRARY_PATH=/usr/local/cuda/lib64/stubs \
    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
    _CUDA_COMPAT_PATH="/usr/local/cuda/compat"

# General process defaults.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    MPLBACKEND=Svg \
    PYTHONUNBUFFERED=1 \
    MKL_THREADING_LAYER=GNU

# Horovod build switches: PyTorch + Gloo + NCCL only.
ENV HOROVOD_GPU_OPERATIONS=NCCL \
    HOROVOD_WITH_PYTORCH=1 \
    HOROVOD_WITHOUT_TENSORFLOW=1 \
    HOROVOD_WITHOUT_MXNET=1 \
    HOROVOD_WITH_GLOO=1 \
    HOROVOD_WITHOUT_MPI=1

# Native-extension build settings and the name of the conda environment.
# MAKEFLAGS="-j$(nproc)" was the alternative to the single-job setting below.
ENV MAKEFLAGS="-j1" \
    TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0" \
    CONDA_ENV=lightning
| 77 | + |
COPY environment.yml environment.yml

# Create the ${CONDA_ENV} environment with the requested Python / PyTorch / CUDA
# toolkit, run `conda init bash`, then apply the project's environment.yml on top.
# Before the update, environment.yml is rewritten in place:
#   * the python and pytorch version pins are commented out, so the versions
#     chosen by the build args are kept;
#   * the horovod pin is commented out and every line mentioning horovod is then
#     dropped (Horovod is meant to be installed later).
# NOTE: this requires that the channel is presented in the yaml before packages;
# replace the channel with nightly if needed.
RUN conda create --yes --name "${CONDA_ENV}" \
        "python=${PYTHON_VERSION}" \
        "pytorch=${PYTORCH_VERSION}" \
        "cudatoolkit=${CUDA_VERSION}" \
        --channel pytorch \
        --channel pytorch-test \
        --channel pytorch-nightly \
    && conda init bash \
    && python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- python[>=]+[\d\.]+', '# - python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" \
    && python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '# - pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" \
    && python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- horovod[>=]+[\d\.]+', '# - horovod', open(fname).read()) ; open(fname, 'w').write(req)" \
    && python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if 'horovod' not in ln])" \
    && cat environment.yml \
    && conda env update --name "${CONDA_ENV}" --file environment.yml \
    && conda clean --yes --all \
    && rm environment.yml
| 93 | + |
# Make the ${CONDA_ENV} conda environment the default interpreter and library set.
# Every pair must use key=value form: a first pair written in the legacy
# `ENV NAME value` form makes Docker treat the rest of the instruction as part of
# that single value, so the following variables would never be set.
# The prefix is /opt/conda because that is where Miniconda is installed.
ENV \
    PATH="/opt/conda/envs/${CONDA_ENV}/bin:${PATH}" \
    LD_LIBRARY_PATH="/opt/conda/envs/${CONDA_ENV}/lib:${LD_LIBRARY_PATH}" \
    CONDA_DEFAULT_ENV="${CONDA_ENV}"
| 99 | + |
# Stage the requirement lists and the version-adjusting helper under a common
# "requirements" prefix so one glob can remove them afterwards.
COPY ./extra.txt requirements-extra.txt
COPY ./test.txt requirements-test.txt
COPY ./adjust_versions.py requirements_adjust_versions.py

# Confirm torch is visible to pip and importable, run the helper script on the
# extra requirements, then install the remaining requirements.
# NOTE(review): presumably the helper aligns pins with the installed torch - confirm.
RUN pip list | grep torch \
    && python -c "import torch; print(torch.__version__)" \
    && python requirements_adjust_versions.py requirements-extra.txt \
    && pip install --no-cache-dir -r requirements-extra.txt \
    && pip install --no-cache-dir -r requirements-test.txt \
    && rm requirements*
| 112 | + |
# Install NVIDIA DALI, needed for examples.
# ${CUDA_VERSION%%.*} keeps only the major CUDA version, so 11.x selects the
# nvidia-dali-cuda110 wheel. --no-cache-dir keeps pip's download cache out of
# the layer, matching the other pip steps in this file.
RUN pip install --no-cache-dir \
        --extra-index-url https://developer.download.nvidia.com/compute/redist \
        "nvidia-dali-cuda${CUDA_VERSION%%.*}0"
| 116 | + |
# Install NVIDIA AMP (apex) from source with its C++ and CUDA extensions, then
# drop the checkout in the same layer.
RUN git clone https://github.com/NVIDIA/apex \
    && pip install --no-cache-dir \
        --global-option="--cpp_ext" \
        --global-option="--cuda_ext" \
        ./apex \
    && rm -rf apex
| 122 | + |
| 123 | + |
# Install git-lfs v2.13.3 from the upstream release tarball: download and unpack
# it in /tmp, run the install.sh found there, then empty /tmp.
WORKDIR /tmp
RUN GIT_LFS_TARBALL=git-lfs-linux-amd64-v2.13.3.tar.gz \
    && curl -sLO "https://github.com/git-lfs/git-lfs/releases/download/v2.13.3/${GIT_LFS_TARBALL}" \
    && tar -zxf "${GIT_LFS_TARBALL}" \
    && bash install.sh \
    && rm -rf /tmp/*
| 130 | + |
# Install code-server under /usr/local/lib and expose it on PATH.
# The version is held in one shell variable so that the download URL, the
# unpacked directory name and the symlink target cannot drift apart; a mismatch
# between the renamed directory and the link target leaves a dangling symlink.
RUN CODE_SERVER_VERSION=3.9.3 && \
    curl -fL "https://github.com/cdr/code-server/releases/download/v${CODE_SERVER_VERSION}/code-server-${CODE_SERVER_VERSION}-linux-amd64.tar.gz" | tar -C /usr/local/lib -xz && \
    mv "/usr/local/lib/code-server-${CODE_SERVER_VERSION}-linux-amd64" "/usr/local/lib/code-server-${CODE_SERVER_VERSION}" && \
    ln -s "/usr/local/lib/code-server-${CODE_SERVER_VERSION}/bin/code-server" /usr/local/bin/code-server
| 134 | + |
# Register a Jupyter kernel spec under /opt/conda using the base conda
# interpreter, then print the generated kernel.json into the build log.
# NOTE(review): this uses /opt/conda/bin/python3 rather than the interpreter of
# the conda environment created earlier - confirm ipykernel is available there.
RUN /opt/conda/bin/python3 -m ipykernel install \
        --display-name "PyTorch 1.7 (Lightning/Python 3.8 Conda) on Backend.AI" \
        --prefix=/opt/conda/ \
    && cat /opt/conda/share/jupyter/kernels/python3/kernel.json
| 139 | + |
# Backend.AI integration: service definitions and runner scripts.
COPY ./service-defs /etc/backend.ai/service-defs
COPY ./runner-scripts/bootstrap.sh runner-scripts/setup_multinode.py /opt/container/

# Backend.AI kernel metadata. Each key is declared exactly once.
# NOTE(review): base-distro says ubuntu16.04 while the base image is
# ubuntu18.04 - confirm which value the Backend.AI runner expects before changing it.
LABEL ai.backend.kernelspec="1" \
      ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
      ai.backend.features="batch query uid-match user-input" \
      ai.backend.base-distro="ubuntu16.04" \
      ai.backend.accelerators="cuda" \
      ai.backend.resource.min.cpu="1" \
      ai.backend.resource.min.mem="1g" \
      ai.backend.resource.min.cuda.device=1 \
      ai.backend.resource.min.cuda.shares=0.1 \
      ai.backend.runtime-type="python" \
      ai.backend.runtime-path="/opt/conda/bin/python3" \
      ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
| 156 | + |
# Show what we have, and fail the build if the interpreter or torch does not
# match the requested major.minor versions. The comparison joins the first two
# version components instead of slicing three characters, so two-digit minor
# versions such as 3.10 or 1.10 are compared correctly.
RUN pip --version && \
    conda info && \
    pip list && \
    python -c "import sys; ver = '.'.join(map(str, sys.version_info[:2])); assert ver == '$PYTHON_VERSION', sys.version" && \
    python -c "import torch; ver = '.'.join(torch.__version__.split('.')[:2]); assert ver == '$PYTORCH_VERSION', torch.__version__"

# Default working directory for sessions started from this image.
WORKDIR /home/work
# vim: ft=dockerfile
0 commit comments