@@ -8,25 +8,27 @@ ENV WORKSPACE_FLAVOR=$ARG_WORKSPACE_FLAVOR
88USER root
99
1010# ## NVIDIA CUDA BASE ###
11- # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18.04-x86_64/base/Dockerfile
12- RUN apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \
13- curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
14- echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
15- echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
11+ # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/base/Dockerfile
12+ RUN apt-get update && apt-get install -y --no-install-recommends \
13+ gnupg2 curl ca-certificates && \
14+ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub | apt-key add - && \
15+ echo "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/cuda.list && \
16+ echo "deb https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu2004/x86_64 /" > /etc/apt/sources.list.d/nvidia-ml.list && \
1617 # Cleanup - cannot use cleanup script here, otherwise too much is removed
1718 apt-get clean && \
1819 rm -rf $HOME/.cache/* && \
1920 rm -rf /tmp/* && \
2021 rm -rf /var/lib/apt/lists/*
2122
22- ENV CUDA_VERSION 10.1.243
23- ENV CUDA_PKG_VERSION 10-1=$CUDA_VERSION-1
23+ ENV CUDA_VERSION 11.2.2
24+ # ENV CUDA_PKG_VERSION 11-2=$CUDA_VERSION-1
25+ # ENV CUDART_VERSION 11-2=$CUDA_VERSION46-1
2426
2527# For libraries in the cuda-compat-* package: https://docs.nvidia.com/cuda/eula/index.html#attachment-a
2628RUN apt-get update && apt-get install -y --no-install-recommends \
27- cuda-cudart-$CUDA_PKG_VERSION \
28- cuda-compat-10-1 && \
29- ln -s cuda-10.1 /usr/local/cuda && \
29+ cuda-cudart-11-2=11.2.152-1 \
30+ cuda-compat-11-2 \
31+ && ln -s cuda-11.2 /usr/local/cuda && \
3032 rm -rf /var/lib/apt/lists/* && \
3133 # Cleanup - cannot use cleanup script here, otherwise too much is removed
3234 apt-get clean && \
@@ -35,107 +37,101 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
3537 rm -rf /var/lib/apt/lists/*
3638
3739# Required for nvidia-docker v1
38- RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
39- echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
40+ RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf \
41+ && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
4042
4143ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
42- ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH}
44+ ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
4345
4446# nvidia-container-runtime
4547# https://github.com/NVIDIA/nvidia-container-runtime#environment-variables-oci-spec
4648# nvidia-container-runtime
4749ENV NVIDIA_VISIBLE_DEVICES all
4850ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
49- ENV NVIDIA_REQUIRE_CUDA "cuda>=10.1 brand=tesla,driver>=396 ,driver<397 brand=tesla,driver>=410 ,driver<411 brand=tesla, driver>=418,driver<419 "
51+ ENV NVIDIA_REQUIRE_CUDA "cuda>=11.2 brand=tesla,driver>=418,driver<419 brand=tesla,driver>=440,driver<441 driver>=450"
5052
5153# ## CUDA RUNTIME ###
52- # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18 .04-x86_64/runtime/Dockerfile
54+ # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/runtime/Dockerfile
5355
54- ENV NCCL_VERSION 2.7.8
56+ ENV NCCL_VERSION 2.8.4
5557
5658RUN apt-get update && apt-get install -y --no-install-recommends \
57- cuda-libraries-$CUDA_PKG_VERSION \
58- cuda-npp-$CUDA_PKG_VERSION \
59- cuda-nvtx-$CUDA_PKG_VERSION \
60- libcublas10=10.2.1.243-1 \
61- libnccl2=$NCCL_VERSION-1+cuda10.1 && \
62- apt-mark hold libnccl2 && \
59+ cuda-libraries-11-2=11.2.2-1 \
60+ libnpp-11-2=11.3.2.152-1 \
61+ cuda-nvtx-11-2=11.2.152-1 \
62+ libcublas-11-2=11.4.1.1043-1 \
63+ libcusparse-11-2=11.4.1.1152-1 \
64+ libnccl2=$NCCL_VERSION-1+cuda11.2 \
65+ && rm -rf /var/lib/apt/lists/* \
6366 # Cleanup - cannot use cleanup script here, otherwise too much is removed
64- apt-get clean && \
65- rm -rf $HOME/.cache/* && \
66- rm -rf /tmp/* && \
67- rm -rf /var/lib/apt/lists/*
67+ && apt-get clean \
68+ && rm -rf $HOME/.cache/* \
69+ && rm -rf /tmp/* \
70+ && rm -rf /var/lib/apt/lists/*
6871
69- # apt from auto upgrading the cublas package. See https://gitlab.com/nvidia/container-images/cuda/-/issues/88
70- RUN apt-mark hold libcublas10
72+ RUN apt-mark hold libcublas-11-2 libnccl2
7173
7274# ## END CUDA RUNTIME ###
7375
7476# ## CUDA DEVEL ###
75- # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18 .04-x86_64/devel/Dockerfile
77+ # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/devel/Dockerfile
7678RUN apt-get update && apt-get install -y --no-install-recommends \
77- cuda-nvml-dev-$CUDA_PKG_VERSION \
78- cuda-command-line-tools-$CUDA_PKG_VERSION \
79- cuda-nvprof-$CUDA_PKG_VERSION \
80- cuda-npp-dev-$CUDA_PKG_VERSION \
81- cuda-libraries-dev-$CUDA_PKG_VERSION \
82- cuda-minimal-build-$CUDA_PKG_VERSION \
83- libcublas-dev=10.2.1.243-1 \
84- libnccl-dev=$NCCL_VERSION-1+cuda10.1 && \
85- apt-mark hold libnccl-dev && \
79+ libtinfo5 libncursesw5 \
80+ cuda-cudart-dev-11-2=11.2.152-1 \
81+ cuda-command-line-tools-11-2=11.2.2-1 \
82+ cuda-minimal-build-11-2=11.2.2-1 \
83+ cuda-libraries-dev-11-2=11.2.2-1 \
84+ cuda-nvml-dev-11-2=11.2.152-1 \
85+ libnpp-dev-11-2=11.3.2.152-1 \
86+ libnccl-dev=$NCCL_VERSION-1+cuda11.2 \
87+ libcublas-dev-11-2=11.4.1.1043-1 \
88+ libcusparse-dev-11-2=11.4.1.1152-1 && \
8689 # Cleanup - cannot use cleanup script here, otherwise too much is removed
8790 apt-get clean && \
8891 rm -rf $HOME/.cache/* && \
8992 rm -rf /tmp/* && \
9093 rm -rf /var/lib/apt/lists/*
9194
9295# Keep apt from auto upgrading the cublas package. See https://gitlab.com/nvidia/container-images/cuda/-/issues/88
93- RUN apt-mark hold libcublas-dev
94-
96+ RUN apt-mark hold libcublas-dev-11-2 libnccl-dev
9597ENV LIBRARY_PATH /usr/local/cuda/lib64/stubs
9698
9799# ## END CUDA DEVEL ###
98100
99- # ## CUDANN7 DEVEL ###
100- # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/10.1/ubuntu18 .04-x86_64/devel/cudnn7 /Dockerfile
101+ # ## CUDNN8 DEVEL ###
102+ # https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/11.2.2/ubuntu20.04-x86_64/devel/cudnn8/Dockerfile
101103
102- ENV CUDNN_VERSION 7.6.5.32
104+ ENV CUDNN_VERSION 8.1.1.33
103105LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}"
104106
105- RUN apt-get update && \
106- apt-get install -y --no-install-recommends \
107- libcudnn7=$CUDNN_VERSION-1+cuda10.1 \
108- libcudnn7-dev=$CUDNN_VERSION-1+cuda10.1 && \
109- apt-mark hold libcudnn7 && \
107+ RUN apt-get update && apt-get install -y --no-install-recommends \
108+ libcudnn8=$CUDNN_VERSION-1+cuda11.2 \
109+ libcudnn8-dev=$CUDNN_VERSION-1+cuda11.2 \
110+ && apt-mark hold libcudnn8 && \
110111 # Cleanup
111112 apt-get clean && \
112113 rm -rf /root/.cache/* && \
113114 rm -rf /tmp/* && \
114115 rm -rf /var/lib/apt/lists/*
115116
116- # ## END CUDANN7 ###
117+ # ## END CUDNN8 ###
117118
118119# Link Cupti:
119120ENV LD_LIBRARY_PATH ${LD_LIBRARY_PATH}:/usr/local/cuda/extras/CUPTI/lib64
120121
121- # Install TensorRT. Requires that libcudnn7 is installed above.
122- # https://www.tensorflow.org/install/gpu#ubuntu_1804_cuda_101
123- RUN apt-get update && apt-get install -y --no-install-recommends \
124- libnvinfer6=6.0.1-1+cuda10.1 \
125- libnvinfer-dev=6.0.1-1+cuda10.1 \
126- libnvinfer-plugin6=6.0.1-1+cuda10.1 && \
127- # Cleanup
128- clean-layer.sh
129-
130122# ## GPU DATA SCIENCE LIBRARIES ###
131123
132124RUN \
133125 apt-get update && \
134126 apt-get install -y libomp-dev libopenblas-base && \
135- # Not needed? Install cuda-toolkit (e.g. for pytorch: https://pytorch.org/): https://anaconda.org/anaconda/cudatoolkit
136- conda install -y cudatoolkit=10.1 -c pytorch && \
127+ # Install pytorch gpu
128+ # Uninstall the CPU-only packages via conda before installing the GPU wheels
129+ conda remove --force -y pytorch cpuonly && \
130+ # https://pytorch.org/get-started/locally/ (-y: the image build is non-interactive, so conda must not wait for confirmation)
131+ conda install -y cudatoolkit=11.2 -c pytorch -c nvidia && \
132+ pip install --no-cache-dir torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html && \
137133 # Install cupy: https://cupy.chainer.org/
138- pip install --no-cache-dir cupy-cuda101 && \
134+ pip install --no-cache-dir cupy-cuda112 && \
139135 # Install pycuda: https://pypi.org/project/pycuda
140136 pip install --no-cache-dir pycuda && \
141137 # Install gpu utils libs
@@ -144,25 +140,19 @@ RUN \
144140 pip install --no-cache-dir scikit-cuda && \
145141 # Install tensorflow gpu
146142 pip uninstall -y tensorflow tensorflow-cpu intel-tensorflow && \
147- # TODO: tensorflow 2.3.1 installs tenorboard 2.4.0 with problems, use 2.3.0
148- pip install --no-cache-dir tensorflow-gpu==2.3.0 && \
143+ pip install --no-cache-dir tensorflow-gpu==2.5.0 && \
149144 # Install ONNX GPU Runtime
150- # TODO: 1.4.x is latest with cuda 10.1 support
151145 pip uninstall -y onnxruntime && \
152- pip install --no-cache-dir onnxruntime-gpu==1.4.0 && \
153- # Install pytorch gpu
154- # uninstall cpu only packages via conda
155- conda remove --force -y pytorch cpuonly && \
156- # https://pytorch.org/get-started/locally/
157- conda install -y pytorch -c pytorch && \
158- # Install faiss gpu
159- conda remove --force -y faiss-cpu && \
160- conda install -y faiss-gpu -c pytorch && \
146+ pip install --no-cache-dir onnxruntime-gpu==1.8.0 onnxruntime-training==1.8.0 && \
147+ # Install faiss gpu - TODO: too large?
148+ # conda remove --force -y faiss-cpu && \
149+ # conda install -y faiss-gpu -c pytorch && \
161150 # Update mxnet to gpu edition
162151 pip uninstall -y mxnet-mkl && \
163- pip install --no-cache-dir mxnet-cu101mkl==1.6.0.post0 && \
152+ # cuda111 -> >= 11.1
153+ pip install --no-cache-dir mxnet-cu112 && \
164154 # install jax: https://github.com/google/jax#pip-installation
165- pip install --upgrade jax jaxlib==0.1.57+cuda101 -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
155+ pip install --upgrade "jax[cuda111]" -f https://storage.googleapis.com/jax-releases/jax_releases.html && \
166156 # Install pygpu - Required for theano: http://deeplearning.net/software/libgpuarray/
167157 conda install -y pygpu && \
168158 # Install lightgbm
@@ -177,19 +167,6 @@ RUN \
177167 # Cleanup
178168 clean-layer.sh
179169
180- # TODO: nvdashboard does not work with relative paths
181- # RUN \
182- # # Install Jupyterlab GPU Plugin: https://github.com/rapidsai/jupyterlab-nvdashboard
183- # pip install jupyterlab-nvdashboard && \
184- # jupyter labextension install jupyterlab-nvdashboard && \
185- # # Clean jupyter lab cache: https://github.com/jupyterlab/jupyterlab/issues/4930
186- # jupyter lab clean && \
187- # jlpm cache clean && \
188- # # Remove build folder -> should be remove by lab clean as well?
189- # rm -rf $CONDA_ROOT/share/jupyter/lab/staging && \
190- # # Cleanup
191- # clean-layer.sh
192-
193170# TODO install DALI: https://docs.nvidia.com/deeplearning/dali/user-guide/docs/installation.html#dali-and-ngc
194171# TODO: if > Ubuntu 19.04 -> install nvtop: https://github.com/Syllo/nvtop
195172# TODO: Install ArrayFire: https://arrayfire.com/download/ pip install --no-cache-dir arrayfire && \
0 commit comments