Skip to content

Commit b515934

Browse files
committed
PythonLighting 1.7 (python 3.8, cuda 11.1)
1 parent b3b9075 commit b515934

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# Versions used to pick the CUDA/cuDNN base image tag. Both ARGs are declared
# before FROM, so they are only in scope for the FROM line itself.
ARG CUDNN_VERSION=8
ARG CUDA_VERSION=11.1

# Alternative bases kept for reference:
# FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
# FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04
FROM nvidia/cuda:${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu18.04

# NOTE(review): CUDA_VERSION is referenced by later RUN steps but is not
# redeclared after FROM; presumably those steps pick up a CUDA_VERSION
# environment variable exported by the base image -- confirm.

# Build-time knobs for the Python / PyTorch / Miniconda versions used below.
ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.7
ARG CONDA_VERSION=4.9.2

# Run every RUN step under bash. `-o pipefail` makes a pipeline fail when any
# of its stages fails; without it only the last command's exit status counts,
# so a failed download piped into another command would go unnoticed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Make user-level installs under /root/.local/bin reachable.
ENV PATH="$PATH:/root/.local/bin"

WORKDIR /opt
17+
# System toolchain, Miniconda and Node.js, installed and cleaned up in one
# layer so that the caches removed at the end never reach the image.
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libopenmpi-dev \
        unzip \
        wget \
    && \
    # Install conda and python.
    # NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
    wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py38_${CONDA_VERSION}-Linux-x86_64.sh -O miniconda.sh && \
    mkdir -p /opt && \
    sh miniconda.sh -b -p /opt/conda && \
    rm miniconda.sh && \
    # Make `conda` available to login shells and activate `base` in root's bash.
    ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
    echo ". /opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \
    echo "conda activate base" >> ~/.bashrc && \
    # Drop static libraries and JS source maps from the conda tree to save space.
    find /opt/conda/ -follow -type f -name '*.a' -delete && \
    find /opt/conda/ -follow -type f -name '*.js.map' -delete && \
    /opt/conda/bin/conda clean -afy && \
    update-alternatives --install /opt/conda/bin/python python /opt/conda/bin/python3 2 && \
    # Node.js 14.x from NodeSource. The setup script is downloaded to a file
    # with -f first, so an HTTP error or a truncated download aborts the build
    # instead of feeding an error page (or nothing) to bash.
    curl -fsSL https://deb.nodesource.com/setup_14.x -o nodesource_setup.sh && \
    bash nodesource_setup.sh && \
    rm nodesource_setup.sh && \
    apt-get update -y && \
    apt-get install -y nodejs && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /root/.cache && \
    rm -rf /var/lib/apt/lists/*
55+
56+
# Library and executable search paths.
# NOTE(review): conda is installed under /opt/conda earlier in this file; the
# /opt/miniconda3 entries below do not match that prefix -- confirm whether
# they are intentional.
ENV LD_LIBRARY_PATH="/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/include/x86_64-linux-gnu:/opt/miniconda3/lib" \
    PATH="/usr/local/nvidia/bin:/usr/local/cuda/bin:/opt/conda/bin:/usr/local/sbin:/usr/bin/cmake/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/tensorrt/bin:/opt/miniconda3/bin:$PATH" \
    LIBRARY_PATH=/usr/local/cuda/lib64/stubs

# CUDA locations and the GPU architectures listed for CUDA extension builds.
ENV CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
    _CUDA_COMPAT_PATH="/usr/local/cuda/compat" \
    TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0"

# General process defaults.
ENV DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    MKL_THREADING_LAYER=GNU \
    MPLBACKEND=Svg \
    PYTHONUNBUFFERED=1

# HOROVOD_* switches. Nothing in this file reads them directly; presumably
# they are consumed by a Horovod build that happens elsewhere -- confirm.
ENV HOROVOD_GPU_OPERATIONS=NCCL \
    HOROVOD_WITH_PYTORCH=1 \
    HOROVOD_WITHOUT_TENSORFLOW=1 \
    HOROVOD_WITHOUT_MXNET=1 \
    HOROVOD_WITH_GLOO=1 \
    HOROVOD_WITHOUT_MPI=1

# Build parallelism is pinned to a single job; the parallel alternative is
# kept for reference:
# MAKEFLAGS="-j$(nproc)"
ENV MAKEFLAGS="-j1"

# Name of the conda environment created and used by the following steps.
ENV CONDA_ENV=lightning
77+
78+
# environment.yml is only needed while the conda environment is built: the RUN
# step below rewrites it in place and deletes it once the environment exists.
COPY environment.yml environment.yml

# Create the $CONDA_ENV environment with the requested Python, PyTorch and
# cudatoolkit versions, then apply the remaining environment.yml packages.
RUN conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} cudatoolkit=${CUDA_VERSION} -c pytorch -c pytorch-test -c pytorch-nightly \
 && conda init bash \
    # NOTE: this requires that the channel is presented in the yaml before packages.
    # Replace the channel with nightly if needed, fix the PT version and remove
    # Horovod as it will be installed later.
    # Comment out the python / pytorch pins so the versions chosen above win.
 && python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- python[>=]+[\d\.]+', '# - python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" \
 && python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '# - pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" \
    # Comment out a pinned horovod entry, then drop every line mentioning horovod.
 && python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- horovod[>=]+[\d\.]+', '# - horovod', open(fname).read()) ; open(fname, 'w').write(req)" \
 && python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if 'horovod' not in ln])" \
    # Print the rewritten file into the build log before applying it.
 && cat environment.yml \
 && conda env update --name $CONDA_ENV --file environment.yml \
 && conda clean -ya \
 && rm environment.yml
93+
94+
# Put the $CONDA_ENV environment first on the executable and library search
# paths and make it the default conda environment.
#
# Every entry uses the KEY=value form on purpose: with the legacy
# `ENV KEY value` form the whole remainder of the instruction, including the
# assignments that follow, becomes the value of the first key, so
# LD_LIBRARY_PATH and CONDA_DEFAULT_ENV would never be set.
#
# conda is installed with `-p /opt/conda` earlier in this file, so its
# environments live under /opt/conda/envs.
ENV \
    PATH="/opt/conda/envs/${CONDA_ENV}/bin:$PATH" \
    LD_LIBRARY_PATH="/opt/conda/envs/${CONDA_ENV}/lib:$LD_LIBRARY_PATH" \
    CONDA_DEFAULT_ENV=${CONDA_ENV}
99+
100+
# Requirement lists and the version-adjusting helper script, copied under
# names that all start with "requirements" so one glob removes them afterwards.
COPY ./extra.txt requirements-extra.txt
COPY ./test.txt requirements-test.txt
COPY ./adjust_versions.py requirements_adjust_versions.py

# Check that torch is visible to pip and importable, run the helper script on
# the extra requirements, then install both requirement lists.
RUN pip list | grep torch \
 && python -c "import torch; print(torch.__version__)" \
 && python requirements_adjust_versions.py requirements-extra.txt \
    # Install remaining requirements
 && pip install -r requirements-extra.txt --no-cache-dir \
 && pip install -r requirements-test.txt --no-cache-dir \
    # The glob also matches requirements_adjust_versions.py.
 && rm requirements*
112+
113+
# install DALI, needed for examples
# The package name is built from the major part of CUDA_VERSION followed by
# "0" (the expansion strips everything from the first dot onwards).
# --no-cache-dir keeps pip's download cache out of the layer, matching the
# other pip installs in this file.
RUN pip install --no-cache-dir \
        --extra-index-url https://developer.download.nvidia.com/compute/redist \
        nvidia-dali-cuda${CUDA_VERSION%%.*}0
116+
117+
# install NVIDIA AMP
# Clone apex, install it with the C++ and CUDA extension build options, and
# delete the checkout in the same layer.
RUN git clone https://github.com/NVIDIA/apex \
 && pip install --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex \
 && rm -rf apex
122+
123+
124+
# install git-lfs
# The release tarball is downloaded and unpacked in /tmp, its install.sh is
# run, and everything under /tmp is removed in the same layer.
WORKDIR /tmp
RUN curl -sLO https://github.com/git-lfs/git-lfs/releases/download/v2.13.3/git-lfs-linux-amd64-v2.13.3.tar.gz \
 && tar -zxf git-lfs-linux-amd64-v2.13.3.tar.gz \
 && bash install.sh \
 && rm -rf /tmp/*
130+
131+
# Install code-server under /usr/local/lib and expose it on PATH.
# The version is held in one shell variable so that the download URL, the
# renamed directory and the symlink target cannot drift apart; previously the
# directory was renamed to "...-3.9.2" while the symlink pointed at
# "...-3.9.3", leaving /usr/local/bin/code-server dangling.
RUN CODE_SERVER_VERSION=3.9.3 && \
    curl -fL "https://github.com/cdr/code-server/releases/download/v${CODE_SERVER_VERSION}/code-server-${CODE_SERVER_VERSION}-linux-amd64.tar.gz" | tar -C /usr/local/lib -xz && \
    mv "/usr/local/lib/code-server-${CODE_SERVER_VERSION}-linux-amd64" "/usr/local/lib/code-server-${CODE_SERVER_VERSION}" && \
    ln -s "/usr/local/lib/code-server-${CODE_SERVER_VERSION}/bin/code-server" /usr/local/bin/code-server
134+
135+
# Register a Jupyter kernel spec under the /opt/conda prefix using the base
# conda interpreter, then print the generated kernel.json into the build log.
RUN /opt/conda/bin/python3 -m ipykernel install \
        --prefix=/opt/conda/ \
        --display-name "PyTorch 1.7 (Lightning/Python 3.8 Conda) on Backend.AI" \
 && cat /opt/conda/share/jupyter/kernels/python3/kernel.json
139+
140+
# Backend.AI service definitions and runner scripts.
COPY ./service-defs /etc/backend.ai/service-defs
COPY ./runner-scripts/bootstrap.sh runner-scripts/setup_multinode.py /opt/container/

# Backend.AI image metadata. Each key is declared exactly once
# (ai.backend.base-distro used to appear twice with the same value).
# NOTE(review): base-distro is "ubuntu16.04" although the base image tag is
# ubuntu18.04; this may be a deliberate platform setting -- confirm before
# changing it.
LABEL ai.backend.kernelspec="1" \
      ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
      ai.backend.features="batch query uid-match user-input" \
      ai.backend.base-distro="ubuntu16.04" \
      ai.backend.accelerators="cuda" \
      ai.backend.resource.min.cpu="1" \
      ai.backend.resource.min.mem="1g" \
      ai.backend.resource.min.cuda.device=1 \
      ai.backend.resource.min.cuda.shares=0.1 \
      ai.backend.runtime-type="python" \
      ai.backend.runtime-path="/opt/conda/bin/python3" \
      ai.backend.service-ports="ipython:pty:3000,jupyter:http:8091,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
156+
157+
# Show what we have, then fail the build if the first three characters of the
# Python or torch version string differ from the requested build arguments.
RUN pip --version \
 && conda info \
 && pip list \
 && python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" \
 && python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__"

# Working directory for everything that follows.
WORKDIR /home/work
167+
# vim: ft=dockerfile

0 commit comments

Comments
 (0)