pytorch 1.11 py38 cuda11.3

hephaex · hephaex · commit 8d6c928f09a1 · 2022-03-11T23:42:33.000+09:00
diff --git a/python-pytorch/Dockerfile.1.11-py38-cuda11.3 b/python-pytorch/Dockerfile.1.11-py38-cuda11.3
@@ -0,0 +1,83 @@
+FROM lablup/common-base:py38-cuda11.3
+
+# Install PyTorch
+ENV PYTORCH_VERSION=1.11.0
+ENV TORCHVISION_VERSION=0.12.0
+ENV TORCHAUDIO_VERSION=0.9.0
+ENV TORCHTEXT_VERSION=0.10.0
+ENV TENSORBOARDX_VERSION=2.1
+
+RUN python3 -m pip uninstall -y torch && \
+    python3 -m pip install -U pip==21.1.2 setuptools==57.0.0 && \
+    python3 -m pip install --no-cache-dir \
+        http://download.pytorch.org/whl/cu113/torch-1.11.0%2Bcu113-cp38-cp38-linux_x86_64.whl \
+        http://download.pytorch.org/whl/cu102/torchvision-0.12.0%2Bcu102-cp38-cp38-linux_x86_64.whl \
+	http://download.pytorch.org/whl/cu113/torchaudio-0.11.0%2Bcu113-cp38-cp38-linux_x86_64.whl \
+	http://download.pytorch.org/whl/torchserve-0.3.0-py2.py3-none-any.whl \
+        http://download.pytorch.org/whl/torchtext-0.12.0-cp38-cp38-linux_x86_64.whl \
+	http://download.pytorch.org/whl/cu101/torchcsprng-0.2.1%2Bcu101-cp38-cp38-linux_x86_64.whl \
+	http://download.pytorch.org/whl/torch_model_archiver-0.3.0-py2.py3-none-any.whl \
+	http://download.pytorch.org/whl/torch_tb_profiler-0.1.0-py3-none-any.whl \
+	jsonargparse \
+	torchmetrics \
+	pyDeprecate \
+	fsspec[http]>=2021.05.0 \
+	PyYAML>=5.1 \
+	pretrainedmodels \
+	cnn-finetune \
+	keras4torch \
+	pytorch-lightning 
+
+# torch2trt PyTorch to TensorRT converter which utilizes the TensorRT Python API
+WORKDIR /tmp
+RUN git clone --branch=v0.3.0 https://github.com/NVIDIA-AI-IOT/torch2trt /tmp/torch2trt && \
+    cd /tmp/torch2trt && \
+    python3 setup.py install --plugins && \
+    rm -fr /tmp/torch2trt  
+
+RUN python3 -m pip install --extra-index-url \
+       https://developer.download.nvidia.com/compute/redist --upgrade nvidia-dali-cuda110
+
+# Install Horovod, temporarily using CUDA stubs
+RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
+    HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL \
+    HOROVOD_WITHOUT_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
+    pip install --no-cache-dir horovod==0.24.2 && \
+    ldconfig
+
+RUN python3 -m pip install --no-cache-dir \
+    	mpi4py==3.1.3 \
+        mlflow==1.24.0 \
+	nni==2.6.1 \
+	tensorboard-plugin-wit \
+	scikit-nni==0.2.1
+
+RUN apt autoclean && \
+    sed -i 's/source \/usr\/local\/nvm\/nvm.sh//' /etc/bash.bashrc && \
+    ln -sf /usr/share/terminfo/x/xterm-color /usr/share/terminfo/x/xterm-256color && \
+    rm -rf /var/lib/apt/lists/* && \	
+    rm -rf /root/.cache && \
+    rm -rf /tmp/*
+
+COPY ./service-defs /etc/backend.ai/service-defs
+COPY ./runner-scripts/bootstrap.sh runner-scripts/setup_multinode.py /opt/container/
+
+# Install ipython kernelspec
+Run python3 -m ipykernel install --display-name "PyTorch 1.11.0 on Python 3.8 (CUDA 11.3)" && \
+    cat /usr/local/share/jupyter/kernels/python3/kernel.json
+
+# Backend.AI specifics
+LABEL ai.backend.kernelspec="1" \
+      ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
+      ai.backend.features="batch query uid-match user-input" \
+      ai.backend.base-distro="ubuntu16.04" \
+      ai.backend.resource.min.cpu="1" \
+      ai.backend.resource.min.mem="1g" \
+      ai.backend.resource.min.cuda.device=0 \
+      ai.backend.resource.min.cuda.shares=0 \
+      ai.backend.runtime-type="python" \
+      ai.backend.runtime-path="/usr/bin/python3" \
+      ai.backend.service-ports="ipython:pty:3000,jupyter:http:8081,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
+
+WORKDIR /home/work
+# vim: ft=dockerfile