Skip to content

Commit 4957249

Browse files
committed
Pytorch 1.7
1 parent ae3e361 commit 4957249

File tree

10 files changed

+186
-0
lines changed

10 files changed

+186
-0
lines changed
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
FROM lablup/common-base:py38-cuda11.1
2+
# Install PyTorch and its companion packages (torchvision, torchaudio, torchtext, tensorboardX)
3+
# Version pins; the RUN step that installs the wheels substitutes these into the
# download URLs and into the tensorboardX `==` requirement.
ENV PYTORCH_VERSION=1.7.1
4+
ENV TORCHVISION_VERSION=0.8.2
5+
ENV TORCHAUDIO_VERSION=0.7.2
6+
ENV TORCHTEXT_VERSION=0.8.1
7+
ENV TENSORBOARDX_VERSION=2.1
8+
# NOTE(review): MXNET_VERSION is not referenced by any instruction visible in this
# Dockerfile — confirm whether it is still needed.
ENV MXNET_VERSION=1.6.0
9+
10+
# Remove any torch already present, then install the pinned torch, torchvision,
# torchaudio and torchtext wheels for CPython 3.8, followed by tensorboardX.
# torch and torchvision are fetched as cu110 builds although the base image tag
# says cuda11.1.
# NOTE(review): presumably no cu111 wheels are published for these versions — confirm.
RUN python3 -m pip uninstall -y torch && \
11+
python3 -m pip install --no-cache-dir \
12+
https://download.pytorch.org/whl/cu110/torch-${PYTORCH_VERSION}%2Bcu110-cp38-cp38-linux_x86_64.whl \
13+
https://download.pytorch.org/whl/cu110/torchvision-${TORCHVISION_VERSION}%2Bcu110-cp38-cp38-linux_x86_64.whl \
14+
https://download.pytorch.org/whl/torchaudio-${TORCHAUDIO_VERSION}-cp38-cp38-linux_x86_64.whl \
15+
https://download.pytorch.org/whl/torchtext-${TORCHTEXT_VERSION}-cp38-cp38-linux_x86_64.whl && \
16+
python3 -m pip install --no-cache-dir tensorboardX==${TENSORBOARDX_VERSION}
17+
18+
# torch2trt PyTorch to TensorRT converter which utilizes the TensorRT Python API
19+
WORKDIR /tmp
20+
# Clone torch2trt, install it together with its plugins via setup.py, and delete
# the checkout within the same layer so the sources do not remain in the image.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may pick
# up different upstream code.
RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt /tmp/torch2trt && \
21+
cd /tmp/torch2trt && \
22+
python3 setup.py install --plugins && \
23+
rm -fr /tmp/torch2trt
24+
25+
# Install NVIDIA DALI from NVIDIA's package index. The cuda110 build is the one
# for CUDA 11.x, matching the cuda11.1 base image; the previous cuda100 package
# targets CUDA 10.0. --no-cache-dir keeps pip's download cache out of the layer.
RUN python3 -m pip install --no-cache-dir --extra-index-url \
26+
https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110
27+
28+
# Install Horovod, temporarily using CUDA stubs
29+
# The CUDA stub directory is passed to ldconfig only for the Horovod build; the
# trailing plain `ldconfig` refreshes the linker cache afterwards.
# pip is invoked as `python3 -m pip`, like every other install in this file, so
# Horovod is built against the same interpreter that received the torch wheels.
# NOTE(review): HOROVOD_WITH_TENSORFLOW=1 requires TensorFlow at build time —
# confirm the base image provides it, otherwise this step fails.
RUN ldconfig /usr/local/cuda/targets/x86_64-linux/lib/stubs && \
30+
HOROVOD_GPU_ALLREDUCE=NCCL HOROVOD_GPU_BROADCAST=NCCL \
31+
HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 \
32+
python3 -m pip install --no-cache-dir horovod==0.21.1 && \
33+
ldconfig
34+
35+
# Additional Python packages, each pinned to an exact version: mpi4py, mlflow,
# nni and scikit-nni. --no-cache-dir keeps pip's download cache out of the layer.
RUN python3 -m pip install --no-cache-dir \
36+
mpi4py==3.0.3 \
37+
mlflow==1.12.1 \
38+
nni==1.9 \
39+
scikit-nni==0.2.1
40+
41+
# Final cleanup of apt package lists, root's cache directory and /tmp.
# apt-get replaces apt because apt's command-line interface is not stable for
# scripted use and prints a warning when run non-interactively.
# NOTE(review): files created by earlier RUN layers still occupy space in those
# layers; this step only removes them from the final filesystem view.
RUN apt-get autoclean && \
42+
rm -rf /var/lib/apt/lists/* && \
43+
rm -rf /root/.cache && \
44+
rm -rf /tmp/*
45+
COPY ./service-defs /etc/backend.ai/service-defs
46+
47+
# Install ipython kernelspec
48+
# Register the Jupyter kernelspec under a descriptive display name, then print
# the generated kernel.json so its contents appear in the build log.
RUN python3 -m ipykernel install --display-name "PyTorch 1.7.1 on Python 3.8 (CUDA 11.1)" && \
49+
cat /usr/local/share/jupyter/kernels/python3/kernel.json
50+
51+
# Backend.AI specifics
52+
# Image metadata under the ai.backend.* namespace: kernelspec version, feature
# flags, minimum cpu/mem/cuda resources, the Python runtime path, and the list
# of service ports written as name:protocol:port.
# NOTE(review): base-distro is declared as ubuntu16.04 — verify it matches the
# distribution of lablup/common-base:py38-cuda11.1.
LABEL ai.backend.kernelspec="1" \
53+
ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
54+
ai.backend.features="batch query uid-match user-input" \
55+
ai.backend.base-distro="ubuntu16.04" \
56+
ai.backend.resource.min.cpu="1" \
57+
ai.backend.resource.min.mem="1g" \
58+
ai.backend.resource.min.cuda.device=0 \
59+
ai.backend.resource.min.cuda.shares=0 \
60+
ai.backend.runtime-type="python" \
61+
ai.backend.runtime-path="/usr/bin/python3" \
62+
ai.backend.service-ports="ipython:pty:3000,jupyter:http:8081,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006,mlflow-ui:preopen:5000,nniboard:preopen:8080"
63+
64+
WORKDIR /home/work
65+
# vim: ft=dockerfile
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"command": [
3+
"{runtime_path}", "-m", "digits"
4+
]
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"command": [
3+
"{runtime_path}", "-m", "IPython"
4+
]
5+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"prestart": [
3+
{
4+
"action": "write_tempfile",
5+
"args": {
6+
"body": [
7+
"c.NotebookApp.allow_root = True\n",
8+
"c.NotebookApp.ip = \"0.0.0.0\"\n",
9+
"c.NotebookApp.port = {ports[0]}\n",
10+
"c.NotebookApp.token = \"\"\n",
11+
"c.FileContentsManager.delete_to_trash = False\n"
12+
]
13+
},
14+
"ref": "jupyter_cfg"
15+
}
16+
],
17+
"command": [
18+
"{runtime_path}", "-m", "notebook", "--no-browser", "--config", "{jupyter_cfg}"
19+
]
20+
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"prestart": [
3+
{
4+
"action": "write_tempfile",
5+
"args": {
6+
"body": [
7+
"c.NotebookApp.allow_root = True\n",
8+
"c.NotebookApp.ip = \"0.0.0.0\"\n",
9+
"c.NotebookApp.port = {ports[0]}\n",
10+
"c.NotebookApp.token = \"\"\n",
11+
"c.FileContentsManager.delete_to_trash = False\n"
12+
]
13+
},
14+
"ref": "jupyter_cfg"
15+
}
16+
],
17+
"command": [
18+
"{runtime_path}", "-m", "jupyterlab", "--no-browser", "--config", "{jupyter_cfg}"
19+
]
20+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"prestart": [],
3+
"command": [
4+
"/usr/local/bin/mlflow",
5+
"ui",
6+
"--host",
7+
"0.0.0.0"
8+
]
9+
}

python-pytorch/service-defs/nni.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"prestart": [
3+
{
4+
"action": "mkdir",
5+
"args": {
6+
"path": "/home/work/.config/nni"
7+
}
8+
}
9+
],
10+
"command": [
11+
"{runtime_path}",
12+
"-m", "nni.main",
13+
"--logdir", "/home/work/logs",
14+
"--create", "/home/work/.config/nni/config.yml",
15+
"--host", "0.0.0.0",
16+
"--port", "{ports[0]}"
17+
]
18+
}

python-pytorch/service-defs/sftp.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"command": [
3+
"{runtime_path}", "-m", "sftpserver", "--port", "{ports[0]}"
4+
]
5+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"prestart": [
3+
{
4+
"action": "mkdir",
5+
"args": {
6+
"path": "/home/work/logs"
7+
}
8+
}
9+
],
10+
"command": [
11+
"{runtime_path}",
12+
"-m", "tensorboard.main",
13+
"--host", "0.0.0.0",
14+
"--port", "{ports[0]}"
15+
],
16+
"allowed_arguments": [
17+
"--logdir"
18+
],
19+
"default_arguments": {
20+
"--logdir": "/home/work/logs"
21+
}
22+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"prestart": [],
3+
"command": [
4+
"/opt/kernel/code-server",
5+
"--port", "{ports[0]}",
6+
"--extensions-dir", "/etc/backend.ai/vscode-exts"
7+
],
8+
"allowed_arguments": [
9+
"--auth"
10+
],
11+
"allowed_envs": [
12+
"PASSWORD"
13+
],
14+
"default_arguments": {
15+
"--auth": "none"
16+
}
17+
}

0 commit comments

Comments
 (0)