Skip to content

Commit 3a1e1ea

Browse files
committed
fixed tensorboard bind-all
1 parent a8deddd commit 3a1e1ea

File tree

3 files changed

+31
-13
lines changed

3 files changed

+31
-13
lines changed

python-espnet/Dockerfile

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ RUN apt-get update && \
7070
apt-get clean && \
7171
rm -rf /var/lib/apt/lists/*
7272

73-
## FROM CUDA 11.0 base
73+
## FROM CUDA 11.0 base [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/11.0/base/Dockerfile]
74+
7475
RUN apt-get update && \
7576
apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && \
7677
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub | apt-key add - && \
@@ -89,7 +90,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
8990
# Append the NVIDIA library directories (32- and 64-bit) to the ld.so
# configuration file, one path per line.
RUN printf '%s\n' \
        "/usr/local/nvidia/lib" \
        "/usr/local/nvidia/lib64" \
    >> /etc/ld.so.conf.d/nvidia.conf
9192

92-
## FROM CUDA 11.0 runtime
93+
## FROM CUDA 11.0 runtime [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/11.0/runtime/Dockerfile]
94+
9395
RUN apt-get update && apt-get install -y --no-install-recommends \
9496
cuda-libraries-11-0=11.0.3-1 \
9597
libnpp-11-0=11.1.0.245-1 \
@@ -101,7 +103,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
101103
apt-mark hold libcublas-11-0 libnccl2 && \
102104
rm -rf /var/lib/apt/lists/*
103105

104-
## FROM CUDA 11.0 devel
106+
## FROM CUDA 11.0 devel [https://gitlab.com/nvidia/cuda/blob/ubuntu18.04/11.0/devel/Dockerfile]
107+
105108
RUN apt-get update && apt-get install -y --no-install-recommends \
106109
cuda-cudart-dev-11-0=11.0.221-1 \
107110
cuda-command-line-tools-11-0=11.0.3-1 \
@@ -117,13 +120,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
117120
rm -rf /var/lib/apt/lists/*
118121

119122
## FROM CUDA 11.0-CUDNN 8 devel
123+
120124
# Install the cuDNN 8 runtime and development packages built for CUDA 11.0,
# pinned to $CUDNN_VERSION, then hold libcudnn8 at that version and drop the
# apt package lists in the same layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        "libcudnn8=${CUDNN_VERSION}-1+cuda11.0" \
        "libcudnn8-dev=${CUDNN_VERSION}-1+cuda11.0" \
 && apt-mark hold libcudnn8 \
 && rm -rf /var/lib/apt/lists/*
125129

126-
# Install Conda
127130
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh -O miniconda.sh && \
128131
mkdir -p /opt && \
129132
sh miniconda.sh -b -p /opt/conda && \
@@ -136,6 +139,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux
136139
/opt/conda/bin/conda clean -afy
137140

138141
# Install OpenBlas
142+
WORKDIR /tmp
139143
RUN git clone -q --branch=master git://github.com/xianyi/OpenBLAS.git && \
140144
cd OpenBLAS && \
141145
make DYNAMIC_ARCH=1 NO_AFFINITY=1 NUM_THREADS=48 FC=gfortran && \
@@ -213,14 +217,12 @@ RUN git clone https://github.com/hephaex/kaldi.git /opt/kaldi && \
213217
find /opt/kaldi/src -name "*.o" -exec rm -f {} \; && \
214218
find /opt/kaldi/src -name "*.o" -exec rm -f {} \;
215219

216-
217-
# Download ESPnet
218-
RUN git clone https://github.com/espnet/espnet /opt/espnet
219-
220220
# Install espnet
221-
WORKDIR /opt/espnet/tools
222221
# PyTorch (TH_VERSION) and CUDA versions; presumably read by the ESPnet
# install steps that follow -- confirm against the rest of the file.
# Uses the key=value form; the space-separated `ENV key value` form is legacy.
ENV TH_VERSION=1.7.1 \
    CUDA_VER=11.0

# Fetch the ESPnet sources and make its tools directory the working directory
# for the instructions that follow.
RUN git clone https://github.com/espnet/espnet /opt/espnet
WORKDIR /opt/espnet/tools
225+
224226
# Replace nvidia-smi for nvcc because docker does not load nvidia-smi
225227
RUN if [ -z "$( which nvcc )" ]; then \
226228
echo "Build without CUDA" && \
@@ -258,16 +260,18 @@ RUN /opt/conda/bin/python3 -m pip install --no-cache-dir \
258260
jupyter==1.0.0 \
259261
python-language-server[all]
260262

263+
# Copy Backend.AI multi-node support
264+
COPY runner-scripts/bootstrap.sh /opt/container/
261265
COPY ./service-defs /etc/backend.ai/service-defs
262-
RUN echo ". /opt/espnet/tools/activate_python.sh" >> /etc/profile
263-
264-
RUN chmod +x /opt/espnet/tools/activate_python.sh && \
266+
# Activate the ESPnet Python environment from /etc/profile. That file is also
# read by plain POSIX sh (dash on Ubuntu), which has no `source` builtin, so
# the portable `.` command is written instead.
# NOTE(review): chmod -R 777 makes the whole ESPnet tree world-writable;
# tighten this if the runtime user/group is known.
RUN echo ". /opt/espnet/tools/activate_python.sh" >> /etc/profile && \
    chmod +x /opt/espnet/tools/activate_python.sh && \
    chmod -R 777 /opt/espnet
266269

267270
# Install ipython kernelspec
268271
# Register the conda Python as a Jupyter kernel, then print the generated
# kernelspec so it shows up in the build log.
# Instruction keyword is uppercased to match the rest of the file.
RUN /opt/conda/bin/python3 -m ipykernel install --display-name "A4003 (ESPNet, PyTorch 1.7.1 on Python 3.7 & CUDA 11.0)" && \
    cat /usr/local/share/jupyter/kernels/python3/kernel.json
270273

274+
271275
# Backend.AI specifics
272276
LABEL ai.backend.kernelspec="1" \
273277
ai.backend.envs.corecount="OPENBLAS_NUM_THREADS,OMP_NUM_THREADS,NPROC" \
@@ -279,7 +283,8 @@ LABEL ai.backend.kernelspec="1" \
279283
ai.backend.resource.min.cuda.shares=0 \
280284
ai.backend.runtime-type="python" \
281285
ai.backend.runtime-path="/opt/conda/bin/python3" \
282-
ai.backend.service-ports="ipython:pty:3000,jupyter:http:8081,tensorboard:http:6006"
286+
ai.backend.service-ports="ipython:pty:3000,jupyter:http:8081,jupyterlab:http:8090,vscode:http:8180,tensorboard:http:6006"
287+
283288

284289
# Final working directory of the image.
WORKDIR /home/work
285290
# vim: ft=dockerfile
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
# Container bootstrap: prepares SSH client files under /home/work/.ssh.

# Make self-SSH possible: install the container key as the default identity
# and write the authorized keys out as the matching id_rsa.pub.
cp -rp /home/work/id_container /home/work/.ssh/id_rsa
cat /home/work/.ssh/authorized_keys > /home/work/.ssh/id_rsa.pub
# Use the POSIX "owner:group" separator; the "owner.group" form is obsolete
# and recent GNU coreutils warn about it.
chown work:work /home/work/.ssh/id_rsa.pub
chmod 644 /home/work/.ssh/id_rsa.pub

# Default ssh client config: append a Port setting of 2200.
cat <<EOF >> /home/work/.ssh/config
Port 2200
EOF
chown work:work /home/work/.ssh/config
12+

python-espnet/service-defs/tensorboard.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@
2020
"--logdir": "/home/work/logs"
2121
}
2222
}
23+

0 commit comments

Comments
 (0)