Skip to content

Commit 5fac16d

Browse files
authored
Install Horovod and dependencies in the dev image (#2236)
* Install Horovod and dependencies in the dev image * Add a blank line at the end * Set commit id to clone
1 parent 443230d commit 5fac16d

File tree

1 file changed

+35
-3
lines changed

1 file changed

+35
-3
lines changed

elasticdl/docker/Dockerfile

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,30 @@ ARG BASE_IMAGE
33

44
FROM ${BASE_IMAGE} as dev
55
ARG EXTRA_PYPI_INDEX=https://pypi.org/simple
6+
ARG HOROVOD_COMMIT_ID="3108a24"
67

78
COPY elasticdl/docker/bashrc /etc/bash.bashrc
89
RUN chmod a+rx /etc/bash.bashrc
910

10-
RUN apt-get -qq update && \
11-
apt-get -qq install -y unzip curl git software-properties-common g++ wget \
12-
shellcheck libeigen3-dev clang-format > /dev/null && \
11+
# Install the OS packages used by the dev image, then upgrade pip.
# Packages are listed one per line in alphabetical order so that future
# additions and removals produce minimal diffs. The output of the install
# step is discarded to keep build logs short.
RUN apt-get -qq update \
    && apt-get -qq install -y \
        build-essential \
        ca-certificates \
        clang-format \
        cmake \
        curl \
        g++ \
        git \
        ibverbs-providers \
        libeigen3-dev \
        libibverbs1 \
        libjpeg-dev \
        libpng-dev \
        librdmacm1 \
        shellcheck \
        software-properties-common \
        unzip \
        vim \
        wget \
        > /dev/null \
    && python -m pip install --quiet --upgrade pip
1431

1532
COPY elasticdl_client/requirements.txt /requirements.txt
@@ -61,6 +78,8 @@ COPY elasticdl/python/data/recordio_gen/heart_recordio_gen.py /scripts/heart_rec
6178

6279
FROM dev as allreduce
6380

81+
# Invoke pip through the interpreter, like the other install steps in this
# file, so the packages land in the same Python that `python` resolves to.
# NOTE(review): presumably these are build-time requirements of the packages
# installed later in this stage — confirm.
RUN python -m pip install --quiet future typing
82+
6483
# Note that pip has trouble downloading PyTorch on manylinux, so we use curl
6584
# to download it instead
6685
RUN curl -sLo torch-1.4.0-cp36-cp36m-manylinux1_x86_64.whl \
@@ -72,3 +91,16 @@ RUN cd /root && git clone --depth=1 https://github.com/caicloud/ftlib.git
7291
RUN cd /root/ftlib && python -m pip install --quiet -r requirements.txt
7392
RUN cd /root/ftlib/ftlib/consensus/gossip && bash ./gen_shared_lib.sh
7493
RUN cp -r /root/ftlib/ftlib /usr/local/lib/python3.6/dist-packages/ftlib
94+
95+
# The latest released Horovod package does not support elastic training,
# so we clone the repository and install it from source.
# The checkout lives in a directory named after the commit id.
ENV HOROVOD_PATH=/tmp/${HOROVOD_COMMIT_ID}

# The last positional argument of `git clone` is the destination directory,
# not a revision, so cloning alone would build whatever master points to.
# Check out the requested commit explicitly and re-sync the submodules so
# they match that commit.
RUN git clone --recursive https://github.com/horovod/horovod.git \
        -b master ${HOROVOD_PATH} \
    && cd ${HOROVOD_PATH} \
    && git checkout ${HOROVOD_COMMIT_ID} \
    && git submodule update --init --recursive

# The HOROVOD_WITH*/HOROVOD_WITHOUT* variables are passed to setup.py only
# for this command; they are not persisted in the image environment.
RUN cd ${HOROVOD_PATH} && HOROVOD_WITHOUT_MPI=1 \
    HOROVOD_WITHOUT_MXNET=1 \
    HOROVOD_WITH_TENSORFLOW=1 \
    HOROVOD_WITH_PYTORCH=1 \
    python setup.py install

0 commit comments

Comments
 (0)