11# SPDX-License-Identifier: MIT
2- FROM nvcr.io/nvidia/tensorflow:20.11-tf2-py3
# Larger base stage carrying the toolchain needed to compile the benchmark
# tools. It is named "build" so that COPY --from=build can pull artifacts out
# of it; the AS keyword is upper-cased to match FROM.
FROM nvcr.io/nvidia/cuda:11.2.0-devel-ubuntu20.04 AS build

# Keep apt/debconf from prompting during the package installs in this stage.
ENV DEBIAN_FRONTEND=noninteractive
56
# Install all required build dependencies
# apt-utils is installed first, in its own layer (presumably so debconf can
# pre-configure the larger package set installed next - confirm). Recommended
# packages are skipped, matching the main install, and the apt lists are
# removed in the same layer so they never persist in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends apt-utils && \
    rm -rf /var/lib/apt/lists/*
79RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
8- openssh-client \
9- openssh-server \
1010 swig \
1111 bison \
12- libgfortran3 \
12+ gcc \
13+ libgfortran4 \
1314 pkg-config \
1415 autotools-dev \
1516 debhelper \
@@ -42,8 +43,46 @@ RUN apt-get update && apt-get install -y --allow-downgrades --allow-change-held-
4243 kmod \
4344 libnuma1 \
4445 lsof \
46+ libopenmpi-dev && \
47+ rm -rf /var/lib/apt/lists/*
48+
# Build NVIDIA's NCCL tests with MPI enabled, pinned to a fixed upstream
# commit. The checkout lands in ./nccl-tests relative to the stage's current
# working directory.
RUN git clone https://github.com/NVIDIA/nccl-tests \
 && git -C nccl-tests/ reset --hard ec1b5e22e618d342698fda659efdd5918da6bd9f \
 && make -C nccl-tests/ MPI=1 MPI_HOME=/usr/lib/x86_64-linux-gnu/openmpi
54+
# Build and install the OSU micro-benchmarks 5.6.2 with CUDA support, using
# the MPI compiler wrappers from /usr/bin. The downloaded tarball is deleted
# in the same layer.
# NOTE(review): --no-check-certificate disables TLS verification and the
# archive is not checksum-verified; consider installing ca-certificates and
# checking a known sha256 before extracting.
RUN wget --no-check-certificate https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.6.2.tar.gz \
 && tar -xzf osu-micro-benchmarks-5.6.2.tar.gz \
 && cd osu-micro-benchmarks-5.6.2 \
 && ./configure \
      CC=/usr/bin/mpicc \
      CXX=/usr/bin/mpicxx \
      --enable-cuda \
      --with-cuda-include=/usr/local/cuda/include \
      --with-cuda-libpath=/usr/local/cuda/lib64 \
 && make \
 && make install \
 && rm -rf ../*.tar.gz
63+
# Build IO500 together with IOR and mdtest from a pinned commit of the
# io-500-dev repository. prepare.sh is invoked from the checkout root because
# it is addressed by a path relative to that directory.
RUN git clone https://github.com/jyvet/io-500-dev \
 && git -C io-500-dev reset --hard 0232acfa8e64f7c543db8930dd279009ec9c32bc \
 && cd io-500-dev \
 && utilities/prepare.sh
69+
# Final, lighter runtime stage: it starts from the CUDA runtime image and only
# receives the build artifacts that are copied out of the earlier build stage.
FROM nvcr.io/nvidia/cuda:11.2.0-runtime-ubuntu20.04

ENV DEBIAN_FRONTEND=noninteractive

# Runtime packages, listed alphabetically. The apt lists are removed in the
# same layer so they do not persist in the image.
RUN apt-get update \
 && apt-get install -y --allow-downgrades --allow-change-held-packages --no-install-recommends \
      fio \
      git \
      libopenmpi-dev \
      openmpi-bin \
      openssh-client \
      openssh-server \
      psmisc \
      python \
      python3-dev \
      python3-distutils \
      python3-pip \
 && rm -rf /var/lib/apt/lists/*
4988
@@ -65,32 +104,25 @@ RUN mkdir -p /var/run/sshd && \
65104
66105WORKDIR /
67106
68- RUN git clone https://github.com/NVIDIA/nccl-tests && \
69- cd nccl-tests/ && \
70- git reset --hard ec1b5e22e618d342698fda659efdd5918da6bd9f && \
71- make MPI=1 MPI_HOME=/usr/local/mpi
# Bring the compiled nccl-tests build directory over from the build stage,
# keeping the same absolute path in the runtime image.
COPY --from=build \
     /nccl-tests/build \
     /nccl-tests/build
72109
73- RUN wget --no-check-certificate https://mvapich.cse.ohio-state.edu/download/mvapich/osu-micro-benchmarks-5.6.2.tar.gz && \
74- tar zxf osu-micro-benchmarks-5.6.2.tar.gz && \
75- cd osu-micro-benchmarks-5.6.2 && \
76- ./configure CC=/usr/local/mpi/bin/mpicc CXX=/usr/local/mpi/bin/mpicxx --enable-cuda --with-cuda-include=/usr/local/cuda/include --with-cuda-libpath=/usr/local/cuda/lib64 && \
77- make && \
78- make install && \
79- rm -rf ../*.tar.gz
80-
81- RUN python3 -m pip install nvidia-pyindex && \
82- python3 -m pip install \
83- nvidia-imageinary['mxnet' ]>=1.1.2
# Bring the installed OSU micro-benchmarks over from the build stage. Only the
# mpi/collective directory is copied, to the same absolute path.
COPY --from=build \
     /usr/local/libexec/osu-micro-benchmarks/mpi/collective/ \
     /usr/local/libexec/osu-micro-benchmarks/mpi/collective/
84112
85- RUN git clone https://github.com/jyvet/io-500-dev && \
86- cd io-500-dev && \
87- git reset --hard 0232acfa8e64f7c543db8930dd279009ec9c32bc && \
88- utilities/prepare.sh
# Bring the IO500 bin directory over from the build stage, keeping the same
# absolute path in the runtime image.
COPY --from=build \
     /io-500-dev/bin \
     /io-500-dev/bin
89115
# Fetch the DALI sources into ./dali and pin the working tree to a fixed
# commit. Nothing is compiled here; only the checkout is kept.
RUN git clone https://github.com/NVIDIA/DALI dali \
 && git -C dali/ reset --hard fd30786d773d08185d78988b2903dce2ace0a00b
93119
# Install the Python packages used at runtime: upgrade pip and setuptools,
# install nvidia-pyindex ahead of the NVIDIA-hosted packages, then install
# nvidia-imageinary (with the tfrecord extra) and nvidia-dali-cuda110.
# The imageinary requirement is quoted on purpose: unquoted, the shell treats
# ">=1.1.2" as an output redirection to a file named "=1.1.2", so the version
# constraint never reaches pip, and a space inside the brackets splits the
# extras list into separate arguments.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools && \
    python3 -m pip install --no-cache-dir nvidia-pyindex && \
    python3 -m pip install --no-cache-dir \
        "nvidia-imageinary[tfrecord]>=1.1.2" \
        nvidia-dali-cuda110
125+
# Add the repository's test_scripts from the build context under /tests/.
COPY test_scripts /tests/

# Document TCP port 2222 as the port this image listens on (presumably the
# SSH daemon configured earlier in the file - confirm). TCP is the default
# protocol for EXPOSE; it is spelled out here for clarity.
EXPOSE 2222/tcp
0 commit comments