@@ -94,9 +94,8 @@ RUN ARCH_SUFFIX=$(if [ "${ARCH}" = "aarch64" ]; then echo "arm64"; else echo "am
9494 rm -f doca-host.deb && \
9595 apt-get update && \
9696 apt-get upgrade -y && \
97- apt-get install -y --no-install-recommends doca-sdk-gpunetio libdoca-sdk-gpunetio-dev libdoca-sdk-verbs-dev && \
98- apt-get clean && \
99- rm -rf /var/lib/apt/lists/*
97+ apt-get install -y --no-install-recommends doca-sdk-gpunetio libdoca-sdk-gpunetio-dev libdoca-sdk-verbs-dev
98+ # Deliberately skip the apt cleanup here: the next step still needs the DOCA repository package lists
10099
101100# Force reinstall of RDMA packages from DOCA repository
102101# Reinstall needed to fix broken libibverbs-dev, which may lead to lack of Infiniband support.
@@ -151,29 +150,6 @@ RUN rm -rf /usr/lib/ucx /opt/hpcx/ucx /usr/local/ucx && \
151150 rm -f /usr/lib/${ARCH}-linux-gnu/libucs* /usr/lib/${ARCH}-linux-gnu/libucp* \
152151 /usr/lib/${ARCH}-linux-gnu/libuct* /usr/lib/${ARCH}-linux-gnu/libucm* 2>/dev/null || true
153152
154- RUN cd /usr/local/src && \
155- git clone https://github.com/openucx/ucx.git && \
156- cd ucx && \
157- git checkout $UCX_REF && \
158- ./autogen.sh && ./configure \
159- --prefix=$UCX_PREFIX \
160- --enable-shared \
161- --disable-static \
162- --disable-doxygen-doc \
163- --enable-optimizations \
164- --enable-cma \
165- --enable-devel-headers \
166- --with-cuda=/usr/local/cuda \
167- --with-verbs \
168- --with-dm \
169- --with-gdrcopy=/usr/local \
170- --with-efa \
171- --enable-mt && \
172- make -j${NPROC:-$(nproc)} && \
173- make -j${NPROC:-$(nproc)} install-strip && \
174- ldconfig && \
175- cd /usr/local/src && rm -rf ucx
176-
177153RUN cd /tmp && \
178154 git clone --depth 1 https://github.com/google/gtest-parallel.git && \
179155 mkdir -p /usr/local/bin && \
@@ -182,8 +158,7 @@ RUN cd /tmp && \
182158ENV PATH=/usr/local/bin:$PATH
183159
184160# Build libfabric from source
185- RUN cd /tmp && \
186- wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
161+ RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
187162 "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
188163 tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
189164 cd libfabric-* && \
@@ -207,6 +182,17 @@ RUN cd /tmp && \
207182# Stage 2: Build NIXL (default stage)
208183FROM nixl-base
209184
185+ # Build arguments for this stage, re-declared with defaults; each *_PLUGIN_DIR is derived from the ARGs declared above it
186+ ARG ARCH="x86_64"
187+ ARG NPROC
188+ ARG DEFAULT_PYTHON_VERSION="3.12"
189+ ARG NIXL_PREFIX="/usr/local/nixl"
190+ ARG NIXL_PLUGIN_DIR="${NIXL_PREFIX}/lib/${ARCH}-linux-gnu/plugins"
191+ ARG UCX_REF="v1.19.0"
192+ ARG UCX_PREFIX="/usr"
193+ ARG UCX_PLUGIN_DIR="${UCX_PREFIX}/lib/ucx"
194+ ARG LIBFABRIC_INSTALL_PATH="/usr/local"
195+
210196# By default, uv downloads python packages to $HOME/.cache/uv and hard links them
211197# from the virtual environment. This means that the files reside in /root/.cache/uv,
212198# which is not what we want since some systems mount user home dir into /root,
@@ -230,14 +216,29 @@ RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click t
230216RUN export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \
231217 uv pip install torch torchvision torchaudio
232218
233- # Re-declare ARGs needed in this stage
234- ARG NIXL_PREFIX="/usr/local/nixl"
235- ARG NIXL_PLUGIN_DIR="$NIXL_PREFIX/lib/x86_64-linux-gnu/plugins"
236- ARG ARCH="x86_64"
237- ARG DEFAULT_PYTHON_VERSION="3.12"
238- ARG UCX_PREFIX="/usr"
239- ARG UCX_PLUGIN_DIR="$UCX_PREFIX/lib/ucx"
240- ARG LIBFABRIC_INSTALL_PATH="/usr/local"
219+ # Build UCX at $UCX_REF into $UCX_PREFIX in Stage 2 for build-container.sh (CI uses build.sh to build UCX); expansions are quoted so an empty UCX_REF fails the build, and the clone is removed in the same layer
220+ RUN cd /usr/local/src && \
221+ git clone https://github.com/openucx/ucx.git && \
222+ cd ucx && \
223+ git checkout "$UCX_REF" && \
224+ ./autogen.sh && ./configure \
225+ --prefix="$UCX_PREFIX" \
226+ --enable-shared \
227+ --disable-static \
228+ --disable-doxygen-doc \
229+ --enable-optimizations \
230+ --enable-cma \
231+ --enable-devel-headers \
232+ --with-cuda=/usr/local/cuda \
233+ --with-verbs \
234+ --with-dm \
235+ --with-gdrcopy=/usr/local \
236+ --with-efa \
237+ --enable-mt && \
238+ make -j"${NPROC:-$(nproc)}" && \
239+ make -j"${NPROC:-$(nproc)}" install-strip && \
240+ ldconfig && \
241+ cd /usr/local/src && rm -rf ucx
241242
242243WORKDIR /workspace/nixl
243244COPY . /workspace/nixl
0 commit comments