Skip to content

Commit d4698f8

Browse files
committed
Move UCX install out of base image
Signed-off-by: Alexey Rivkin <[email protected]>
1 parent 412061e commit d4698f8

File tree

3 files changed

+67
-65
lines changed

3 files changed

+67
-65
lines changed

.ci/jenkins/lib/build-matrix.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,19 +40,19 @@ kubernetes:
4040
runs_on_dockers:
4141
- {
4242
file: "contrib/Dockerfile",
43-
name: "ubuntu24.04-nixl-deps",
43+
name: "ubuntu24.04-nixl-base",
4444
uri: "$arch/$name",
45-
tag: "02112025",
45+
tag: "20251103",
4646
build_args: "--target nixl-base --build-arg ARCH=$arch"
4747
}
4848
- {
4949
file: "contrib/Dockerfile",
50-
name: "ubuntu22.04-nixl-deps",
50+
name: "ubuntu22.04-nixl-base",
5151
uri: "$arch/$name",
52-
tag: "02112025",
52+
tag: "20251103",
5353
build_args: "--target nixl-base --build-arg ARCH=$arch --build-arg BASE_IMAGE_TAG=24.10-cuda12.6-devel-ubuntu22.04"
5454
}
55-
# - { name: "podman-v5.0.2", url: "quay.io/podman/stable:v5.0.2", category: 'tool', privileged: true }
55+
- { name: "podman-v5.0.2", url: "quay.io/podman/stable:v5.0.2", category: 'tool', privileged: true }
5656

5757
matrix:
5858
axes:

.gitlab/build.sh

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ fi
5353
ARCH=$(uname -m)
5454
[ "$ARCH" = "arm64" ] && ARCH="aarch64"
5555

56-
# Skip dependency installation if running in pre-built base image
56+
# Skip dependency installation when running in the pre-built nixl-base image
5757
if [ -n "${NIXL_BASE_IMAGE_ENV}" ]; then
58-
UCX_INSTALL_DIR=/usr
59-
LIBFABRIC_INSTALL_DIR=/usr/local
58+
# Use pre-installed libfabric from base image
59+
LIBFABRIC_INSTALL_DIR=${LIBFABRIC_INSTALL_DIR:-/usr/local}
6060
else
6161

6262
# Some Docker images ship with broken installations:
@@ -135,27 +135,6 @@ chmod +x install_uv.sh
135135
./install_uv.sh
136136
export PATH="$HOME/.local/bin:$PATH"
137137

138-
curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz
139-
( \
140-
cd openucx-ucx* && \
141-
./autogen.sh && \
142-
./configure \
143-
--prefix="${UCX_INSTALL_DIR}" \
144-
--enable-shared \
145-
--disable-static \
146-
--disable-doxygen-doc \
147-
--enable-optimizations \
148-
--enable-cma \
149-
--enable-devel-headers \
150-
--with-verbs \
151-
--with-dm \
152-
${UCX_CUDA_BUILD_ARGS} \
153-
--enable-mt && \
154-
make -j && \
155-
make -j install-strip && \
156-
$SUDO ldconfig \
157-
)
158-
159138
wget --tries=3 --waitretry=5 -O "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" "https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
160139
tar xjf "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
161140
rm "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
@@ -212,6 +191,28 @@ rm "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
212191

213192
fi # end NIXL_BASE_IMAGE_ENV check
214193

194+
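# Build UCX unconditionally, including in the nixl-base image (NOTE(review): confirm UCX_INSTALL_DIR is set before this point)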
195+
curl -fSsL "https://github.com/openucx/ucx/tarball/${UCX_VERSION}" | tar xz
196+
( \
197+
cd openucx-ucx* && \
198+
./autogen.sh && \
199+
./configure \
200+
--prefix="${UCX_INSTALL_DIR}" \
201+
--enable-shared \
202+
--disable-static \
203+
--disable-doxygen-doc \
204+
--enable-optimizations \
205+
--enable-cma \
206+
--enable-devel-headers \
207+
--with-verbs \
208+
--with-dm \
209+
${UCX_CUDA_BUILD_ARGS} \
210+
--enable-mt && \
211+
make -j && \
212+
make -j install-strip && \
213+
$SUDO ldconfig \
214+
)
215+
215216
# Ubuntu 22.04 specific setup
216217
if grep -q "Ubuntu 22.04" /etc/os-release 2>/dev/null; then
217218
# Upgrade pip for '--break-system-packages' support

contrib/Dockerfile

Lines changed: 37 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,8 @@ RUN ARCH_SUFFIX=$(if [ "${ARCH}" = "aarch64" ]; then echo "arm64"; else echo "am
9494
rm -f doca-host.deb && \
9595
apt-get update && \
9696
apt-get upgrade -y && \
97-
apt-get install -y --no-install-recommends doca-sdk-gpunetio libdoca-sdk-gpunetio-dev libdoca-sdk-verbs-dev && \
98-
apt-get clean && \
99-
rm -rf /var/lib/apt/lists/*
97+
apt-get install -y --no-install-recommends doca-sdk-gpunetio libdoca-sdk-gpunetio-dev libdoca-sdk-verbs-dev
98+
# No apt cleanup here: the next step still needs the DOCA apt repository package lists
10099

101100
# Force reinstall of RDMA packages from DOCA repository
102101
# The reinstall is needed to fix a broken libibverbs-dev, which may otherwise lead to a lack of InfiniBand support.
@@ -151,29 +150,6 @@ RUN rm -rf /usr/lib/ucx /opt/hpcx/ucx /usr/local/ucx && \
151150
rm -f /usr/lib/${ARCH}-linux-gnu/libucs* /usr/lib/${ARCH}-linux-gnu/libucp* \
152151
/usr/lib/${ARCH}-linux-gnu/libuct* /usr/lib/${ARCH}-linux-gnu/libucm* 2>/dev/null || true
153152

154-
RUN cd /usr/local/src && \
155-
git clone https://github.com/openucx/ucx.git && \
156-
cd ucx && \
157-
git checkout $UCX_REF && \
158-
./autogen.sh && ./configure \
159-
--prefix=$UCX_PREFIX \
160-
--enable-shared \
161-
--disable-static \
162-
--disable-doxygen-doc \
163-
--enable-optimizations \
164-
--enable-cma \
165-
--enable-devel-headers \
166-
--with-cuda=/usr/local/cuda \
167-
--with-verbs \
168-
--with-dm \
169-
--with-gdrcopy=/usr/local \
170-
--with-efa \
171-
--enable-mt && \
172-
make -j${NPROC:-$(nproc)} && \
173-
make -j${NPROC:-$(nproc)} install-strip && \
174-
ldconfig && \
175-
cd /usr/local/src && rm -rf ucx
176-
177153
RUN cd /tmp && \
178154
git clone --depth 1 https://github.com/google/gtest-parallel.git && \
179155
mkdir -p /usr/local/bin && \
@@ -182,8 +158,7 @@ RUN cd /tmp && \
182158
ENV PATH=/usr/local/bin:$PATH
183159

184160
# Build libfabric from source
185-
RUN cd /tmp && \
186-
wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
161+
RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
187162
"https://github.com/ofiwg/libfabric/releases/download/${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION#v}.tar.bz2" -O libfabric.tar.bz2 && \
188163
tar xjf libfabric.tar.bz2 && rm libfabric.tar.bz2 && \
189164
cd libfabric-* && \
@@ -207,6 +182,17 @@ RUN cd /tmp && \
207182
# Stage 2: Build NIXL (default stage)
208183
FROM nixl-base
209184

185+
# Re-declare ARGs needed in this stage
186+
ARG ARCH="x86_64"
187+
ARG NIXL_PREFIX="/usr/local/nixl"
188+
ARG NIXL_PLUGIN_DIR="$NIXL_PREFIX/lib/$ARCH-linux-gnu/plugins"
189+
ARG DEFAULT_PYTHON_VERSION="3.12"
190+
ARG UCX_REF="v1.19.0"
191+
ARG UCX_PREFIX="/usr"
192+
ARG UCX_PLUGIN_DIR="$UCX_PREFIX/lib/ucx"
193+
ARG LIBFABRIC_INSTALL_PATH="/usr/local"
194+
ARG NPROC
195+
210196
# By default, uv downloads python packages to $HOME/.cache/uv and hard links them
211197
# from the virtual environment. This means that the files reside in /root/.cache/uv,
212198
# which is not what we want since some systems mount user home dir into /root,
@@ -230,14 +216,29 @@ RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click t
230216
RUN export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \
231217
uv pip install torch torchvision torchaudio
232218

233-
# Re-declare ARGs needed in this stage
234-
ARG NIXL_PREFIX="/usr/local/nixl"
235-
ARG NIXL_PLUGIN_DIR="$NIXL_PREFIX/lib/x86_64-linux-gnu/plugins"
236-
ARG ARCH="x86_64"
237-
ARG DEFAULT_PYTHON_VERSION="3.12"
238-
ARG UCX_PREFIX="/usr"
239-
ARG UCX_PLUGIN_DIR="$UCX_PREFIX/lib/ucx"
240-
ARG LIBFABRIC_INSTALL_PATH="/usr/local"
219+
# Build UCX in stage 2 rather than in nixl-base: build-container.sh needs it here, while CI builds UCX itself via build.sh
220+
RUN cd /usr/local/src && \
221+
git clone https://github.com/openucx/ucx.git && \
222+
cd ucx && \
223+
git checkout $UCX_REF && \
224+
./autogen.sh && ./configure \
225+
--prefix=$UCX_PREFIX \
226+
--enable-shared \
227+
--disable-static \
228+
--disable-doxygen-doc \
229+
--enable-optimizations \
230+
--enable-cma \
231+
--enable-devel-headers \
232+
--with-cuda=/usr/local/cuda \
233+
--with-verbs \
234+
--with-dm \
235+
--with-gdrcopy=/usr/local \
236+
--with-efa \
237+
--enable-mt && \
238+
make -j${NPROC:-$(nproc)} && \
239+
make -j${NPROC:-$(nproc)} install-strip && \
240+
ldconfig && \
241+
cd /usr/local/src && rm -rf ucx
241242

242243
WORKDIR /workspace/nixl
243244
COPY . /workspace/nixl

0 commit comments

Comments
 (0)