Skip to content

Commit 516db1e

Browse files
committed
CI: Unified multi-stage Docker build (x86, Ubuntu24)
Consolidates the NIXL build procedures by using the multi-stage build in contrib/Dockerfile for both CI and customer workflows.

Signed-off-by: Alexey Rivkin <[email protected]>
1 parent 38e2585 commit 516db1e

File tree

3 files changed: 64 additions and 9 deletions

.ci/jenkins/lib/build-matrix.yaml

Lines changed: 21 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,10 @@
2222
---
2323
job: nixl-ci-build
2424

25+
registry_host: harbor.mellanox.com
26+
registry_auth: nixl_harbor_credentials
27+
registry_path: /nixl
28+
2529
# Fail the job as soon as one of the steps fails, or continue with the remaining steps
2630
failFast: false
2731

@@ -34,17 +38,30 @@ kubernetes:
3438
requests: "{memory: 8Gi, cpu: 8000m}"
3539

3640
runs_on_dockers:
37-
- { name: "ubuntu24.04-cuda-dl-base", url: "nvcr.io/nvidia/cuda-dl-base:25.06-cuda12.9-devel-ubuntu24.04" }
38-
- { name: "ubuntu22.04-cuda-dl-base", url: "nvcr.io/nvidia/cuda-dl-base:24.10-cuda12.6-devel-ubuntu22.04" }
39-
- { name: "podman-v5.0.2", url: "quay.io/podman/stable:v5.0.2", category: 'tool', privileged: true }
41+
- {
42+
file: "contrib/Dockerfile",
43+
name: "ubuntu24.04-nixl-deps",
44+
uri: "$arch/$name",
45+
tag: "02112025",
46+
build_args: "--target nixl-base"
47+
}
48+
# - {
49+
# file: "contrib/Dockerfile",
50+
# name: "ubuntu22.04-nixl-deps",
51+
# uri: "$arch/$name",
52+
# tag: "02112025",
53+
# build_args: "--target nixl-base --build-arg BASE_IMAGE_TAG=24.10-cuda12.6-devel-ubuntu22.04"
54+
# }
55+
# - { name: "podman-v5.0.2", url: "quay.io/podman/stable:v5.0.2", category: 'tool', privileged: true }
4056

4157
matrix:
4258
axes:
4359
arch:
4460
- x86_64
45-
- aarch64
61+
# - aarch64
4662

4763
env:
64+
NIXL_BASE_IMAGE_ENV: "true"
4865
NIXL_INSTALL_DIR: /opt/nixl
4966
TEST_TIMEOUT: 30
5067
UCX_TLS: "^shm"

.gitlab/build.sh

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,12 @@ fi
5353
ARCH=$(uname -m)
5454
[ "$ARCH" = "arm64" ] && ARCH="aarch64"
5555

56+
# Skip dependency installation if running in a pre-built base image
57+
if [ -n "${NIXL_BASE_IMAGE_ENV}" ]; then
58+
UCX_INSTALL_DIR=/usr
59+
LIBFABRIC_INSTALL_DIR=/usr/local
60+
else
61+
5662
# Some docker images come with broken installations:
5763
$SUDO rm -rf /usr/lib/cmake/grpc /usr/lib/cmake/protobuf
5864

@@ -215,6 +221,8 @@ rm "libfabric-${LIBFABRIC_VERSION#v}.tar.bz2"
215221
cp gtest-parallel/* ${INSTALL_DIR}/bin/
216222
)
217223

224+
fi # end NIXL_BASE_IMAGE_ENV check
225+
218226
export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:${INSTALL_DIR}/lib/$ARCH-linux-gnu:${INSTALL_DIR}/lib64:$LD_LIBRARY_PATH:${LIBFABRIC_INSTALL_DIR}/lib"
219227
export CPATH="${INSTALL_DIR}/include:${LIBFABRIC_INSTALL_DIR}/include:$CPATH"
220228
export PATH="${INSTALL_DIR}/bin:$PATH"

contrib/Dockerfile

Lines changed: 35 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
1717
ARG BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
1818
ARG OS
1919

20-
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}
20+
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl-base
2121

2222
# Set default OS if not provided
2323
ARG OS=${OS:-ubuntu24}
@@ -62,7 +62,28 @@ RUN apt-get update -y && \
6262
clang \
6363
hwloc \
6464
libhwloc-dev \
65-
libcurl4-openssl-dev libssl-dev uuid-dev zlib1g-dev # aws-sdk-cpp dependencies
65+
libcurl4-openssl-dev libssl-dev uuid-dev zlib1g-dev \
66+
python3-dev \
67+
python3-pip \
68+
curl \
69+
wget \
70+
numactl \
71+
git \
72+
libiberty-dev \
73+
libgoogle-glog-dev \
74+
libgmock-dev \
75+
libjsoncpp-dev \
76+
libpython3-dev \
77+
libboost-all-dev \
78+
meson \
79+
pkg-config \
80+
pybind11-dev \
81+
net-tools \
82+
iproute2 \
83+
pciutils \
84+
libpci-dev \
85+
libibmad-dev \
86+
doxygen
6687

6788
# Add DOCA repository and install packages
6889
RUN ARCH_SUFFIX=$(if [ "${ARCH}" = "aarch64" ]; then echo "arm64"; else echo "amd64"; fi) && \
@@ -167,6 +188,9 @@ RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
167188
make install && \
168189
ldconfig
169190

191+
# Stage 2: Build NIXL (default stage)
192+
FROM nixl-base
193+
170194
# By default, uv downloads python packages to $HOME/.cache/uv and hard links them
171195
# from the virtual environment. This means that the files reside in /root/.cache/uv,
172196
# which is not what we want since some systems mount user home dir into /root,
@@ -190,14 +214,20 @@ RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click t
190214
RUN export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \
191215
uv pip install torch torchvision torchaudio
192216

217+
# Re-declare ARGs needed in this stage
218+
ARG NIXL_PREFIX="/usr/local/nixl"
219+
ARG NIXL_PLUGIN_DIR="$NIXL_PREFIX/lib/x86_64-linux-gnu/plugins"
220+
ARG ARCH="x86_64"
221+
ARG DEFAULT_PYTHON_VERSION="3.12"
222+
ARG UCX_PREFIX="/usr"
223+
ARG UCX_PLUGIN_DIR="$UCX_PREFIX/lib/ucx"
224+
ARG LIBFABRIC_INSTALL_PATH="/usr/local"
225+
193226
WORKDIR /workspace/nixl
194227
COPY . /workspace/nixl
195228

196229
ENV LD_LIBRARY_PATH=/usr/local/lib:$LIBFABRIC_INSTALL_PATH/lib:$LD_LIBRARY_PATH
197230

198-
# Install pybind11 via apt
199-
RUN apt-get update && apt-get install -y --no-install-recommends pybind11-dev
200-
201231
ENV NIXL_PREFIX=$NIXL_PREFIX
202232
RUN rm -rf build && \
203233
mkdir build && \

0 commit comments

Comments
 (0)