Skip to content

Commit 3e79948

Browse files
authored
Bindings: package separate wheels for CUDA 12 and 13 (#915)
* Package versioned wheel Signed-off-by: Ovidiu Mara <[email protected]> * Fix build for multiple python versions Signed-off-by: Ovidiu Mara <[email protected]> * Reformatting Signed-off-by: Ovidiu Mara <[email protected]> * Remove unused import Signed-off-by: Ovidiu Mara <[email protected]> * Add missing package Signed-off-by: Ovidiu Mara <[email protected]> * Install virtual environment the same way in all Dockerfiles for script compatibility Signed-off-by: Ovidiu Mara <[email protected]> * NIXL meta-package Signed-off-by: Ovidiu Mara <[email protected]> * Fix build in CI Signed-off-by: Ovidiu Mara <[email protected]> * Fix build Signed-off-by: Ovidiu Mara <[email protected]> * Revert changes to build.sh Signed-off-by: Ovidiu Mara <[email protected]> * Fix build issue Signed-off-by: Ovidiu Mara <[email protected]> * Simplify wheel build script Signed-off-by: Ovidiu Mara <[email protected]> * Fix build error due to wrong filename Signed-off-by: Ovidiu Mara <[email protected]> * Copy all wheels to dist/ Signed-off-by: Ovidiu Mara <[email protected]> * Replace hardcoded version with template Signed-off-by: Ovidiu Mara <[email protected]> * Add missing file Signed-off-by: Ovidiu Mara <[email protected]> * Fix build Signed-off-by: Ovidiu Mara <[email protected]> --------- Signed-off-by: Ovidiu Mara <[email protected]>
1 parent 420c39c commit 3e79948

File tree

11 files changed

+191
-32
lines changed

11 files changed

+191
-32
lines changed

benchmark/nixlbench/contrib/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ RUN rm -rf $VIRTUAL_ENV && uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION
183183
# Activate the virtual environment
184184
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
185185
# Install python dependencies
186-
RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click tabulate auditwheel
186+
RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click tabulate auditwheel tomlkit
187187
# Install PyTorch
188188
RUN CUDA_SHORT_VERSION=cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .) && \
189189
FLAGS="--index-url https://download.pytorch.org/whl/$CUDA_SHORT_VERSION" && \

contrib/Dockerfile

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -167,23 +167,41 @@ RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
167167
make install && \
168168
ldconfig
169169

170+
# By default, uv downloads python packages to $HOME/.cache/uv and hard links them
171+
# from the virtual environment. This means that the files reside in /root/.cache/uv,
172+
# which is not what we want since some systems mount user home dir into /root,
173+
# in which case the venv is broken when the container is started.
174+
# Set a custom cache directory inside /workspace to avoid this.
175+
ENV UV_CACHE_DIR=/workspace/.cache/uv
176+
RUN mkdir -p $UV_CACHE_DIR
177+
# Disable build isolation, i.e. uv should not create a new virtual environment for
178+
# building wheels. This is faster as it skips installing dependencies twice.
179+
ENV UV_NO_BUILD_ISOLATION=1
180+
# Disable syncing, i.e. uv will not download packages outside uv pip commands.
181+
ENV UV_NO_SYNC=1
182+
# Create a new virtual environment
183+
ENV VIRTUAL_ENV=/workspace/.venv
184+
RUN rm -rf $VIRTUAL_ENV && uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION
185+
# Activate the virtual environment
186+
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
187+
# Install python dependencies
188+
RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click tabulate auditwheel tomlkit
189+
# Install PyTorch
190+
RUN export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \
191+
uv pip install torch torchvision torchaudio
192+
170193
WORKDIR /workspace/nixl
171194
COPY . /workspace/nixl
172195

173196
ENV LD_LIBRARY_PATH=/usr/local/lib:$LIBFABRIC_INSTALL_PATH/lib:$LD_LIBRARY_PATH
174197

175-
ENV VIRTUAL_ENV=/workspace/nixl/.venv
176-
RUN rm -rf $VIRTUAL_ENV && uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION && \
177-
# pybind11 pip install needed for ubuntu 22.04
178-
uv pip install --upgrade "meson>=0.64.0" pybind11 patchelf
179-
180198
# Install pybind11 via apt
181199
RUN apt-get update && apt-get install -y --no-install-recommends pybind11-dev
182200

183201
ENV NIXL_PREFIX=$NIXL_PREFIX
184202
RUN rm -rf build && \
185203
mkdir build && \
186-
uv run meson setup -Dlibfabric_path=$LIBFABRIC_INSTALL_PATH build/ --prefix=$NIXL_PREFIX && \
204+
meson setup -Dlibfabric_path=$LIBFABRIC_INSTALL_PATH build/ --prefix=$NIXL_PREFIX && \
187205
cd build && \
188206
ninja && \
189207
ninja install
@@ -195,11 +213,13 @@ RUN echo "$NIXL_PREFIX/lib/$ARCH-linux-gnu" > /etc/ld.so.conf.d/nixl.conf && \
195213
RUN cd src/bindings/rust && cargo build --release --locked
196214

197215
# Build wheel using the build-wheel.sh script for better UCX plugin bundling and library management
198-
RUN ./contrib/build-wheel.sh \
216+
RUN export PATH=$VIRTUAL_ENV/bin:$PATH && \
217+
mkdir -p dist && \
218+
./contrib/build-wheel.sh \
199219
--python-version $DEFAULT_PYTHON_VERSION \
200220
--platform manylinux_2_39_$ARCH \
201221
--ucx-plugins-dir $UCX_PLUGIN_DIR \
202222
--nixl-plugins-dir $NIXL_PLUGIN_DIR \
203223
--output-dir /workspace/nixl/dist
204224

205-
RUN uv pip install dist/nixl-*cp${DEFAULT_PYTHON_VERSION//./}*.whl
225+
RUN uv pip install dist/nixl*cp${DEFAULT_PYTHON_VERSION//./}*.whl

contrib/Dockerfile.manylinux

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,7 @@ ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
166166

167167
ENV CUDA_PATH=/usr/local/cuda
168168

169-
WORKDIR /workspace/nixl
170-
ENV VIRTUAL_ENV=/workspace/nixl/.venv
171-
RUN uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION && \
172-
uv pip install --upgrade meson pybind11 patchelf
169+
WORKDIR /workspace
173170

174171
RUN rm -rf /usr/lib/ucx
175172
RUN rm -rf /opt/hpcx/ucx
@@ -228,11 +225,30 @@ RUN wget --tries=3 --waitretry=5 --timeout=30 --read-timeout=60 \
228225
make install && \
229226
ldconfig
230227

228+
# By default, uv downloads python packages to $HOME/.cache/uv and hard links them
229+
# from the virtual environment. This means that the files reside in /root/.cache/uv,
230+
# which is not what we want since some systems mount user home dir into /root,
231+
# in which case the venv is broken when the container is started.
232+
# Set a custom cache directory inside /workspace to avoid this.
233+
ENV UV_CACHE_DIR=/workspace/.cache/uv
234+
RUN mkdir -p $UV_CACHE_DIR
235+
# Create a new virtual environment
236+
ENV VIRTUAL_ENV=/workspace/.venv
237+
RUN rm -rf $VIRTUAL_ENV && uv venv $VIRTUAL_ENV --python $DEFAULT_PYTHON_VERSION
238+
# Activate the virtual environment
239+
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
240+
# Install python dependencies
241+
RUN uv pip install --upgrade meson meson-python pybind11 patchelf pyYAML click setuptools tabulate auditwheel tomlkit
242+
# Install PyTorch
243+
RUN export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \
244+
uv pip install torch torchvision torchaudio
245+
231246
COPY . /workspace/nixl
247+
WORKDIR /workspace/nixl
232248

233249
RUN rm -rf build && \
234250
mkdir build && \
235-
uv run meson setup build/ --prefix=/usr/local/nixl --buildtype=release \
251+
meson setup build/ --prefix=/usr/local/nixl --buildtype=release \
236252
-Dcudapath_lib="/usr/local/cuda/lib64" \
237253
-Dcudapath_inc="/usr/local/cuda/include" && \
238254
cd build && \
@@ -252,7 +268,10 @@ RUN echo "/usr/local/nixl/lib/$ARCH-linux-gnu" > /etc/ld.so.conf.d/nixl.conf &&
252268
ARG WHL_PYTHON_VERSIONS="3.9,3.10,3.11,3.12"
253269
ARG WHL_PLATFORM="manylinux_2_28_$ARCH"
254270
RUN IFS=',' read -ra PYTHON_VERSIONS <<< "$WHL_PYTHON_VERSIONS" && \
271+
export UV_INDEX="https://download.pytorch.org/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d .)" && \
272+
mkdir -p dist && \
255273
for PYTHON_VERSION in "${PYTHON_VERSIONS[@]}"; do \
274+
export PATH=$VIRTUAL_ENV/bin:$PATH && \
256275
./contrib/build-wheel.sh \
257276
--python-version $PYTHON_VERSION \
258277
--platform $WHL_PLATFORM \
@@ -261,4 +280,7 @@ RUN IFS=',' read -ra PYTHON_VERSIONS <<< "$WHL_PYTHON_VERSIONS" && \
261280
--output-dir dist ; \
262281
done
263282

264-
RUN uv pip install dist/nixl-*cp${DEFAULT_PYTHON_VERSION//./}*.whl
283+
# Copy the meta package wheel to the output directory
284+
RUN cp build/src/bindings/python/nixl-meta/nixl*.whl dist/
285+
286+
RUN uv pip install dist/nixl*cp${DEFAULT_PYTHON_VERSION//./}*.whl

contrib/build-wheel.sh

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -73,23 +73,24 @@ done
7373
set -e
7474
set -x
7575

76-
# Check for required dependencies
77-
if ! command -v uv &> /dev/null; then
78-
echo "Required dependency: uv is not installed. Please install it from https://astral.sh/uv/install.sh"
79-
exit 1
80-
fi
81-
8276
# Build the wheel
8377
TMP_DIR=$(mktemp -d)
78+
79+
CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1)
80+
# Must be 12 or 13
81+
# Compare as strings: with -ne an empty or non-numeric value makes `[` fail with
# status 2, which the `if` treats as false and the validation is silently skipped.
if [ "$CUDA_MAJOR" != "12" ] && [ "$CUDA_MAJOR" != "13" ]; then
82+
echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'"
83+
exit 1
84+
fi
85+
PKG_NAME="nixl-cu${CUDA_MAJOR}"
86+
./contrib/tomlutil.py --set-name $PKG_NAME pyproject.toml
8487
uv build --wheel --out-dir $TMP_DIR --python $PYTHON_VERSION
8588

8689
# Bundle libraries
87-
uv pip install auditwheel patchelf
88-
89-
uv run auditwheel repair --exclude 'libcuda*' --exclude 'libcufile*' --exclude 'libssl*' --exclude 'libcrypto*' --exclude 'libefa*' --exclude 'libhwloc*' --exclude 'libfabric*' $TMP_DIR/nixl-*.whl --plat $WHL_PLATFORM --wheel-dir $OUTPUT_DIR
90-
91-
uv run ./contrib/wheel_add_ucx_plugins.py --ucx-plugins-dir $UCX_PLUGINS_DIR --nixl-plugins-dir $NIXL_PLUGINS_DIR $OUTPUT_DIR/*.whl
90+
mkdir $TMP_DIR/dist
91+
auditwheel repair --exclude 'libcuda*' --exclude 'libcufile*' --exclude 'libssl*' --exclude 'libcrypto*' --exclude 'libefa*' --exclude 'libhwloc*' --exclude 'libfabric*' $TMP_DIR/nixl*.whl --plat $WHL_PLATFORM --wheel-dir $TMP_DIR/dist
92+
./contrib/wheel_add_ucx_plugins.py --ucx-plugins-dir $UCX_PLUGINS_DIR --nixl-plugins-dir $NIXL_PLUGINS_DIR $TMP_DIR/dist/*.whl
93+
# Make sure the destination exists; otherwise copying a single wheel would
# create a regular file named $OUTPUT_DIR instead of placing the wheel inside it.
mkdir -p "$OUTPUT_DIR"
cp "$TMP_DIR"/dist/*.whl "$OUTPUT_DIR"/
9294

9395
# Clean up
9496
rm -rf "$TMP_DIR"
95-

contrib/tomlutil.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/usr/bin/env python3
2+
3+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
# SPDX-License-Identifier: Apache-2.0
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
import argparse
19+
20+
import tomlkit
21+
22+
parser = argparse.ArgumentParser()
23+
parser.add_argument("--set-name", type=str, help="Set the project name")
24+
parser.add_argument("file", type=str, help="The toml file to modify")
25+
args = parser.parse_args()
26+
27+
with open(args.file) as f:
28+
doc = tomlkit.parse(f.read())
29+
30+
if args.set_name:
31+
doc["project"]["name"] = args.set_name
32+
33+
with open(args.file, "w") as f:
34+
f.write(tomlkit.dumps(doc))

contrib/wheel_add_ucx_plugins.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,12 @@ def add_plugins(wheel_path, sys_plugins_dir, install_dirname):
189189
"""
190190
temp_dir = extract_wheel(wheel_path)
191191

192-
pkg_libs_dir = os.path.join(temp_dir, "nixl.libs")
192+
pkg_name = wheel_path.split("/")[-1].split("-")[0]
193+
pkg_libs_dir = os.path.join(temp_dir, f"{pkg_name}.libs")
193194
if not os.path.exists(pkg_libs_dir):
194-
raise FileNotFoundError(f"nixl.libs directory not found in wheel: {wheel_path}")
195+
raise FileNotFoundError(
196+
f"{pkg_name}.libs directory not found in wheel: {wheel_path}"
197+
)
195198

196199
logger.debug("Listing existing libs:")
197200
name_map = get_repaired_lib_name_map(pkg_libs_dir)

meson.build

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,10 @@ if cuda_dep.found()
101101
nvcc_flags_link += ['-gencode=arch=compute_90,code=sm_90']
102102
add_project_link_arguments(nvcc_flags_link, language: 'cuda')
103103
message('nvcc version: ' + nvcc.version())
104-
if nvcc.version().version_compare('>=12.8')
104+
if nvcc.version().version_compare('>=12.8') and nvcc.version().version_compare('<13.0')
105105
doca_gpunetio_dep = dependency('doca-gpunetio', required : false)
106106
else
107-
warning('CUDA version is less than 12.8, GPUNETIO plugin will be disabled')
107+
warning('GPUNETIO plugin not supported in CUDA version: ' + nvcc.version())
108108
doca_gpunetio_dep = disabler()
109109
endif
110110
else
@@ -116,7 +116,7 @@ endif
116116
if cuda_dep.found()
117117
nvcc_cmd = find_program('nvcc', required: false)
118118
if nvcc_cmd.found()
119-
if nvcc_cmd.version().version_compare('>=12.8')
119+
if nvcc_cmd.version().version_compare('>=12.8') and nvcc_cmd.version().version_compare('<13.0')
120120
doca_gpunetio_dep = dependency('doca-gpunetio', required : false)
121121
else
122122
warning('CUDA version = ' + nvcc_cmd.version() + ', GPUNETIO plugin will be disabled')

src/bindings/python/meson.build

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,5 @@ py.extension_module('_utils',
3131
subdir: ('nixl'),
3232
dependencies: [pybind_dep],
3333
install: true)
34+
35+
subdir('nixl-meta')
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# nixl
2+
3+
This is a *meta package* that declares optional dependencies on CUDA variants.
4+
5+
Install one of:
6+
```bash
7+
pip install "nixl[cu12]" # for CUDA 12
8+
pip install "nixl[cu13]" # for CUDA 13
9+
```
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
source_dir = meson.current_source_dir()
17+
build_dir = meson.current_build_dir()
18+
19+
configure_file(
20+
input: 'pyproject.toml.in',
21+
output: 'pyproject.toml',
22+
configuration: { 'VERSION': meson.project_version() }
23+
)
24+
25+
uv = find_program('uv', required: false)
26+
if uv.found()
27+
# Expected output file of `uv build`; @0@ is replaced by the project version.
# NOTE(review): assumes the meta package builds a pure-Python wheel (py3-none-any) - confirm against pyproject.toml.in.
wheel_name = 'nixl-@0@-py3-none-any.whl'.format(meson.project_version())
28+
meta_wheel = custom_target(
29+
'build_nixl_meta',
30+
output: [wheel_name],
31+
command: [uv, 'build', '--wheel', '--out-dir', build_dir, build_dir],
32+
install: false,
33+
build_by_default: true
34+
)
35+
else
36+
warning('uv not found, skipping meta package build')
37+
endif

0 commit comments

Comments
 (0)