Skip to content

Commit 2417e5b

Browse files
authored
[CI] Build variant wheels for CUDA 12 and 13 (dmlc#11677)
* [CI] Don't set USE_DLOPEN_NCCL for CUDA 13 wheels
* Update pypi_variants.py to accommodate two variants of nvidia-nccl
* Update pypi_variants to accommodate cu12, cu13 suffixes
* nvidia-nccl-cu13 is now available on PyPI
* Fix formatting
* Upload CUDA 13 wheel to S3
* Create stub package xgboost-cu12
* Update scripts
* Update release_artifacts.py
1 parent cef9321 commit 2417e5b

12 files changed

+152
-40
lines changed

.github/workflows/python_wheels_winarm64.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
- name: Build XGBoost Python wheel for Win-ARM64
4747
run: |
4848
# Patch to rename pkg to xgboost-cpu
49-
python ops/script/pypi_variants.py --use-cpu-suffix=1 --require-nccl-dep=0
49+
python ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na
5050
cd python-package
5151
mkdir -p wheelhouse
5252
pip wheel --no-deps -v . --wheel-dir wheelhouse/

ops/pipeline/build-cuda-impl.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,14 @@ else
1111
cmake_args=''
1212
fi
1313

14-
if [[ "${USE_FEDERATED:-}" == 1 ]]
14+
if [[ "${USE_FEDERATED:-0}" == 1 ]]
1515
then
1616
cmake_args="${cmake_args} -DPLUGIN_FEDERATED=ON"
1717
else
1818
cmake_args="${cmake_args} -DPLUGIN_FEDERATED=OFF"
1919
fi
2020

21-
if [[ "${USE_RMM:-}" == 1 ]]
21+
if [[ "${USE_RMM:-0}" == 1 ]]
2222
then
2323
cmake_prefix_path='/opt/grpc;/opt/rmm;/opt/rmm/lib64/rapids/cmake'
2424
cmake_args="${cmake_args} -DPLUGIN_RMM=ON"

ops/pipeline/build-cuda13.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@ fi
3232

3333
set -x
3434

35-
# Remove nvidia-nccl-cu12 from the list of Python deps
36-
# nvidia-nccl-cu13 is not yet available on PyPI
37-
python3 ops/script/pypi_variants.py --use-cpu-suffix=0 --require-nccl-dep=0
35+
python3 ops/script/pypi_variants.py --use-suffix=cu13 --require-nccl-dep=cu13
3836

3937
python3 ops/docker_run.py \
4038
--image-uri ${BUILD_IMAGE_URI} \
@@ -56,3 +54,12 @@ fi
5654

5755
# Check size of wheel
5856
pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl
57+
58+
echo "--- Upload Python wheel"
59+
if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]]
60+
then
61+
python3 ops/pipeline/manage-artifacts.py upload \
62+
--s3-bucket xgboost-nightly-builds \
63+
--prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \
64+
python-package/dist/*.whl
65+
fi

ops/pipeline/build-python-wheels-arm64.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}
1919
echo "--- Build CPU code targeting ARM64"
2020
set -x
2121

22-
python3 ops/script/pypi_variants.py --use-cpu-suffix=0 --require-nccl-dep=0
22+
python3 ops/script/pypi_variants.py --use-suffix=na --require-nccl-dep=na
2323
python3 ops/docker_run.py \
2424
--image-uri ${IMAGE_URI} \
2525
-- ops/pipeline/build-python-wheels-arm64-impl.sh

ops/pipeline/build-python-wheels-cpu.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ echo "--- Build binary wheel for ${WHEEL_TAG} (CPU only)"
3131
set -x
3232

3333
# Patch to rename pkg to xgboost-cpu
34-
python3 ops/script/pypi_variants.py --use-cpu-suffix=1 --require-nccl-dep=0
34+
python3 ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na
3535
python3 ops/docker_run.py \
3636
--image-uri "${IMAGE_URI}" \
3737
-- bash -c \

ops/pipeline/build-variant-wheels.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ then
1010
fi
1111

1212
image_repo='xgb-ci.gpu_build_rockylinux8'
13+
export USE_RMM=0
14+
export USE_FEDERATED=0
1315

1416
source ops/pipeline/classify-git-branch.sh
1517
source ops/pipeline/get-docker-registry-details.sh
@@ -27,13 +29,12 @@ then
2729
else
2830
export BUILD_ONLY_SM75=0
2931
fi
30-
export USE_RMM=0
3132

3233
set -x
3334

3435
python3 ops/docker_run.py \
3536
--image-uri ${BUILD_IMAGE_URI} \
36-
--run-args='-e BUILD_ONLY_SM75 -e USE_RMM' \
37+
--run-args='-e BUILD_ONLY_SM75 -e USE_RMM -e USE_FEDERATED' \
3738
-- ops/pipeline/build-cuda-impl.sh
3839

3940
echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard"

ops/pipeline/build-win64-cpu.ps1

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Write-Host "--- Build binary wheel"
1818
cd ..
1919
# Patch to rename pkg to xgboost-cpu
2020
conda activate
21-
python ops/script/pypi_variants.py --use-cpu-suffix=1 --require-nccl-dep=0
21+
python ops/script/pypi_variants.py --use-suffix=cpu --require-nccl-dep=na
2222
if ($LASTEXITCODE -ne 0) { throw "Last command failed" }
2323

2424
cd python-package

ops/script/change_version.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,17 @@ def pypkg(
5252
with open(pyver_path, "w") as fd:
5353
fd.write(pyver + "\n")
5454

55-
pyprj_path = os.path.join("pyproject.toml.in")
56-
with open(pyprj_path, "r") as fd:
57-
pyprj = fd.read()
58-
matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj)
59-
assert matched, "Couldn't find version string in pyproject.toml."
60-
pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :]
61-
with open(pyprj_path, "w") as fd:
62-
fd.write(pyprj)
63-
64-
make_pyproject(use_cpu_suffix=0, require_nccl_dep=1)
55+
for pyprj_file in ["pyproject.toml.in", "pyproject.toml.stub.in"]:
56+
pyprj_path = os.path.join(pyprj_file)
57+
with open(pyprj_path, "r") as fd:
58+
pyprj = fd.read()
59+
matched = re.search('version = "' + r"([0-9]+\.[0-9]+\.[0-9]+.*)" + '"', pyprj)
60+
assert matched, "Couldn't find version string in pyproject.toml."
61+
pyprj = pyprj[: matched.start(1)] + pyver + pyprj[matched.end(1) :]
62+
with open(pyprj_path, "w") as fd:
63+
fd.write(pyprj)
64+
65+
make_pyproject(use_suffix="na", require_nccl_dep="cu12")
6566

6667

6768
@cd(R_PACKAGE)

ops/script/pypi_variants.py

Lines changed: 61 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,21 @@
22

33
import argparse
44
import os
5+
import tomllib
56

7+
from packaging.version import Version
68
from test_utils import PY_PACKAGE
79

810
IN_PATH = os.path.join(PY_PACKAGE, "pyproject.toml.in")
11+
STUB_IN_PATH = os.path.join(PY_PACKAGE, "pyproject.toml.stub.in")
912
OUT_PATH = os.path.join(PY_PACKAGE, "pyproject.toml")
1013

11-
NCCL_WHL = """ \"nvidia-nccl-cu12 ; platform_system == 'Linux' and platform_machine != 'aarch64'\","""
14+
NCCL_WHL = """ \"nvidia-nccl-{0} ; platform_system == 'Linux' and platform_machine != 'aarch64'\","""
1215

1316
NAME = "{{ name }}"
1417
NCCL = "{{ nccl }}"
18+
VERSION = "{{ version }}"
19+
CUDA_VARIANTS = ["cu12", "cu13"]
1520

1621

1722
def copyfile(src: str, dst: str) -> None:
@@ -21,22 +26,55 @@ def copyfile(src: str, dst: str) -> None:
2126
fd.write(content)
2227

2328

24-
def make_pyproject(*, use_cpu_suffix: int, require_nccl_dep: int) -> None:
25-
if use_cpu_suffix == 1 and require_nccl_dep == 1:
29+
def make_pyproject(
30+
*, use_suffix: str, require_nccl_dep: str, create_stub: bool = False
31+
) -> None:
32+
if use_suffix == "cpu" and require_nccl_dep != "na":
2633
raise ValueError(
2734
"xgboost-cpu cannot require NCCL dependency. "
28-
"If --use-cpu-suffix=1, you must set --require-nccl-dep=0."
35+
"When setting --use-suffix='cpu', you must also set --require-nccl-dep='na'."
2936
)
37+
if (
38+
use_suffix in CUDA_VARIANTS
39+
and require_nccl_dep in CUDA_VARIANTS
40+
and use_suffix != require_nccl_dep
41+
):
42+
raise ValueError(
43+
"Inconsistent choices for --use-suffix and --require-nccl-dep. "
44+
"When --use-suffix is set to one of {{{0}}}, --require-nccl-dep must be "
45+
"set to identical value as --use-suffix.".format(",".join(CUDA_VARIANTS))
46+
)
47+
if create_stub:
48+
if use_suffix == "na":
49+
raise ValueError("To create a stub package, --use-suffix must not be 'na'")
50+
if require_nccl_dep != "na":
51+
raise ValueError(
52+
"To create a stub package, --require-nccl-dep must be 'na'"
53+
)
3054

31-
with open(IN_PATH) as fd:
55+
with open(STUB_IN_PATH if create_stub else IN_PATH) as fd:
3256
pyproject = fd.read()
3357

3458
readme_dft = os.path.join(PY_PACKAGE, "README.dft.rst")
3559
readme_cpu = os.path.join(PY_PACKAGE, "README.cpu.rst")
60+
readme_stub = os.path.join(PY_PACKAGE, "README.stub.rst")
3661
readme = os.path.join(PY_PACKAGE, "README.rst")
37-
pyproject = pyproject.replace(NAME, "xgboost-cpu" if use_cpu_suffix else "xgboost")
38-
copyfile(readme_cpu if use_cpu_suffix else readme_dft, readme)
39-
pyproject = pyproject.replace(NCCL, NCCL_WHL if require_nccl_dep else "")
62+
pyproject = pyproject.replace(
63+
NAME, f"xgboost-{use_suffix}" if use_suffix != "na" else "xgboost"
64+
)
65+
if create_stub:
66+
copyfile(readme_stub, readme)
67+
pyproject_parsed = tomllib.loads(pyproject)
68+
pyproject = pyproject.replace(
69+
VERSION, str(Version(pyproject_parsed["project"]["version"]))
70+
)
71+
elif use_suffix == "cpu":
72+
copyfile(readme_cpu, readme)
73+
else:
74+
copyfile(readme_dft, readme)
75+
pyproject = pyproject.replace(
76+
NCCL, NCCL_WHL.format(require_nccl_dep) if require_nccl_dep != "na" else ""
77+
)
4078
pyproject = (
4179
f"# Generated by `{os.path.basename(__file__)}`, don't edit.\n" + pyproject
4280
)
@@ -48,21 +86,27 @@ def make_pyproject(*, use_cpu_suffix: int, require_nccl_dep: int) -> None:
4886
if __name__ == "__main__":
4987
parser = argparse.ArgumentParser()
5088
parser.add_argument(
51-
"--use-cpu-suffix",
52-
type=int,
53-
choices=[0, 1],
54-
required=True,
55-
help="Whether to rename the package name to xgboost-cpu",
89+
"--use-suffix",
90+
type=str,
91+
choices=["na", "cpu"] + CUDA_VARIANTS,
92+
default="na",
93+
help="When using this option, rename the package name to xgboost-[suffix]. Set to 'na' to disable",
5694
)
5795
parser.add_argument(
5896
"--require-nccl-dep",
59-
type=int,
60-
choices=[0, 1],
97+
type=str,
98+
choices=["na"] + CUDA_VARIANTS,
6199
required=True,
62-
help="Whether to require the NCCL dependency",
100+
help="Which NCCL dependency to use; select 'na' to remove NCCL dependency",
101+
)
102+
parser.add_argument(
103+
"--create-stub",
104+
action="store_true",
105+
help="Create a stub package that redirects users to install `xgboost`",
63106
)
64107
args = parser.parse_args()
65108
make_pyproject(
66-
use_cpu_suffix=args.use_cpu_suffix,
109+
use_suffix=args.use_suffix,
67110
require_nccl_dep=args.require_nccl_dep,
111+
create_stub=args.create_stub,
68112
)

ops/script/release_artifacts.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def make_python_sdist(
111111

112112
# Build sdist for `xgboost-cpu`.
113113
with DirectoryExcursion(ROOT):
114-
make_pyproject(use_cpu_suffix=1, require_nccl_dep=0)
114+
make_pyproject(use_suffix="cpu", require_nccl_dep="na")
115115
with DirectoryExcursion(ROOT / "python-package"):
116116
subprocess.run(["python", "-m", "build", "--sdist"], check=True)
117117
sdist_name = (
@@ -126,7 +126,7 @@ def make_python_sdist(
126126

127127
# Build sdist for `xgboost`.
128128
with DirectoryExcursion(ROOT):
129-
make_pyproject(use_cpu_suffix=0, require_nccl_dep=1)
129+
make_pyproject(use_suffix="na", require_nccl_dep="cu12")
130130

131131
with DirectoryExcursion(ROOT / "python-package"):
132132
subprocess.run(["python", "-m", "build", "--sdist"], check=True)
@@ -140,6 +140,22 @@ def make_python_sdist(
140140
dest = dist_dir / sdist_name
141141
shutil.move(src, dest)
142142

143+
# Build stub package `xgboost-cu12`.
144+
with DirectoryExcursion(ROOT):
145+
make_pyproject(use_suffix="cu12", require_nccl_dep="na", create_stub=True)
146+
147+
with DirectoryExcursion(ROOT / "python-package"):
148+
subprocess.run(["python", "-m", "build", "--sdist"], check=True)
149+
sdist_name = (
150+
f"xgboost_cu12-{release}{rc}{rc_ver}.tar.gz"
151+
if rc
152+
else f"xgboost_cu12-{release}.tar.gz"
153+
)
154+
src = DIST / sdist_name
155+
subprocess.run(["twine", "check", str(src)], check=True)
156+
dest = dist_dir / sdist_name
157+
shutil.move(src, dest)
158+
143159

144160
def download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None:
145161
"""Download all Python binary wheels for the specified branch."""

0 commit comments

Comments
 (0)