Skip to content

Commit 1c94701

Browse files
authored
Merge branch 'main' into main
2 parents 978eccf + c3b8de2 commit 1c94701

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3965
-1047
lines changed

.github/scripts/build-cuda.sh

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@ if [[ -v cuda_targets ]]; then
1111
elif [ "${build_arch}" = "aarch64" ]; then
1212
build_capability="75;80;90"
1313

14-
# CUDA 12.8+: Add sm100/sm120
14+
# CUDA 12.8-12.9: Add sm100/sm120
1515
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120"
16+
17+
# CUDA 13.0+: Add sm100/sm110/sm120
18+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120"
1619
else
17-
# By default, target Maxwell through Hopper.
18-
build_capability="50;60;70;75;80;86;89;90"
20+
# By default, target Pascal through Hopper.
21+
build_capability="60;70;75;80;86;89;90"
1922

2023
# CUDA 12.8-12.9: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum
2124
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
25+
26+
# CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum
27+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
2228
fi
2329

2430
[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja
@@ -29,8 +35,8 @@ if [ "${build_os:0:6}" == ubuntu ]; then
2935
echo "Using image $image"
3036

3137
docker run -i -w /src -v "$PWD:/src" "$image" bash -c \
32-
"dnf update -y \
33-
&& dnf install cmake gcc-toolset-11 -y \
38+
"dnf -y --refresh update --security \
39+
&& dnf -y install cmake gcc-toolset-11 --setopt=install_weak_deps=False --setopt=tsflags=nodocs \
3440
&& source scl_source enable gcc-toolset-11 \
3541
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" . \
3642
&& cmake --build . --config Release"

.github/scripts/build-xpu.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
declare build_os
3+
4+
set -xeuo pipefail
5+
6+
# We currently only build XPU on Linux.
7+
if [ "${build_os:0:6}" == ubuntu ]; then
8+
# TODO: We might want to pre-build this as our own customized image in the future.
9+
image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04
10+
echo "Using image $image"
11+
docker run --rm -i \
12+
-w /src -v "$PWD:/src" "$image" sh -c \
13+
"apt-get update \
14+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
15+
cmake bison intel-fw-gpu intel-ocloc \
16+
&& cmake -DCOMPUTE_BACKEND=xpu . \
17+
&& cmake --build . --config Release"
18+
fi
19+
20+
output_dir="output/${build_os}/x86_64"
21+
mkdir -p "${output_dir}"
22+
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")

.github/workflows/python-package.yml

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ on:
66
branches: [main]
77
paths:
88
- ".github/workflows/python-package.yml"
9+
- ".github/scripts/**"
910
- "bitsandbytes/**"
1011
- "csrc/**"
1112
- "include/**"
1213
- "tests/**"
1314
- "CMakeLists.txt"
14-
- "requirements*.txt"
15+
- "MANIFEST.in"
1516
- "setup.py"
1617
- "pyproject.toml"
1718
release:
@@ -25,19 +26,19 @@ concurrency:
2526

2627
jobs:
2728
##
28-
# This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
29+
# This job matrix builds the CPU versions of the libraries for all supported platforms.
2930
##
30-
build-shared-libs:
31+
build-cpu:
3132
strategy:
3233
matrix:
3334
include:
3435
- os: ubuntu-22.04
3536
arch: x86_64
3637
- os: ubuntu-22.04-arm
3738
arch: aarch64
38-
- os: windows-latest
39+
- os: windows-2025
3940
arch: x86_64
40-
- os: macos-latest
41+
- os: macos-15
4142
arch: arm64
4243
runs-on: ${{ matrix.os }}
4344
steps:
@@ -56,37 +57,40 @@ jobs:
5657
name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
5758
path: output/*
5859
retention-days: 7
60+
5961
##
6062
# This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
6163
##
62-
build-shared-libs-cuda:
64+
build-cuda:
6365
strategy:
6466
fail-fast: false
6567
matrix:
66-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest]
68+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025]
6769
include:
6870
- os: ubuntu-22.04
6971
arch: x86_64
7072
- os: ubuntu-22.04-arm
7173
arch: aarch64
72-
- os: windows-latest
74+
- os: windows-2025
7375
arch: x86_64
7476
cuda_version:
75-
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1"]
77+
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.1"]
7678
runs-on: ${{ matrix.os }}
7779
steps:
7880
- uses: actions/checkout@v4
7981
# Windows: We install Cuda on the agent (slow)
80-
- uses: Jimver/cuda-toolkit@c35baa1a18fd1fc9dcf47c5bd839bf30559c0bc3 # v0.2.24
82+
- uses: N-Storm/cuda-toolkit@d68ba29a800229200a2c3f572f9e816d7f67cdb4 # v0.2.24m
8183
if: startsWith(matrix.os, 'windows')
8284
id: cuda-toolkit
8385
with:
84-
# Temporary: Use CUDA 12.9.0 for Windows until 12.9.1 is supported with this action.
85-
cuda: ${{ matrix.cuda_version == '12.9.1' && '12.9.0' || matrix.cuda_version }}
86+
# Temporary: Use CUDA 13.0.0 for Windows until 13.0.1 is supported with this action.
87+
cuda: ${{ matrix.cuda_version == '13.0.1' && '13.0.0' || matrix.cuda_version }}
8688
method: "network"
87-
sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]'
88-
linux-local-args: '["--toolkit"]'
89+
# The "crt", "nvvm", and "nvptxcompiler" components are added for CUDA 13.
90+
sub-packages: ${{ format('["nvcc"{0},"cudart","cusparse","cublas","thrust","cublas_dev","cusparse_dev"]', startsWith(matrix.cuda_version, '13.') && ',"crt","nvvm","nvptxcompiler"' || '') }}
8991
use-github-cache: false
92+
use-local-cache: false
93+
log-file-suffix: ${{matrix.os}}-${{matrix.cuda_version}}.txt
9094
- name: Setup MSVC
9195
if: startsWith(matrix.os, 'windows')
9296
uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl
@@ -103,18 +107,34 @@ jobs:
103107
path: output/*
104108
retention-days: 7
105109

106-
build-shared-libs-rocm:
110+
build-xpu:
111+
strategy:
112+
matrix:
113+
os: [ubuntu-22.04]
114+
runs-on: ${{ matrix.os }}
115+
steps:
116+
- uses: actions/checkout@v4
117+
- name: Build C++
118+
run: bash .github/scripts/build-xpu.sh
119+
env:
120+
build_os: ${{ matrix.os }}
121+
- name: Upload build artifact
122+
uses: actions/upload-artifact@v4
123+
with:
124+
name: shared_library_xpu_${{ matrix.os }}_x86_64
125+
path: output/*
126+
retention-days: 7
127+
128+
build-rocm:
107129
strategy:
108130
matrix:
109131
os: [ubuntu-22.04]
110132
arch: [x86_64]
111133
rocm_version:
112-
["6.1.2", "6.2.4", "6.3.2"]
134+
["6.1.2", "6.2.4", "6.3.4", "6.4.4", "7.0"]
113135
runs-on: ${{ matrix.os }}
114136
steps:
115137
- uses: actions/checkout@v4
116-
- name: Set up Docker multiarch
117-
uses: docker/setup-qemu-action@v3
118138
- name: Clean up disk space
119139
run: |
120140
sudo rm -rf \
@@ -149,23 +169,24 @@ jobs:
149169

150170
build-wheels:
151171
needs:
152-
- build-shared-libs
153-
- build-shared-libs-cuda
154-
- build-shared-libs-rocm
172+
- build-cpu
173+
- build-cuda
174+
- build-rocm
175+
- build-xpu
155176
strategy:
156177
matrix:
157-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest, macos-latest]
178+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
158179
include:
159180
- os: ubuntu-22.04
160181
arch: x86_64
161182
- os: ubuntu-22.04-arm
162183
arch: aarch64
163-
- os: windows-latest
184+
- os: windows-2025
164185
arch: x86_64
165-
- os: macos-latest
186+
- os: macos-15
166187
arch: arm64
167188
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
168-
# code. This ensures compatibility across Python versions, including Python 3.9, as compatibility is
189+
# code. This ensures compatibility across Python versions, as compatibility is
169190
# dictated by the packaged code itself, not the Python version used for packaging.
170191
python-version: ["3.10"]
171192
runs-on: ${{ matrix.os }}

0 commit comments

Comments
 (0)