Skip to content

Commit 3ee0e76

Browse files
authored
Merge branch 'main' into docs/quickstart-update
2 parents fe44bb7 + c3b8de2 commit 3ee0e76

File tree

18 files changed

+399
-313
lines changed

18 files changed

+399
-313
lines changed

.github/scripts/build-cuda.sh

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@ if [[ -v cuda_targets ]]; then
1111
elif [ "${build_arch}" = "aarch64" ]; then
1212
build_capability="75;80;90"
1313

14-
# CUDA 12.8+: Add sm100/sm120
14+
# CUDA 12.8-12.9: Add sm100/sm120
1515
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120"
16+
17+
# CUDA 13.0+: Add sm100/sm110/sm120
18+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120"
1619
else
1720
# By default, target Pascal through Hopper.
1821
build_capability="60;70;75;80;86;89;90"
1922

2023
# CUDA 12.8-12.9: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum
2124
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
25+
26+
# CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum
27+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
2228
fi
2329

2430
[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja
@@ -29,8 +35,8 @@ if [ "${build_os:0:6}" == ubuntu ]; then
2935
echo "Using image $image"
3036

3137
docker run -i -w /src -v "$PWD:/src" "$image" bash -c \
32-
"dnf update -y \
33-
&& dnf install cmake gcc-toolset-11 -y \
38+
"dnf -y --refresh update --security \
39+
&& dnf -y install cmake gcc-toolset-11 --setopt=install_weak_deps=False --setopt=tsflags=nodocs \
3440
&& source scl_source enable gcc-toolset-11 \
3541
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" . \
3642
&& cmake --build . --config Release"

.github/scripts/build-xpu.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
2+
declare build_os
3+
4+
set -xeuo pipefail
5+
6+
# We currently only build XPU on Linux.
7+
if [ "${build_os:0:6}" == ubuntu ]; then
8+
# TODO: We might want to pre-build this as our own customized image in the future.
9+
image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04
10+
echo "Using image $image"
11+
docker run --rm -i \
12+
-w /src -v "$PWD:/src" "$image" sh -c \
13+
"apt-get update \
14+
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
15+
cmake bison intel-fw-gpu intel-ocloc \
16+
&& cmake -DCOMPUTE_BACKEND=xpu . \
17+
&& cmake --build . --config Release"
18+
fi
19+
20+
output_dir="output/${build_os}/x86_64"
21+
mkdir -p "${output_dir}"
22+
(shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} "${output_dir}")

.github/workflows/python-package.yml

Lines changed: 46 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ on:
66
branches: [main]
77
paths:
88
- ".github/workflows/python-package.yml"
9+
- ".github/scripts/**"
910
- "bitsandbytes/**"
1011
- "csrc/**"
1112
- "include/**"
1213
- "tests/**"
1314
- "CMakeLists.txt"
14-
- "requirements*.txt"
15+
- "MANIFEST.in"
1516
- "setup.py"
1617
- "pyproject.toml"
1718
release:
@@ -25,19 +26,19 @@ concurrency:
2526

2627
jobs:
2728
##
28-
# This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
29+
# This job matrix builds the CPU versions of the libraries for all supported platforms.
2930
##
30-
build-shared-libs:
31+
build-cpu:
3132
strategy:
3233
matrix:
3334
include:
3435
- os: ubuntu-22.04
3536
arch: x86_64
3637
- os: ubuntu-22.04-arm
3738
arch: aarch64
38-
- os: windows-latest
39+
- os: windows-2025
3940
arch: x86_64
40-
- os: macos-latest
41+
- os: macos-15
4142
arch: arm64
4243
runs-on: ${{ matrix.os }}
4344
steps:
@@ -56,37 +57,40 @@ jobs:
5657
name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
5758
path: output/*
5859
retention-days: 7
60+
5961
##
6062
# This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
6163
##
62-
build-shared-libs-cuda:
64+
build-cuda:
6365
strategy:
6466
fail-fast: false
6567
matrix:
66-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest]
68+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025]
6769
include:
6870
- os: ubuntu-22.04
6971
arch: x86_64
7072
- os: ubuntu-22.04-arm
7173
arch: aarch64
72-
- os: windows-latest
74+
- os: windows-2025
7375
arch: x86_64
7476
cuda_version:
75-
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1"]
77+
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.1"]
7678
runs-on: ${{ matrix.os }}
7779
steps:
7880
- uses: actions/checkout@v4
7981
# Windows: We install Cuda on the agent (slow)
80-
- uses: Jimver/cuda-toolkit@c35baa1a18fd1fc9dcf47c5bd839bf30559c0bc3 # v0.2.24
82+
- uses: N-Storm/cuda-toolkit@d68ba29a800229200a2c3f572f9e816d7f67cdb4 # v0.2.24m
8183
if: startsWith(matrix.os, 'windows')
8284
id: cuda-toolkit
8385
with:
84-
# Temporary: Use CUDA 12.9.0 for Windows until 12.9.1 is supported with this action.
85-
cuda: ${{ matrix.cuda_version == '12.9.1' && '12.9.0' || matrix.cuda_version }}
86+
# Temporary: Use CUDA 13.0.0 for Windows until 13.0.1 is supported with this action.
87+
cuda: ${{ matrix.cuda_version == '13.0.1' && '13.0.0' || matrix.cuda_version }}
8688
method: "network"
87-
sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]'
88-
linux-local-args: '["--toolkit"]'
89+
# The "crt", "nvvm", and "nvptxcompiler" components are added for CUDA 13.
90+
sub-packages: ${{ format('["nvcc"{0},"cudart","cusparse","cublas","thrust","cublas_dev","cusparse_dev"]', startsWith(matrix.cuda_version, '13.') && ',"crt","nvvm","nvptxcompiler"' || '') }}
8991
use-github-cache: false
92+
use-local-cache: false
93+
log-file-suffix: ${{matrix.os}}-${{matrix.cuda_version}}.txt
9094
- name: Setup MSVC
9195
if: startsWith(matrix.os, 'windows')
9296
uses: ilammy/msvc-dev-cmd@v1.13.0 # to use cl
@@ -103,18 +107,34 @@ jobs:
103107
path: output/*
104108
retention-days: 7
105109

106-
build-shared-libs-rocm:
110+
build-xpu:
111+
strategy:
112+
matrix:
113+
os: [ubuntu-22.04]
114+
runs-on: ${{ matrix.os }}
115+
steps:
116+
- uses: actions/checkout@v4
117+
- name: Build C++
118+
run: bash .github/scripts/build-xpu.sh
119+
env:
120+
build_os: ${{ matrix.os }}
121+
- name: Upload build artifact
122+
uses: actions/upload-artifact@v4
123+
with:
124+
name: shared_library_xpu_${{ matrix.os }}_x86_64
125+
path: output/*
126+
retention-days: 7
127+
128+
build-rocm:
107129
strategy:
108130
matrix:
109131
os: [ubuntu-22.04]
110132
arch: [x86_64]
111133
rocm_version:
112-
["6.1.2", "6.2.4", "6.3.2"]
134+
["6.1.2", "6.2.4", "6.3.4", "6.4.4", "7.0"]
113135
runs-on: ${{ matrix.os }}
114136
steps:
115137
- uses: actions/checkout@v4
116-
- name: Set up Docker multiarch
117-
uses: docker/setup-qemu-action@v3
118138
- name: Clean up disk space
119139
run: |
120140
sudo rm -rf \
@@ -149,23 +169,24 @@ jobs:
149169

150170
build-wheels:
151171
needs:
152-
- build-shared-libs
153-
- build-shared-libs-cuda
154-
- build-shared-libs-rocm
172+
- build-cpu
173+
- build-cuda
174+
- build-rocm
175+
- build-xpu
155176
strategy:
156177
matrix:
157-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest, macos-latest]
178+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
158179
include:
159180
- os: ubuntu-22.04
160181
arch: x86_64
161182
- os: ubuntu-22.04-arm
162183
arch: aarch64
163-
- os: windows-latest
184+
- os: windows-2025
164185
arch: x86_64
165-
- os: macos-latest
186+
- os: macos-15
166187
arch: arm64
167188
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
168-
# code. This ensures compatibility across Python versions, including Python 3.9, as compatibility is
189+
# code. This ensures compatibility across Python versions, as compatibility is
169190
# dictated by the packaged code itself, not the Python version used for packaging.
170191
python-version: ["3.10"]
171192
runs-on: ${{ matrix.os }}

.github/workflows/tests.yml

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Unit tests
1+
name: Nightly Tests
22

33
on:
44
workflow_dispatch:
@@ -49,6 +49,7 @@ jobs:
4949
build-cuda:
5050
strategy:
5151
matrix:
52+
# TODO: Add 13.0.1 when we have runners with new enough drivers.
5253
cuda_version: ["11.8.0", "12.6.3", "12.8.1", "12.9.1"]
5354
os: [ubuntu-22.04, ubuntu-22.04-arm]
5455
include:
@@ -111,7 +112,7 @@ jobs:
111112
arch: aarch64
112113
- os: ubuntu-22.04-arm
113114
arch: aarch64
114-
torch_version: "2.5.1"
115+
torch_version: "2.5.1" # Higher minimum requirement for aarch64
115116
- os: windows-2025
116117
arch: x86_64
117118
- os: macos-15
@@ -136,7 +137,7 @@ jobs:
136137
- name: Setup Python
137138
uses: actions/setup-python@v5
138139
with:
139-
python-version: 3.9
140+
python-version: '3.10'
140141

141142
- name: Setup MSVC
142143
if: startsWith(matrix.os, 'windows')
@@ -182,7 +183,7 @@ jobs:
182183
- name: Setup Python
183184
uses: actions/setup-python@v5
184185
with:
185-
python-version: 3.9
186+
python-version: '3.10'
186187

187188
- name: Install dependencies
188189
run: |
@@ -313,7 +314,7 @@ jobs:
313314
- name: Setup Python
314315
uses: actions/setup-python@v5
315316
with:
316-
python-version: 3.9
317+
python-version: '3.10'
317318

318319
- name: Install PyTorch
319320
run: pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/xpu
@@ -343,20 +344,26 @@ jobs:
343344
os: [ubuntu-22.04, windows-2025]
344345
arch: [x86_64]
345346
gpu: [T4, L40S]
346-
cuda_version: ["11.8.0", "12.6.3", "12.8.1", "12.9.1"]
347+
cuda_version: ["11.8.0", "12.6.3", "12.8.1", "12.9.1"] #, "13.0.1"]
347348
include:
348349
- cuda_version: "11.8.0"
349350
torch_version: "2.3.1"
350351
pypi_index: "https://download.pytorch.org/whl/cu118"
351352
- cuda_version: "12.6.3"
352353
torch_version: "2.6.0"
353354
pypi_index: "https://download.pytorch.org/whl/cu126"
354-
- cuda_version: "12.8.1"
355-
torch_version: "2.7.1"
356-
pypi_index: "https://download.pytorch.org/whl/cu128"
357355
- cuda_version: "12.9.1"
358356
torch_version: "2.8.0"
359357
pypi_index: "https://download.pytorch.org/whl/cu129"
358+
- cuda_version: "12.8.1"
359+
torch_version: "2.9.0"
360+
pypi_index: "https://download.pytorch.org/whl/test/cu128"
361+
362+
# Note: Currently our runners do not have new enough drivers for CUDA 13.
363+
# Add this when supported.
364+
# - cuda_version: "13.0.1"
365+
# torch_version: "2.9.0"
366+
# pypi_index: "https://download.pytorch.org/whl/test/cu130"
360367

361368

362369
# Linux L40S runners
@@ -395,6 +402,8 @@ jobs:
395402
exclude:
396403
# Our current T4 Windows runner has a driver too old (471.11)
397404
# and cannot support CUDA 12+. Skip for now.
405+
- os: windows-2025
406+
cuda_version: "13.0.1"
398407
- os: windows-2025
399408
cuda_version: "12.9.1"
400409
- os: windows-2025
@@ -424,7 +433,7 @@ jobs:
424433
- name: Setup Python
425434
uses: actions/setup-python@v5
426435
with:
427-
python-version: 3.9
436+
python-version: '3.10'
428437

429438
- name: Install dependencies
430439
run: |

CMakeLists.txt

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ elseif(${COMPUTE_BACKEND} STREQUAL "xpu")
7070
message(FATAL_ERROR "XPU is not supported on macOS" )
7171
endif()
7272
set(BUILD_CUDA OFF)
73+
set(BUILD_HIP OFF)
7374
set(BUILD_MPS OFF)
7475
set(BUILD_XPU ON)
7576
else()
@@ -113,30 +114,36 @@ if(BUILD_CUDA)
113114
)
114115
endif()
115116

116-
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.4")
117-
message(FATAL_ERROR "CUDA Version < 11.4 is not supported")
118-
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
119-
message(FATAL_ERROR "CUDA Version > 12 is not supported")
117+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.8")
118+
message(FATAL_ERROR "CUDA Version < 11.8 is not supported")
119+
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "14.0")
120+
message(FATAL_ERROR "CUDA Version > 13 is not supported")
120121
endif()
121122

122123
# CMake < 3.23.0 does not define CMAKE_CUDA_ARCHITECTURES_ALL.
123124
if(CMAKE_VERSION VERSION_LESS "3.23.0")
124125
message(STATUS "CMake < 3.23.0; determining CUDA architectures supported...")
125126

126-
# 11.4+ supports these at a minimum.
127-
set(CMAKE_CUDA_ARCHITECTURES_ALL 50 52 53 60 61 62 70 72 75 80 86 87)
128-
set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 50 60 70 80)
129-
130-
# CUDA 11.8 adds support for Ada and Hopper.
131-
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "11.8")
132-
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 89 90)
133-
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 90)
134-
endif()
135-
136-
# CUDA 12.8 adds support for Blackwell.
137-
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.8")
138-
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 100 101 120)
139-
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 100 120)
127+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
128+
# Starting in CUDA 13.0, Thor Blackwell is renamed to SM110.
129+
# Support for architectures older than Turing (SM75) is removed.
130+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 75 80 86 87 88 89 90 100 103 110 120 121)
131+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 80 90 100 110 120)
132+
else()
133+
# 11.8-12.9 supports these at a minimum.
134+
set(CMAKE_CUDA_ARCHITECTURES_ALL 50 52 53 60 61 62 70 72 75 80 86 87 89 90)
135+
set(CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 50 60 70 80 90)
136+
137+
# CUDA 12.8 adds support for Blackwell.
138+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.8")
139+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 100 101 120 121)
140+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL_MAJOR 100 120)
141+
endif()
142+
143+
# CUDA 12.9 adds SM103 (Blackwell B300).
144+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.9")
145+
list(APPEND CMAKE_CUDA_ARCHITECTURES_ALL 103)
146+
endif()
140147
endif()
141148
endif()
142149

@@ -252,7 +259,7 @@ endif()
252259

253260
set_source_files_properties(${CPP_FILES} PROPERTIES LANGUAGE CXX)
254261
add_library(bitsandbytes SHARED ${SRC_FILES})
255-
target_compile_features(bitsandbytes PUBLIC cxx_std_14)
262+
target_compile_features(bitsandbytes PUBLIC cxx_std_17)
256263
target_include_directories(bitsandbytes PUBLIC csrc include)
257264

258265

0 commit comments

Comments
 (0)