Skip to content

Commit 607ecf9

Browse files
committed
Merge remote-tracking branch 'upstream/main' into upstream_main_npu_enabled
Signed-off-by: SlightwindSec <[email protected]>
2 parents d9e152f + 63f538a commit 607ecf9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+4915
-1882
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.bat text eol=crlf

.github/FUNDING.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
open_collective: bitsandbytes

.github/scripts/build-cuda.sh

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,20 @@ if [[ -v cuda_targets ]]; then
1111
elif [ "${build_arch}" = "aarch64" ]; then
1212
build_capability="75;80;90"
1313

14-
# CUDA 12.8+: Add sm100/sm120
14+
# CUDA 12.8-12.9: Add sm100/sm120
1515
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;90;100;120"
16+
17+
# CUDA 13.0+: Add sm100/sm110/sm120
18+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;90;100;110;120"
1619
else
17-
# By default, target Maxwell through Hopper.
18-
build_capability="50;52;60;61;70;75;80;86;89;90"
20+
# By default, target Pascal through Hopper.
21+
build_capability="60;70;75;80;86;89;90"
22+
23+
# CUDA 12.8-12.9: Add sm100 and sm120; remove < sm70 to align with PyTorch 2.8+cu128 minimum
24+
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="70;75;80;86;89;90;100;120"
1925

20-
# CUDA 12.8+: Add sm100 and sm120; remove < sm75 to align with PyTorch 2.7+cu128 minimum
21-
[[ "${cuda_version}" == 12.8.* || "${cuda_version}" == 12.9.* ]] && build_capability="75;80;86;89;90;100;120"
26+
# CUDA 13.0+: Remove < sm75 to align with PyTorch 2.9+cu130 minimum
27+
[[ "${cuda_version}" == 13.*.* ]] && build_capability="75;80;86;89;90;100;120"
2228
fi
2329

2430
[[ "${build_os}" = windows-* ]] && python3 -m pip install ninja
@@ -29,8 +35,8 @@ if [ "${build_os:0:6}" == ubuntu ]; then
2935
echo "Using image $image"
3036

3137
docker run -i -w /src -v "$PWD:/src" "$image" bash -c \
32-
"dnf update -y \
33-
&& dnf install cmake gcc-toolset-11 -y \
38+
"dnf -y --refresh update --security \
39+
&& dnf -y install cmake gcc-toolset-11 --setopt=install_weak_deps=False --setopt=tsflags=nodocs \
3440
&& source scl_source enable gcc-toolset-11 \
3541
&& cmake -DCOMPUTE_BACKEND=cuda -DCOMPUTE_CAPABILITY=\"${build_capability}\" . \
3642
&& cmake --build . --config Release"

.github/scripts/build-rocm.sh

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@ declare build_os
44
declare rocm_version
55

66
set -xeuo pipefail
7-
bnb_rocm_arch="gfx90a;gfx942;gfx1100"
7+
bnb_rocm_arch="gfx90a;gfx942;gfx1100;gfx1101"
8+
9+
# ROCm 6.4+ - Add gfx1200/gfx1201. Note we assume >=6.4.1.
10+
[[ "${rocm_version}" == 6.4.* || "${rocm_version}" == 7.*.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1200;gfx1201"
11+
12+
# ROCm 7.0+ - Add gfx950
13+
[[ "${rocm_version}" == 7.*.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950"
14+
815
if [ "${build_os:0:6}" == ubuntu ]; then
9-
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
10-
echo "Using image $image"
11-
docker run --rm --platform "linux/$build_arch" -i \
12-
-w /src -v "$PWD:/src" "$image" sh -c \
13-
"apt-get update \
16+
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
17+
echo "Using image $image"
18+
docker run --rm --platform "linux/$build_arch" -i \
19+
-w /src -v "$PWD:/src" "$image" sh -c \
20+
"apt-get update \
1421
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
1522
&& cmake -DCOMPUTE_BACKEND=hip -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
1623
&& cmake --build ."

.github/scripts/build-xpu-windows.bat

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
:: Builds the bitsandbytes XPU backend on a Windows CI runner.
::
:: Steps:
::   1. Download and silently install Intel Deep Learning Essentials.
::   2. Load the oneAPI environment and configure the project with CMake/Ninja.
::   3. Build the Release configuration.
::   4. Copy the produced DLLs into the output directory for this runner OS.
::
:: Environment read by this script:
::   RUNNER_TEMP - scratch directory for the installer payload and its log.
::   build_os    - runner OS label, used as a component of the output path.
set INTEL_DLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
set INTEL_DLE_TMP=%RUNNER_TEMP%\intel_dle
set INTEL_DLE_LOG=%RUNNER_TEMP%\intel_dle_log.txt

echo ::group::Intel Deep Learning Essentials Installation
:: --fail makes curl exit non-zero on an HTTP error instead of saving the error
:: page as the installer; --location follows redirects from the download host.
curl --fail --location -o intel-dle-installer.exe %INTEL_DLE_URL%
if ERRORLEVEL 1 (
    echo Failed to download Intel Deep Learning Essentials
    exit /b 1
)
start /wait "Intel DLE Install" intel-dle-installer.exe -f %INTEL_DLE_TMP% -l %INTEL_DLE_LOG% --silent -a --eula=accept -p=NEED_VS2022_INTEGRATION=0
:: Capture the installer's exit code immediately: printing the log below would
:: otherwise overwrite ERRORLEVEL and hide an installation failure.
set INTEL_DLE_RC=%ERRORLEVEL%
if exist "%INTEL_DLE_LOG%" type "%INTEL_DLE_LOG%"
if %INTEL_DLE_RC% GEQ 1 (
    echo Failed to install Intel Deep Learning Essentials
    exit /b 1
)
echo ::endgroup::

echo ::group::Build Environment Setup
:: setvars.bat puts the oneAPI compilers and libraries on PATH for this session.
call "%ProgramFiles(x86)%\Intel\oneAPI\setvars.bat"
cmake -G Ninja -DCOMPUTE_BACKEND=xpu -DCMAKE_BUILD_TYPE=Release .
if ERRORLEVEL 1 (
    echo Failed to setup environment
    exit /b 1
)
echo ::endgroup::

echo ::group::Building with XPU backend
cmake --build . --config Release
if ERRORLEVEL 1 (
    echo Build failed
    exit /b 1
)
echo ::endgroup::

:: Stage the built DLLs where the artifact upload step picks them up.
set output_dir=output\%build_os%\x86_64
if not exist "%output_dir%" mkdir "%output_dir%"
copy bitsandbytes\*.dll "%output_dir%\" 2>nul

.github/scripts/build-xpu.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Builds the bitsandbytes XPU shared library inside Intel's Deep Learning
# Essentials container and stages the result under output/<build_os>/x86_64.
#
# Environment:
#   build_os - runner OS label; the container build only runs when its first
#              six characters are "ubuntu".
declare build_os

set -xeuo pipefail

# XPU is currently built on Linux only.
if [[ "${build_os:0:6}" == ubuntu ]]; then
  # TODO: We might want to pre-build this as our own customized image in the future.
  docker_image=intel/deep-learning-essentials:2025.1.3-0-devel-ubuntu22.04
  echo "Using image $docker_image"

  # Command line executed by `sh -c` inside the container: install the build
  # prerequisites, then configure and build the XPU backend in /src.
  container_script="apt-get update "
  container_script+="&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends "
  container_script+="cmake bison intel-fw-gpu intel-ocloc "
  container_script+="&& cmake -DCOMPUTE_BACKEND=xpu . "
  container_script+="&& cmake --build . --config Release"

  # The working tree is bind-mounted at /src, so build outputs land in the checkout.
  docker run --rm -i -w /src -v "$PWD:/src" "$docker_image" sh -c "$container_script"
fi

staging_dir="output/${build_os}/x86_64"
mkdir -p "${staging_dir}"

# nullglob is enabled only inside this subshell so that extensions with no
# matching files expand to nothing instead of a literal pattern.
(
  shopt -s nullglob
  cp bitsandbytes/*.{so,dylib,dll} "${staging_dir}"
)

.github/workflows/python-package.yml

Lines changed: 66 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@ on:
66
branches: [main]
77
paths:
88
- ".github/workflows/python-package.yml"
9+
- ".github/scripts/**"
910
- "bitsandbytes/**"
1011
- "csrc/**"
1112
- "include/**"
1213
- "tests/**"
1314
- "CMakeLists.txt"
14-
- "requirements*.txt"
15+
- "MANIFEST.in"
1516
- "setup.py"
1617
- "pyproject.toml"
1718
release:
@@ -25,19 +26,19 @@ concurrency:
2526

2627
jobs:
2728
##
28-
# This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
29+
# This job matrix builds the CPU versions of the libraries for all supported platforms.
2930
##
30-
build-shared-libs:
31+
build-cpu:
3132
strategy:
3233
matrix:
3334
include:
3435
- os: ubuntu-22.04
3536
arch: x86_64
3637
- os: ubuntu-22.04-arm
3738
arch: aarch64
38-
- os: windows-latest
39+
- os: windows-2025
3940
arch: x86_64
40-
- os: macos-latest
41+
- os: macos-15
4142
arch: arm64
4243
runs-on: ${{ matrix.os }}
4344
steps:
@@ -56,37 +57,40 @@ jobs:
5657
name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
5758
path: output/*
5859
retention-days: 7
60+
5961
##
6062
# This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
6163
##
62-
build-shared-libs-cuda:
64+
build-cuda:
6365
strategy:
6466
fail-fast: false
6567
matrix:
66-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest]
68+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025]
6769
include:
6870
- os: ubuntu-22.04
6971
arch: x86_64
7072
- os: ubuntu-22.04-arm
7173
arch: aarch64
72-
- os: windows-latest
74+
- os: windows-2025
7375
arch: x86_64
7476
cuda_version:
75-
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1"]
77+
["11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.1", "12.6.3", "12.8.1", "12.9.1", "13.0.1"]
7678
runs-on: ${{ matrix.os }}
7779
steps:
7880
- uses: actions/checkout@v4
7981
# Windows: We install Cuda on the agent (slow)
80-
- uses: Jimver/cuda-toolkit@c35baa1a18fd1fc9dcf47c5bd839bf30559c0bc3 # v0.2.24
82+
- uses: N-Storm/cuda-toolkit@d68ba29a800229200a2c3f572f9e816d7f67cdb4 # v0.2.24m
8183
if: startsWith(matrix.os, 'windows')
8284
id: cuda-toolkit
8385
with:
84-
# Temporary: Use CUDA 12.9.0 for Windows until 12.9.1 is supported with this action.
85-
cuda: ${{ matrix.cuda_version == '12.9.1' && '12.9.0' || matrix.cuda_version }}
86+
# Temporary: Use CUDA 13.0.0 for Windows until 13.0.1 is supported with this action.
87+
cuda: ${{ matrix.cuda_version == '13.0.1' && '13.0.0' || matrix.cuda_version }}
8688
method: "network"
87-
sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]'
88-
linux-local-args: '["--toolkit"]'
89+
# The "crt", "nvvm", and "nvptxcompiler" components are added for CUDA 13.
90+
sub-packages: ${{ format('["nvcc"{0},"cudart","cusparse","cublas","thrust","cublas_dev","cusparse_dev"]', startsWith(matrix.cuda_version, '13.') && ',"crt","nvvm","nvptxcompiler"' || '') }}
8991
use-github-cache: false
92+
use-local-cache: false
93+
log-file-suffix: ${{matrix.os}}-${{matrix.cuda_version}}.txt
9094
- name: Setup MSVC
9195
if: startsWith(matrix.os, 'windows')
9296
uses: ilammy/[email protected] # to use cl
@@ -103,37 +107,56 @@ jobs:
103107
path: output/*
104108
retention-days: 7
105109

106-
build-shared-libs-rocm:
110+
build-xpu:
111+
strategy:
112+
matrix:
113+
os: [ubuntu-22.04, windows-2025]
114+
runs-on: ${{ matrix.os }}
115+
steps:
116+
- uses: actions/checkout@v4
117+
- name: Build C++ (Linux)
118+
if: runner.os == 'Linux'
119+
run: bash .github/scripts/build-xpu.sh
120+
env:
121+
build_os: ${{ matrix.os }}
122+
- name: Build C++ (Windows)
123+
if: runner.os == 'Windows'
124+
run: .github/scripts/build-xpu-windows.bat
125+
shell: cmd
126+
env:
127+
build_os: ${{ matrix.os }}
128+
- name: Upload build artifact
129+
uses: actions/upload-artifact@v4
130+
with:
131+
name: shared_library_xpu_${{ matrix.os }}_x86_64
132+
path: output/*
133+
retention-days: 7
134+
135+
build-rocm:
107136
strategy:
108137
matrix:
109138
os: [ubuntu-22.04]
110139
arch: [x86_64]
111-
rocm_version:
112-
["6.1.2", "6.2.4", "6.3.2"]
140+
rocm_version: ["6.2.4", "6.3.4", "6.4.4", "7.0.2"]
113141
runs-on: ${{ matrix.os }}
114142
steps:
115143
- uses: actions/checkout@v4
116-
- name: Set up Docker multiarch
117-
uses: docker/setup-qemu-action@v3
118144
- name: Clean up disk space
119145
run: |
146+
echo "Disk space before cleanup:"
147+
df -h
148+
149+
# These are the biggest disk space hogs.
120150
sudo rm -rf \
121-
/usr/share/dotnet \
122-
/opt/ghc \
123-
"/usr/local/share/boost" \
124-
"$AGENT_TOOLSDIRECTORY" \
125-
/opt/hostedtoolcache \
126-
/opt/google/chrome \
127-
/opt/microsoft/msedge \
128-
/opt/microsoft/powershell \
129-
/opt/pipx \
130-
/usr/lib/mono \
131-
/usr/local/julia* \
132-
/usr/local/lib/android \
133-
/usr/local/lib/node_modules \
134-
/usr/local/share/chromium \
135-
/usr/local/share/powershell \
136-
/usr/share/swift
151+
/opt/hostedtoolcache/CodeQL \
152+
/usr/lib/dotnet \
153+
/usr/lib/jvm \
154+
/usr/local/.ghcup \
155+
/usr/local/lib/android \
156+
/usr/share/swift
157+
158+
echo "Disk space after cleanup:"
159+
df -h
137160
- name: Build C++
138161
run: bash .github/scripts/build-rocm.sh
139162
env:
@@ -149,23 +172,24 @@ jobs:
149172

150173
build-wheels:
151174
needs:
152-
- build-shared-libs
153-
- build-shared-libs-cuda
154-
- build-shared-libs-rocm
175+
- build-cpu
176+
- build-cuda
177+
- build-rocm
178+
- build-xpu
155179
strategy:
156180
matrix:
157-
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-latest, macos-latest]
181+
os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
158182
include:
159183
- os: ubuntu-22.04
160184
arch: x86_64
161185
- os: ubuntu-22.04-arm
162186
arch: aarch64
163-
- os: windows-latest
187+
- os: windows-2025
164188
arch: x86_64
165-
- os: macos-latest
189+
- os: macos-15
166190
arch: arm64
167191
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
168-
# code. This ensures compatibility across Python versions, including Python 3.9, as compatibility is
192+
# code. This ensures compatibility across Python versions, as compatibility is
169193
# dictated by the packaged code itself, not the Python version used for packaging.
170194
python-version: ["3.10"]
171195
runs-on: ${{ matrix.os }}

0 commit comments

Comments
 (0)