Skip to content

Commit 6bcd19e

Browse files
authored
Merge branch 'main' into cpu_kernel
2 parents 4a9a6dc + 45553f7 commit 6bcd19e

File tree

24 files changed: 169 additions and 118 deletions.

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
*.bat text eol=crlf

.github/scripts/build-rocm.sh

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,20 @@ declare build_os
44
declare rocm_version
55

66
set -xeuo pipefail
7-
bnb_rocm_arch="gfx90a;gfx942;gfx1100"
7+
bnb_rocm_arch="gfx90a;gfx942;gfx1100;gfx1101"
8+
9+
# ROCm 6.4+ - Add gfx1200/gfx1201. Note we assume >=6.4.1.
10+
[[ "${rocm_version}" == 6.4.* || "${rocm_version}" == 7.*.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx1200;gfx1201"
11+
12+
# ROCm 7.0+ - Add gfx950
13+
[[ "${rocm_version}" == 7.*.* ]] && bnb_rocm_arch="${bnb_rocm_arch};gfx950"
14+
815
if [ "${build_os:0:6}" == ubuntu ]; then
9-
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
10-
echo "Using image $image"
11-
docker run --rm --platform "linux/$build_arch" -i \
12-
-w /src -v "$PWD:/src" "$image" sh -c \
13-
"apt-get update \
16+
image=rocm/dev-ubuntu-22.04:${rocm_version}-complete
17+
echo "Using image $image"
18+
docker run --rm --platform "linux/$build_arch" -i \
19+
-w /src -v "$PWD:/src" "$image" sh -c \
20+
"apt-get update \
1421
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \
1522
&& cmake -DCOMPUTE_BACKEND=hip -DBNB_ROCM_ARCH=\"${bnb_rocm_arch}\" . \
1623
&& cmake --build ."

.github/scripts/build-xpu-windows.bat

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
set INTEL_DLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
2+
set INTEL_DLE_TMP=%RUNNER_TEMP%\intel_dle
3+
set INTEL_DLE_LOG=%RUNNER_TEMP%\intel_dle_log.txt
4+
5+
echo ::group::Intel Deep Learning Essentials Installation
6+
curl -o intel-dle-installer.exe %INTEL_DLE_URL%
7+
start /wait "Intel DLE Install" intel-dle-installer.exe -f %INTEL_DLE_TMP% -l %INTEL_DLE_LOG% --silent -a --eula=accept -p=NEED_VS2022_INTEGRATION=0
8+
type %INTEL_DLE_LOG%
9+
if ERRORLEVEL 1 (
10+
echo Failed to install Intel Deep Learning Essentials
11+
exit /b 1
12+
)
13+
echo ::endgroup::
14+
15+
echo ::group::Build Environment Setup
16+
call "%ProgramFiles(x86)%\Intel\oneAPI\setvars.bat"
17+
cmake -G Ninja -DCOMPUTE_BACKEND=xpu -DCMAKE_BUILD_TYPE=Release .
18+
if ERRORLEVEL 1 (
19+
echo Failed to setup environment
20+
exit /b 1
21+
)
22+
echo ::endgroup::
23+
24+
echo ::group::Building with XPU backend
25+
cmake --build . --config Release
26+
if ERRORLEVEL 1 (
27+
echo Build failed
28+
exit /b 1
29+
)
30+
echo ::endgroup::
31+
32+
set output_dir=output\%build_os%\x86_64
33+
if not exist "%output_dir%" mkdir "%output_dir%"
34+
copy bitsandbytes\*.dll "%output_dir%\" 2>nul

.github/workflows/python-package.yml

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -110,14 +110,21 @@ jobs:
110110
build-xpu:
111111
strategy:
112112
matrix:
113-
os: [ubuntu-22.04]
113+
os: [ubuntu-22.04, windows-2025]
114114
runs-on: ${{ matrix.os }}
115115
steps:
116116
- uses: actions/checkout@v4
117-
- name: Build C++
117+
- name: Build C++ (Linux)
118+
if: runner.os == 'Linux'
118119
run: bash .github/scripts/build-xpu.sh
119120
env:
120121
build_os: ${{ matrix.os }}
122+
- name: Build C++ (Windows)
123+
if: runner.os == 'Windows'
124+
run: .github/scripts/build-xpu-windows.bat
125+
shell: cmd
126+
env:
127+
build_os: ${{ matrix.os }}
121128
- name: Upload build artifact
122129
uses: actions/upload-artifact@v4
123130
with:
@@ -130,30 +137,26 @@ jobs:
130137
matrix:
131138
os: [ubuntu-22.04]
132139
arch: [x86_64]
133-
rocm_version:
134-
["6.1.2", "6.2.4", "6.3.4", "6.4.4", "7.0"]
140+
rocm_version: ["6.2.4", "6.3.4", "6.4.4", "7.0.2"]
135141
runs-on: ${{ matrix.os }}
136142
steps:
137143
- uses: actions/checkout@v4
138144
- name: Clean up disk space
139145
run: |
146+
echo "Disk space before cleanup:"
147+
df -h
148+
149+
# These are the biggest disk space hogs.
140150
sudo rm -rf \
141-
/usr/share/dotnet \
142-
/opt/ghc \
143-
"/usr/local/share/boost" \
144-
"$AGENT_TOOLSDIRECTORY" \
145-
/opt/hostedtoolcache \
146-
/opt/google/chrome \
147-
/opt/microsoft/msedge \
148-
/opt/microsoft/powershell \
149-
/opt/pipx \
150-
/usr/lib/mono \
151-
/usr/local/julia* \
152-
/usr/local/lib/android \
153-
/usr/local/lib/node_modules \
154-
/usr/local/share/chromium \
155-
/usr/local/share/powershell \
156-
/usr/share/swift
151+
/opt/hostedtoolcache/CodeQL \
152+
/usr/lib/dotnet \
153+
/usr/lib/jvm \
154+
/usr/local/.ghcup \
155+
/usr/local/lib/android \
156+
/usr/share/swift
157+
158+
echo "Disk space after cleanup:"
159+
df -h
157160
- name: Build C++
158161
run: bash .github/scripts/build-rocm.sh
159162
env:

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
repos:
22
- repo: https://github.com/astral-sh/ruff-pre-commit
3-
rev: v0.11.2
3+
rev: v0.14.3
44
hooks:
55
- id: ruff
66
args:
@@ -17,6 +17,7 @@ repos:
1717
- id: mixed-line-ending
1818
args:
1919
- --fix=lf
20+
exclude: '\.bat$'
2021
- repo: https://github.com/crate-ci/typos
2122
rev: v1.26.0
2223
hooks:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ The library includes quantization primitives for 8-bit & 4-bit operations, throu
1919
## System Requirements
2020
bitsandbytes has the following minimum requirements for all platforms:
2121

22-
* Python 3.9+
22+
* Python 3.10+
2323
* [PyTorch](https://pytorch.org/get-started/locally/) 2.3+
2424
* _Note: While we aim to provide wide backwards compatibility, we recommend using the latest version of PyTorch for the best experience._
2525

benchmarking/matmul_benchmark.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def test_bench_matmul(batch, seq, model, hidden):
3535
B = torch.empty(hidden, model, dtype=torch.float16, device="cuda")
3636
torch.nn.init.xavier_uniform_(B)
3737

38-
B_fp4, state = F.quantize_fp4(B)
39-
B_fp4_c, state_c = F.quantize_fp4(B, compress_statistics=True)
38+
_B_fp4, _state = F.quantize_fp4(B)
39+
_B_fp4_c, _state_c = F.quantize_fp4(B, compress_statistics=True)
4040

4141
B_nf4, state_nf4 = F.quantize_nf4(B)
4242
B_nf4_c, state_nf4_c = F.quantize_nf4(B, compress_statistics=True)
@@ -117,8 +117,8 @@ def test_bench_matmul(batch, seq, model, hidden):
117117
f"B -> CB + threshold: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
118118
)
119119

120-
CA, SCA, _ = F.int8_vectorwise_quant(A, threshold=0.0)
121-
CB, SCB, _ = F.int8_vectorwise_quant(B)
120+
CA, _SCA, _ = F.int8_vectorwise_quant(A, threshold=0.0)
121+
CB, _SCB, _ = F.int8_vectorwise_quant(B)
122122
torch.cuda.synchronize()
123123
t0 = time.time()
124124
for i in range(iters):

bitsandbytes/__init__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,7 @@ def _import_backends():
5454
"""
5555
from importlib.metadata import entry_points
5656

57-
if sys.version_info < (3, 10):
58-
extensions = entry_points().get("bitsandbytes.backends", [])
59-
else:
60-
extensions = entry_points(group="bitsandbytes.backends")
57+
extensions = entry_points(group="bitsandbytes.backends")
6158

6259
for ext in extensions:
6360
try:
@@ -75,4 +72,4 @@ def _import_backends():
7572
"optim.optimizer.MockArgs": False,
7673
}
7774

78-
__version__ = "0.48.2.dev0"
75+
__version__ = "0.49.0.dev0"

bitsandbytes/autograd/_functions.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1+
from collections.abc import Callable
12
from dataclasses import dataclass
23
from math import prod
3-
from typing import Callable, Optional
4+
from typing import Optional
45
import warnings
56
from warnings import warn
67

@@ -257,7 +258,7 @@ def backward(ctx: torch.autograd.function.FunctionCtx, grad_output: torch.Tensor
257258
return torch.zeros_like(ctx.A), torch.zeros_like(ctx.B), None, bias_grad, None
258259

259260
req_gradA, req_gradB, _, req_gradBias, _ = ctx.needs_input_grad
260-
CAt, subA, A = ctx.tensors
261+
CAt, subA, _A = ctx.tensors
261262
SCAt, idx = ctx.tensor_states
262263
state: MatmulLtState = ctx.state
263264
grad_A = grad_B = grad_bias = None

bitsandbytes/backends/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
import torch
55

66
try:
7-
import triton # noqa: F401
87
import triton.language as tl # noqa: F401
98

9+
import triton # noqa: F401
10+
1011
triton_available = True
1112
except ImportError:
1213
triton_available = False

0 commit comments

Comments
 (0)