
Commit 3183abb

Merge branch 'main' into ipex

2 parents cf8bc14 + 1d4ea6a

21 files changed: +351 −662 lines

.github/workflows/tests.yml

Lines changed: 51 additions & 7 deletions

@@ -93,24 +93,32 @@ jobs:
           path: output/${{ matrix.os }}/${{ matrix.arch }}/*
           retention-days: 7

-  cpu-tests:
+  test-cpu:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cpu
     strategy:
       fail-fast: false
       matrix:
         os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15]
-        torch_version: ["2.6.0", "2.7.0"]
+        # Test with the oldest supported torch version and the two newest.
+        torch_version: ["2.2.2", "2.6.0", "2.7.0"]
         include:
           - os: ubuntu-22.04
             arch: x86_64
             runner: banb-aws-general-8-plus-use1-public-80
           - os: ubuntu-22.04-arm
             arch: aarch64
+          - os: ubuntu-22.04-arm
+            arch: aarch64
+            torch_version: "2.5.1"
           - os: windows-2025
             arch: x86_64
           - os: macos-15
             arch: arm64
+        exclude:
+          - os: ubuntu-22.04-arm
+            torch_version: "2.2.2"
+
     runs-on: ${{ matrix.runner || matrix.os }}
     env:
       BNB_TEST_DEVICE: cpu

@@ -129,12 +137,21 @@ jobs:
         with:
           python-version: 3.9

+      - name: Setup MSVC
+        if: startsWith(matrix.os, 'windows')
+        uses: ilammy/[email protected] # to use cl for torch.compile
+
       - name: Install dependencies
         run: |
           pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/cpu
           pip install -e ".[test]"
           pip install pytest-cov

+      # We need to downgrade to numpy<2 for torch<2.3 compatibility.
+      - name: Downgrade NumPy
+        if: startsWith(matrix.torch_version, '2.2.')
+        run: pip install "numpy<2"
+
       - name: Show installed packages
         run: pip list

@@ -144,7 +161,7 @@ jobs:
       - name: Run tests
         run: pytest --durations=100

-  # cuda-aarch64-tests:
+  # test-cuda-aarch64:
   #   if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
   #   needs: build-cuda
   #   strategy:

@@ -167,7 +184,7 @@ jobs:



-  cuda-tests:
+  test-cuda:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cuda
     strategy:

@@ -179,7 +196,7 @@ jobs:
         cuda_version: ["11.8.0", "12.6.3", "12.8.1"]
         include:
           - cuda_version: "11.8.0"
-            torch_version: "2.4.1"
+            torch_version: "2.2.2"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.6.3"
             torch_version: "2.6.0"

@@ -188,18 +205,40 @@ jobs:
             torch_version: "2.7.0"
             pypi_index: "https://download.pytorch.org/whl/cu128"

-          # L40S runners
+
+          # Linux L40S runners
           - os: ubuntu-22.04
             gpu: L40S
             runner: bandb-aws-g6e-4xlarge-plus-use1-public-80

-          # T4 runners
+          # Linux T4 runners
           - os: ubuntu-22.04
             gpu: T4
             runner: bandb-aws-g4dn-4xlarge-plus-use1-public-80
+
+          # Specific Windows runners using cu118
           - os: windows-2025
+            arch: x86_64
             gpu: T4
             runner: CUDA-Windows-x64
+            cuda_version: "11.8.0"
+            torch_version: "2.2.0"
+            pypi_index: "https://download.pytorch.org/whl/cu118"
+          - os: windows-2025
+            arch: x86_64
+            gpu: T4
+            runner: CUDA-Windows-x64
+            cuda_version: "11.8.0"
+            torch_version: "2.6.0"
+            pypi_index: "https://download.pytorch.org/whl/cu118"
+          - os: windows-2025
+            arch: x86_64
+            gpu: T4
+            runner: CUDA-Windows-x64
+            cuda_version: "11.8.0"
+            torch_version: "2.7.0"
+            pypi_index: "https://download.pytorch.org/whl/cu118"
+
         exclude:
           # Our current T4 Windows runner has a driver too old (471.11)
           # and cannot support CUDA 12+. Skip for now.

@@ -238,6 +277,11 @@ jobs:
           pip install -e ".[test]"
           pip install pytest-cov

+      # We need to downgrade to numpy<2 for torch<2.3 compatibility.
+      - name: Downgrade NumPy
+        if: startsWith(matrix.torch_version, '2.2.')
+        run: pip install "numpy<2"
+
       - name: Show installed packages
         run: pip list
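The repeated `Downgrade NumPy` step exists because torch wheels older than 2.3 were built against the NumPy 1.x C ABI and can fail at import under NumPy 2. A minimal sketch of the same version gate for a local environment — not from the repository, and assuming the `packaging` package is available:

```python
# Hypothetical local check mirroring the CI's "numpy<2 for torch<2.3" rule.
from packaging.version import Version

import numpy
import torch

torch_ver = Version(torch.__version__.split("+")[0])  # drop local tags like +cpu / +cu118
if torch_ver < Version("2.3.0") and Version(numpy.__version__).major >= 2:
    raise RuntimeError("torch<2.3 requires numpy<2 -- run: pip install 'numpy<2'")
```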

README.md

Lines changed: 28 additions & 24 deletions

@@ -36,44 +36,45 @@ bitsandbytes has the following minimum requirements for all platforms:
   </thead>
   <tbody>
     <tr>
-      <td colspan="4">🐧 <strong>Linux</strong></td>
+      <td colspan="4">🐧 <strong>Linux, glibc >= 2.24</strong></td>
     </tr>
     <tr>
       <td align="right">x86-64</td>
       <td>◻️ CPU</td>
-      <td></td>
+      <td>AVX2</td>
       <td>〰️ Partial Support</td>
     </tr>
     <tr>
       <td></td>
-      <td>🟩 NVIDIA GPU</td>
+      <td>🟩 NVIDIA GPU <br><code>cuda</code></td>
       <td>SM50+ minimum<br>SM75+ recommended</td>
-      <td>✅ Full Support *</td>
+      <td>✅ Full Support</td>
     </tr>
     <tr>
       <td></td>
-      <td>🟥 AMD GPU</td>
-      <td>gfx90a, gfx942, gfx1100</td>
+      <td>🟥 AMD GPU <br><code>cuda</code></td>
+      <td>
+        CDNA: gfx90a, gfx942<br>
+        RDNA: gfx1100, gfx1200
+      </td>
       <td>🚧 In Development</td>
     </tr>
     <tr>
       <td></td>
-      <td>🟦 Intel XPU</td>
+      <td>🟦 Intel GPU <br><code>xpu</code></td>
       <td>
-        Data Center GPU Max Series (Ponte Vecchio) <br>
-        Arc A-Series (Alchemist) <br>
+        Data Center GPU Max Series<br>
+        Arc A-Series (Alchemist)<br>
         Arc B-Series (Battlemage)
       </td>
       <td>🚧 In Development</td>
     </tr>
-    <!--
     <tr>
       <td></td>
-      <td>🟦 Intel HPU</td>
+      <td>🟪 Intel Gaudi <br><code>hpu</code></td>
       <td>Gaudi1, Gaudi2, Gaudi3</td>
-      <td>🚧</td>
+      <td>🚧 In Development</td>
     </tr>
-    --->
     <tr>
       <td align="right">aarch64</td>
       <td>◻️ CPU</td>

@@ -82,12 +83,12 @@ bitsandbytes has the following minimum requirements for all platforms:
     </tr>
     <tr>
       <td></td>
-      <td>🟩 NVIDIA GPU</td>
+      <td>🟩 NVIDIA GPU <br><code>cuda</code></td>
       <td>SM75, SM80, SM90, SM100</td>
-      <td>✅ Full Support *</td>
+      <td>✅ Full Support</td>
     </tr>
     <tr>
-      <td colspan="4">🪟 <strong>Windows</strong></td>
+      <td colspan="4">🪟 <strong>Windows 11 / Windows Server 2019+</strong></td>
     </tr>
     <tr>
       <td align="right">x86-64</td>

@@ -97,33 +98,36 @@ bitsandbytes has the following minimum requirements for all platforms:
     </tr>
     <tr>
       <td></td>
-      <td>🟩 NVIDIA GPU</td>
+      <td>🟩 NVIDIA GPU <br><code>cuda</code></td>
       <td>SM50+ minimum<br>SM75+ recommended</td>
-      <td>✅ Full Support *</td>
+      <td>✅ Full Support</td>
     </tr>
     <tr>
       <td></td>
-      <td>🟦 Intel XPU</td>
+      <td>🟦 Intel GPU <br><code>xpu</code></td>
       <td>
        Arc A-Series (Alchemist) <br>
        Arc B-Series (Battlemage)
       </td>
       <td>🚧 In Development</td>
     </tr>
     <tr>
-      <td colspan="4">🍎 <strong>macOS</strong></td>
+      <td colspan="4">🍎 <strong>macOS 13.1+</strong></td>
     </tr>
     <tr>
       <td align="right">arm64</td>
-      <td>◻️ CPU / Metal</td>
+      <td>◻️ CPU</td>
       <td>Apple M1+</td>
-      <td>❌ Under consideration</td>
+      <td>🛣️ Future Roadmap</td>
     </tr>
+    <tr>
+      <td></td>
+      <td>⬜ Metal <br><code>mps</code></td>
+      <td>Apple M1+</td>
+      <td>🛣️ Future Roadmap</td>
   </tbody>
 </table>
 
-\* Accelerated INT8 requires SM75+.
-
 ## :book: Documentation
 * [Official Documentation](https://huggingface.co/docs/bitsandbytes/main)
 * 🤗 [Transformers](https://huggingface.co/docs/transformers/quantization/bitsandbytes)

benchmarking/int8/row_scale_benchmark.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

bitsandbytes/functional.py

Lines changed: 3 additions & 3 deletions

@@ -367,7 +367,7 @@ def create_dynamic_map(signed=True, max_exponent_bits=7, total_bits=8):
     # these are additional items that come from the case
     # where all the exponent bits are zero and no
     # indicator bit is present
-    non_sign_bits = total_bits - (1 if signed else 1)
+    non_sign_bits = total_bits - 1
     additional_items = 2 ** (non_sign_bits - max_exponent_bits) - 1
     for i in range(max_exponent_bits):
         fraction_items = int(

@@ -771,14 +771,14 @@ def quantize_blockwise(
         qabsmax, state2 = quantize_blockwise(_absmax, blocksize=blocksize, nested=False)
         quant_state = QuantState(
             absmax=qabsmax,
-            code=code,
+            code=code.to(A.device, copy=True),
             blocksize=blocksize,
             dtype=A.dtype,
             offset=offset,
             state2=state2,
         )
     else:
-        quant_state = QuantState(absmax=_absmax, code=code.to(A.device), blocksize=blocksize, dtype=A.dtype)
+        quant_state = QuantState(absmax=_absmax, code=code.to(A.device, copy=True), blocksize=blocksize, dtype=A.dtype)
 
     # TODO(matthewdouglas): Deprecate out kwarg
     out = out.copy_(_out) if out is not None else _out
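Two details worth noting here. First, `total_bits - (1 if signed else 1)` evaluates to `total_bits - 1` regardless of `signed`, so the new form only removes dead code. Second, `copy=True` matters because `Tensor.to()` returns the tensor itself when no device or dtype conversion is needed, so without it the returned `QuantState` would alias the shared codebook tensor. A minimal sketch of that aliasing behavior, using stand-in values rather than the library's actual codebook:

```python
import torch

code = torch.linspace(-1.0, 1.0, 256)  # stand-in for a shared quantization codebook

alias = code.to(code.device)            # same device: .to() returns `code` itself
assert alias.data_ptr() == code.data_ptr()

private = code.to(code.device, copy=True)  # copy=True always materializes a new tensor
assert private.data_ptr() != code.data_ptr()

private[0] = 123.0                      # mutating the copy...
assert code[0].item() != 123.0          # ...leaves the shared codebook untouched
```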

bitsandbytes/optim/optimizer.py

Lines changed: 5 additions & 5 deletions

@@ -303,9 +303,9 @@ def get_config(self, gindex, pindex, group):
         config["eps"] = group["eps"]
         config["weight_decay"] = group["weight_decay"]
         config["lr"] = group["lr"]
-        config["alpha"] = group.get("alpha")
-        config["t_alpha"] = group.get("t_alpha")
-        config["t_beta3"] = group.get("t_beta3")
+        config["alpha"] = group.get("alpha", 0.0)
+        config["t_alpha"] = group.get("t_alpha", 0)
+        config["t_beta3"] = group.get("t_beta3", 0)
         config["optim_bits"] = self.args.optim_bits
         config["min_8bit_size"] = self.args.min_8bit_size
         config["percentile_clipping"] = self.args.percentile_clipping

@@ -530,7 +530,7 @@ def update_step(self, group, p, gindex, pindex):
                 state["state2"],
                 config["betas"][1],
                 config["betas"][2] if len(config["betas"]) >= 3 else 0.0,
-                config["alpha"],
+                config.get("alpha", 0.0),
                 config["weight_decay"],
                 gnorm_scale,
                 state["unorm_vec"] if config["max_unorm"] > 0.0 else None,

@@ -575,7 +575,7 @@ def update_step(self, group, p, gindex, pindex):
                 config["betas"][0],
                 config["betas"][1],
                 config["betas"][2] if len(config["betas"]) >= 3 else 0.0,
-                config["alpha"],
+                config.get("alpha", 0.0),
                 config["eps"],
                 step,
                 config["lr"],
