
Commit 73f1ab8

Fix compatibility for bitsandbytes>=0.46 (#20956)

* Fix compatibility for bitsandbytes>=0.46
* --upgrade-strategy=eager

Co-authored-by: Jirka B <[email protected]>

1 parent afa7d56

File tree

5 files changed (+9, -9 lines)

.azure/gpu-tests-fabric.yml

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ jobs:
   - bash: |
       set -e
       extra=$(python -c "print({'lightning': 'fabric-'}.get('$(PACKAGE_NAME)', ''))")
-      pip install -e ".[${extra}dev]" -U --extra-index-url="${TORCH_URL}"
+      pip install -e ".[${extra}dev]" -U --upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
     displayName: "Install package & dependencies"

   - bash: |
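The CI change above switches pip from its default upgrade strategy ("only-if-needed", which leaves already-satisfied dependencies untouched) to "eager", which also upgrades every dependency of the package being installed. A minimal sketch to confirm the flag on your own pip (the `".[dev]"` extra in the comment is illustrative):

```shell
# Show that this pip build supports the flag used in the CI job above.
python3 -m pip install --help | grep -m1 -- '--upgrade-strategy'

# In the CI job the full invocation looks roughly like (illustrative extra name):
#   pip install -e ".[dev]" -U --upgrade-strategy=eager
```

Eager upgrading is what lets the job pick up a newer bitsandbytes release even when an older pinned-compatible version is already present in the environment.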

docs/source-pytorch/common/precision_intermediate.rst

Lines changed: 1 addition & 1 deletion
@@ -165,7 +165,7 @@ Under the hood, we use `transformer_engine.pytorch.fp8_autocast <https://docs.nv
 Quantization via Bitsandbytes
 *****************************

-`bitsandbytes <https://github.com/TimDettmers/bitsandbytes>`__ (BNB) is a library that supports quantizing :class:`torch.nn.Linear` weights.
+`bitsandbytes <https://github.com/bitsandbytes-foundation/bitsandbytes>`__ (BNB) is a library that supports quantizing :class:`torch.nn.Linear` weights.

 Both 4-bit (`paper reference <https://arxiv.org/abs/2305.14314v1>`__) and 8-bit (`paper reference <https://arxiv.org/abs/2110.02861>`__) quantization is supported.
 Specifically, we support the following modes:

requirements/fabric/strategies.txt

Lines changed: 1 addition & 2 deletions
@@ -6,5 +6,4 @@
 # note: is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods`
 # shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372
 deepspeed >=0.9.3, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin" # strict
-# skip bitsandbytes==0.46, due to ValueError: too many values to unpack (expected 2)
-bitsandbytes >=0.45.2,!=0.46,<0.47.0; platform_system != "Darwin"
+bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"

requirements/pytorch/extra.txt

Lines changed: 1 addition & 2 deletions
@@ -8,5 +8,4 @@ hydra-core >=1.2.0, <1.4.0
 jsonargparse[signatures,jsonnet] >=4.39.0, <4.41.0
 rich >=12.3.0, <14.1.0
 tensorboardX >=2.2, <2.7.0 # min version is set by torch.onnx missing attribute
-# skip bitsandbytes==0.46, due to ValueError: too many values to unpack (expected 2)
-bitsandbytes >=0.45.2,!=0.46,<0.47.0; platform_system != "Darwin"
+bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"
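The two requirements changes above drop the `!=0.46` exclusion because the incompatibility is now handled in code. A minimal sketch of what the old and new PEP 440 specifiers accept, using the `packaging` library (assumed available, as it ships with most pip installations; version numbers below are only those from the pins):

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

# Old pin: excluded the release that raised
# "ValueError: too many values to unpack (expected 2)".
old = SpecifierSet(">=0.45.2,!=0.46,<0.47.0")
# New pin: the 0.46 series is allowed again.
new = SpecifierSet(">=0.45.2,<0.47.0")

print(Version("0.46.0") in old)  # False: 0.46 normalizes to 0.46.0, so it was skipped
print(Version("0.46.0") in new)  # True: allowed after this fix
print(Version("0.46.1") in old)  # True: `!=0.46` only excluded 0.46.0 exactly
```

Note the last line: the old `!=0.46` clause matched only the exact version 0.46.0, so a hypothetical 0.46.1 was never excluded; fixing the incompatibility in code is the more robust approach.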

src/lightning/fabric/plugins/precision/bitsandbytes.py

Lines changed: 5 additions & 3 deletions
@@ -256,10 +256,12 @@ def quantize(
         if int8params.has_fp16_weights:
             int8params.data = B
         else:
-            if hasattr(bnb.functional, "double_quant"):
+            # bitsandbytes >= 0.45 supports an improved API
+            if hasattr(bnb.functional, "int8_vectorwise_quant"):
+                CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
+            else:  # old method is deprecated in 0.45, removed in 0.46+.
                 CB, _, SCB, _, _ = bnb.functional.double_quant(B)
-            else:  # for bitsandbytes versions ≥0.46
-                CB, SCB = bnb.functional.int8_double_quant(B)
+
             int8params.data = CB
             setattr(int8params, "CB", CB)
             setattr(int8params, "SCB", SCB)
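The patch above probes for the new entry point (`int8_vectorwise_quant`) and falls back to the removed `double_quant` on older releases. A minimal sketch of that `hasattr`-based feature detection, with stub namespaces standing in for `bnb.functional` so it runs without bitsandbytes installed; the `quantize_int8` helper and the stub return values are illustrative, not Lightning's actual code:

```python
from types import SimpleNamespace


def quantize_int8(functional, B):
    """Pick whichever int8 quantization entry point this bitsandbytes build has."""
    if hasattr(functional, "int8_vectorwise_quant"):
        # newer API: returns (quantized, scales, outlier_columns)
        CB, SCB, _ = functional.int8_vectorwise_quant(B)
    else:
        # older API, removed in 0.46+: returns a 5-tuple
        CB, _, SCB, _, _ = functional.double_quant(B)
    return CB, SCB


# Stubs emulating the two API generations (return values are placeholders).
new_api = SimpleNamespace(int8_vectorwise_quant=lambda B: ("q", "s", None))
old_api = SimpleNamespace(double_quant=lambda B: ("q", None, "s", None, None))

print(quantize_int8(new_api, None))  # ('q', 's')
print(quantize_int8(old_api, None))  # ('q', 's')
```

Detecting the function rather than parsing the installed version string keeps the code working across releases where the API moved without a clean version boundary.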
