diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml
index fe7d1349a6e9b..1adf9bfff67f4 100644
--- a/.azure/gpu-tests-fabric.yml
+++ b/.azure/gpu-tests-fabric.yml
@@ -130,7 +130,7 @@ jobs:
       - bash: |
           set -e
           extra=$(python -c "print({'lightning': 'fabric-'}.get('$(PACKAGE_NAME)', ''))")
-          pip install -e ".[${extra}dev]" -U --extra-index-url="${TORCH_URL}"
+          pip install -e ".[${extra}dev]" -U --upgrade-strategy=eager --extra-index-url="${TORCH_URL}"
         displayName: "Install package & dependencies"
 
       - bash: |
diff --git a/docs/source-pytorch/common/precision_intermediate.rst b/docs/source-pytorch/common/precision_intermediate.rst
index eff5805497c2d..70f86b1c09bb2 100644
--- a/docs/source-pytorch/common/precision_intermediate.rst
+++ b/docs/source-pytorch/common/precision_intermediate.rst
@@ -165,7 +165,7 @@ Under the hood, we use `transformer_engine.pytorch.fp8_autocast `__
-`bitsandbytes `__ (BNB) is a library that supports quantizing :class:`torch.nn.Linear` weights.
+`bitsandbytes `__ (BNB) is a library that supports quantizing :class:`torch.nn.Linear` weights.
 Both 4-bit (`paper reference `__) and 8-bit (`paper reference `__) quantization is supported.
 Specifically, we support the following modes:
diff --git a/requirements/fabric/strategies.txt b/requirements/fabric/strategies.txt
index 031870edb775c..bea30b37fa5f8 100644
--- a/requirements/fabric/strategies.txt
+++ b/requirements/fabric/strategies.txt
@@ -6,5 +6,4 @@
 # note: is a bug around 0.10 with `MPS_Accelerator must implement all abstract methods`
 # shall be resolved by https://github.com/microsoft/DeepSpeed/issues/4372
 deepspeed >=0.9.3, <=0.9.3; platform_system != "Windows" and platform_system != "Darwin"  # strict
-# skip bitsandbytes==0.46, due to ValueError: too many values to unpack (expected 2)
-bitsandbytes >=0.45.2,!=0.46,<0.47.0; platform_system != "Darwin"
+bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"
diff --git a/requirements/pytorch/extra.txt b/requirements/pytorch/extra.txt
index 862df2800b0cd..0f11b19c23431 100644
--- a/requirements/pytorch/extra.txt
+++ b/requirements/pytorch/extra.txt
@@ -8,5 +8,4 @@ hydra-core >=1.2.0, <1.4.0
 jsonargparse[signatures,jsonnet] >=4.39.0, <4.41.0
 rich >=12.3.0, <14.1.0
 tensorboardX >=2.2, <2.7.0  # min version is set by torch.onnx missing attribute
-# skip bitsandbytes==0.46, due to ValueError: too many values to unpack (expected 2)
-bitsandbytes >=0.45.2,!=0.46,<0.47.0; platform_system != "Darwin"
+bitsandbytes >=0.45.2,<0.47.0; platform_system != "Darwin"
diff --git a/src/lightning/fabric/plugins/precision/bitsandbytes.py b/src/lightning/fabric/plugins/precision/bitsandbytes.py
index 8a71a25bb914f..4c648f2b97181 100644
--- a/src/lightning/fabric/plugins/precision/bitsandbytes.py
+++ b/src/lightning/fabric/plugins/precision/bitsandbytes.py
@@ -256,10 +256,12 @@ def quantize(
             if int8params.has_fp16_weights:
                 int8params.data = B
             else:
-                if hasattr(bnb.functional, "double_quant"):
+                # bitsandbytes >= 0.45 supports an improved API
+                if hasattr(bnb.functional, "int8_vectorwise_quant"):
+                    CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
+                else:  # old method is deprecated in 0.45, removed in 0.46+
                     CB, _, SCB, _, _ = bnb.functional.double_quant(B)
-                else:  # for bitsandbytes versions ≥0.46
-                    CB, SCB = bnb.functional.int8_double_quant(B)
+
                 int8params.data = CB
                 setattr(int8params, "CB", CB)
                 setattr(int8params, "SCB", SCB)
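
A minimal sketch (not part of the patch) of the feature-detection shim introduced in the bitsandbytes.py hunk above, assuming a CUDA-capable environment; the helper name int8_quantize_weight and the tensor name weight are illustrative only:

import torch
import bitsandbytes as bnb


def int8_quantize_weight(weight: torch.Tensor):
    # bitsandbytes int8 kernels expect a contiguous fp16 CUDA tensor
    B = weight.contiguous().to(device="cuda", dtype=torch.float16)
    if hasattr(bnb.functional, "int8_vectorwise_quant"):
        # bitsandbytes >= 0.45: returns (int8 rows, row-wise scales, outlier columns)
        CB, SCB, _ = bnb.functional.int8_vectorwise_quant(B)
    else:
        # older releases: double_quant returns 5 values; it was removed in 0.46+
        CB, _, SCB, _, _ = bnb.functional.double_quant(B)
    return CB, SCB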