Commit bc912fc ("changes")
1 parent 695ad14
2 files changed: +49 -8 lines

src/diffusers/loaders/lora_pipeline.py (11 additions, 8 deletions)

@@ -18,11 +18,11 @@
 import torch
 from huggingface_hub.utils import validate_hf_hub_args

-from ..quantizers.bitsandbytes import dequantize_bnb_weight
 from ..utils import (
     USE_PEFT_BACKEND,
     deprecate,
     get_submodule_by_name,
+    is_bitsandbytes_available,
     is_peft_available,
     is_peft_version,
     is_torch_version,
@@ -48,6 +48,9 @@
 )


+if is_bitsandbytes_available():
+    from ..quantizers.bitsandbytes import dequantize_bnb_weight
+
 _LOW_CPU_MEM_USAGE_DEFAULT_LORA = False
 if is_torch_version(">=", "1.9.0"):
     if (
@@ -1971,11 +1974,13 @@ def _maybe_expand_transformer_param_shape_or_error_(
         is_peft_loaded = getattr(transformer, "peft_config", None) is not None
         for name, module in transformer.named_modules():
             if isinstance(module, torch.nn.Linear):
-                module_weight = (
-                    dequantize_bnb_weight(module.weight, state=module.weight.quant_state).data
-                    if module.weight.__class__.__name__ == "Params4bit"
-                    else module.weight.data
-                )
+                is_quantized = module.weight.__class__.__name__ == "Params4bit"
+                if is_quantized and not is_bitsandbytes_available():
+                    raise ValueError("Install `bitsandbytes` to load quantized checkpoints.")
+                elif is_quantized:
+                    module_weight = dequantize_bnb_weight(module.weight, state=module.weight.quant_state).data
+                else:
+                    module_weight = module.weight.data
                 module_bias = module.bias.data if module.bias is not None else None
                 bias = module_bias is not None

@@ -1997,8 +2002,6 @@ def _maybe_expand_transformer_param_shape_or_error_(
                 if tuple(module_weight_shape) == (out_features, in_features):
                     continue

-                # TODO (sayakpaul): We still need to consider if the module we're expanding is
-                # quantized and handle it accordingly if that is the case.
                 module_out_features, module_in_features = module_weight_shape
                 debug_message = ""
                 if in_features > module_in_features:
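
For context, the hunks above move the `dequantize_bnb_weight` import behind an `is_bitsandbytes_available()` guard and only dequantize when the weight is actually a bitsandbytes `Params4bit`. A minimal standalone sketch of that pattern follows; the helper name `get_dense_weight` is illustrative and not part of the commit, and it assumes the public `diffusers.utils` and `diffusers.quantizers.bitsandbytes` module paths that the diff imports from.

import torch

from diffusers.utils import is_bitsandbytes_available


def get_dense_weight(module: torch.nn.Linear) -> torch.Tensor:
    # Detect a bnb 4-bit weight without importing bitsandbytes up front.
    is_quantized = module.weight.__class__.__name__ == "Params4bit"
    if is_quantized and not is_bitsandbytes_available():
        raise ValueError("Install `bitsandbytes` to load quantized checkpoints.")
    elif is_quantized:
        # Lazy import, mirroring the guarded import added in lora_pipeline.py.
        from diffusers.quantizers.bitsandbytes import dequantize_bnb_weight

        # Dequantize so the true (out_features, in_features) shape is visible.
        return dequantize_bnb_weight(module.weight, state=module.weight.quant_state).data
    return module.weight.data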

tests/quantization/bnb/test_4bit.py (38 additions, 0 deletions)

@@ -681,6 +681,44 @@ def test_lora_loading(self):
         self.assertTrue(max_diff < 1e-3)


+@require_transformers_version_greater("4.44.0")
+class SlowBnb4BitFluxWithLoraTests(Base4bitTests):
+    def setUp(self) -> None:
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        self.pipeline_4bit = DiffusionPipeline.from_pretrained(
+            "eramth/flux-4bit",
+            torch_dtype=torch.float16,
+        )
+        self.pipeline_4bit.enable_model_cpu_offload()
+
+    def tearDown(self):
+        del self.pipeline_4bit
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    def test_lora_loading(self):
+        self.pipeline_4bit.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")
+
+        output = self.pipeline_4bit(
+            prompt=self.prompt,
+            height=256,
+            width=256,
+            max_sequence_length=64,
+            output_type="np",
+            num_inference_steps=8,
+            generator=torch.Generator().manual_seed(42),
+        ).images
+        out_slice = output[0, -3:, -3:, -1].flatten()
+        # TODO: update slice
+        expected_slice = np.array([0.5347, 0.5342, 0.5283, 0.5093, 0.4988, 0.5093, 0.5044, 0.5015, 0.4946])
+
+        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
+        self.assertTrue(max_diff < 1e-3, msg=f"{out_slice=} != {expected_slice=}")
+
+
 @slow
 class BaseBnb4BitSerializationTests(Base4bitTests):
     def tearDown(self):
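
A rough end-user equivalent of what the new test exercises, as a sketch rather than code from the commit: it assumes a CUDA GPU with `bitsandbytes`, `peft`, and `transformers>=4.44.0` installed; the checkpoint and LoRA repos are the ones used in the test, while the prompt and output filename are illustrative.

import torch

from diffusers import DiffusionPipeline

# Pre-quantized 4-bit Flux checkpoint, as in the test above.
pipe = DiffusionPipeline.from_pretrained("eramth/flux-4bit", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

# Loading a LoRA into the 4-bit transformer is the path this commit fixes.
pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")

image = pipe(
    "a photo of an astronaut riding a horse",
    height=256,
    width=256,
    max_sequence_length=64,
    num_inference_steps=8,
    generator=torch.Generator().manual_seed(42),
).images[0]
image.save("flux_4bit_lora.png")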
