
Commit 2f05b6e

Merge branch 'main' into docs/add-pruna-to-diffusers-optimization
2 parents: e44e109 + 8e88495

File tree: 4 files changed (+91, -5 lines)


src/diffusers/loaders/lora_pipeline.py

Lines changed: 7 additions & 2 deletions
@@ -81,12 +81,17 @@ def _maybe_dequantize_weight_for_expanded_lora(model, module):
         from ..quantizers.gguf.utils import dequantize_gguf_tensor
 
     is_bnb_4bit_quantized = module.weight.__class__.__name__ == "Params4bit"
+    is_bnb_8bit_quantized = module.weight.__class__.__name__ == "Int8Params"
     is_gguf_quantized = module.weight.__class__.__name__ == "GGUFParameter"
 
     if is_bnb_4bit_quantized and not is_bitsandbytes_available():
         raise ValueError(
             "The checkpoint seems to have been quantized with `bitsandbytes` (4bits). Install `bitsandbytes` to load quantized checkpoints."
         )
+    if is_bnb_8bit_quantized and not is_bitsandbytes_available():
+        raise ValueError(
+            "The checkpoint seems to have been quantized with `bitsandbytes` (8bits). Install `bitsandbytes` to load quantized checkpoints."
+        )
     if is_gguf_quantized and not is_gguf_available():
         raise ValueError(
             "The checkpoint seems to have been quantized with `gguf`. Install `gguf` to load quantized checkpoints."
@@ -97,10 +102,10 @@ def _maybe_dequantize_weight_for_expanded_lora(model, module):
         weight_on_cpu = True
 
     device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
-    if is_bnb_4bit_quantized:
+    if is_bnb_4bit_quantized or is_bnb_8bit_quantized:
         module_weight = dequantize_bnb_weight(
             module.weight.to(device) if weight_on_cpu else module.weight,
-            state=module.weight.quant_state,
+            state=module.weight.quant_state if is_bnb_4bit_quantized else module.state,
             dtype=model.dtype,
         ).data
     elif is_gguf_quantized:
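
Context for the `state=` change: bitsandbytes keeps the quantization state in different places for its two schemes, which is what the new conditional in the second hunk encodes. A minimal sketch of that dispatch, with the class-name checks copied from the diff (the helper name `pick_bnb_quant_state` is ours, not diffusers API):

import torch

def pick_bnb_quant_state(module: torch.nn.Module):
    # 4-bit weights ("Params4bit") carry their quant state on the parameter
    # itself, while 8-bit weights ("Int8Params") keep it on the owning
    # Linear8bitLt module; hence `module.state` rather than
    # `module.weight.quant_state` in the patch.
    weight_cls = module.weight.__class__.__name__
    if weight_cls == "Params4bit":
        return module.weight.quant_state
    if weight_cls == "Int8Params":
        return module.state
    raise TypeError(f"Not a bitsandbytes-quantized module: {weight_cls}")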

src/diffusers/utils/dynamic_modules_utils.py

Lines changed: 21 additions & 3 deletions
@@ -154,12 +154,30 @@ def check_imports(filename):
     return get_relative_imports(filename)
 
 
-def get_class_in_module(class_name, module_path):
+def get_class_in_module(class_name, module_path, pretrained_model_name_or_path=None):
     """
     Import a module on the cache directory for modules and extract a class from it.
     """
     module_path = module_path.replace(os.path.sep, ".")
-    module = importlib.import_module(module_path)
+    try:
+        module = importlib.import_module(module_path)
+    except ModuleNotFoundError as e:
+        # This can happen when the repo id contains ".", which Python's import machinery interprets as a directory
+        # separator. We do a bit of monkey patching to detect and fix this case.
+        if not (
+            pretrained_model_name_or_path is not None
+            and "." in pretrained_model_name_or_path
+            and module_path.startswith("diffusers_modules")
+            and pretrained_model_name_or_path.replace("/", "--") in module_path
+        ):
+            raise e  # We can't figure this one out, just reraise the original error
+
+        corrected_path = os.path.join(HF_MODULES_CACHE, module_path.replace(".", "/")) + ".py"
+        corrected_path = corrected_path.replace(
+            pretrained_model_name_or_path.replace("/", "--").replace(".", "/"),
+            pretrained_model_name_or_path.replace("/", "--"),
+        )
+        module = importlib.machinery.SourceFileLoader(module_path, corrected_path).load_module()
 
     if class_name is None:
         return find_pipeline_class(module)
@@ -454,4 +472,4 @@ def get_class_from_dynamic_module(
         revision=revision,
         local_files_only=local_files_only,
     )
-    return get_class_in_module(class_name, final_module.replace(".py", ""))
+    return get_class_in_module(class_name, final_module.replace(".py", ""), pretrained_model_name_or_path)
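
Why the fallback works: converting every "." in the module path into a path separator also splits the dots inside the repo id, so the computed file path points at directories that do not exist. A small worked example of the repair, assuming a module path of the `diffusers_modules...` shape the guard above checks for (the cache root value, repo id, and helper name are illustrative; the replace logic is copied from the diff):

import os

HF_MODULES_CACHE = "/home/user/.cache/huggingface/modules"  # assumed cache root
repo_id = "akasharidas/ddpm-cifar10-32-dot.in.name"  # repo id used by the new test

def repair_module_path(module_path: str) -> str:
    # Naive dot-to-slash conversion splits the repo-id segment into spurious
    # directories; undo the split inside the repo-id part only.
    path = os.path.join(HF_MODULES_CACHE, module_path.replace(".", "/")) + ".py"
    return path.replace(
        repo_id.replace("/", "--").replace(".", "/"),
        repo_id.replace("/", "--"),
    )

print(repair_module_path("diffusers_modules.local.akasharidas--ddpm-cifar10-32-dot.in.name.pipeline"))
# /home/user/.cache/huggingface/modules/diffusers_modules/local/akasharidas--ddpm-cifar10-32-dot.in.name/pipeline.py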

tests/pipelines/test_pipelines.py

Lines changed: 15 additions & 0 deletions
@@ -1105,6 +1105,21 @@ def test_remote_auto_custom_pipe(self):
 
         assert images.shape == (1, 64, 64, 3)
 
+    def test_remote_custom_pipe_with_dot_in_name(self):
+        # make sure that trust remote code has to be passed
+        with self.assertRaises(ValueError):
+            pipeline = DiffusionPipeline.from_pretrained("akasharidas/ddpm-cifar10-32-dot.in.name")
+
+        pipeline = DiffusionPipeline.from_pretrained("akasharidas/ddpm-cifar10-32-dot.in.name", trust_remote_code=True)
+
+        assert pipeline.__class__.__name__ == "CustomPipeline"
+
+        pipeline = pipeline.to(torch_device)
+        images, output_str = pipeline(num_inference_steps=2, output_type="np")
+
+        assert images[0].shape == (1, 32, 32, 3)
+        assert output_str == "This is a test"
+
     def test_local_custom_pipeline_repo(self):
         local_custom_pipeline_path = get_tests_dir("fixtures/custom_pipeline")
         pipeline = DiffusionPipeline.from_pretrained(

tests/quantization/bnb/test_mixed_int8.py

Lines changed: 48 additions & 0 deletions
@@ -19,15 +19,18 @@
 import numpy as np
 import pytest
 from huggingface_hub import hf_hub_download
+from PIL import Image
 
 from diffusers import (
     BitsAndBytesConfig,
     DiffusionPipeline,
+    FluxControlPipeline,
     FluxTransformer2DModel,
     SanaTransformer2DModel,
     SD3Transformer2DModel,
     logging,
 )
+from diffusers.quantizers import PipelineQuantizationConfig
 from diffusers.utils import is_accelerate_version
 from diffusers.utils.testing_utils import (
     CaptureLogger,
@@ -39,6 +42,7 @@
     numpy_cosine_similarity_distance,
     require_accelerate,
     require_bitsandbytes_version_greater,
+    require_peft_backend,
     require_peft_version_greater,
     require_torch,
     require_torch_accelerator,
@@ -697,6 +701,50 @@ def test_lora_loading(self):
         self.assertTrue(max_diff < 1e-3)
 
 
+@require_transformers_version_greater("4.44.0")
+@require_peft_backend
+class SlowBnb4BitFluxControlWithLoraTests(Base8bitTests):
+    def setUp(self) -> None:
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+        self.pipeline_8bit = FluxControlPipeline.from_pretrained(
+            "black-forest-labs/FLUX.1-dev",
+            quantization_config=PipelineQuantizationConfig(
+                quant_backend="bitsandbytes_8bit",
+                quant_kwargs={"load_in_8bit": True},
+                components_to_quantize=["transformer", "text_encoder_2"],
+            ),
+            torch_dtype=torch.float16,
+        )
+        self.pipeline_8bit.enable_model_cpu_offload()
+
+    def tearDown(self):
+        del self.pipeline_8bit
+
+        gc.collect()
+        backend_empty_cache(torch_device)
+
+    def test_lora_loading(self):
+        self.pipeline_8bit.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")
+
+        output = self.pipeline_8bit(
+            prompt=self.prompt,
+            control_image=Image.new(mode="RGB", size=(256, 256)),
+            height=256,
+            width=256,
+            max_sequence_length=64,
+            output_type="np",
+            num_inference_steps=8,
+            generator=torch.Generator().manual_seed(42),
+        ).images
+        out_slice = output[0, -3:, -3:, -1].flatten()
+        expected_slice = np.array([0.2029, 0.2136, 0.2268, 0.1921, 0.1997, 0.2185, 0.2021, 0.2183, 0.2292])
+
+        max_diff = numpy_cosine_similarity_distance(expected_slice, out_slice)
+        self.assertTrue(max_diff < 1e-3, msg=f"{out_slice=} != {expected_slice=}")
+
+
 @slow
 class BaseBnb8bitSerializationTests(Base8bitTests):
     def setUp(self):
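
Outside the test harness, the scenario this new class exercises looks roughly like the following sketch; every identifier is taken from the diff above, and only the prompt string is a placeholder:

import torch
from PIL import Image
from diffusers import FluxControlPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Quantize the heavy components to 8-bit at load time, then attach a
# Control LoRA on top of the quantized transformer.
pipe = FluxControlPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    quantization_config=PipelineQuantizationConfig(
        quant_backend="bitsandbytes_8bit",
        quant_kwargs={"load_in_8bit": True},
        components_to_quantize=["transformer", "text_encoder_2"],
    ),
    torch_dtype=torch.float16,
)
pipe.enable_model_cpu_offload()
pipe.load_lora_weights("black-forest-labs/FLUX.1-Canny-dev-lora")
image = pipe(
    prompt="a robot",  # placeholder prompt
    control_image=Image.new(mode="RGB", size=(256, 256)),
    height=256,
    width=256,
    num_inference_steps=8,
).images[0]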
