
Commit f9f1535

Merge branch 'main' into torchao-quantizer

2 parents: 820ac88 + 40fc389

6 files changed: 14 additions and 7 deletions


docs/source/en/using-diffusers/loading_adapters.md

Lines changed: 6 additions & 2 deletions
@@ -134,14 +134,16 @@ The [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] method loads L
 - the LoRA weights don't have separate identifiers for the UNet and text encoder
 - the LoRA weights have separate identifiers for the UNet and text encoder
 
-But if you only need to load LoRA weights into the UNet, then you can use the [`~loaders.UNet2DConditionLoadersMixin.load_attn_procs`] method. Let's load the [jbilcke-hf/sdxl-cinematic-1](https://huggingface.co/jbilcke-hf/sdxl-cinematic-1) LoRA:
+To directly load (and save) a LoRA adapter at the *model level*, use [`~PeftAdapterMixin.load_lora_adapter`], which builds and prepares the necessary model configuration for the adapter. Like [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`], [`PeftAdapterMixin.load_lora_adapter`] can load LoRAs for both the UNet and text encoder. For example, if you're loading a LoRA for the UNet, [`PeftAdapterMixin.load_lora_adapter`] ignores the keys for the text encoder.
+
+Use the `weight_name` parameter to specify the weight file and the `prefix` parameter to filter for the appropriate state dicts (`"unet"` in this case) to load.
 
 ```py
 from diffusers import AutoPipelineForText2Image
 import torch
 
 pipeline = AutoPipelineForText2Image.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16).to("cuda")
-pipeline.unet.load_attn_procs("jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors")
+pipeline.unet.load_lora_adapter("jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", prefix="unet")
 
 # use cnmt in the prompt to trigger the LoRA
 prompt = "A cute cnmt eating a slice of pizza, stunning color scheme, masterpiece, illustration"
@@ -153,6 +155,8 @@ image
 <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/load_attn_proc.png" />
 </div>
 
+Save an adapter with [`~PeftAdapterMixin.save_lora_adapter`].
+
 To unload the LoRA weights, use the [`~loaders.StableDiffusionLoraLoaderMixin.unload_lora_weights`] method to discard the LoRA weights and restore the model to its original weights:
 
 ```py
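The diff only names [`~PeftAdapterMixin.save_lora_adapter`] without showing its usage. A minimal sketch of the load/save round trip, assuming `save_lora_adapter` takes a target directory (its signature is not part of this diff):

```py
from diffusers import AutoPipelineForText2Image
import torch

pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# Model-level load: `prefix="unet"` keeps only the UNet entries of the state dict.
pipeline.unet.load_lora_adapter(
    "jbilcke-hf/sdxl-cinematic-1",
    weight_name="pytorch_lora_weights.safetensors",
    prefix="unet",
)

# Write the adapter back out; the directory argument is assumed, not shown in the diff.
pipeline.unet.save_lora_adapter("sdxl-cinematic-1-adapter")
```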

src/diffusers/models/model_loading_utils.py

Lines changed: 4 additions & 1 deletion
@@ -175,7 +175,10 @@ def load_model_dict_into_meta(
     hf_quantizer=None,
     keep_in_fp32_modules=None,
 ) -> List[str]:
-    device = device or torch.device("cpu")
+    if device is not None and not isinstance(device, (str, torch.device)):
+        raise ValueError(f"Expected device to have type `str` or `torch.device`, but got {type(device)=}.")
+    if hf_quantizer is None:
+        device = device or torch.device("cpu")
     dtype = dtype or torch.float32
     is_quantized = hf_quantizer is not None
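The practical effect: with a quantizer attached, `device` stays `None` instead of silently defaulting to CPU, and malformed device arguments fail fast. A standalone sketch of the guard (an illustrative helper, not the actual `load_model_dict_into_meta` body):

```py
import torch

def resolve_device(device=None, hf_quantizer=None):
    # Reject anything that is not a string or torch.device (e.g. a bare int ordinal).
    if device is not None and not isinstance(device, (str, torch.device)):
        raise ValueError(f"Expected device to have type `str` or `torch.device`, but got {type(device)=}.")
    # Fall back to CPU only when no quantizer is managing parameter placement.
    if hf_quantizer is None:
        device = device or torch.device("cpu")
    return device

print(resolve_device())                # device(type='cpu')
print(resolve_device("cuda:0"))        # cuda:0
print(resolve_device(None, object()))  # None: the quantizer decides placement
```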

src/diffusers/models/modeling_utils.py

Lines changed: 1 addition & 1 deletion
@@ -829,7 +829,7 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
                 param_device = "cpu"
                 # TODO (sayakpaul, SunMarc): remove this after model loading refactor
             else:
-                param_device = torch.cuda.current_device()
+                param_device = torch.device(torch.cuda.current_device())
             state_dict = load_state_dict(model_file, variant=variant)
             model._convert_deprecated_attention_blocks(state_dict)
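This pairs with the validation above: `torch.cuda.current_device()` returns a bare `int` ordinal, not a `torch.device`, so the old value would presumably trip the new `isinstance(device, (str, torch.device))` check if passed along. Wrapping it in `torch.device(...)` normalizes it; a quick illustration (requires a CUDA-enabled machine):

```py
import torch

idx = torch.cuda.current_device()  # e.g. 0 -- a plain int
device = torch.device(idx)         # device(type='cuda', index=0)
assert not isinstance(idx, torch.device)
assert isinstance(device, torch.device) and device.type == "cuda"
```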

tests/lora/test_lora_layers_cogvideox.py

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ def get_dummy_inputs(self, with_generator=True):
 
     @skip_mps
     @pytest.mark.xfail(
-        condtion=torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5"),
+        condition=torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5"),
         reason="Test currently fails on CPU and PyTorch 2.5.1 but not on PyTorch 2.4.1.",
         strict=True,
     )
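The same `condtion` → `condition` fix recurs in the two files below. The typo likely mattered: pytest marks accept arbitrary keyword names, and `xfail` reads its condition only from the `condition` kwarg (or positional arguments), so a misspelled kwarg is presumably ignored and the mark applies unconditionally; with `strict=True`, the test would then be reported as a failure wherever it actually passes. A corrected, self-contained usage:

```py
import pytest

# Misspelled, the condition is never seen and the xfail is unconditional:
#   @pytest.mark.xfail(condtion=..., reason=..., strict=True)
@pytest.mark.xfail(
    condition=True,  # stand-in for: torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5")
    reason="Test currently fails on CPU and PyTorch 2.5.1 but not on PyTorch 2.4.1.",
    strict=True,
)
def test_example():
    raise AssertionError("expected to fail while the condition holds")
```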

tests/lora/test_lora_layers_mochi.py

Lines changed: 1 addition & 1 deletion
@@ -108,7 +108,7 @@ def get_dummy_inputs(self, with_generator=True):
         return noise, input_ids, pipeline_inputs
 
     @pytest.mark.xfail(
-        condtion=torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5"),
+        condition=torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5"),
         reason="Test currently fails on CPU and PyTorch 2.5.1 but not on PyTorch 2.4.1.",
         strict=True,
     )

tests/lora/utils.py

Lines changed: 1 addition & 1 deletion
@@ -1513,7 +1513,7 @@ def test_simple_inference_with_text_denoiser_multi_adapter_weighted(self):
 
     @skip_mps
     @pytest.mark.xfail(
-        condtion=torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5"),
+        condition=torch.device(torch_device).type == "cpu" and is_torch_version(">=", "2.5"),
         reason="Test currently fails on CPU and PyTorch 2.5.1 but not on PyTorch 2.4.1.",
         strict=True,
     )
