Merge branch 'main' into xla_sana

SahilCarterr · web-flow · commit 7a14bd5d8914 · 2024-12-24T22:07:44.000+05:30
diff --git a/examples/flux-control/train_control_flux.py b/examples/flux-control/train_control_flux.py
@@ -795,7 +795,7 @@ def main(args):
         flux_transformer.x_embedder = new_linear
 
     assert torch.all(flux_transformer.x_embedder.weight[:, initial_input_channels:].data == 0)
-    flux_transformer.register_to_config(in_channels=initial_input_channels * 2)
+    flux_transformer.register_to_config(in_channels=initial_input_channels * 2, out_channels=initial_input_channels)
 
     def unwrap_model(model):
         model = accelerator.unwrap_model(model)
@@ -1166,6 +1166,11 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
             flux_transformer.to(torch.float32)
         flux_transformer.save_pretrained(args.output_dir)
 
+        del flux_transformer
+        del text_encoding_pipeline
+        del vae
+        free_memory()
+
         # Run a final round of validation.
         image_logs = None
         if args.validation_prompt is not None:
diff --git a/examples/flux-control/train_control_lora_flux.py b/examples/flux-control/train_control_lora_flux.py
@@ -830,7 +830,7 @@ def main(args):
         flux_transformer.x_embedder = new_linear
 
     assert torch.all(flux_transformer.x_embedder.weight[:, initial_input_channels:].data == 0)
-    flux_transformer.register_to_config(in_channels=initial_input_channels * 2)
+    flux_transformer.register_to_config(in_channels=initial_input_channels * 2, out_channels=initial_input_channels)
 
     if args.train_norm_layers:
         for name, param in flux_transformer.named_parameters():
@@ -1319,6 +1319,11 @@ def get_sigmas(timesteps, n_dim=4, dtype=torch.float32):
             transformer_lora_layers=transformer_lora_layers,
         )
 
+        del flux_transformer
+        del text_encoding_pipeline
+        del vae
+        free_memory()
+
         # Run a final round of validation.
         image_logs = None
         if args.validation_prompt is not None:
diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
@@ -4839,6 +4839,8 @@ def __call__(
                 )
             else:
                 for index, (mask, scale, ip_state) in enumerate(zip(ip_adapter_masks, self.scale, ip_hidden_states)):
+                    if mask is None:
+                        continue
                     if not isinstance(mask, torch.Tensor) or mask.ndim != 4:
                         raise ValueError(
                             "Each element of the ip_adapter_masks array should be a tensor with shape "
@@ -5056,6 +5058,8 @@ def __call__(
                 )
             else:
                 for index, (mask, scale, ip_state) in enumerate(zip(ip_adapter_masks, self.scale, ip_hidden_states)):
+                    if mask is None:
+                        continue
                     if not isinstance(mask, torch.Tensor) or mask.ndim != 4:
                         raise ValueError(
                             "Each element of the ip_adapter_masks array should be a tensor with shape "
diff --git a/tests/single_file/single_file_testing_utils.py b/tests/single_file/single_file_testing_utils.py
@@ -378,14 +378,14 @@ def test_single_file_components_with_diffusers_config_local_files_only(
     def test_single_file_format_inference_is_same_as_pretrained(self, expected_max_diff=1e-4):
         sf_pipe = self.pipeline_class.from_single_file(self.ckpt_path, torch_dtype=torch.float16, safety_checker=None)
         sf_pipe.unet.set_default_attn_processor()
-        sf_pipe.enable_model_cpu_offload()
+        sf_pipe.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs(torch_device)
         image_single_file = sf_pipe(**inputs).images[0]
 
         pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16, safety_checker=None)
         pipe.unet.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs(torch_device)
         image = pipe(**inputs).images[0]
diff --git a/tests/single_file/test_stable_diffusion_controlnet_img2img_single_file.py b/tests/single_file/test_stable_diffusion_controlnet_img2img_single_file.py
@@ -76,14 +76,14 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
         pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
         pipe.unet.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         pipe_sf = self.pipeline_class.from_single_file(
             self.ckpt_path,
             controlnet=controlnet,
         )
         pipe_sf.unet.set_default_attn_processor()
-        pipe_sf.enable_model_cpu_offload()
+        pipe_sf.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs(torch_device)
         output = pipe(**inputs).images[0]
diff --git a/tests/single_file/test_stable_diffusion_controlnet_inpaint_single_file.py b/tests/single_file/test_stable_diffusion_controlnet_inpaint_single_file.py
@@ -73,11 +73,11 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
         pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet, safety_checker=None)
         pipe.unet.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         pipe_sf = self.pipeline_class.from_single_file(self.ckpt_path, controlnet=controlnet, safety_checker=None)
         pipe_sf.unet.set_default_attn_processor()
-        pipe_sf.enable_model_cpu_offload()
+        pipe_sf.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs()
         output = pipe(**inputs).images[0]
diff --git a/tests/single_file/test_stable_diffusion_controlnet_single_file.py b/tests/single_file/test_stable_diffusion_controlnet_single_file.py
@@ -67,14 +67,14 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
         controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")
         pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet)
         pipe.unet.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         pipe_sf = self.pipeline_class.from_single_file(
             self.ckpt_path,
             controlnet=controlnet,
         )
         pipe_sf.unet.set_default_attn_processor()
-        pipe_sf.enable_model_cpu_offload()
+        pipe_sf.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs()
         output = pipe(**inputs).images[0]
diff --git a/tests/single_file/test_stable_diffusion_upscale_single_file.py b/tests/single_file/test_stable_diffusion_upscale_single_file.py
@@ -49,14 +49,14 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
 
         prompt = "a cat sitting on a park bench"
         pipe = StableDiffusionUpscalePipeline.from_pretrained(self.repo_id)
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         generator = torch.Generator("cpu").manual_seed(0)
         output = pipe(prompt=prompt, image=image, generator=generator, output_type="np", num_inference_steps=3)
         image_from_pretrained = output.images[0]
 
         pipe_from_single_file = StableDiffusionUpscalePipeline.from_single_file(self.ckpt_path)
-        pipe_from_single_file.enable_model_cpu_offload()
+        pipe_from_single_file.enable_model_cpu_offload(device=torch_device)
 
         generator = torch.Generator("cpu").manual_seed(0)
         output_from_single_file = pipe_from_single_file(
diff --git a/tests/single_file/test_stable_diffusion_xl_adapter_single_file.py b/tests/single_file/test_stable_diffusion_xl_adapter_single_file.py
@@ -76,7 +76,7 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
             torch_dtype=torch.float16,
             safety_checker=None,
         )
-        pipe_single_file.enable_model_cpu_offload()
+        pipe_single_file.enable_model_cpu_offload(device=torch_device)
         pipe_single_file.set_progress_bar_config(disable=None)
 
         inputs = self.get_inputs()
@@ -88,7 +88,7 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
             torch_dtype=torch.float16,
             safety_checker=None,
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs()
         images = pipe(**inputs).images[0]
diff --git a/tests/single_file/test_stable_diffusion_xl_controlnet_single_file.py b/tests/single_file/test_stable_diffusion_xl_controlnet_single_file.py
@@ -69,15 +69,15 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
             self.ckpt_path, controlnet=controlnet, torch_dtype=torch.float16
         )
         pipe_single_file.unet.set_default_attn_processor()
-        pipe_single_file.enable_model_cpu_offload()
+        pipe_single_file.enable_model_cpu_offload(device=torch_device)
         pipe_single_file.set_progress_bar_config(disable=None)
 
         inputs = self.get_inputs(torch_device)
         single_file_images = pipe_single_file(**inputs).images[0]
 
         pipe = self.pipeline_class.from_pretrained(self.repo_id, controlnet=controlnet, torch_dtype=torch.float16)
         pipe.unet.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         inputs = self.get_inputs(torch_device)
         images = pipe(**inputs).images[0]
diff --git a/tests/single_file/test_stable_diffusion_xl_img2img_single_file.py b/tests/single_file/test_stable_diffusion_xl_img2img_single_file.py
@@ -85,7 +85,7 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
         pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16)
         pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
         pipe.unet.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         image = pipe(
@@ -95,7 +95,7 @@ def test_single_file_format_inference_is_same_as_pretrained(self):
         pipe_single_file = self.pipeline_class.from_single_file(self.ckpt_path, torch_dtype=torch.float16)
         pipe_single_file.scheduler = DDIMScheduler.from_config(pipe_single_file.scheduler.config)
         pipe_single_file.unet.set_default_attn_processor()
-        pipe_single_file.enable_model_cpu_offload()
+        pipe_single_file.enable_model_cpu_offload(device=torch_device)
 
         generator = torch.Generator(device="cpu").manual_seed(0)
         image_single_file = pipe_single_file(