@@ -667,7 +667,7 @@ def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )
-
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.StableDiffusionInpaintPipeline.prepare_latents
     def prepare_latents(
         self,
         batch_size,
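
For context: a `# Copied from ...` marker is the diffusers repo's mechanism for keeping duplicated methods in sync. A check script (`utils/check_copies.py`, run via `make fix-copies`) compares the marked method against the referenced source and re-syncs or flags it on drift. A minimal sketch of the idea, assuming an `inspect`-based comparison (the real checker works on raw source text and is more involved; `check_copied_from` and `body` are hypothetical names for illustration):

```python
import inspect


def check_copied_from(copy_fn, source_fn) -> bool:
    """Return True if copy_fn's body matches source_fn's body.

    Hypothetical helper for illustration only; diffusers' real check
    lives in utils/check_copies.py and operates on raw source text.
    """

    def body(fn):
        lines = inspect.getsource(fn).splitlines()
        # Drop the `def` line and normalize indentation so methods copied
        # between classes still compare equal.
        return [line.strip() for line in lines[1:] if line.strip()]

    return body(copy_fn) == body(source_fn)
```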
@@ -731,6 +731,7 @@ def prepare_latents(

         return outputs

+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.StableDiffusionInpaintPipeline._encode_vae_image
     def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
         if isinstance(generator, list):
             image_latents = [
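
`_encode_vae_image` maps the (masked) input image into VAE latent space. When a list of generators is passed, it encodes sample by sample so each batch item draws from its own RNG stream. A sketch of that pattern, assuming a `vae` with diffusers' `AutoencoderKL` interface (`encode(...).latent_dist.sample(...)` and `config.scaling_factor`):

```python
import torch


def encode_vae_image(vae, image: torch.Tensor, generator):
    # Sketch only; mirrors the per-sample encoding pattern shown in the hunk.
    if isinstance(generator, list):
        # One generator per batch item: encode slice by slice, then re-batch.
        latents = [
            vae.encode(image[i : i + 1]).latent_dist.sample(generator[i])
            for i in range(image.shape[0])
        ]
        latents = torch.cat(latents, dim=0)
    else:
        latents = vae.encode(image).latent_dist.sample(generator)
    # Scale into the range the UNet was trained on.
    return vae.config.scaling_factor * latents
```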
@@ -745,6 +746,7 @@ def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):

         return image_latents

+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.StableDiffusionInpaintPipeline.prepare_mask_latents
     def prepare_mask_latents(
         self, mask, masked_image, batch_size, height, width, dtype, device, generator, do_classifier_free_guidance
     ):
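
`prepare_mask_latents` has to bring the pixel-space mask down to the latent grid before it can be concatenated with the latents. A minimal sketch of that resizing step, assuming the usual Stable Diffusion `vae_scale_factor` of 8 and a 4D `(N, 1, H, W)` mask:

```python
import torch
import torch.nn.functional as F


def resize_mask_to_latents(
    mask: torch.Tensor, height: int, width: int, vae_scale_factor: int = 8
) -> torch.Tensor:
    # Nearest-neighbor interpolation (F.interpolate's default) keeps the
    # mask binary while shrinking it to the latent resolution.
    return F.interpolate(
        mask, size=(height // vae_scale_factor, width // vae_scale_factor)
    )


mask = (torch.rand(2, 1, 512, 512) > 0.5).float()
print(resize_mask_to_latents(mask, 512, 512).shape)  # torch.Size([2, 1, 64, 64])
```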
@@ -786,6 +788,9 @@ def prepare_mask_latents(
             torch.cat([masked_image_latents] * 2) if do_classifier_free_guidance else masked_image_latents
         )

+        # star
+
+
         # aligning device to prevent device errors when concatenating it with the latent model input
         masked_image_latents = masked_image_latents.to(device=device, dtype=dtype)
         return mask, masked_image_latents
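
The explicit `.to(device=device, dtype=dtype)` matters because these tensors are later concatenated channel-wise with the latent model input, and `torch.cat` requires matching device and dtype. A small self-contained sketch of how the 9-channel inpainting UNet input is assembled (shapes are illustrative: 4 latent channels + 1 mask channel + 4 masked-image channels):

```python
import torch

latents = torch.randn(2, 4, 64, 64)
mask = torch.rand(2, 1, 64, 64)
masked_image_latents = torch.randn(2, 4, 64, 64)

# Mismatched device/dtype here is a common source of runtime errors,
# hence the pipeline's explicit .to(...) before concatenation.
masked_image_latents = masked_image_latents.to(latents.device, latents.dtype)
unet_input = torch.cat([latents, mask, masked_image_latents], dim=1)
print(unet_input.shape)  # torch.Size([2, 9, 64, 64])
```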
@@ -996,23 +1001,8 @@ def __call__(
         height = height or self.unet.config.sample_size * self.vae_scale_factor
         width = width or self.unet.config.sample_size * self.vae_scale_factor
         # to deal with lora scaling and other possible forward hooks
-
+
         # 1. Check inputs. Raise error if not correct
-        # prompt,
-        # image,
-        # mask_image,
-        # height,
-        # width,
-        # strength,
-        # callback_steps,
-        # output_type,
-        # negative_prompt=None,
-        # prompt_embeds=None,
-        # negative_prompt_embeds=None,
-        # ip_adapter_image=None,
-        # ip_adapter_image_embeds=None,
-        # callback_on_step_end_tensor_inputs=None,
-        # padding_mask_crop=None,
         self.check_inputs(
             prompt,
             image,
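
The default-resolution lines at the top of this hunk derive `height` and `width` from the UNet's latent sample size times the VAE downsampling factor. A quick worked example with the usual Stable Diffusion 1.5 values (assumed here, not read from a checkpoint):

```python
# Assumed SD 1.5 values: unet.config.sample_size == 64 and
# vae_scale_factor == 2 ** (len(vae.config.block_out_channels) - 1) == 8.
sample_size = 64
vae_scale_factor = 8

height = None  # caller did not pass a resolution
height = height or sample_size * vae_scale_factor
print(height)  # 512
```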
@@ -1066,7 +1056,7 @@ def __call__(
             clip_skip=self.clip_skip,
         )

-        # 4. set timesteps
+        # 4. set timesteps
         timesteps, num_inference_steps = retrieve_timesteps(
             self.scheduler, num_inference_steps, device, timesteps, sigmas
         )
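
`retrieve_timesteps` is a module-level helper in diffusers pipelines that forwards to `scheduler.set_timesteps` and lets callers pass either a step count, explicit `timesteps`, or `sigmas` (the latter two only for schedulers whose `set_timesteps` accepts them). A small usage sketch of the underlying call:

```python
from diffusers import DDIMScheduler

# Drive the scheduler directly with a step count, as retrieve_timesteps
# does when neither custom timesteps nor sigmas are supplied.
scheduler = DDIMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(num_inference_steps=30)
print(scheduler.timesteps.shape)  # torch.Size([30])
print(scheduler.timesteps[:3])    # the noisiest timesteps come first
```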
@@ -1098,7 +1088,7 @@ def __call__(
         )
         init_image = init_image.to(dtype=torch.float32)

-        # 6. Prepare latent variables
+        # 6. Prepare latent variables
         num_channels_latents = self.vae.config.latent_channels
         num_channels_unet = self.unet.config.in_channels
         return_image_latents = num_channels_unet == 4
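
The `num_channels_unet == 4` test distinguishes the two UNet variants this pipeline supports: dedicated Stable Diffusion inpainting checkpoints ship a 9-channel UNet (mask conditioning built in), while a plain text-to-image UNet has 4 input channels and the pipeline must blend the known-region latents back in itself, which is why it then needs the image latents returned. A sketch of the channel budget (standard SD inpainting layout assumed):

```python
# Input channels of the dedicated inpainting UNet:
noisy_latents = 4         # regular denoising input
mask = 1                  # inpaint mask at latent resolution
masked_image_latents = 4  # VAE encoding of the masked image
assert noisy_latents + mask + masked_image_latents == 9

# A 4-channel UNet has no mask conditioning, so the pipeline instead
# re-imposes the known region on the latents after each scheduler step.
```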
@@ -1171,7 +1161,7 @@ def __call__(
             raise ValueError(
                 f"The unet {self.unet.__class__} should have either 4 or 9 input channels, not {self.unet.config.in_channels}."
             )
-        # 8.1 Prepare extra step kwargs.
+        # 9. Prepare extra step kwargs.
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

         # For classifier free guidance, we need to do two forward passes.
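
In practice the two classifier-free-guidance passes are batched into one: the latents are duplicated, the UNet runs once on the doubled batch, and the output is split and recombined with the guidance scale. A minimal sketch of the standard recombination:

```python
import torch

guidance_scale = 7.5
# UNet output on the doubled batch (2 images duplicated to 4 for CFG;
# shapes are illustrative).
noise_pred = torch.randn(4, 4, 64, 64)

noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
noise_pred = noise_pred_uncond + guidance_scale * (
    noise_pred_text - noise_pred_uncond
)
print(noise_pred.shape)  # torch.Size([2, 4, 64, 64])
```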
@@ -1210,22 +1200,22 @@ def __call__(



-        # 6.1 Add image embeds for IP-Adapter
+        # 9.1 Add image embeds for IP-Adapter
         added_cond_kwargs = (
             {"image_embeds": ip_adapter_image_embeds}
             if (ip_adapter_image is not None or ip_adapter_image_embeds is not None)
             else None
         )

-        # 6.2 Optionally get Guidance Scale Embedding
+        # 9.2 Optionally get Guidance Scale Embedding
         timestep_cond = None
         if self.unet.config.time_cond_proj_dim is not None:
             guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt)
             timestep_cond = self.get_guidance_scale_embedding(
                 guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim
             ).to(device=device, dtype=latents.dtype)

-        # 7. Denoising loop
+        # 10. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order

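
`time_cond_proj_dim` is set on distilled UNets (e.g. LCM-style checkpoints) that take the guidance scale itself as an extra conditioning signal, so guidance is baked into a single forward pass instead of the doubled CFG batch. A sketch mirroring the sinusoidal-embedding pattern of `get_guidance_scale_embedding` (exact constants and the odd-dimension zero-padding of the real implementation are assumed/omitted):

```python
import torch


def guidance_scale_embedding(w: torch.Tensor, embedding_dim: int = 512):
    # w holds guidance_scale - 1 per sample, as in the hunk above.
    w = w * 1000.0
    half_dim = embedding_dim // 2
    emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim) * -emb)
    emb = w[:, None] * emb[None, :]
    # Concatenate sin/cos halves into the final embedding.
    return torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)


print(guidance_scale_embedding(torch.tensor([6.5]), 256).shape)  # (1, 256)
```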