Use non-inverted mask generally (except inpaint model handling)

StAlKeR7779 · StAlKeR7779 · commit 19c00241c6d5 · 2024-07-24T00:59:13.000+03:00
diff --git a/invokeai/app/invocations/denoise_latents.py b/invokeai/app/invocations/denoise_latents.py
@@ -674,7 +674,7 @@ def prep_inpaint_mask(
         else:
             masked_latents = torch.where(mask < 0.5, 0.0, latents)
 
-        return 1 - mask, masked_latents, self.denoise_mask.gradient
+        return mask, masked_latents, self.denoise_mask.gradient
 
     @staticmethod
     def prepare_noise_and_latents(
@@ -830,6 +830,8 @@ def _old_invoke(self, context: InvocationContext) -> LatentsOutput:
         seed, noise, latents = self.prepare_noise_and_latents(context, self.noise, self.latents)
 
         mask, masked_latents, gradient_mask = self.prep_inpaint_mask(context, latents)
+        if mask is not None:
+            mask = 1 - mask
 
         # TODO(ryand): I have hard-coded `do_classifier_free_guidance=True` to mirror the behaviour of ControlNets,
         # below. Investigate whether this is appropriate.
diff --git a/invokeai/backend/stable_diffusion/extensions/inpaint.py b/invokeai/backend/stable_diffusion/extensions/inpaint.py
@@ -25,7 +25,7 @@ def __init__(
         """Initialize InpaintExt.
         Args:
             mask (torch.Tensor): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
-                expected to be in the range [0, 1]. A value of 0 means that the corresponding 'pixel' should not be
+                expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
                 inpainted.
             is_gradient_mask (bool): If True, mask is interpreted as a gradient mask meaning that the mask values range
                 from 0 to 1. If False, mask is interpreted as binary mask meaning that the mask values are either 0 or
@@ -65,10 +65,10 @@ def _apply_mask(self, ctx: DenoiseContext, latents: torch.Tensor, t: torch.Tenso
         mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
         if self._is_gradient_mask:
             threshold = (t.item()) / ctx.scheduler.config.num_train_timesteps
-            mask_bool = mask > threshold
+            mask_bool = mask < 1 - threshold
             masked_input = torch.where(mask_bool, latents, mask_latents)
         else:
-            masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype))
+            masked_input = torch.lerp(latents, mask_latents.to(dtype=latents.dtype), mask.to(dtype=latents.dtype))
         return masked_input
 
     @callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
@@ -111,6 +111,6 @@ def apply_mask_to_step_output(self, ctx: DenoiseContext):
     @callback(ExtensionCallbackType.POST_DENOISE_LOOP)
     def restore_unmasked(self, ctx: DenoiseContext):
         if self._is_gradient_mask:
-            ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)
+            ctx.latents = torch.where(self._mask < 1, ctx.latents, ctx.inputs.orig_latents)
         else:
-            ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)
+            ctx.latents = torch.lerp(ctx.latents, ctx.inputs.orig_latents, self._mask)
diff --git a/invokeai/backend/stable_diffusion/extensions/inpaint_model.py b/invokeai/backend/stable_diffusion/extensions/inpaint_model.py
@@ -25,7 +25,7 @@ def __init__(
         """Initialize InpaintModelExt.
         Args:
             mask (Optional[torch.Tensor]): The inpainting mask. Shape: (1, 1, latent_height, latent_width). Values are
-                expected to be in the range [0, 1]. A value of 0 means that the corresponding 'pixel' should not be
+                expected to be in the range [0, 1]. A value of 1 means that the corresponding 'pixel' should not be
                 inpainted.
             masked_latents (Optional[torch.Tensor]): Latents of initial image, with masked out by black color inpainted area.
                 If mask provided, then too should be provided. Shape: (1, 1, latent_height, latent_width)
@@ -37,7 +37,10 @@ def __init__(
         if mask is not None and masked_latents is None:
             raise ValueError("Source image required for inpaint mask when inpaint model used!")
 
-        self._mask = mask
+        # Inverse mask, because inpaint models treat mask as: 0 - remain same, 1 - inpaint
+        self._mask = None
+        if mask is not None:
+            self._mask = 1 - mask
         self._masked_latents = masked_latents
         self._is_gradient_mask = is_gradient_mask