Skip to content
Merged
11 changes: 10 additions & 1 deletion invokeai/app/invocations/denoise_latents.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from invokeai.app.util.controlnet_utils import prepare_control_image
from invokeai.backend.ip_adapter.ip_adapter import IPAdapter
from invokeai.backend.lora import LoRAModelRaw
from invokeai.backend.model_manager import BaseModelType
from invokeai.backend.model_manager import BaseModelType, ModelVariantType
from invokeai.backend.model_patcher import ModelPatcher
from invokeai.backend.stable_diffusion import PipelineIntermediateState, set_seamless
from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext, DenoiseInputs
Expand All @@ -58,6 +58,8 @@
from invokeai.backend.stable_diffusion.diffusion.custom_atttention import CustomAttnProcessor2_0
from invokeai.backend.stable_diffusion.diffusion_backend import StableDiffusionBackend
from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.inpaint import InpaintExt
from invokeai.backend.stable_diffusion.extensions.inpaint_model import InpaintModelExt
from invokeai.backend.stable_diffusion.extensions.preview import PreviewExt
from invokeai.backend.stable_diffusion.extensions_manager import ExtensionsManager
from invokeai.backend.stable_diffusion.schedulers import SCHEDULER_MAP
Expand Down Expand Up @@ -790,6 +792,13 @@ def step_callback(state: PipelineIntermediateState) -> None:

ext_manager.add_extension(PreviewExt(step_callback))

### inpaint
mask, masked_latents, is_gradient_mask = self.prep_inpaint_mask(context, latents)
if unet_config.variant == ModelVariantType.Inpaint:
ext_manager.add_extension(InpaintModelExt(mask, masked_latents, is_gradient_mask))
elif mask is not None:
ext_manager.add_extension(InpaintExt(mask, is_gradient_mask))

# ext: t2i/ip adapter
ext_manager.run_callback(ExtensionCallbackType.SETUP, denoise_ctx)

Expand Down
92 changes: 92 additions & 0 deletions invokeai/backend/stable_diffusion/extensions/inpaint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Optional

import einops
import torch
from diffusers import UNet2DConditionModel

from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback

if TYPE_CHECKING:
from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext


class InpaintExt(ExtensionBase):
def __init__(
self,
mask: torch.Tensor,
is_gradient_mask: bool,
):
super().__init__()
self._mask = mask
self._is_gradient_mask = is_gradient_mask
self._noise: Optional[torch.Tensor] = None

@staticmethod
def _is_normal_model(unet: UNet2DConditionModel):
return unet.conv_in.in_channels == 4

def _apply_mask(self, ctx: DenoiseContext, latents: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
batch_size = latents.size(0)
mask = einops.repeat(self._mask, "b c h w -> (repeat b) c h w", repeat=batch_size)
if t.dim() == 0:
# some schedulers expect t to be one-dimensional.
# TODO: file diffusers bug about inconsistency?
t = einops.repeat(t, "-> batch", batch=batch_size)
# Noise shouldn't be re-randomized between steps here. The multistep schedulers
# get very confused about what is happening from step to step when we do that.
mask_latents = ctx.scheduler.add_noise(ctx.inputs.orig_latents, self._noise, t)
# TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already?
# mask_latents = self.scheduler.scale_model_input(mask_latents, t)
mask_latents = einops.repeat(mask_latents, "b c h w -> (repeat b) c h w", repeat=batch_size)
if self._is_gradient_mask:
threshhold = (t.item()) / ctx.scheduler.config.num_train_timesteps
mask_bool = mask > threshhold # I don't know when mask got inverted, but it did
masked_input = torch.where(mask_bool, latents, mask_latents)
else:
masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype))
return masked_input

@callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
def init_tensors(self, ctx: DenoiseContext):
if not self._is_normal_model(ctx.unet):
raise Exception("InpaintExt should be used only on normal models!")

self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)

self._noise = ctx.inputs.noise
if self._noise is None:
self._noise = torch.randn(
ctx.latents.shape,
dtype=torch.float32,
device="cpu",
generator=torch.Generator(device="cpu").manual_seed(ctx.seed),
).to(device=ctx.latents.device, dtype=ctx.latents.dtype)

# TODO: order value
@callback(ExtensionCallbackType.PRE_STEP, order=-100)
def apply_mask_to_initial_latents(self, ctx: DenoiseContext):
ctx.latents = self._apply_mask(ctx, ctx.latents, ctx.timestep)

# TODO: order value
# TODO: redo this with preview events rewrite
@callback(ExtensionCallbackType.POST_STEP, order=-100)
def apply_mask_to_step_output(self, ctx: DenoiseContext):
timestep = ctx.scheduler.timesteps[-1]
if hasattr(ctx.step_output, "denoised"):
ctx.step_output.denoised = self._apply_mask(ctx, ctx.step_output.denoised, timestep)
elif hasattr(ctx.step_output, "pred_original_sample"):
ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.pred_original_sample, timestep)
else:
ctx.step_output.pred_original_sample = self._apply_mask(ctx, ctx.step_output.prev_sample, timestep)

# TODO: should here be used order?
# restore unmasked part after the last step is completed
@callback(ExtensionCallbackType.POST_DENOISE_LOOP)
def restore_unmasked(self, ctx: DenoiseContext):
if self._is_gradient_mask:
ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)
else:
ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)
66 changes: 66 additions & 0 deletions invokeai/backend/stable_diffusion/extensions/inpaint_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Optional

import torch
from diffusers import UNet2DConditionModel

from invokeai.backend.stable_diffusion.extension_callback_type import ExtensionCallbackType
from invokeai.backend.stable_diffusion.extensions.base import ExtensionBase, callback

if TYPE_CHECKING:
from invokeai.backend.stable_diffusion.denoise_context import DenoiseContext


class InpaintModelExt(ExtensionBase):
def __init__(
self,
mask: Optional[torch.Tensor],
masked_latents: Optional[torch.Tensor],
is_gradient_mask: bool,
):
super().__init__()
if mask is not None and masked_latents is None:
raise ValueError("Source image required for inpaint mask when inpaint model used!")

self._mask = mask
self._masked_latents = masked_latents
self._is_gradient_mask = is_gradient_mask

@staticmethod
def _is_inpaint_model(unet: UNet2DConditionModel):
return unet.conv_in.in_channels == 9

@callback(ExtensionCallbackType.PRE_DENOISE_LOOP)
def init_tensors(self, ctx: DenoiseContext):
if not self._is_inpaint_model(ctx.unet):
raise Exception("InpaintModelExt should be used only on inpaint models!")

if self._mask is None:
self._mask = torch.ones_like(ctx.latents[:1, :1])
self._mask = self._mask.to(device=ctx.latents.device, dtype=ctx.latents.dtype)

if self._masked_latents is None:
self._masked_latents = torch.zeros_like(ctx.latents[:1])
self._masked_latents = self._masked_latents.to(device=ctx.latents.device, dtype=ctx.latents.dtype)

# TODO: any ideas about order value?
# do last so that other extensions works with normal latents
@callback(ExtensionCallbackType.PRE_UNET, order=1000)
def append_inpaint_layers(self, ctx: DenoiseContext):
batch_size = ctx.unet_kwargs.sample.shape[0]
b_mask = torch.cat([self._mask] * batch_size)
b_masked_latents = torch.cat([self._masked_latents] * batch_size)
ctx.unet_kwargs.sample = torch.cat(
[ctx.unet_kwargs.sample, b_mask, b_masked_latents],
dim=1,
)

# TODO: should here be used order?
# restore unmasked part as inpaint model can change unmasked part slightly
@callback(ExtensionCallbackType.POST_DENOISE_LOOP)
def restore_unmasked(self, ctx: DenoiseContext):
if self._is_gradient_mask:
ctx.latents = torch.where(self._mask > 0, ctx.latents, ctx.inputs.orig_latents)
else:
ctx.latents = torch.lerp(ctx.inputs.orig_latents, ctx.latents, self._mask)