Commit 6b850d5
feat: Inpaint & Outpaint Improvements (#4408)
## What type of PR is this? (check all applicable)

- [x] Feature
- [x] Optimization

## Have you discussed this change with the InvokeAI team?

- [x] Yes

## Description

# Coherence Mode

A new parameter called Coherence Mode has been added to the Coherence Pass settings. It controls what kind of Coherence Pass is run after Inpainting and Outpainting.

- Unmasked: performs a complete unmasked image-to-image pass on the entire generation.
- Mask: performs a masked image-to-image pass using your input mask as the coherence mask.
- Mask Edge [DEFAULT]: performs a masked image-to-image pass on the edges of your mask to try and clear out the seams.

# Why the Masked Coherence Modes?

One issue with an unmasked coherence pass arises when the diffusion process tries to align detailed or organic objects. Because image-to-image tends to change the image slightly even at low strengths, the paste-back step ends up slightly misaligned. Providing the mask to the Coherence Pass eliminates this in those cases. While it is impossible to address this for every image out there, these options let the user automate a lot of it. For everything else, there is manual paint-over with inpaint.

# Graph Improvements

The graphs have been refined quite a bit. We no longer blur the masks manually for outpainting; instead, we dilate the mask in proportion to the blur size while pasting back, which let us remove quite a few nodes that handled this in the older graph. The graphs are also much cleaner because Scaled Dimensions and Coherence Mode are now handled completely independently. Inpainting results look very promising, especially with the Mask Edge mode.

---

# New Infill Methods [Experimental]

We are currently trying out various new infill methods to see which perform best in outpainting. We may keep all of them or none of them; this will be decided as we test more.

## LaMa Infill

- Re-enabled LaMa infill in the UI.
- We are trying to get this to work without a memory overhead.

To use LaMa, you need to manually download and place the LaMa JIT model at `models/core/misc/lama/lama.pt`. You can download the JIT model from Sanster [here](https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt) and rename it to `lama.pt`, or you can use the script in the original LaMa repo to convert the base model to a JIT model yourself.

## CV2 Infill

- Added a new infilling method using CV2's Inpaint.

## Patchmatch Rescaling

The Patchmatch infill input image is now downscaled before infilling. Patchmatch can be really slow at large resolutions, and this is a decent way to get around that. Downscaling might also produce a better patch match by preventing large areas from being infilled with repeating patches. But that is just the theory; we are still testing it.

## [optional] Are there any post deployment tasks we need to perform?

- If we decide to keep LaMa infill, we will need to host the model and update the installer to download it as a core model.
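To make the Mask Edge mode concrete: the idea is to repaint only a thin band around the mask boundary. Below is a minimal illustrative sketch of one way to extract such a band using a morphological gradient; this is not the repository's `MaskEdgeInvocation` (its gradient-based implementation appears in the `image.py` diff further down), and `band_px` is a hypothetical parameter.

```python
import cv2
import numpy as np
from PIL import Image


def mask_edge_band(mask: Image.Image, band_px: int = 8) -> Image.Image:
    """Return a mask covering only a thin band around the mask's boundary,
    so a coherence image-to-image pass can repaint just the seam."""
    m = np.asarray(mask.convert("L"), dtype=np.uint8)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (band_px, band_px))
    # Morphological gradient = dilation minus erosion: nonzero only at edges.
    edge = cv2.morphologyEx(m, cv2.MORPH_GRADIENT, kernel)
    return Image.fromarray(edge)
```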
2 parents 52bd2bb + 3f3e0ab commit 6b850d5

34 files changed (+965, -213 lines)

invokeai/app/api/routers/app_info.py

Lines changed: 6 additions & 6 deletions
```diff
@@ -1,19 +1,19 @@
 import typing
 from enum import Enum
+from pathlib import Path
+
 from fastapi import Body
 from fastapi.routing import APIRouter
-from pathlib import Path
 from pydantic import BaseModel, Field
 
+from invokeai.app.invocations.upscale import ESRGAN_MODELS
+from invokeai.backend.image_util.invisible_watermark import InvisibleWatermark
 from invokeai.backend.image_util.patchmatch import PatchMatch
 from invokeai.backend.image_util.safety_checker import SafetyChecker
-from invokeai.backend.image_util.invisible_watermark import InvisibleWatermark
-from invokeai.app.invocations.upscale import ESRGAN_MODELS
-
+from invokeai.backend.util.logging import logging
 from invokeai.version import __version__
 
 from ..dependencies import ApiDependencies
-from invokeai.backend.util.logging import logging
 
 
 class LogLevel(int, Enum):
@@ -55,7 +55,7 @@ async def get_version() -> AppVersion:
 
 @app_router.get("/config", operation_id="get_config", status_code=200, response_model=AppConfig)
 async def get_config() -> AppConfig:
-    infill_methods = ["tile", "lama"]
+    infill_methods = ["tile", "lama", "cv2"]
     if PatchMatch.patchmatch_available():
         infill_methods.append("patchmatch")
 
```
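With this change, clients can discover the available infill methods from the config endpoint. A hypothetical check against a local instance (the host, port, and route prefix here are assumptions, not part of this diff):

```python
import requests

# Assumed default local address; adjust host/port/prefix for your deployment.
config = requests.get("http://127.0.0.1:9090/api/v1/app/config").json()
print(config["infill_methods"])  # e.g. ["patchmatch", "tile", "lama", "cv2"]
```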

invokeai/app/invocations/image.py

Lines changed: 7 additions & 2 deletions
```diff
@@ -563,7 +563,7 @@ class MaskEdgeInvocation(BaseInvocation):
         )
 
     def invoke(self, context: InvocationContext) -> ImageOutput:
-        mask = context.services.images.get_pil_image(self.image.image_name)
+        mask = context.services.images.get_pil_image(self.image.image_name).convert("L")
 
         npimg = numpy.asarray(mask, dtype=numpy.uint8)
         npgradient = numpy.uint8(255 * (1.0 - numpy.floor(numpy.abs(0.5 - numpy.float32(npimg) / 255.0) * 2.0)))
@@ -700,8 +700,13 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         # Blur the mask out (into init image) by specified amount
         if self.mask_blur_radius > 0:
             nm = numpy.asarray(pil_init_mask, dtype=numpy.uint8)
+            inverted_nm = 255 - nm
+            dilation_size = int(round(self.mask_blur_radius) + 20)
+            dilating_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilation_size, dilation_size))
+            inverted_dilated_nm = cv2.dilate(inverted_nm, dilating_kernel)
+            dilated_nm = 255 - inverted_dilated_nm
             nmd = cv2.erode(
-                nm,
+                dilated_nm,
                 kernel=numpy.ones((3, 3), dtype=numpy.uint8),
                 iterations=int(self.mask_blur_radius / 2),
             )
```
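The dilation step above is what replaces the old manual mask blurring: dilating the inverted mask grows the masked region by roughly `mask_blur_radius + 20` pixels, so the subsequent erode/blur and paste-back operate on freshly generated pixels instead of straddling the seam. A self-contained sketch of just that step, with a synthetic mask and a hypothetical blur radius:

```python
import cv2
import numpy as np

# Synthetic 64x64 mask: 255 = keep original pixels, 0 = regenerated region.
mask = np.full((64, 64), 255, dtype=np.uint8)
mask[24:40, 24:40] = 0

mask_blur_radius = 8  # hypothetical value

# Dilating the inverted mask grows the zero (regenerated) region.
inverted = 255 - mask
dilation_size = int(round(mask_blur_radius) + 20)
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilation_size, dilation_size))
dilated = 255 - cv2.dilate(inverted, kernel)

print((mask == 0).sum(), (dilated == 0).sum())  # the masked area grows substantially
```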

invokeai/app/invocations/infill.py

Lines changed: 56 additions & 7 deletions
```diff
@@ -8,19 +8,17 @@
 
 from invokeai.app.invocations.primitives import ColorField, ImageField, ImageOutput
 from invokeai.app.util.misc import SEED_MAX, get_random_seed
+from invokeai.backend.image_util.cv2_inpaint import cv2_inpaint
 from invokeai.backend.image_util.lama import LaMA
 from invokeai.backend.image_util.patchmatch import PatchMatch
 
 from ..models.image import ImageCategory, ResourceOrigin
 from .baseinvocation import BaseInvocation, InputField, InvocationContext, invocation
+from .image import PIL_RESAMPLING_MAP, PIL_RESAMPLING_MODES
 
 
 def infill_methods() -> list[str]:
-    methods = [
-        "tile",
-        "solid",
-        "lama",
-    ]
+    methods = ["tile", "solid", "lama", "cv2"]
     if PatchMatch.patchmatch_available():
         methods.insert(0, "patchmatch")
     return methods
@@ -49,6 +47,10 @@ def infill_patchmatch(im: Image.Image) -> Image.Image:
     return im_patched
 
 
+def infill_cv2(im: Image.Image) -> Image.Image:
+    return cv2_inpaint(im)
+
+
 def get_tile_images(image: np.ndarray, width=8, height=8):
     _nrows, _ncols, depth = image.shape
     _strides = image.strides
@@ -194,15 +196,35 @@ class InfillPatchMatchInvocation(BaseInvocation):
     """Infills transparent areas of an image using the PatchMatch algorithm"""
 
     image: ImageField = InputField(description="The image to infill")
+    downscale: float = InputField(default=2.0, gt=0, description="Run patchmatch on downscaled image to speedup infill")
+    resample_mode: PIL_RESAMPLING_MODES = InputField(default="bicubic", description="The resampling mode")
 
     def invoke(self, context: InvocationContext) -> ImageOutput:
-        image = context.services.images.get_pil_image(self.image.image_name)
+        image = context.services.images.get_pil_image(self.image.image_name).convert("RGBA")
+
+        resample_mode = PIL_RESAMPLING_MAP[self.resample_mode]
+
+        infill_image = image.copy()
+        width = int(image.width / self.downscale)
+        height = int(image.height / self.downscale)
+        infill_image = infill_image.resize(
+            (width, height),
+            resample=resample_mode,
+        )
 
         if PatchMatch.patchmatch_available():
-            infilled = infill_patchmatch(image.copy())
+            infilled = infill_patchmatch(infill_image)
         else:
             raise ValueError("PatchMatch is not available on this system")
 
+        infilled = infilled.resize(
+            (image.width, image.height),
+            resample=resample_mode,
+        )
+
+        infilled.paste(image, (0, 0), mask=image.split()[-1])
+        # image.paste(infilled, (0, 0), mask=image.split()[-1])
+
         image_dto = context.services.images.create(
             image=infilled,
             image_origin=ResourceOrigin.INTERNAL,
@@ -245,3 +267,30 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
             width=image_dto.width,
             height=image_dto.height,
         )
+
+
+@invocation("infill_cv2", title="CV2 Infill", tags=["image", "inpaint"], category="inpaint")
+class CV2InfillInvocation(BaseInvocation):
+    """Infills transparent areas of an image using OpenCV Inpainting"""
+
+    image: ImageField = InputField(description="The image to infill")
+
+    def invoke(self, context: InvocationContext) -> ImageOutput:
+        image = context.services.images.get_pil_image(self.image.image_name)
+
+        infilled = infill_cv2(image.copy())
+
+        image_dto = context.services.images.create(
+            image=infilled,
+            image_origin=ResourceOrigin.INTERNAL,
+            image_category=ImageCategory.GENERAL,
+            node_id=self.id,
+            session_id=context.graph_execution_state_id,
+            is_intermediate=self.is_intermediate,
+        )
+
+        return ImageOutput(
+            image=ImageField(image_name=image_dto.image_name),
+            width=image_dto.width,
+            height=image_dto.height,
+        )
```
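The new `invoke` flow boils down to: downscale, infill, upscale back, then paste the original opaque pixels over the result using the alpha channel as the mask. A standalone sketch of that pattern, where `infill` stands in for any infill callable such as `infill_patchmatch`:

```python
from PIL import Image


def downscaled_infill(image: Image.Image, infill, downscale: float = 2.0) -> Image.Image:
    """Run an infill function at reduced resolution, then restore the size
    and paste the original opaque pixels back over the result."""
    small = image.resize(
        (int(image.width / downscale), int(image.height / downscale)),
        resample=Image.Resampling.BICUBIC,
    )
    infilled = infill(small).resize(
        (image.width, image.height),
        resample=Image.Resampling.BICUBIC,
    )
    # image.split()[-1] is the alpha channel: only formerly transparent
    # pixels keep the infilled content; everything else stays untouched.
    infilled.paste(image, (0, 0), mask=image.split()[-1])
    return infilled
```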
invokeai/backend/image_util/cv2_inpaint.py (new file)

Lines changed: 20 additions & 0 deletions

```diff
@@ -0,0 +1,20 @@
+import cv2
+import numpy as np
+from PIL import Image
+
+
+def cv2_inpaint(image: Image.Image) -> Image.Image:
+    # Prepare Image
+    image_array = np.array(image.convert("RGB"))
+    image_cv = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)
+
+    # Prepare Mask From Alpha Channel
+    mask = image.split()[3].convert("RGB")
+    mask_array = np.array(mask)
+    mask_cv = cv2.cvtColor(mask_array, cv2.COLOR_BGR2GRAY)
+    mask_inv = cv2.bitwise_not(mask_cv)
+
+    # Inpaint Image
+    inpainted_result = cv2.inpaint(image_cv, mask_inv, 3, cv2.INPAINT_TELEA)
+    inpainted_image = Image.fromarray(cv2.cvtColor(inpainted_result, cv2.COLOR_BGR2RGB))
+    return inpainted_image
```
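A quick hypothetical smoke test for this helper: build an RGBA image with a fully transparent hole and let OpenCV's Telea inpainting fill it from the surrounding pixels.

```python
from PIL import Image, ImageDraw

from invokeai.backend.image_util.cv2_inpaint import cv2_inpaint

# Red canvas with a transparent ellipse; alpha 0 marks the area to infill.
img = Image.new("RGBA", (128, 128), (200, 40, 40, 255))
ImageDraw.Draw(img).ellipse((40, 40, 88, 88), fill=(0, 0, 0, 0))

result = cv2_inpaint(img)  # RGB image; the hole is filled from its surroundings
result.save("inpainted.png")
```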

invokeai/backend/image_util/lama.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -5,6 +5,7 @@
 import torch
 from PIL import Image
 
+import invokeai.backend.util.logging as logger
 from invokeai.app.services.config import get_invokeai_config
 from invokeai.backend.util.devices import choose_torch_device
 
@@ -19,7 +20,7 @@ def norm_img(np_img):
 
 def load_jit_model(url_or_path, device):
     model_path = url_or_path
-    print(f"Loading model from: {model_path}")
+    logger.info(f"Loading model from: {model_path}")
     model = torch.jit.load(model_path, map_location="cpu").to(device)
     model.eval()
     return model
@@ -52,5 +53,6 @@ def __call__(self, input_image: Image.Image, *args: Any, **kwds: Any) -> Any:
 
         del model
         gc.collect()
+        torch.cuda.empty_cache()
 
         return infilled_image
```
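The added `torch.cuda.empty_cache()` releases the CUDA caching allocator's blocks after the JIT model is deleted, which is what keeps the LaMa pass from holding on to GPU memory between infills. A sketch of the general load-run-free pattern, assuming a single-use TorchScript model (an illustration, not the wrapper's exact code):

```python
import gc

import torch


def run_jit_model_once(model_path: str, device: torch.device, *inputs):
    """Load a TorchScript model, run it once, and free its GPU memory."""
    model = torch.jit.load(model_path, map_location="cpu").to(device)
    model.eval()
    with torch.inference_mode():
        output = model(*inputs)
    del model
    gc.collect()
    if torch.cuda.is_available():
        # Return cached CUDA blocks to the driver so other processes can use them.
        torch.cuda.empty_cache()
    return output
```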

invokeai/backend/install/invokeai_configure.py

Lines changed: 11 additions & 0 deletions
```diff
@@ -290,9 +290,20 @@ def download_realesrgan():
         download_with_progress_bar(model["url"], config.models_path / model["dest"], model["description"])
 
 
+# ---------------------------------------------
+def download_lama():
+    logger.info("Installing lama infill model")
+    download_with_progress_bar(
+        "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt",
+        config.models_path / "core/misc/lama/lama.pt",
+        "lama infill model",
+    )
+
+
 # ---------------------------------------------
 def download_support_models():
     download_realesrgan()
+    download_lama()
     download_conversion_models()
 
 
```
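Until this installer change ships, the manual route from the PR description amounts to downloading the JIT checkpoint to the same destination the installer now uses. A hypothetical standalone script for that (not the repository's `download_with_progress_bar` helper):

```python
from pathlib import Path

import requests

url = "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt"
dest = Path("models/core/misc/lama/lama.pt")  # relative to your InvokeAI root

dest.parent.mkdir(parents=True, exist_ok=True)
with requests.get(url, stream=True, timeout=60) as resp:
    resp.raise_for_status()
    with open(dest, "wb") as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):
            f.write(chunk)
```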

invokeai/frontend/web/public/locales/en.json

Lines changed: 2 additions & 0 deletions
```diff
@@ -511,6 +511,7 @@
     "maskBlur": "Blur",
     "maskBlurMethod": "Blur Method",
     "coherencePassHeader": "Coherence Pass",
+    "coherenceMode": "Mode",
     "coherenceSteps": "Steps",
     "coherenceStrength": "Strength",
     "seamLowThreshold": "Low",
@@ -520,6 +521,7 @@
     "scaledHeight": "Scaled H",
     "infillMethod": "Infill Method",
     "tileSize": "Tile Size",
+    "patchmatchDownScaleSize": "Downscale",
     "boundingBoxHeader": "Bounding Box",
     "seamCorrectionHeader": "Seam Correction",
     "infillScalingHeader": "Infill and Scaling",
```

invokeai/frontend/web/src/features/canvas/components/IAICanvasToolbar/IAICanvasToolChooserOptions.tsx

Lines changed: 5 additions & 1 deletion
```diff
@@ -118,7 +118,11 @@ const IAICanvasToolChooserOptions = () => {
   useHotkeys(
     ['BracketLeft'],
     () => {
-      dispatch(setBrushSize(Math.max(brushSize - 5, 5)));
+      if (brushSize - 5 <= 5) {
+        dispatch(setBrushSize(Math.max(brushSize - 1, 1)));
+      } else {
+        dispatch(setBrushSize(Math.max(brushSize - 5, 1)));
+      }
     },
     {
       enabled: () => !isStaging,
```

invokeai/frontend/web/src/features/nodes/util/graphBuilders/addSDXLRefinerToGraph.ts

Lines changed: 6 additions & 3 deletions
```diff
@@ -10,7 +10,8 @@ import {
   CANVAS_OUTPUT,
   INPAINT_IMAGE_RESIZE_UP,
   LATENTS_TO_IMAGE,
-  MASK_BLUR,
+  MASK_COMBINE,
+  MASK_RESIZE_UP,
   METADATA_ACCUMULATOR,
   SDXL_CANVAS_IMAGE_TO_IMAGE_GRAPH,
   SDXL_CANVAS_INPAINT_GRAPH,
@@ -46,6 +47,8 @@ export const addSDXLRefinerToGraph = (
   const { seamlessXAxis, seamlessYAxis, vaePrecision } = state.generation;
   const { boundingBoxScaleMethod } = state.canvas;
 
+  const fp32 = vaePrecision === 'fp32';
+
   const isUsingScaledDimensions = ['auto', 'manual'].includes(
     boundingBoxScaleMethod
   );
@@ -231,7 +234,7 @@
     type: 'create_denoise_mask',
     id: SDXL_REFINER_INPAINT_CREATE_MASK,
     is_intermediate: true,
-    fp32: vaePrecision === 'fp32' ? true : false,
+    fp32,
   };
 
   if (isUsingScaledDimensions) {
@@ -257,7 +260,7 @@
   graph.edges.push(
     {
       source: {
-        node_id: MASK_BLUR,
+        node_id: isUsingScaledDimensions ? MASK_RESIZE_UP : MASK_COMBINE,
        field: 'image',
      },
      destination: {
```

invokeai/frontend/web/src/features/nodes/util/graphBuilders/addVAEToGraph.ts

Lines changed: 16 additions & 1 deletion
```diff
@@ -2,6 +2,7 @@ import { RootState } from 'app/store/store';
 import { NonNullableGraph } from 'features/nodes/types/types';
 import { MetadataAccumulatorInvocation } from 'services/api/types';
 import {
+  CANVAS_COHERENCE_INPAINT_CREATE_MASK,
   CANVAS_IMAGE_TO_IMAGE_GRAPH,
   CANVAS_INPAINT_GRAPH,
   CANVAS_OUTPAINT_GRAPH,
@@ -31,7 +32,7 @@ export const addVAEToGraph = (
   graph: NonNullableGraph,
   modelLoaderNodeId: string = MAIN_MODEL_LOADER
 ): void => {
-  const { vae } = state.generation;
+  const { vae, canvasCoherenceMode } = state.generation;
   const { boundingBoxScaleMethod } = state.canvas;
   const { shouldUseSDXLRefiner } = state.sdxl;
 
@@ -146,6 +147,20 @@
         },
       }
     );
+
+    // Handle Coherence Mode
+    if (canvasCoherenceMode !== 'unmasked') {
+      graph.edges.push({
+        source: {
+          node_id: isAutoVae ? modelLoaderNodeId : VAE_LOADER,
+          field: isAutoVae && isOnnxModel ? 'vae_decoder' : 'vae',
+        },
+        destination: {
+          node_id: CANVAS_COHERENCE_INPAINT_CREATE_MASK,
+          field: 'vae',
+        },
+      });
+    }
   }
 
   if (shouldUseSDXLRefiner) {
```
