Skip to content

Commit 614d0c6

Browse files
yiyixuxu and sayakpaul
authored
remove the deprecated prepare_mask_and_masked_image function (#8512)
remove prepare mask fn. Co-authored-by: yiyixuxu <yixu310@gmail.com>. Co-authored-by: Sayak Paul <[email protected]>.
1 parent b1a2c0d commit 614d0c6

File tree

4 files changed

+0
-892
lines changed

4 files changed

+0
-892
lines changed

src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py

Lines changed: 0 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -118,129 +118,6 @@ def retrieve_latents(
118118
raise AttributeError("Could not access latents of provided encoder_output")
119119

120120

121-
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint.prepare_mask_and_masked_image
122-
def prepare_mask_and_masked_image(image, mask, height, width, return_image=False):
123-
"""
124-
Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline. This means that those inputs will be
125-
converted to ``torch.Tensor`` with shapes ``batch x channels x height x width`` where ``channels`` is ``3`` for the
126-
``image`` and ``1`` for the ``mask``.
127-
128-
The ``image`` will be converted to ``torch.float32`` and normalized to be in ``[-1, 1]``. The ``mask`` will be
129-
binarized (``mask > 0.5``) and cast to ``torch.float32`` too.
130-
131-
Args:
132-
image (Union[np.array, PIL.Image, torch.Tensor]): The image to inpaint.
133-
It can be a ``PIL.Image``, or a ``height x width x 3`` ``np.array`` or a ``channels x height x width``
134-
``torch.Tensor`` or a ``batch x channels x height x width`` ``torch.Tensor``.
135-
mask (_type_): The mask to apply to the image, i.e. regions to inpaint.
136-
It can be a ``PIL.Image``, or a ``height x width`` ``np.array`` or a ``1 x height x width``
137-
``torch.Tensor`` or a ``batch x 1 x height x width`` ``torch.Tensor``.
138-
139-
140-
Raises:
141-
ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
142-
should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
143-
TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
144-
(ot the other way around).
145-
146-
Returns:
147-
tuple[torch.Tensor]: The pair (mask, masked_image) as ``torch.Tensor`` with 4
148-
dimensions: ``batch x channels x height x width``.
149-
"""
150-
deprecation_message = "The prepare_mask_and_masked_image method is deprecated and will be removed in a future version. Please use VaeImageProcessor.preprocess instead"
151-
deprecate(
152-
"prepare_mask_and_masked_image",
153-
"0.30.0",
154-
deprecation_message,
155-
)
156-
if image is None:
157-
raise ValueError("`image` input cannot be undefined.")
158-
159-
if mask is None:
160-
raise ValueError("`mask_image` input cannot be undefined.")
161-
162-
if isinstance(image, torch.Tensor):
163-
if not isinstance(mask, torch.Tensor):
164-
raise TypeError(f"`image` is a torch.Tensor but `mask` (type: {type(mask)} is not")
165-
166-
# Batch single image
167-
if image.ndim == 3:
168-
assert image.shape[0] == 3, "Image outside a batch should be of shape (3, H, W)"
169-
image = image.unsqueeze(0)
170-
171-
# Batch and add channel dim for single mask
172-
if mask.ndim == 2:
173-
mask = mask.unsqueeze(0).unsqueeze(0)
174-
175-
# Batch single mask or add channel dim
176-
if mask.ndim == 3:
177-
# Single batched mask, no channel dim or single mask not batched but channel dim
178-
if mask.shape[0] == 1:
179-
mask = mask.unsqueeze(0)
180-
181-
# Batched masks no channel dim
182-
else:
183-
mask = mask.unsqueeze(1)
184-
185-
assert image.ndim == 4 and mask.ndim == 4, "Image and Mask must have 4 dimensions"
186-
assert image.shape[-2:] == mask.shape[-2:], "Image and Mask must have the same spatial dimensions"
187-
assert image.shape[0] == mask.shape[0], "Image and Mask must have the same batch size"
188-
189-
# Check image is in [-1, 1]
190-
if image.min() < -1 or image.max() > 1:
191-
raise ValueError("Image should be in [-1, 1] range")
192-
193-
# Check mask is in [0, 1]
194-
if mask.min() < 0 or mask.max() > 1:
195-
raise ValueError("Mask should be in [0, 1] range")
196-
197-
# Binarize mask
198-
mask[mask < 0.5] = 0
199-
mask[mask >= 0.5] = 1
200-
201-
# Image as float32
202-
image = image.to(dtype=torch.float32)
203-
elif isinstance(mask, torch.Tensor):
204-
raise TypeError(f"`mask` is a torch.Tensor but `image` (type: {type(image)} is not")
205-
else:
206-
# preprocess image
207-
if isinstance(image, (PIL.Image.Image, np.ndarray)):
208-
image = [image]
209-
if isinstance(image, list) and isinstance(image[0], PIL.Image.Image):
210-
# resize all images w.r.t passed height an width
211-
image = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in image]
212-
image = [np.array(i.convert("RGB"))[None, :] for i in image]
213-
image = np.concatenate(image, axis=0)
214-
elif isinstance(image, list) and isinstance(image[0], np.ndarray):
215-
image = np.concatenate([i[None, :] for i in image], axis=0)
216-
217-
image = image.transpose(0, 3, 1, 2)
218-
image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
219-
220-
# preprocess mask
221-
if isinstance(mask, (PIL.Image.Image, np.ndarray)):
222-
mask = [mask]
223-
224-
if isinstance(mask, list) and isinstance(mask[0], PIL.Image.Image):
225-
mask = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in mask]
226-
mask = np.concatenate([np.array(m.convert("L"))[None, None, :] for m in mask], axis=0)
227-
mask = mask.astype(np.float32) / 255.0
228-
elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
229-
mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
230-
231-
mask[mask < 0.5] = 0
232-
mask[mask >= 0.5] = 1
233-
mask = torch.from_numpy(mask)
234-
235-
masked_image = image * (mask < 0.5)
236-
237-
# n.b. ensure backwards compatibility as old function does not return image
238-
if return_image:
239-
return mask, masked_image, image
240-
241-
return mask, masked_image
242-
243-
244121
class StableDiffusionControlNetInpaintPipeline(
245122
DiffusionPipeline,
246123
StableDiffusionMixin,

src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py

Lines changed: 0 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import inspect
1616
from typing import Any, Callable, Dict, List, Optional, Union
1717

18-
import numpy as np
1918
import PIL.Image
2019
import torch
2120
from packaging import version
@@ -38,128 +37,6 @@
3837
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
3938

4039

41-
def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool = False):
42-
"""
43-
Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline. This means that those inputs will be
44-
converted to ``torch.Tensor`` with shapes ``batch x channels x height x width`` where ``channels`` is ``3`` for the
45-
``image`` and ``1`` for the ``mask``.
46-
47-
The ``image`` will be converted to ``torch.float32`` and normalized to be in ``[-1, 1]``. The ``mask`` will be
48-
binarized (``mask > 0.5``) and cast to ``torch.float32`` too.
49-
50-
Args:
51-
image (Union[np.array, PIL.Image, torch.Tensor]): The image to inpaint.
52-
It can be a ``PIL.Image``, or a ``height x width x 3`` ``np.array`` or a ``channels x height x width``
53-
``torch.Tensor`` or a ``batch x channels x height x width`` ``torch.Tensor``.
54-
mask (_type_): The mask to apply to the image, i.e. regions to inpaint.
55-
It can be a ``PIL.Image``, or a ``height x width`` ``np.array`` or a ``1 x height x width``
56-
``torch.Tensor`` or a ``batch x 1 x height x width`` ``torch.Tensor``.
57-
58-
59-
Raises:
60-
ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
61-
should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
62-
TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
63-
(ot the other way around).
64-
65-
Returns:
66-
tuple[torch.Tensor]: The pair (mask, masked_image) as ``torch.Tensor`` with 4
67-
dimensions: ``batch x channels x height x width``.
68-
"""
69-
deprecation_message = "The prepare_mask_and_masked_image method is deprecated and will be removed in a future version. Please use VaeImageProcessor.preprocess instead"
70-
deprecate(
71-
"prepare_mask_and_masked_image",
72-
"0.30.0",
73-
deprecation_message,
74-
)
75-
if image is None:
76-
raise ValueError("`image` input cannot be undefined.")
77-
78-
if mask is None:
79-
raise ValueError("`mask_image` input cannot be undefined.")
80-
81-
if isinstance(image, torch.Tensor):
82-
if not isinstance(mask, torch.Tensor):
83-
raise TypeError(f"`image` is a torch.Tensor but `mask` (type: {type(mask)} is not")
84-
85-
# Batch single image
86-
if image.ndim == 3:
87-
assert image.shape[0] == 3, "Image outside a batch should be of shape (3, H, W)"
88-
image = image.unsqueeze(0)
89-
90-
# Batch and add channel dim for single mask
91-
if mask.ndim == 2:
92-
mask = mask.unsqueeze(0).unsqueeze(0)
93-
94-
# Batch single mask or add channel dim
95-
if mask.ndim == 3:
96-
# Single batched mask, no channel dim or single mask not batched but channel dim
97-
if mask.shape[0] == 1:
98-
mask = mask.unsqueeze(0)
99-
100-
# Batched masks no channel dim
101-
else:
102-
mask = mask.unsqueeze(1)
103-
104-
assert image.ndim == 4 and mask.ndim == 4, "Image and Mask must have 4 dimensions"
105-
assert image.shape[-2:] == mask.shape[-2:], "Image and Mask must have the same spatial dimensions"
106-
assert image.shape[0] == mask.shape[0], "Image and Mask must have the same batch size"
107-
108-
# Check image is in [-1, 1]
109-
if image.min() < -1 or image.max() > 1:
110-
raise ValueError("Image should be in [-1, 1] range")
111-
112-
# Check mask is in [0, 1]
113-
if mask.min() < 0 or mask.max() > 1:
114-
raise ValueError("Mask should be in [0, 1] range")
115-
116-
# Binarize mask
117-
mask[mask < 0.5] = 0
118-
mask[mask >= 0.5] = 1
119-
120-
# Image as float32
121-
image = image.to(dtype=torch.float32)
122-
elif isinstance(mask, torch.Tensor):
123-
raise TypeError(f"`mask` is a torch.Tensor but `image` (type: {type(image)} is not")
124-
else:
125-
# preprocess image
126-
if isinstance(image, (PIL.Image.Image, np.ndarray)):
127-
image = [image]
128-
if isinstance(image, list) and isinstance(image[0], PIL.Image.Image):
129-
# resize all images w.r.t passed height an width
130-
image = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in image]
131-
image = [np.array(i.convert("RGB"))[None, :] for i in image]
132-
image = np.concatenate(image, axis=0)
133-
elif isinstance(image, list) and isinstance(image[0], np.ndarray):
134-
image = np.concatenate([i[None, :] for i in image], axis=0)
135-
136-
image = image.transpose(0, 3, 1, 2)
137-
image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
138-
139-
# preprocess mask
140-
if isinstance(mask, (PIL.Image.Image, np.ndarray)):
141-
mask = [mask]
142-
143-
if isinstance(mask, list) and isinstance(mask[0], PIL.Image.Image):
144-
mask = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in mask]
145-
mask = np.concatenate([np.array(m.convert("L"))[None, None, :] for m in mask], axis=0)
146-
mask = mask.astype(np.float32) / 255.0
147-
elif isinstance(mask, list) and isinstance(mask[0], np.ndarray):
148-
mask = np.concatenate([m[None, None, :] for m in mask], axis=0)
149-
150-
mask[mask < 0.5] = 0
151-
mask[mask >= 0.5] = 1
152-
mask = torch.from_numpy(mask)
153-
154-
masked_image = image * (mask < 0.5)
155-
156-
# n.b. ensure backwards compatibility as old function does not return image
157-
if return_image:
158-
return mask, masked_image, image
159-
160-
return mask, masked_image
161-
162-
16340
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
16441
def retrieve_latents(
16542
encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"

src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py

Lines changed: 0 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -132,124 +132,6 @@ def mask_pil_to_torch(mask, height, width):
132132
return mask
133133

134134

135-
def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool = False):
136-
"""
137-
Prepares a pair (image, mask) to be consumed by the Stable Diffusion pipeline. This means that those inputs will be
138-
converted to ``torch.Tensor`` with shapes ``batch x channels x height x width`` where ``channels`` is ``3`` for the
139-
``image`` and ``1`` for the ``mask``.
140-
141-
The ``image`` will be converted to ``torch.float32`` and normalized to be in ``[-1, 1]``. The ``mask`` will be
142-
binarized (``mask > 0.5``) and cast to ``torch.float32`` too.
143-
144-
Args:
145-
image (Union[np.array, PIL.Image, torch.Tensor]): The image to inpaint.
146-
It can be a ``PIL.Image``, or a ``height x width x 3`` ``np.array`` or a ``channels x height x width``
147-
``torch.Tensor`` or a ``batch x channels x height x width`` ``torch.Tensor``.
148-
mask (_type_): The mask to apply to the image, i.e. regions to inpaint.
149-
It can be a ``PIL.Image``, or a ``height x width`` ``np.array`` or a ``1 x height x width``
150-
``torch.Tensor`` or a ``batch x 1 x height x width`` ``torch.Tensor``.
151-
152-
153-
Raises:
154-
ValueError: ``torch.Tensor`` images should be in the ``[-1, 1]`` range. ValueError: ``torch.Tensor`` mask
155-
should be in the ``[0, 1]`` range. ValueError: ``mask`` and ``image`` should have the same spatial dimensions.
156-
TypeError: ``mask`` is a ``torch.Tensor`` but ``image`` is not
157-
(ot the other way around).
158-
159-
Returns:
160-
tuple[torch.Tensor]: The pair (mask, masked_image) as ``torch.Tensor`` with 4
161-
dimensions: ``batch x channels x height x width``.
162-
"""
163-
164-
# checkpoint. TOD(Yiyi) - need to clean this up later
165-
deprecation_message = "The prepare_mask_and_masked_image method is deprecated and will be removed in a future version. Please use VaeImageProcessor.preprocess instead"
166-
deprecate(
167-
"prepare_mask_and_masked_image",
168-
"0.30.0",
169-
deprecation_message,
170-
)
171-
if image is None:
172-
raise ValueError("`image` input cannot be undefined.")
173-
174-
if mask is None:
175-
raise ValueError("`mask_image` input cannot be undefined.")
176-
177-
if isinstance(image, torch.Tensor):
178-
if not isinstance(mask, torch.Tensor):
179-
mask = mask_pil_to_torch(mask, height, width)
180-
181-
if image.ndim == 3:
182-
image = image.unsqueeze(0)
183-
184-
# Batch and add channel dim for single mask
185-
if mask.ndim == 2:
186-
mask = mask.unsqueeze(0).unsqueeze(0)
187-
188-
# Batch single mask or add channel dim
189-
if mask.ndim == 3:
190-
# Single batched mask, no channel dim or single mask not batched but channel dim
191-
if mask.shape[0] == 1:
192-
mask = mask.unsqueeze(0)
193-
194-
# Batched masks no channel dim
195-
else:
196-
mask = mask.unsqueeze(1)
197-
198-
assert image.ndim == 4 and mask.ndim == 4, "Image and Mask must have 4 dimensions"
199-
# assert image.shape[-2:] == mask.shape[-2:], "Image and Mask must have the same spatial dimensions"
200-
assert image.shape[0] == mask.shape[0], "Image and Mask must have the same batch size"
201-
202-
# Check image is in [-1, 1]
203-
# if image.min() < -1 or image.max() > 1:
204-
# raise ValueError("Image should be in [-1, 1] range")
205-
206-
# Check mask is in [0, 1]
207-
if mask.min() < 0 or mask.max() > 1:
208-
raise ValueError("Mask should be in [0, 1] range")
209-
210-
# Binarize mask
211-
mask[mask < 0.5] = 0
212-
mask[mask >= 0.5] = 1
213-
214-
# Image as float32
215-
image = image.to(dtype=torch.float32)
216-
elif isinstance(mask, torch.Tensor):
217-
raise TypeError(f"`mask` is a torch.Tensor but `image` (type: {type(image)} is not")
218-
else:
219-
# preprocess image
220-
if isinstance(image, (PIL.Image.Image, np.ndarray)):
221-
image = [image]
222-
if isinstance(image, list) and isinstance(image[0], PIL.Image.Image):
223-
# resize all images w.r.t passed height an width
224-
image = [i.resize((width, height), resample=PIL.Image.LANCZOS) for i in image]
225-
image = [np.array(i.convert("RGB"))[None, :] for i in image]
226-
image = np.concatenate(image, axis=0)
227-
elif isinstance(image, list) and isinstance(image[0], np.ndarray):
228-
image = np.concatenate([i[None, :] for i in image], axis=0)
229-
230-
image = image.transpose(0, 3, 1, 2)
231-
image = torch.from_numpy(image).to(dtype=torch.float32) / 127.5 - 1.0
232-
233-
mask = mask_pil_to_torch(mask, height, width)
234-
mask[mask < 0.5] = 0
235-
mask[mask >= 0.5] = 1
236-
237-
if image.shape[1] == 4:
238-
# images are in latent space and thus can't
239-
# be masked set masked_image to None
240-
# we assume that the checkpoint is not an inpainting
241-
# checkpoint. TOD(Yiyi) - need to clean this up later
242-
masked_image = None
243-
else:
244-
masked_image = image * (mask < 0.5)
245-
246-
# n.b. ensure backwards compatibility as old function does not return image
247-
if return_image:
248-
return mask, masked_image, image
249-
250-
return mask, masked_image
251-
252-
253135
# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
254136
def retrieve_latents(
255137
encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"

0 commit comments

Comments
 (0)