Skip to content

Commit 5b84e11

Browse files
Suggested changes
Co-Authored-By: Ryan Dick <[email protected]>
1 parent 6af659b commit 5b84e11

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

invokeai/backend/stable_diffusion/extensions/t2i_adapter.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
--- a/invokeai/backend/stable_diffusion/extensions/t2i_adapter.py
+++ b/invokeai/backend/stable_diffusion/extensions/t2i_adapter.py
@@ -62,8 +62,6 @@ def setup(self, ctx: DenoiseContext):
             image=self._image,
             latents_height=latents_height,
             latents_width=latents_width,
-            max_unet_downscale=self._max_unet_downscale,
-            resize_mode=self._resize_mode,
         )
 
     def _run_model(
@@ -72,21 +70,28 @@ def _run_model(
         image: Image,
         latents_height: int,
         latents_width: int,
-        max_unet_downscale: int,
-        resize_mode: CONTROLNET_RESIZE_VALUES,
     ):
-        input_height = latents_height // max_unet_downscale * model.total_downscale_factor
-        input_width = latents_width // max_unet_downscale * model.total_downscale_factor
-
+        # Resize the T2I-Adapter input image.
+        # We select the resize dimensions so that after the T2I-Adapter's total_downscale_factor is applied, the
+        # result will match the latent image's dimensions after max_unet_downscale is applied.
+        input_height = latents_height // self._max_unet_downscale * model.total_downscale_factor
+        input_width = latents_width // self._max_unet_downscale * model.total_downscale_factor
+
+        # Note: We have hard-coded `do_classifier_free_guidance=False`. This is because we only want to prepare
+        # a single image. If CFG is enabled, we will duplicate the resultant tensor after applying the
+        # T2I-Adapter model.
+        #
+        # Note: We re-use the `prepare_control_image(...)` from ControlNet for T2I-Adapter, because it has many
+        # of the same requirements (e.g. preserving binary masks during resize).
         t2i_image = prepare_control_image(
             image=image,
             do_classifier_free_guidance=False,
             width=input_width,
             height=input_height,
-            num_channels=model.config["in_channels"],  # mypy treats this as a FrozenDict
+            num_channels=model.config["in_channels"],
             device=model.device,
             dtype=model.dtype,
-            resize_mode=resize_mode,
+            resize_mode=self._resize_mode,
         )
 
         return model(t2i_image)

0 commit comments

Comments
 (0)