Skip to content

Commit 1185f82

Browse files
committed
up
1 parent a9d50c8 commit 1185f82

File tree

4 files changed

+20
-10
lines changed

4 files changed

+20
-10
lines changed

src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,6 @@ def encode_prompt(
308308
prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
309309
prompt_embeds_mask = prompt_embeds_mask.repeat(1, num_images_per_prompt, 1)
310310
prompt_embeds_mask = prompt_embeds_mask.view(batch_size * num_images_per_prompt, seq_len)
311-
312311
return prompt_embeds, prompt_embeds_mask
313312

314313
def check_inputs(

src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit_plus.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ def encode_prompt(
309309
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
310310
provided, text embeddings will be generated from `prompt` input argument.
311311
"""
312+
print(f"{image[0].size=}")
312313
device = device or self._execution_device
313314

314315
prompt = [prompt] if isinstance(prompt, str) else prompt
@@ -322,7 +323,7 @@ def encode_prompt(
322323
prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
323324
prompt_embeds_mask = prompt_embeds_mask.repeat(1, num_images_per_prompt, 1)
324325
prompt_embeds_mask = prompt_embeds_mask.view(batch_size * num_images_per_prompt, seq_len)
325-
326+
print(f"{prompt_embeds.shape=}, {prompt_embeds_mask.shape=}")
326327
return prompt_embeds, prompt_embeds_mask
327328

328329
# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage_edit.QwenImageEditPipeline.check_inputs

tests/pipelines/qwenimage/test_qwenimage_edit.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,17 @@ def get_dummy_inputs(self, device, seed=0):
133133
else:
134134
generator = torch.Generator(device=device).manual_seed(seed)
135135

136+
# Even if we specify smaller dimensions for the images, it won't work because of how
137+
# the internal implementation enforces a minimal resolution of 1024x1024.
136138
inputs = {
137139
"prompt": "dance monkey",
138-
"image": Image.new("RGB", (32, 32)),
140+
"image": Image.new("RGB", (1024, 1024)),
139141
"negative_prompt": "bad quality",
140142
"generator": generator,
141143
"num_inference_steps": 2,
142144
"true_cfg_scale": 1.0,
143-
"height": 32,
144-
"width": 32,
145+
"height": 1024,
146+
"width": 1024,
145147
"max_sequence_length": 16,
146148
"output_type": "pt",
147149
}
@@ -240,5 +242,8 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
240242
def test_encode_prompt_works_in_isolation(
241243
self, extra_required_param_value_dict=None, keep_params=None, atol=1e-4, rtol=1e-4
242244
):
243-
keep_params = ["image"]
245+
# We include `image` because it's needed in both `encode_prompt` and some other subsequent calculations.
246+
# `max_sequence_length` to maintain parity between its value during all invocations of `encode_prompt`
247+
# in the following test.
248+
keep_params = ["image", "max_sequence_length"]
244249
super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, keep_params, atol, rtol)

tests/pipelines/qwenimage/test_qwenimage_edit_plus.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -134,16 +134,18 @@ def get_dummy_inputs(self, device, seed=0):
134134
else:
135135
generator = torch.Generator(device=device).manual_seed(seed)
136136

137-
image = Image.new("RGB", (32, 32))
137+
# Even if we specify smaller dimensions for the images, it won't work because of how
138+
# the internal implementation enforces a minimal resolution of 384x384.
139+
image = Image.new("RGB", (384, 384))
138140
inputs = {
139141
"prompt": "dance monkey",
140142
"image": [image, image],
141143
"negative_prompt": "bad quality",
142144
"generator": generator,
143145
"num_inference_steps": 2,
144146
"true_cfg_scale": 1.0,
145-
"height": 32,
146-
"width": 32,
147+
"height": 384,
148+
"width": 384,
147149
"max_sequence_length": 16,
148150
"output_type": "pt",
149151
}
@@ -239,7 +241,10 @@ def test_vae_tiling(self, expected_diff_max: float = 0.2):
239241
def test_encode_prompt_works_in_isolation(
240242
self, extra_required_param_value_dict=None, keep_params=None, atol=1e-4, rtol=1e-4
241243
):
242-
keep_params = ["image"]
244+
# We include `image` because it's needed in both `encode_prompt` and some other subsequent calculations.
245+
# `max_sequence_length` to maintain parity between its value during all invocations of `encode_prompt`
246+
# in the following test.
247+
keep_params = ["image", "max_sequence_length"]
243248
super().test_encode_prompt_works_in_isolation(extra_required_param_value_dict, keep_params, atol, rtol)
244249

245250
@pytest.mark.xfail(condition=True, reason="Batch of multiple images needs to be revisited", strict=True)

0 commit comments

Comments
 (0)