Skip to content

Commit c56f200

Browse files
committed
up
1 parent d91aa18 commit c56f200

File tree

8 files changed

+21
-7
lines changed

8 files changed

+21
-7
lines changed

src/diffusers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,8 +389,10 @@
389389
"FluxModularPipeline",
390390
"QwenImageAutoBlocks",
391391
"QwenImageEditAutoBlocks",
392+
"QwenImageEditPlusAutoBlocks",
392393
"QwenImageEditModularPipeline",
393394
"QwenImageModularPipeline",
395+
"QwenImageEditPlusModularPipeline",
394396
"StableDiffusionXLAutoBlocks",
395397
"StableDiffusionXLModularPipeline",
396398
"WanAutoBlocks",
@@ -1051,8 +1053,10 @@
10511053
FluxModularPipeline,
10521054
QwenImageAutoBlocks,
10531055
QwenImageEditAutoBlocks,
1056+
QwenImageEditPlusAutoBlocks,
10541057
QwenImageEditModularPipeline,
10551058
QwenImageModularPipeline,
1059+
QwenImageEditPlusModularPipeline,
10561060
StableDiffusionXLAutoBlocks,
10571061
StableDiffusionXLModularPipeline,
10581062
WanAutoBlocks,

src/diffusers/models/transformers/transformer_qwenimage.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def apply_rotary_emb_qwen(
134134

135135
return out
136136
else:
137+
print(f"{x.shape=}, {freqs_cis.shape=}")
137138
x_rotated = torch.view_as_complex(x.float().reshape(*x.shape[:-1], -1, 2))
138139
freqs_cis = freqs_cis.unsqueeze(1)
139140
x_out = torch.view_as_real(x_rotated * freqs_cis).flatten(3)

src/diffusers/modular_pipelines/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
"QwenImageModularPipeline",
5353
"QwenImageEditModularPipeline",
5454
"QwenImageEditAutoBlocks",
55+
"QwenImageEditPlusModularPipeline",
56+
"QwenImageEditPlusAutoBlocks",
5557
]
5658
_import_structure["components_manager"] = ["ComponentsManager"]
5759

@@ -79,6 +81,8 @@
7981
QwenImageEditAutoBlocks,
8082
QwenImageEditModularPipeline,
8183
QwenImageModularPipeline,
84+
QwenImageEditPlusModularPipeline,
85+
QwenImageEditPlusAutoBlocks,
8286
)
8387
from .stable_diffusion_xl import StableDiffusionXLAutoBlocks, StableDiffusionXLModularPipeline
8488
from .wan import WanAutoBlocks, WanModularPipeline

src/diffusers/modular_pipelines/modular_pipeline.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
("flux", "FluxModularPipeline"),
6060
("qwenimage", "QwenImageModularPipeline"),
6161
("qwenimage-edit", "QwenImageEditModularPipeline"),
62+
("qwenimage-edit-plus", "QwenImageEditPlusModularPipeline")
6263
]
6364
)
6465

@@ -1628,7 +1629,8 @@ def from_pretrained(
16281629
blocks = ModularPipelineBlocks.from_pretrained(
16291630
pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
16301631
)
1631-
except EnvironmentError:
1632+
except EnvironmentError as e:
1633+
logger.debug(f"EnvironmentError: {e}")
16321634
blocks = None
16331635

16341636
cache_dir = kwargs.pop("cache_dir", None)

src/diffusers/modular_pipelines/qwenimage/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
"QwenImageEditAutoBlocks",
3939
"QwenImageEditPlusAutoBlocks",
4040
]
41-
_import_structure["modular_pipeline"] = ["QwenImageEditModularPipeline", "QwenImageModularPipeline"]
41+
_import_structure["modular_pipeline"] = ["QwenImageEditModularPipeline", "QwenImageModularPipeline", "QwenImageEditPlusModularPipeline"]
4242

4343
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
4444
try:

src/diffusers/modular_pipelines/qwenimage/denoise.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ def __call__(self, components: QwenImageModularPipeline, block_state: BlockState
343343
cond_kwargs = {k: v for k, v in cond_kwargs.items() if k in guider_input_fields}
344344

345345
# YiYi TODO: add cache context
346+
print(f"{block_state.img_shapes=}")
346347
guider_state_batch.noise_pred = components.transformer(
347348
hidden_states=block_state.latent_model_input,
348349
timestep=block_state.timestep / 1000,

src/diffusers/modular_pipelines/qwenimage/encoders.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def get_qwen_prompt_embeds_edit_plus(
132132
text_encoder,
133133
processor,
134134
prompt: Union[str, List[str]] = None,
135-
image: Optional[Union[torch.Tensor, List[PIL.Image.Image], [PIL.Image.Image]]] = None,
135+
image: Optional[Union[torch.Tensor, List[PIL.Image.Image], PIL.Image.Image]] = None,
136136
prompt_template_encode: str = "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
137137
img_template_encode: str = "Picture {}: <|vision_start|><|image_pad|><|vision_end|>",
138138
prompt_template_encode_start_idx: int = 64,
@@ -371,7 +371,7 @@ def intermediate_outputs(self) -> List[OutputParam]:
371371
),
372372
OutputParam(
373373
name=self._resized_image_vae_output_name,
374-
type_hint=List[PIL.Image.Image],
374+
type_hint=torch.Tensor,
375375
description="The resized images to be used by the VAE encoder.",
376376
),
377377
OutputParam(
@@ -409,8 +409,8 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
409409
)
410410
vae_width, vae_height, _ = calculate_dimensions(self.vae_image_size, image_width / image_height)
411411
vae_image_sizes.append((vae_width, vae_height))
412-
condition_images.append(self.image_processor.resize(img, condition_height, condition_width))
413-
vae_images.append(self.image_processor.preprocess(img, vae_height, vae_width).unsqueeze(2))
412+
condition_images.append(components.image_resize_processor.resize(img, condition_height, condition_width))
413+
vae_images.append(components.image_resize_processor.preprocess(img, vae_height, vae_width).unsqueeze(2))
414414

415415
setattr(block_state, self._resized_image_output_name, condition_images)
416416
setattr(block_state, self._resized_image_vae_output_name, vae_images)
@@ -718,7 +718,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
718718

719719
if components.requires_unconditional_embeds:
720720
negative_prompt = block_state.negative_prompt or " "
721-
block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit(
721+
block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit_plus(
722722
components.text_encoder,
723723
components.processor,
724724
prompt=negative_prompt,

src/diffusers/pipelines/auto_pipeline.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
QwenImageImg2ImgPipeline,
9999
QwenImageInpaintPipeline,
100100
QwenImagePipeline,
101+
QwenImageEditPlusPipeline,
101102
)
102103
from .sana import SanaPipeline
103104
from .stable_cascade import StableCascadeCombinedPipeline, StableCascadeDecoderPipeline
@@ -186,6 +187,7 @@
186187
("flux-kontext", FluxKontextPipeline),
187188
("qwenimage", QwenImageImg2ImgPipeline),
188189
("qwenimage-edit", QwenImageEditPipeline),
190+
("qwenimage-edit-plus", QwenImageEditPlusPipeline),
189191
]
190192
)
191193

0 commit comments

Comments
 (0)