Commit 81ed52f

Commit message: fix
1 parent f50b18e commit 81ed52f

File tree: 5 files changed, +22 -14 lines changed


src/diffusers/modular_pipelines/qwenimage/before_denoise.py

Lines changed: 4 additions & 8 deletions
@@ -577,9 +577,8 @@ def description(self) -> str:
     def inputs(self) -> List[InputParam]:
         return [
             InputParam(name="batch_size", required=True),
-            InputParam(
-                name="resized_image", required=True, type_hint=torch.Tensor, description="The resized image input"
-            ),
+            InputParam(name="image_height", required=True),
+            InputParam(name="image_width", required=True),
             InputParam(name="height", required=True),
             InputParam(name="width", required=True),
             InputParam(name="prompt_embeds_mask"),
@@ -612,10 +611,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
         block_state = self.get_block_state(state)
 
         # for edit, image size can be different from the target size (height/width)
-        image = (
-            block_state.resized_image[0] if isinstance(block_state.resized_image, list) else block_state.resized_image
-        )
-        image_width, image_height = image.size
+
         block_state.img_shapes = [
             [
@@ -624,7 +620,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
                     block_state.height // components.vae_scale_factor // 2,
                     block_state.width // components.vae_scale_factor // 2,
                 ),
-                (1, image_height // components.vae_scale_factor // 2, image_width // components.vae_scale_factor // 2),
+                (1, block_state.image_height // components.vae_scale_factor // 2, block_state.image_width // components.vae_scale_factor // 2),
             ]
         ] * block_state.batch_size
 
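For context, the net effect of this change is that img_shapes is now built from the image_height / image_width values carried on block_state instead of being re-measured from a resized PIL image. Below is a minimal standalone sketch of that shape computation; the function name and the default vae_scale_factor=8 are illustrative assumptions, not the pipeline's API (the real block reads everything from block_state and components).

# Sketch only: standalone version of the img_shapes computation after this change.
def compute_img_shapes(height, width, image_height, image_width, batch_size, vae_scale_factor=8):
    # Each entry pairs the target latent grid with the (possibly different)
    # edit-image latent grid; the extra // 2 presumably corresponds to the
    # 2x2 latent packing (pack_latents) seen in inputs.py.
    return [
        [
            (1, height // vae_scale_factor // 2, width // vae_scale_factor // 2),
            (1, image_height // vae_scale_factor // 2, image_width // vae_scale_factor // 2),
        ]
    ] * batch_size

# Example: a 1024x1024 target with a 768x1024 edit image and batch_size=2
# -> [[(1, 64, 64), (1, 48, 64)], [(1, 64, 64), (1, 48, 64)]]
print(compute_img_shapes(1024, 1024, 768, 1024, batch_size=2))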

src/diffusers/modular_pipelines/qwenimage/encoders.py

Lines changed: 1 addition & 1 deletion
@@ -496,7 +496,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
         )
 
         if components.requires_unconditional_embeds:
-            negative_prompt = block_state.negative_prompt or ""
+            negative_prompt = block_state.negative_prompt or " "
             block_state.negative_prompt_embeds, block_state.negative_prompt_embeds_mask = get_qwen_prompt_embeds_edit(
                 components.text_encoder,
                 components.processor,
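A note on the one-character change above: because Python's `or` falls through on any falsy value, both an unset (None) and an explicitly empty negative prompt now resolve to a single-space string before being passed to the text encoder. A tiny sketch of the fallback behavior (not library code):

# Sketch: None and "" are both falsy, so both now resolve to " " instead of "".
for user_negative_prompt in (None, "", "low quality"):
    negative_prompt = user_negative_prompt or " "
    print(repr(negative_prompt))
# prints: ' ', ' ', 'low quality'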

src/diffusers/modular_pipelines/qwenimage/inputs.py

Lines changed: 12 additions & 0 deletions
@@ -307,6 +307,13 @@ def inputs(self) -> List[InputParam]:
 
         return inputs
 
+    @property
+    def intermediate_outputs(self) -> List[OutputParam]:
+        return [
+            OutputParam(name="image_height", type_hint=int, description="The height of the image latents"),
+            OutputParam(name="image_width", type_hint=int, description="The width of the image latents"),
+        ]
+
     @property
     def expected_components(self) -> List[ComponentSpec]:
         return [
@@ -327,6 +334,11 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
         block_state.height = block_state.height or height
         block_state.width = block_state.width or width
 
+        if not hasattr(block_state, "image_height"):
+            block_state.image_height = height
+        if not hasattr(block_state, "image_width"):
+            block_state.image_width = width
+
         # 2. Patchify the image latent tensor
         image_latent_tensor = components.pachifier.pack_latents(image_latent_tensor)
 
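The __call__ change uses hasattr as a "set only if missing" guard: if an upstream block has already recorded image_height / image_width on the state, those values win; otherwise the height / width computed earlier in this block (the image-latent dimensions) are used. A small sketch of the pattern, with a plain namespace standing in for the pipeline's block_state object (names are illustrative):

from types import SimpleNamespace

def ensure_image_size(block_state, height, width):
    # Fill in image_height / image_width only when no earlier block has set them.
    if not hasattr(block_state, "image_height"):
        block_state.image_height = height
    if not hasattr(block_state, "image_width"):
        block_state.image_width = width
    return block_state

state = ensure_image_size(SimpleNamespace(), height=1024, width=768)
print(state.image_height, state.image_width)  # 1024 768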

src/diffusers/modular_pipelines/qwenimage/modular_blocks.py

Lines changed: 1 addition & 1 deletion
@@ -699,7 +699,7 @@ def description(self):
 class QwenImageEditAutoInputStep(AutoPipelineBlocks):
     block_classes = [QwenImageInpaintInputStep, QwenImageEditInputStep]
     block_names = ["edit_inpaint", "edit"]
-    block_trigger_inputs = ["processed_mask_image", "image"]
+    block_trigger_inputs = ["processed_mask_image", "image_latents"]
 
     @property
     def description(self):
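In AutoPipelineBlocks, block_trigger_inputs pairs up positionally with block_names / block_classes, and the presence of a trigger input in the pipeline state is what selects the sub-block to run, so the trigger has to name an input that is actually available at this stage (presumably image_latents, since the raw image has already been encoded by this point). The dispatch below is a simplified, assumed model for illustration, not the diffusers implementation:

# Simplified, assumed model of trigger-based block selection (illustration only).
block_names = ["edit_inpaint", "edit"]
block_trigger_inputs = ["processed_mask_image", "image_latents"]

def select_block(state_inputs: dict) -> str:
    # The first trigger input present in the state picks the matching block.
    for name, trigger in zip(block_names, block_trigger_inputs):
        if state_inputs.get(trigger) is not None:
            return name
    raise ValueError("no trigger input present in state")

print(select_block({"image_latents": "..."}))                                 # edit
print(select_block({"processed_mask_image": "...", "image_latents": "..."}))  # edit_inpaint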

src/diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py

Lines changed: 4 additions & 4 deletions
@@ -269,7 +269,7 @@ class StableDiffusionXLAutoBlocks(SequentialPipelineBlocks):
     block_names = [
         "text_encoder",
         "ip_adapter",
-        "image_encoder",
+        "vae_encoder",
         "before_denoise",
         "controlnet_input",
         "denoise",
@@ -321,7 +321,7 @@ def description(self):
 IMAGE2IMAGE_BLOCKS = InsertableDict(
     [
         ("text_encoder", StableDiffusionXLTextEncoderStep),
-        ("image_encoder", StableDiffusionXLVaeEncoderStep),
+        ("vae_encoder", StableDiffusionXLVaeEncoderStep),
         ("input", StableDiffusionXLInputStep),
         ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
         ("prepare_latents", StableDiffusionXLImg2ImgPrepareLatentsStep),
@@ -334,7 +334,7 @@ def description(self):
 INPAINT_BLOCKS = InsertableDict(
     [
         ("text_encoder", StableDiffusionXLTextEncoderStep),
-        ("image_encoder", StableDiffusionXLInpaintVaeEncoderStep),
+        ("vae_encoder", StableDiffusionXLInpaintVaeEncoderStep),
         ("input", StableDiffusionXLInputStep),
         ("set_timesteps", StableDiffusionXLImg2ImgSetTimestepsStep),
         ("prepare_latents", StableDiffusionXLInpaintPrepareLatentsStep),
@@ -361,7 +361,7 @@ def description(self):
     [
         ("text_encoder", StableDiffusionXLTextEncoderStep),
         ("ip_adapter", StableDiffusionXLAutoIPAdapterStep),
-        ("image_encoder", StableDiffusionXLAutoVaeEncoderStep),
+        ("vae_encoder", StableDiffusionXLAutoVaeEncoderStep),
         ("before_denoise", StableDiffusionXLAutoBeforeDenoiseStep),
         ("controlnet_input", StableDiffusionXLAutoControlNetInputStep),
         ("denoise", StableDiffusionXLAutoDenoiseStep),
