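First dynamic block: rename QwenImageVaeEncoderStep to QwenImageVaeEncoderDynamicStep and make its input/output names configurable (input_name, output_name)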

yiyixuxu · yiyixuxu · commit 57a1bc6d12ef · 2025-08-25T04:59:38.000+02:00
diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py
@@ -348,9 +348,14 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
         return components, state
     
 
-class QwenImageVaeEncoderStep(ModularPipelineBlocks):
+class QwenImageVaeEncoderDynamicStep(ModularPipelineBlocks):
     model_name = "qwenimage"
 
+    def __init__(self, input_name: str = "image", output_name: str = "image_latents"):
+        self.input_name = input_name
+        self.output_name = output_name
+        super().__init__()
+
     @property
     def description(self) -> str:
         return "Vae Encoder step that encode the input image into a latent representation"
@@ -370,15 +375,15 @@ def expected_components(self) -> List[ComponentSpec]:
     @property
     def inputs(self) -> List[InputParam]:
         return [
-            InputParam("image", required=True, description="The image to encode, should already be resized using resize step"),
+            InputParam(self.input_name, required=True),
             InputParam("generator"),
         ]
 
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
         return [
             OutputParam(
-                "image_latents",
+                self.output_name,
                 type_hint=torch.Tensor,
                 description="The latents representing the reference image",
             )
@@ -391,16 +396,20 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
         device = components._execution_device
         dtype = components.vae.dtype
 
-        image = components.image_processor.preprocess(block_state.image)
+        image = getattr(block_state, self.input_name)
+
+        image = components.image_processor.preprocess(image)
         image = image.unsqueeze(2)
         image = image.to(device=device, dtype=dtype)
 
 
         # Encode image into latents
-        block_state.image_latents = encode_vae_image(
+        image_latents = encode_vae_image(
             image=image, vae=components.vae, generator=block_state.generator, latent_channels=components.num_channels_latents
         )
 
+        setattr(block_state, self.output_name, image_latents)
+
         self.set_block_state(state, block_state)
 
         return components, state
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks.py
@@ -14,7 +14,7 @@
 
 from ...utils import logging
 
-from .encoders import QwenImageTextEncoderStep, QwenImageEditTextEncoderStep, QwenImageVaeEncoderStep
+from .encoders import QwenImageTextEncoderStep, QwenImageEditTextEncoderStep, QwenImageVaeEncoderDynamicStep
 from .decoders import QwenImageDecodeStep
 from .denoise import QwenImageDenoiseStep, QwenImageEditDenoiseStep
 from .before_denoise import QwenImageInputStep, QwenImagePrepareLatentsStep, QwenImageSetTimestepsStep, QwenImagePrepareAdditionalInputsStep, QwenImagePrepareImageLatentsStep, QwenImageEditPrepareAdditionalInputsStep, QwenImageImageResizeStep
@@ -41,7 +41,7 @@
     [
         ("image_resize", QwenImageImageResizeStep),
         ("text_encoder", QwenImageEditTextEncoderStep),
-        ("vae_encoder", QwenImageVaeEncoderStep),
+        ("vae_encoder", QwenImageVaeEncoderDynamicStep(input_name="image", output_name="image_latents")),
         ("input", QwenImageInputStep),
         ("prepare_image_latents", QwenImagePrepareImageLatentsStep),
         ("prepare_latents", QwenImagePrepareLatentsStep),