@@ -330,7 +330,7 @@ def __init__(
330330 output_name : str = "resized_image" ,
331331 vae_image_output_name : str = "vae_image" ,
332332 ):
333- """Create a configurable step for resizing images to the target area (1024 * 1024 ) while maintaining the aspect ratio.
333+ """Create a configurable step for resizing images to the target area (384 * 384 ) while maintaining the aspect ratio.
334334
335335 This block resizes an input image or a list input images and exposes the resized result under configurable
336336 input and output names. Use this when you need to wire the resize step to different image fields (e.g.,
@@ -809,9 +809,7 @@ def inputs(self) -> List[InputParam]:
809809
810810 @property
811811 def intermediate_outputs (self ) -> List [OutputParam ]:
812- return [
813- OutputParam (name = "processed_image" ),
814- ]
812+ return [OutputParam (name = "processed_image" )]
815813
816814 @staticmethod
817815 def check_inputs (height , width , vae_scale_factor ):
@@ -851,7 +849,10 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
851849
852850class QwenImageEditPlusProcessImagesInputStep (QwenImageProcessImagesInputStep ):
853851 model_name = "qwenimage-edit-plus"
854- vae_image_size = 1024 * 1024
852+
853+ def __init__ (self ):
854+ self .vae_image_size = 1024 * 1024
855+ super ().__init__ ()
855856
856857 @property
857858 def description (self ) -> str :
@@ -868,6 +869,7 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
868869 if block_state .vae_image is None and block_state .image is None :
869870 raise ValueError ("`vae_image` and `image` cannot be None at the same time" )
870871
872+ vae_image_sizes = None
871873 if block_state .vae_image is None :
872874 image = block_state .image
873875 self .check_inputs (
@@ -879,12 +881,19 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
879881 image = image , height = height , width = width
880882 )
881883 else :
882- width , height = block_state .vae_image [0 ].size
883- image = block_state .vae_image
884+ # QwenImage Edit Plus can allow multiple input images with varied resolutions
885+ processed_images = []
886+ vae_image_sizes = []
887+ for img in block_state .vae_image :
888+ width , height = img .size
889+ vae_width , vae_height , _ = calculate_dimensions (self .vae_image_size , width / height )
890+ vae_image_sizes .append ((vae_width , vae_height ))
891+ processed_images .append (
892+ components .image_processor .preprocess (image = img , height = vae_height , width = vae_width )
893+ )
894+ block_state .processed_image = processed_images
884895
885- block_state .processed_image = components .image_processor .preprocess (
886- image = image , height = height , width = width
887- )
896+ block_state .vae_image_sizes = vae_image_sizes
888897
889898 self .set_block_state (state , block_state )
890899 return components , state
@@ -926,17 +935,12 @@ def description(self) -> str:
926935
927936 @property
928937 def expected_components (self ) -> List [ComponentSpec ]:
929- components = [
930- ComponentSpec ("vae" , AutoencoderKLQwenImage ),
931- ]
938+ components = [ComponentSpec ("vae" , AutoencoderKLQwenImage )]
932939 return components
933940
934941 @property
935942 def inputs (self ) -> List [InputParam ]:
936- inputs = [
937- InputParam (self ._image_input_name , required = True ),
938- InputParam ("generator" ),
939- ]
943+ inputs = [InputParam (self ._image_input_name , required = True ), InputParam ("generator" )]
940944 return inputs
941945
942946 @property
@@ -974,6 +978,50 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
974978 return components , state
975979
976980
class QwenImageEditPlusVaeEncoderDynamicStep(QwenImageVaeEncoderDynamicStep):
    """VAE-encoding step for QwenImage Edit Plus.

    Unlike the parent step, the latents output is a *list* of tensors: each
    reference image may have its own resolution, so the images are encoded
    one at a time rather than as a single batched tensor.
    """

    model_name = "qwenimage-edit-plus"

    @property
    def intermediate_outputs(self) -> List[OutputParam]:
        """Declare the latents output; a list because each reference image latent can have a varied resolution."""
        return [
            OutputParam(
                self._image_latents_output_name,
                type_hint=List[torch.Tensor],
                description="The latents representing the reference image(s).",
            )
        ]

    @torch.no_grad()
    def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
        """Encode each reference image independently through the VAE and store the resulting latents list."""
        block_state = self.get_block_state(state)

        device = components._execution_device
        dtype = components.vae.dtype

        images = getattr(block_state, self._image_input_name)

        # One latent tensor per reference image; resolutions may differ, so no batching.
        image_latents = [
            encode_vae_image(
                image=img,
                vae=components.vae,
                generator=block_state.generator,
                device=device,
                dtype=dtype,
                latent_channels=components.num_channels_latents,
            )
            for img in images
        ]

        setattr(block_state, self._image_latents_output_name, image_latents)

        self.set_block_state(state, block_state)

        return components, state
1023+
1024+
9771025class QwenImageControlNetVaeEncoderStep (ModularPipelineBlocks ):
9781026 model_name = "qwenimage"
9791027
0 commit comments