docstring etc

yiyixuxu · yiyixuxu · commit 0e9c496b5f97 · 2025-09-08T11:08:15.000+02:00
diff --git a/src/diffusers/image_processor.py b/src/diffusers/image_processor.py
@@ -523,8 +523,6 @@ def resize(
                 size=(height, width),
             )
             image = self.pt_to_numpy(image)
-        else:
-            raise ValueError(f"Unsupported image type: {type(image)}")
 
         return image
 
diff --git a/src/diffusers/modular_pipelines/qwenimage/__init__.py b/src/diffusers/modular_pipelines/qwenimage/__init__.py
@@ -25,6 +25,7 @@
     _import_structure["modular_blocks"] = [
         "ALL_BLOCKS",
         "AUTO_BLOCKS",
+        "EDIT_AUTO_BLOCKS",
         "CONTROLNET_BLOCKS",
         "EDIT_BLOCKS",
         "EDIT_INPAINT_BLOCKS",
@@ -48,6 +49,8 @@
         )
         from .modular_blocks import (
             ALL_BLOCKS,
+            AUTO_BLOCKS,
+            EDIT_AUTO_BLOCKS,
             CONTROLNET_BLOCKS,
             EDIT_BLOCKS,
             EDIT_INPAINT_BLOCKS,
diff --git a/src/diffusers/modular_pipelines/qwenimage/before_denoise.py b/src/diffusers/modular_pipelines/qwenimage/before_denoise.py
@@ -263,7 +263,6 @@ def check_inputs(image_latents, latents, batch_size):
         if image_latents.ndim != 3:
             raise ValueError(f"`image_latents` must have 3 dimensions (patchified), but got {image_latents.ndim}")
 
-
     @torch.no_grad()
     def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
         block_state = self.get_block_state(state)
@@ -294,7 +293,7 @@ class QwenImageCreateMaskLatentsStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Step that creates mask latents from preprocessed mask_image by interpolating to latent space. Output is not patchified."
+        return "Step that creates mask latents from preprocessed mask_image by interpolating to latent space."
 
     @property
     def expected_components(self) -> List[ComponentSpec]:
@@ -572,7 +571,7 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Step that prepares the RoPE inputs for the text-to-image generation process. This is used in QwenImage Edit. Should be place after prepare_latents step"
+        return "Step that prepares the RoPE inputs for denoising process. This is used in QwenImage Edit. Should be place after prepare_latents step"
 
     @property
     def inputs(self) -> List[InputParam]:
diff --git a/src/diffusers/modular_pipelines/qwenimage/denoise.py b/src/diffusers/modular_pipelines/qwenimage/denoise.py
@@ -539,7 +539,7 @@ def description(self) -> str:
             " - `QwenImageLoopBeforeDenoiser`\n"
             " - `QwenImageLoopDenoiser`\n"
             " - `QwenImageLoopAfterDenoiser`\n"
-            "This block supports text2img tasks."
+            "This block supports text2image and image2image tasks for QwenImage."
         )
 
 
@@ -563,7 +563,7 @@ def description(self) -> str:
             " - `QwenImageLoopDenoiser`\n"
             " - `QwenImageLoopAfterDenoiser`\n"
             " - `QwenImageLoopAfterDenoiserInpaint`\n"
-            "This block supports inpainting tasks."
+            "This block supports inpainting tasks for QwenImage."
         )
 
 
@@ -587,7 +587,7 @@ def description(self) -> str:
             " - `QwenImageLoopBeforeDenoiserControlNet`\n"
             " - `QwenImageLoopDenoiser`\n"
             " - `QwenImageLoopAfterDenoiser`\n"
-            "This block supports text2img tasks."
+            "This block supports text2img/img2img tasks with controlnet for QwenImage."
         )
 
 
@@ -619,7 +619,7 @@ def description(self) -> str:
             " - `QwenImageLoopDenoiser`\n"
             " - `QwenImageLoopAfterDenoiser`\n"
             " - `QwenImageLoopAfterDenoiserInpaint`\n"
-            "This block supports inpainting tasks with controlnet."
+            "This block supports inpainting tasks with controlnet for QwenImage."
         )
 
 
@@ -641,7 +641,7 @@ def description(self) -> str:
             " - `QwenImageEditLoopBeforeDenoiser`\n"
             " - `QwenImageEditLoopDenoiser`\n"
             " - `QwenImageLoopAfterDenoiser`\n"
-            "This block supports image-to-image tasks for QwenImage Edit."
+            "This block supports QwenImage Edit."
         )
 
 
diff --git a/src/diffusers/modular_pipelines/qwenimage/encoders.py b/src/diffusers/modular_pipelines/qwenimage/encoders.py
@@ -186,11 +186,22 @@ def encode_vae_image(
     return image_latents
 
 
-# YiYi TODO: Check if this step need to be dynamic
 class QwenImageEditResizeDynamicStep(ModularPipelineBlocks):
     model_name = "qwenimage"
 
     def __init__(self, input_name: str = "image", output_name: str = "resized_image"):
+        """Create a configurable step for resizing images to the target area (1024 * 1024) while maintaining the aspect ratio.
+
+        This block resizes an input image tensor and exposes the resized result
+        under configurable input and output names. Use this when you need to wire the
+        resize step to different image fields (e.g., "image", "control_image").
+
+        Args:
+            input_name (str, optional): Name of the image field to read from the
+                pipeline state. Defaults to "image".
+            output_name (str, optional): Name of the resized image field to write
+                back to the pipeline state. Defaults to "resized_image".
+        """
         if not isinstance(input_name, str) or not isinstance(output_name, str):
             raise ValueError(
                 f"input_name and output_name must be strings but are {type(input_name)} and {type(output_name)}"
@@ -505,7 +516,7 @@ class QwenImageInpaintProcessImagesInputStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images need to be resized first using either the QwenImageResizeStep or QwenImageEditResizeStep."
+        return "Image Preprocess step for inpainting task. This processes the image and mask inputs together. Images can be resized first using QwenImageEditResizeDynamicStep."
 
     @property
     def expected_components(self) -> List[ComponentSpec]:
@@ -586,7 +597,7 @@ class QwenImageProcessImagesInputStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Image Preprocess step. Images need to be resized first using either the QwenImageResizeStep or QwenImageEditResizeStep."
+        return "Image Preprocess step. Images can be resized first using QwenImageEditResizeDynamicStep."
 
     @property
     def expected_components(self) -> List[ComponentSpec]:
@@ -658,7 +669,11 @@ def __init__(
         input_name: str = "processed_image",
         output_name: str = "image_latents",
     ):
-        """Initialize a dynamic VAE encoder step for converting images to latent representations.
+        """Initialize a VAE encoder step for converting images to latent representations.
+
+        Both the input and output names are configurable so this block can be
+        configured to process different image inputs (e.g., "processed_image" -> "image_latents",
+        "processed_control_image" -> "control_image_latents").
 
         Args:
             input_name (str, optional): Name of the input image tensor. Defaults to "processed_image".
diff --git a/src/diffusers/modular_pipelines/qwenimage/modular_blocks.py b/src/diffusers/modular_pipelines/qwenimage/modular_blocks.py

Original file line number	Diff line number	Diff line change
`@@ -523,8 +523,6 @@ def resize(`
`523`	`523`	`size=(height, width),`
`524`	`524`	`)`
`525`	`525`	`image = self.pt_to_numpy(image)`
`526`		`- else:`
`527`		`- raise ValueError(f"Unsupported image type: {type(image)}")`
`528`	`526`
`529`	`527`	`return image`
`530`	`528`