Merged

37 commits
- 3bd289f: initial (yiyixuxu, Aug 17, 2025)
- fa1a9cd: Merge branch 'main' into modular-qwen (yiyixuxu, Aug 18, 2025)
- 44e058c: up up (yiyixuxu, Aug 22, 2025)
- ff06e95: Merge branch 'main' into modular-qwen (yiyixuxu, Aug 23, 2025)
- 49e683f: start to work on edit (yiyixuxu, Aug 23, 2025)
- f72763c: add support for qwen edit (yiyixuxu, Aug 24, 2025)
- 57a1bc6: first dynamic block! (yiyixuxu, Aug 25, 2025)
- 5fbc817: add controlnet support! (yiyixuxu, Aug 25, 2025)
- 100122c: style (yiyixuxu, Aug 25, 2025)
- 84dbf17: up (yiyixuxu, Aug 25, 2025)
- 0a9f7f9: up (yiyixuxu, Aug 26, 2025)
- 4483400: up up (yiyixuxu, Aug 27, 2025)
- a562806: style (yiyixuxu, Aug 27, 2025)
- 30faada: Merge branch 'main' into modular-qwen (yiyixuxu, Aug 27, 2025)
- 2d5d876: Apply suggestions from code review (yiyixuxu, Aug 27, 2025)
- b89cc40: up (yiyixuxu, Aug 27, 2025)
- 8dce330: add inpaint processor to doc (yiyixuxu, Aug 27, 2025)
- d16b7b9: refactor! (yiyixuxu, Aug 27, 2025)
- dd8d0f6: add auto pipeline blocks, guider support (yiyixuxu, Aug 27, 2025)
- 2c05729: style + copies (yiyixuxu, Aug 27, 2025)
- 78f0038: Merge branch 'main' into modular-qwen (yiyixuxu, Aug 31, 2025)
- 7bf9730: qwen modular work with standard repo (yiyixuxu, Sep 1, 2025)
- cd3a6a6: style (yiyixuxu, Sep 1, 2025)
- 5ecbbff: support edit inpaint! (yiyixuxu, Sep 2, 2025)
- ef66598: style (yiyixuxu, Sep 2, 2025)
- eed3ae0: qwen image edit autopipeline! (yiyixuxu, Sep 2, 2025)
- a95651a: up up (yiyixuxu, Sep 2, 2025)
- 4efac2c: style (yiyixuxu, Sep 2, 2025)
- 1668c77: up up some refactor (yiyixuxu, Sep 7, 2025)
- 5b408fa: img2img (yiyixuxu, Sep 7, 2025)
- a7414e6: more refactor (yiyixuxu, Sep 7, 2025)
- 675ae14: up (yiyixuxu, Sep 7, 2025)
- 6bf38c8: Apply suggestions from code review (yiyixuxu, Sep 8, 2025)
- 0e9c496: docstring etc (yiyixuxu, Sep 8, 2025)
- f97e24a: style + copy (yiyixuxu, Sep 8, 2025)
- 7084119: fix (yiyixuxu, Sep 8, 2025)
- 9c5830e: fix more docstrings (yiyixuxu, Sep 8, 2025)
6 changes: 6 additions & 0 deletions docs/source/en/api/image_processor.md
@@ -20,6 +20,12 @@ All pipelines with [`VaeImageProcessor`] accept PIL Image, PyTorch tensor, or NumPy arrays as image inputs

[[autodoc]] image_processor.VaeImageProcessor

## InpaintProcessor

The [`InpaintProcessor`] accepts `mask` and `image` inputs and processes them together. Optionally, it can accept `padding_mask_crop` and apply a mask overlay.

[[autodoc]] image_processor.InpaintProcessor

## VaeImageProcessorLDM3D

The [`VaeImageProcessorLDM3D`] accepts RGB and depth inputs and returns RGB and depth outputs.
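As a sketch only (not part of this diff), the documented `preprocess` call could be exercised like this; the file names and target size are placeholders:

```python
from PIL import Image
from diffusers.image_processor import InpaintProcessor

processor = InpaintProcessor(vae_scale_factor=8)
image = Image.open("input.png").convert("RGB")  # placeholder file
mask = Image.open("mask.png").convert("L")      # placeholder file

# Image and mask are resized and normalized together; the third return value
# carries the kwargs that postprocess() consumes later.
image_t, mask_t, postprocess_kwargs = processor.preprocess(
    image, mask=mask, height=512, width=512
)
```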
2 changes: 2 additions & 0 deletions src/diffusers/__init__.py
@@ -372,6 +372,7 @@
[
"FluxAutoBlocks",
"FluxModularPipeline",
"QwenImageModularPipeline",
"StableDiffusionXLAutoBlocks",
"StableDiffusionXLModularPipeline",
"WanAutoBlocks",
@@ -1017,6 +1018,7 @@
from .modular_pipelines import (
FluxAutoBlocks,
FluxModularPipeline,
QwenImageModularPipeline,
StableDiffusionXLAutoBlocks,
StableDiffusionXLModularPipeline,
WanAutoBlocks,
10 changes: 10 additions & 0 deletions src/diffusers/hooks/_helpers.py
@@ -108,6 +108,7 @@ def _register_attention_processors_metadata():
from ..models.attention_processor import AttnProcessor2_0
from ..models.transformers.transformer_cogview4 import CogView4AttnProcessor
from ..models.transformers.transformer_flux import FluxAttnProcessor
from ..models.transformers.transformer_qwenimage import QwenDoubleStreamAttnProcessor2_0
from ..models.transformers.transformer_wan import WanAttnProcessor2_0

# AttnProcessor2_0
@@ -140,6 +141,14 @@ def _register_attention_processors_metadata():
metadata=AttentionProcessorMetadata(skip_processor_output_fn=_skip_proc_output_fn_Attention_FluxAttnProcessor),
)

# QwenDoubleStreamAttnProcessor2_0
AttentionProcessorRegistry.register(
model_class=QwenDoubleStreamAttnProcessor2_0,
metadata=AttentionProcessorMetadata(
skip_processor_output_fn=_skip_proc_output_fn_Attention_QwenDoubleStreamAttnProcessor2_0
),
)


def _register_transformer_blocks_metadata():
from ..models.attention import BasicTransformerBlock
@@ -298,4 +307,5 @@ def _skip_attention___ret___hidden_states___encoder_hidden_states(self, *args, **kwargs):
_skip_proc_output_fn_Attention_WanAttnProcessor2_0 = _skip_attention___ret___hidden_states
# not sure what this is yet.
_skip_proc_output_fn_Attention_FluxAttnProcessor = _skip_attention___ret___hidden_states
_skip_proc_output_fn_Attention_QwenDoubleStreamAttnProcessor2_0 = _skip_attention___ret___hidden_states
[Review comment, Member] For my understanding: what is this one for?

[Reply, Collaborator/Author] For guiders/hooks.
# fmt: on
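For context on the exchange above: the registration wires the new Qwen attention processor into the metadata registry that guiders/hooks consult. A self-contained sketch of that registry pattern, as an illustration rather than the library's exact internals:

```python
from dataclasses import dataclass
from typing import Callable, Dict, Type


@dataclass
class AttentionProcessorMetadata:
    # Called instead of the processor when a hook wants to skip attention.
    skip_processor_output_fn: Callable


class AttentionProcessorRegistry:
    _registry: Dict[Type, AttentionProcessorMetadata] = {}

    @classmethod
    def register(cls, model_class: Type, metadata: AttentionProcessorMetadata) -> None:
        cls._registry[model_class] = metadata

    @classmethod
    def get(cls, model_class: Type) -> AttentionProcessorMetadata:
        return cls._registry[model_class]


class MyAttnProcessor:  # stand-in for e.g. QwenDoubleStreamAttnProcessor2_0
    pass


# Mirrors the registration in the diff: skipping means returning hidden_states as-is.
AttentionProcessorRegistry.register(
    MyAttnProcessor,
    AttentionProcessorMetadata(
        skip_processor_output_fn=lambda attn, hidden_states, *a, **k: hidden_states
    ),
)
```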
128 changes: 128 additions & 0 deletions src/diffusers/image_processor.py
@@ -838,6 +838,134 @@ def apply_overlay(
return image


class InpaintProcessor(ConfigMixin):
[Review comment, Member] Really nice! (Not for this PR, but we could attempt to have an example of the processor for an inpaint pipeline.)

"""
Image processor that preprocesses and postprocesses the image/mask pair for inpainting.
"""

config_name = CONFIG_NAME

@register_to_config
def __init__(
self,
do_resize: bool = True,
vae_scale_factor: int = 8,
vae_latent_channels: int = 4,
resample: str = "lanczos",
reducing_gap: Optional[int] = None,
do_normalize: bool = True,
do_binarize: bool = False,
do_convert_grayscale: bool = False,
mask_do_normalize: bool = False,
mask_do_binarize: bool = True,
mask_do_convert_grayscale: bool = True,
):
super().__init__()

self._image_processor = VaeImageProcessor(
do_resize=do_resize,
vae_scale_factor=vae_scale_factor,
vae_latent_channels=vae_latent_channels,
resample=resample,
reducing_gap=reducing_gap,
do_normalize=do_normalize,
do_binarize=do_binarize,
do_convert_grayscale=do_convert_grayscale,
)
self._mask_processor = VaeImageProcessor(
do_resize=do_resize,
vae_scale_factor=vae_scale_factor,
vae_latent_channels=vae_latent_channels,
resample=resample,
reducing_gap=reducing_gap,
do_normalize=mask_do_normalize,
do_binarize=mask_do_binarize,
do_convert_grayscale=mask_do_convert_grayscale,
)

def preprocess(
self,
image: PIL.Image.Image,
mask: Optional[PIL.Image.Image] = None,
height: Optional[int] = None,
width: Optional[int] = None,
padding_mask_crop: Optional[int] = None,
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor, dict]]:
"""
Preprocess the image and mask.
"""
if mask is None and padding_mask_crop is not None:
raise ValueError("mask must be provided if padding_mask_crop is provided")

# if mask is None, same behavior as regular image processor
if mask is None:
return self._image_processor.preprocess(image, height=height, width=width)

if padding_mask_crop is not None:
crops_coords = self._image_processor.get_crop_region(mask, width, height, pad=padding_mask_crop)
resize_mode = "fill"
else:
crops_coords = None
resize_mode = "default"

processed_image = self._image_processor.preprocess(
image,
height=height,
width=width,
crops_coords=crops_coords,
resize_mode=resize_mode,
)

processed_mask = self._mask_processor.preprocess(
mask,
height=height,
width=width,
resize_mode=resize_mode,
crops_coords=crops_coords,
)

if crops_coords is not None:
postprocessing_kwargs = {
"crops_coords": crops_coords,
"original_image": image,
"original_mask": mask,
}
else:
postprocessing_kwargs = {
"crops_coords": None,
"original_image": None,
"original_mask": None,
}

return processed_image, processed_mask, postprocessing_kwargs

def postprocess(
self,
image: torch.Tensor,
output_type: str = "pil",
original_image: Optional[PIL.Image.Image] = None,
original_mask: Optional[PIL.Image.Image] = None,
crops_coords: Optional[Tuple[int, int, int, int]] = None,
) -> Union[List[PIL.Image.Image], np.ndarray, torch.Tensor]:
"""
Postprocess the image and optionally apply the mask overlay.
"""
image = self._image_processor.postprocess(
image,
output_type=output_type,
)
# optionally apply the mask overlay
if crops_coords is not None and (original_image is None or original_mask is None):
raise ValueError("original_image and original_mask must be provided if crops_coords is provided")

elif crops_coords is not None:
image = [
self._image_processor.apply_overlay(original_mask, original_image, i, crops_coords) for i in image
]

return image


class VaeImageProcessorLDM3D(VaeImageProcessor):
"""
Image processor for VAE LDM3D.
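Picking up the reviewer's suggestion above, here is a hedged round-trip sketch of the new class; the denoising step is stubbed out, and only the `preprocess`/`postprocess` calls come from this diff:

```python
from PIL import Image
from diffusers.image_processor import InpaintProcessor

processor = InpaintProcessor(vae_scale_factor=8)
image = Image.open("input.png").convert("RGB")  # placeholder paths
mask = Image.open("mask.png").convert("L")

# With padding_mask_crop set, both inputs are cropped around the mask region,
# and the crop coords plus originals come back for the overlay step.
image_t, mask_t, pp_kwargs = processor.preprocess(
    image, mask=mask, height=1024, width=1024, padding_mask_crop=32
)

# ... run the inpainting pipeline on image_t / mask_t; faked here ...
decoded = image_t  # stand-in for the decoded output tensor

# postprocess() converts to PIL and pastes the inpainted crop back onto the
# original image via apply_overlay().
result = processor.postprocess(decoded, output_type="pil", **pp_kwargs)
```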
2 changes: 2 additions & 0 deletions src/diffusers/modular_pipelines/__init__.py
@@ -47,6 +47,7 @@
_import_structure["stable_diffusion_xl"] = ["StableDiffusionXLAutoBlocks", "StableDiffusionXLModularPipeline"]
_import_structure["wan"] = ["WanAutoBlocks", "WanModularPipeline"]
_import_structure["flux"] = ["FluxAutoBlocks", "FluxModularPipeline"]
_import_structure["qwenimage"] = ["QwenImageModularPipeline"]
_import_structure["components_manager"] = ["ComponentsManager"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
@@ -68,6 +69,7 @@
SequentialPipelineBlocks,
)
from .modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, InsertableDict, OutputParam
from .qwenimage import QwenImageModularPipeline
from .stable_diffusion_xl import StableDiffusionXLAutoBlocks, StableDiffusionXLModularPipeline
from .wan import WanAutoBlocks, WanModularPipeline
else:
22 changes: 16 additions & 6 deletions src/diffusers/modular_pipelines/modular_pipeline.py
@@ -56,6 +56,7 @@
("stable-diffusion-xl", "StableDiffusionXLModularPipeline"),
("wan", "WanModularPipeline"),
("flux", "FluxModularPipeline"),
("qwenimage", "QwenImageModularPipeline"),
]
)

@@ -548,8 +549,11 @@ class AutoPipelineBlocks(ModularPipelineBlocks):

def __init__(self):
sub_blocks = InsertableDict()
for block_name, block_cls in zip(self.block_names, self.block_classes):
sub_blocks[block_name] = block_cls()
for block_name, block in zip(self.block_names, self.block_classes):
if inspect.isclass(block):
sub_blocks[block_name] = block()
else:
sub_blocks[block_name] = block
self.sub_blocks = sub_blocks
if not (len(self.block_classes) == len(self.block_names) == len(self.block_trigger_inputs)):
raise ValueError(
@@ -856,8 +860,11 @@ def from_blocks_dict(cls, blocks_dict: Dict[str, Any]) -> "SequentialPipelineBlocks":

def __init__(self):
sub_blocks = InsertableDict()
for block_name, block_cls in zip(self.block_names, self.block_classes):
sub_blocks[block_name] = block_cls()
for block_name, block in zip(self.block_names, self.block_classes):
if inspect.isclass(block):
sub_blocks[block_name] = block()
else:
sub_blocks[block_name] = block
self.sub_blocks = sub_blocks

def _get_inputs(self):
@@ -1280,8 +1287,11 @@ def outputs(self) -> List[str]:

def __init__(self):
sub_blocks = InsertableDict()
for block_name, block_cls in zip(self.block_names, self.block_classes):
sub_blocks[block_name] = block_cls()
for block_name, block in zip(self.block_names, self.block_classes):
if inspect.isclass(block):
sub_blocks[block_name] = block()
else:
sub_blocks[block_name] = block
self.sub_blocks = sub_blocks

@classmethod
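The three `__init__` hunks in this file share one behavioral change: entries in `block_classes` may now be pre-built instances as well as classes. A standalone sketch of the dispatch with toy block types (names are illustrative):

```python
import inspect


def build_sub_blocks(block_names, block_classes):
    # Same pattern as the updated __init__: instantiate classes lazily,
    # keep already-constructed instances as-is.
    return {
        name: block() if inspect.isclass(block) else block
        for name, block in zip(block_names, block_classes)
    }


class EncodeBlock:
    pass


class DenoiseBlock:
    def __init__(self, steps: int = 50):
        self.steps = steps


# A bare class and a configured instance can now be mixed in the same list.
blocks = build_sub_blocks(["encode", "denoise"], [EncodeBlock, DenoiseBlock(steps=30)])
assert isinstance(blocks["encode"], EncodeBlock)
assert blocks["denoise"].steps == 30
```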
61 changes: 61 additions & 0 deletions src/diffusers/modular_pipelines/qwenimage/__init__.py
@@ -0,0 +1,61 @@
from typing import TYPE_CHECKING

from ...utils import (
DIFFUSERS_SLOW_IMPORT,
OptionalDependencyNotAvailable,
_LazyModule,
get_objects_from_module,
is_torch_available,
is_transformers_available,
)


_dummy_objects = {}
_import_structure = {}

try:
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils import dummy_torch_and_transformers_objects # noqa F403

_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
else:
_import_structure["encoders"] = ["QwenImageTextEncoderStep"]
_import_structure["modular_blocks"] = [
"ALL_BLOCKS",
"CONTROLNET_BLOCKS",
"INPAINT_BLOCKS",
"TEXT2IMAGE_BLOCKS",
]
_import_structure["modular_pipeline"] = ["QwenImageModularPipeline"]

if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
try:
if not (is_transformers_available() and is_torch_available()):
raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
from ...utils.dummy_torch_and_transformers_objects import * # noqa F403
else:
from .encoders import (
QwenImageTextEncoderStep,
)
from .modular_blocks import (
ALL_BLOCKS,
CONTROLNET_BLOCKS,
INPAINT_BLOCKS,
TEXT2IMAGE_BLOCKS,
)
from .modular_pipeline import QwenImageModularPipeline
else:
import sys

sys.modules[__name__] = _LazyModule(
__name__,
globals()["__file__"],
_import_structure,
module_spec=__spec__,
)

for name, value in _dummy_objects.items():
setattr(sys.modules[__name__], name, value)
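With this lazy registration plus the top-level export added earlier in the PR, the new names resolve on first attribute access instead of at import time. A quick import sketch (nothing beyond the names registered above is assumed):

```python
# _LazyModule defers loading the heavy submodules until first access.
from diffusers import QwenImageModularPipeline
from diffusers.modular_pipelines.qwenimage import ALL_BLOCKS, TEXT2IMAGE_BLOCKS

print(QwenImageModularPipeline)  # triggers the lazy submodule import
```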