
Commit 4483400

up up
1 parent 0a9f7f9 commit 4483400

7 files changed: +170 additions, -212 deletions


src/diffusers/image_processor.py

Lines changed: 11 additions & 4 deletions
@@ -887,15 +887,21 @@ def __init__(
     def preprocess(
         self,
         image: PIL.Image.Image,
-        mask: PIL.Image.Image,
-        height:int,
-        width:int,
+        mask: PIL.Image.Image = None,
+        height:int = None,
+        width:int = None,
         padding_mask_crop:Optional[int] = None,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Preprocess the image and mask.
         """
-
+        if mask is None and padding_mask_crop is not None:
+            raise ValueError("mask must be provided if padding_mask_crop is provided")
+
+        # if mask is None, same behavior as regular image processor
+        if mask is None:
+            return self._image_processor.preprocess(image, height=height, width=width)
+
         if padding_mask_crop is not None:
             crops_coords = self._image_processor.get_crop_region(
                 mask, width, height, pad=padding_mask_crop

@@ -913,6 +919,7 @@ def preprocess(
                 resize_mode=resize_mode,
             )
 
+
         processed_mask = self._mask_processor.preprocess(
             mask,
             height=height,
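
With mask now optional, the same processor covers both inpainting and plain image preprocessing. A minimal sketch of the two call paths, assuming this preprocess lives on diffusers' InpaintProcessor and that the constructor accepts vae_scale_factor (both are assumptions; adjust to the actual class):

import PIL.Image
from diffusers.image_processor import InpaintProcessor  # assumed import path

processor = InpaintProcessor(vae_scale_factor=16)  # assumed constructor args
image = PIL.Image.new("RGB", (1024, 1024))
mask = PIL.Image.new("L", (1024, 1024))

# With a mask: preprocesses both and returns (image_tensor, mask_tensor) as before.
image_t, mask_t = processor.preprocess(image, mask, height=1024, width=1024)

# Without a mask: falls through to the regular image processor and returns only the image tensor.
image_t = processor.preprocess(image, height=1024, width=1024)

# padding_mask_crop without a mask now raises ValueError:
# processor.preprocess(image, padding_mask_crop=32)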

src/diffusers/modular_pipelines/qwenimage/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,7 @@
         "ALL_BLOCKS",
         "CONTROLNET_BLOCKS",
         "TEXT2IMAGE_BLOCKS",
+        "INPAINT_BLOCKS",
     ]
     _import_structure["modular_pipeline"] = ["QwenImageModularPipeline"]
 

@@ -43,6 +44,7 @@
         ALL_BLOCKS,
         CONTROLNET_BLOCKS,
         TEXT2IMAGE_BLOCKS,
+        INPAINT_BLOCKS,
     )
     from .modular_pipeline import QwenImageModularPipeline
 else:
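
The package now re-exports INPAINT_BLOCKS alongside the existing presets. A small usage sketch, assuming the preset exposes a dict-like interface as the other *_BLOCKS presets do:

from diffusers.modular_pipelines.qwenimage import INPAINT_BLOCKS

# Inspect the inpaint preset the same way as TEXT2IMAGE_BLOCKS or CONTROLNET_BLOCKS.
for name, block_cls in INPAINT_BLOCKS.items():  # assumed dict-like interface
    print(name, block_cls)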

src/diffusers/modular_pipelines/qwenimage/before_denoise.py

Lines changed: 5 additions & 13 deletions
@@ -129,7 +129,7 @@ class QwenImagePrepareLatentsStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Prepare latents step that prepares the latents for the text-to-image generation process"
+        return "Prepare initial random noise for the generation process"
 
     @property
     def inputs(self) -> List[InputParam]:
@@ -358,7 +358,7 @@ class QwenImagePackLatentsDynamicStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Step that pachify the latents inputs. Should be used with outputs from vae encoder step. If height and width are not provided, It will be updated based on the height and width of the latents."
+        return "Step that patchifies the latents inputs. Should be used with outputs from the VAE encoder step."
 
     @property
     def inputs(self) -> List[InputParam]:
@@ -367,8 +367,6 @@ def inputs(self) -> List[InputParam]:
             additional_inputs.append(InputParam(name=input_name))
 
         return [
-            InputParam(name="height"),
-            InputParam(name="width"),
             InputParam(name="num_images_per_prompt", default=1),
             InputParam(
                 name="batch_size",

@@ -378,11 +376,10 @@
             ),
         ] + additional_inputs
 
-    def __init__(self, input_names: List[str] = ["image_latents"], update_height_width: bool = True):
+    def __init__(self, input_names: List[str] = ["image_latents"]):
         if not isinstance(input_names, list):
             input_names = [input_names]
         self._latents_input_names = input_names
-        self._update_height_width = update_height_width
         super().__init__()
 
     @staticmethod

@@ -425,11 +422,6 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
                 width=width_latents,
             )
 
-            if self._update_height_width and block_state.height is None:
-                block_state.height = height_latents * components.vae_scale_factor
-            if self._update_height_width and block_state.width is None:
-                block_state.width = width_latents * components.vae_scale_factor
-
             setattr(block_state, input_name, latents_input)
 
         self.set_block_state(state, block_state)

@@ -580,7 +572,7 @@ class QwenImageRoPEInputsStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Step that prepares the additional inputs for the text-to-image generation process"
+        return "Step that prepares the RoPE inputs for the denoising process"
 
     @property
     def inputs(self) -> List[InputParam]:

@@ -641,7 +633,7 @@ class QwenImageEditRoPEInputsStep(ModularPipelineBlocks):
 
     @property
     def description(self) -> str:
-        return "Step that prepares the additional inputs for the text-to-image generation process"
+        return "Step that prepares the RoPE inputs for the text-to-image generation process. This is used in QwenImage Edit."
 
     @property
     def inputs(self) -> List[InputParam]:
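
After this change, QwenImagePackLatentsDynamicStep no longer accepts update_height_width and no longer derives height/width from the latents; it only patchifies the named inputs. A construction sketch under those assumptions ("mask_latents" is an illustrative second input name, not from this commit):

from diffusers.modular_pipelines.qwenimage.before_denoise import (  # assumed import path
    QwenImagePackLatentsDynamicStep,
)

pack_step = QwenImagePackLatentsDynamicStep(input_names=["image_latents", "mask_latents"])
# height/width are gone from this block's inputs; resolution is now resolved
# upstream (see the VAE encoder step changes below in this commit).
print([p.name for p in pack_step.inputs])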

src/diffusers/modular_pipelines/qwenimage/denoise.py

Lines changed: 0 additions & 1 deletion
@@ -610,5 +610,4 @@ def description(self) -> str:
             " - `QwenImageEditLoopBeforeDenoiser`\n"
             " - `QwenImageEditLoopDenoiser`\n"
             " - `QwenImageLoopAfterDenoiser`\n"
-            "This block supports text2img and img2img tasks."
         )

src/diffusers/modular_pipelines/qwenimage/encoders.py

Lines changed: 32 additions & 5 deletions
@@ -414,10 +414,14 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState):
 class QwenImageVaeEncoderDynamicStep(ModularPipelineBlocks):
     model_name = "qwenimage"
 
-    def __init__(self, input_name: str = "image", output_name: str = "image_latents", include_image_processor: bool = True):
+    def __init__(self, input_name: str = "image", output_name: str = "image_latents", include_image_processor: bool = True, **image_processor_kwargs):
+        if not include_image_processor and len(image_processor_kwargs) > 0:
+            logger.warning(f"these kwargs will be ignored: {image_processor_kwargs} since image_processor is not used in this block")
+
         self._image_input_name = input_name
         self._image_latents_output_name = output_name
         self._include_image_processor = include_image_processor
+        self._image_processor_kwargs = image_processor_kwargs
         super().__init__()
 
     @property
@@ -430,22 +434,28 @@ def expected_components(self) -> List[ComponentSpec]:
             ComponentSpec("vae", AutoencoderKLQwenImage),
         ]
         if self._include_image_processor:
+            image_processor_config = {"vae_scale_factor": 16}
+            image_processor_config.update(self._image_processor_kwargs)
             components.append(
                 ComponentSpec(
-                    "image_processor",
+                    f"{self._image_input_name}_processor",
                     VaeImageProcessor,
-                    config=FrozenDict({"vae_scale_factor": 16}),
+                    config=FrozenDict(image_processor_config),
                     default_creation_method="from_config",
                 )
             )
         return components
 
     @property
     def inputs(self) -> List[InputParam]:
-        return [
+        inputs = [
             InputParam(self._image_input_name, required=True),
             InputParam("generator"),
         ]
+        if self._include_image_processor:
+            inputs.append(InputParam("height"))
+            inputs.append(InputParam("width"))
+        return inputs
 
     @property
     def intermediate_outputs(self) -> List[OutputParam]:
@@ -457,6 +467,14 @@ def intermediate_outputs(self) -> List[OutputParam]:
             )
         ]
 
+    @staticmethod
+    def check_inputs(height, width, vae_scale_factor):
+        if height is not None and height % (vae_scale_factor * 2) != 0:
+            raise ValueError(f"Height must be divisible by {vae_scale_factor * 2} but is {height}")
+
+        if width is not None and width % (vae_scale_factor * 2) != 0:
+            raise ValueError(f"Width must be divisible by {vae_scale_factor * 2} but is {width}")
+
     @torch.no_grad()
     def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -> PipelineState:
         block_state = self.get_block_state(state)
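
The new check_inputs validates resolution against twice the VAE scale factor: with the component default vae_scale_factor=16, height and width must be multiples of 32. A worked illustration:

vae_scale_factor = 16
multiple = vae_scale_factor * 2   # 32
print(1024 % multiple == 0)       # True: 1024 = 32 * 32, passes
print(1000 % multiple == 0)       # False: check_inputs would raise ValueError
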
@@ -467,7 +485,16 @@ def __call__(self, components: QwenImageModularPipeline, state: PipelineState) -
         image = getattr(block_state, self._image_input_name)
 
         if self._include_image_processor:
-            image = components.image_processor.preprocess(image)
+            image_processor = getattr(components, f"{self._image_input_name}_processor")
+            self.check_inputs(block_state.height, block_state.width, components.vae_scale_factor)
+
+            if not image_processor.config.do_resize and (block_state.height is not None or block_state.width is not None):
+                logger.warning(f"height and width are provided but image_processor.config.do_resize is False, these will be ignored")
+
+            height = block_state.height or components.default_height
+            width = block_state.width or components.default_width
+            image = image_processor.preprocess(image, height=height, width=width)
+
         image = image.unsqueeze(2)
         image = image.to(device=device, dtype=dtype)
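
Together, these changes let one pipeline register several independently configured VAE-encode blocks: the processor component is named after the image input, and **image_processor_kwargs flow into its VaeImageProcessor config. A hedged construction sketch (the control_image names are illustrative, not from this commit):

from diffusers.modular_pipelines.qwenimage.encoders import (  # assumed import path
    QwenImageVaeEncoderDynamicStep,
)

# Registers a "control_image_processor" component instead of the old fixed
# "image_processor" name, with do_resize forwarded into the processor config.
control_step = QwenImageVaeEncoderDynamicStep(
    input_name="control_image",
    output_name="control_image_latents",
    do_resize=False,
)

# With include_image_processor=False, stray processor kwargs are ignored with a warning.
bare_step = QwenImageVaeEncoderDynamicStep(include_image_processor=False, do_resize=False)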
