
Commit 283ffe7

Update qwenimage.md to reflect new pipelines and add # Copied from convention

1 parent: 56565e1

File tree (3 files changed, +34 / -1 lines changed)

- docs/source/en/api/pipelines/qwenimage.md
- src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py
- src/diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py
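The second half of the commit message refers to diffusers' `# Copied from` convention: a comment of the form `# Copied from <dotted.path.to.source>` placed directly above a duplicated function or method, which the repository's consistency check (run via `make fix-copies`, implemented in `utils/check_copies.py`) uses to detect drift between a copy and its source and to rewrite the copy when the source changes. Below is a hypothetical, much-simplified sketch of such a checker; the function name and logic are invented for illustration, and the real checker also handles `with X->Y` renaming clauses, decorators, and indentation normalization.

```python
import inspect
import re

def verify_copied_from(copy_fn, marker_line: str) -> bool:
    """Return True if copy_fn's source matches the function named in its marker.

    Hypothetical helper, not diffusers' actual utils/check_copies.py logic.
    """
    # marker_line looks like:
    # "# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift"
    match = re.match(r"#\s*Copied from\s+([\w.]+)", marker_line)
    if match is None:
        raise ValueError("not a '# Copied from' marker")
    module_path, _, attr = match.group(1).rpartition(".")
    module = __import__(module_path, fromlist=[attr])
    # Compare raw source text; the real checker normalizes the copies
    # (e.g. applying `with A->B` substitutions) before comparing.
    return inspect.getsource(copy_fn) == inspect.getsource(getattr(module, attr))
```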

docs/source/en/api/pipelines/qwenimage.md (12 additions, 0 deletions)

@@ -33,3 +33,15 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
 ## QwenImagePipelineOutput
 
 [[autodoc]] pipelines.qwenimage.pipeline_output.QwenImagePipelineOutput
+
+## QwenImageImg2ImgPipeline
+
+[[autodoc]] QwenImageImg2ImgPipeline
+  - all
+  - __call__
+
+## QwenImageInpaintPipeline
+
+[[autodoc]] QwenImageInpaintPipeline
+  - all
+  - __call__
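Since the doc page now covers these two pipelines, a minimal usage sketch may help. The checkpoint id and parameter values are illustrative assumptions, not taken from this commit; `QwenImageInpaintPipeline` follows the same pattern with an additional `mask_image` argument.

```python
import torch
from diffusers import QwenImageImg2ImgPipeline
from diffusers.utils import load_image

# Checkpoint id is an assumption; use whichever QwenImage checkpoint you have.
pipe = QwenImageImg2ImgPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)
pipe.to("cuda")

init_image = load_image("input.png")
result = pipe(
    prompt="a watercolor painting of a harbor at dusk",
    image=init_image,
    strength=0.6,  # how far to move away from the init image (illustrative value)
).images[0]
result.save("qwen_img2img.png")
```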

src/diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py (10 additions, 0 deletions)

@@ -42,6 +42,7 @@
 ```
 """
 
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
 def retrieve_latents(
     encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
 ):
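The hunk shows only the head of `retrieve_latents`; its standard diffusers body picks latents out of a VAE `encode()` output, either sampling from the latent distribution or taking its mode. A self-contained sketch follows; the `Fake*` classes are invented stand-ins for `DiagonalGaussianDistribution` and `AutoencoderKLOutput`.

```python
import torch

class FakeDist:
    # Stand-in for diffusers' DiagonalGaussianDistribution (invented for the demo).
    def __init__(self, mean):
        self.mean = mean
    def sample(self, generator=None):
        return self.mean + torch.randn(self.mean.shape, generator=generator)
    def mode(self):
        return self.mean

class FakeEncoderOutput:
    # Stand-in for AutoencoderKLOutput(latent_dist=...).
    def __init__(self, mean):
        self.latent_dist = FakeDist(mean)

def retrieve_latents(encoder_output, generator=None, sample_mode="sample"):
    # Mirrors the helper named in the hunk; the upstream body is abbreviated there.
    if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
        return encoder_output.latent_dist.sample(generator)
    if hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
        return encoder_output.latent_dist.mode()
    if hasattr(encoder_output, "latents"):
        return encoder_output.latents
    raise AttributeError("Could not access latents of provided encoder_output")

out = FakeEncoderOutput(torch.zeros(1, 4, 8, 8))
print(retrieve_latents(out, sample_mode="argmax").shape)  # torch.Size([1, 4, 8, 8])
```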
@@ -54,6 +55,7 @@ def retrieve_latents(
     else:
         raise AttributeError("Could not access latents of provided encoder_output")
 
+# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift
 def calculate_shift(
     image_seq_len,
     base_seq_len: int = 256,
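`calculate_shift` computes the flow-matching scheduler's shift parameter `mu` as a linear interpolation in the image token count. The hunk shows only the first default; the remaining defaults below are assumptions taken from the Flux version of this helper, not from this diff.

```python
def calculate_shift(
    image_seq_len,
    base_seq_len: int = 256,
    max_seq_len: int = 4096,   # assumed (Flux default)
    base_shift: float = 0.5,   # assumed (Flux default)
    max_shift: float = 1.16,   # assumed (Flux default)
):
    # Linear interpolation: mu = m * image_seq_len + b, anchored so that
    # base_seq_len maps to base_shift and max_seq_len maps to max_shift.
    m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
    b = base_shift - m * base_seq_len
    return image_seq_len * m + b

# e.g. a 1024x1024 image -> 64x64 latent -> 32x32 packed tokens = 1024 tokens
print(calculate_shift(1024))  # ~0.63 with the assumed defaults
```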
@@ -178,6 +180,7 @@ def __init__(
         self.prompt_template_encode_start_idx = 34
         self.default_sample_size = 128
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._extract_masked_hidden
     def _extract_masked_hidden(self, hidden_states: torch.Tensor, mask: torch.Tensor):
         bool_mask = mask.bool()
         valid_lengths = bool_mask.sum(dim=1)
@@ -186,6 +189,7 @@ def _extract_masked_hidden(self, hidden_states: torch.Tensor, mask: torch.Tensor
 
         return split_result
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._get_qwen_prompt_embeds
     def _get_qwen_prompt_embeds(
         self,
         prompt: Union[str, List[str]] = None,
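The two hunks above show the full body of `_extract_masked_hidden`, so it can be demonstrated standalone: it keeps only the unmasked token states and splits them back into one variable-length chunk per batch item.

```python
import torch

def extract_masked_hidden(hidden_states: torch.Tensor, mask: torch.Tensor):
    bool_mask = mask.bool()
    valid_lengths = bool_mask.sum(dim=1)          # valid token count per sample
    selected = hidden_states[bool_mask]           # flatten all valid tokens
    return torch.split(selected, valid_lengths.tolist(), dim=0)

hidden = torch.arange(12.0).reshape(2, 3, 2)      # (batch=2, seq=3, dim=2)
mask = torch.tensor([[1, 1, 0], [1, 0, 0]])       # 2 valid tokens, then 1
chunks = extract_masked_hidden(hidden, mask)
print([c.shape for c in chunks])  # [torch.Size([2, 2]), torch.Size([1, 2])]
```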
@@ -247,6 +251,7 @@ def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
 
         return image_latents
 
+    # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_img2img.StableDiffusion3Img2ImgPipeline.get_timesteps
     def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
         init_timestep = min(num_inference_steps * strength, num_inference_steps)
@@ -258,6 +263,7 @@ def get_timesteps(self, num_inference_steps, strength, device):
 
         return timesteps, num_inference_steps - t_start
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.encode_prompt
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
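`get_timesteps`, copied here from the SD3 img2img pipeline, trims the noise schedule according to `strength`: the higher the strength, the more of the schedule is actually run. A small worked example; the `t_start` computation is elided by the hunks and paraphrased from the SD3 source.

```python
# With 30 steps and strength 0.6, the first 12 timesteps are skipped and
# the last 18 are actually run.
num_inference_steps, strength = 30, 0.6
init_timestep = min(num_inference_steps * strength, num_inference_steps)  # 18.0
t_start = int(max(num_inference_steps - init_timestep, 0))                # 12
# timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
print(t_start, num_inference_steps - t_start)  # 12 18
```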
@@ -296,6 +302,7 @@ def encode_prompt(
 
         return prompt_embeds, prompt_embeds_mask
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -356,6 +363,7 @@ def check_inputs(
             raise ValueError(f"`max_sequence_length` cannot be greater than 1024 but is {max_sequence_length}")
 
     @staticmethod
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._prepare_latent_image_ids
     def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
         latent_image_ids = torch.zeros(height, width, 3)
         latent_image_ids[..., 1] = latent_image_ids[..., 1] + torch.arange(height)[:, None]
@@ -370,6 +378,7 @@ def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
         return latent_image_ids.to(device=device, dtype=dtype)
 
     @staticmethod
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._pack_latents
     def _pack_latents(latents, batch_size, num_channels_latents, height, width):
         latents = latents.view(batch_size, num_channels_latents, height // 2, 2, width // 2, 2)
         latents = latents.permute(0, 2, 4, 1, 3, 5)
@@ -378,6 +387,7 @@ def _pack_latents(latents, batch_size, num_channels_latents, height, width):
         return latents
 
     @staticmethod
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._unpack_latents
     def _unpack_latents(latents, height, width, vae_scale_factor):
         batch_size, num_patches, channels = latents.shape
 
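`_pack_latents` folds each 2x2 spatial patch of the latent grid into the channel dimension, producing the token sequence the transformer consumes; `_unpack_latents` inverts the operation using `vae_scale_factor`. A sketch of the packing side; the final `reshape` is elided by the hunk and assumed from the Flux-style implementation.

```python
import torch

def pack_latents(latents, batch_size, num_channels_latents, height, width):
    # (B, C, H, W) -> (B, H/2 * W/2, C * 4): every 2x2 patch becomes one token.
    latents = latents.view(batch_size, num_channels_latents, height // 2, 2, width // 2, 2)
    latents = latents.permute(0, 2, 4, 1, 3, 5)
    return latents.reshape(batch_size, (height // 2) * (width // 2), num_channels_latents * 4)

x = torch.randn(1, 16, 64, 64)
print(pack_latents(x, 1, 16, 64, 64).shape)  # torch.Size([1, 1024, 64])
```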
src/diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py (12 additions, 1 deletion)

@@ -45,6 +45,7 @@
 ```
 """
 
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
 def retrieve_latents(
     encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
 ):
@@ -57,6 +58,7 @@ def retrieve_latents(
     else:
         raise AttributeError("Could not access latents of provided encoder_output")
 
+# Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift
 def calculate_shift(
     image_seq_len,
     base_seq_len: int = 256,
@@ -188,14 +190,16 @@ def __init__(
         self.prompt_template_encode_start_idx = 34
         self.default_sample_size = 128
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._extract_masked_hidden
     def _extract_masked_hidden(self, hidden_states: torch.Tensor, mask: torch.Tensor):
         bool_mask = mask.bool()
         valid_lengths = bool_mask.sum(dim=1)
         selected = hidden_states[bool_mask]
         split_result = torch.split(selected, valid_lengths.tolist(), dim=0)
 
         return split_result
-
+
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._get_qwen_prompt_embeds
     def _get_qwen_prompt_embeds(
         self,
         prompt: Union[str, List[str]] = None,
@@ -234,6 +238,7 @@ def _get_qwen_prompt_embeds(
 
         return prompt_embeds, encoder_attention_mask
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage_img2img._encode_vae_image
     def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
         if isinstance(generator, list):
             image_latents = [
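The per-sample `generator` handling visible at the end of this hunk is the usual diffusers pattern in `_encode_vae_image`: when a list of generators is passed, each batch element is encoded with its own RNG so results stay reproducible per sample. A hedged sketch, reusing the `retrieve_latents` helper from above and omitting any latent normalization the real method applies.

```python
import torch

def encode_vae_image(vae, image: torch.Tensor, generator):
    # One generator per batch element keeps per-sample results reproducible.
    if isinstance(generator, list):
        per_sample = [
            retrieve_latents(vae.encode(image[i : i + 1]), generator=generator[i])
            for i in range(image.shape[0])
        ]
        return torch.cat(per_sample, dim=0)
    return retrieve_latents(vae.encode(image), generator=generator)
```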
@@ -257,6 +262,7 @@ def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
 
         return image_latents
 
+    # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_img2img.StableDiffusion3Img2ImgPipeline.get_timesteps
     def get_timesteps(self, num_inference_steps, strength, device):
         # get the original timestep using init_timestep
         init_timestep = min(num_inference_steps * strength, num_inference_steps)
@@ -268,6 +274,7 @@ def get_timesteps(self, num_inference_steps, strength, device):
 
         return timesteps, num_inference_steps - t_start
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.encode_prompt
     def encode_prompt(
         self,
         prompt: Union[str, List[str]],
@@ -306,6 +313,7 @@ def encode_prompt(
 
         return prompt_embeds, prompt_embeds_mask
 
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.check_inputs
     def check_inputs(
         self,
         prompt,
@@ -382,6 +390,7 @@ def check_inputs(
             raise ValueError(f"`max_sequence_length` cannot be greater than 1024 but is {max_sequence_length}")
 
     @staticmethod
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._prepare_latent_image_ids
     def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
         latent_image_ids = torch.zeros(height, width, 3)
         latent_image_ids[..., 1] = latent_image_ids[..., 1] + torch.arange(height)[:, None]
@@ -396,6 +405,7 @@ def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
         return latent_image_ids.to(device=device, dtype=dtype)
 
     @staticmethod
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._pack_latents
     def _pack_latents(latents, batch_size, num_channels_latents, height, width):
         latents = latents.view(batch_size, num_channels_latents, height // 2, 2, width // 2, 2)
         latents = latents.permute(0, 2, 4, 1, 3, 5)
@@ -404,6 +414,7 @@ def _pack_latents(latents, batch_size, num_channels_latents, height, width):
         return latents
 
     @staticmethod
+    # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage._unpack_latents
     def _unpack_latents(latents, height, width, vae_scale_factor):
         batch_size, num_patches, channels = latents.shape
 
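`_prepare_latent_image_ids` builds the (H*W, 3) grid of positional ids consumed by the transformer's rotary embeddings. The hunks show only the row-index line, so the column-index and reshape steps below are assumptions based on the Flux implementation of the same helper.

```python
import torch

def prepare_latent_image_ids(batch_size, height, width, device, dtype):
    ids = torch.zeros(height, width, 3)
    ids[..., 1] = ids[..., 1] + torch.arange(height)[:, None]  # row index (shown in hunk)
    ids[..., 2] = ids[..., 2] + torch.arange(width)[None, :]   # column index (assumed)
    return ids.reshape(height * width, 3).to(device=device, dtype=dtype)

print(prepare_latent_image_ids(1, 4, 4, "cpu", torch.float32).shape)  # torch.Size([16, 3])
```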