4545 ```
4646"""
4747
48+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
4849def retrieve_latents (
4950 encoder_output : torch .Tensor , generator : Optional [torch .Generator ] = None , sample_mode : str = "sample"
5051):
@@ -57,6 +58,7 @@ def retrieve_latents(
5758 else :
5859 raise AttributeError ("Could not access latents of provided encoder_output" )
5960
61+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift
6062def calculate_shift (
6163 image_seq_len ,
6264 base_seq_len : int = 256 ,
@@ -188,14 +190,16 @@ def __init__(
188190 self .prompt_template_encode_start_idx = 34
189191 self .default_sample_size = 128
190192
193+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline._extract_masked_hidden
191194 def _extract_masked_hidden (self , hidden_states : torch .Tensor , mask : torch .Tensor ):
 # Split `hidden_states` into one tensor per batch row, keeping only the
 # positions where `mask` is truthy. Returns the tuple produced by
 # `torch.split`; chunk i has as many entries along dim 0 as row i of
 # `mask` has truthy values.
 # NOTE(review): assumes `mask` is 2-D (batch, seq) and matches the leading
 # dims of `hidden_states` — confirm against callers.
192195 bool_mask = mask .bool ()
 # Count of kept positions in each row (reduction over dim 1).
193196 valid_lengths = bool_mask .sum (dim = 1 )
 # Boolean indexing gathers the kept positions of all rows into one
 # tensor, concatenated along dim 0 in row order.
194197 selected = hidden_states [bool_mask ]
 # Cut the concatenated tensor back into per-row chunks using the counts.
195198 split_result = torch .split (selected , valid_lengths .tolist (), dim = 0 )
196199
197200 return split_result
198-
201+
202+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline._get_qwen_prompt_embeds
199203 def _get_qwen_prompt_embeds (
200204 self ,
201205 prompt : Union [str , List [str ]] = None ,
@@ -234,6 +238,7 @@ def _get_qwen_prompt_embeds(
234238
235239 return prompt_embeds , encoder_attention_mask
236240
241+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage_img2img.QwenImageImg2ImgPipeline._encode_vae_image
237242 def _encode_vae_image (self , image : torch .Tensor , generator : torch .Generator ):
238243 if isinstance (generator , list ):
239244 image_latents = [
@@ -257,6 +262,7 @@ def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
257262
258263 return image_latents
259264
265+ # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_img2img.StableDiffusion3Img2ImgPipeline.get_timesteps
260266 def get_timesteps (self , num_inference_steps , strength , device ):
261267 # get the original timestep using init_timestep
262268 init_timestep = min (num_inference_steps * strength , num_inference_steps )
@@ -268,6 +274,7 @@ def get_timesteps(self, num_inference_steps, strength, device):
268274
269275 return timesteps , num_inference_steps - t_start
270276
277+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline.encode_prompt
271278 def encode_prompt (
272279 self ,
273280 prompt : Union [str , List [str ]],
@@ -306,6 +313,7 @@ def encode_prompt(
306313
307314 return prompt_embeds , prompt_embeds_mask
308315
316+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline.check_inputs
309317 def check_inputs (
310318 self ,
311319 prompt ,
@@ -382,6 +390,7 @@ def check_inputs(
382390 raise ValueError (f"`max_sequence_length` cannot be greater than 1024 but is { max_sequence_length } " )
383391
384392 @staticmethod
393+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline._prepare_latent_image_ids
385394 def _prepare_latent_image_ids (batch_size , height , width , device , dtype ):
386395 latent_image_ids = torch .zeros (height , width , 3 )
387396 latent_image_ids [..., 1 ] = latent_image_ids [..., 1 ] + torch .arange (height )[:, None ]
@@ -396,6 +405,7 @@ def _prepare_latent_image_ids(batch_size, height, width, device, dtype):
396405 return latent_image_ids .to (device = device , dtype = dtype )
397406
398407 @staticmethod
408+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline._pack_latents
399409 def _pack_latents (latents , batch_size , num_channels_latents , height , width ):
400410 latents = latents .view (batch_size , num_channels_latents , height // 2 , 2 , width // 2 , 2 )
401411 latents = latents .permute (0 , 2 , 4 , 1 , 3 , 5 )
@@ -404,6 +414,7 @@ def _pack_latents(latents, batch_size, num_channels_latents, height, width):
404414 return latents
405415
406416 @staticmethod
417+ # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.QwenImagePipeline._unpack_latents
407418 def _unpack_latents (latents , height , width , vae_scale_factor ):
408419 batch_size , num_patches , channels = latents .shape
409420
0 commit comments