From 6ee6b53a9a2ad3fdc4c465b814e1beb32a36cd3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tolga=20Cang=C3=B6z?= Date: Fri, 30 May 2025 12:26:59 +0300 Subject: [PATCH 1/2] fix: vae sampling mode --- src/diffusers/pipelines/wan/pipeline_wan_video2video.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/diffusers/pipelines/wan/pipeline_wan_video2video.py b/src/diffusers/pipelines/wan/pipeline_wan_video2video.py index 1844f1b49ba1..e4f967ad960c 100644 --- a/src/diffusers/pipelines/wan/pipeline_wan_video2video.py +++ b/src/diffusers/pipelines/wan/pipeline_wan_video2video.py @@ -419,12 +419,7 @@ def prepare_latents( ) if latents is None: - if isinstance(generator, list): - init_latents = [ - retrieve_latents(self.vae.encode(video[i].unsqueeze(0)), generator[i]) for i in range(batch_size) - ] - else: - init_latents = [retrieve_latents(self.vae.encode(vid.unsqueeze(0)), generator) for vid in video] + init_latents = [retrieve_latents(self.vae.encode(vid.unsqueeze(0)), sample_mode="argmax") for vid in video] init_latents = torch.cat(init_latents, dim=0).to(dtype) From 40243c8cc3e7defe898a986612aa39ad9fcf02d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tolga=20Cang=C3=B6z?= Date: Fri, 30 May 2025 12:27:13 +0300 Subject: [PATCH 2/2] fix a typo --- src/diffusers/pipelines/wan/pipeline_wan_video2video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/pipelines/wan/pipeline_wan_video2video.py b/src/diffusers/pipelines/wan/pipeline_wan_video2video.py index e4f967ad960c..a4a10d4655a9 100644 --- a/src/diffusers/pipelines/wan/pipeline_wan_video2video.py +++ b/src/diffusers/pipelines/wan/pipeline_wan_video2video.py @@ -436,7 +436,7 @@ def prepare_latents( if hasattr(self.scheduler, "add_noise"): latents = self.scheduler.add_noise(init_latents, noise, timestep) else: - latents = self.scheduelr.scale_noise(init_latents, timestep, noise) + latents = self.scheduler.scale_noise(init_latents, timestep, noise) else: latents = latents.to(device)