From c813353375d626d47b1787bbfb4cd4a545e3b93b Mon Sep 17 00:00:00 2001 From: ppadjin Date: Mon, 20 Oct 2025 09:41:17 +0000 Subject: [PATCH 1/4] Changing the way we infer dtype to avoid force evaluation of lazy tensors --- src/diffusers/models/autoencoders/vae.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/autoencoders/vae.py b/src/diffusers/models/autoencoders/vae.py index 1d74d4f472d7..d93cfd619373 100644 --- a/src/diffusers/models/autoencoders/vae.py +++ b/src/diffusers/models/autoencoders/vae.py @@ -286,7 +286,7 @@ def forward( sample = self.conv_in(sample) - upscale_dtype = next(iter(self.up_blocks.parameters())).dtype + upscale_dtype = self.conv_out.weight.dtype if torch.is_grad_enabled() and self.gradient_checkpointing: # middle sample = self._gradient_checkpointing_func(self.mid_block, sample, latent_embeds) From e84a3411961dc86be1660fc22b3c16840813272d Mon Sep 17 00:00:00 2001 From: ppadjin Date: Wed, 22 Oct 2025 15:06:44 +0000 Subject: [PATCH 2/4] changing the way to infer dtype to ensure type consistency --- src/diffusers/models/autoencoders/vae.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/models/autoencoders/vae.py b/src/diffusers/models/autoencoders/vae.py index d93cfd619373..7f3b45caddcc 100644 --- a/src/diffusers/models/autoencoders/vae.py +++ b/src/diffusers/models/autoencoders/vae.py @@ -286,7 +286,7 @@ def forward( sample = self.conv_in(sample) - upscale_dtype = self.conv_out.weight.dtype + upscale_dtype = self.up_blocks[0].resnets[0].norm1.weight.dtype if torch.is_grad_enabled() and self.gradient_checkpointing: # middle sample = self._gradient_checkpointing_func(self.mid_block, sample, latent_embeds) From 3a860ae5b2f31bf31fea882f53a7ee4767423e8a Mon Sep 17 00:00:00 2001 From: ppadjin Date: Thu, 23 Oct 2025 11:36:48 +0000 Subject: [PATCH 3/4] more robust inferring of dtype --- src/diffusers/models/autoencoders/vae.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 
deletion(-) diff --git a/src/diffusers/models/autoencoders/vae.py b/src/diffusers/models/autoencoders/vae.py index 7f3b45caddcc..63a0784e741d 100644 --- a/src/diffusers/models/autoencoders/vae.py +++ b/src/diffusers/models/autoencoders/vae.py @@ -286,7 +286,12 @@ def forward( sample = self.conv_in(sample) - upscale_dtype = self.up_blocks[0].resnets[0].norm1.weight.dtype + upscale_norm = self.up_blocks[0].resnets[0].norm1 + if isinstance(upscale_norm, SpatialNorm): + upscale_dtype = upscale_norm.norm_layer.weight.dtype + else: + upscale_dtype = upscale_norm.weight.dtype + if torch.is_grad_enabled() and self.gradient_checkpointing: # middle sample = self._gradient_checkpointing_func(self.mid_block, sample, latent_embeds) From 0c049738c9ff9c260a089b02145998df4cf9a3d2 Mon Sep 17 00:00:00 2001 From: ppadjin Date: Thu, 23 Oct 2025 14:07:37 +0000 Subject: [PATCH 4/4] removing the upscale dtype entirely --- src/diffusers/models/autoencoders/vae.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/diffusers/models/autoencoders/vae.py b/src/diffusers/models/autoencoders/vae.py index 63a0784e741d..b4c6373cf21b 100644 --- a/src/diffusers/models/autoencoders/vae.py +++ b/src/diffusers/models/autoencoders/vae.py @@ -286,16 +286,9 @@ def forward( sample = self.conv_in(sample) - upscale_norm = self.up_blocks[0].resnets[0].norm1 - if isinstance(upscale_norm, SpatialNorm): - upscale_dtype = upscale_norm.norm_layer.weight.dtype - else: - upscale_dtype = upscale_norm.weight.dtype - if torch.is_grad_enabled() and self.gradient_checkpointing: # middle sample = self._gradient_checkpointing_func(self.mid_block, sample, latent_embeds) - sample = sample.to(upscale_dtype) # up for up_block in self.up_blocks: @@ -303,7 +296,6 @@ def forward( else: # middle sample = self.mid_block(sample, latent_embeds) - sample = sample.to(upscale_dtype) # up for up_block in self.up_blocks: