@@ -158,19 +158,21 @@ class Kandinsky5TimeEmbeddings(nn.Module):
     def __init__(self, model_dim, time_dim, max_period=10000.0):
         super().__init__()
         assert model_dim % 2 == 0
+        print(f"{model_dim = }, {time_dim = }")
         self.model_dim = model_dim
         self.max_period = max_period
         self.freqs = get_freqs(self.model_dim // 2, self.max_period)
         self.in_layer = nn.Linear(model_dim, time_dim, bias=True)
         self.activation = nn.SiLU()
         self.out_layer = nn.Linear(time_dim, time_dim, bias=True)
 
-    @torch.autocast(device_type="cuda", dtype=torch.float32)
     def forward(self, time):
-        args = torch.outer(time, self.freqs.to(device=time.device))
+        original_dtype = time.dtype
+        print(f"{original_dtype = }")
+        args = torch.outer(time.to(torch.float32), self.freqs.to(device=time.device))
         time_embed = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
-        time_embed = self.out_layer(self.activation(self.in_layer(time_embed)))
-        return time_embed
+        time_embed = F.linear(self.activation(F.linear(time_embed, self.in_layer.weight.to(torch.float32), self.in_layer.bias.to(torch.float32))), self.out_layer.weight.to(torch.float32), self.out_layer.bias.to(torch.float32))
+        return time_embed.to(original_dtype)
 
 
 class Kandinsky5TextEmbeddings(nn.Module):
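As context for this hunk: the commit drops the CUDA-only `torch.autocast` decorator and instead forces the time-embedding math into float32 explicitly, restoring the caller's dtype at the end. Below is a minimal, self-contained sketch of that pattern; the module name `Float32TimeEmbed`, the inline frequency computation standing in for `get_freqs`, and the dimensions are illustrative assumptions, not the actual diffusers code.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class Float32TimeEmbed(nn.Module):
    """Hypothetical sketch of the float32 time-embedding pattern in the hunk above."""

    def __init__(self, model_dim, time_dim, max_period=10000.0):
        super().__init__()
        assert model_dim % 2 == 0
        half = model_dim // 2
        # Sinusoidal frequencies; stands in for get_freqs() from the original file.
        self.freqs = torch.exp(
            -torch.log(torch.tensor(max_period)) * torch.arange(half) / half
        )
        self.in_layer = nn.Linear(model_dim, time_dim, bias=True)
        self.activation = nn.SiLU()
        self.out_layer = nn.Linear(time_dim, time_dim, bias=True)

    def forward(self, time):
        original_dtype = time.dtype
        # Build the sinusoidal embedding in float32, whatever dtype the timesteps arrive in.
        args = torch.outer(time.to(torch.float32), self.freqs.to(device=time.device))
        emb = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        # Run the MLP via F.linear with weights cast to float32 per call,
        # instead of relying on a device-specific torch.autocast context.
        emb = F.linear(emb, self.in_layer.weight.float(), self.in_layer.bias.float())
        emb = F.linear(self.activation(emb), self.out_layer.weight.float(), self.out_layer.bias.float())
        # Hand the result back in the caller's original dtype.
        return emb.to(original_dtype)


# Usage: bf16 weights and timesteps outside, float32 math inside, bf16 result.
embed = Float32TimeEmbed(model_dim=64, time_dim=128).to(torch.bfloat16)
t = torch.tensor([0.0, 250.0, 500.0, 999.0], dtype=torch.bfloat16)
print(embed(t).dtype)  # torch.bfloat16
```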
@@ -271,7 +273,7 @@ def __init__(self, time_dim, model_dim, num_params):
 
     @torch.autocast(device_type="cuda", dtype=torch.float32)
     def forward(self, x):
-        return self.out_layer(self.activation(x))
+        return F.linear(self.activation(x.to(torch.float32)), self.out_layer.weight.to(torch.float32), self.out_layer.bias.to(torch.float32)).type_as(x)
 
 
 class Kandinsky5AttnProcessor:
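A plausible reason (an assumption, not stated in the commit) for routing through `F.linear` with per-call weight casts, rather than upcasting the layer itself, is that `nn.Module.to()` converts the stored parameters in place, whereas the functional call only casts temporary copies. A tiny sketch with hypothetical sizes:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

layer = nn.Linear(16, 16).to(torch.bfloat16)
x = torch.randn(2, 16, dtype=torch.bfloat16)

# Functional path: cast copies of the weights for this call only.
y = F.linear(x.to(torch.float32), layer.weight.float(), layer.bias.float()).type_as(x)
print(y.dtype)            # torch.bfloat16 -- output matches the input dtype via .type_as(x)
print(layer.weight.dtype)  # torch.bfloat16 -- module parameters unchanged

# In-place path: nn.Module.to() converts the stored parameters themselves.
layer.to(torch.float32)
print(layer.weight.dtype)  # torch.float32
```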