@@ -88,11 +88,17 @@ def __init__(
         self.norm3 = RMSNorm(dim, eps=eps, elementwise_affine=False)
         self.norm3_context = RMSNorm(pooled_projection_dim, eps=eps, elementwise_affine=False)
 
-        self.ff = FeedForward(dim, inner_dim=self.ff_inner_dim, activation_fn=activation_fn, bias=False, flip_gate=True)
+        self.ff = FeedForward(
+            dim, inner_dim=self.ff_inner_dim, activation_fn=activation_fn, bias=False, flip_gate=True
+        )
         self.ff_context = None
         if not context_pre_only:
             self.ff_context = FeedForward(
-                pooled_projection_dim, inner_dim=self.ff_context_inner_dim, activation_fn=activation_fn, bias=False, flip_gate=True
+                pooled_projection_dim,
+                inner_dim=self.ff_context_inner_dim,
+                activation_fn=activation_fn,
+                bias=False,
+                flip_gate=True,
             )
 
         self.norm4 = RMSNorm(dim, eps=eps, elementwise_affine=False)
@@ -131,7 +137,9 @@ def forward(
             ) * torch.tanh(enc_gate_msa).unsqueeze(1)
             norm_encoder_hidden_states = self.norm3_context(encoder_hidden_states) * (1 + enc_scale_mlp.unsqueeze(1))
             context_ff_output = self.ff_context(norm_encoder_hidden_states)
-            encoder_hidden_states = encoder_hidden_states + self.norm4_context(context_ff_output) * torch.tanh(enc_gate_mlp).unsqueeze(1)
+            encoder_hidden_states = encoder_hidden_states + self.norm4_context(context_ff_output) * torch.tanh(
+                enc_gate_mlp
+            ).unsqueeze(1)
 
         return hidden_states, encoder_hidden_states
 
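Note: the second hunk only rewraps the tanh-gated feed-forward update in the context branch; its behavior is unchanged. A minimal, self-contained sketch of that update pattern is below, using plain torch.nn stand-ins. The shapes, the 4x hidden size, and nn.RMSNorm (available in PyTorch >= 2.4) are illustrative assumptions, not the repo's RMSNorm/FeedForward implementations (flip_gate and the surrounding block are omitted).

# Sketch of the tanh-gated MLP update from the second hunk (assumed shapes/modules).
import torch
import torch.nn as nn

batch, seq_len, dim = 2, 16, 64

# elementwise_affine=False matches the diff: normalization without learned scale.
norm3_context = nn.RMSNorm(dim, eps=1e-6, elementwise_affine=False)
norm4_context = nn.RMSNorm(dim, eps=1e-6, elementwise_affine=False)
# Stand-in for the repo's FeedForward module.
ff_context = nn.Sequential(nn.Linear(dim, 4 * dim), nn.GELU(), nn.Linear(4 * dim, dim))

encoder_hidden_states = torch.randn(batch, seq_len, dim)
enc_scale_mlp = torch.randn(batch, dim)  # per-sample modulation scale
enc_gate_mlp = torch.randn(batch, dim)   # per-sample gate, squashed by tanh

# Scale the normalized stream, run the MLP, then add the gated, re-normalized output back.
norm_states = norm3_context(encoder_hidden_states) * (1 + enc_scale_mlp.unsqueeze(1))
ff_out = ff_context(norm_states)
encoder_hidden_states = encoder_hidden_states + norm4_context(ff_out) * torch.tanh(enc_gate_mlp).unsqueeze(1)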