
Commit 0c98aad

make style

1 parent 048a5f0

5 files changed: +38 −16 lines

src/diffusers/models/transformers/cogvideox_transformer_3d.py
Lines changed: 4 additions & 2 deletions

@@ -281,7 +281,9 @@ def __init__(
         self.ofs_embedding = None

         if ofs_embed_dim:
-            self.ofs_embedding = TimestepEmbedding(ofs_embed_dim, ofs_embed_dim, timestep_activation_fn)  # same as time embeddings, for ofs
+            self.ofs_embedding = TimestepEmbedding(
+                ofs_embed_dim, ofs_embed_dim, timestep_activation_fn
+            )  # same as time embeddings, for ofs

         # 3. Define spatio-temporal transformers blocks
         self.transformer_blocks = nn.ModuleList(
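
For context, the wrapped call above builds the optional ofs embedding the same way as the time embedding. A minimal sketch of that usage, assuming diffusers' TimestepEmbedding(in_channels, time_embed_dim, act_fn) API and an illustrative ofs_embed_dim of 512 (neither value is taken from the diff):

import torch
from diffusers.models.embeddings import TimestepEmbedding

ofs_embed_dim = 512  # illustrative value, not from the diff
# mirrors `TimestepEmbedding(ofs_embed_dim, ofs_embed_dim, timestep_activation_fn)` above,
# assuming the activation resolves to "silu"
ofs_embedding = TimestepEmbedding(ofs_embed_dim, ofs_embed_dim, "silu")

ofs_proj = torch.randn(2, ofs_embed_dim)  # stand-in for the sinusoidal ofs projection
ofs_emb = ofs_embedding(ofs_proj)         # shape (2, ofs_embed_dim); added to the time embedding
print(ofs_emb.shape)
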
@@ -516,7 +518,7 @@ def custom_forward(*inputs):
         # 5. Unpatchify
         p = self.config.patch_size
         p_t = self.config.patch_size_t
-
+
         if p_t is None:
             output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p)
             output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4)
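
The second hunk only normalizes a blank line; the reshape logic is untouched. As a shape check with dummy sizes (not taken from the source), the sketch below shows how the `p_t is None` branch maps a patchified token sequence back to a (batch, frames, channels, height, width) tensor:

import torch

batch_size, num_frames, channels, p = 1, 2, 16, 2  # illustrative sizes
height, width = 8, 8  # latent height/width, assumed divisible by p

# one token per (p x p) spatial patch, each carrying channels * p * p values
hidden_states = torch.randn(batch_size, num_frames * (height // p) * (width // p), channels * p * p)

output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p)
output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4)
print(output.shape)  # torch.Size([1, 2, 16, 8, 8])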

src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
Lines changed: 7 additions & 2 deletions

@@ -383,8 +383,13 @@ def check_inputs(
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
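
The reformatted check is functionally unchanged. As a quick illustration of the arithmetic, assuming a CogVideoX-style temporal VAE scale factor of 4, patch_size_t = 2, and a hypothetical helper name:

def check_latent_frames(num_frames, vae_scale_factor_temporal=4, patch_size_t=2):
    # same formula as in check_inputs above
    latent_frames = (num_frames - 1) // vae_scale_factor_temporal + 1
    divisible = patch_size_t is None or latent_frames % patch_size_t == 0
    return latent_frames, divisible

print(check_latent_frames(49))  # (13, False) -> check_inputs would raise under these assumptions
print(check_latent_frames(45))  # (12, True)  -> passes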

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
Lines changed: 10 additions & 5 deletions

@@ -422,10 +422,15 @@ def check_inputs(
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
-
+
         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
@@ -643,14 +648,14 @@ def __call__(

         if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
             callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
-
+
         if control_video is not None and isinstance(control_video[0], Image.Image):
             control_video = [control_video]

         height = height or self.transformer.config.sample_height * self.vae_scale_factor_spatial
         width = width or self.transformer.config.sample_width * self.vae_scale_factor_spatial
         num_frames = len(control_video[0]) if control_video is not None else control_video_latents.size(2)
-
+
         num_videos_per_prompt = 1

         # 1. Check inputs. Raise error if not correct

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
Lines changed: 8 additions & 3 deletions

@@ -461,8 +461,13 @@ def check_inputs(
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs

@@ -682,7 +687,7 @@ def __call__(
         height = height or self.transformer.config.sample_height * self.vae_scale_factor_spatial
         width = width or self.transformer.config.sample_width * self.vae_scale_factor_spatial
         num_frames = num_frames or self.transformer.config.sample_frames
-
+
         num_videos_per_prompt = 1

         # 1. Check inputs. Raise error if not correct
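
The second hunk only normalizes a blank line around the default-resolution block. For reference, with assumed CogVideoX-style config values (sample_height=60, sample_width=90, sample_frames=49, spatial VAE scale factor 8, none taken from the diff) those fallbacks resolve as:

sample_height, sample_width, sample_frames = 60, 90, 49  # assumed config values
vae_scale_factor_spatial = 8                             # assumed spatial compression

height = None or sample_height * vae_scale_factor_spatial  # 480
width = None or sample_width * vae_scale_factor_spatial    # 720
num_frames = None or sample_frames                         # 49
print(height, width, num_frames)  # 480 720 49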

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
Lines changed: 9 additions & 4 deletions

@@ -449,10 +449,15 @@ def check_inputs(
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
-
+
         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if strength < 0 or strength > 1:
             raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}")

@@ -675,7 +680,7 @@ def __call__(
         height = height or self.transformer.config.sample_height * self.vae_scale_factor_spatial
         width = width or self.transformer.config.sample_width * self.vae_scale_factor_spatial
         num_frames = len(video) if latents is None else latents.size(1)
-
+
         num_videos_per_prompt = 1

         # 1. Check inputs. Raise error if not correct
