
Commit 0c98aad

make style

1 parent 048a5f0

5 files changed: +38 −16 lines

src/diffusers/models/transformers/cogvideox_transformer_3d.py
Lines changed: 4 additions & 2 deletions

@@ -281,7 +281,9 @@ def __init__(
         self.ofs_embedding = None

         if ofs_embed_dim:
-            self.ofs_embedding = TimestepEmbedding(ofs_embed_dim, ofs_embed_dim, timestep_activation_fn)  # same as time embeddings, for ofs
+            self.ofs_embedding = TimestepEmbedding(
+                ofs_embed_dim, ofs_embed_dim, timestep_activation_fn
+            )  # same as time embeddings, for ofs

         # 3. Define spatio-temporal transformers blocks
         self.transformer_blocks = nn.ModuleList(
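
For context, the wrapped call above builds the optional ofs embedding the same way as the time embedding. A minimal sketch of that usage, assuming diffusers' TimestepEmbedding(in_channels, time_embed_dim, act_fn) API and an illustrative ofs_embed_dim of 512 (neither value is taken from the diff):

import torch
from diffusers.models.embeddings import TimestepEmbedding

ofs_embed_dim = 512  # illustrative value, not from the diff
# mirrors `TimestepEmbedding(ofs_embed_dim, ofs_embed_dim, timestep_activation_fn)` above,
# assuming the activation resolves to "silu"
ofs_embedding = TimestepEmbedding(ofs_embed_dim, ofs_embed_dim, "silu")

ofs_proj = torch.randn(2, ofs_embed_dim)  # stand-in for the sinusoidal ofs projection
ofs_emb = ofs_embedding(ofs_proj)         # shape (2, ofs_embed_dim); added to the time embedding
print(ofs_emb.shape)
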
@@ -516,7 +518,7 @@ def custom_forward(*inputs):
         # 5. Unpatchify
         p = self.config.patch_size
         p_t = self.config.patch_size_t
-
+
         if p_t is None:
             output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p)
             output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4)
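
The second hunk only normalizes a blank line; the reshape logic is untouched. As a shape check with dummy sizes (not taken from the source), the sketch below shows how the `p_t is None` branch maps a patchified token sequence back to a (batch, frames, channels, height, width) tensor:

import torch

batch_size, num_frames, channels, p = 1, 2, 16, 2  # illustrative sizes
height, width = 8, 8  # latent height/width, assumed divisible by p

# one token per (p x p) spatial patch, each carrying channels * p * p values
hidden_states = torch.randn(batch_size, num_frames * (height // p) * (width // p), channels * p * p)

output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p)
output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4)
print(output.shape)  # torch.Size([1, 2, 16, 8, 8])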

src/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
Lines changed: 7 additions & 2 deletions

@@ -383,8 +383,13 @@ def check_inputs(
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
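
The reformatted check is functionally unchanged. As a quick illustration of the arithmetic, assuming a CogVideoX-style temporal VAE scale factor of 4, patch_size_t = 2, and a hypothetical helper name:

def check_latent_frames(num_frames, vae_scale_factor_temporal=4, patch_size_t=2):
    # same formula as in check_inputs above
    latent_frames = (num_frames - 1) // vae_scale_factor_temporal + 1
    divisible = patch_size_t is None or latent_frames % patch_size_t == 0
    return latent_frames, divisible

print(check_latent_frames(49))  # (13, False) -> check_inputs would raise under these assumptions
print(check_latent_frames(45))  # (12, True)  -> passes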

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
Lines changed: 10 additions & 5 deletions

@@ -422,10 +422,15 @@ def check_inputs(
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
-
+
         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs
@@ -643,14 +648,14 @@ def __call__(

         if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)):
             callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
-
+
         if control_video is not None and isinstance(control_video[0], Image.Image):
             control_video = [control_video]

         height = height or self.transformer.config.sample_height * self.vae_scale_factor_spatial
         width = width or self.transformer.config.sample_width * self.vae_scale_factor_spatial
         num_frames = len(control_video[0]) if control_video is not None else control_video_latents.size(2)
-
+
         num_videos_per_prompt = 1

         # 1. Check inputs. Raise error if not correct

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
Lines changed: 8 additions & 3 deletions

@@ -461,8 +461,13 @@ def check_inputs(
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")

         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if callback_on_step_end_tensor_inputs is not None and not all(
             k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs

@@ -682,7 +687,7 @@ def __call__(
         height = height or self.transformer.config.sample_height * self.vae_scale_factor_spatial
         width = width or self.transformer.config.sample_width * self.vae_scale_factor_spatial
         num_frames = num_frames or self.transformer.config.sample_frames
-
+
         num_videos_per_prompt = 1

         # 1. Check inputs. Raise error if not correct
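
The second hunk only normalizes a blank line around the default-resolution block. For reference, with assumed CogVideoX-style config values (sample_height=60, sample_width=90, sample_frames=49, spatial VAE scale factor 8, none taken from the diff) those fallbacks resolve as:

sample_height, sample_width, sample_frames = 60, 90, 49  # assumed config values
vae_scale_factor_spatial = 8                             # assumed spatial compression

height = None or sample_height * vae_scale_factor_spatial  # 480
width = None or sample_width * vae_scale_factor_spatial    # 720
num_frames = None or sample_frames                         # 49
print(height, width, num_frames)  # 480 720 49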

src/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
Lines changed: 9 additions & 4 deletions

@@ -449,10 +449,15 @@ def check_inputs(
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
-
+
         latent_frames = (num_frames - 1) // self.vae_scale_factor_temporal + 1
-        if self.transformer.config.patch_size_t is not None and latent_frames % self.transformer.config.patch_size_t != 0:
-            raise ValueError(f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}.")
+        if (
+            self.transformer.config.patch_size_t is not None
+            and latent_frames % self.transformer.config.patch_size_t != 0
+        ):
+            raise ValueError(
+                f"Number of latent frames must be divisible by `{self.transformer.config.patch_size_t}` but got {latent_frames=}."
+            )

         if strength < 0 or strength > 1:
             raise ValueError(f"The value of strength should in [0.0, 1.0] but is {strength}")

@@ -675,7 +680,7 @@ def __call__(
         height = height or self.transformer.config.sample_height * self.vae_scale_factor_spatial
         width = width or self.transformer.config.sample_width * self.vae_scale_factor_spatial
         num_frames = len(video) if latents is None else latents.size(1)
-
+
         num_videos_per_prompt = 1

         # 1. Check inputs. Raise error if not correct
