huggingface · yiyixuxu · Jan 20, 2025 · Jan 20, 2025
diff --git a/docs/source/en/api/pipelines/mochi.md b/docs/source/en/api/pipelines/mochi.md
@@ -115,7 +115,7 @@ export_to_video(frames, "mochi.mp4", fps=30)
 
 ## Reproducing the results from the Genmo Mochi repo
 
-The [Genmo Mochi implementation](https://github.com/genmoai/mochi/tree/main) uses different precision values for each stage in the inference process. The text encoder and VAE use `torch.float32`, while the DiT uses `torch.bfloat16` with the [attention kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html#torch.nn.attention.sdpa_kernel) set to `EFFICIENT_ATTENTION`. Diffusers pipelines currently do not support setting different `dtypes` for different stages of the pipeline. In order to run inference in the same way as the the original implementation, please refer to the following example.
+The [Genmo Mochi implementation](https://github.com/genmoai/mochi/tree/main) uses different precision values for each stage in the inference process. The text encoder and VAE use `torch.float32`, while the DiT uses `torch.bfloat16` with the [attention kernel](https://pytorch.org/docs/stable/generated/torch.nn.attention.sdpa_kernel.html#torch.nn.attention.sdpa_kernel) set to `EFFICIENT_ATTENTION`. Diffusers pipelines currently do not support setting different `dtypes` for different stages of the pipeline. In order to run inference in the same way as the original implementation, please refer to the following example.
 
 <Tip>
 The original Mochi implementation zeros out empty prompts. However, enabling this option and placing the entire pipeline under autocast can lead to numerical overflows with the T5 text encoder.

diff --git a/scripts/convert_consistency_decoder.py b/scripts/convert_consistency_decoder.py
@@ -73,7 +73,7 @@ def _download(url: str, root: str):
                 loop.update(len(buffer))
 
     if insecure_hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256:
-        raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match")
+        raise RuntimeError("Model has been downloaded but the SHA256 checksum does not match")
 
     return download_target
 

diff --git a/src/diffusers/optimization.py b/src/diffusers/optimization.py
@@ -258,7 +258,7 @@ def get_polynomial_decay_schedule_with_warmup(
 
     lr_init = optimizer.defaults["lr"]
     if not (lr_init > lr_end):
-        raise ValueError(f"lr_end ({lr_end}) must be be smaller than initial lr ({lr_init})")
+        raise ValueError(f"lr_end ({lr_end}) must be smaller than initial lr ({lr_init})")
 
     def lr_lambda(current_step: int):
         if current_step < num_warmup_steps:

diff --git a/src/diffusers/pipelines/pag/pag_utils.py b/src/diffusers/pipelines/pag/pag_utils.py
@@ -158,7 +158,7 @@ def set_pag_applied_layers(
         ),
     ):
         r"""
-        Set the the self-attention layers to apply PAG. Raise ValueError if the input is invalid.
+        Set the self-attention layers to apply PAG. Raise ValueError if the input is invalid.
 
         Args:
             pag_applied_layers (`str` or `List[str]`):

diff --git a/src/diffusers/video_processor.py b/src/diffusers/video_processor.py
@@ -67,7 +67,7 @@ def preprocess_video(self, video, height: Optional[int] = None, width: Optional[
 
         # ensure the input is a list of videos:
         # - if it is a batch of videos (5d torch.Tensor or np.ndarray), it is converted to a list of videos (a list of 4d torch.Tensor or np.ndarray)
-        # - if it is is a single video, it is convereted to a list of one video.
+        # - if it is a single video, it is convereted to a list of one video.
         if isinstance(video, (np.ndarray, torch.Tensor)) and video.ndim == 5:
             video = list(video)
         elif isinstance(video, list) and is_valid_image(video[0]) or is_valid_image_imagelist(video):
-Original file line number
+Diff line change
@@ Expand Up / @@ -158,7 +158,7 @@ def set_pag_applied_layers( @@
             ),
         ):
             r"""
-            Set the the self-attention layers to apply PAG. Raise ValueError if the input is invalid.
+            Set the self-attention layers to apply PAG. Raise ValueError if the input is invalid.
             Args:
                 pag_applied_layers (`str` or `List[str]`):
@@ Expand Down @@