huggingface
diff --git a/‎.github/workflows/pr_style_bot.yml‎
Lines changed: 0 additions & 34 deletions b/‎.github/workflows/pr_style_bot.yml‎
Lines changed: 0 additions & 34 deletions
diff --git a/‎.github/workflows/pr_tests_gpu.yml‎
Lines changed: 47 additions & 1 deletion b/‎.github/workflows/pr_tests_gpu.yml‎
Lines changed: 47 additions & 1 deletion
diff --git a/‎docs/source/en/_toctree.yml‎
Lines changed: 2 additions & 0 deletions b/‎docs/source/en/_toctree.yml‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/source/en/api/cache.md‎
Lines changed: 33 additions & 0 deletions b/‎docs/source/en/api/cache.md‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎docs/source/en/api/pipelines/deepfloyd_if.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/en/api/pipelines/deepfloyd_if.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/en/api/pipelines/flux.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/en/api/pipelines/flux.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/en/api/pipelines/hunyuan_video.md‎
Lines changed: 2 additions & 1 deletion b/‎docs/source/en/api/pipelines/hunyuan_video.md‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎docs/source/en/api/pipelines/kolors.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/en/api/pipelines/kolors.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/en/api/pipelines/ltx_video.md‎
Lines changed: 8 additions & 0 deletions b/‎docs/source/en/api/pipelines/ltx_video.md‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎docs/source/en/api/pipelines/sana.md‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/en/api/pipelines/sana.md‎
Lines changed: 1 addition & 0 deletions
@@ -13,39 +13,5 @@ jobs:
     uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@main
     with:
       python_quality_dependencies: "[quality]"
-      pre_commit_script_name: "Download and Compare files from the main branch"
-      pre_commit_script: |
-        echo "Downloading the files from the main branch"
-
-        curl -o main_Makefile https://raw.githubusercontent.com/huggingface/diffusers/main/Makefile
-        curl -o main_setup.py https://raw.githubusercontent.com/huggingface/diffusers/refs/heads/main/setup.py
-        curl -o main_check_doc_toc.py https://raw.githubusercontent.com/huggingface/diffusers/refs/heads/main/utils/check_doc_toc.py
-
-        echo "Compare the files and raise error if needed"
-
-        diff_failed=0
-        if ! diff -q main_Makefile Makefile; then
-          echo "Error: The Makefile has changed. Please ensure it matches the main branch."
-          diff_failed=1
-        fi
-
-        if ! diff -q main_setup.py setup.py; then
-          echo "Error: The setup.py has changed. Please ensure it matches the main branch."
-          diff_failed=1
-        fi
-
-        if ! diff -q main_check_doc_toc.py utils/check_doc_toc.py; then
-          echo "Error: The utils/check_doc_toc.py has changed. Please ensure it matches the main branch."
-          diff_failed=1
-        fi
-
-        if [ $diff_failed -eq 1 ]; then
-          echo "❌ Error happened as we detected changes in the files that should not be changed ❌"
-          exit 1
-        fi
-
-        echo "No changes in the files. Proceeding..."
-        rm -rf main_Makefile main_setup.py main_check_doc_toc.py
-      style_command: "make style && make quality"
     secrets:
       bot_token: ${{ secrets.GITHUB_TOKEN }}
@@ -28,7 +28,51 @@ env:
   PIPELINE_USAGE_CUTOFF: 1000000000 # set high cutoff so that only always-test pipelines run
 
 jobs:
+  check_code_quality:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[quality]
+      - name: Check quality
+        run: make quality
+      - name: Check if failure
+        if: ${{ failure() }}
+        run: |
+          echo "Quality check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make style && make quality'" >> $GITHUB_STEP_SUMMARY
+
+  check_repository_consistency:
+    needs: check_code_quality
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.8"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[quality]
+      - name: Check repo consistency
+        run: |
+          python utils/check_copies.py
+          python utils/check_dummies.py
+          python utils/check_support_list.py
+          make deps_table_check_updated
+      - name: Check if failure
+        if: ${{ failure() }}
+        run: |
+          echo "Repo consistency check failed. Please ensure the right dependency versions are installed with 'pip install -e .[quality]' and run 'make fix-copies'" >> $GITHUB_STEP_SUMMARY
+
   setup_torch_cuda_pipeline_matrix:
+    needs: [check_code_quality, check_repository_consistency]
     name: Setup Torch Pipelines CUDA Slow Tests Matrix
     runs-on:
       group: aws-general-8-plus
@@ -133,6 +177,7 @@ jobs:
 
   torch_cuda_tests:
     name: Torch CUDA Tests
+    needs: [check_code_quality, check_repository_consistency]
     runs-on:
       group: aws-g4dn-2xlarge
     container:
@@ -201,7 +246,7 @@ jobs:
 
   run_examples_tests:
     name: Examples PyTorch CUDA tests on Ubuntu
-        pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
+    needs: [check_code_quality, check_repository_consistency]
     runs-on:
       group: aws-g4dn-2xlarge
 
@@ -220,6 +265,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+        pip uninstall transformers -y && python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
         python -m uv pip install -e [quality,test,training]
 
     - name: Environment
 
@@ -496,6 +496,8 @@
       title: PixArt-Σ
     - local: api/pipelines/sana
       title: Sana
+    - local: api/pipelines/sana_sprint
+      title: Sana Sprint
     - local: api/pipelines/self_attention_guidance
       title: Self-Attention Guidance
     - local: api/pipelines/semantic_stable_diffusion
 
@@ -38,6 +38,33 @@ config = PyramidAttentionBroadcastConfig(
 pipe.transformer.enable_cache(config)
 ```
 
+## Faster Cache
+
+[FasterCache](https://huggingface.co/papers/2410.19355) from Zhengyao Lv, Chenyang Si, Junhao Song, Zhenyu Yang, Yu Qiao, Ziwei Liu, Kwan-Yee K. Wong.
+
+FasterCache is a method that speeds up inference in diffusion transformers by:
+- Reusing attention states between successive inference steps, due to high similarity between them
+- Skipping unconditional branch prediction used in classifier-free guidance by revealing redundancies between unconditional and conditional branch outputs for the same timestep, and therefore approximating the unconditional branch output using the conditional branch output
+
+```python
+import torch
+from diffusers import CogVideoXPipeline, FasterCacheConfig
+
+pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+
+config = FasterCacheConfig(
+    spatial_attention_block_skip_range=2,
+    spatial_attention_timestep_skip_range=(-1, 681),
+    current_timestep_callback=lambda: pipe.current_timestep,
+    attention_weight_callback=lambda _: 0.3,
+    unconditional_batch_skip_range=5,
+    unconditional_batch_timestep_skip_range=(-1, 781),
+    tensor_format="BFCHW",
+)
+pipe.transformer.enable_cache(config)
+```
+
 ### CacheMixin
 
 [[autodoc]] CacheMixin
@@ -47,3 +74,9 @@ pipe.transformer.enable_cache(config)
 [[autodoc]] PyramidAttentionBroadcastConfig
 
 [[autodoc]] apply_pyramid_attention_broadcast
+
+### FasterCacheConfig
+
+[[autodoc]] FasterCacheConfig
+
+[[autodoc]] apply_faster_cache
@@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License.
 
 <div class="flex flex-wrap space-x-1">
   <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+  <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white"/>
 </div>
 
 ## Overview
 
@@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License.
 
 <div class="flex flex-wrap space-x-1">
   <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+  <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white"/>
 </div>
 
 Flux is a series of text-to-image generation models based on diffusion transformers. To know more about Flux, check out the original [blog post](https://blackforestlabs.ai/announcing-black-forest-labs/) by the creators of Flux, Black Forest Labs.
 
@@ -50,7 +50,8 @@ The following models are available for the image-to-video pipeline:
 | Model name | Description |
 |:---|:---|
 | [`Skywork/SkyReels-V1-Hunyuan-I2V`](https://huggingface.co/Skywork/SkyReels-V1-Hunyuan-I2V) | Skywork's custom finetune of HunyuanVideo (de-distilled). Performs best with `97x544x960` resolution. Performs best at `97x544x960` resolution, `guidance_scale=1.0`, `true_cfg_scale=6.0` and a negative prompt. |
-| [`hunyuanvideo-community/HunyuanVideo-I2V`](https://huggingface.co/hunyuanvideo-community/HunyuanVideo-I2V) | Tecent's official HunyuanVideo I2V model. Performs best at resolutions of 480, 720, 960, 1280. A higher `shift` value when initializing the scheduler is recommended (good values are between 7 and 20) |
+| [`hunyuanvideo-community/HunyuanVideo-I2V-33ch`](https://huggingface.co/hunyuanvideo-community/HunyuanVideo-I2V-33ch) | Tencent's official HunyuanVideo 33-channel I2V model. Performs best at resolutions of 480, 720, 960, 1280. A higher `shift` value when initializing the scheduler is recommended (good values are between 7 and 20). |
+| [`hunyuanvideo-community/HunyuanVideo-I2V`](https://huggingface.co/hunyuanvideo-community/HunyuanVideo-I2V) | Tencent's official HunyuanVideo 16-channel I2V model. Performs best at resolutions of 480, 720, 960, 1280. A higher `shift` value when initializing the scheduler is recommended (good values are between 7 and 20). |
 
 ## Quantization
 
 
@@ -14,6 +14,7 @@ specific language governing permissions and limitations under the License.
 
 <div class="flex flex-wrap space-x-1">
   <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+  <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white"/>
 </div>
 
 ![](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/kolors/kolors_header_collage.png)
 
@@ -16,6 +16,7 @@
 
 <div class="flex flex-wrap space-x-1">
   <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+  <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white"/>
 </div>
 
 [LTX Video](https://huggingface.co/Lightricks/LTX-Video) is the first DiT-based video generation model capable of generating high-quality videos in real-time. It produces 24 FPS videos at a 768x512 resolution faster than they can be watched. Trained on a large-scale dataset of diverse videos, the model generates high-resolution videos with realistic and varied content. We provide a model for both text-to-video as well as image + text-to-video usecases.
@@ -32,6 +33,7 @@ Available models:
 |:-------------:|:-----------------:|
 | [`LTX Video 0.9.0`](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.safetensors) | `torch.bfloat16` |
 | [`LTX Video 0.9.1`](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.1.safetensors) | `torch.bfloat16` |
+| [`LTX Video 0.9.5`](https://huggingface.co/Lightricks/LTX-Video/blob/main/ltx-video-2b-v0.9.5.safetensors) | `torch.bfloat16` |
 
 Note: The recommended dtype is for the transformer component. The VAE and text encoders can be either `torch.float32`, `torch.bfloat16` or `torch.float16` but the recommended dtype is `torch.bfloat16` as used in the original repository.
 
@@ -196,6 +198,12 @@ export_to_video(video, "ship.mp4", fps=24)
   - all
   - __call__
 
+## LTXConditionPipeline
+
+[[autodoc]] LTXConditionPipeline
+  - all
+  - __call__
+
 ## LTXPipelineOutput
 
 [[autodoc]] pipelines.ltx.pipeline_output.LTXPipelineOutput
@@ -16,6 +16,7 @@
 
 <div class="flex flex-wrap space-x-1">
   <img alt="LoRA" src="https://img.shields.io/badge/LoRA-d8b4fe?style=flat"/>
+  <img alt="MPS" src="https://img.shields.io/badge/MPS-000000?style=flat&logo=apple&logoColor=white"/>
 </div>
 
 [SANA: Efficient High-Resolution Image Synthesis with Linear Diffusion Transformers](https://huggingface.co/papers/2410.10629) from NVIDIA and MIT HAN Lab, by Enze Xie, Junsong Chen, Junyu Chen, Han Cai, Haotian Tang, Yujun Lin, Zhekai Zhang, Muyang Li, Ligeng Zhu, Yao Lu, Song Han.