<!--Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-->

# EasyAnimate

[EasyAnimate](https://github.com/aigc-apps/EasyAnimate) is a transformer-based diffusion pipeline for generating AI images and videos, developed by Alibaba PAI.

The description from its GitHub page:
*EasyAnimate is a pipeline based on the transformer architecture, designed for generating AI images and videos, and for training baseline models and Lora models for Diffusion Transformer. We support direct prediction from pre-trained EasyAnimate models, allowing for the generation of videos with various resolutions, approximately 6 seconds in length, at 8fps (EasyAnimateV5.1, 1 to 49 frames). Additionally, users can train their own baseline and Lora models for specific style transformations.*

This pipeline was contributed by [bubbliiiing](https://github.com/bubbliiiing). The original codebase can be found [here](https://github.com/aigc-apps/EasyAnimate), and the original weights can be found under [hf.co/alibaba-pai](https://huggingface.co/alibaba-pai).

There are two official EasyAnimate checkpoints for text-to-video and video-to-video.

| checkpoints | recommended inference dtype |
|:---:|:---:|
| [`alibaba-pai/EasyAnimateV5.1-12b-zh`](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh) | torch.float16 |
| [`alibaba-pai/EasyAnimateV5.1-12b-zh-InP`](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-InP) | torch.float16 |

There is one official EasyAnimate checkpoint available for image-to-video and video-to-video.

| checkpoints | recommended inference dtype |
|:---:|:---:|
| [`alibaba-pai/EasyAnimateV5.1-12b-zh-InP`](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-InP) | torch.float16 |

There are two official EasyAnimate checkpoints available for control-to-video.

| checkpoints | recommended inference dtype |
|:---:|:---:|
| [`alibaba-pai/EasyAnimateV5.1-12b-zh-Control`](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-Control) | torch.float16 |
| [`alibaba-pai/EasyAnimateV5.1-12b-zh-Control-Camera`](https://huggingface.co/alibaba-pai/EasyAnimateV5.1-12b-zh-Control-Camera) | torch.float16 |

For the EasyAnimateV5.1 series:
- Text-to-video (T2V) and image-to-video (I2V) generation work at multiple resolutions; the width and height can each vary from 256 to 1024.
- Both the T2V and I2V models support generation with 1 to 49 frames and work best at 49 frames. Exporting videos at 8 FPS is recommended (see the minimal sketch below).

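The sketch below shows how these values are passed at call time. It assumes a single GPU with enough memory for the float16 weights; the 512x512 resolution and the output filename are arbitrary choices for illustration, and `height`, `width`, and `num_frames` are assumed to behave as they do in other Diffusers video pipelines.

```py
import torch
from diffusers import EasyAnimatePipeline
from diffusers.utils import export_to_video

# Load the text-to-video checkpoint in the recommended dtype.
pipeline = EasyAnimatePipeline.from_pretrained(
    "alibaba-pai/EasyAnimateV5.1-12b-zh", torch_dtype=torch.float16
)
pipeline.to("cuda")

prompt = "A cat walks on the grass, realistic style."
video = pipeline(
    prompt=prompt,
    height=512,              # width and height can vary from 256 to 1024
    width=512,
    num_frames=49,           # 1 to 49 frames; 49 works best
    num_inference_steps=30,
).frames[0]

# Export at the recommended 8 FPS (49 frames is roughly 6 seconds).
export_to_video(video, "output.mp4", fps=8)
```
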
## Quantization

Quantization helps reduce the memory requirements of very large models by storing model weights in a lower precision data type. However, quantization may have varying impact on video quality depending on the video model.

Refer to the [Quantization](../../quantization/overview) overview to learn more about supported quantization backends and selecting a quantization backend that supports your use case. The example below demonstrates how to load a quantized [`EasyAnimatePipeline`] for inference with bitsandbytes.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, EasyAnimateTransformer3DModel, EasyAnimatePipeline
from diffusers.utils import export_to_video

quant_config = DiffusersBitsAndBytesConfig(load_in_8bit=True)
transformer_8bit = EasyAnimateTransformer3DModel.from_pretrained(
    "alibaba-pai/EasyAnimateV5.1-12b-zh",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

pipeline = EasyAnimatePipeline.from_pretrained(
    "alibaba-pai/EasyAnimateV5.1-12b-zh",
    transformer=transformer_8bit,
    torch_dtype=torch.float16,
    device_map="balanced",
)

prompt = "A cat walks on the grass, realistic style."
negative_prompt = "bad detailed"
video = pipeline(prompt=prompt, negative_prompt=negative_prompt, num_frames=49, num_inference_steps=30).frames[0]
export_to_video(video, "cat.mp4", fps=8)
```

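If 8-bit weights are still too large, the same pattern extends to 4-bit bitsandbytes quantization. The sketch below only swaps the quantization config; how much 4-bit quantization degrades video quality for this particular model is an assumption to verify, not a documented recommendation.

```py
import torch
from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig, EasyAnimateTransformer3DModel

# 4-bit NF4 quantization of the transformer: smaller memory footprint than 8-bit,
# with a potentially larger impact on output quality.
quant_config = DiffusersBitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
transformer_4bit = EasyAnimateTransformer3DModel.from_pretrained(
    "alibaba-pai/EasyAnimateV5.1-12b-zh",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)
```

Pass `transformer_4bit` to `EasyAnimatePipeline.from_pretrained` exactly as in the 8-bit example above.
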
## EasyAnimatePipeline

[[autodoc]] EasyAnimatePipeline
  - all
  - __call__

## EasyAnimatePipelineOutput

[[autodoc]] pipelines.easyanimate.pipeline_output.EasyAnimatePipelineOutput