CanvaChen
diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml
Lines changed: 56 additions & 0 deletions
diff --git a/.github/workflows/pr_test_peft_backend.yml b/.github/workflows/pr_test_peft_backend.yml
Lines changed: 6 additions & 4 deletions
diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml
Lines changed: 2 additions & 1 deletion
diff --git a/docker/diffusers-onnxruntime-cuda/Dockerfile b/docker/diffusers-onnxruntime-cuda/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/docker/diffusers-pytorch-compile-cuda/Dockerfile b/docker/diffusers-pytorch-compile-cuda/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/docker/diffusers-pytorch-cpu/Dockerfile b/docker/diffusers-pytorch-cpu/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/docker/diffusers-pytorch-cuda/Dockerfile b/docker/diffusers-pytorch-cuda/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/docker/diffusers-pytorch-xformers-cuda/Dockerfile b/docker/diffusers-pytorch-xformers-cuda/Dockerfile
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml
Lines changed: 22 additions & 0 deletions
@@ -180,6 +180,62 @@ jobs:
         pip install slack_sdk tabulate
         python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
 
+  run_big_gpu_torch_tests:
+    name: Torch tests on big GPU
+    strategy:
+      fail-fast: false
+      max-parallel: 2
+    runs-on:
+      group: aws-g6e-xlarge-plus
+    container:
+      image: diffusers/diffusers-pytorch-cuda
+      options: --shm-size "16gb" --ipc host --gpus 0
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+      - name: NVIDIA-SMI
+        run: nvidia-smi
+      - name: Install dependencies
+        run: |
+          python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
+          python -m uv pip install -e [quality,test]
+          python -m uv pip install peft@git+https://github.com/huggingface/peft.git
+          pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+          python -m uv pip install pytest-reportlog
+      - name: Environment
+        run: |
+          python utils/print_env.py
+      - name: Selected Torch CUDA Test on big GPU
+        env:
+          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
+          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
+          CUBLAS_WORKSPACE_CONFIG: :16:8
+          BIG_GPU_MEMORY: 40
+        run: |
+          python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \
+            -m "big_gpu_with_torch_cuda" \
+            --make-reports=tests_big_gpu_torch_cuda \
+            --report-log=tests_big_gpu_torch_cuda.log \
+            tests/
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: |
+          cat reports/tests_big_gpu_torch_cuda_stats.txt
+          cat reports/tests_big_gpu_torch_cuda_failures_short.txt
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: torch_cuda_big_gpu_test_reports
+          path: reports
+      - name: Generate Report and Notify Channel
+        if: always()
+        run: |
+          pip install slack_sdk tabulate
+          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
+
   run_flax_tpu_tests:
     name: Nightly Flax TPU Tests
     runs-on: docker-tpu
 
@@ -92,12 +92,14 @@ jobs:
       run: |
         python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH"
         python -m uv pip install -e [quality,test]
+        # TODO (sayakpaul, DN6): revisit `--no-deps`
         if [ "${{ matrix.lib-versions }}" == "main" ]; then
-            python -m pip install -U peft@git+https://github.com/huggingface/peft.git
-            python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git
-            pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
+            python -m pip install -U peft@git+https://github.com/huggingface/peft.git --no-deps
+            python -m uv pip install -U transformers@git+https://github.com/huggingface/transformers.git --no-deps
+            pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git --no-deps
         else
-            python -m uv pip install -U peft transformers accelerate
+            python -m uv pip install -U peft --no-deps
+            python -m uv pip install -U transformers accelerate --no-deps
         fi
 
     - name: Environment
 
@@ -81,7 +81,7 @@ jobs:
       - name: Environment
         run: |
           python utils/print_env.py
-      - name: Slow PyTorch CUDA checkpoint tests on Ubuntu
+      - name: PyTorch CUDA checkpoint tests on Ubuntu
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
@@ -184,7 +184,7 @@ jobs:
       run: |
         python utils/print_env.py
 
-    - name: Run slow Flax TPU tests
+    - name: Run Flax TPU tests
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
       run: |
@@ -232,7 +232,7 @@ jobs:
       run: |
         python utils/print_env.py
 
-    - name: Run slow ONNXRuntime CUDA tests
+    - name: Run ONNXRuntime CUDA tests
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
       run: |
 
@@ -4,12 +4,13 @@ on:
   workflow_dispatch:
     inputs:
       runner_type:
-        description: 'Type of runner to test (aws-g6-4xlarge-plus: a10 or aws-g4dn-2xlarge: t4)'
+        description: 'Type of runner to test (aws-g6-4xlarge-plus: a10, aws-g4dn-2xlarge: t4, aws-g6e-xlarge-plus: L40)'
         type: choice
         required: true
         options:
           - aws-g6-4xlarge-plus
           - aws-g4dn-2xlarge
+          - aws-g6e-xlarge-plus
       docker_image:
         description: 'Name of the Docker image'
         required: true
 
@@ -28,7 +28,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
     python3.10 -m uv pip install --no-cache-dir \
-        torch \
+        "torch<2.5.0" \
         torchvision \
         torchaudio \
         "onnxruntime-gpu>=1.13.1" \
 
@@ -29,7 +29,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
     python3.10 -m uv pip install --no-cache-dir \
-    torch \
+    "torch<2.5.0" \
     torchvision \
     torchaudio \
     invisible_watermark && \
 
@@ -29,7 +29,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
     python3.10 -m uv pip install --no-cache-dir \
-        torch \
+        "torch<2.5.0" \
         torchvision \
         torchaudio \
         invisible_watermark \
 
@@ -29,7 +29,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
     python3.10 -m uv pip install --no-cache-dir \
-    torch \
+    "torch<2.5.0" \
     torchvision \
     torchaudio \
     invisible_watermark && \
 
@@ -29,7 +29,7 @@ ENV PATH="/opt/venv/bin:$PATH"
 # pre-install the heavy dependencies (these can later be overridden by the deps from setup.py)
 RUN python3.10 -m pip install --no-cache-dir --upgrade pip uv==0.1.11 && \
     python3.10 -m pip install --no-cache-dir \
-        torch \
+        "torch<2.5.0" \
         torchvision \
         torchaudio \
         invisible_watermark && \
 
@@ -150,6 +150,12 @@
       title: Reinforcement learning training with DDPO
     title: Methods
   title: Training
+- sections:
+  - local: quantization/overview
+    title: Getting Started
+  - local: quantization/bitsandbytes
+    title: bitsandbytes
+  title: Quantization Methods
 - sections:
   - local: optimization/fp16
     title: Speed up inference
@@ -182,6 +188,8 @@
       title: Metal Performance Shaders (MPS)
     - local: optimization/habana
       title: Habana Gaudi
+    - local: optimization/neuron
+      title: AWS Neuron
     title: Optimized hardware
   title: Accelerate inference and reduce memory
 - sections:
@@ -209,6 +217,8 @@
       title: Logging
     - local: api/outputs
       title: Outputs
+    - local: api/quantization
+      title: Quantization
     title: Main Classes
   - isExpanded: false
     sections:
@@ -242,6 +252,8 @@
         title: SparseControlNetModel
       title: ControlNets
     - sections:
+      - local: api/models/allegro_transformer3d
+        title: AllegroTransformer3DModel
       - local: api/models/aura_flow_transformer2d
         title: AuraFlowTransformer2DModel
       - local: api/models/cogvideox_transformer3d
@@ -258,6 +270,8 @@
         title: LatteTransformer3DModel
       - local: api/models/lumina_nextdit2d
         title: LuminaNextDiT2DModel
+      - local: api/models/mochi_transformer3d
+        title: MochiTransformer3DModel
       - local: api/models/pixart_transformer2d
         title: PixArtTransformer2DModel
       - local: api/models/prior_transformer
@@ -290,8 +304,12 @@
     - sections:
       - local: api/models/autoencoderkl
         title: AutoencoderKL
+      - local: api/models/autoencoderkl_allegro
+        title: AutoencoderKLAllegro
       - local: api/models/autoencoderkl_cogvideox
         title: AutoencoderKLCogVideoX
+      - local: api/models/autoencoderkl_mochi
+        title: AutoencoderKLMochi
       - local: api/models/asymmetricautoencoderkl
         title: AsymmetricAutoencoderKL
       - local: api/models/consistency_decoder_vae
@@ -308,6 +326,8 @@
     sections:
     - local: api/pipelines/overview
       title: Overview
+    - local: api/pipelines/allegro
+      title: Allegro
     - local: api/pipelines/amused
       title: aMUSEd
     - local: api/pipelines/animatediff
@@ -384,6 +404,8 @@
       title: Lumina-T2X
     - local: api/pipelines/marigold
       title: Marigold
+    - local: api/pipelines/mochi
+      title: Mochi
     - local: api/pipelines/panorama
       title: MultiDiffusion
     - local: api/pipelines/musicldm