
Commit 4cdd2e6

Update on "[aoti-backend-consolidation 1/3] partitioners"

Differential Revision: [D85700449](https://our.internmc.facebook.com/intern/diff/D85700449/)

[ghstack-poisoned]

2 parents 90ad1ca + 4fc63ce

9 files changed: +254 -26 lines changed

.github/workflows/_unittest.yml

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ jobs:
       id-token: write
       contents: read
     with:
-      runner: linux.2xlarge
+      runner: linux.2xlarge.memory
       docker-image: ${{ inputs.docker-image }}
       submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

.github/workflows/cuda.yml

Lines changed: 2 additions & 2 deletions
@@ -128,7 +128,7 @@ jobs:
         echo "::endgroup::"

         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -208,7 +208,7 @@ jobs:
         echo "::endgroup::"

         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
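Several workflows in this commit pin `huggingface_hub` below 1.0 before invoking `huggingface-cli`. A minimal sanity-check sketch that a CI step could run after the install (not part of the diff; it assumes only the standard library and the freshly installed package):

```python
from importlib.metadata import version

# Verify the CI pin took effect: the installed huggingface_hub must be < 1.0
hf_version = version("huggingface_hub")
major = int(hf_version.split(".")[0])
assert major == 0, f"expected huggingface_hub < 1.0, got {hf_version}"
print(f"huggingface_hub {hf_version} satisfies the '<1.0' pin")
```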

.github/workflows/metal.yml

Lines changed: 3 additions & 1 deletion
@@ -30,6 +30,8 @@ jobs:

   export-voxtral-metal-artifact:
     name: export-voxtral-metal-artifact
+    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
+    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:
@@ -44,7 +46,7 @@
         set -eux

         echo "::group::Setup Huggingface"
-        ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+        ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
         ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         echo "::endgroup::"


.github/workflows/pull.yml

Lines changed: 2 additions & 2 deletions
@@ -315,7 +315,7 @@ jobs:
         echo "::endgroup::"

         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -636,7 +636,7 @@ jobs:
         echo "::group::Setup ExecuTorch"
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
         echo "::endgroup::"
-
+
         echo "::group::Setup requirements"
         # install phi-3-mini requirements
         bash examples/models/phi-3-mini/install_requirements.sh

.github/workflows/trunk.yml

Lines changed: 4 additions & 4 deletions
@@ -626,7 +626,7 @@ jobs:
           BUILD_TORCHAO_EXPERIMENTAL=1 TORCHAO_BUILD_CPU_AARCH64=1 TORCHAO_BUILD_KLEIDIAI=1 TORCHAO_ENABLE_ARM_NEON_DOT=1 TORCHAO_PARALLEL_BACKEND=OPENMP pip install third-party/ao
           fi

-          pip install -U "huggingface_hub[cli]"
+          pip install -U "huggingface_hub[cli]<1.0"

           bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.model != 'phi_4_mini' && '--test_with_runner' || '' }} ${{ matrix.backend == 'torchao' && '--use_torchao_kernels' || '' }}

@@ -659,7 +659,7 @@ jobs:
         echo "::endgroup::"

         echo "::group::Set up Huggingface"
-        ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+        ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
         ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -834,7 +834,7 @@ jobs:
         echo "::endgroup::"

         echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
+        pip install -U "huggingface_hub[cli]<1.0" accelerate
         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -930,7 +930,7 @@ jobs:
         echo "::endgroup::"

         echo "::group::Set up Huggingface"
-        ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+        ${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
         ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
         ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}

docs/source/llm/export-llm.md

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ The up-to-date list of supported LLMs can be found in the code [here](https://gi
 `export_llm` is ExecuTorch's high-level export API for LLMs. In this tutorial, we will focus on exporting Llama 3.2 1B using this API. `export_llm`'s arguments are specified either through CLI args or through a yaml configuration whose fields are defined in [`LlmConfig`](https://github.com/pytorch/executorch/blob/main/extension/llm/export/config/llm_config.py). To call `export_llm`:

 ```
-python -m executorch.examples.extension.llm.export.export_llm
+python -m executorch.extension.llm.export.export_llm
   --config <path-to-config-yaml>
   +base.<additional-CLI-overrides>
 ```

examples/models/voxtral/README.md

Lines changed: 6 additions & 2 deletions
@@ -198,7 +198,7 @@ Add the `--data_path` argument to provide the appropriate data blob to the comma
 - For CUDA: `--data_path path/to/aoti_cuda_blob.ptd`
 - For Metal: `--data_path path/to/aoti_metal_blob.ptd`

-# Example output:
+### Example output:
 ```
 The speaker in this audio seems to be talking about their concerns about a device called the model or maybe they're just talking about the model in general. They mention that the model was trained with the speaker for inference, which suggests that
 the model was trained based on the speaker's data or instructions. They also mention that the volume is quite small, which could imply that the speaker is trying to control the volume of the model's output, likely because they are concerned about how loud the model's responses might
@@ -212,7 +212,7 @@ I 00:00:24.036822 executorch:stats.h:147] Time to first generated token:
 I 00:00:24.036828 executorch:stats.h:153] Sampling time over 487 tokens: 0.099000 (seconds)
 ```

-# Generating audio input
+## Generating audio input
 You can easily produce an `.bin` for the audio input in Python like this:
 ```
 # t = some torch.Tensor
@@ -235,3 +235,7 @@ say -o call_samantha_hall.aiff "Call Samantha Hall"
 # Convert to .wav format
 afconvert -f WAVE -d LEI16 call_samantha_hall.aiff call_samantha_hall.wav
 ```
+
+## Android and iOS mobile demo apps
+
+We have example mobile demo apps for Android and iOS (using XNNPACK) [here](https://github.com/meta-pytorch/executorch-examples/tree/main/llm)
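The README snippet quoted in the diff above stops at `# t = some torch.Tensor`. A minimal sketch of the full round trip from the generated `.wav` to a raw `.bin` audio input might look like this (the use of `torchaudio` and the output filename are illustrative assumptions, not part of the diff):

```python
import torch
import torchaudio  # assumption: torchaudio is installed for .wav loading

# Load the .wav produced by the say/afconvert steps above
waveform, sample_rate = torchaudio.load("call_samantha_hall.wav")

# t = some torch.Tensor -- here, the loaded float32 waveform
t = waveform.to(torch.float32).contiguous()

# Write the raw tensor bytes to a .bin file to pass as the runner's audio input
with open("call_samantha_hall.bin", "wb") as f:
    f.write(t.numpy().tobytes())
```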

website/index.html

Lines changed: 11 additions & 4 deletions
@@ -3,7 +3,7 @@
 <html lang="en">
 <head>
     <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=5.0, user-scalable=yes">
     <title>ExecuTorch - On-Device AI Inference Powered by PyTorch</title>
     <meta name="description" content="ExecuTorch is PyTorch's unified solution for deploying AI models on-device—from smartphones to microcontrollers. Deploy LLMs, vision, speech, and multimodal models with the same PyTorch APIs.">
     <link rel="icon" type="image/svg+xml" href="https://raw.githubusercontent.com/dbort/executorch-logos/main/img/executorch-chip-logo.svg">
@@ -29,17 +29,18 @@
             <img src="https://raw.githubusercontent.com/dbort/executorch-logos/main/img/executorch-chip-logo.svg" alt="ExecuTorch Logo" height="40">
             <span style="color:#e0f2fe;">ExecuTorch</span>
         </div>
-        <ul class="nav-links">
+        <ul class="nav-links" id="navLinks">
             <li><a href="#why-ondevice">Why On-Device</a></li>
             <li><a href="#challenges">Challenges</a></li>
             <li><a href="#features">Solution</a></li>
-            <li><a href="#performance">Performance</a></li>
+            <li><a href="#performance">Backends</a></li>
             <li><a href="#partners">Partners</a></li>
            <li><a href="https://github.com/pytorch/executorch" target="_blank">GitHub</a></li>
         </ul>
         <form class="nav-search" action="https://docs.pytorch.org/executorch/1.0/search.html" method="get">
             <input type="text" placeholder="Search docs…" name="q" style="padding: 0.5rem 1rem; border-radius: 6px; border: 1px solid #e2e8f0;">
         </form>
+        <button class="nav-toggle" onclick="toggleNav()" aria-label="Toggle navigation"></button>
     </div>
 </div>
 </nav>
@@ -1055,13 +1056,19 @@ <h2 class="cta-title">Ready to Deploy AI at the Edge?</h2>
             navLinks.classList.toggle('open');
         }

-        // Highlight active nav link on click
+        // Highlight active nav link on click and close menu on mobile
         document.querySelectorAll('.nav-links a').forEach(function(link) {
             link.addEventListener('click', function() {
                 document.querySelectorAll('.nav-links a').forEach(function(l) {
                     l.classList.remove('active');
                 });
                 this.classList.add('active');
+
+                // Close mobile menu when clicking a link
+                var navLinks = document.getElementById('navLinks');
+                if (navLinks.classList.contains('open')) {
+                    navLinks.classList.remove('open');
+                }
             });
         });
     </script>
