8989
9090 export-voxtral-cuda-artifact :
9191 name : export-voxtral-cuda-${{ matrix.quant.name }}
92+ # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
93+ if : github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
9294 uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9395 permissions :
9496 id-token : write
@@ -126,7 +128,7 @@ jobs:
126128 echo "::endgroup::"
127129
128130 echo "::group::Setup Huggingface"
129- pip install -U "huggingface_hub[cli]" accelerate
131+ pip install -U "huggingface_hub[cli]<1.0" accelerate
130132 huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
131133 OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
132134 pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -166,6 +168,8 @@ jobs:
166168
167169 export-gemma3-cuda-artifact :
168170 name : export-gemma3-cuda-${{ matrix.quant.name }}
171+ # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
172+ if : github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
169173 uses : pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
170174 permissions :
171175 id-token : write
@@ -176,12 +180,12 @@ jobs:
176180 matrix :
177181 quant :
178182 - name : "non-quantized"
179- artifact : "voxtral-cuda-export"
183+ artifact : "gemma3-cuda-export"
180184 extra_args : ""
181- # TODO: enable gemma3 quantization
182- # - name : "quantized-int4-tile-packed"
183- # artifact : "voxtral-cuda-quantized-int4-tile-packed "
184- # extra_args: "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
185+ - name : "quantized-int4-tile-packed"
186+ artifact : "gemma3-cuda-quantized-int4-tile-packed"
187+ extra_args : "--qlinear 4w --qlinear_encoder 4w --qlinear_packing_format tile_packed_to_4d --qlinear_encoder_packing_format tile_packed_to_4d"
188+ # TODO: enable int4-weight-only on gemma3.
185189 # - name: "quantized-int4-weight-only"
186190 # artifact: "voxtral-cuda-quantized-int4-weight-only"
187191 # # TODO: adding "--qlinear 4w" produces invalid results. Need further investigation.
@@ -194,7 +198,7 @@ jobs:
194198 gpu-arch-version : 12.6
195199 use-custom-docker-registry : false
196200 submodules : recursive
197- upload-artifact : gemma3-cuda-export
201+ upload-artifact : ${{ matrix.quant.artifact }}
198202 ref : ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
199203 script : |
200204 set -eux
@@ -204,7 +208,7 @@ jobs:
204208 echo "::endgroup::"
205209
206210 echo "::group::Setup Huggingface"
207- pip install -U "huggingface_hub[cli]" accelerate
211+ pip install -U "huggingface_hub[cli]<1.0" accelerate
208212 huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
209213 OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
210214 pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -255,7 +259,7 @@ jobs:
255259 set -eux
256260
257261 echo "::group::Setup ExecuTorch Requirements"
258- CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
262+ ./install_requirements.sh
259263 pip list
260264 echo "::endgroup::"
261265
@@ -305,7 +309,7 @@ jobs:
305309 set -eux
306310
307311 echo "::group::Setup ExecuTorch Requirements"
308- CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
312+ ./install_requirements.sh
309313 pip list
310314 echo "::endgroup::"
311315
@@ -363,7 +367,7 @@ jobs:
363367 set -eux
364368
365369 echo "::group::Setup ExecuTorch Requirements"
366- CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
370+ ./install_requirements.sh
367371 pip list
368372 echo "::endgroup::"
369373
@@ -435,9 +439,9 @@ jobs:
435439 format :
436440 - name : "non-quantized"
437441 artifact : "gemma3-cuda-export"
438- # TODO: enable quantized gemma3.
439- # - name : "quantized-int4-tile-packed"
440- # artifact: "gemma3-cuda-quantized- int4-tile-packed"
442+ - name : "quantized-int4-tile-packed"
443+ artifact : "gemma3-cuda-quantized-int4-tile-packed"
444+ # TODO: enable int4-weight-only on gemma3.
441445 # - name: "quantized-int4-weight-only"
442446 # artifact: "gemma3-cuda-quantized-int4-weight-only"
443447 with :
0 commit comments