Skip to content

Commit de0a970

Browse files
Merge branch 'main' into remove_ChecksNeedsDecomposition
2 parents c1c55f8 + 049c9fc commit de0a970

File tree

140 files changed

+6087
-871
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

140 files changed

+6087
-871
lines changed

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ cmake_build_llava_runner_for_android() {
107107
# only export the one without custom op for now since it's
108108
export_llava() {
109109
echo "Starting to export Llava. This will take about 6 mins"
110-
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
110+
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts --max-context-len 768
111111
}
112112

113113
# Download a new image
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#!/bin/bash
2+
# Copyright (c) Qualcomm Innovation Center, Inc.
3+
# All rights reserved
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -euo pipefail
9+
10+
echo ">>> Script invoked with arguments: $@"
11+
12+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
13+
14+
# Download QNN_SDK. If already downloaded, export environment path
15+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
16+
install_qnn
17+
18+
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
19+
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
20+
export PYTHONPATH=".."
21+
cp schema/program.fbs exir/_serialize/program.fbs
22+
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
23+
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
24+
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
25+
26+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
27+
PYTHON_EXECUTABLE=python3
28+
fi
29+
30+
which "${PYTHON_EXECUTABLE}"
31+
32+
# -------------------------------
33+
# Parse args
34+
# -------------------------------
35+
EXTRA_FLAGS=""
36+
THRESHOLD=62.0 # default fallback
37+
38+
while [[ $# -gt 0 ]]; do
39+
case "$1" in
40+
--flags)
41+
EXTRA_FLAGS="$2"
42+
shift 2
43+
;;
44+
--threshold)
45+
THRESHOLD="$2"
46+
shift 2
47+
;;
48+
*)
49+
echo "Unknown option: $1"
50+
exit 1
51+
;;
52+
esac
53+
done
54+
55+
# Config
56+
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
57+
MODEL="qwen2_5-0_5b"
58+
MAX_SEQ=1024
59+
PTQ="16a4w"
60+
61+
EXTRA_FLAGS="$@"
62+
63+
# Run command and capture *both stdout and stderr*
64+
LOG_FILE="eval_${MODEL}_$(date +%Y%m%d_%H%M%S).log"
65+
66+
echo ">>> Running evaluation with flags: $EXTRA_FLAGS | threshold: $THRESHOLD"
67+
$PYTHON_EXECUTABLE -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn \
68+
--decoder_model "$MODEL" \
69+
--quant_linear_only \
70+
--max_seq_length "$MAX_SEQ" \
71+
--ptq "$PTQ" \
72+
$EXTRA_FLAGS 2>&1 | tee "$LOG_FILE"
73+
74+
# Extract last word_perplexity
75+
LAST_PERP=$(grep "INFO:root:wikitext:" "$LOG_FILE" | tail -n 1 | sed -E "s/.*'word_perplexity,none': ([0-9.]+).*/\1/")
76+
77+
if [[ -z "$LAST_PERP" ]]; then
78+
echo "❌ Could not find word_perplexity in logs!"
79+
exit 1
80+
fi
81+
82+
echo ">>> Last word_perplexity = $LAST_PERP"
83+
84+
# Compare against threshold
85+
awk -v val="$LAST_PERP" -v thr="$THRESHOLD" 'BEGIN {exit (val > thr)}'
86+
if [[ $? -ne 0 ]]; then
87+
echo "❌ Regression detected: word_perplexity ($LAST_PERP) > threshold ($THRESHOLD)"
88+
exit 1
89+
fi
90+
91+
echo "✅ Check passed: word_perplexity ($LAST_PERP) <= $THRESHOLD"

.ci/scripts/test_wheel_package_qnn.sh

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ PYTHON_VERSION=$1
9898
# Check wheel does NOT contain qualcomm/sdk
9999
# ----------------------------
100100
echo "Checking wheel does not contain qualcomm/sdk..."
101-
SDK_FILES=$(unzip -l "$WHEEL_FILE" | awk '{print $4}' | grep "executorch/backends/qualcomm/sdk" || true)
101+
SDK_FILES=$(unzip -l "$WHEEL_FILE" | awk '{print $4}' | grep -E "executorch/backends/qualcomm/sdk" || true)
102102
if [ -n "$SDK_FILES" ]; then
103103
echo "ERROR: Wheel package contains unexpected qualcomm/sdk files:"
104104
echo "$SDK_FILES"
@@ -111,7 +111,7 @@ fi
111111
# Check .so files in the wheel
112112
# ----------------------------
113113
echo "Checking for .so files inside the wheel..."
114-
WHEEL_SO_FILES=$(unzip -l "$WHEEL_FILE" | awk '{print $4}' | grep "executorch/backends/qualcomm/python" || true)
114+
WHEEL_SO_FILES=$(unzip -l "$WHEEL_FILE" | awk '{print $4}' | grep -E "executorch/backends/qualcomm/python" || true)
115115
if [ -z "$WHEEL_SO_FILES" ]; then
116116
echo "ERROR: No .so files found in wheel under executorch/backends/qualcomm/python"
117117
exit 1
@@ -139,8 +139,30 @@ run_core_tests () {
139139
echo "=== [$LABEL] Installing wheel & deps ==="
140140
"$PIPBIN" install --upgrade pip
141141
"$PIPBIN" install "$WHEEL_FILE"
142-
"$PIPBIN" install torch=="2.9.0.dev20250906" --index-url "https://download.pytorch.org/whl/nightly/cpu"
143-
"$PIPBIN" install --pre torchao --index-url "https://download.pytorch.org/whl/nightly/cpu"
142+
TORCH_VERSION=$(
143+
"$PYBIN" - <<'PY'
144+
import runpy
145+
module_vars = runpy.run_path("torch_pin.py")
146+
print(module_vars["TORCH_VERSION"])
147+
PY
148+
)
149+
150+
NIGHTLY_VERSION=$(
151+
"$PYBIN" - <<'PY'
152+
import runpy
153+
module_vars = runpy.run_path("torch_pin.py")
154+
print(module_vars["NIGHTLY_VERSION"])
155+
PY
156+
)
157+
echo "=== [$LABEL] Install torch==${TORCH_VERSION}.${NIGHTLY_VERSION} ==="
158+
159+
# Install torchao based on the pinned PyTorch version
160+
"$PIPBIN" install torch=="${TORCH_VERSION}.${NIGHTLY_VERSION}" --index-url "https://download.pytorch.org/whl/nightly/cpu"
161+
162+
# Install torchao based on the pinned commit from third-party/ao submodule
163+
pushd "$REPO_ROOT/third-party/ao" > /dev/null
164+
USE_CPP=0 "$PYBIN" setup.py develop
165+
popd > /dev/null
144166

145167
echo "=== [$LABEL] Import smoke tests ==="
146168
"$PYBIN" -c "import executorch; print('executorch imported successfully')"

.ci/scripts/unittest-buck2.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,17 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
3535
for op in "build" "test"; do
3636
buck2 $op $BUILDABLE_OPTIMIZED_OPS \
3737
//examples/selective_build:select_all_dtype_selective_lib_portable_lib \
38+
//extension/llm/custom_ops/spinquant/test:fast_hadamard_transform_test \
39+
//extension/llm/runner/test:test_multimodal_input \
40+
//extension/llm/runner/test:test_generation_config \
3841
//kernels/portable/... \
3942
$BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
4043
//runtime/executor: //runtime/kernel/... //runtime/platform/...
4144
done
4245

4346
# Build only without testing
44-
buck2 build //codegen/tools/... # Needs torch for testing which we don't have in our OSS buck setup.
47+
buck2 build //codegen/tools/... \
48+
//extension/llm/runner/io_manager:io_manager \
49+
//extension/llm/modules/... \
50+
//extension/llm/runner:multimodal_runner_lib \
51+
//extension/llm/runner:text_decoder_runner

.github/workflows/add-unanswered-to-project.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ jobs:
1212
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
1313
uses: actions/github-script@v7
1414
with:
15-
github-token: ${{ secrets.GITHUB_TOKEN }}
1615
script: |
1716
const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
1817
const owner = 'pytorch';

.github/workflows/android-release-artifacts.yml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,11 @@ on:
1515
type: choice
1616
options:
1717
- "xnnpack"
18-
- "vulkan+xnnpack"
18+
- "vulkan"
1919
- "qnn"
2020
schedule:
2121
- cron: 0 10 * * *
2222

23-
concurrency:
24-
group: ${{ github.workflow }}-${{ github.ref }}
25-
cancel-in-progress: true
26-
2723
jobs:
2824
check-if-aar-exists:
2925
name: check-if-aar-exists
@@ -34,12 +30,13 @@ jobs:
3430
shell: bash
3531
run: |
3632
VERSION="${{ inputs.version }}"
33+
FLAVOR="${{ inputs.flavor }}"
3734
if [ -z "$VERSION" ]; then
3835
echo "No version name specified. Will create a snapshot AAR"
3936
exit 0
4037
fi
41-
if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar" | grep "200 OK"; then
42-
echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar"
38+
if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}/executorch.aar" | grep "200 OK"; then
39+
echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}/executorch.aar"
4340
echo "Will skip build/upload"
4441
exit 1
4542
fi
@@ -93,7 +90,10 @@ jobs:
9390
fi
9491
9592
FLAVOR="${{ inputs.flavor }}"
96-
if [[ "$FLAVOR" == "vulkan+xnnpack" || -z "$FLAVOR" ]]; then
93+
if [[ "$FLAVOR" == "vulkan" || -z "$FLAVOR" ]]; then
94+
curl -O https://sdk.lunarg.com/sdk/download/1.4.321.1/linux/vulkansdk-linux-x86_64-1.4.321.1.tar.xz
95+
tar xf vulkansdk-linux-x86_64-1.4.321.1.tar.xz -C /tmp
96+
export PATH="/tmp/1.4.321.1/x86_64/bin:$PATH"
9797
export EXECUTORCH_BUILD_VULKAN=ON
9898
fi
9999
@@ -145,8 +145,12 @@ jobs:
145145
pip install awscli==1.32.18
146146
AWS_CMD="aws s3 cp"
147147
VERSION="${{ inputs.version }}"
148+
FLAVOR="${{ inputs.flavor }}"
148149
if [ -z "$VERSION" ]; then
149150
VERSION="snapshot-$(date +"%Y%m%d")"
150151
fi
151-
${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}/executorch.aar --acl public-read
152-
${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}/executorch.aar.sha256sums --acl public-read
152+
if [ -z "$FLAVOR" ]; then
153+
FLAVOR="xnnpack"
154+
fi
155+
${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar --acl public-read
156+
${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar.sha256sums --acl public-read

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,7 @@ jobs:
10091009
./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
10101010
./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
10111011
./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
1012+
./cmake-out/backends/vulkan/test/custom_ops/qdq8ta_conv2d_activations
10121013
10131014
# "Classic" Operator tests
10141015
PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_op.sh --build

.github/workflows/trunk.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,60 @@ jobs:
973973
# Test llama2
974974
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
975975
976+
# this is for filtering out the qnn changes such that qnn jobs only triggered when the specific files are changed
977+
changes:
978+
runs-on: ubuntu-latest
979+
outputs:
980+
qnn: ${{ steps.filter.outputs.qnn }}
981+
steps:
982+
- uses: actions/checkout@v4
983+
- uses: dorny/paths-filter@v3
984+
id: filter
985+
with:
986+
filters: |
987+
qnn:
988+
- 'backends/qualcomm/**'
989+
- 'examples/qualcomm/**'
990+
- 'examples/models/llama/**'
991+
992+
test-static-llama-qnn-eval-linux:
993+
needs: changes # has dependency on changes jobs defined above
994+
if: needs.changes.outputs.qnn == 'true'
995+
name: test-static-llama-qnn-eval-linux
996+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
997+
permissions:
998+
id-token: write
999+
contents: read
1000+
strategy:
1001+
fail-fast: false
1002+
matrix:
1003+
config:
1004+
- name: "baseline"
1005+
flags: ""
1006+
threshold: 62.0
1007+
with:
1008+
runner: linux.2xlarge
1009+
docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
1010+
submodules: 'recursive'
1011+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1012+
timeout: 180
1013+
script: |
1014+
# The generic Linux job chooses to use base env, not the one setup by the image
1015+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1016+
conda activate "${CONDA_ENV}"
1017+
BUILD_TOOL="cmake"
1018+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
1019+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
1020+
# Setup executorch
1021+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
1022+
# Setup install_requirements for llama
1023+
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
1024+
1025+
echo ">>> Running config: ${{ matrix.config.name }}"
1026+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh \
1027+
--flags "${{ matrix.config.flags }}" \
1028+
--threshold "${{ matrix.config.threshold }}"
1029+
9761030
unittest-release:
9771031
uses: ./.github/workflows/_unittest.yml
9781032
permissions:

.lintrunner.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ exclude_patterns = [
206206
'**/*.png',
207207
'**/*.webp',
208208
'**/*.jpeg',
209+
'**/*.mp3',
209210
'**/*.mp4',
210211
'**/*.pte',
211212
'**/*.pth',
@@ -216,6 +217,8 @@ exclude_patterns = [
216217
'**/*.jpg',
217218
'**/*.jar',
218219
'**/*.gif',
220+
'extension/llm/tokenizers',
221+
'extension/llm/tokenizers/**',
219222
# File contains @generated
220223
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
221224
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',

backends/aoti/CMakeLists.txt

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
#
7+
# Build AOTI backend for runtime.
8+
#
9+
# ### Editing this file ###
10+
#
11+
# This file should be formatted with
12+
# ~~~
13+
# cmake-format -i CMakeLists.txt
14+
# ~~~
15+
# It should also be cmake-lint clean.
16+
#
17+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
18+
19+
# Source root directory for executorch.
20+
if(NOT EXECUTORCH_ROOT)
21+
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
22+
endif()
23+
24+
# Use ExecuTorch's standard way to find PyTorch libraries for AOTI
25+
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
26+
find_package_torch()
27+
28+
# Common AOTI functionality - combines all AOTI common components
29+
set(_aoti_common_sources aoti_model_container.cpp common_shims.cpp)
30+
add_library(aoti_common STATIC ${_aoti_common_sources})
31+
target_include_directories(
32+
aoti_common
33+
PUBLIC $<BUILD_INTERFACE:${EXECUTORCH_ROOT}> $<INSTALL_INTERFACE:include>
34+
# PyTorch AOTI headers from ExecuTorch's torch detection
35+
${TORCH_INCLUDE_DIRS}
36+
)
37+
target_compile_options(aoti_common PUBLIC -fexceptions -frtti -fPIC)
38+
# Ensure symbols are exported properly
39+
target_link_options(aoti_common PUBLIC -Wl,--export-dynamic)
40+
41+
# Link against PyTorch libraries and standard libraries
42+
target_link_libraries(
43+
aoti_common
44+
PUBLIC extension_tensor ${CMAKE_DL_LIBS}
45+
# Link PyTorch libraries for AOTI functions
46+
${TORCH_LIBRARIES}
47+
)
48+
executorch_target_link_options_shared_lib(aoti_common)
49+
50+
install(
51+
TARGETS aoti_common
52+
EXPORT ExecuTorchTargets
53+
DESTINATION lib
54+
)

0 commit comments

Comments
 (0)