pytorch
diff --git a/‎.ci/scripts/test_qnn_static_llama_eval.sh‎
Lines changed: 91 additions & 0 deletions b/‎.ci/scripts/test_qnn_static_llama_eval.sh‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎.ci/scripts/unittest-buck2.sh‎
Lines changed: 8 additions & 1 deletion b/‎.ci/scripts/unittest-buck2.sh‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎.github/workflows/add-unanswered-to-project.yml‎
Lines changed: 0 additions & 1 deletion b/‎.github/workflows/add-unanswered-to-project.yml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎.github/workflows/android-release-artifacts.yml‎
Lines changed: 14 additions & 10 deletions b/‎.github/workflows/android-release-artifacts.yml‎
Lines changed: 14 additions & 10 deletions
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 54 additions & 0 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎.lintrunner.toml‎
Lines changed: 3 additions & 0 deletions b/‎.lintrunner.toml‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm‎
Lines changed: 2 additions & 0 deletions b/‎backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/operator_support/to_dim_order_copy_support.py‎
Lines changed: 38 additions & 8 deletions b/‎backends/arm/operator_support/to_dim_order_copy_support.py‎
Lines changed: 38 additions & 8 deletions
@@ -0,0 +1,91 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -euo pipefail
+
+echo ">>> Script invoked with arguments: $*"
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Download QNN_SDK. If already downloaded, export environment path
+source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
+install_qnn
+
+# NOTE(review): resolves to the .ci directory, not the repo root; confirm intent.
+export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
+export PYTHONPATH=".."
+# Paths are relative to the current directory (presumably the repository root).
+cp schema/program.fbs exir/_serialize/program.fbs
+cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
+
+PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
+command -v "${PYTHON_EXECUTABLE}"
+
+# -------------------------------
+# Parse args
+#   --flags "<str>"     extra arguments forwarded to eval_llama_qnn (word-split)
+#   --threshold <num>   highest word_perplexity that still passes the check
+# -------------------------------
+EXTRA_FLAGS=""
+THRESHOLD=62.0  # default fallback
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --flags)
+      EXTRA_FLAGS="${2?--flags requires a value}"
+      shift 2
+      ;;
+    --threshold)
+      THRESHOLD="${2?--threshold requires a value}"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option: $1"
+      exit 1
+      ;;
+  esac
+done
+
+# Config
+MODEL="qwen2_5-0_5b"
+MAX_SEQ=1024
+PTQ="16a4w"
+
+# Run command and capture *both stdout and stderr*
+LOG_FILE="eval_${MODEL}_$(date +%Y%m%d_%H%M%S).log"
+
+echo ">>> Running evaluation with flags: $EXTRA_FLAGS | threshold: $THRESHOLD"
+# EXTRA_FLAGS is intentionally unquoted so it splits into separate arguments.
+$PYTHON_EXECUTABLE -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn \
+  --decoder_model "$MODEL" \
+  --quant_linear_only \
+  --max_seq_length "$MAX_SEQ" \
+  --ptq "$PTQ" \
+  $EXTRA_FLAGS 2>&1 | tee "$LOG_FILE"
+
+# Extract last word_perplexity. sed -n prints only lines where the substitution
+# matched and exits 0 on no match, so the empty-value check below is reached.
+LAST_PERP=$(sed -nE "/INFO:root:wikitext:/s/.*'word_perplexity,none': ([0-9.]+).*/\1/p" "$LOG_FILE" | tail -n 1)
+
+if [[ -z "$LAST_PERP" ]]; then
+  echo "❌ Could not find word_perplexity in logs!"
+  exit 1
+fi
+
+echo ">>> Last word_perplexity = $LAST_PERP"
+
+# Compare against threshold. awk runs inside the `if` condition because a bare
+# non-zero exit would abort the script under `set -e` before the message prints.
+if ! awk -v val="$LAST_PERP" -v thr="$THRESHOLD" 'BEGIN {exit (val > thr)}'; then
+  echo "❌ Regression detected: word_perplexity ($LAST_PERP) > threshold ($THRESHOLD)"
+  exit 1
+fi
+
+echo "✅ Check passed: word_perplexity ($LAST_PERP) <= $THRESHOLD"
@@ -35,10 +35,17 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
 for op in "build" "test"; do
     buck2 $op $BUILDABLE_OPTIMIZED_OPS \
           //examples/selective_build:select_all_dtype_selective_lib_portable_lib \
+          //extension/llm/custom_ops/spinquant/test:fast_hadamard_transform_test \
+          //extension/llm/runner/test:test_multimodal_input \
+          //extension/llm/runner/test:test_generation_config \
           //kernels/portable/... \
           $BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
           //runtime/executor: //runtime/kernel/... //runtime/platform/...
 done
 
 # Build only without testing
-buck2 build //codegen/tools/... # Needs torch for testing which we don't have in our OSS buck setup.
+buck2 build //codegen/tools/... \
+        //extension/llm/runner/io_manager:io_manager \
+        //extension/llm/modules/... \
+        //extension/llm/runner:multimodal_runner_lib \
+        //extension/llm/runner:text_decoder_runner
@@ -12,7 +12,6 @@ jobs:
       - name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
         uses: actions/github-script@v7
         with:
-          github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
             const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
             const owner = 'pytorch';
 
@@ -15,15 +15,11 @@ on:
         type: choice
         options:
           - "xnnpack"
-          - "vulkan+xnnpack"
+          - "vulkan"
           - "qnn"
   schedule:
     - cron: 0 10 * * *
 
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
 jobs:
   check-if-aar-exists:
     name: check-if-aar-exists
@@ -34,12 +30,13 @@ jobs:
         shell: bash
         run: |
           VERSION="${{ inputs.version }}"
+          FLAVOR="${{ inputs.flavor }}"
           if [ -z "$VERSION" ]; then
             echo "No version name specified. Will create a snapshot AAR"
             exit 0
           fi
-          if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar" | grep "200 OK"; then
-            echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar"
+          if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}/executorch.aar" | grep "200 OK"; then
+            echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}/executorch.aar"
             echo "Will skip build/upload"
             exit 1
           fi
@@ -93,7 +90,10 @@ jobs:
         fi
 
         FLAVOR="${{ inputs.flavor }}"
-        if [[ "$FLAVOR" == "vulkan+xnnpack" || -z "$FLAVOR" ]]; then
+        if [[ "$FLAVOR" == "vulkan" || -z "$FLAVOR" ]]; then
+          curl -O https://sdk.lunarg.com/sdk/download/1.4.321.1/linux/vulkansdk-linux-x86_64-1.4.321.1.tar.xz
+          tar xf vulkansdk-linux-x86_64-1.4.321.1.tar.xz -C /tmp
+          export PATH="/tmp/1.4.321.1/x86_64/bin:$PATH"
           export EXECUTORCH_BUILD_VULKAN=ON
         fi
 
@@ -145,8 +145,12 @@ jobs:
           pip install awscli==1.32.18
           AWS_CMD="aws s3 cp"
           VERSION="${{ inputs.version }}"
+          FLAVOR="${{ inputs.flavor }}"
           if [ -z "$VERSION" ]; then
             VERSION="snapshot-$(date +"%Y%m%d")"
           fi
-          ${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}/executorch.aar --acl public-read
-          ${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}/executorch.aar.sha256sums --acl public-read
+          if [ -z "$FLAVOR" ]; then
+            FLAVOR="xnnpack"
+          fi
+          ${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar --acl public-read
+          ${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar.sha256sums --acl public-read
@@ -973,6 +973,60 @@ jobs:
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
 
+  # Detect QNN-related changes so that QNN jobs are triggered only when the relevant files change.
+  changes:
+    runs-on: ubuntu-latest
+    outputs:
+      qnn: ${{ steps.filter.outputs.qnn }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          filters: |
+            qnn:
+              - 'backends/qualcomm/**'
+              - 'examples/qualcomm/**'
+              - 'examples/models/llama/**'
+
+  test-static-llama-qnn-eval-linux:
+    needs: changes # depends on the "changes" job defined above
+    if: needs.changes.outputs.qnn == 'true'
+    name: test-static-llama-qnn-eval-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - name: "baseline"
+            flags: ""
+            threshold: 62.0
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 180
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        BUILD_TOOL="cmake"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+        # Setup install_requirements for llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+
+        echo ">>> Running config: ${{ matrix.config.name }}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh \
+          --flags "${{ matrix.config.flags }}" \
+          --threshold "${{ matrix.config.threshold }}"
+
   unittest-release:
     uses: ./.github/workflows/_unittest.yml
     permissions:
 
@@ -206,6 +206,7 @@ exclude_patterns = [
     '**/*.png',
     '**/*.webp',
     '**/*.jpeg',
+    '**/*.mp3',
     '**/*.mp4',
     '**/*.pte',
     '**/*.pth',
@@ -216,6 +217,8 @@ exclude_patterns = [
     '**/*.jpg',
     '**/*.jar',
     '**/*.gif',
+    'extension/llm/tokenizers',
+    'extension/llm/tokenizers/**',
     # File contains @generated
     'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
     'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',
 
@@ -436,11 +436,13 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
     switch (modelAssetType.value()) {
         case ModelAssetType::CompiledModel: {
             // Model is already compiled.
+            ETCoreMLLogInfo("The model in the pte file is pre-compiled.  Skipping compilation.");
             return modelURL;
         }
 
         case ModelAssetType::Model: {
             // Compile the model.
+            ETCoreMLLogInfo("The model in the pte file is not pre-compiled.  Compiling with a 5 min timeout.");
             NSURL *compiledModelURL = [ETCoreMLModelCompiler compileModelAtURL:modelURL
                                                           maxWaitTimeInSeconds:(5 * 60)
                                                                          error:error];
 
@@ -2,6 +2,13 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+"""Declare operator support for ``_to_dim_order_copy`` in TOSA.
+
+Provide dtype-compatibility checks for casting when converting to a specific
+dimension order. Supported input/output dtype pairs depend on the active TOSA
+profile (integer and/or float).
+
+"""
 
 # pyre-unsafe
 import copy
@@ -25,6 +32,16 @@
 
 @register_tosa_support_check
 class ToCopySupported(SupportedTOSAOperatorCheck):
+    """Provide TOSA support check for ``_to_dim_order_copy``.
+
+    Attributes:
+        SUPPORTED_INT_PROFILE_DTYPES (dict[torch.dtype, list[torch.dtype]]):
+            Allowed output dtypes for each integer input dtype.
+        SUPPORTED_FP_PROFILE_DTYPES (dict[torch.dtype, list[torch.dtype]]):
+            Allowed output dtypes for each floating input dtype.
+
+    """
+
     targets = [
         exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
     ]
@@ -40,21 +57,31 @@ def _merge_supported_types(
         dtypes1: SupportedTypeDict,
         dtypes2: SupportedTypeDict,
     ) -> SupportedTypeDict:
+        """Return a merged mapping of supported dtype transitions.
+
+        Args:
+            dtypes1 (dict[torch.dtype, list[torch.dtype]]): Base mapping.
+            dtypes2 (dict[torch.dtype, list[torch.dtype]]): Mapping to merge in.
+
+        Returns:
+            dict[torch.dtype, list[torch.dtype]]: Combined mapping.
+
+        """
         merged_dtypes = copy.deepcopy(
             dtypes1
-        )  # Use deepcopy to avoid unintentionally modifying SUPPORTED_INT_TYPES
+        )  # Use deepcopy to avoid unintentionally modifying SUPPORTED_INT_PROFILE_DTYPES
         for k, v in dtypes2.items():
             merged_dtypes[k] = merged_dtypes.get(k, []) + v
         return merged_dtypes
 
-    SUPPORTED_INT_TYPES: SupportedTypeDict = {
+    SUPPORTED_INT_PROFILE_DTYPES: SupportedTypeDict = {
         torch.bool: [torch.bool, torch.int8, torch.int16, torch.int32],
         torch.int8: [torch.bool, torch.int8, torch.int16, torch.int32],
         torch.int16: [torch.bool, torch.int8, torch.int16, torch.int32],
         torch.int32: [torch.bool, torch.int8, torch.int16, torch.int32],
         torch.int64: [torch.bool, torch.int8, torch.int16, torch.int32],
     }
-    SUPPORTED_FLOAT_TYPES: SupportedTypeDict = {
+    SUPPORTED_FP_PROFILE_DTYPES: SupportedTypeDict = {
         torch.int8: [torch.int8, torch.float16, torch.bfloat16, torch.float32],
         torch.int16: [torch.int16, torch.float16, torch.bfloat16, torch.float32],
         torch.int32: [torch.int32, torch.float16, torch.bfloat16, torch.float32],
@@ -92,22 +119,25 @@ def _merge_supported_types(
             torch.float32,
         ],
     }
-    ALL_SUPPORTED_TYPES = _merge_supported_types(
-        SUPPORTED_INT_TYPES, SUPPORTED_FLOAT_TYPES
-    )
 
     def is_node_tosa_supported(
         self, node: fx.Node, tosa_spec: TosaSpecification
     ) -> bool:
+        """Return True if the node is supported by TOSA.
+
+        Check FakeTensor metadata, validate input dtype is supported for the
+        active profile, and ensure the output dtype is allowed for the given
+        input dtype.
 
+        """
         supported_dtypes: SupportedTypeDict = {}
         if tosa_spec.support_integer():
             supported_dtypes = self._merge_supported_types(
-                self.SUPPORTED_INT_TYPES, supported_dtypes
+                self.SUPPORTED_INT_PROFILE_DTYPES, supported_dtypes
             )
         if tosa_spec.support_float():
             supported_dtypes = self._merge_supported_types(
-                self.SUPPORTED_FLOAT_TYPES, supported_dtypes
+                self.SUPPORTED_FP_PROFILE_DTYPES, supported_dtypes
             )
 
         if len(node.all_input_nodes) != 1: