
Commit 64ab3f5

Update on "[ExecuTorch][BE] Split kv cache and SDPA for better code sharing"
Summary:

Why? We have coupled SDPA with the KV cache for a while. Initially this was done when we implemented the sdpa_with_kv_cache custom op to reduce the multiple-copy overhead of KV cache updates. (It could instead have been done with separate custom KV-cache-update and custom SDPA ops; recent changes enabled this.) Because the SDPA module owns the KV cache, we get (a) a non-composable implementation and (b) difficulty reusing model definitions and components from repos like tune. The result is that we have multiple definitions of the same model, llama, lying around in ET, TorchChat, and Tune. This diff and subsequent ones move toward decoupling the custom KV cache and custom SDPA, making them composable and module-swap friendly with tune's model definition.

How? Earlier PRs decoupled the KV cache update from SDPA. Now:

1. Decouple the SDPA nn.Module from the KV cache.
2. Standardize the KVCache and SDPA interfaces: both operate on q, k, v tensors in [B, # heads, seq_len, head_dim] format.
3. Step 2 introduces multiple transposes when KVCache and SDPA are replaced by custom modules, but we will write a graph pass to undo them.

Test Plan: Existing tests. Make sure perf doesn't regress.

Differential Revision: [D67914054](https://our.internmc.facebook.com/intern/diff/D67914054)

[ghstack-poisoned]
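A minimal sketch of the decoupled design the summary describes (not the actual ExecuTorch code — class names and signatures here are illustrative): `KVCache` and `SDPA` are separate `nn.Module`s that both accept q/k/v in `[B, num_heads, seq_len, head_dim]` layout, so either can be swapped for a custom implementation independently.

```python
import torch
from torch import nn


class KVCache(nn.Module):
    """Owns the cache buffers; knows nothing about attention."""

    def __init__(self, max_seq_len, num_heads, head_dim, batch_size=1):
        super().__init__()
        shape = (batch_size, num_heads, max_seq_len, head_dim)
        self.register_buffer("k_cache", torch.zeros(shape))
        self.register_buffer("v_cache", torch.zeros(shape))

    def update(self, start_pos, k, v):
        # k, v: [B, num_heads, seq_len, head_dim]
        seq_len = k.shape[2]
        self.k_cache[:, :, start_pos : start_pos + seq_len] = k
        self.v_cache[:, :, start_pos : start_pos + seq_len] = v
        return (
            self.k_cache[:, :, : start_pos + seq_len],
            self.v_cache[:, :, : start_pos + seq_len],
        )


class SDPA(nn.Module):
    """Stateless attention; no cache ownership."""

    def forward(self, q, k, v):
        # All inputs in [B, num_heads, seq_len, head_dim].
        return torch.nn.functional.scaled_dot_product_attention(q, k, v)


class Attention(nn.Module):
    def __init__(self, max_seq_len, num_heads, head_dim):
        super().__init__()
        self.kv_cache = KVCache(max_seq_len, num_heads, head_dim)
        self.sdpa = SDPA()  # either submodule can be module-swapped on its own

    def forward(self, q, k, v, start_pos):
        k, v = self.kv_cache.update(start_pos, k, v)
        return self.sdpa(q, k, v)
```

With this split, replacing `KVCache` or `SDPA` with a custom-op-backed module is a local swap; any transposes the custom modules introduce around this interface are what the planned graph pass would remove.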
2 parents 6f79856 + 0937059 commit 64ab3f5

File tree

109 files changed: +1609 −3169 lines


.ci/scripts/utils.sh

Lines changed: 3 additions & 3 deletions
```diff
@@ -17,17 +17,17 @@ retry () {
 }


 clean_executorch_install_folders() {
-  ./install_requirements.sh --clean
+  ./install_executorch.sh --clean
 }

 install_executorch() {
   which pip
   # Install executorch, this assumes that Executorch is checked out in the
   # current directory.
   if [[ "${1:-}" == "use-pt-pinned-commit" ]]; then
-    ./install_requirements.sh --pybind xnnpack --use-pt-pinned-commit
+    ./install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
   else
-    ./install_requirements.sh --pybind xnnpack
+    ./install_executorch.sh --pybind xnnpack
   fi
   # Just print out the list of packages for debugging
   pip list
```

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -9,7 +9,7 @@ on:
     paths:
       - .ci/scripts/setup-ios.sh
       - .github/workflows/apple.yml
-      - install_requirements.sh
+      - install_executorch.sh
       - backends/apple/**
      - build/build_apple_frameworks.sh
       - build/build_apple_llm_demo.sh
```

.github/workflows/pull.yml

Lines changed: 27 additions & 5 deletions
```diff
@@ -200,7 +200,7 @@ jobs:
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

       # install pybind
-      bash install_requirements.sh --pybind xnnpack
+      bash install_executorch.sh --pybind xnnpack

       # install Llava requirements
       bash examples/models/llama/install_requirements.sh
@@ -333,6 +333,9 @@ jobs:

   unittest-arm:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -395,6 +398,25 @@ jobs:
       # Test llama2
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

+  test-qnn-models-linux:
+    name: test-qnn-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 180
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for running test_qnn_delegate.py, can use matrix such that we can trigger different jobs, refers to test-llama-runner-qnn-linux
+        # reminder: make sure each job runs fast
+
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -414,7 +436,7 @@ jobs:
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

       # install pybind
-      bash install_requirements.sh --pybind xnnpack
+      bash install_executorch.sh --pybind xnnpack

       # install phi-3-mini requirements
       bash examples/models/phi-3-mini/install_requirements.sh
@@ -441,7 +463,7 @@ jobs:
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

       # install pybind
-      bash install_requirements.sh --pybind xnnpack
+      bash install_executorch.sh --pybind xnnpack

       # install llama requirements
       bash examples/models/llama/install_requirements.sh
@@ -468,7 +490,7 @@ jobs:
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

       # install pybind
-      bash install_requirements.sh --pybind xnnpack
+      bash install_executorch.sh --pybind xnnpack

       # install llama requirements
       bash examples/models/llama/install_requirements.sh
@@ -495,7 +517,7 @@ jobs:
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"

       # install pybind
-      bash install_requirements.sh --pybind xnnpack
+      bash install_executorch.sh --pybind xnnpack

       # install llama requirements
       bash examples/models/llama/install_requirements.sh
```

.github/workflows/trunk.yml

Lines changed: 6 additions & 0 deletions
```diff
@@ -132,6 +132,9 @@ jobs:
   test-arm-backend-delegation:
     name: test-arm-backend-delegation
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -159,6 +162,9 @@ jobs:
   test-arm-reference-delegation:
     name: test-arm-reference-delegation
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk
```

backends/apple/mps/setup.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -97,7 +97,7 @@ I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successf
 ### [Optional] Run the generated model directly using pybind
 1. Make sure `pybind` MPS support was installed:
 ```bash
-./install_requirements.sh --pybind mps
+./install_executorch.sh --pybind mps
 ```
 2. Run the `mps_example` script to trace the model and run it directly from python:
 ```bash
````

backends/arm/_passes/annotate_channels_last_dim_order_pass.py

Lines changed: 2 additions & 5 deletions
```diff
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -15,7 +15,7 @@
     get_node_arg,
     insert_q_dq_pair,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, register_passable_op
+from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
 from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -43,9 +43,6 @@ def _transpose_impl(*args, **kwargs):
     return args[0]


-register_passable_op(torch.ops.passthrough_to_tosa._transpose)
-
-
 class AnnotateChannelsLastDimOrder(ExportPass):
     """
     Annotates each node with a tosa_dim_order. tosa_dim_order can be seen as a channels-last dim-order
```

backends/arm/_passes/fuse_quantized_activation_pass.py

Lines changed: 4 additions & 3 deletions
```diff
@@ -19,12 +19,13 @@ def _is_fuseable_quantized_activation(self, node: Node):
         is_fuseable = min_val == 0

         is_quantized = len(node.users) == 1 and next(iter(node.users)).target == q_op
-        if is_quantized:
+        if is_fuseable and is_quantized:
             quant_node = next(iter(node.users))
             zp = quant_node.args[2]
             qmin = quant_node.args[3]
-
-        return is_fuseable and is_quantized and zp == qmin
+            return zp == qmin
+        else:
+            return False

     def _is_fuseable_input(self, node: Node):
         return (
```
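The control-flow change in `_is_fuseable_quantized_activation` above can be illustrated in isolation. The function below is a hypothetical simplification (the real pass derives these values from graph nodes): the rewrite only inspects the quantize node's zero-point and qmin once both preconditions hold, and returns `False` explicitly otherwise, instead of relying on `and` short-circuiting over names bound inside the `if`.

```python
def is_fuseable_quantized_activation(is_fuseable: bool, is_quantized: bool,
                                     zp: int, qmin: int) -> bool:
    # Same decision table as the rewritten pass: fuse only when the activation
    # is fuseable, its single user is a quantize op, and zero_point == qmin.
    if is_fuseable and is_quantized:
        return zp == qmin
    else:
        return False
```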

backends/arm/operators/__init__.py

Lines changed: 0 additions & 2 deletions
```diff
@@ -13,7 +13,6 @@
     op_bmm,
     op_cat,
     op_conv2d,
-    op_dequant,
     op_exp,
     op_full,
     op_get_item,
@@ -24,7 +23,6 @@
     op_min,
     op_mul,
     op_permute,
-    op_quant,
     op_reciprocal,
     op_relu,
     op_repeat,
```

backends/arm/operators/op_dequant.py

Lines changed: 0 additions & 35 deletions
This file was deleted.

backends/arm/operators/op_hardtanh.py

Lines changed: 3 additions & 4 deletions
```diff
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Arm Limited and/or its affiliates.
+# Copyright 2023-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -19,7 +19,6 @@
 )
 from executorch.backends.arm.tosa_mapping import TosaArg

-from executorch.backends.arm.tosa_quant_utils import quantize_value
 from serializer.tosa_serializer import TosaOp


@@ -44,8 +43,8 @@ def define_node(
         input_qparams = get_input_qparams(node)  # pyre-ignore[16]
         qargs = input_qparams[0]
         # Convert to quantized representation
-        clamp_min_qs = quantize_value(inputs[1].number, qargs)
-        clamp_max_qs = quantize_value(inputs[2].number, qargs)
+        clamp_min_qs = qargs.quantize_value(inputs[1].number).item()
+        clamp_max_qs = qargs.quantize_value(inputs[2].number).item()
         # Set fp values to 0.0 since they are not used
         clamp_min_fp = 0.0
         clamp_max_fp = 0.0
```
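What "convert to quantized representation" means for the hardtanh clamp bounds can be sketched as follows. `QuantArgs` here is a hypothetical stand-in for the pass's `qargs` object (the real class lives in the Arm backend's quantization utils); the underlying affine-quantization formula `q = round(x / scale) + zero_point`, clamped to the integer range, is standard.

```python
import torch
from dataclasses import dataclass


@dataclass
class QuantArgs:
    scale: float
    zp: int
    qmin: int = -128  # int8 range
    qmax: int = 127

    def quantize_value(self, x: float) -> torch.Tensor:
        # Affine quantization: round, shift by zero point, clamp to range.
        q = round(x / self.scale) + self.zp
        return torch.tensor(max(self.qmin, min(self.qmax, q)), dtype=torch.int8)


qargs = QuantArgs(scale=0.05, zp=0)
clamp_min_qs = qargs.quantize_value(-1.0).item()  # quantized lower clamp bound
clamp_max_qs = qargs.quantize_value(1.0).item()   # quantized upper clamp bound
```

Making `quantize_value` a method on the qparams object, as the diff does, keeps the scale/zero-point pair and the conversion logic in one place instead of threading `qargs` through a free function.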
