Skip to content

Commit 3ae909b

Browse files
Merge branch 'main' into issue9971-profile
2 parents 4995717 + b3963e6 commit 3ae909b

File tree

72 files changed

+1002
-337
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

72 files changed

+1002
-337
lines changed

.github/workflows/android-release-artifacts.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ jobs:
8080
8181
echo -n "$SECRET_EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS" | base64 -d > /tmp/secring.gpg
8282
83+
# Update the version name in build.gradle in case of maven publish
84+
VERSION="${{ inputs.version }}"
85+
if [ ! -z "$VERSION" ]; then
86+
sed -i "s/\(coordinates(\"org.pytorch\", \"executorch-android\", \"\)\([0-9]\+.[0-9]\+.[0-9]\+\)\(\")\)/\1$VERSION\3/" extension/android/executorch_android/build.gradle
87+
fi
88+
8389
# Build AAR Package
8490
mkdir aar-out
8591
export BUILD_AAR_DIR=aar-out
@@ -92,7 +98,7 @@ jobs:
9298
# Publish to maven staging
9399
UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
94100
if [[ "$UPLOAD_TO_MAVEN" == "true" ]]; then
95-
(cd aar-out; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
101+
(cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
96102
fi
97103
98104
upload-release-aar:

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,16 @@ if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
761761
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor)
762762
endif()
763763

764+
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
765+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/module)
766+
endif()
767+
764768
if(EXECUTORCH_BUILD_EXTENSION_LLM)
765769
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/tokenizers)
766770
endif()
767771

768-
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
769-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/module)
772+
if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
773+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
770774
endif()
771775

772776
if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ executorch
4545
│ └── <a href="devtools/visualization">visualization</a> - Visualization tools for representing model structure and performance metrics.
4646
├── <a href="docs">docs</a> - Static docs tooling and documentation source files.
4747
├── <a href="examples">examples</a> - Examples of various user flows, such as model export, delegates, and runtime execution.
48-
├── <a href="exir">exir</a> - Ahead-of-time library: model capture and lowering APIs. EXport Intermediate Representation (EXIR) is a format for representing the result of <a href="https://pytorch.org/docs/main/export.ir_spec.html">torch.export</a>. This directory contains utilities and passes for lowering the EXIR graphs into different <a href="/docs/source/ir-exir.md">dialects</a> and eventually suitable to run on target hardware.
48+
├── <a href="exir">exir</a> - Ahead-of-time library: model capture and lowering APIs. EXport Intermediate Representation (EXIR) is a format for representing the result of <a href="https://pytorch.org/docs/stable/export.html">torch.export</a>. This directory contains utilities and passes for lowering the EXIR graphs into different <a href="/docs/source/ir-exir.md">dialects</a> and eventually suitable to run on target hardware.
4949
│ ├── <a href="exir/_serialize">_serialize</a> - Serialize final export artifact.
5050
│ ├── <a href="exir/backend">backend</a> - Backend delegate ahead of time APIs.
5151
│ ├── <a href="exir/capture">capture</a> - Program capture.

backends/arm/test/ops/test_tanh.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99

1010
from typing import Tuple
1111

12+
import pytest
13+
1214
import torch
1315

14-
from executorch.backends.arm.test import common
16+
from executorch.backends.arm.test import common, conftest
1517
from executorch.backends.arm.test.tester.arm_tester import ArmTester
1618
from executorch.exir.backend.compile_spec_schema import CompileSpec
1719
from parameterized import parameterized
@@ -40,7 +42,7 @@ def forward(self, x):
4042
def _test_tanh_tosa_MI_pipeline(
4143
self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
4244
):
43-
(
45+
tester = (
4446
ArmTester(
4547
module,
4648
example_inputs=test_data,
@@ -54,11 +56,13 @@ def _test_tanh_tosa_MI_pipeline(
5456
.check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"])
5557
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
5658
.to_executorch()
57-
.run_method_and_compare_outputs(inputs=test_data)
5859
)
5960

61+
if conftest.is_option_enabled("tosa_ref_model"):
62+
tester.run_method_and_compare_outputs(inputs=test_data)
63+
6064
def _test_tanh_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
61-
(
65+
tester = (
6266
ArmTester(
6367
module,
6468
example_inputs=test_data,
@@ -73,9 +77,11 @@ def _test_tanh_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple)
7377
.check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"])
7478
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
7579
.to_executorch()
76-
.run_method_and_compare_outputs(inputs=test_data)
7780
)
7881

82+
if conftest.is_option_enabled("tosa_ref_model"):
83+
tester.run_method_and_compare_outputs(inputs=test_data)
84+
7985
def _test_tanh_tosa_ethos_BI_pipeline(
8086
self,
8187
compile_spec: list[CompileSpec],
@@ -114,6 +120,7 @@ def _test_tanh_tosa_u85_BI_pipeline(
114120
)
115121

116122
@parameterized.expand(test_data_suite)
123+
@pytest.mark.tosa_ref_model
117124
def test_tanh_tosa_MI(
118125
self,
119126
test_name: str,
@@ -122,6 +129,7 @@ def test_tanh_tosa_MI(
122129
self._test_tanh_tosa_MI_pipeline(self.Tanh(), (test_data,))
123130

124131
@parameterized.expand(test_data_suite)
132+
@pytest.mark.tosa_ref_model
125133
def test_tanh_tosa_BI(self, test_name: str, test_data: torch.Tensor):
126134
self._test_tanh_tosa_BI_pipeline(self.Tanh(), (test_data,))
127135

backends/arm/test/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def define_arm_tests():
1616
"ops/test_linear.py",
1717
"ops/test_slice.py",
1818
"ops/test_sigmoid.py",
19+
"ops/test_tanh.py",
1920
]
2021

2122
TESTS = {}

backends/cadence/runtime/et_pal.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* LICENSE file in the root directory of this source tree.
77
*/
88

9-
#if defined(XTENSA)
9+
#if defined(__XTENSA__)
1010

1111
#include <stdio.h>
1212
#include <sys/times.h>

backends/qualcomm/aot/ir/targets.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ load(
44
)
55
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
66
load("@fbsource//xplat/executorch/backends/qualcomm:targets.bzl", "generate_schema_header")
7-
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")
7+
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_version")
88

99
QCIR_NAME = "qcir"
1010
INPUT_QCIR = QCIR_NAME + ".fbs"
@@ -56,7 +56,7 @@ def define_common_targets():
5656
platforms = [ANDROID],
5757
visibility = ["@EXECUTORCH_CLIENTS"],
5858
deps = [
59-
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
59+
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
6060
"//executorch/runtime/backend:interface",
6161
"//executorch/runtime/core:core",
6262
"//executorch/backends/qualcomm/aot/wrappers:wrappers",

backends/qualcomm/aot/python/targets.bzl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ load(
33
"ANDROID",
44
)
55
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
6-
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")
6+
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_version")
77

88
PYTHON_MODULE_NAME = "PyQnnManagerAdaptor"
99

@@ -34,7 +34,7 @@ def define_common_targets():
3434
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
3535
"//executorch/backends/qualcomm/runtime:runtime",
3636
"fbsource//third-party/pybind11:pybind11",
37-
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
37+
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
3838
],
3939
external_deps = [
4040
"libtorch_python",
@@ -67,7 +67,7 @@ def define_common_targets():
6767
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
6868
"//executorch/backends/qualcomm/runtime:runtime",
6969
"fbsource//third-party/pybind11:pybind11",
70-
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
70+
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
7171
],
7272
external_deps = [
7373
"libtorch_python",
@@ -94,6 +94,6 @@ def define_common_targets():
9494
"//executorch/backends/qualcomm/aot/ir:qcir_utils",
9595
"//executorch/backends/qualcomm/runtime:runtime",
9696
"fbsource//third-party/pybind11:pybind11",
97-
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
97+
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
9898
],
9999
)

backends/qualcomm/aot/wrappers/targets.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ load(
33
"ANDROID",
44
)
55
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
6-
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_verision")
6+
load("@fbsource//xplat/executorch/backends/qualcomm/qnn_version.bzl", "get_qnn_library_version")
77

88
def define_common_targets():
99
"""Defines targets that should be shared between fbcode and xplat.
@@ -23,7 +23,7 @@ def define_common_targets():
2323
platforms = [ANDROID],
2424
visibility = ["@EXECUTORCH_CLIENTS"],
2525
deps = [
26-
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_verision()),
26+
"fbsource//third-party/qualcomm/qnn/qnn-{0}:api".format(get_qnn_library_version()),
2727
"//executorch/runtime/backend:interface",
2828
"//executorch/runtime/core:core",
2929
],

backends/qualcomm/builders/README.md

Lines changed: 124 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch.
88
* [Check Operator Spec](#check-operator-spec)
99
* [Implementation](#implementation)
1010
* [Quantizer Annotation](#quantizer-annotation)
11+
* [Operator Support Status](#operator-support-status)
1112
* [Issues](#issues)
1213
* [Pull Requests](#pull-requests)
1314

@@ -246,7 +247,7 @@ Now, we can start to fill in function body step by step:
246247
nodes_to_wrappers,
247248
)
248249
```
249-
The logic should be similar and straightforward. Please carefully set arguments `tensor_type`
250+
The logic should be similar and straightforward. Please carefully set arguments `tensor_type`
250251
according to tensors' property.
251252
252253
3. Define parameters:
@@ -355,6 +356,128 @@ Now, we can start to fill in function body step by step:
355356
### Quantizer Annotation
356357
The operator now should be functional for Qualcomm backends. For operator to work in fixed-precision, we should also make `QnnQuantizer` to correctly insert observers for recording calibrated encodings. Please read more on the [Quantization Annotation Tutorial](../quantizer//README.md).
357358

359+
## Operator Support Status
360+
Please help update following table if you are contributing new operators:
361+
362+
| Operators | HTP - 77/116 Enabled |
363+
|-----------|---------|
364+
| Argmax | &cross; |
365+
| Argmin | &check; |
366+
| BatchNorm | &check; |
367+
| BatchToSpace | &cross; |
368+
| Cast | &check; |
369+
| ChannelShuffle | &cross; |
370+
| Concat | &check; |
371+
| Conv2d | &check; |
372+
| Conv3d | &cross; |
373+
| Convert | &check; |
374+
| CreateSparse | &cross; |
375+
| CumulativeSum | &check; |
376+
| DepthToSpace | &check; |
377+
| DepthWiseConv2d | &check; |
378+
| Dequantize | &check; |
379+
| DetectionOutput | &cross; |
380+
| ElementWiseAbs | &check; |
381+
| ElementWiseAdd | &check; |
382+
| ElementWiseAnd | &check; |
383+
| ElementWiseAsin | &cross; |
384+
| ElementWiseAtan | &cross; |
385+
| ElementWiseBinary | &cross; |
386+
| ElementWiseCeil | &check; |
387+
| ElementWiseCos | &check; |
388+
| ElementWiseDivide | &check; |
389+
| ElementWiseEqual | &check; |
390+
| ElementWiseExp | &check; |
391+
| ElementWiseFloor | &cross; |
392+
| ElementWiseFloorDiv | &cross; |
393+
| ElementWiseGreater | &check; |
394+
| ElementWiseGreaterEqual | &check; |
395+
| ElementWiseLess | &check; |
396+
| ElementWiseLessEqual | &check; |
397+
| ElementWiseLog | &check; |
398+
| ElementWiseMaximum | &check; |
399+
| ElementWiseMinimum | &check; |
400+
| ElementWiseMultiply | &check; |
401+
| ElementWiseNeg | &check; |
402+
| ElementWiseNeuron | &check; |
403+
| ElementWiseNot | &check; |
404+
| ElementWiseNotEqual | &check; |
405+
| ElementWiseOr | &check; |
406+
| ElementWisePower | &check; |
407+
| ElementWiseRound | &cross; |
408+
| ElementWiseRsqrt | &check; |
409+
| ElementWiseSelect | &check; |
410+
| ElementWiseSign | &cross; |
411+
| ElementWiseSin | &check; |
412+
| ElementWiseSquaredDifference | &cross; |
413+
| ElementWiseSquareRoot | &check; |
414+
| ElementWiseSubtract | &check; |
415+
| ElementWiseUnary | &cross; |
416+
| ElementWiseXor | &cross; |
417+
| Elu | &check; |
418+
| ExpandDims | &check; |
419+
| ExtractGlimpse | &cross; |
420+
| ExtractPatches | &cross; |
421+
| FullyConnected | &check; |
422+
| Gather | &check; |
423+
| GatherElements | &cross; |
424+
| GatherNd | &check; |
425+
| Gelu | &check; |
426+
| GetSparseIndices | &cross; |
427+
| GetSparseValues | &cross; |
428+
| GridSample | &cross; |
429+
| GroupNorm | &check; |
430+
| HardSwish | &check; |
431+
| InstanceNorm | &check; |
432+
| L2Norm | &cross; |
433+
| LayerNorm | &check; |
434+
| LogSoftmax | &check; |
435+
| Lrn | &cross; |
436+
| Lstm | &cross; |
437+
| MatMul | &check; |
438+
| MultiClassNms | &cross; |
439+
| NonMaxSuppression | &cross; |
440+
| Nonzero | &cross; |
441+
| OneHot | &cross; |
442+
| Pack | &check; |
443+
| Pad | &check; |
444+
| PoolAvg2d | &check; |
445+
| PoolAvg3d | &cross; |
446+
| PoolMax2d | &check; |
447+
| Prelu | &check; |
448+
| Quantize | &check; |
449+
| ReduceMax | &check; |
450+
| ReduceMean | &check; |
451+
| ReduceMin | &cross; |
452+
| ReduceSum | &check; |
453+
| Relu | &check; |
454+
| Relu1 | &cross; |
455+
| Relu6 | &cross; |
456+
| ReluMinMax | &check; |
457+
| Reshape | &check; |
458+
| Resize | &cross; |
459+
| ResizeBilinear | &check; |
460+
| ResizeNearestNeighbor | &check; |
461+
| RoiAlign | &cross; |
462+
| RmsNorm | &check; |
463+
| ScatterElements | &cross; |
464+
| ScatterNd | &check; |
465+
| Sigmoid | &check; |
466+
| Softmax | &check; |
467+
| SpaceToBatch | &cross; |
468+
| SpaceToDepth | &check; |
469+
| SparseToDense | &cross; |
470+
| Split | &check; |
471+
| Squeeze | &check; |
472+
| StridedSlice | &check; |
473+
| Tanh | &check; |
474+
| Tile | &check; |
475+
| TopK | &check; |
476+
| TransPose | &check; |
477+
| TransPoseConv2d | &check; |
478+
| TransPoseConv3d | &cross; |
479+
| Unpack | &check; |
480+
358481
## Issues
359482
Please refer to the [issue section](../README.md#issues) for more information.
360483

0 commit comments

Comments (0)