pytorch
diff --git a/.ci/scripts/build_android_instrumentation.sh b/.ci/scripts/build_android_instrumentation.sh
Lines changed: 0 additions & 21 deletions
diff --git a/.github/workflows/_android.yml b/.github/workflows/_android.yml
Lines changed: 3 additions & 2 deletions
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/android-release-artifacts.yml b/.github/workflows/android-release-artifacts.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.mypy.ini b/.mypy.ini
Lines changed: 3 additions & 0 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 8 additions & 10 deletions
diff --git a/README.md b/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
Lines changed: 1 addition & 0 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 3 additions & 0 deletions
diff --git a/backends/arm/_passes/decompose_sqrt_pass.py b/backends/arm/_passes/decompose_sqrt_pass.py
Lines changed: 39 additions & 0 deletions
@@ -14,7 +14,7 @@ jobs:
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
-      submodules: 'true'
+      submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
       upload-artifact: android-apps
@@ -36,7 +36,8 @@ jobs:
         cp ${BUILD_AAR_DIR}/executorch.aar $ARTIFACTS_DIR_NAME
 
         mkdir -p ${ARTIFACTS_DIR_NAME}/library_test_dir
-        bash .ci/scripts/build_android_instrumentation.sh
+        bash extension/android/executorch_android/android_test_setup.sh
+        (cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest)
         cp extension/android/executorch_android/build/outputs/apk/androidTest/debug/executorch_android-debug-androidTest.apk "${ARTIFACTS_DIR_NAME}/library_test_dir"
 
         mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
 
@@ -345,7 +345,7 @@ jobs:
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
-      submodules: 'true'
+      submodules: 'recursive'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       timeout: 90
       upload-artifact: android-apps
 
@@ -45,7 +45,7 @@ jobs:
       secrets-env: EXECUTORCH_MAVEN_SIGNING_KEYID EXECUTORCH_MAVEN_SIGNING_PASSWORD EXECUTORCH_MAVEN_CENTRAL_PASSWORD EXECUTORCH_MAVEN_CENTRAL_USERNAME EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-clang12-android
-      submodules: 'true'
+      submodules: 'recursive'
       ref: ${{ github.sha }}
       timeout: 90
       upload-artifact: android-apps
 
@@ -80,6 +80,9 @@ ignore_missing_imports = True
 [mypy-serializer.*]
 ignore_missing_imports = True
 
+[mypy-tosa_tools.*]
+ignore_missing_imports = True
+
 [mypy-setuptools.*]
 ignore_missing_imports = True
 
 
@@ -1,7 +1,6 @@
 Thank you for your interest in contributing to ExecuTorch! We want to make
 it easy to contribute to this project.
 
-&nbsp;
 
 ## Dev Install
 
@@ -91,7 +90,7 @@ executorch
 │   └── <a href="runtime/platform">platform</a> - Layer between architecture specific code and portable C++.
 ├── <a href="schema">schema</a> - ExecuTorch PTE file format flatbuffer schemas.
 ├── <a href="scripts">scripts</a> - Utility scripts for building libs, size management, dependency management, etc.
-├── <a href="shim">shim</a> - Compatibility layer between OSS and Internal builds.
+├── <a href="shim_et">shim_et</a> - Compatibility layer between OSS and Internal builds.
 ├── <a href="test">test</a> - Broad scoped end-to-end tests.
 ├── <a href="third-party">third-party</a> - Third-party dependencies.
 ├── <a href="tools">tools</a> - Tools for building ExecuTorch from source, for different built tools (CMake, Buck).
@@ -103,6 +102,8 @@ executorch
 ## Contributing workflow
 We actively welcome your pull requests (PRs).
 
+If you're completely new to open-source projects, GitHub, or ExecuTorch, please see our [New Contributor Guide](./docs/source/new-contributor-guide.md) for a step-by-step walkthrough on making your first contribution. Otherwise, read on.
+
 1. [Claim an issue](#claiming-issues), if present, before starting work. If an
    issue doesn't cover the work you plan to do, consider creating one to provide
    context about it, and to build consensus about the scope and solution.
@@ -192,9 +193,6 @@ in the Github repo.
 
 ## Coding Style
 
-Goal: Encourage standards that make it easier to read, edit, maintain, and debug
-the ExecuTorch code.
-
 ### lintrunner
 
 We use [`lintrunner`](https://pypi.org/project/lintrunner/) to help make sure the
@@ -259,7 +257,7 @@ toolchains, and having access to relatively modern C++ features.
 
 #### C/C++ standard library usage
 
-**Restricted usage of the C++ standard library.**
+**Restricted usage of the C++ standard library**
 
 Rationale: ExecuTorch is intended to be portable to bare-metal systems that lack
 certain features, like dynamic memory, threading, and locking, required by parts
@@ -280,7 +278,7 @@ careful to also manually destroy objects initialized in this way.
 
 #### C++ language features
 
-**Exceptions: Do not use.**
+**Exceptions: Do not use**
 - Rationale: Exceptions are not widely supported on some classes of
   microcontrollers and DSPs, and they can significantly increase binary size.
 
@@ -289,12 +287,12 @@ must work with threading**
 - Rationale: The core runtime must work on systems that do not have threading
   support.
 
-**RTTI, dynamic_cast, and `<typeid>`: Do not use.**
+**RTTI, dynamic_cast, and `<typeid>`: Do not use**
 - Rationale: RTTI adds extra data to every virtual class. ExecuTorch doesn't
   have a strong need for `dynamic_cast` and friends, so it's better to reduce
   the binary size.
 
-**Templates and template metaprogramming: Be careful and avoid if possible.**
+**Templates and template metaprogramming: Be careful and avoid if possible**
 - Rationale: Most templating results in code generation, and is one of the most
   common sources of binary bloat. Some use of templates is fine (e.g. an
   `ArrayRef<T>`, or code that handles multiple `ScalarType` types), but for the
@@ -359,7 +357,7 @@ docs](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/
 for basics.
 
 1. Push your branch to your fork of `pytorch/executorch`. Most people do not
-  have permission to push a branch directoy to the upstream repo.
+  have permission to push a branch directory to the upstream repo.
 1. Create your PR
    - Use the `main` branch as the base.
    - Give the PR a clear and descriptive title. It will become the title of the
 
@@ -49,9 +49,9 @@ Key value propositions of ExecuTorch are:
 ## Getting Started
 To get started you can:
 
-- Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) on getting things running locally and deploy a model to a device
+- Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) to start playing around right away
-- Jump straight into LLMs use cases by following specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)
 
 ## Feedback and Engagement
 
 
@@ -27,6 +27,7 @@
 from .decompose_select import DecomposeSelectPass  # noqa
 from .decompose_softmax_pass import DecomposeSoftmaxPass  # noqa
 from .decompose_softmax_unstable_pass import DecomposeSoftmaxUnstablePass  # noqa
+from .decompose_sqrt_pass import DecomposeSqrtPass  # noqa
 from .decompose_var_pass import DecomposeVarPass  # noqa
 from .fold_qdq_with_annotated_qparams_pass import (  # noqa
     FoldAndAnnotateQParamsPass,
 
@@ -32,6 +32,7 @@
     DecomposeSelectPass,
     DecomposeSoftmaxPass,
     DecomposeSoftmaxUnstablePass,
+    DecomposeSqrtPass,
     DecomposeVarPass,
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
@@ -115,6 +116,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         return self._transform(exported_program.graph_module)
 
     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+        self.add_pass(DecomposeSqrtPass())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSAMI())
         self.add_pass(FuseQuantizedActivationPass())
         self.add_pass(RemoveGetItemPass())
@@ -181,6 +183,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
+        self.add_pass(DecomposeSqrtPass())
 
         if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
             # Numerically stable softmax uses amax which is not supported on Ethos-U55
 
@@ -0,0 +1,39 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import Callable
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+# Edge-dialect sqrt operators that this module decomposes.
+edge_sqrt_ops = (exir_ops.edge.aten.sqrt.default,)
+# ATen sqrt operators (`sqrt` and `sqrt_`) that this module decomposes.
+aten_sqrt_ops = (
+    torch.ops.aten.sqrt.default,
+    torch.ops.aten.sqrt_.default,
+)
+
+
+def get_sqrt_decomposition(op) -> tuple:
+    # TODO : "MLETORCH-863 : Replace current sqrt -> pow.Tensor_Scalar workaround with pow.Tensor_Tensor"
+    if op in edge_sqrt_ops:
+        return exir_ops.edge.aten.pow.Tensor_Scalar
+    if op in aten_sqrt_ops:
+        return torch.ops.aten.pow.Tensor_Scalar
+    raise RuntimeError(f"Can't get sqrt decomposition for op {op}")
+
+
+class DecomposeSqrtPass(ExportPass):
+
+    def call_operator(self, op, args, kwargs, meta):
+        """
+        Decomposes `sqrt(x)` into `pow(x, 0.5)` for backend support.
+        """
+
+        if op not in (edge_sqrt_ops + aten_sqrt_ops):
+            return super().call_operator(op, args, kwargs, meta)
+
+        pow_op = get_sqrt_decomposition(op)
+
+        return super().call_operator(pow_op, (args[0], 0.5), {}, meta)