pytorch
diff --git a/‎.ci/scripts/zephyr-utils.sh‎
Lines changed: 3 additions & 3 deletions b/‎.ci/scripts/zephyr-utils.sh‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/trunk.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CMakePresets.json‎
Lines changed: 30 additions & 0 deletions b/‎CMakePresets.json‎
Lines changed: 30 additions & 0 deletions
diff --git a/‎backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py‎
Lines changed: 219 additions & 0 deletions b/‎backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py‎
Lines changed: 219 additions & 0 deletions
diff --git a/‎backends/nxp/edge_passes/neutron_edge_pass.py‎
Lines changed: 55 additions & 0 deletions b/‎backends/nxp/edge_passes/neutron_edge_pass.py‎
Lines changed: 55 additions & 0 deletions
@@ -6,9 +6,9 @@
 # LICENSE file in the root directory of this source tree.
 
 download_arm_zephyr_sdk () {
-    wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.0/zephyr-sdk-0.16.0_linux-x86_64.tar.xz
-    tar -xf zephyr-sdk-0.16.0_linux-x86_64.tar.xz
-    rm -f zephyr-sdk-0.16.0_linux-x86_64.tar.xz
+    # Download the Zephyr SDK v0.17.2 archive (linux-x86_64), unpack it into the current directory and delete the
+    # archive afterwards. The version is hard-coded here; the CI workflow in this change calls
+    # `./zephyr-sdk-0.17.2/setup.sh`, so presumably both places must be bumped together.
+    wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+    tar -xf zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+    rm -f zephyr-sdk-0.17.2_linux-x86_64.tar.xz
 }
 
 setup_zephyr_et_module () {
 
@@ -92,7 +92,7 @@ jobs:
 
         # TODO @Bujji: Should see if this can be moved into the docker image itself
         download_arm_zephyr_sdk
-        ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
+        ./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
         cd $ZEPHYR_PROJ_ROOT
         setup_zephyr_et_module
 
 
@@ -6,6 +6,36 @@
       "hidden": true,
       "binaryDir": "${sourceDir}/cmake-out"
     },
+    {
+      "name": "android-arm64-v8a",
+      "displayName": "Build executorch core and JNI bindings on android arm64-v8a",
+      "inherits": ["common"],
+      "binaryDir": "${sourceDir}/cmake-out-android-arm64-v8a",
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+        "ANDROID_ABI": "arm64-v8a"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": ["Darwin", "Linux", "Windows"]
+      }
+    },
+    {
+      "name": "android-x86_64",
+      "displayName": "Build executorch core and JNI bindings on android x86_64",
+      "inherits": ["common"],
+      "binaryDir": "${sourceDir}/cmake-out-android-x86_64",
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+        "ANDROID_ABI": "x86_64"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": ["Darwin", "Linux", "Windows"]
+      }
+    },
     {
       "name": "macos",
       "displayName": "Build ExecuTorch for macOS",
 
@@ -0,0 +1,219 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass
+from executorch.backends.nxp.neutron_partitioner import QDQClusterRecognizer
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch.fx import Node
+from torch.fx.passes.infra.pass_base import PassResult
+
+
+def insert_qdq_pair_after_node(
+    graph: torch.fx.Graph, anchor: torch.fx.Node, q_params: tuple
+):
+    """Insert a `quantize -> dequantize` pair right after `anchor` and re-route all users of `anchor` to it.
+
+    After the call, the graph contains `anchor -> quantize -> dequantize -> <former users of anchor>`.
+
+    :param graph: Graph which contains `anchor`. It is modified in place.
+    :param anchor: Node whose output will be passed through the newly added Q/DQ pair.
+    :param q_params: Quantization parameters. They are appended after the input tensor in the arguments of both
+                      the new `quantize` and the new `dequantize` node.
+    """
+    # Accept any iterable of parameters, not just a tuple.
+    q_params = tuple(q_params)
+
+    # Insert a Quantize node.
+    with graph.inserting_after(anchor):
+        quantize_op = graph.create_node(
+            op="call_function",
+            target=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+            args=(),  # Will be added later.
+        )
+        # Use a (shallow) copy, so the nodes don't share a single metadata dict. Otherwise, a later write to the
+        #  `meta` of one node would silently change the `meta` of the others as well.
+        quantize_op.meta = dict(anchor.meta)
+
+    # Insert a Dequantize node.
+    with graph.inserting_after(quantize_op):
+        dequantize_op = graph.create_node(
+            op="call_function",
+            target=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            args=(quantize_op,) + q_params,
+        )
+        dequantize_op.meta = dict(quantize_op.meta)
+    anchor.replace_all_uses_with(dequantize_op)
+
+    # Add this at the end, so the `anchor.replace_all_uses_with(dequantize_op)` does not replace the first use of the
+    #  `quantize_op`.
+    quantize_op.args = (anchor,) + q_params
+
+
+def _is_dequantize(node_: Node) -> bool:
+    """Tell whether `node_` is a call of the edge dialect per-tensor `dequantize` operator."""
+    if node_.op != "call_function":
+        return False
+
+    dequantize_target = (
+        exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+    )
+    return node_.target == dequantize_target
+
+
+def _is_quantize(node_: Node) -> bool:
+    """Tell whether `node_` is a call of the edge dialect per-tensor `quantize` operator."""
+    if node_.op != "call_function":
+        return False
+
+    quantize_target = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+    return node_.target == quantize_target
+
+
+class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
+    """Separate an auxiliary operator at the start of a QDQ cluster from the main operator of the cluster, by
+     inserting a `quantize -> dequantize` pair between them.
+
+                                                           │
+                                                     ┌─────▼──────┐
+                │                                    │ dequantize │
+          ┌─────▼──────┐                             └─────┬──────┘
+          │ dequantize │                             ┌─────▼──────┐
+          └─────┬──────┘                             │ <aux_node> │
+          ┌─────▼──────┐                             └─────┬──────┘
+          │ <aux_node> │                              ┌────▼─────┐            ┐
+          └─────┬──────┘                              │ quantize │            │
+     ┌──────────▼──────────┐       replaced with      └────┬─────┘            │
+    ⋯┤ <main_cluster_node> ├⋯     ──────────────►          │                  │ newly added nodes
+     └──────────┬──────────┘                         ┌─────▼──────┐           │
+                ▼                                    │ dequantize │           │
+                ⋮                                    └─────┬──────┘           ┘
+           ┌────▼─────┐                         ┌──────────▼──────────┐
+           │ quantize │                        ⋯┤ <main_cluster_node> ├⋯
+           └────┬─────┘                         └──────────┬──────────┘
+                ▼                                          ▼
+                                                           ⋮
+                                                      ┌────▼─────┐
+                                                      │ quantize │
+                                                      └────┬─────┘
+                                                           ▼
+    """
+
+    # Operators which are considered to be the `<aux_node>`.
+    allowed_auxiliary_nodes = [exir_ops.edge.aten.view_copy.default]
+
+    # The pass only modifies the graph if the `<aux_node>` feeds one of these operators.
+    allowed_main_cluster_nodes = [
+        exir_ops.edge.aten.addmm.default,
+        exir_ops.edge.aten.mm.default,
+    ]
+
+    def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        """Find the first occurrence of the pattern from the class docstring and transform it.
+
+        :param graph_module: Module whose graph is searched and modified in place.
+        :return: `PassResult` with `modified=True` right after the first transformation, or with `modified=False`
+                  if no node matched the pattern.
+        """
+        graph = graph_module.graph
+
+        for candidate in graph.nodes:
+            is_auxiliary = (
+                candidate.op == "call_function"
+                and candidate.target in self.allowed_auxiliary_nodes
+            )
+            if not is_auxiliary:
+                continue
+
+            # The auxiliary operator must consume the output of a `dequantize` directly.
+            producer = candidate.args[0]
+            if not _is_dequantize(producer):
+                continue
+
+            # The auxiliary operator must have exactly 1 consumer.
+            consumers = list(candidate.users)
+            if len(consumers) != 1:
+                continue
+            (consumer,) = consumers
+
+            is_supported_main_node = (
+                consumer.op == "call_function"
+                and consumer.target in self.allowed_main_cluster_nodes
+            )
+            if not is_supported_main_node:
+                continue
+
+            # All 3 nodes must belong to the QDQ cluster of the main node.
+            cluster = QDQClusterRecognizer().get_qdq_cluster(consumer)
+            if not all(
+                member in cluster for member in (producer, candidate, consumer)
+            ):
+                continue
+
+            # ---- Pattern matched. Re-use the quantization parameters of the leading `dequantize`. ----
+            insert_qdq_pair_after_node(graph, candidate, producer.args[1:])
+
+            # Stop iterating over the graph which has just been changed. The parent class calls `run()` again.
+            return PassResult(graph_module, True)
+
+        # No node matched the pattern.
+        return PassResult(graph_module, False)
+
+
+class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass):
+    """Separate an auxiliary operator at the end of a QDQ cluster from the main operator of the cluster, by
+     inserting a `quantize -> dequantize` pair between them.
+
+                                                            │
+                                                      ┌─────▼──────┐
+                │                                     │ dequantize │
+          ┌─────▼──────┐                              └─────┬──────┘
+          │ dequantize │                                    ⋮
+          └─────┬──────┘                         ┌──────────▼──────────┐
+                ▼                               ⋯┤ <main_cluster_node> ├⋯
+                ⋮                                └──────────┬──────────┘
+     ┌──────────▼──────────┐       replaced with       ┌────▼─────┐            ┐
+    ⋯┤ <main_cluster_node> ├⋯     ──────────────►      │ quantize │            │
+     └──────────┬──────────┘                           └────┬─────┘            │
+          ┌─────▼──────┐                                    │                  │ newly added nodes
+          │ <aux_node> │                              ┌─────▼──────┐           │
+          └─────┬──────┘                              │ dequantize │           │
+           ┌────▼─────┐                               └─────┬──────┘           ┘
+           │ quantize │                               ┌─────▼──────┐
+           └────┬─────┘                               │ <aux_node> │
+                ▼                                     └─────┬──────┘
+                                                       ┌────▼─────┐
+                                                       │ quantize │
+                                                       └────┬─────┘
+                                                            ▼
+    """
+
+    # Operators which are considered to be the `<aux_node>`.
+    allowed_auxiliary_nodes = [exir_ops.edge.aten.view_copy.default]
+
+    # The pass only modifies the graph if the `<aux_node>` is fed by one of these operators.
+    allowed_main_cluster_nodes = [
+        exir_ops.edge.aten.addmm.default,
+        exir_ops.edge.aten.mm.default,
+    ]
+
+    def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        """Find the first occurrence of the pattern from the class docstring and transform it.
+
+        :param graph_module: Module whose graph is searched and modified in place.
+        :return: `PassResult` with `modified=True` right after the first transformation, or with `modified=False`
+                  if no node matched the pattern.
+        """
+        graph = graph_module.graph
+
+        for candidate in graph.nodes:
+            is_auxiliary = (
+                candidate.op == "call_function"
+                and candidate.target in self.allowed_auxiliary_nodes
+            )
+            if not is_auxiliary:
+                continue
+
+            # The auxiliary operator must consume the output of a supported main node directly.
+            producer = candidate.args[0]
+            is_supported_main_node = (
+                producer.op == "call_function"
+                and producer.target in self.allowed_main_cluster_nodes
+            )
+            if not is_supported_main_node:
+                continue
+
+            # The auxiliary operator must have exactly 1 consumer, which must be a `quantize`.
+            consumers = list(candidate.users)
+            if len(consumers) != 1:
+                continue
+            (consumer,) = consumers
+            if not _is_quantize(consumer):
+                continue
+
+            # All 3 nodes must belong to the QDQ cluster of the main node.
+            cluster = QDQClusterRecognizer().get_qdq_cluster(producer)
+            if not all(
+                member in cluster for member in (consumer, candidate, producer)
+            ):
+                continue
+
+            # ---- Pattern matched. Re-use the quantization parameters of the trailing `quantize`. ----
+            insert_qdq_pair_after_node(graph, producer, consumer.args[1:])
+
+            # Stop iterating over the graph which has just been changed. The parent class calls `run()` again.
+            return PassResult(graph_module, True)
+
+        # No node matched the pattern.
+        return PassResult(graph_module, False)
@@ -0,0 +1,55 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from abc import abstractmethod
+
+import torch
+
+from executorch.exir.pass_base import ExportPass
+from torch.fx.passes.infra.pass_base import PassResult
+
+
+class NeutronEdgePass(ExportPass):
+    """Abstract base of the pre-processing passes which operate on the edge dialect."""
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        """Run `self.run()` repeatedly until it reports no modification, or until the iteration limit is hit.
+
+        A pass returns as soon as it changes the graph (it must not keep iterating over nodes of a graph it has
+        just modified), so a single `run()` handles at most one occurrence of its pattern. Repeating the call
+        lets the pass cover the whole model.
+
+        :param graph_module: Module to transform.
+        :return: `PassResult` holding the recompiled module and a flag telling whether any `run()` modified it.
+        """
+        any_change = False
+
+        # The number of nodes at the start is used as the upper bound on the number of `run()` calls.
+        for _ in range(len(graph_module.graph.nodes)):
+            outcome = self.run(graph_module)
+            if not outcome.modified:
+                # No more changes have been made.
+                break
+
+            any_change = True
+            graph_module = outcome.graph_module
+        else:
+            # The loop finished without a `break`, i.e. the iteration limit was reached.
+            logging.warning(
+                f"The NeutronEdgePass `{self.__class__.__name__}` reached the iteration limit."
+            )
+
+        return PassResult(self.recompile_module(graph_module), any_change)
+
+    @abstractmethod
+    def run(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        """Perform (at most) one graph modification. To be implemented by the child classes.
+
+        :param graph_module: Module to modify.
+        :return: `PassResult` whose `modified` flag tells whether the graph was changed.
+        """
+
+    def recompile_module(
+        self, graph_module: torch.fx.GraphModule
+    ) -> torch.fx.GraphModule:
+        """Recompile `graph_module` and pass it through the parent's `call()`, which is meant to re-trace the metadata
+        so that the datatypes and shapes are correct.
+        """
+        graph_module.recompile()
+        retraced = super().call(graph_module)
+        return retraced.graph_module