pytorch
diff --git a/backends/arm/CMakeLists.txt b/backends/arm/CMakeLists.txt
Lines changed: 7 additions & 6 deletions
diff --git a/backends/arm/_passes/annotate_decomposed_matmul.py b/backends/arm/_passes/annotate_decomposed_matmul.py
Lines changed: 3 additions & 3 deletions
diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
Lines changed: 8 additions & 7 deletions
diff --git a/backends/arm/_passes/fuse_quantized_activation_pass.py b/backends/arm/_passes/fuse_quantized_activation_pass.py
Lines changed: 3 additions & 2 deletions
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
Lines changed: 4 additions & 3 deletions
diff --git a/backends/arm/_passes/mm_to_bmm_pass.py b/backends/arm/_passes/mm_to_bmm_pass.py
Lines changed: 3 additions & 3 deletions
diff --git a/backends/arm/constants.py b/backends/arm/constants.py
Lines changed: 31 additions & 0 deletions
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
Lines changed: 6 additions & 6 deletions
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
Lines changed: 2 additions & 63 deletions
@@ -14,7 +14,9 @@ endif()
 
 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 
-set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+    ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
 add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
 
 
@@ -34,13 +36,12 @@ set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
 list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")
 
 add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${_common_include_directories}
-)
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR}
+target_link_libraries(
+  executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
 )
 
+install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
+
 # end config for bare metal builds
 endif()
 
 
@@ -12,7 +12,7 @@
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
 
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -62,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         }
         for partition in matmul_partitions:
             quantized_input = all(
-                input_node.target in dq_ops for input_node in partition.input_nodes
+                input_node.target in DQ_OPS for input_node in partition.input_nodes
             )
             matmul_node = [
                 node for node in partition.nodes if node.target in matmul_targets
@@ -93,7 +93,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     graph_module.graph.erase_node(partition_input)
 
             partition_output = list(partition.output_nodes[0].users)[0]
-            quantized_output = partition_output.target in q_ops
+            quantized_output = partition_output.target in Q_OPS
             if quantized_output:
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul
 
@@ -15,8 +15,9 @@
     get_param_tensor,
     is_param_node,
 )
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
@@ -109,7 +110,7 @@ def fold_and_annotate_arg(
                 return
 
             arg_quant_params = None
-            if arg.target in dq_ops:
+            if arg.target in DQ_OPS:
                 args = arg.args
                 scales = args[1]
                 if (
@@ -137,9 +138,9 @@ def fold_and_annotate_arg(
         if input_qparams is not None:
             node.meta["input_qparams"][i] = input_qparams
             for n in nodes_to_remove:
-                if n.target not in dq_ops:
+                if n.target not in DQ_OPS:
                     raise RuntimeError(
-                        f"Expected one of {dq_ops} dq_op, got {n.target}"
+                        f"Expected one of {DQ_OPS} dq_op, got {n.target}"
                     )
 
                 node.replace_input_with(n, cast(Node, n.args[0]))
@@ -154,7 +155,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if n.op != "call_function":
                 continue
             # Don't fold chains of quant-ops into each other.
-            if n.target in (*q_ops, *dq_ops):
+            if n.target in (*Q_OPS, *DQ_OPS):
                 continue
 
             # Make sure we haven't already set qparams meta information on the node
@@ -184,7 +185,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             # Copy the users, since we are modifying it.
             users_copy = copy.copy(n.users)
             for i, user in enumerate(users_copy):
-                if user.target not in q_ops:
+                if user.target not in Q_OPS:
                     continue
 
                 # quantization node found here, store the quantization parameters in meta value
@@ -221,7 +222,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
 
             # Make sure we have a quantized operator
             user = list(n.users)[0]
-            if user.target not in q_ops:
+            if user.target not in Q_OPS:
                 continue
 
             qargs = QuantArgs.from_operator(user.target, user.args)
 
@@ -6,7 +6,8 @@
 # pyre-unsafe
 
 import torch
-from executorch.backends.arm.tosa_quant_utils import q_ops, QuantArgs
+from executorch.backends.arm.constants import Q_OPS
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import Node
@@ -21,7 +22,7 @@ def _is_fuseable_quantized_activation(node: Node):
             min_val = node.args[1]
             is_fuseable = min_val == 0
 
-        is_quantized = len(node.users) == 1 and next(iter(node.users)).target in q_ops
+        is_quantized = len(node.users) == 1 and next(iter(node.users)).target in Q_OPS
         if is_fuseable and is_quantized:
             quant_node = next(iter(node.users))
             quant_args = QuantArgs.from_operator(quant_node.target, quant_node.args)
 
@@ -9,7 +9,8 @@
 
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch import Tensor
 from torch.fx import GraphModule, Node
@@ -94,11 +95,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
         for node in graph_module.graph.nodes:
             node = cast(Node, node)
 
-            if node.target not in dq_ops:
+            if node.target not in DQ_OPS:
                 continue
             # Copy users since we remove them while iterating, modyfing the node.users list.
             for user in copy(node.users):
-                if user.target in q_ops:
+                if user.target in Q_OPS:
                     self.fold_dq_q_to_rescale(node, user, graph_module)
                     modified = True
             if len(node.users) == 0:
 
@@ -12,7 +12,7 @@
     get_first_fake_tensor,
     insert_q_dq_pair,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import Node
@@ -56,7 +56,7 @@ def call(self, graph_module: torch.fx.GraphModule):
                     node.replace_input_with(input_node, unsqueeze_before)
 
                 # If Quantized we must insert unsqueeze --> q --> dq --> node
-                if input_node.target in dq_ops:
+                if input_node.target in DQ_OPS:
                     q_params = input_node.args[1:]
                     insert_q_dq_pair(graph, unsqueeze_before, q_params, from_node=node)
 
@@ -89,7 +89,7 @@ def call(self, graph_module: torch.fx.GraphModule):
                     user.replace_input_with(bmm_node, squeeze_after)
 
             # If quantized, insert mm --> q --> dq --> squeeze
-            if all(original_user.target in q_ops for original_user in original_users):
+            if all(original_user.target in Q_OPS for original_user in original_users):
                 q_params = original_users[0].args[1:]
                 insert_q_dq_pair(graph, bmm_node, q_params, from_node=node)
 
 
@@ -0,0 +1,31 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, cast, Final
+
+from executorch.exir.dialects._ops import ops as exir_ops
+
+exir_ops = cast(Any, exir_ops)  # NOTE(review): presumably for type checkers; confirm
+# Shorthand for the quantized_decomposed op namespace under exir_ops.edge.
+qd = exir_ops.edge.quantized_decomposed
+# Quantize overloads, one constant per overload.
+QUANT_PER_TENSOR_OP: Final = qd.quantize_per_tensor.default
+QUANT_PER_TENSOR_OP_T: Final = qd.quantize_per_tensor.tensor
+QUANT_PER_CHANNEL_OP: Final = qd.quantize_per_channel.default
+# Dequantize overloads, mirroring the quantize constants above.
+DEQUANT_PER_TENSOR_OP: Final = qd.dequantize_per_tensor.default
+DEQUANT_PER_TENSOR_OP_T: Final = qd.dequantize_per_tensor.tensor
+DEQUANT_PER_CHANNEL_OP: Final = qd.dequantize_per_channel.default
+# Grouped by direction: every quantize overload / every dequantize overload.
+Q_OPS: Final = (QUANT_PER_TENSOR_OP, QUANT_PER_TENSOR_OP_T, QUANT_PER_CHANNEL_OP)
+DQ_OPS: Final = (DEQUANT_PER_TENSOR_OP, DEQUANT_PER_TENSOR_OP_T, DEQUANT_PER_CHANNEL_OP)
+# The same overloads regrouped by granularity instead of by direction.
+PER_TENSOR_QDQ_OPS: Final = (
+    QUANT_PER_TENSOR_OP,
+    QUANT_PER_TENSOR_OP_T,
+    DEQUANT_PER_TENSOR_OP,
+    DEQUANT_PER_TENSOR_OP_T,
+)
+PER_CHANNEL_QDQ_OPS: Final = (QUANT_PER_CHANNEL_OP, DEQUANT_PER_CHANNEL_OP)
@@ -19,13 +19,13 @@
     FuseQuantizedActivationPass,
 )
 from executorch.backends.arm._passes.insert_table_ops import TableOps
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 from executorch.backends.arm.operator_support.ethos_u55_support import (
     EthosU55DtypeSupport,
     EthosU55NotSupported,
     EthosU55TransposeCheck,
     EthosU55ViewCheck,
 )
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
 from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.exir import ExportedProgram
 from executorch.exir.backend.utils import WhyNoPartitionReporter
@@ -368,7 +368,7 @@ def _is_matmul_node_supported(
                     matched_partition = partition
             if matched_partition is not None:
                 input_quantized = all(
-                    input_node.target in dq_ops
+                    input_node.target in DQ_OPS
                     for input_node in matched_partition.input_nodes
                 )
                 if not input_quantized:
@@ -377,7 +377,7 @@ def _is_matmul_node_supported(
                     )
                     return False
                 output_quantized = all(
-                    output_node_user.target in q_ops
+                    output_node_user.target in Q_OPS
                     for output_node_user in matched_partition.output_nodes[0].users
                 )
                 if not output_quantized:
@@ -413,7 +413,7 @@ def is_node_supported(
             users = node.users
             output_quantized = all(
                 user.target == operator.getitem
-                and all(user_user.target in q_ops for user_user in user.users)
+                and all(user_user.target in Q_OPS for user_user in user.users)
                 for user in users
             )
         elif FuseQuantizedActivationPass._is_fuseable_input(node):
@@ -427,7 +427,7 @@ def is_node_supported(
             input_quantized = FuseQuantizedActivationPass._is_fuseable_input(input_node)
 
         input_quantized = input_quantized or all(
-            (input_node.target in dq_ops)
+            (input_node.target in DQ_OPS)
             or (not get_first_fake_tensor(input_node).dtype.is_floating_point)
             for input_node in node.all_input_nodes
         )
@@ -436,7 +436,7 @@ def is_node_supported(
             self.reporter.report_reject(node, "One or more inputs were not quantized.")
             return False
 
-        all_q_users = all((output_node.target in q_ops) for output_node in node.users)
+        all_q_users = all((output_node.target in Q_OPS) for output_node in node.users)
         is_floating_point = get_first_fake_tensor(node).dtype.is_floating_point
         output_quantized = output_quantized or all_q_users or not is_floating_point
 
 
@@ -11,11 +11,9 @@
 # Utility functions for TOSAQuantizer
 #
 
-from typing import cast, Sequence
+from typing import cast
 
-import torch
-from torch._subclasses import FakeTensor
-from torch.fx import GraphModule, Node
+from torch.fx import Node
 
 from torchao.quantization.pt2e.quantizer import QuantizationAnnotation
 from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY
@@ -45,62 +43,3 @@ def mark_node_as_annotated(node: Node) -> None:
     if Q_ANNOTATION_KEY not in node.meta:
         node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation()
     node.meta[Q_ANNOTATION_KEY]._annotated = True
-
-
-def is_ok_for_quantization(node: Node, gm: GraphModule):
-    """Check if an node can be quantized. The node can not be quantized if:
-    - The node does not output a float tensor or,
-    - The node outputs a large scalar.
-    """
-    return not (is_non_float_tensor(node) or is_large_scalar(node, gm))
-
-
-def get_node_target(module: torch.nn.Module | GraphModule, target_str: str):
-    targets = target_str.split(".")
-    for target in targets[:-1]:
-        module = module.get_submodule(target)
-    return getattr(module, targets[-1])
-
-
-def is_large_scalar(node: Node, gm: GraphModule):
-    """Check if input is a large scalar value. So that we can skip quantization for the node
-    since histc op (in HistogramObserver) only works for values up to certain upper bound
-    """
-    if node.op == "get_attr" and isinstance(node.target, str):
-        tensor = get_node_target(gm, node.target)
-        # torch.histc works until this upper bound
-        HISTC_UPPER_BOUND = 3.4028235e15
-        return tensor.numel() == 1 and abs(tensor.item()) > HISTC_UPPER_BOUND
-    return False
-
-
-def is_non_float_tensor(node: Node) -> bool:
-    """Check if the output of a node has a data type other than `torch.float32`.
-
-    If the output is not `torch.float32`, quantization cannot be performed, as
-    observers only work with floating-point tensors.
-
-    Args:
-        node (Node): The node to check the output(s) for.
-
-    Returns:
-        bool: `True` if the data type is not float32, otherwise `False`.
-
-    Note:
-        - If `node.meta["val"]` is a `list`, the function returns `True` if **any**
-          element is **not** an instance of `FakeTensor` or does **not** have
-          `torch.float32` as its data type.
-        - If node.meta["val"] is missing or is not an instance of `FakeTensor`, the
-          function returns True.
-    """
-    if "val" in node.meta and isinstance(node.meta["val"], Sequence):
-        return any(
-            not isinstance(fake_tensor, FakeTensor)
-            or fake_tensor.dtype != torch.float32
-            for fake_tensor in node.meta["val"]
-        )
-
-    if "val" not in node.meta or not isinstance(node.meta["val"], FakeTensor):
-        return True
-
-    return node.meta["val"].dtype != torch.float32