Skip to content

Commit 2755662

Browse files
committed
support log in XNNPACK backend
1 parent 339b75c commit 2755662

File tree

189 files changed

+21274
-970
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

189 files changed

+21274
-970
lines changed

.buckconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
[buck2]
4141
restarter=true
42+
file_watcher=notify
4243

4344
[oss]
4445
folly_cxx_tests = False

.vscode/launch.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Debug CMake project",
9+
"type": "lldb", // https://github.com/vadimcn/vscode-lldb
10+
"request": "launch",
11+
"program": "${command:cmake.launchTargetPath}",
12+
"args": [
13+
"--model_path=./add.pte",
14+
]
15+
}
16+
]
17+
}

.vscode/settings.json

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"files.associations": {
3+
"cstdlib": "cpp",
4+
"__bit_reference": "cpp",
5+
"__hash_table": "cpp",
6+
"__locale": "cpp",
7+
"__node_handle": "cpp",
8+
"__split_buffer": "cpp",
9+
"__tree": "cpp",
10+
"__verbose_abort": "cpp",
11+
"array": "cpp",
12+
"bitset": "cpp",
13+
"cctype": "cpp",
14+
"charconv": "cpp",
15+
"clocale": "cpp",
16+
"cmath": "cpp",
17+
"complex": "cpp",
18+
"condition_variable": "cpp",
19+
"cstdarg": "cpp",
20+
"cstdint": "cpp",
21+
"cstdio": "cpp",
22+
"cstring": "cpp",
23+
"ctime": "cpp",
24+
"cwchar": "cpp",
25+
"cwctype": "cpp",
26+
"deque": "cpp",
27+
"execution": "cpp",
28+
"memory": "cpp",
29+
"forward_list": "cpp",
30+
"future": "cpp",
31+
"initializer_list": "cpp",
32+
"iomanip": "cpp",
33+
"ios": "cpp",
34+
"iosfwd": "cpp",
35+
"iostream": "cpp",
36+
"istream": "cpp",
37+
"limits": "cpp",
38+
"list": "cpp",
39+
"locale": "cpp",
40+
"map": "cpp",
41+
"mutex": "cpp",
42+
"new": "cpp",
43+
"optional": "cpp",
44+
"print": "cpp",
45+
"queue": "cpp",
46+
"ratio": "cpp",
47+
"regex": "cpp",
48+
"set": "cpp",
49+
"shared_mutex": "cpp",
50+
"sstream": "cpp",
51+
"stack": "cpp",
52+
"stdexcept": "cpp",
53+
"streambuf": "cpp",
54+
"string": "cpp",
55+
"string_view": "cpp",
56+
"typeindex": "cpp",
57+
"typeinfo": "cpp",
58+
"unordered_map": "cpp",
59+
"unordered_set": "cpp",
60+
"variant": "cpp",
61+
"vector": "cpp",
62+
"algorithm": "cpp",
63+
"iterator": "cpp",
64+
"tuple": "cpp",
65+
"span": "cpp"
66+
},
67+
"C_Cpp.default.compilerPath": "/library/developer/commandlinetools/usr/bin/c++",
68+
"python.analysis.typeCheckingMode": "off"
69+
}

CMakePresets.json

Lines changed: 52 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,55 @@
11
{
2-
"version": 10,
3-
"cmakeMinimumRequired": {
4-
"major": 3,
5-
"minor": 31,
6-
"patch": 0
7-
},
8-
"$comment": "On-device AI across mobile, embedded and edge for PyTorch.",
9-
"configurePresets": [
10-
{
11-
"name": "common",
12-
"hidden": true,
13-
"binaryDir": "${sourceDir}/cmake-out",
14-
"generator": "Unix Makefiles"
2+
"version": 10,
3+
"cmakeMinimumRequired": {
4+
"major": 3,
5+
"minor": 31,
6+
"patch": 0
157
},
16-
{
17-
"name": "macos-arm64",
18-
"inherits": ["common"],
19-
"generator": "Xcode",
20-
"cacheVariables": {
21-
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
22-
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos-arm64.cmake",
23-
"PLATFORM": "MAC_ARM64",
24-
"DEPLOYMENT_TARGET": "10.15"
25-
},
26-
"condition": {
27-
"lhs": "${hostSystemName}",
28-
"type": "equals",
29-
"rhs": "Darwin"
30-
}
31-
}
32-
]
8+
"configurePresets": [
9+
{
10+
"name": "common",
11+
"hidden": true,
12+
"binaryDir": "${sourceDir}/cmake-out",
13+
"generator": "Unix Makefiles"
14+
},
15+
{
16+
"name": "macos-arm64",
17+
"inherits": [
18+
"common"
19+
],
20+
"generator": "Xcode",
21+
"cacheVariables": {
22+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
23+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos-arm64.cmake",
24+
"PLATFORM": "MAC_ARM64",
25+
"DEPLOYMENT_TARGET": "10.15"
26+
},
27+
"condition": {
28+
"lhs": "${hostSystemName}",
29+
"type": "equals",
30+
"rhs": "Darwin"
31+
}
32+
},
33+
{
34+
"name": "Executorch",
35+
"displayName": "Executorch",
36+
"description": "Sets Ninja generator, build and install directory",
37+
"generator": "Ninja",
38+
"binaryDir": "${sourceDir}/out/build/${presetName}",
39+
"cacheVariables": {
40+
"CMAKE_BUILD_TYPE": "Debug",
41+
"CMAKE_INSTALL_PREFIX": "${sourceDir}/out/install/${presetName}",
42+
"EXECUTORCH_LOG_LEVEL": "Debug",
43+
"EXECUTORCH_BUILD_PORTABLE_OPS": "ON"
44+
}
45+
}
46+
],
47+
"buildPresets": [
48+
{
49+
"name": "Executorch",
50+
"description": "",
51+
"displayName": "",
52+
"configurePreset": "Executorch"
53+
}
54+
]
3355
}
236 KB
Binary file not shown.

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from .decompose_gelu_pass import DecomposeGeluPass # noqa
2525
from .decompose_layernorm_pass import DecomposeLayerNormPass # noqa
2626
from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass # noqa
27+
from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass # noqa
2728
from .decompose_linear_pass import DecomposeLinearPass # noqa
2829
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
2930
from .decompose_ne_pass import DecomposeNotEqualPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
DecomposeLayerNormPass,
3030
DecomposeLeakyReLUPass,
3131
DecomposeLinearPass,
32+
DecomposeLinearVectorNormPass,
3233
DecomposeMeanDimPass,
3334
DecomposeNotEqualPass,
3435
DecomposeSelectPass,
@@ -86,6 +87,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
8687
self.add_pass(ConvertSplitToSlicePass())
8788
self.add_pass(ConvertMmToBmmPass())
8889
self.add_pass(DecomposeLinearPass())
90+
self.add_pass(DecomposeLinearVectorNormPass())
8991
self.add_pass(DecomposeMeanDimPass())
9092
self.add_pass(ConvertFullLikeToFullPass())
9193
self.add_pass(ConvertToClampPass())
@@ -133,6 +135,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
133135
self.add_pass(FuseBatchnorm2DPass(exported_program))
134136
self.add_pass(ConvertMmToBmmPass())
135137
self.add_pass(DecomposeLinearPass())
138+
self.add_pass(DecomposeLinearVectorNormPass())
136139
self.add_pass(DecomposeLeakyReLUPass())
137140
self.add_pass(DecomposeBatchNormPass())
138141
self.add_pass(DecomposeLayerNormPass())
@@ -207,6 +210,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
207210
self.add_pass(DecomposeCosineSimilarityPass())
208211
self.add_pass(DecomposeDivPass())
209212
self.add_pass(DecomposeLeakyReLUPass())
213+
self.add_pass(DecomposeLinearVectorNormPass())
210214
self.add_pass(DecomposeSqrtPass())
211215
self.add_pass(DecomposeSiluPass())
212216

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
from executorch.exir.pass_base import ExportPass
8+
9+
10+
class DecomposeLinearVectorNormPass(ExportPass):
    """
    Decompose aten.linalg_vector_norm.default into more primitive ops.

    This pass must run before quantization so that the decomposed ops can be
    annotated; by default, aten.linalg_vector_norm is only decomposed later,
    during legalization to Edge IR.

    The decomposition is as follows:

    For p == 1:
        out = REDUCE_SUM(ABS(x), dims, keepdim)

    For p == 2:
        out = SQRT(REDUCE_SUM(MUL(x, x), dims, keepdim))

    Arbitrary p is not supported: the general form
        out = POW(REDUCE_SUM(POW(ABS(x), p), dims, keepdim), 1/p)
    would require wrapping p in a tensor of a known dtype, which is not
    available from the FX graph at this point.
    """

    # Target op(s) this pass intercepts; every other op is passed through.
    torch_linalg_vector_norm = (torch.ops.aten.linalg_vector_norm.default,)

    def call_operator(self, op, args, kwargs, meta):
        """Replace linalg_vector_norm with abs/mul/sum/sqrt primitives.

        Raises:
            ValueError: if the norm order is not 1 or 2, or if no reduction
                dimensions are provided.
        """
        if op not in self.torch_linalg_vector_norm:
            return super().call_operator(op, args, kwargs, meta)

        # aten schema: linalg_vector_norm(Tensor self, Scalar ord=2,
        #              int[1]? dim=None, bool keepdim=False, *,
        #              ScalarType? dtype=None)
        # Arguments may arrive positionally or as kwargs, so fall back to
        # kwargs before applying the schema defaults. (Previously kwargs were
        # silently dropped — e.g. keepdim=True passed by keyword decomposed
        # with keepdim=False, giving a wrong output shape.)
        input_tensor = args[0]
        norm_order = args[1] if len(args) > 1 else kwargs.get("ord", 2.0)
        norm_dim = args[2] if len(args) > 2 else kwargs.get("dim")
        keepdim = args[3] if len(args) > 3 else kwargs.get("keepdim", False)
        # NOTE(review): a "dtype" kwarg, if present, is ignored by this
        # decomposition — confirm upstream graphs never set it.

        if norm_order not in (1, 2):
            raise ValueError(
                f"The order of {norm_order}\n"
                f"is not supported for linalg_vector_norm operator"
            )

        if norm_dim is None:
            raise ValueError("The norm_dim for linalg_vector_norm is None.")

        # Normalize dim to a list of ints, as required by sum.dim_IntList.
        dims = [norm_dim] if isinstance(norm_dim, int) else list(norm_dim)

        if norm_order == 1:
            # L1 norm: sum(|x|) over dims.
            abs_out = super().call_operator(
                torch.ops.aten.abs.default, (input_tensor,), {}, meta
            )
            return super().call_operator(
                torch.ops.aten.sum.dim_IntList, (abs_out, dims, keepdim), {}, meta
            )

        # norm_order == 2 (guaranteed by the validation above).
        # L2 norm: sqrt(sum(x * x)) over dims.
        squared = super().call_operator(
            torch.ops.aten.mul.Tensor, (input_tensor, input_tensor), {}, meta
        )
        summed = super().call_operator(
            torch.ops.aten.sum.dim_IntList, (squared, dims, keepdim), {}, meta
        )
        return super().call_operator(
            torch.ops.aten.sqrt.default, (summed,), {}, meta
        )

backends/arm/operators/op_abs.py

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
)
1616
from executorch.backends.arm.operators.operator_validation_utils import (
1717
validate_num_inputs,
18+
validate_same_dtype,
1819
)
1920
from executorch.backends.arm.tosa_mapping import TosaArg
2021
from executorch.backends.arm.tosa_specification import TosaSpecification
@@ -43,13 +44,8 @@ def define_node(
4344
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
4445

4546
validate_num_inputs(self.target, inputs, 1)
46-
# Specification (0.80) states that input and output types
47-
# should all be the same
48-
if not (inputs[0].dtype == output.dtype):
49-
raise ValueError(
50-
"All inputs and outputs need same dtype."
51-
f"Got {inputs[0].dtype=}, {output.dtype=}"
52-
)
47+
validate_same_dtype(self.target, [*inputs, output])
48+
5349
# Handle int8 (quantized) and int32
5450
if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
5551
raise ValueError(
@@ -110,13 +106,7 @@ def define_node(
110106
import tosa_tools.v0_80.serializer.tosa_serializer as ts # type: ignore
111107

112108
validate_num_inputs(self.target, inputs, 1)
113-
# Specification (0.80) states that input and output types
114-
# should all be the same
115-
if not (inputs[0].dtype == output.dtype):
116-
raise ValueError(
117-
"All inputs and output need same dtype."
118-
f"Got {inputs[0].dtype=}, {output.dtype=}"
119-
)
109+
validate_same_dtype(self.target, [*inputs, output])
120110

121111
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
122112
# Call the inherited define_node for handling integers
@@ -163,14 +153,8 @@ def define_node(
163153
import serializer.tosa_serializer as ts # type: ignore
164154

165155
validate_num_inputs(self.target, inputs, 1)
156+
validate_same_dtype(self.target, [*inputs, output])
166157

167-
# Specification (1.0) states that input and output types
168-
# should all be the same
169-
if not (inputs[0].dtype == output.dtype):
170-
raise ValueError(
171-
"All inputs and outputs need same dtype."
172-
f"Got {inputs[0].dtype=}, {output.dtype=}"
173-
)
174158
# Handle int8 (quantized) and int32
175159
if not (inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]):
176160
raise ValueError(
@@ -232,14 +216,7 @@ def define_node(
232216
import serializer.tosa_serializer as ts # type: ignore
233217

234218
validate_num_inputs(self.target, inputs, 1)
235-
236-
# Specification (1.0) states that input and output types
237-
# should all be the same
238-
if not (inputs[0].dtype == output.dtype):
239-
raise ValueError(
240-
"All inputs and output need same dtype."
241-
f"Got {inputs[0].dtype=}, {output.dtype=}"
242-
)
219+
validate_same_dtype(self.target, [*inputs, output])
243220

244221
if inputs[0].dtype in [ts.DType.INT8, ts.DType.INT32]:
245222
# Call the inherited define_node for handling integers

0 commit comments

Comments
 (0)