Commit f4b7aa5

Update on "[Executorch][Export][3/N] Modularize export recipes"

Addresses (7) in the RFC: #12660

Changes:
1. Add a data class called `LoweringRecipe`
2. Modify the current xnnpack recipes to use lowering recipes

Fixes: #12933

Differential Revision: [D79120575](https://our.internmc.facebook.com/intern/diff/D79120575/)

[ghstack-poisoned]

2 parents 94de4aa + 57fbeb8 · commit f4b7aa5

321 files changed (+8949, −8679 lines)

.github/workflows/build-presets.yml

Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        preset: [macos, ios, ios-simulator, pybind, llm]
+        preset: [macos, ios, ios-simulator, pybind, profiling, llm]
     with:
       job-name: build
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

CMakeLists.txt

Lines changed: 24 additions & 0 deletions

@@ -278,6 +278,30 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
   )
 endif()

+if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
+  set(TORCHAO_BUILD_ATEN_OPS OFF)
+  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
+  set(TORCHAO_BUILD_CPU_AARCH64 ON)
+  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
+
+  list(APPEND TORCHAO_INCLUDE_DIRS
+    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
+    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
+    ${EXECUTORCH_ROOT}/third-party/ao
+  )
+
+  set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
+
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental)
+  executorch_target_link_options_shared_lib(torchao_ops_executorch)
+  list(APPEND _executorch_kernels torchao_ops_executorch)
+endif()
+
+if(EXECUTORCH_BUILD_TESTS)
+  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+  include(CTest)
+endif()
+

 # TODO(dbort): Fix these warnings and remove this flag.
 set(_common_compile_options -Wno-deprecated-declarations -fPIC)
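
Presumably this block is switched on at configure time in the standard CMake way, e.g. by passing -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON (the option name comes from the guard above); when enabled it builds the torchao experimental kernels from third-party/ao and appends torchao_ops_executorch to the list of kernel libraries that get linked in.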

CMakePresets.json

Lines changed: 20 additions & 0 deletions

@@ -100,6 +100,26 @@
       "list": ["Darwin", "Linux", "Windows"]
     }
   },
+  {
+    "name": "profiling",
+    "displayName": "Build ExecuTorch with Profiling Enabled",
+    "inherits": [
+      "common"
+    ],
+    "cacheVariables": {
+      "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/profiling.cmake",
+      "CMAKE_OSX_DEPLOYMENT_TARGET": "12.0"
+    },
+    "condition": {
+      "type": "inList",
+      "string": "${hostSystemName}",
+      "list": [
+        "Darwin",
+        "Linux",
+        "Windows"
+      ]
+    }
+  },
   {
     "name": "zephyr",
     "displayName": "Build ExecuTorch for Zephyr RTOS",

backends/apple/coreml/TARGETS

Lines changed: 2 additions & 0 deletions

@@ -17,6 +17,7 @@ runtime.python_library(
     name = "backend",
     srcs = glob([
         "compiler/*.py",
+        "logging.py",
     ]),
     visibility = [
         "@EXECUTORCH_CLIENTS",
@@ -33,6 +34,7 @@ runtime.python_library(
     name = "partitioner",
     srcs = glob([
         "partition/*.py",
+        "logging.py",
     ]),
     visibility = [
         "@EXECUTORCH_CLIENTS",

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 5 additions & 4 deletions

@@ -16,20 +16,20 @@

 import coremltools as ct
 import coremltools.optimize as cto
-
 from executorch.backends.apple.coreml import executorchcoreml
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     ExportedProgram,
     PreprocessResult,
 )
 from executorch.exir.backend.compile_spec_schema import CompileSpec

-logger = logging.getLogger(__name__)
-logger.setLevel(logging.WARNING)
-
 from executorch.backends.apple.coreml.compiler.torch_ops import *  # noqa: F401, F403

+logger = logging.getLogger(__name__)
+logger.setLevel(get_coreml_log_level(default_level=logging.WARNING))
+

 class COMPILE_SPEC_KEYS(Enum):
     COMPUTE_UNITS = "compute_units"
@@ -409,6 +409,7 @@ def preprocess(
         edge_program: ExportedProgram,
         compile_specs: List[CompileSpec],
     ) -> PreprocessResult:
+        logger.info(f"Edge program: {edge_program}")
         model_type: CoreMLBackend.MODEL_TYPE = (
             CoreMLBackend.model_type_from_compile_specs(
                 compile_specs,
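
Note that the module-level default here remains WARNING, so the new logger.info(f"Edge program: {edge_program}") call, which can dump the entire edge program, should only emit output when ET_COREML_LOG_LEVEL is set to INFO or DEBUG (see the logging helper added below).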

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 25 additions & 2 deletions

@@ -9,13 +9,15 @@
 # the op to the coremltools library.

 import torch as _torch
-from coremltools import _logger as logger
+from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
+    _get_kwinputs,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
     split,
+    to,
     transpose,
     unbind,
 )
@@ -24,6 +26,7 @@
     register_torch_op,
 )
 from coremltools.converters.mil.mil import types
+from executorch.exir.dim_order_utils import get_memory_format


 # https://github.com/apple/coremltools/pull/2556
@@ -44,6 +47,26 @@ def split_copy(context, node):
     split(context, node)


+@register_torch_op(
+    torch_alias=[
+        "dim_order_ops::_to_dim_order_copy",
+        "dim_order_ops._to_dim_order_copy",
+    ],
+    override=False,
+)
+def _to_dim_order_copy(context, node):
+    dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+    node.kwinputs.pop("dim_order")
+
+    # In CoreML, dim_order.val will be an ndarray, so we convert it to a list
+    dim_order = [int(d) for d in dim_order.val]
+    memory_format = get_memory_format(dim_order)
+    assert (
+        memory_format == _torch.contiguous_format
+    ), "Only contiguous memory format is supported in CoreML"
+    to(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
@@ -88,7 +111,7 @@ def dequantize_affine(context, node):
     out_np_dtype = None
     if len(inputs) > 7:
         out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
-        logger.warning(
+        _logger.warning(
             f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
         )
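
To make the contiguity check concrete, here is a minimal sketch of the dim-order-to-memory-format mapping the new handler relies on; it assumes get_memory_format accepts a plain Python list, which is what the handler passes after converting the ndarray:

import torch
from executorch.exir.dim_order_utils import get_memory_format

# The identity dim order [0, 1, 2, 3] describes a contiguously laid-out
# 4-D tensor; this is the only layout _to_dim_order_copy accepts.
assert get_memory_format([0, 1, 2, 3]) == torch.contiguous_format

# A channels-last 4-D tensor instead has dim order [0, 2, 3, 1]; the
# assert in the handler above would reject it.
assert get_memory_format([0, 2, 3, 1]) == torch.channels_last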

backends/apple/coreml/logging.py

Lines changed: 24 additions & 0 deletions

@@ -0,0 +1,24 @@
+# Copyright © 2023 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+import logging
+import os
+from typing import Optional
+
+
+def get_coreml_log_level(default_level: int) -> Optional[str]:
+    level_str = os.environ.get("ET_COREML_LOG_LEVEL", "").upper()
+    if level_str == "":
+        return default_level
+
+    level_map = {
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARNING": logging.WARNING,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+    }
+    if level_str not in level_map:
+        raise ValueError(f"Invalid ET_COREML_LOG_LEVEL: {level_str}")
+    return level_map[level_str]
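
A minimal usage sketch of the helper, assuming the module is importable as executorch.backends.apple.coreml.logging (the path this commit adds). Because the callers above run get_coreml_log_level at import time, the environment variable has to be set before the CoreML modules are first imported:

import logging
import os

# Must be set before executorch.backends.apple.coreml.* is imported,
# since those modules call logger.setLevel(...) at import time.
os.environ["ET_COREML_LOG_LEVEL"] = "DEBUG"

from executorch.backends.apple.coreml.logging import get_coreml_log_level

# The environment variable overrides the caller-supplied default.
assert get_coreml_log_level(default_level=logging.WARNING) == logging.DEBUG

# With the variable unset (or empty), the default is returned unchanged.
del os.environ["ET_COREML_LOG_LEVEL"]
assert get_coreml_log_level(default_level=logging.WARNING) == logging.WARNING

# An unrecognized value such as "VERBOSE" raises ValueError.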

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 3 additions & 1 deletion

@@ -10,6 +10,8 @@
 import torch

 from executorch.backends.apple.coreml.compiler import CoreMLBackend
+
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.compile_spec_schema import CompileSpec

 from executorch.exir.backend.partitioner import (
@@ -23,7 +25,7 @@
 from torch.fx.passes.operator_support import OperatorSupportBase

 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+logger.setLevel(get_coreml_log_level(default_level=logging.INFO))


 def _is_view_op(op: torch._ops.OpOverload) -> bool:

backends/arm/README.md

Lines changed: 2 additions & 2 deletions

@@ -181,8 +181,8 @@ The Arm EthosU Backend should be considered a prototype quality at this point, l
 ## Current flows

 The EthosUBackend has a two stage process,
-- Compile to TOSA to rationalise the graph into known hardware support profiles. Currently this is to v0.80 TOSA BI with specific concern to a subset which gives support on Ethos-U55 and Ethos-U85, the target of the initial prototype efforts. This calls into the TOSABackend.
-- Lower via the ethos-u-vela compilation flow which takes TOSA v0.80 as an input and produces a low level commandstream for the hardware which is then passed via the delegate to the ethos-u-core-driver for direct execution.
+- Compile to TOSA to rationalise the graph into known hardware support profiles. Currently this is to v1.0 TOSA INT with specific concern to a subset which gives support on Ethos-U55 and Ethos-U85, the target of the initial prototype efforts. This calls into the TOSABackend.
+- Lower via the ethos-u-vela compilation flow which takes TOSA v1.0 as an input and produces a low level commandstream for the hardware which is then passed via the delegate to the ethos-u-core-driver for direct execution.

 The EthosUPartitioner is currenly used to ensure the operations converted are Ethos-U compatible, but will be extended to offer spec-correct TOSA Base inference and TOSA Main Inference generation in future.

backends/arm/TARGETS

Lines changed: 12 additions & 2 deletions

@@ -1,10 +1,20 @@
 # @noautodeps
 load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+
+python_library(
+    name = "ethosu_partitioner",
+    srcs = [
+        "ethosu/__init__.py",
+        "ethosu/backend.py",
+        "ethosu/partitioner.py"
+    ],
+    deps = [
+        ":arm_partitioner",
+    ]
+)
 python_library(
     name = "arm_partitioner",
     srcs = [
-        "ethosu_backend.py",
-        "ethosu_partitioner.py",
         "tosa_backend.py",
         "tosa_partitioner.py",
         "vgf_backend.py",
