
Commit f520003

Merge branch 'main' into android-preset
2 parents: 3866941 + 72ef7b1

File tree: 101 files changed (+3564, -819 lines)

Note: large commits have some content hidden by default, so the two new files below are shown without their paths.

.ci/scripts/test_huggingface_optimum_model.py

Lines changed: 9 additions & 3 deletions

@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()

     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
             f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
         )

+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
     with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
         test_fn(
             model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
             recipe=args.recipe,
             quantize=args.quantize,
         )
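A note on the pattern above: the script keeps its temporary-directory default and only overrides it when --model_dir is passed. A minimal runnable sketch of the same fallback, where run_test is a hypothetical stand-in for the real test function:

    import argparse
    import tempfile

    def run_test(model_dir: str) -> None:
        # Stand-in for the real test; it would write the .pte file into model_dir.
        print(f"writing artifacts to {model_dir}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=False)
    args = parser.parse_args()

    # Same fallback as in the diff: use a fresh temporary directory
    # unless the caller supplied a directory of their own.
    with tempfile.TemporaryDirectory() as tmp_dir:
        run_test(model_dir=tmp_dir if args.model_dir is None else args.model_dir)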

.ci/scripts/test_model.sh

Lines changed: 4 additions & 1 deletion

@@ -199,6 +199,9 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=albert
   elif [[ "${MODEL_NAME}" == "bert" ]]; then
     EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+    EXPORT_SCRIPT=conv_former
+    EXTRA_FLAGS="--dataset imagenet-mini/val"
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
   elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +241,7 @@ test_model_with_qnn() {
     "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
       SCRIPT_FOLDER=oss_scripts
       ;;
-    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+    "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
      pip install evaluate
      SCRIPT_FOLDER=oss_scripts
      # 16bit models will encounter op validation fail on some operations,
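The shell dispatch above maps a model name to an export script plus optional extra flags. A hypothetical Python mirror of that lookup, just to make the structure explicit (table contents abbreviated):

    # Hypothetical mirror of the elif chain in test_model_with_qnn():
    # each model name maps to (EXPORT_SCRIPT, EXTRA_FLAGS).
    EXPORT_TABLE = {
        "albert": ("albert", ""),
        "bert": ("bert", ""),
        "conv_former": ("conv_former", "--dataset imagenet-mini/val"),
        "cvt": ("cvt", ""),
    }

    def resolve(model_name: str) -> tuple[str, str]:
        # Unknown names raise KeyError here.
        return EXPORT_TABLE[model_name]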

.github/workflows/trunk.yml

Lines changed: 4 additions & 1 deletion

@@ -568,7 +568,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -815,6 +815,9 @@ jobs:
           smollm|coreml_fp32_gpu|--quantize,
           llama3|coreml_fp32_gpu|--quantize,
           olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
         ]
         fail-fast: false
     with:
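Each new matrix entry packs three fields into one pipe-delimited string, which is presumably split downstream before the test script runs. A sketch of that split, assuming the exact three-field model|recipe|flags layout:

    entry = "bert|coreml_fp32_gpu|--quantize"
    model, recipe, flags = entry.split("|")
    assert (model, recipe, flags) == ("bert", "coreml_fp32_gpu", "--quantize")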
(new file; path hidden in the large-commit view)

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+#pragma once
+
+namespace executorch::core_ml_backend_delegate {
+void register_backend_coreml();
+} // namespace executorch::core_ml_backend_delegate
(new file; path hidden in the large-commit view)

Lines changed: 29 additions & 0 deletions

@@ -0,0 +1,29 @@
+#pragma once
+
+#include "executorch_operations.h"
+#import <coreml_backend/delegate.h>
+#import "ETCoreMLStrings.h"
+#import "backend_delegate.h"
+
+#import <executorch/runtime/core/evalue.h>
+#import <executorch/runtime/platform/log.h>
+#import <executorch/runtime/backend/interface.h>
+
+#include <array>
+#import <memory>
+
+namespace executorch::core_ml_backend_delegate {
+using executorch::runtime::get_backend_class;
+
+static std::unique_ptr<executorch::backends::coreml::CoreMLBackendDelegate> backendInterfaceLazy_;
+
+void register_backend_coreml() {
+  auto backendInterface = executorch::runtime::get_backend_class(ETCoreMLStrings.delegateIdentifier.UTF8String);
+  if (backendInterface == nullptr) {
+    backendInterfaceLazy_ = std::make_unique<executorch::backends::coreml::CoreMLBackendDelegate>();
+    executorch::runtime::Backend backend{ETCoreMLStrings.delegateIdentifier.UTF8String, backendInterfaceLazy_.get()};
+    std::ignore = register_backend(backend);
+  }
+}
+
+} // namespace executorch::core_ml_backend_delegate

backends/arm/CMakeLists.txt

Lines changed: 7 additions & 6 deletions

@@ -14,7 +14,9 @@ endif()

 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

-set(_common_include_directories ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
+set(_common_include_directories
+  ${EXECUTORCH_ROOT}/.. ${EXECUTORCH_ROOT}/runtime/core/portable_type/c10
+)
 add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)


@@ -34,13 +36,12 @@ set(_arm_baremetal_sources backends/arm/runtime/EthosUBackend.cpp
 list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/")

 add_library(executorch_delegate_ethos_u STATIC ${_arm_baremetal_sources})
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${_common_include_directories}
-)
-target_include_directories(
-  executorch_delegate_ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR}
+target_link_libraries(
+  executorch_delegate_ethos_u PUBLIC executorch_core ethosu_core_driver
 )

+install(TARGETS executorch_delegate_ethos_u EXPORT ExecuTorchTargets)
+
 # end config for bare metal builds
 endif()
backends/arm/_passes/annotate_decomposed_matmul.py

Lines changed: 3 additions & 3 deletions

@@ -12,7 +12,7 @@
 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node

-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -62,7 +62,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         }
         for partition in matmul_partitions:
             quantized_input = all(
-                input_node.target in dq_ops for input_node in partition.input_nodes
+                input_node.target in DQ_OPS for input_node in partition.input_nodes
             )
             matmul_node = [
                 node for node in partition.nodes if node.target in matmul_targets
@@ -93,7 +93,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
                     graph_module.graph.erase_node(partition_input)

             partition_output = list(partition.output_nodes[0].users)[0]
-            quantized_output = partition_output.target in q_ops
+            quantized_output = partition_output.target in Q_OPS
             if quantized_output:
                 with graph_module.graph.inserting_after(matmul_node):
                     # Create q-node after matmul

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 8 additions & 7 deletions

@@ -15,8 +15,9 @@
     get_param_tensor,
     is_param_node,
 )
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS

-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.tosa_quant_utils import QuantArgs

 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.dialects.edge._ops import EdgeOpOverload
@@ -109,7 +110,7 @@ def fold_and_annotate_arg(
             return

         arg_quant_params = None
-        if arg.target in dq_ops:
+        if arg.target in DQ_OPS:
             args = arg.args
             scales = args[1]
             if (
@@ -137,9 +138,9 @@ def fold_and_annotate_arg(
         if input_qparams is not None:
             node.meta["input_qparams"][i] = input_qparams
         for n in nodes_to_remove:
-            if n.target not in dq_ops:
+            if n.target not in DQ_OPS:
                 raise RuntimeError(
-                    f"Expected one of {dq_ops} dq_op, got {n.target}"
+                    f"Expected one of {DQ_OPS} dq_op, got {n.target}"
                 )

             node.replace_input_with(n, cast(Node, n.args[0]))
@@ -154,7 +155,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             if n.op != "call_function":
                 continue
             # Don't fold chains of quant-ops into each other.
-            if n.target in (*q_ops, *dq_ops):
+            if n.target in (*Q_OPS, *DQ_OPS):
                 continue

             # Make sure we haven't already set qparams meta information on the node
@@ -184,7 +185,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
             # Copy the users, since we are modifying it.
             users_copy = copy.copy(n.users)
             for i, user in enumerate(users_copy):
-                if user.target not in q_ops:
+                if user.target not in Q_OPS:
                     continue

                 # quantization node found here, store the quantization parameters in meta value
@@ -221,7 +222,7 @@ def call(self, graph_module: GraphModule) -> PassResult:

             # Make sure we have a quantized operator
             user = list(n.users)[0]
-            if user.target not in q_ops:
+            if user.target not in Q_OPS:
                 continue

             qargs = QuantArgs.from_operator(user.target, user.args)

backends/arm/_passes/fuse_quantized_activation_pass.py

Lines changed: 3 additions & 2 deletions

@@ -6,7 +6,8 @@
 # pyre-unsafe

 import torch
-from executorch.backends.arm.tosa_quant_utils import q_ops, QuantArgs
+from executorch.backends.arm.constants import Q_OPS
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch.fx import Node
@@ -21,7 +22,7 @@ def _is_fuseable_quantized_activation(node: Node):
     min_val = node.args[1]
     is_fuseable = min_val == 0

-    is_quantized = len(node.users) == 1 and next(iter(node.users)).target in q_ops
+    is_quantized = len(node.users) == 1 and next(iter(node.users)).target in Q_OPS
     if is_fuseable and is_quantized:
         quant_node = next(iter(node.users))
         quant_args = QuantArgs.from_operator(quant_node.target, quant_node.args)

backends/arm/_passes/insert_rescales_pass.py

Lines changed: 4 additions & 3 deletions

@@ -9,7 +9,8 @@

 import torch
 from executorch.backends.arm._passes.arm_pass_utils import create_node
-from executorch.backends.arm.tosa_quant_utils import dq_ops, q_ops, QuantArgs
+from executorch.backends.arm.constants import DQ_OPS, Q_OPS
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.pass_base import ExportPass, PassResult
 from torch import Tensor
 from torch.fx import GraphModule, Node
@@ -94,11 +95,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
         for node in graph_module.graph.nodes:
             node = cast(Node, node)

-            if node.target not in dq_ops:
+            if node.target not in DQ_OPS:
                 continue
            # Copy users since we remove them while iterating, modifying the node.users list.
             for user in copy(node.users):
-                if user.target in q_ops:
+                if user.target in Q_OPS:
                     self.fold_dq_q_to_rescale(node, user, graph_module)
                     modified = True
             if len(node.users) == 0:
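All of the Arm pass changes above are the same mechanical migration: the dq_ops and q_ops tuples move out of tosa_quant_utils and become the shared DQ_OPS and Q_OPS constants in executorch.backends.arm.constants, leaving tosa_quant_utils to export only QuantArgs. A minimal sketch of the membership-test pattern these passes share; the constants shown here are placeholders, not the real contents of that module:

    # Illustrative stand-in for executorch.backends.arm.constants; the real
    # tuples hold operator overloads, not the placeholder strings used here.
    DQ_OPS = ("quantized_decomposed.dequantize_per_tensor.default",)
    Q_OPS = ("quantized_decomposed.quantize_per_tensor.default",)

    def output_is_quantized(node) -> bool:
        # The check the passes rely on: a node's output counts as quantized
        # when one of its users is a q-op.
        return any(user.target in Q_OPS for user in node.users)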
