
Commit 935da89

Merge branch 'main' into torchao-build
2 parents: eab0f73 + da0c80a

39 files changed: +1917 −1005 lines

backends/apple/coreml/TARGETS

Lines changed: 2 additions & 0 deletions
@@ -17,6 +17,7 @@ runtime.python_library(
     name = "backend",
     srcs = glob([
         "compiler/*.py",
+        "logging.py",
     ]),
     visibility = [
         "@EXECUTORCH_CLIENTS",
@@ -33,6 +34,7 @@ runtime.python_library(
     name = "partitioner",
     srcs = glob([
         "partition/*.py",
+        "logging.py",
     ]),
     visibility = [
         "@EXECUTORCH_CLIENTS",

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 5 additions & 4 deletions
@@ -16,20 +16,20 @@
 
 import coremltools as ct
 import coremltools.optimize as cto
-
 from executorch.backends.apple.coreml import executorchcoreml
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     ExportedProgram,
     PreprocessResult,
 )
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 
-logger = logging.getLogger(__name__)
-logger.setLevel(logging.WARNING)
-
 from executorch.backends.apple.coreml.compiler.torch_ops import *  # noqa: F401, F403
 
+logger = logging.getLogger(__name__)
+logger.setLevel(get_coreml_log_level(default_level=logging.WARNING))
+
 
 class COMPILE_SPEC_KEYS(Enum):
     COMPUTE_UNITS = "compute_units"
@@ -409,6 +409,7 @@ def preprocess(
     edge_program: ExportedProgram,
     compile_specs: List[CompileSpec],
 ) -> PreprocessResult:
+    logger.info(f"Edge program: {edge_program}")
     model_type: CoreMLBackend.MODEL_TYPE = (
         CoreMLBackend.model_type_from_compile_specs(
             compile_specs,
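
Note: the new logger.info call prints the entire edge program, which can be verbose. Under the module's default WARNING level it stays silent; it only appears when ET_COREML_LOG_LEVEL is set to INFO or DEBUG via the new logging helper (see logging.py below).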

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 25 additions & 2 deletions
@@ -9,13 +9,15 @@
 # the op to the coremltools library.
 
 import torch as _torch
-from coremltools import _logger as logger
+from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
+    _get_kwinputs,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
     split,
+    to,
     transpose,
     unbind,
 )
@@ -24,6 +26,7 @@
     register_torch_op,
 )
 from coremltools.converters.mil.mil import types
+from executorch.exir.dim_order_utils import get_memory_format
 
 
 # https://github.com/apple/coremltools/pull/2556
@@ -44,6 +47,26 @@ def split_copy(context, node):
     split(context, node)
 
 
+@register_torch_op(
+    torch_alias=[
+        "dim_order_ops::_to_dim_order_copy",
+        "dim_order_ops._to_dim_order_copy",
+    ],
+    override=False,
+)
+def _to_dim_order_copy(context, node):
+    dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+    node.kwinputs.pop("dim_order")
+
+    # In CoreML, dim_order.val will be an ndarray, so we convert it to a list
+    dim_order = [int(d) for d in dim_order.val]
+    memory_format = get_memory_format(dim_order)
+    assert (
+        memory_format == _torch.contiguous_format
+    ), "Only contiguous memory format is supported in CoreML"
+    to(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
@@ -88,7 +111,7 @@ def dequantize_affine(context, node):
     out_np_dtype = None
     if len(inputs) > 7:
         out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[7].val]
-        logger.warning(
+        _logger.warning(
             f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
         )
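
For readers unfamiliar with the registration mechanism these ops use, here is a minimal hypothetical sketch of the same pattern; the alias my_ns::my_identity and the handler are invented for illustration and are not part of this commit.

# Hypothetical sketch of the @register_torch_op pattern used above; the alias
# "my_ns::my_identity" is invented for illustration only.
from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil.frontend.torch.ops import _get_inputs
from coremltools.converters.mil.frontend.torch.torch_op_registry import (
    register_torch_op,
)


@register_torch_op(torch_alias=["my_ns::my_identity"], override=False)
def my_identity(context, node):
    # Pull the single torch input, emit a MIL op, and bind the result
    # to the torch node's name so downstream ops can reference it.
    x = _get_inputs(context, node, expected=1)[0]
    context.add(mb.identity(x=x), torch_name=node.name)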

backends/apple/coreml/logging.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+# Copyright © 2023 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+import logging
+import os
+from typing import Optional
+
+
+def get_coreml_log_level(default_level: int) -> Optional[str]:
+    level_str = os.environ.get("ET_COREML_LOG_LEVEL", "").upper()
+    if level_str == "":
+        return default_level
+
+    level_map = {
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARNING": logging.WARNING,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+    }
+    if level_str not in level_map:
+        raise ValueError(f"Invalid ET_COREML_LOG_LEVEL: {level_str}")
+    return level_map[level_str]
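
A quick usage sketch (hypothetical script, not part of this commit). Because the backend and partitioner call get_coreml_log_level at import time, the environment variable must be set before those modules are imported:

# Hypothetical usage: set ET_COREML_LOG_LEVEL before importing the Core ML
# backend/partitioner, since their setLevel(...) calls run at module import time.
import logging
import os

os.environ["ET_COREML_LOG_LEVEL"] = "DEBUG"

from executorch.backends.apple.coreml.logging import get_coreml_log_level

assert get_coreml_log_level(default_level=logging.WARNING) == logging.DEBUG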

backends/apple/coreml/partition/coreml_partitioner.py

Lines changed: 3 additions & 1 deletion
@@ -10,6 +10,8 @@
 import torch
 
 from executorch.backends.apple.coreml.compiler import CoreMLBackend
+
+from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 from executorch.exir.backend.partitioner import (
@@ -23,7 +25,7 @@
 from torch.fx.passes.operator_support import OperatorSupportBase
 
 logger = logging.getLogger(__name__)
-logger.setLevel(logging.INFO)
+logger.setLevel(get_coreml_log_level(default_level=logging.INFO))
 
 
 def _is_view_op(op: torch._ops.OpOverload) -> bool:
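
For context, a hedged sketch of where these partitioner logs surface in a typical lowering flow; the toy model and surrounding calls are illustrative, not from this commit:

# Illustrative lowering flow: with this change the partitioner still defaults
# to INFO-level logging, but ET_COREML_LOG_LEVEL can now override it.
import torch
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from executorch.exir import to_edge_transform_and_lower

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU()).eval()
ep = torch.export.export(model, (torch.randn(1, 8),))
lowered = to_edge_transform_and_lower(ep, partitioner=[CoreMLPartitioner()])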

backends/arm/_passes/unsqueeze_scalar_placeholders_pass.py

Lines changed: 18 additions & 10 deletions
@@ -7,6 +7,7 @@
 
 import torch
 from executorch.exir.pass_base import ExportPass, PassResult
+from torch._export.utils import is_buffer, is_param
 
 
 class UnsqueezeScalarPlaceholdersPass(ExportPass):
@@ -19,23 +20,27 @@ def __init__(self, exported_program):
         self.exported_program = exported_program
         super().__init__()
 
-    def _is_inputs_to_buffers_or_parameters(self, node):
-        return (
-            node.name in self.exported_program.graph_signature.inputs_to_buffers
-            or node.name in self.exported_program.graph_signature.inputs_to_parameters
-        )
-
     def call(self, graph_module: torch.fx.GraphModule):
         for node in graph_module.graph.nodes:
             if node.op != "placeholder":
                 continue
             rank = node.meta["val"].dim()
             if rank == 0:
-                if not self._is_inputs_to_buffers_or_parameters(node):
+                if is_buffer(self.exported_program, node):
+                    name = self.exported_program.graph_signature.inputs_to_buffers[
+                        node.name
+                    ]
+                elif is_param(self.exported_program, node):
+                    name = self.exported_program.graph_signature.inputs_to_parameters[
+                        node.name
+                    ]
+                else:
                     continue
-                tensor = self.exported_program.state_dict[node.name]
+
+                tensor = self.exported_program.state_dict[name]
+
                 if tensor.dim() == 0:
-                    self.exported_program.state_dict[node.name] = tensor.unsqueeze(0)
+                    self.exported_program.state_dict[name] = tensor.unsqueeze(0)
                     node.meta["val"] = node.meta["val"].fake_mode.from_tensor(
                         tensor.unsqueeze(0), static_shapes=True
                     )
@@ -53,6 +58,9 @@ def ensures(self, graph_module: torch.fx.GraphModule):
             if node.op == "placeholder":
                 rank = node.meta["val"].dim()
                 if rank == 0:
-                    if not self._is_inputs_to_buffers_or_parameters(node):
+                    if not (
+                        is_buffer(self.exported_program, node)
+                        or is_param(self.exported_program, node)
+                    ):
                         continue
                     raise ValueError("Placeholders of rank 0 are not supported!")
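
The key subtlety this rewrite addresses: a placeholder's node name is not the state_dict key. A minimal sketch (toy module, not from this commit) showing the mapping the pass now goes through:

# Toy demonstration: buffer placeholders are named e.g. "b_scale" in the graph,
# while the state_dict key is "scale"; inputs_to_buffers maps between the two,
# so indexing state_dict by node.name would miss.
import torch


class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer("scale", torch.tensor(2.0))  # rank-0 buffer

    def forward(self, x):
        return x * self.scale


ep = torch.export.export(M(), (torch.ones(3),))
print(ep.graph_signature.inputs_to_buffers)  # {'b_scale': 'scale'} (typical)
print(list(ep.state_dict.keys()))            # ['scale']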

backends/cadence/aot/compiler_funcs.py

Lines changed: 3 additions & 3 deletions
@@ -35,9 +35,9 @@ def trace(
     decomp_table = torch.export.default_decompositions()
     # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
     remove_decompositions(decomp_table, ops_to_keep)
-    program = torch.export.export_for_training(
-        model, inputs, strict=strict
-    ).run_decompositions(decomp_table)
+    program = torch.export.export(model, inputs, strict=strict).run_decompositions(
+        decomp_table
+    )
 
     return program
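
A minimal sketch of the updated tracing flow (toy model; remove_decompositions is elided since its table-pruning role is shown in the hunk above):

# Sketch: torch.export.export replaces export_for_training; the resulting
# ExportedProgram is then decomposed with a (possibly pruned) decomp table.
import torch

model = torch.nn.Linear(4, 4)
inputs = (torch.randn(2, 4),)

decomp_table = torch.export.default_decompositions()
# ... remove_decompositions(decomp_table, ops_to_keep) would prune entries here ...
program = torch.export.export(model, inputs, strict=True).run_decompositions(
    decomp_table
)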

backends/cadence/hifi/operators/operators.h

Lines changed: 40 additions & 43 deletions
@@ -8,16 +8,13 @@
 
 #pragma once
 
+#include "executorch/runtime/core/exec_aten/exec_aten.h"
+#include "executorch/runtime/kernel/kernel_runtime_context.h"
+
 #define ET_FORALL_CADENCE_QUANTIZED_TYPES(_) \
   _(uint8_t, Byte)                           \
   _(int8_t, Char)
 
-using ::executorch::aten::IntArrayRef;
-using ::executorch::aten::optional;
-using ::executorch::aten::ScalarType;
-using ::executorch::aten::Tensor;
-using ::executorch::runtime::KernelRuntimeContext;
-
 namespace cadence {
 namespace impl {
 namespace HiFi {
@@ -62,68 +59,68 @@ void quantized_relu_out(
     ::executorch::aten::Tensor& output);
 
 void quantized_linear_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
-    const Tensor& in,
-    const Tensor& weight,
-    const Tensor& bias,
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& in,
+    const ::executorch::aten::Tensor& weight,
+    const ::executorch::aten::Tensor& bias,
     int64_t in_zero_point,
-    const Tensor& weight_zero_point,
-    const Tensor& out_multiplier,
-    const Tensor& out_shift,
+    const ::executorch::aten::Tensor& weight_zero_point,
+    const ::executorch::aten::Tensor& out_multiplier,
+    const ::executorch::aten::Tensor& out_shift,
     int64_t out_zero_point,
-    __ET_UNUSED const optional<Tensor>& offset,
-    Tensor& out);
+    const ::executorch::aten::optional<::executorch::aten::Tensor>& offset,
+    ::executorch::aten::Tensor& out);
 
 void quantized_linear_per_tensor_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
-    const Tensor& in,
-    const Tensor& weight,
-    const Tensor& bias,
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& in,
+    const ::executorch::aten::Tensor& weight,
+    const ::executorch::aten::Tensor& bias,
     int64_t in_zero_point,
     int64_t weight_zero_point,
     int64_t out_multiplier,
     int64_t out_shift,
     int64_t out_zero_point,
-    __ET_UNUSED const optional<Tensor>& offset,
-    Tensor& out);
+    const ::executorch::aten::optional<::executorch::aten::Tensor>& offset,
+    ::executorch::aten::Tensor& out);
 
 void quantized_conv_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
-    const Tensor& input,
-    const Tensor& weight,
-    const Tensor& bias,
-    IntArrayRef stride,
-    IntArrayRef padding,
-    IntArrayRef dilation,
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    const ::executorch::aten::Tensor& weight,
+    const ::executorch::aten::Tensor& bias,
+    ::executorch::aten::IntArrayRef stride,
+    ::executorch::aten::IntArrayRef padding,
+    ::executorch::aten::IntArrayRef dilation,
     int64_t groups,
     int64_t in_zero_point,
-    const Tensor& weight_zero_point,
-    const Tensor& bias_scale,
+    const ::executorch::aten::Tensor& weight_zero_point,
+    const ::executorch::aten::Tensor& bias_scale,
     double output_scale,
     int64_t output_zero_point,
-    __ET_UNUSED const Tensor& out_multiplier,
-    __ET_UNUSED const Tensor& out_shift,
+    const ::executorch::aten::Tensor& out_multiplier,
+    const ::executorch::aten::Tensor& out_shift,
     bool channel_last,
-    Tensor& out);
+    ::executorch::aten::Tensor& out);
 
 void quantized_conv_per_tensor_out(
-    __ET_UNUSED KernelRuntimeContext& ctx,
-    const Tensor& input,
-    const Tensor& weight,
-    const Tensor& bias,
-    IntArrayRef stride,
-    IntArrayRef padding,
-    IntArrayRef dilation,
+    ::executorch::runtime::KernelRuntimeContext& ctx,
+    const ::executorch::aten::Tensor& input,
+    const ::executorch::aten::Tensor& weight,
+    const ::executorch::aten::Tensor& bias,
+    ::executorch::aten::IntArrayRef stride,
+    ::executorch::aten::IntArrayRef padding,
+    ::executorch::aten::IntArrayRef dilation,
     int64_t groups,
    int64_t in_zero_point,
     int64_t weight_zero_point,
     double bias_scale,
     double output_scale,
     int64_t output_zero_point,
-    __ET_UNUSED int64_t out_multiplier,
-    __ET_UNUSED int64_t out_shift,
+    int64_t out_multiplier,
+    int64_t out_shift,
     bool channel_last,
-    Tensor& out);
+    ::executorch::aten::Tensor& out);
 
 } // namespace native
 } // namespace HiFi
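
The net effect here is header hygiene rather than behavior: the removed file-scope using declarations injected unqualified names (Tensor, optional, IntArrayRef, ...) into every translation unit that includes operators.h, so the declarations now spell out the fully qualified types instead, with the new includes making those types self-contained. The __ET_UNUSED annotations were dropped from these declarations at the same time.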

backends/vulkan/quantizer/TARGETS

Lines changed: 9 additions & 3 deletions
@@ -4,11 +4,17 @@ oncall("executorch")
 
 python_library(
     name = "vulkan_quantizer",
-    srcs = [
-        "vulkan_quantizer.py",
+    srcs = ["vulkan_quantizer.py"],
+    deps = [
+        ":vulkan_quantizer_utils",
+        "//caffe2:torch",
     ],
+)
+
+python_library(
+    name = "vulkan_quantizer_utils",
+    srcs = ["vulkan_quantizer_utils.py"],
     deps = [
         "//caffe2:torch",
-        "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer_utils",
     ],
 )

backends/vulkan/quantizer/vulkan_quantizer.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
 from typing import Callable, Optional
 
 import torch
-from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import (
+from executorch.backends.vulkan.quantizer.vulkan_quantizer_utils import (
     _convert_scalars_to_attrs,
     OP_TO_ANNOTATOR,
     propagate_annotation,
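
Together with the TARGETS change above, this makes the Vulkan quantizer self-contained: _convert_scalars_to_attrs, OP_TO_ANNOTATOR, and propagate_annotation now come from a Vulkan-local vulkan_quantizer_utils module rather than from the XNNPACK backend's quantizer utilities.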
