From f56fcd739a38fecda8080b09ba33e82a02bd95f3 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Tue, 29 Jul 2025 16:06:21 -0700
Subject: [PATCH] Support dim_order in CoreML

---
 backends/apple/coreml/compiler/torch_ops.py | 23 +++++++++++++++++++++
 examples/apple/coreml/llama/export.py       |  6 +-----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py
index 479d744a97e..ddf17f3813d 100644
--- a/backends/apple/coreml/compiler/torch_ops.py
+++ b/backends/apple/coreml/compiler/torch_ops.py
@@ -13,9 +13,11 @@
 from coremltools.converters.mil.frontend import _utils
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
+    _get_kwinputs,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
     split,
+    to,
     transpose,
     unbind,
 )
@@ -24,6 +26,7 @@
     register_torch_op,
 )
 from coremltools.converters.mil.mil import types
+from executorch.exir.dim_order_utils import get_memory_format
 
 
 # https://github.com/apple/coremltools/pull/2556
@@ -44,6 +47,26 @@ def split_copy(context, node):
     split(context, node)
 
 
+@register_torch_op(
+    torch_alias=[
+        "dim_order_ops::_to_dim_order_copy",
+        "dim_order_ops._to_dim_order_copy",
+    ],
+    override=False,
+)
+def _to_dim_order_copy(context, node):
+    """Lower `_to_dim_order_copy` via `to`; rejects non-contiguous dim orders."""
+    dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+    if node.kwinputs:
+        node.kwinputs.pop("dim_order", None)  # strip it before delegating to `to`
+    if dim_order is not None:  # no dim_order given: nothing to check (TODO confirm)
+        # In CoreML, dim_order.val will be an ndarray, so we convert it to a list
+        memory_format = get_memory_format([int(d) for d in dim_order.val])
+        if memory_format != _torch.contiguous_format:  # survives `python -O`
+            raise AssertionError("Only contiguous memory format is supported in CoreML")
+    to(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
diff --git a/examples/apple/coreml/llama/export.py b/examples/apple/coreml/llama/export.py
index 8241226d34b..48edc3c0669 100644
--- a/examples/apple/coreml/llama/export.py
+++ b/examples/apple/coreml/llama/export.py
@@ -21,7 +21,7 @@
 
 from executorch.exir import to_edge_transform_and_lower
 from executorch.exir.backend.utils import format_delegated_graph
-from executorch.exir.capture._config import EdgeCompileConfig, ExecutorchBackendConfig
+from executorch.exir.capture._config import ExecutorchBackendConfig
 from executorch.exir.passes import MemoryPlanningPass
 from executorch.exir.passes.quant_fusion_pass import QuantFusionPass
 from executorch.exir.passes.sym_shape_eval_pass import ConstraintBasedSymShapeEvalPass
@@ -203,10 +203,6 @@ def main() -> None:
     edge_manager = to_edge_transform_and_lower(
         ep,
         partitioner=[partitioner],
-        compile_config=EdgeCompileConfig(
-            # TODO: fix lowering when dim_order is enabled
-            _skip_dim_order=True,
-        ),
     )
 
     print("Delegated program")