pytorch
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 3 additions & 3 deletions
Lines changed: 3 additions & 3 deletions
diff --git a/README.md b/README.md
Lines changed: 1 addition & 1 deletion
Lines changed: 1 addition & 1 deletion
diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS
Lines changed: 24 additions & 5 deletions
Lines changed: 24 additions & 5 deletions
diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py
Lines changed: 23 additions & 0 deletions
Lines changed: 23 additions & 0 deletions
diff --git a/backends/apple/coreml/recipes/coreml_recipe_provider.py b/backends/apple/coreml/recipes/coreml_recipe_provider.py
Lines changed: 21 additions & 3 deletions
Lines changed: 21 additions & 3 deletions
diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py
Lines changed: 0 additions & 25 deletions
Lines changed: 0 additions & 25 deletions
diff --git a/backends/apple/coreml/test/test_torch_ops.py b/backends/apple/coreml/test/test_torch_ops.py
Lines changed: 23 additions & 0 deletions
Lines changed: 23 additions & 0 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 2 additions & 1 deletion
Lines changed: 2 additions & 1 deletion
diff --git a/backends/arm/_passes/decompose_meandim_pass.py b/backends/arm/_passes/decompose_meandim_pass.py
Lines changed: 5 additions & 1 deletion
Lines changed: 5 additions & 1 deletion
diff --git a/backends/arm/_passes/remove_clone_pass.py b/backends/arm/_passes/remove_clone_pass.py
Lines changed: 1 addition & 1 deletion
Lines changed: 1 addition & 1 deletion
@@ -823,10 +823,10 @@ jobs:
           --tsv_path ${TSV_PATH}
         echo "::endgroup::"
 
-  test-huggingface-transformers-coreml:
+  test-huggingface-transformers-macos:
     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
     if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-coreml
+    name: test-huggingface-transformers-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     permissions:
       id-token: write
@@ -844,10 +844,10 @@ jobs:
           # phi4-mini|xnnpack|--quantize,
           # smollm2-135m|xnnpack|--quantize,
           # smollm3-3b|xnnpack|--quantize,
+          # qwen3-1.7b|xnnpack|--quantize,
           # CoreML.
           llama3.2-1b|coreml_fp32_gpu|--quantize,
           qwen3-0.6b|coreml_fp32_gpu|--quantize,
-          qwen3-1.7b|xnnpack|--quantize,
           smollm2-135m|coreml_fp32_gpu|--quantize,
           olmo-1b|coreml_fp32_gpu|--quantize,
           bert|coreml_fp32_gpu|--quantize,
 
@@ -52,7 +52,7 @@ To get started you can:
 
 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).
 
 ## Feedback and Engagement
 
 
@@ -61,16 +61,21 @@ runtime.python_library(
 )
 
 runtime.python_library(
-    name = "recipes",
-    srcs = glob([
-        "recipes/*.py",
-    ]),
+    name = "coreml_recipes",
+    srcs = [
+        "recipes/__init__.py",
+        "recipes/coreml_recipe_provider.py"
+    ],
     visibility = [
         "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
     ],
     deps = [
         "fbsource//third-party/pypi/coremltools:coremltools",
+        ":coreml_recipe_types",
         ":backend",
+        ":partitioner",
+        ":quantizer",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
     ],
 )
 
+runtime.python_library(
+    name = "coreml_recipe_types",
+    srcs = [
+        "recipes/coreml_recipe_types.py",
+    ],
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
+    ],
+    deps = [
+        "//executorch/export:recipe",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
-        ":recipes",
+        ":coreml_recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
         "fbsource//third-party/pypi/scikit-learn:scikit-learn",
 
@@ -15,6 +15,7 @@
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
     _get_kwinputs,
+    noop,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
     split,
@@ -91,6 +92,28 @@ def _to_dim_order_copy(context, node):
         to(context, node)
 
 
+@register_torch_op(
+    torch_alias=[
+        "dim_order_ops::_clone_dim_order",
+        "dim_order_ops._clone_dim_order",
+    ],
+    override=False,
+)
+def _clone_dim_order(context, node):
+    dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+    node.kwinputs.pop("dim_order")
+
+    # In CoreML, dim_order.val will be an ndarray, so we convert it to a list to check memory format.
+    dim_order = [int(d) for d in dim_order.val]
+    memory_format = get_memory_format(dim_order)
+    assert (
+        memory_format == _torch.contiguous_format
+    ), "Only contiguous memory format is supported in CoreML"
+
+    # Since CoreML only supports contiguous format, no dim_order preservation is needed. Treat this as a no-op clone.
+    noop(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],
 
@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
 
+import logging
 from typing import Any, Optional, Sequence
 
 import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
         unexpected = set(kwargs.keys()) - expected_keys
         if unexpected:
-            raise ValueError(
-                f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+            logging.warning(
+                f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
+                f"Expected parameters: {list(expected_keys)}"
             )
 
         self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
     def _get_expected_keys(self, recipe_type: RecipeType) -> set:
         """Get expected parameter keys for a recipe type"""
-        common_keys = {"minimum_deployment_target", "compute_unit"}
+        common_keys = {
+            "minimum_deployment_target",
+            "compute_unit",
+            "skip_ops_for_coreml_delegation",
+            "lower_full_graph",
+            "take_over_constant_data",
+        }
 
         if recipe_type in [
             CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
         if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
             take_over_mutable_buffer = False
 
+        # Extract additional partitioner parameters
+        skip_ops_for_coreml_delegation = kwargs.get(
+            "skip_ops_for_coreml_delegation", None
+        )
+        lower_full_graph = kwargs.get("lower_full_graph", False)
+        take_over_constant_data = kwargs.get("take_over_constant_data", True)
+
         partitioner = CoreMLPartitioner(
             compile_specs=compile_specs,
             take_over_mutable_buffer=take_over_mutable_buffer,
+            skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
+            lower_full_graph=lower_full_graph,
+            take_over_constant_data=take_over_constant_data,
         )
 
         edge_compile_config = EdgeCompileConfig(
 
@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
             )
         self.assertIn("must be positive", str(cm.exception))
 
-        # Test unexpected parameter
-        with self.assertRaises(ValueError) as cm:
-            self.provider.create_recipe(
-                CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
-                group_size=32,  # group_size not valid for per-channel
-            )
-        self.assertIn("unexpected parameters", str(cm.exception))
-
     def test_int8_weight_only_per_channel(self):
         """Test INT8 weight-only per-channel quantization"""
         model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
         self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
         self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
 
-    def test_pt2e_recipes_parameter_rejection(self):
-        """Test that PT2E recipes reject TorchAO-specific parameters"""
-        # PT2E recipes should reject TorchAO-specific parameters
-        pt2e_recipes = [
-            CoreMLRecipeType.PT2E_INT8_STATIC,
-            CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
-        ]
-        torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
-
-        for recipe_type in pt2e_recipes:
-            for param in torchao_params:
-                with self.subTest(recipe=recipe_type.value, param=param):
-                    kwargs = {param: "dummy_value"}
-                    with self.assertRaises(ValueError) as cm:
-                        self.provider.create_recipe(recipe_type, **kwargs)
-                    self.assertIn("unexpected parameters", str(cm.exception).lower())
-
     def test_filter_fn_comprehensive(self):
         """Comprehensive test for filter_fn parameter functionality"""
 
 
@@ -268,6 +268,28 @@ def test_dequantize_codebook_embedding_per_grouped_row(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
 
+    def test__clone_dim_order_contiguous(self):
+        class Model(torch.nn.Module):
+            def forward(self, x):
+                return torch.ops.dim_order_ops._clone_dim_order(
+                    x, dim_order=[0, 1, 2, 3]
+                )
+
+        model, example_inputs = Model(), (torch.randn(1, 3, 8, 8),)
+        ep = torch.export.export(model, example_inputs)
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[self._coreml_partitioner()],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
 
 if __name__ == "__main__":
     test_runner = TestTorchOps()
@@ -280,3 +302,4 @@ def test_dequantize_codebook_embedding_per_grouped_row(self):
     test_runner.test_dequantize_codebook_linear_per_grouped_row()
     test_runner.test_dequantize_codebook_embedding_per_grouped_col()
     test_runner.test_dequantize_codebook_embedding_per_grouped_row()
+    test_runner.test__clone_dim_order_contiguous()
@@ -91,7 +91,8 @@
     UnsqueezeBeforeRepeatPass,
     UnsqueezeScalarPlaceholdersPass,
 )
-from executorch.backends.arm.tosa_specification import (
+
+from executorch.backends.arm.tosa.specification import (
     TosaLoweringContext,
     TosaSpecification,
 )
 
@@ -9,7 +9,6 @@
 import torch
 from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
-from executorch.backends.arm.operator_support.pool_2d_support import AvgPool2dSupported
 from executorch.exir.backend.utils import WhyNoPartitionReporter
 from executorch.exir.dialects._ops import ops as exir_ops
 
@@ -67,6 +66,11 @@ def __init__(self, graph_module, tosa_spec):
         super().__init__()
         self._graph_module = graph_module
         self._tosa_spec = tosa_spec
+        # Lazy import to avoid circular dependency with operator_support
+        from executorch.backends.arm.operator_support.pool_2d_support import (
+            AvgPool2dSupported,
+        )
+
         self._avg_pool_checker = AvgPool2dSupported(
             self._tosa_spec, WhyNoPartitionReporter()
         )
 
@@ -18,7 +18,7 @@ class RemoveClonePass(ExportPass):
     """Remove all clones from graph_module"""
 
     def call_operator(self, op, args, kwargs, meta):
-        if op != exir_ops.edge.aten.clone.default:
+        if op != exir_ops.edge.dim_order_ops._clone_dim_order.default:
             return super().call_operator(op, args, kwargs, meta)
 
         if len(args) != 1:
Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,8 @@`
`91`	`91`	`UnsqueezeBeforeRepeatPass,`
`92`	`92`	`UnsqueezeScalarPlaceholdersPass,`
`93`	`93`	`)`
`94`		`-from executorch.backends.arm.tosa_specification import (`
	`94`	`+`
	`95`	`+from executorch.backends.arm.tosa.specification import (`
`95`	`96`	`TosaLoweringContext,`
`96`	`97`	`TosaSpecification,`
`97`	`98`	`)`