pytorch
diff --git a/‎.ci/scripts/test_model.sh‎
Lines changed: 3 additions & 1 deletion b/‎.ci/scripts/test_model.sh‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎.ci/scripts/test_qnn_static_llama.sh‎
Lines changed: 3 additions & 3 deletions b/‎.ci/scripts/test_qnn_static_llama.sh‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 62 additions & 8 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 62 additions & 8 deletions
diff --git a/‎backends/apple/coreml/TARGETS‎
Lines changed: 21 additions & 0 deletions b/‎backends/apple/coreml/TARGETS‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎backends/apple/coreml/compiler/torch_ops.py‎
Lines changed: 41 additions & 1 deletion b/‎backends/apple/coreml/compiler/torch_ops.py‎
Lines changed: 41 additions & 1 deletion
diff --git a/‎backends/apple/coreml/recipes/__init__.py‎
Lines changed: 17 additions & 0 deletions b/‎backends/apple/coreml/recipes/__init__.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎backends/apple/coreml/recipes/coreml_recipe_provider.py‎
Lines changed: 132 additions & 0 deletions b/‎backends/apple/coreml/recipes/coreml_recipe_provider.py‎
Lines changed: 132 additions & 0 deletions
@@ -199,6 +199,8 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=albert
   elif [[ "${MODEL_NAME}" == "bert" ]]; then
     EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+    EXPORT_SCRIPT=conv_former
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
   elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
@@ -238,7 +240,7 @@ test_model_with_qnn() {
     "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
         SCRIPT_FOLDER=oss_scripts
         ;;
-    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+    "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
         pip install evaluate
         SCRIPT_FOLDER=oss_scripts
         # 16bit models will encounter op validation fail on some operations,
 
@@ -33,12 +33,12 @@ echo "Creating tokenizer.bin"
 $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
 set +e
-# Compile only as weight sharing is not applicable on x86
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --llama_artifacts . --compile_only
+# Compile only as weight sharing is not applicable on x86.
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
 exit_code1=$?
 
 # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --llama_artifacts . --enable_x86_64
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
 exit_code2=$?
 
 # Check BC
 
@@ -60,7 +60,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
       matrix:
-        model: [add]
+        model: [add, softmax, mv2]
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -72,31 +72,85 @@ jobs:
         MODEL_NAME=${{ matrix.model }}
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
+        if [[ ${{ matrix.model}} == "add" ]]; then
+          SIM_LIMIT_SEC=60
+        elif [[ ${{ matrix.model}} == "softmax" ]]; then
+          SIM_LIMIT_SEC=60
+        elif [[ ${{ matrix.model}} == "mv2" ]]; then
+          SIM_LIMIT_SEC=5000
+        else
+          echo "Failed unsupported model selection ${{ matrix.model }}"
+          exit 1
+        fi
 
         source .ci/scripts/utils.sh
         source .ci/scripts/zephyr-utils.sh
         mkdir -p zephyr_scratch/
         cd zephyr_scratch
         export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
+        export ARM_FVP_TUTORIALS_ROOT=$ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm-fvp-tutorials
 
+        # TODO @Bujji: Should see if this can be moved into the docker image itself
         download_arm_zephyr_sdk
         ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
-
         cd $ZEPHYR_PROJ_ROOT
         setup_zephyr_et_module
 
+        # Run setup scripts for Arm FVP and Arm AOT Compilation
         cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
         install_executorch "--use-pt-pinned-commit"
         .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
         source examples/arm/ethos-u-scratch/setup_path.sh
         source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
-        cd $ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm/hello_world
-        west build -p always -b mps3/corstone300/fvp
-        FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 -C mps3_board.uart0.out_file='sim.out'  -C cpu0.CFGITCMSZ=15 -C cpu0.CFGDTCMSZ=15 --simlimit 120
 
-        grep -qF "Output[0][0]: (float) 2.000000" sim.out
+        # Get the model as PTE
+        python -m examples.arm.aot_arm_compiler \
+            --model_name="${MODEL_NAME}" \
+            --output="${MODEL_NAME}.pte"
+
+        # Generate the C-style header
+        cd $ARM_FVP_TUTORIALS_ROOT
+        python build_model.py \
+            --executorch-root $ZEPHYR_PROJ_ROOT/modules/lib/executorch \
+            --pte-file $ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte \
+            --output-path $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/src/
+
+        cd $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/
+
+        # Build the zephyr elf
+        west build -p always -b mps3/corstone300/fvp -- \
+            -DET_PTE_FILE_PATH_FOR_SELECTIVE_BUILD=$ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte
+
+        # Run the simulation
+        FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf \
+            -C mps3_board.visualisation.disable-visualisation=1 \
+            -C mps3_board.telnetterminal0.start_telnet=0 \
+            -C mps3_board.uart0.out_file='sim.out'  \
+            -C cpu0.CFGITCMSZ=15 \
+            -C cpu0.CFGDTCMSZ=15 \
+            --simlimit ${SIM_LIMIT_SEC}
+
+        # Disable exit on error
+        set +e
+        # Report failure if any of the output verification checks fail
+        grep -qF "ERROR" sim.out
+        exit_status=$? #store 0 if found (failure), 1 if not (success)
+        if [[ "$exit_status" -eq "0" ]]; then
+            cat sim.out
+            set -e
+            exit 1
+        fi
+
+        # Report fail if simulation does not complete successfully
+        grep -qF "SUCCESS: Program complete, exiting." sim.out
         exit_status=$? #store 0 if found (success), 1 if not (failure)
-        exit $exit_status
+        if [[ "$exit_status" -eq "1" ]]; then
+            cat sim.out
+            set -e
+            exit 1
+        fi
+        # Re-enable exit on error
+        set -e
 
   test-models-linux-aarch64:
     name: test-models-linux-aarch64
@@ -568,7 +622,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
       fail-fast: false
     with:
       runner: linux.2xlarge
 
@@ -60,6 +60,26 @@ runtime.python_library(
     ],
 )
 
+# Library target for the Python sources under recipes/; the python_test target
+# later in this file depends on it as ":recipes".
+runtime.python_library(
+    name = "recipes",
+    srcs = glob(["recipes/*.py"]),
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
+        ":backend",
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+        "//executorch/exir/backend:compile_spec_schema",
+        "//executorch/exir/backend:partitioner",
+        "//executorch/exir/backend:utils",
+        "//executorch/export:lib",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -103,6 +123,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
+        ":recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
     ],
 
@@ -8,6 +8,7 @@
 # coremltools than is used by ExecuTorch.  Each op registered here should have a link to a PR in coremltools that adds
 # the op to the coremltools library.
 
+import numpy as np
 import torch as _torch
 from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
@@ -21,7 +22,6 @@
     transpose,
     unbind,
 )
-
 from coremltools.converters.mil.frontend.torch.torch_op_registry import (
     register_torch_op,
 )
@@ -132,3 +132,43 @@ def dequantize_affine(context, node):
         name=node.name,
     )
     context.add(output, node.name)
+
+
+@register_torch_op(
+    torch_alias=["quant::dequantize_codebook", "quant.dequantize_codebook"],
+    override=False,
+)
+def dequantize_codebook(context, node):
+    """Lower the quant::dequantize_codebook op to a Core ML constexpr LUT op.
+
+    Node inputs are codes, codebook, nbits, block_size and, optionally,
+    output_dtype; the optional dtype is only logged, never applied.
+    """
+    inputs = _get_inputs(context, node, expected=[4, 5])
+    # inputs[3] (block_size) is not read: it is redundant with codebook.shape.
+    codes, codebook, nbits = (var.val for var in inputs[:3])
+
+    assert len(codes.shape) == 2, "Only rank 2 inputs are supported"
+
+    # Assert codebook is as expected.  codebook.dim() = codes.dim() + 2
+    assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook"
+    assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported"
+    n_luts = codebook.shape[1]
+    assert (
+        codes.shape[1] % n_luts == 0
+    ), "codes.shape[1] must be divisible by codebook.shape[1]"
+    assert codebook.shape[2] == 2**nbits, "codebook.shape[2] must equal 2**nbits"
+    assert codebook.shape[3] == 1, "Only scalar look up values are supported"
+
+    if len(inputs) > 4:
+        out_np_dtype = NUM_TO_NUMPY_DTYPE[inputs[4].val]
+        _logger.warning(
+            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_codebook and instead uses the native precision."
+        )
+
+    output = _utils._construct_constexpr_lut_op(
+        codes.astype(np.int8),
+        codebook,
+        name=node.name,
+    )
+    context.add(output, node.name)
@@ -0,0 +1,17 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from executorch.export import recipe_registry
+
+from .coreml_recipe_provider import CoreMLRecipeProvider
+from .coreml_recipe_types import CoreMLRecipeType
+
+# Import side effect: register a CoreMLRecipeProvider instance with recipe_registry.
+recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
+
+__all__ = [
+    "CoreMLRecipeProvider",
+    "CoreMLRecipeType",
+]
@@ -0,0 +1,132 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from typing import Any, Optional, Sequence
+
+import coremltools as ct
+
+from executorch.backends.apple.coreml.compiler import CoreMLBackend
+from executorch.backends.apple.coreml.partition.coreml_partitioner import (
+    CoreMLPartitioner,
+)
+from executorch.backends.apple.coreml.recipes.coreml_recipe_types import (
+    COREML_BACKEND,
+    CoreMLRecipeType,
+)
+
+from executorch.exir import EdgeCompileConfig
+from executorch.export import (
+    BackendRecipeProvider,
+    ExportRecipe,
+    LoweringRecipe,
+    RecipeType,
+)
+
+
+class CoreMLRecipeProvider(BackendRecipeProvider):
+    """Recipe provider that builds ExportRecipes lowering to the CoreML backend."""
+
+    @property
+    def backend_name(self) -> str:
+        """Backend identifier, taken from COREML_BACKEND."""
+        return COREML_BACKEND
+
+    def get_supported_recipes(self) -> Sequence[RecipeType]:
+        """List every member of CoreMLRecipeType."""
+        return list(CoreMLRecipeType)
+
+    def create_recipe(
+        self, recipe_type: RecipeType, **kwargs: Any
+    ) -> Optional[ExportRecipe]:
+        """Build the export recipe for `recipe_type`.
+
+        Returns None when the recipe type is not one of this provider's;
+        invalid kwargs or a type with no known precision raise ValueError.
+        """
+        if recipe_type not in self.get_supported_recipes():
+            return None
+
+        if ct is None:
+            raise ImportError(
+                "coremltools is required for CoreML recipes. "
+                "Install it with: pip install coremltools"
+            )
+
+        self._validate_recipe_kwargs(recipe_type, **kwargs)
+
+        # Dispatch table: recipe type -> coremltools compute precision.
+        precision_for = {
+            CoreMLRecipeType.FP32: ct.precision.FLOAT32,
+            CoreMLRecipeType.FP16: ct.precision.FLOAT16,
+        }
+        precision = precision_for.get(recipe_type)
+        if precision is None:
+            raise ValueError(f"Unknown precision for recipe: {recipe_type.value}")
+
+        return self._build_recipe(recipe_type, precision, **kwargs)
+
+    def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> None:
+        """Raise ValueError for unknown kwargs or values of the wrong enum type."""
+        # recipe_type is currently not consulted by any check.
+        unexpected = set(kwargs) - {"minimum_deployment_target", "compute_unit"}
+        if unexpected:
+            raise ValueError(
+                f"CoreML Recipes only accept 'minimum_deployment_target' or 'compute_unit' as parameter. "
+                f"Unexpected parameters: {list(unexpected)}"
+            )
+
+        # Each type check applies only when the caller actually passed the key.
+        minimum_deployment_target = kwargs.get("minimum_deployment_target")
+        if "minimum_deployment_target" in kwargs and not isinstance(
+            minimum_deployment_target, ct.target
+        ):
+            raise ValueError(
+                f"Parameter 'minimum_deployment_target' must be an enum of type ct.target, got {type(minimum_deployment_target)}"
+            )
+
+        compute_unit = kwargs.get("compute_unit")
+        if "compute_unit" in kwargs and not isinstance(compute_unit, ct.ComputeUnit):
+            raise ValueError(
+                f"Parameter 'compute_unit' must be an enum of type ct.ComputeUnit, got {type(compute_unit)}"
+            )
+
+    def _build_recipe(
+        self,
+        recipe_type: RecipeType,
+        precision: ct.precision,
+        **kwargs: Any,
+    ) -> ExportRecipe:
+        """Wrap the CoreML lowering recipe for `precision` in an ExportRecipe."""
+        lowering = self._get_coreml_lowering_recipe(
+            compute_precision=precision,
+            **kwargs,
+        )
+        return ExportRecipe(
+            name=recipe_type.value,
+            quantization_recipe=None,  # TODO - add quantization recipe
+            lowering_recipe=lowering,
+        )
+
+    def _get_coreml_lowering_recipe(
+        self,
+        compute_precision: ct.precision,
+        **kwargs: Any,
+    ) -> LoweringRecipe:
+        """Assemble the CoreML partitioner and edge config into a LoweringRecipe."""
+        compile_specs = CoreMLBackend.generate_compile_specs(
+            compute_precision=compute_precision,
+            **kwargs,
+        )
+
+        # Mutable buffers are taken over unless a target below iOS18 is requested.
+        target = kwargs.get("minimum_deployment_target", None)
+        take_over_mutable_buffer = not (target and target < ct.target.iOS18)
+
+        partitioner = CoreMLPartitioner(
+            compile_specs=compile_specs,
+            take_over_mutable_buffer=take_over_mutable_buffer,
+        )
+        config = EdgeCompileConfig(_check_ir_validity=False, _skip_dim_order=False)
+        return LoweringRecipe(partitioners=[partitioner], edge_compile_config=config)