
Commit 0cbb5e0

Merge branch 'main' into add-dim-order-clone-transform
2 parents: 17f2e6c + b02db12

Some content is hidden (large commits have some content hidden by default).

43 files changed: +1395 −520 lines

.github/workflows/trunk.yml

Lines changed: 3 additions & 3 deletions

@@ -823,10 +823,10 @@ jobs:
           --tsv_path ${TSV_PATH}
         echo "::endgroup::"
 
-  test-huggingface-transformers-coreml:
+  test-huggingface-transformers-macos:
     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
     if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-coreml
+    name: test-huggingface-transformers-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     permissions:
       id-token: write
@@ -844,10 +844,10 @@ jobs:
           # phi4-mini|xnnpack|--quantize,
           # smollm2-135m|xnnpack|--quantize,
           # smollm3-3b|xnnpack|--quantize,
+          # qwen3-1.7b|xnnpack|--quantize,
           # CoreML.
           llama3.2-1b|coreml_fp32_gpu|--quantize,
           qwen3-0.6b|coreml_fp32_gpu|--quantize,
-          qwen3-1.7b|xnnpack|--quantize,
           smollm2-135m|coreml_fp32_gpu|--quantize,
           olmo-1b|coreml_fp32_gpu|--quantize,
           bert|coreml_fp32_gpu|--quantize,

backends/apple/coreml/TARGETS

Lines changed: 24 additions & 5 deletions

@@ -61,16 +61,21 @@ runtime.python_library(
 )
 
 runtime.python_library(
-    name = "recipes",
-    srcs = glob([
-        "recipes/*.py",
-    ]),
+    name = "coreml_recipes",
+    srcs = [
+        "recipes/__init__.py",
+        "recipes/coreml_recipe_provider.py"
+    ],
     visibility = [
         "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
     ],
     deps = [
         "fbsource//third-party/pypi/coremltools:coremltools",
+        ":coreml_recipe_types",
         ":backend",
+        ":partitioner",
+        ":quantizer",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
     ],
 )
 
+runtime.python_library(
+    name = "coreml_recipe_types",
+    srcs = [
+        "recipes/coreml_recipe_types.py",
+    ],
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
+    ],
+    deps = [
+        "//executorch/export:recipe",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
-        ":recipes",
+        ":coreml_recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
         "fbsource//third-party/pypi/scikit-learn:scikit-learn",

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 21 additions & 3 deletions

@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
 
+import logging
 from typing import Any, Optional, Sequence
 
 import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
         unexpected = set(kwargs.keys()) - expected_keys
         if unexpected:
-            raise ValueError(
-                f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+            logging.warning(
+                f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
+                f"Expected parameters: {list(expected_keys)}"
             )
 
         self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
     def _get_expected_keys(self, recipe_type: RecipeType) -> set:
         """Get expected parameter keys for a recipe type"""
-        common_keys = {"minimum_deployment_target", "compute_unit"}
+        common_keys = {
+            "minimum_deployment_target",
+            "compute_unit",
+            "skip_ops_for_coreml_delegation",
+            "lower_full_graph",
+            "take_over_constant_data",
+        }
 
         if recipe_type in [
             CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
         if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
             take_over_mutable_buffer = False
 
+        # Extract additional partitioner parameters
+        skip_ops_for_coreml_delegation = kwargs.get(
+            "skip_ops_for_coreml_delegation", None
+        )
+        lower_full_graph = kwargs.get("lower_full_graph", False)
+        take_over_constant_data = kwargs.get("take_over_constant_data", True)
+
         partitioner = CoreMLPartitioner(
             compile_specs=compile_specs,
             take_over_mutable_buffer=take_over_mutable_buffer,
+            skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
+            lower_full_graph=lower_full_graph,
+            take_over_constant_data=take_over_constant_data,
         )
 
         edge_compile_config = EdgeCompileConfig(
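Net effect of these changes: unrecognized kwargs are now logged and ignored instead of raising ValueError, and the three new common keys are forwarded to CoreMLPartitioner by every recipe. A minimal usage sketch, assuming the provider class is exposed as CoreMLRecipeProvider and the recipe enum as CoreMLRecipeType (import paths inferred from this commit's file layout; the skipped op name is illustrative):

    from executorch.backends.apple.coreml.recipes.coreml_recipe_provider import (
        CoreMLRecipeProvider,
    )
    from executorch.backends.apple.coreml.recipes.coreml_recipe_types import (
        CoreMLRecipeType,
    )

    provider = CoreMLRecipeProvider()

    # The three new common keys flow through to CoreMLPartitioner.
    recipe = provider.create_recipe(
        CoreMLRecipeType.PT2E_INT8_STATIC,
        lower_full_graph=False,                            # default per this commit
        take_over_constant_data=True,                      # default per this commit
        skip_ops_for_coreml_delegation=["aten::softmax"],  # op name illustrative
    )

    # A typo'd kwarg now logs a warning listing the expected keys,
    # instead of raising ValueError.
    recipe = provider.create_recipe(CoreMLRecipeType.PT2E_INT8_STATIC, group_sze=32)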

backends/apple/coreml/test/test_coreml_recipes.py

Lines changed: 0 additions & 25 deletions

@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
             )
         self.assertIn("must be positive", str(cm.exception))
 
-        # Test unexpected parameter
-        with self.assertRaises(ValueError) as cm:
-            self.provider.create_recipe(
-                CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
-                group_size=32,  # group_size not valid for per-channel
-            )
-        self.assertIn("unexpected parameters", str(cm.exception))
-
     def test_int8_weight_only_per_channel(self):
         """Test INT8 weight-only per-channel quantization"""
         model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
         self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
         self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
 
-    def test_pt2e_recipes_parameter_rejection(self):
-        """Test that PT2E recipes reject TorchAO-specific parameters"""
-        # PT2E recipes should reject TorchAO-specific parameters
-        pt2e_recipes = [
-            CoreMLRecipeType.PT2E_INT8_STATIC,
-            CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
-        ]
-        torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
-
-        for recipe_type in pt2e_recipes:
-            for param in torchao_params:
-                with self.subTest(recipe=recipe_type.value, param=param):
-                    kwargs = {param: "dummy_value"}
-                    with self.assertRaises(ValueError) as cm:
-                        self.provider.create_recipe(recipe_type, **kwargs)
-                    self.assertIn("unexpected parameters", str(cm.exception).lower())
-
     def test_filter_fn_comprehensive(self):
         """Comprehensive test for filter_fn parameter functionality"""
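Both deleted tests asserted the old raise-on-unexpected-kwargs behavior. Under the new warning behavior, an equivalent check could assert the log instead; a sketch (not part of this commit) using unittest's assertLogs with the same recipe and parameter as the deleted test:

    # Hypothetical replacement test body, assuming the same self.provider
    # fixture as the surrounding tests.
    with self.assertLogs(level="WARNING") as cm:
        self.provider.create_recipe(
            CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
            group_size=32,  # not valid for per-channel; now ignored with a warning
        )
    self.assertTrue(any("unexpected parameters" in msg for msg in cm.output))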

backends/cadence/aot/ref_implementations.py

Lines changed: 74 additions & 7 deletions

@@ -24,7 +24,7 @@
 
 @impl(m, "quantize_per_tensor")
 def quantize_per_tensor(
-    input: torch.Tensor,
+    input_tensor: torch.Tensor,
     scale: float,
     zero_point: int,
     quant_min: int,
@@ -35,10 +35,10 @@ def quantize_per_tensor(
     Quantizes a floating-point tensor to an integral tensor.
 
     Args:
-    - input (Tensor): input tensor
-    - scale (float): Quantization scale. Derived from the ratio
+    - input_tensor (Tensor): input tensor
+    - scale (float): Inverse of quantization scale. Derived from the ratio
         between the min/max of the floating-point tensor and the
-        min/max of the quantized range.
+        min/max of the quantized range, and then inverted.
     - zero_point (int): The point which represents 0 in the quantized
         range. For example, consider the floating point range [-1., 2.] and
         quantized integer range [-7, 7]. In this case, 0 is 1/3 of way from
@@ -61,7 +61,12 @@ def quantize_per_tensor(
         raise ValueError(
             f"Unsupported dtype to quantize to. Supported dtypes must be one of {supported_quant_types}"
         )
-    return torch.round(input / scale + zero_point).to(dtype)
+
+    dequantized = torch.round(input_tensor * scale + zero_point).to(dtype)
+    return torch.max(
+        torch.min(dequantized, torch.tensor(quant_max)),
+        torch.tensor(quant_min),
+    )
 
 
 @impl(m, "dequantize_per_tensor")
@@ -173,9 +178,16 @@ def quantized_add(
     dequant_X = X_scale * (X - X_zero_point)
     dequant_Y = Y_scale * (Y - Y_zero_point)
 
+    out_scale_inv = 1 / out_scale
+
     # q_min/q_max are unused args
     return quantize_per_tensor(
-        dequant_X + dequant_Y, out_scale, out_zero_point, -128, 127, dtype
+        dequant_X + dequant_Y,
+        out_scale_inv,
+        out_zero_point,
+        torch.iinfo(dtype).min,
+        torch.iinfo(dtype).max,
+        dtype,
     )
 
 
@@ -206,6 +218,7 @@ def quantized_linear(
     - offset (Tensor): Unused
     """
     out_scale = -out_multiplier * (1 / (1 << 31)) * (2 ** out_shift[0])
+    out_scale_inv = 1 / out_scale
 
     N, K = weight.shape
 
@@ -223,10 +236,64 @@ def quantized_linear(
         src - in_zero_point, weight - weight_zero_point, bias
     )
     return quantize_per_tensor(
-        out, out_scale, out_zero_point, -128, 127, dtype
+        out,
+        out_scale_inv,
+        out_zero_point,
+        torch.iinfo(dtype).min,
+        torch.iinfo(dtype).max,
+        dtype,
     ).reshape(*leading_dims, N)
 
 
+@impl(m, "quantized_layer_norm_per_tensor")
+def quantized_layer_norm_per_tensor(
+    input_tensor: torch.Tensor,
+    X_scale: float,
+    X_zero_point: int,
+    normalized_shape: int,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float,
+    output_scale: float,
+    output_zero_point: int,
+) -> torch.Tensor:
+    """
+    Quantized layer norm operation.
+
+    Args:
+    - input_tensor (Tensor): The activations tensor
+    - X_scale (float): The scale of the input
+    - X_zero_point (int): The zero point of the input
+    - normalized_shape (int): The shape of the input
+    - weight (Tensor): The weight tensor
+    - bias (Tensor): The bias tensor
+    - eps (float): The epsilon value
+    - output_scale (float): The scale of the output
+    - output_zero_point (int): The zero point of the output
+    """
+    supported_dtypes = [torch.int8, torch.uint8]
+    if input_tensor.dtype not in supported_dtypes:
+        raise ValueError(
+            f"Input dtype must be one of {supported_dtypes}. Got {input_tensor.dtype}"
+        )
+
+    float_input_tensor = dequantize_per_tensor(
+        input_tensor, X_scale, X_zero_point, -128, 127, torch.float32
+    )
+    out = torch.nn.functional.layer_norm(
+        float_input_tensor, (normalized_shape,), weight, bias, eps=eps
+    )
+
+    return quantize_per_tensor(
+        out,
+        1 / output_scale,
+        output_zero_point,
+        torch.iinfo(input_tensor.dtype).min,
+        torch.iinfo(input_tensor.dtype).max,
+        input_tensor.dtype,
+    )
+
+
 @impl(m, "requantize")
 def requantize(
     input: torch.Tensor,
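Two semantic points in the updated quantize_per_tensor are worth spelling out: scale is the inverse of the quantization scale (the kernel multiplies by it rather than dividing), and the rounded result is now clamped to [quant_min, quant_max]. A small self-contained sketch of that arithmetic in plain PyTorch; the function name and values are illustrative, and it clamps before the dtype cast so out-of-range values cannot wrap:

    import torch

    def quantize_ref(x, scale_inv, zero_point, quant_min, quant_max, dtype):
        # Multiply by the inverse scale, shift by the zero point, round,
        # then clamp into the quantized range before casting.
        q = torch.round(x * scale_inv + zero_point)
        return torch.clamp(q, quant_min, quant_max).to(dtype)

    # Map the float range [-1.75, 1.75] onto the quantized range [-7, 7]:
    # scale = 0.25, so the op receives its inverse, scale_inv = 4.0.
    x = torch.tensor([-0.5, 0.3, 2.0])
    print(quantize_ref(x, 4.0, 0, -7, 7, torch.int8))
    # tensor([-2,  1,  7], dtype=torch.int8) -- 2.0 maps to 8, clamped to 7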
