diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS index 6993b699427..c5eec41d5fc 100644 --- a/backends/apple/coreml/TARGETS +++ b/backends/apple/coreml/TARGETS @@ -120,6 +120,7 @@ runtime.python_test( "test/*.py", ]), deps = [ + "fbsource//third-party/pypi/coremltools:coremltools", "fbsource//third-party/pypi/pytest:pytest", ":partitioner", ":quantizer", diff --git a/backends/apple/coreml/recipes/coreml_recipe_provider.py b/backends/apple/coreml/recipes/coreml_recipe_provider.py index 75c937027bb..5d4fee6976d 100644 --- a/backends/apple/coreml/recipes/coreml_recipe_provider.py +++ b/backends/apple/coreml/recipes/coreml_recipe_provider.py @@ -6,6 +6,7 @@ from typing import Any, Optional, Sequence import coremltools as ct +import torch from executorch.backends.apple.coreml.compiler import CoreMLBackend from executorch.backends.apple.coreml.partition.coreml_partitioner import ( @@ -18,11 +19,15 @@ from executorch.exir import EdgeCompileConfig from executorch.export import ( + AOQuantizationConfig, BackendRecipeProvider, ExportRecipe, LoweringRecipe, + QuantizationRecipe, RecipeType, ) +from torchao.quantization.granularity import PerAxis, PerGroup +from torchao.quantization.quant_api import IntxWeightOnlyConfig class CoreMLRecipeProvider(BackendRecipeProvider): @@ -50,34 +55,98 @@ def create_recipe( # Validate kwargs self._validate_recipe_kwargs(recipe_type, **kwargs) - # Parse recipe type to get precision and compute unit - precision = None if recipe_type == CoreMLRecipeType.FP32: - precision = ct.precision.FLOAT32 + return self._build_fp_recipe(recipe_type, ct.precision.FLOAT32, **kwargs) elif recipe_type == CoreMLRecipeType.FP16: - precision = ct.precision.FLOAT16 - - if precision is None: - raise ValueError(f"Unknown precision for recipe: {recipe_type.value}") + return self._build_fp_recipe(recipe_type, ct.precision.FLOAT16, **kwargs) + elif recipe_type == CoreMLRecipeType.PT2E_INT8_STATIC: + return self._build_pt2e_quantized_recipe( + recipe_type, activation_dtype=torch.quint8, **kwargs + ) + elif recipe_type == CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY: + return self._build_pt2e_quantized_recipe( + recipe_type, activation_dtype=torch.float32, **kwargs + ) + elif recipe_type == CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL: + return self._build_torchao_quantized_recipe( + recipe_type, + weight_dtype=torch.int4, + is_per_channel=True, + **kwargs, + ) + elif recipe_type == CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP: + group_size = kwargs.pop("group_size", 32) + return self._build_torchao_quantized_recipe( + recipe_type, + weight_dtype=torch.int4, + is_per_channel=False, + group_size=group_size, + **kwargs, + ) + elif recipe_type == CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL: + return self._build_torchao_quantized_recipe( + recipe_type, weight_dtype=torch.int8, is_per_channel=True, **kwargs + ) + elif recipe_type == CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP: + group_size = kwargs.pop("group_size", 32) + return self._build_torchao_quantized_recipe( + recipe_type, + weight_dtype=torch.int8, + is_per_channel=False, + group_size=group_size, + **kwargs, + ) + elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY: + bits = kwargs.pop("bits", 3) + block_size = kwargs.pop("block_size", [-1, 16]) + return self._build_codebook_quantized_recipe( + recipe_type, bits=bits, block_size=block_size, **kwargs + ) - return self._build_recipe(recipe_type, precision, **kwargs) + return None def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> None: - if 
not kwargs: - return - expected_keys = {"minimum_deployment_target", "compute_unit"} + """Validate kwargs for each recipe type""" + expected_keys = self._get_expected_keys(recipe_type) + unexpected = set(kwargs.keys()) - expected_keys if unexpected: raise ValueError( - f"CoreML Recipes only accept 'minimum_deployment_target' or 'compute_unit' as parameter. " - f"Unexpected parameters: {list(unexpected)}" + f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}" ) + + self._validate_base_parameters(kwargs) + self._validate_group_size_parameter(recipe_type, kwargs) + self._validate_codebook_parameters(recipe_type, kwargs) + + def _get_expected_keys(self, recipe_type: RecipeType) -> set: + """Get expected parameter keys for a recipe type""" + common_keys = {"minimum_deployment_target", "compute_unit"} + + if recipe_type in [ + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, + ]: + return common_keys | {"group_size", "filter_fn"} + elif recipe_type in [ + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL, + ]: + return common_keys | {"filter_fn"} + elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY: + return common_keys | {"bits", "block_size", "filter_fn"} + else: + return common_keys + + def _validate_base_parameters(self, kwargs: Any) -> None: + """Validate minimum_deployment_target and compute_unit parameters""" if "minimum_deployment_target" in kwargs: minimum_deployment_target = kwargs["minimum_deployment_target"] if not isinstance(minimum_deployment_target, ct.target): raise ValueError( f"Parameter 'minimum_deployment_target' must be an enum of type ct.target, got {type(minimum_deployment_target)}" ) + if "compute_unit" in kwargs: compute_unit = kwargs["compute_unit"] if not isinstance(compute_unit, ct.ComputeUnit): @@ -85,12 +154,73 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non f"Parameter 'compute_unit' must be an enum of type ct.ComputeUnit, got {type(compute_unit)}" ) - def _build_recipe( + def _validate_group_size_parameter( + self, recipe_type: RecipeType, kwargs: Any + ) -> None: + """Validate group_size parameter for applicable recipe types""" + if ( + recipe_type + in [ + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, + ] + and "group_size" in kwargs + ): + group_size = kwargs["group_size"] + if not isinstance(group_size, int): + raise ValueError( + f"Parameter 'group_size' must be an integer, got {type(group_size).__name__}: {group_size}" + ) + if group_size <= 0: + raise ValueError( + f"Parameter 'group_size' must be positive, got: {group_size}" + ) + + def _validate_codebook_parameters( + self, recipe_type: RecipeType, kwargs: Any + ) -> None: + """Validate bits and block_size parameters for codebook recipe type""" + if recipe_type != CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY: + return + + if "bits" in kwargs: + bits = kwargs["bits"] + if not isinstance(bits, int): + raise ValueError( + f"Parameter 'bits' must be an integer, got {type(bits).__name__}: {bits}" + ) + if not (1 <= bits <= 8): + raise ValueError( + f"Parameter 'bits' must be between 1 and 8, got: {bits}" + ) + + if "block_size" in kwargs: + block_size = kwargs["block_size"] + if not isinstance(block_size, list): + raise ValueError( + f"Parameter 'block_size' must be a list, got {type(block_size).__name__}: {block_size}" + ) + + def _validate_and_set_deployment_target( + self, kwargs: Any, min_target: ct.target, 
quantization_type: str + ) -> None: + """Validate or set minimum deployment target for quantization recipes""" + minimum_deployment_target = kwargs.get("minimum_deployment_target", None) + if minimum_deployment_target and minimum_deployment_target < min_target: + raise ValueError( + f"minimum_deployment_target must be {str(min_target)} or higher for {quantization_type} quantization" + ) + else: + # Default to the minimum target for this quantization type + kwargs["minimum_deployment_target"] = min_target + + def _build_fp_recipe( self, recipe_type: RecipeType, precision: ct.precision, **kwargs: Any, ) -> ExportRecipe: + """Build FP32/FP16 recipe""" lowering_recipe = self._get_coreml_lowering_recipe( compute_precision=precision, **kwargs, @@ -98,18 +228,142 @@ def _build_recipe( return ExportRecipe( name=recipe_type.value, - quantization_recipe=None, # TODO - add quantization recipe + lowering_recipe=lowering_recipe, + ) + + def _build_pt2e_quantized_recipe( + self, + recipe_type: RecipeType, + activation_dtype: torch.dtype, + **kwargs: Any, + ) -> ExportRecipe: + """Build PT2E-based quantization recipe""" + from executorch.backends.apple.coreml.quantizer import CoreMLQuantizer + + self._validate_and_set_deployment_target(kwargs, ct.target.iOS17, "pt2e") + + # Validate activation_dtype + assert activation_dtype in [ + torch.quint8, + torch.float32, + ], f"activation_dtype must be torch.quint8 or torch.float32, got {activation_dtype}" + + # Create quantization config + config = ct.optimize.torch.quantization.LinearQuantizerConfig( + global_config=ct.optimize.torch.quantization.ModuleLinearQuantizerConfig( + quantization_scheme="symmetric", + activation_dtype=activation_dtype, + weight_dtype=torch.qint8, + weight_per_channel=True, + ) + ) + + quantizer = CoreMLQuantizer(config) + quantization_recipe = QuantizationRecipe(quantizers=[quantizer]) + + lowering_recipe = self._get_coreml_lowering_recipe(**kwargs) + + return ExportRecipe( + name=recipe_type.value, + quantization_recipe=quantization_recipe, + lowering_recipe=lowering_recipe, + ) + + def _build_torchao_quantized_recipe( + self, + recipe_type: RecipeType, + weight_dtype: torch.dtype, + is_per_channel: bool, + group_size: int = 32, + **kwargs: Any, + ) -> ExportRecipe: + """Build TorchAO-based quantization recipe""" + if is_per_channel: + weight_granularity = PerAxis(axis=0) + else: + weight_granularity = PerGroup(group_size=group_size) + + # Use user-provided filter_fn if provided + filter_fn = kwargs.get("filter_fn", None) + config = AOQuantizationConfig( + ao_base_config=IntxWeightOnlyConfig( + weight_dtype=weight_dtype, + granularity=weight_granularity, + ), + filter_fn=filter_fn, + ) + + quantization_recipe = QuantizationRecipe( + quantizers=None, + ao_quantization_configs=[config], + ) + + # override minimum_deployment_target to ios18 for torchao (GH issue #13122) + self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "torchao") + lowering_recipe = self._get_coreml_lowering_recipe(**kwargs) + + return ExportRecipe( + name=recipe_type.value, + quantization_recipe=quantization_recipe, + lowering_recipe=lowering_recipe, + ) + + def _build_codebook_quantized_recipe( + self, + recipe_type: RecipeType, + bits: int, + block_size: list, + **kwargs: Any, + ) -> ExportRecipe: + """Build codebook/palettization quantization recipe""" + from torchao.prototype.quantization.codebook_coreml import ( + CodebookWeightOnlyConfig, + ) + + self._validate_and_set_deployment_target(kwargs, ct.target.iOS18, "codebook") + + # Get the 
appropriate dtype (torch.uint1 through torch.uint8) + dtype = getattr(torch, f"uint{bits}") + + # Use user-provided filter_fn or default to Linear/Embedding layers + filter_fn = kwargs.get( + "filter_fn", + lambda m, fqn: ( + isinstance(m, torch.nn.Embedding) or isinstance(m, torch.nn.Linear) + ), + ) + + config = AOQuantizationConfig( + ao_base_config=CodebookWeightOnlyConfig( + dtype=dtype, + block_size=block_size, + ), + filter_fn=filter_fn, + ) + + quantization_recipe = QuantizationRecipe( + quantizers=None, + ao_quantization_configs=[config], + ) + + lowering_recipe = self._get_coreml_lowering_recipe(**kwargs) + + return ExportRecipe( + name=recipe_type.value, + quantization_recipe=quantization_recipe, lowering_recipe=lowering_recipe, ) def _get_coreml_lowering_recipe( self, - compute_precision: ct.precision, + compute_precision: ct.precision = ct.precision.FLOAT16, **kwargs: Any, ) -> LoweringRecipe: + """Get CoreML lowering recipe with optional precision""" compile_specs = CoreMLBackend.generate_compile_specs( compute_precision=compute_precision, - **kwargs, + compute_unit=kwargs.get("compute_unit", ct.ComputeUnit.ALL), + minimum_deployment_target=kwargs.get("minimum_deployment_target", None), ) minimum_deployment_target = kwargs.get("minimum_deployment_target", None) diff --git a/backends/apple/coreml/recipes/coreml_recipe_types.py b/backends/apple/coreml/recipes/coreml_recipe_types.py index 77f808bd982..c2ce102a2fd 100644 --- a/backends/apple/coreml/recipes/coreml_recipe_types.py +++ b/backends/apple/coreml/recipes/coreml_recipe_types.py @@ -12,14 +12,42 @@ class CoreMLRecipeType(RecipeType): """CoreML-specific generic recipe types""" - # FP32 generic recipe, defaults to values published by the CoreML backend and partitioner - # Precision = FP32, Default compute_unit = All (can be overriden by kwargs) + ## All the recipes accept common kwargs + # 1. minimum_deployment_target (default: None) + # 2.
compute_unit (default: ct.ComputeUnit.ALL) + + # FP32 precision recipe, defaults to values published by the CoreML backend and partitioner FP32 = "coreml_fp32" - # FP16 generic recipe, defaults to values published by the CoreML backend and partitioner - # Precision = FP32, Default compute_unit = All (can be overriden by kwargs) + # FP16 precision recipe, defaults to values published by the CoreML backend and partitioner FP16 = "coreml_fp16" + ## PT2E-based quantization recipes + # INT8 Static Quantization (weights + activations), requires calibration dataset + PT2E_INT8_STATIC = "coreml_pt2e_int8_static" + # INT8 Weight-only Quantization (activations remain FP32) + PT2E_INT8_WEIGHT_ONLY = "coreml_pt2e_int8_weight_only" + + ## TorchAO-based quantization recipes + # All TorchAO recipes accept filter_fn kwarg to control which layers are quantized + # INT4 Weight-only Quantization, per-channel (axis=0) + # Additional kwargs: filter_fn (default: None - quantizes linear layers) + INT4_WEIGHT_ONLY_PER_CHANNEL = "coreml_int4_weight_only_per_channel" + # INT4 Weight-only Quantization, per-group + # Additional kwargs: group_size (default: 32), filter_fn (default: None - quantizes linear layers) + INT4_WEIGHT_ONLY_PER_GROUP = "coreml_int4_weight_only_per_group" + # INT8 Weight-only Quantization, per-channel (axis=0) + # Additional kwargs: filter_fn (default: None - quantizes linear layers) + INT8_WEIGHT_ONLY_PER_CHANNEL = "coreml_int8_weight_only_per_channel" + # INT8 Weight-only Quantization, per-group + # Additional kwargs: group_size (default: 32), filter_fn (default: None - quantizes linear layers) + INT8_WEIGHT_ONLY_PER_GROUP = "coreml_int8_weight_only_per_group" + + ## Codebook/Palettization Quantization + # Additional kwargs: bits (1-8, default: 3), block_size (default: [-1, 16]), + # filter_fn (default: targets Linear and Embedding layers only) + CODEBOOK_WEIGHT_ONLY = "coreml_codebook_weight_only" + @classmethod def get_backend_name(cls) -> str: return COREML_BACKEND diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py index ca5c6c30c9c..1d53d5bcd4e 100644 --- a/backends/apple/coreml/test/test_coreml_recipes.py +++ b/backends/apple/coreml/test/test_coreml_recipes.py @@ -4,11 +4,10 @@ import unittest -from typing import List import coremltools as ct - import torch + from executorch.backends.apple.coreml.recipes import ( CoreMLRecipeProvider, CoreMLRecipeType, @@ -17,19 +16,17 @@ from executorch.backends.apple.coreml.test.test_coreml_utils import ( IS_VALID_TEST_RUNTIME, ) -from executorch.exir.schema import DelegateCall, Program +from executorch.exir.schema import DelegateCall from executorch.export import export, ExportRecipe, recipe_registry + +from export.types import StageType from torch import nn from torch.testing._internal.common_quantization import TestHelperModules +from torchao.quantization.utils import compute_error class TestCoreMLRecipes(unittest.TestCase): - fp32_recipes: List[CoreMLRecipeType] = [ - CoreMLRecipeType.FP32, - ] - fp16_recipes: List[CoreMLRecipeType] = [ - CoreMLRecipeType.FP16, - ] + """Test suite for CoreML recipes focusing on quantization functionality""" def setUp(self): torch._dynamo.reset() @@ -41,198 +38,557 @@ def setUp(self): def tearDown(self): super().tearDown() - def check_fully_delegated(self, program: Program) -> None: + def check_fully_delegated(self, session) -> None: + """Helper to verify a program is fully delegated to CoreML""" + session.print_delegation_info() + program = 
session.get_executorch_program() instructions = program.execution_plan[0].chains[0].instructions assert instructions is not None self.assertEqual(len(instructions), 1) self.assertIsInstance(instructions[0].instr_args, DelegateCall) - def test_all_fp32_recipes_with_simple_model(self): - """Test all FP32 recipes with a simple linear model""" - for recipe_type in self.fp32_recipes: - with self.subTest(recipe=recipe_type.value): - m_eager = TestHelperModules.TwoLinearModule().eval() - example_inputs = [(torch.randn(9, 8),)] + def _compare_eager_quantized_model_outputs(self, session, example_inputs, atol): + """Utility to compare eager quantized model output with session output after coreml lowering""" + if IS_VALID_TEST_RUNTIME: + source_transform_output = session.get_stage_artifacts()[ + StageType.SOURCE_TRANSFORM + ] + eager_quantized_model = source_transform_output.data["forward"] + output = session.run_method("forward", example_inputs[0])[0] + expected = eager_quantized_model(*example_inputs[0]) + self.assertTrue(torch.allclose(output, expected, atol=atol)) + + def _compare_eager_unquantized_model_outputs( + self, session, eager_unquantized_model, example_inputs, sqnr_threshold=20 + ): + """Utility to compare eager unquantized model output with session output using SQNR""" + if IS_VALID_TEST_RUNTIME: + quantized_output = session.run_method("forward", example_inputs[0])[0] + original_output = eager_unquantized_model(*example_inputs[0]) + error = compute_error(original_output, quantized_output) + print(f"SQNR: {error} dB") + self.assertTrue(error > sqnr_threshold) + + def test_fp32_recipe(self): + """Test FP32 recipe functionality""" + model = TestHelperModules.TwoLinearModule().eval() + example_inputs = [(torch.randn(9, 8),)] + + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe(CoreMLRecipeType.FP32), + ) + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) + + def test_fp16_recipe(self): + """Test FP16 recipe functionality""" + model = TestHelperModules.TwoLinearModule().eval() + example_inputs = [(torch.randn(9, 8),)] + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe(CoreMLRecipeType.FP16), + ) + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) + + def test_fp_recipes_with_custom_parameters(self): + """Test FP recipes with custom deployment target and compute unit""" + test_cases = [ + (CoreMLRecipeType.FP32, {"minimum_deployment_target": ct.target.iOS16}), + (CoreMLRecipeType.FP16, {"compute_unit": ct.ComputeUnit.CPU_ONLY}), + ] + + model = TestHelperModules.TwoLinearModule().eval() + example_inputs = [(torch.randn(9, 8),)] + + for recipe_type, kwargs in test_cases: + with self.subTest(recipe=recipe_type.value, kwargs=kwargs): session = export( - model=m_eager, + model=model, example_inputs=example_inputs, - export_recipe=ExportRecipe.get_recipe(recipe_type), + export_recipe=ExportRecipe.get_recipe(recipe_type, **kwargs), ) - self.check_fully_delegated(session.get_executorch_program()) - - # Verify outputs match - if IS_VALID_TEST_RUNTIME: - self.assertTrue( - torch.allclose( - session.run_method("forward", example_inputs[0])[0], - m_eager(*example_inputs[0]), - atol=1e-3, - ) - )
+ self.check_fully_delegated(session) + + def test_int4_weight_only_per_channel(self): + """Test INT4 weight-only per-channel quantization""" + model = TestHelperModules.TwoLinearModule().eval() + example_inputs = [(torch.randn(9, 8),)] + + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL + ), + ) + self.check_fully_delegated(session) + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-02) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) - def test_all_fp16_recipes_with_simple_model(self): - """Test all FP16 recipes with a simple linear model""" + def test_int4_weight_only_per_group(self): + """Test INT4 weight-only per-group quantization with different group sizes""" - for recipe_type in self.fp16_recipes: - with self.subTest(recipe=recipe_type.value): - m_eager = TestHelperModules.TwoLinearModule().eval() - example_inputs = [(torch.randn(9, 8),)] + class CustomTwoLinearModel(nn.Module): + def __init__(self): + super().__init__() + self.layer1 = nn.Linear(32, 32) + self.layer2 = nn.Linear(32, 8) + + def forward(self, x): + x = torch.relu(self.layer1(x)) + x = self.layer2(x) + return x + model = CustomTwoLinearModel().eval() + example_inputs = [(torch.randn(1, 32),)] + # Test with different group sizes + for group_size in [8, 16, 32]: + with self.subTest(group_size=group_size): session = export( - model=m_eager, + model=model, example_inputs=example_inputs, - export_recipe=ExportRecipe.get_recipe(recipe_type), + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, + group_size=group_size, + ), ) + self.check_fully_delegated(session) - self.check_fully_delegated(session.get_executorch_program()) + self._compare_eager_quantized_model_outputs( + session, example_inputs, atol=1e-3 + ) + self._compare_eager_unquantized_model_outputs( + session, model, example_inputs + ) - # Verify outputs match (slightly higher tolerance for FP16) - if IS_VALID_TEST_RUNTIME: - self.assertTrue( - torch.allclose( - session.run_method("forward", example_inputs[0])[0], - m_eager(*example_inputs[0]), - atol=1e-3, - ) - ) + def test_int4_weight_only_per_group_validation(self): + """Test INT4 per-group parameter validation""" + # Test invalid group size type + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe( + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, group_size="32" + ) + self.assertIn("must be an integer", str(cm.exception)) + + # Test negative group size + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe( + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, group_size=-1 + ) + self.assertIn("must be positive", str(cm.exception)) + + # Test unexpected parameter + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe( + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL, + group_size=32, # group_size not valid for per-channel + ) + self.assertIn("unexpected parameters", str(cm.exception)) + + def test_int8_weight_only_per_channel(self): + """Test INT8 weight-only per-channel quantization""" + model = TestHelperModules.TwoLinearModule().eval() + example_inputs = [(torch.randn(9, 8),)] + + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL + ), + ) + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2) 
+ self._compare_eager_unquantized_model_outputs(session, model, example_inputs) - def test_custom_simple_model(self): - """Test with a custom simple model""" + def test_int8_weight_only_per_group(self): + """Test INT8 weight-only per-group quantization with different group sizes""" - class CustomTestModel(nn.Module): + class SimpleLinearModel(nn.Module): def __init__(self): super().__init__() - self.linear1 = nn.Linear(10, 20) - self.relu = nn.ReLU() - self.linear2 = nn.Linear(20, 1) + self.layer = nn.Linear(64, 2) def forward(self, x): - x = self.linear1(x) - x = self.relu(x) - x = self.linear2(x) - return x + return self.layer(x) - model = CustomTestModel().eval() - example_inputs = [(torch.randn(1, 10),)] - for recipe_type in self.fp32_recipes + self.fp16_recipes: - with self.subTest(recipe=recipe_type.value): + model = SimpleLinearModel().eval() + example_inputs = [(torch.randn(1, 64),)] + + # Test with different group sizes + for group_size in [16, 32, 64]: + with self.subTest(group_size=group_size): session = export( model=model, example_inputs=example_inputs, - export_recipe=ExportRecipe.get_recipe(recipe_type), + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, + group_size=group_size, + ), + ) + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs( + session, example_inputs, atol=1e-2 + ) + self._compare_eager_unquantized_model_outputs( + session, model, example_inputs ) - session.print_delegation_info() - self.check_fully_delegated(session.get_executorch_program()) - - if IS_VALID_TEST_RUNTIME: - self.assertTrue( - torch.allclose( - session.run_method("forward", example_inputs[0])[0], - model(*example_inputs[0]), - atol=1e-3, - ) - ) - def test_unsupported_recipe_type(self): - """Test that unsupported recipe types return None""" - from executorch.export import RecipeType + def test_codebook_weight_only_default(self): + """Test codebook quantization with default parameters (3 bits)""" - class UnsupportedRecipeType(RecipeType): - UNSUPPORTED = "unsupported" + class SimpleLinearModel(nn.Module): + def __init__(self): + super().__init__() + self.layer = nn.Linear(32, 2) - @classmethod - def get_backend_name(cls) -> str: - return "dummy" + def forward(self, x): + return self.layer(x) + + model = SimpleLinearModel().eval() + example_inputs = [(torch.randn(1, 32),)] + + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, + block_size=[-1, 8], + ), + ) + self.check_fully_delegated(session) - recipe = self.provider.create_recipe(UnsupportedRecipeType.UNSUPPORTED) - self.assertIsNone(recipe) + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) - def test_recipe_registry_integration(self): - """Test that recipes work with the global recipe registry""" - for recipe_type in self.fp32_recipes + self.fp16_recipes: - with self.subTest(recipe=recipe_type.value): - recipe = ExportRecipe.get_recipe(recipe_type) - self.assertIsNotNone(recipe) - self.assertEqual(recipe.name, recipe_type.value) + def test_codebook_weight_only_custom_bits(self): + """Test codebook quantization with different bit configurations""" - def test_invalid_recipe_kwargs(self): - """Test detailed error messages for invalid kwargs""" - provider = CoreMLRecipeProvider() + class SimpleLinearModel(nn.Module): + def __init__(self): + super().__init__() + self.layer 
= nn.Linear(32, 2) - # Test single invalid parameter - with self.assertRaises(ValueError) as cm: - provider.create_recipe(CoreMLRecipeType.FP16, invalid_param=123) + def forward(self, x): + return self.layer(x) + + model = SimpleLinearModel().eval() + example_inputs = [(torch.randn(1, 32),)] + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=4 + ), + ) + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) + + def test_codebook_weight_only_custom_block_size(self): + """Test codebook quantization with custom block sizes""" - error_msg = str(cm.exception) - self.assertIn("Unexpected parameters", error_msg) + class SimpleLinearModel(nn.Module): + def __init__(self): + super().__init__() + self.layer = nn.Linear(32, 2) + + def forward(self, x): + return self.layer(x) + + model = SimpleLinearModel().eval() + example_inputs = [(torch.randn(1, 32),)] + + # Test different block sizes + test_cases = [ + {"bits": 3, "block_size": [-1, 8]}, + ] + + for kwargs in test_cases: + with self.subTest(kwargs=kwargs): + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, **kwargs + ), + ) + self.check_fully_delegated(session) - # Test multiple invalid parameters + def test_codebook_parameter_validation(self): + """Test codebook parameter validation""" + # Test invalid bits type with self.assertRaises(ValueError) as cm: - provider.create_recipe( - CoreMLRecipeType.FP32, param1="value1", param2="value2" - ) + self.provider.create_recipe(CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits="3") + self.assertIn("must be an integer", str(cm.exception)) - error_msg = str(cm.exception) - self.assertIn("Unexpected parameters", error_msg) + # Test bits out of range + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe(CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=0) + self.assertIn("must be between 1 and 8", str(cm.exception)) - # Test mix of valid and invalid parameters with self.assertRaises(ValueError) as cm: - provider.create_recipe( - CoreMLRecipeType.FP32, - minimum_deployment_target=ct.target.iOS16, # valid - invalid_param="invalid", # invalid - ) + self.provider.create_recipe(CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, bits=9) + self.assertIn("must be between 1 and 8", str(cm.exception)) - error_msg = str(cm.exception) - self.assertIn("Unexpected parameters", error_msg) + # Test invalid block_size type + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe( + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, block_size="[-1, 16]" + ) + self.assertIn("must be a list", str(cm.exception)) - def test_valid_kwargs(self): - """Test valid kwargs""" - recipe = self.provider.create_recipe( - CoreMLRecipeType.FP32, - minimum_deployment_target=ct.target.iOS16, - compute_unit=ct.ComputeUnit.CPU_AND_GPU, - ) - self.assertIsNotNone(recipe) - self.assertEqual(recipe.name, "coreml_fp32") + def test_int8_static_quantization(self): + """Test INT8 static quantization (weights + activations)""" - # Verify partitioners are properly configured - partitioners = recipe.lowering_recipe.partitioners - self.assertEqual(len(partitioners), 1, "Expected exactly one partitioner") + class SimpleLinearModel(nn.Module): + def __init__(self): + super().__init__() + self.layer1 = nn.Linear(32, 
16) + self.layer2 = nn.Linear(16, 2) - # Verify delegation spec and compile specs - delegation_spec = partitioners[0].delegation_spec - self.assertIsNotNone(delegation_spec, "Delegation spec should not be None") + def forward(self, x): + x = torch.relu(self.layer1(x)) + x = self.layer2(x) + return x - compile_specs = delegation_spec.compile_specs - self.assertIsNotNone(compile_specs, "Compile specs should not be None") + model = SimpleLinearModel().eval() + example_inputs = [(torch.randn(1, 32),)] - spec_dict = {spec.key: spec.value for spec in compile_specs} + recipe = ExportRecipe.get_recipe( + CoreMLRecipeType.PT2E_INT8_STATIC, minimum_deployment_target=ct.target.iOS17 + ) - # Assert that all expected specs are present with correct values - self.assertIn( - "min_deployment_target", - spec_dict, - "minimum_deployment_target should be in compile specs", + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=recipe, ) - min_target_value = spec_dict["min_deployment_target"] - if isinstance(min_target_value, bytes): - min_target_value = min_target_value.decode("utf-8") - self.assertEqual( - str(min_target_value), - str(ct.target.iOS16.value), - "minimum_deployment_target should match the provided value", + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-3) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) + + def test_int8_weight_only_pt2e(self): + """Test PT2E-based INT8 weight-only quantization""" + model = TestHelperModules.TwoLinearModule().eval() + example_inputs = [(torch.randn(9, 8),)] + + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY + ), ) + self.check_fully_delegated(session) - self.assertIn( - "compute_units", spec_dict, "compute_unit should be in compile specs" - ) - compute_unit_value = spec_dict["compute_units"] - if isinstance(compute_unit_value, bytes): - compute_unit_value = compute_unit_value.decode("utf-8") - self.assertEqual( - str(compute_unit_value), - ct.ComputeUnit.CPU_AND_GPU.name.lower(), - "compute_unit should match the provided value", + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) + + def test_int8_weight_only_pt2e_with_conv(self): + """Test PT2E-based INT8 weight-only quantization with convolution layers""" + + class ConvModel(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 16, 3, padding=1) + self.conv2 = nn.Conv2d(16, 32, 3, padding=1) + self.pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(32, 10) + + def forward(self, x): + x = torch.relu(self.conv1(x)) + x = torch.relu(self.conv2(x)) + x = self.pool(x) + x = x.view(x.size(0), -1) + x = self.fc(x) + return x + + model = ConvModel().eval() + example_inputs = [(torch.randn(1, 3, 32, 32),)] + + session = export( + model=model, + example_inputs=example_inputs, + export_recipe=ExportRecipe.get_recipe( + CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY + ), ) + self.check_fully_delegated(session) + + self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2) + self._compare_eager_unquantized_model_outputs(session, model, example_inputs) + + def test_pt2e_recipes_parameter_rejection(self): + """Test that PT2E recipes reject TorchAO-specific parameters""" + # PT2E recipes should reject TorchAO-specific parameters + 
pt2e_recipes = [ + CoreMLRecipeType.PT2E_INT8_STATIC, + CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY, + ] + torchao_params = ["filter_fn", "group_size", "bits", "block_size"] + + for recipe_type in pt2e_recipes: + for param in torchao_params: + with self.subTest(recipe=recipe_type.value, param=param): + kwargs = {param: "dummy_value"} + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe(recipe_type, **kwargs) + self.assertIn("unexpected parameters", str(cm.exception).lower()) + + def test_filter_fn_comprehensive(self): + """Comprehensive test for filter_fn parameter functionality""" + + def custom_filter(module, fqn): + return isinstance(module, nn.Linear) and "target" in fqn + + # Test 1: TorchAO recipes accept filter_fn and default to None + torchao_recipes = [ + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, + ] + + for recipe_type in torchao_recipes: + with self.subTest(f"{recipe_type.value}_default"): + # Test default behavior (None) + recipe = self.provider.create_recipe(recipe_type) + config = recipe.quantization_recipe.ao_quantization_configs[0] + self.assertIsNone(config.filter_fn) + + with self.subTest(f"{recipe_type.value}_custom"): + # Test custom filter_fn + recipe = self.provider.create_recipe( + recipe_type, filter_fn=custom_filter + ) + config = recipe.quantization_recipe.ao_quantization_configs[0] + self.assertEqual(config.filter_fn, custom_filter) + + # Test 2: Codebook recipe accepts filter_fn and has sensible default + with self.subTest("codebook_default"): + recipe = self.provider.create_recipe(CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY) + config = recipe.quantization_recipe.ao_quantization_configs[0] + self.assertIsNotNone(config.filter_fn) + + # Test default filter targets Linear and Embedding layers + linear_module = nn.Linear(10, 5) + embedding_module = nn.Embedding(100, 10) + conv_module = nn.Conv2d(3, 16, 3) + + self.assertTrue(config.filter_fn(linear_module, "linear")) + self.assertTrue(config.filter_fn(embedding_module, "embedding")) + self.assertFalse(config.filter_fn(conv_module, "conv")) + + with self.subTest("codebook_custom"): + recipe = self.provider.create_recipe( + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, filter_fn=custom_filter + ) + config = recipe.quantization_recipe.ao_quantization_configs[0] + self.assertEqual(config.filter_fn, custom_filter) + + def test_quantization_recipe_structure(self): + """Test that quantization recipes have proper structure""" + quantization_recipes = [ + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, + ] + + for recipe_type in quantization_recipes: + with self.subTest(recipe=recipe_type.value): + recipe = self.provider.create_recipe(recipe_type) + self.assertIsNotNone(recipe) + + # Should have quantization recipe with ao_quantization_configs + self.assertIsNotNone(recipe.quantization_recipe) + self.assertIsNotNone(recipe.quantization_recipe.ao_quantization_configs) + self.assertEqual( + len(recipe.quantization_recipe.ao_quantization_configs), 1 + ) + + # Should have lowering recipe + self.assertIsNotNone(recipe.lowering_recipe) + self.assertIsNotNone(recipe.lowering_recipe.partitioners) + + def test_recipe_creation_with_defaults(self): + """Test that recipes work with default parameters""" + # 
Test that all recipes can be created without explicit parameters + all_recipes = [ + CoreMLRecipeType.FP32, + CoreMLRecipeType.FP16, + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, # should use default group_size=32 + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL, + CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, # should use default group_size=32 + CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, # should use default bits=3, block_size=[-1,16] + ] + + for recipe_type in all_recipes: + with self.subTest(recipe=recipe_type.value): + recipe = self.provider.create_recipe(recipe_type) + self.assertIsNotNone(recipe) + self.assertEqual(recipe.name, recipe_type.value) + + def test_minimum_deployment_target_validation(self): + """Test that minimum_deployment_target validation works correctly for quantization recipes""" + test_cases = [ + (CoreMLRecipeType.PT2E_INT8_STATIC, ct.target.iOS17), + (CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY, ct.target.iOS17), + (CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL, ct.target.iOS18), + (CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP, ct.target.iOS18), + (CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL, ct.target.iOS18), + (CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP, ct.target.iOS18), + (CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY, ct.target.iOS18), + ] + + for recipe_type, min_target in test_cases: + with self.subTest(recipe=recipe_type.value): + + # Test 1: Providing deployment target below minimum should raise ValueError + too_low_target = ct.target.iOS15 + with self.assertRaises(ValueError) as cm: + self.provider.create_recipe( + recipe_type, minimum_deployment_target=too_low_target + ) + error_msg = str(cm.exception) + self.assertIn( + f"minimum_deployment_target must be {str(min_target)} or higher", + error_msg, + ) + + # Test 2: Providing valid deployment target should work + valid_recipe = self.provider.create_recipe( + recipe_type, minimum_deployment_target=min_target + ) + self.assertIsNotNone(valid_recipe) + + # Test 3: Not providing deployment target should default to minimum + default_recipe = self.provider.create_recipe(recipe_type) + self.assertIsNotNone(default_recipe) + + # Test 4: Providing deployment target higher than minimum should work + higher_target = ( + ct.target.iOS18 + if min_target == ct.target.iOS17 + else ct.target.iOS18 + ) + higher_recipe = self.provider.create_recipe( + recipe_type, minimum_deployment_target=higher_target + ) + self.assertIsNotNone(higher_recipe)
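
Usage notes (not part of the patch): a minimal sketch of exporting with one of the new TorchAO weight-only recipes, mirroring the tests in test_coreml_recipes.py. The module definition and shapes are illustrative; export, ExportRecipe.get_recipe, and print_delegation_info are the APIs exercised by the tests above.

import torch
from torch import nn

from executorch.backends.apple.coreml.recipes import CoreMLRecipeType
from executorch.export import export, ExportRecipe


class SmallLinearModel(nn.Module):
    # Illustrative two-layer model; any nn.Module exported as in the tests above works.
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(32, 32)
        self.layer2 = nn.Linear(32, 8)

    def forward(self, x):
        return self.layer2(torch.relu(self.layer1(x)))


model = SmallLinearModel().eval()
example_inputs = [(torch.randn(1, 32),)]

# INT4 weight-only, per-group quantization; group_size defaults to 32 when omitted.
session = export(
    model=model,
    example_inputs=example_inputs,
    export_recipe=ExportRecipe.get_recipe(
        CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP,
        group_size=16,
    ),
)
session.print_delegation_info()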
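
The codebook recipe takes bits, block_size, and filter_fn; per the provider, the defaults are bits=3, block_size=[-1, 16], and a filter targeting Linear and Embedding layers. The predicate below is an illustrative replacement for that default, not the built-in behavior:

# Illustrative filter: palettize Linear layers only, skipping Embedding.
def linear_only(module, fqn):
    return isinstance(module, nn.Linear)

codebook_recipe = ExportRecipe.get_recipe(
    CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY,
    bits=4,              # 1-8, defaults to 3
    block_size=[-1, 8],  # defaults to [-1, 16]
    filter_fn=linear_only,
)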
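
Each quantization recipe also pins a minimum deployment target (iOS17 for the PT2E recipes, iOS18 for the TorchAO and codebook recipes): omitting minimum_deployment_target falls back to that floor, while passing anything lower raises a ValueError, as exercised by test_minimum_deployment_target_validation. A small sketch of that behavior:

import coremltools as ct

from executorch.backends.apple.coreml.recipes import CoreMLRecipeProvider

provider = CoreMLRecipeProvider()
try:
    provider.create_recipe(
        CoreMLRecipeType.PT2E_INT8_STATIC,
        minimum_deployment_target=ct.target.iOS15,  # below the iOS17 floor for PT2E recipes
    )
except ValueError as err:
    print(err)  # "minimum_deployment_target must be ... or higher for pt2e quantization"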