pytorch
diff --git a/.github/workflows/pull.yml
Lines changed: 3 additions & 1 deletion b/.github/workflows/pull.yml
Lines changed: 3 additions & 1 deletion
diff --git a/CMakeLists.txt
Lines changed: 4 additions & 0 deletions b/CMakeLists.txt
Lines changed: 4 additions & 0 deletions
diff --git a/backends/apple/coreml/recipes/coreml_recipe_provider.py
Lines changed: 18 additions & 12 deletions b/backends/apple/coreml/recipes/coreml_recipe_provider.py
Lines changed: 18 additions & 12 deletions
diff --git a/backends/apple/coreml/recipes/coreml_recipe_types.py
Lines changed: 10 additions & 10 deletions b/backends/apple/coreml/recipes/coreml_recipe_types.py
Lines changed: 10 additions & 10 deletions
diff --git a/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
Lines changed: 2 additions & 1 deletion b/backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm
Lines changed: 2 additions & 1 deletion
diff --git a/backends/apple/coreml/runtime/include/coreml_backend/delegate.h
Lines changed: 1 addition & 1 deletion b/backends/apple/coreml/runtime/include/coreml_backend/delegate.h
Lines changed: 1 addition & 1 deletion
@@ -801,6 +801,8 @@ jobs:
       id-token: write
       contents: read
     strategy:
+      matrix:
+        enable-etdump: ['', '--enable-etdump']
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -820,7 +822,7 @@ jobs:
         source .ci/scripts/setup-emscripten.sh
 
         # Test selective build
-        bash scripts/build_wasm_tests.sh
+        bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
 
         # Install Jest
         cd cmake-out-wasm/extension/wasm/test
 
@@ -763,6 +763,10 @@ if(EXECUTORCH_BUILD_PYBIND)
     list(APPEND _dep_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
   endif()
 
+  if(EXECUTORCH_BUILD_VULKAN)
+    list(APPEND _dep_libs vulkan_backend)
+  endif()
+
   # compile options for pybind
   set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
                               -fexceptions
 
@@ -67,14 +67,14 @@ def create_recipe(
             return self._build_pt2e_quantized_recipe(
                 recipe_type, activation_dtype=torch.float32, **kwargs
             )
-        elif recipe_type == CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL:
+        elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL:
             return self._build_torchao_quantized_recipe(
                 recipe_type,
                 weight_dtype=torch.int4,
                 is_per_channel=True,
                 **kwargs,
             )
-        elif recipe_type == CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP:
+        elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP:
             group_size = kwargs.pop("group_size", 32)
             return self._build_torchao_quantized_recipe(
                 recipe_type,
@@ -83,11 +83,11 @@ def create_recipe(
                 group_size=group_size,
                 **kwargs,
             )
-        elif recipe_type == CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL:
+        elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL:
             return self._build_torchao_quantized_recipe(
                 recipe_type, weight_dtype=torch.int8, is_per_channel=True, **kwargs
             )
-        elif recipe_type == CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP:
+        elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP:
             group_size = kwargs.pop("group_size", 32)
             return self._build_torchao_quantized_recipe(
                 recipe_type,
@@ -97,8 +97,8 @@ def create_recipe(
                 **kwargs,
             )
         elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
-            bits = kwargs.pop("bits", 3)
-            block_size = kwargs.pop("block_size", [-1, 16])
+            bits = kwargs.pop("bits")
+            block_size = kwargs.pop("block_size")
             return self._build_codebook_quantized_recipe(
                 recipe_type, bits=bits, block_size=block_size, **kwargs
             )
@@ -124,13 +124,13 @@ def _get_expected_keys(self, recipe_type: RecipeType) -> set:
         common_keys = {"minimum_deployment_target", "compute_unit"}
 
         if recipe_type in [
-            CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP,
-            CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP,
+            CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+            CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
         ]:
             return common_keys | {"group_size", "filter_fn"}
         elif recipe_type in [
-            CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL,
-            CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL,
+            CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
+            CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
         ]:
             return common_keys | {"filter_fn"}
         elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
@@ -161,8 +161,8 @@ def _validate_group_size_parameter(
         if (
             recipe_type
             in [
-                CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP,
-                CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP,
+                CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
+                CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
             ]
             and "group_size" in kwargs
         ):
@@ -183,6 +183,12 @@ def _validate_codebook_parameters(
         if recipe_type != CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
             return
 
+        # Both bits and block_size must be present
+        if not ("bits" in kwargs and "block_size" in kwargs):
+            raise ValueError(
+                "Parameters 'bits' and 'block_size' must be present for codebook recipes"
+            )
+
         if "bits" in kwargs:
             bits = kwargs["bits"]
             if not isinstance(bits, int):
 
@@ -31,21 +31,21 @@ class CoreMLRecipeType(RecipeType):
     ## TorchAO-based quantization recipes
     # All TorchAO recipes accept filter_fn kwarg to control which layers are quantized
     # INT4 Weight-only Quantization, per-channel (axis=0)
-    # Additional kwargs: filter_fn (default: None - quantizes linear layers)
-    INT4_WEIGHT_ONLY_PER_CHANNEL = "coreml_int4_weight_only_per_channel"
+    # Additional kwargs: filter_fn (default: Embedding and linear layers)
+    TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL = "coreml_torchao_int4_weight_only_per_channel"
     # INT4 Weight-only Quantization, per-group
-    # Additional kwargs: group_size (default: 32), filter_fn (default: None - quantizes linear layers)
-    INT4_WEIGHT_ONLY_PER_GROUP = "coreml_int4_weight_only_per_group"
+    # Additional kwargs: group_size (default: 32), filter_fn (default: Embedding and linear layers)
+    TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP = "coreml_torchao_int4_weight_only_per_group"
     # INT8 Weight-only Quantization, per-channel (axis=0)
-    # Additional kwargs: filter_fn (default: None - quantizes linear layers)
-    INT8_WEIGHT_ONLY_PER_CHANNEL = "coreml_int8_weight_only_per_channel"
+    # Additional kwargs: filter_fn (default: Embedding and linear layers)
+    TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL = "coreml_torchao_int8_weight_only_per_channel"
     # INT8 Weight-only Quantization, per-group
-    # Additional kwargs: group_size (default: 32), filter_fn (default: None - quantizes linear layers)
-    INT8_WEIGHT_ONLY_PER_GROUP = "coreml_int8_weight_only_per_group"
+    # Additional kwargs: group_size (default: 32), filter_fn (default: Embedding and linear layers)
+    TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP = "coreml_torchao_int8_weight_only_per_group"
 
     ## Codebook/Palettization Quantization
-    # Additional kwargs: bits (1-8, default: 3), block_size (default: [-1, 16]),
-    # filter_fn (default: targets Linear and Embedding layers only)
+    # Mandatory kwargs: bits (range: 1-8), block_size (list of ints)
+    # Optional kwargs: filter_fn (default: targets Linear and Embedding layers)
     CODEBOOK_WEIGHT_ONLY = "coreml_codebook_weight_only"
 
     @classmethod
 
@@ -46,6 +46,7 @@
 using executorch::runtime::get_backend_class;
 using executorch::runtime::Result;
 using executorch::aten::SizesType;
+using executorch::runtime::Span;
 using executorch::aten::Tensor;
 using executorch::runtime::kTensorDimensionLimit;
 
@@ -197,7 +198,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
 
 Error CoreMLBackendDelegate::execute(BackendExecutionContext& context,
                                      DelegateHandle* handle,
-                                     EValue** args) const {
+                                     Span<EValue*> args) const {
     const auto& nArgs = impl_->get_num_arguments(handle);
     std::vector<MultiArray> delegate_args;
     size_t nInputs = nArgs.first;
 
@@ -48,7 +48,7 @@ class CoreMLBackendDelegate final : public ::executorch::runtime::BackendInterfa
     /// @retval On success, `Error::Ok` otherwise any other `Error` case.
     executorch::runtime::Error execute(executorch::runtime::BackendExecutionContext& context,
                                        executorch::runtime::DelegateHandle* handle,
-                                       executorch::runtime::EValue** args) const override;
+                                       executorch::runtime::Span<executorch::runtime::EValue*> args) const override;
 
     /// Returns `true` if the delegate is available otherwise `false`.
     bool is_available() const override;