Skip to content

Commit cb7440e

Browse files
Update
[ghstack-poisoned]
2 parents e33390f + fab2d54 commit cb7440e

File tree

122 files changed

+5147
-581
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

122 files changed

+5147
-581
lines changed

.github/workflows/pull.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,6 +801,8 @@ jobs:
801801
id-token: write
802802
contents: read
803803
strategy:
804+
matrix:
805+
enable-etdump: ['', '--enable-etdump']
804806
fail-fast: false
805807
with:
806808
runner: linux.2xlarge
@@ -820,7 +822,7 @@ jobs:
820822
source .ci/scripts/setup-emscripten.sh
821823
822824
# Test selective build
823-
bash scripts/build_wasm_tests.sh
825+
bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
824826
825827
# Install Jest
826828
cd cmake-out-wasm/extension/wasm/test

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,10 @@ if(EXECUTORCH_BUILD_PYBIND)
763763
list(APPEND _dep_libs xnnpack_backend XNNPACK xnnpack-microkernels-prod)
764764
endif()
765765

766+
if(EXECUTORCH_BUILD_VULKAN)
767+
list(APPEND _dep_libs vulkan_backend)
768+
endif()
769+
766770
# compile options for pybind
767771
set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
768772
-fexceptions

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,14 @@ def create_recipe(
6767
return self._build_pt2e_quantized_recipe(
6868
recipe_type, activation_dtype=torch.float32, **kwargs
6969
)
70-
elif recipe_type == CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL:
70+
elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL:
7171
return self._build_torchao_quantized_recipe(
7272
recipe_type,
7373
weight_dtype=torch.int4,
7474
is_per_channel=True,
7575
**kwargs,
7676
)
77-
elif recipe_type == CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP:
77+
elif recipe_type == CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP:
7878
group_size = kwargs.pop("group_size", 32)
7979
return self._build_torchao_quantized_recipe(
8080
recipe_type,
@@ -83,11 +83,11 @@ def create_recipe(
8383
group_size=group_size,
8484
**kwargs,
8585
)
86-
elif recipe_type == CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL:
86+
elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL:
8787
return self._build_torchao_quantized_recipe(
8888
recipe_type, weight_dtype=torch.int8, is_per_channel=True, **kwargs
8989
)
90-
elif recipe_type == CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP:
90+
elif recipe_type == CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP:
9191
group_size = kwargs.pop("group_size", 32)
9292
return self._build_torchao_quantized_recipe(
9393
recipe_type,
@@ -97,8 +97,8 @@ def create_recipe(
9797
**kwargs,
9898
)
9999
elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
100-
bits = kwargs.pop("bits", 3)
101-
block_size = kwargs.pop("block_size", [-1, 16])
100+
bits = kwargs.pop("bits")
101+
block_size = kwargs.pop("block_size")
102102
return self._build_codebook_quantized_recipe(
103103
recipe_type, bits=bits, block_size=block_size, **kwargs
104104
)
@@ -124,13 +124,13 @@ def _get_expected_keys(self, recipe_type: RecipeType) -> set:
124124
common_keys = {"minimum_deployment_target", "compute_unit"}
125125

126126
if recipe_type in [
127-
CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP,
128-
CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP,
127+
CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
128+
CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
129129
]:
130130
return common_keys | {"group_size", "filter_fn"}
131131
elif recipe_type in [
132-
CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_CHANNEL,
133-
CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_CHANNEL,
132+
CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
133+
CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL,
134134
]:
135135
return common_keys | {"filter_fn"}
136136
elif recipe_type == CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
@@ -161,8 +161,8 @@ def _validate_group_size_parameter(
161161
if (
162162
recipe_type
163163
in [
164-
CoreMLRecipeType.INT4_WEIGHT_ONLY_PER_GROUP,
165-
CoreMLRecipeType.INT8_WEIGHT_ONLY_PER_GROUP,
164+
CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
165+
CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
166166
]
167167
and "group_size" in kwargs
168168
):
@@ -183,6 +183,12 @@ def _validate_codebook_parameters(
183183
if recipe_type != CoreMLRecipeType.CODEBOOK_WEIGHT_ONLY:
184184
return
185185

186+
# Both bits and block_size must be present
187+
if not ("bits" in kwargs and "block_size" in kwargs):
188+
raise ValueError(
189+
"Parameters 'bits' and 'block_size' must be present for codebook recipes"
190+
)
191+
186192
if "bits" in kwargs:
187193
bits = kwargs["bits"]
188194
if not isinstance(bits, int):

backends/apple/coreml/recipes/coreml_recipe_types.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,21 @@ class CoreMLRecipeType(RecipeType):
3131
## TorchAO-based quantization recipes
3232
# All TorchAO recipes accept filter_fn kwarg to control which layers are quantized
3333
# INT4 Weight-only Quantization, per-channel (axis=0)
34-
# Additional kwargs: filter_fn (default: None - quantizes linear layers)
35-
INT4_WEIGHT_ONLY_PER_CHANNEL = "coreml_int4_weight_only_per_channel"
34+
# Additional kwargs: filter_fn (default: Embedding and linear layers)
35+
TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL = "coreml_torchao_int4_weight_only_per_channel"
3636
# INT4 Weight-only Quantization, per-group
37-
# Additional kwargs: group_size (default: 32), filter_fn (default: None - quantizes linear layers)
38-
INT4_WEIGHT_ONLY_PER_GROUP = "coreml_int4_weight_only_per_group"
37+
# Additional kwargs: group_size (default: 32), filter_fn (default: Embedding and linear layers)
38+
TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP = "coreml_torchao_int4_weight_only_per_group"
3939
# INT8 Weight-only Quantization, per-channel (axis=0)
40-
# Additional kwargs: filter_fn (default: None - quantizes linear layers)
41-
INT8_WEIGHT_ONLY_PER_CHANNEL = "coreml_int8_weight_only_per_channel"
40+
# Additional kwargs: filter_fn (default: Embedding and linear layers)
41+
TORCHAO_INT8_WEIGHT_ONLY_PER_CHANNEL = "coreml_torchao_int8_weight_only_per_channel"
4242
# INT8 Weight-only Quantization, per-group
43-
# Additional kwargs: group_size (default: 32), filter_fn (default: None - quantizes linear layers)
44-
INT8_WEIGHT_ONLY_PER_GROUP = "coreml_int8_weight_only_per_group"
43+
# Additional kwargs: group_size (default: 32), filter_fn (default: Embedding and linear layers)
44+
TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP = "coreml_torchao_int8_weight_only_per_group"
4545

4646
## Codebook/Palettization Quantization
47-
# Additional kwargs: bits (1-8, default: 3), block_size (default: [-1, 16]),
48-
# filter_fn (default: targets Linear and Embedding layers only)
47+
# Mandatory kwargs: bits (range: 1-8), block_size (list of ints)
48+
# Optional kwargs: filter_fn (default: targets Linear and Embedding layers)
4949
CODEBOOK_WEIGHT_ONLY = "coreml_codebook_weight_only"
5050

5151
@classmethod

backends/apple/coreml/runtime/delegate/coreml_backend_delegate.mm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
using executorch::runtime::get_backend_class;
4747
using executorch::runtime::Result;
4848
using executorch::aten::SizesType;
49+
using executorch::runtime::Span;
4950
using executorch::aten::Tensor;
5051
using executorch::runtime::kTensorDimensionLimit;
5152

@@ -197,7 +198,7 @@ ModelLoggingOptions get_logging_options(BackendExecutionContext& context) {
197198

198199
Error CoreMLBackendDelegate::execute(BackendExecutionContext& context,
199200
DelegateHandle* handle,
200-
EValue** args) const {
201+
Span<EValue*> args) const {
201202
const auto& nArgs = impl_->get_num_arguments(handle);
202203
std::vector<MultiArray> delegate_args;
203204
size_t nInputs = nArgs.first;

backends/apple/coreml/runtime/include/coreml_backend/delegate.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class CoreMLBackendDelegate final : public ::executorch::runtime::BackendInterfa
4848
/// @retval On success, `Error::Ok` otherwise any other `Error` case.
4949
executorch::runtime::Error execute(executorch::runtime::BackendExecutionContext& context,
5050
executorch::runtime::DelegateHandle* handle,
51-
executorch::runtime::EValue** args) const override;
51+
executorch::runtime::Span<executorch::runtime::EValue*> args) const override;
5252

5353
/// Returns `true` if the delegate is available otherwise `false`.
5454
bool is_available() const override;

0 commit comments

Comments
 (0)