Skip to content

Commit bdad6bb

Browse files
committed
Update
[ghstack-poisoned]
2 parents 223d5d6 + 41c299f commit bdad6bb

File tree

110 files changed

+3085
-450
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

110 files changed

+3085
-450
lines changed

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ test_model() {
9797
bash examples/models/llava/install_requirements.sh
9898
STRICT="--no-strict"
9999
fi
100-
if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
100+
if [[ "${MODEL_NAME}" == "qwen2_5_1_5b" ]]; then
101101
# Install requirements for export_llama
102102
bash examples/models/llama/install_requirements.sh
103103
# Test export_llm script: python3 -m extension.llm.export.export_llm.

.github/workflows/trunk.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ jobs:
176176
- model: phi_4_mini
177177
backend: portable
178178
runner: linux.arm64.m7g.4xlarge
179-
- model: qwen2_5
179+
- model: qwen2_5_1_5b
180180
backend: portable
181181
runner: linux.arm64.2xlarge
182182
- model: llama3_2_vision_encoder
@@ -823,10 +823,10 @@ jobs:
823823
--tsv_path ${TSV_PATH}
824824
echo "::endgroup::"
825825
826-
test-huggingface-transformers-coreml:
826+
test-huggingface-transformers-macos:
827827
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
828828
if: ${{ !github.event.pull_request.head.repo.fork }}
829-
name: test-huggingface-transformers-coreml
829+
name: test-huggingface-transformers-macos
830830
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
831831
permissions:
832832
id-token: write
@@ -844,10 +844,10 @@ jobs:
844844
# phi4-mini|xnnpack|--quantize,
845845
# smollm2-135m|xnnpack|--quantize,
846846
# smollm3-3b|xnnpack|--quantize,
847+
# qwen3-1.7b|xnnpack|--quantize,
847848
# CoreML.
848849
llama3.2-1b|coreml_fp32_gpu|--quantize,
849850
qwen3-0.6b|coreml_fp32_gpu|--quantize,
850-
qwen3-1.7b|xnnpack|--quantize,
851851
smollm2-135m|coreml_fp32_gpu|--quantize,
852852
olmo-1b|coreml_fp32_gpu|--quantize,
853853
bert|coreml_fp32_gpu|--quantize,

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ To get started you can:
5252

5353
- Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
5454
- Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
55-
- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
55+
- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).
5656

5757
## Feedback and Engagement
5858

backends/apple/coreml/TARGETS

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,21 @@ runtime.python_library(
6161
)
6262

6363
runtime.python_library(
64-
name = "recipes",
65-
srcs = glob([
66-
"recipes/*.py",
67-
]),
64+
name = "coreml_recipes",
65+
srcs = [
66+
"recipes/__init__.py",
67+
"recipes/coreml_recipe_provider.py"
68+
],
6869
visibility = [
6970
"@EXECUTORCH_CLIENTS",
71+
"//executorch/export/...",
7072
],
7173
deps = [
7274
"fbsource//third-party/pypi/coremltools:coremltools",
75+
":coreml_recipe_types",
7376
":backend",
77+
":partitioner",
78+
":quantizer",
7479
"//caffe2:torch",
7580
"//executorch/exir:lib",
7681
"//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
8085
],
8186
)
8287

88+
runtime.python_library(
89+
name = "coreml_recipe_types",
90+
srcs = [
91+
"recipes/coreml_recipe_types.py",
92+
],
93+
visibility = [
94+
"@EXECUTORCH_CLIENTS",
95+
"//executorch/export/...",
96+
],
97+
deps = [
98+
"//executorch/export:recipe",
99+
],
100+
)
101+
83102
runtime.cxx_python_extension(
84103
name = "executorchcoreml",
85104
srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
124143
"fbsource//third-party/pypi/pytest:pytest",
125144
":partitioner",
126145
":quantizer",
127-
":recipes",
146+
":coreml_recipes",
128147
"//caffe2:torch",
129148
"//pytorch/vision:torchvision",
130149
"fbsource//third-party/pypi/scikit-learn:scikit-learn",

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ def dequantize_affine(context, node):
175175
int_data.astype(quantized_np_dtype),
176176
zero_point,
177177
scale,
178-
axis=-1,
179178
name=node.name,
180179
)
181180
context.add(output, node.name)

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Please refer to the license found in the LICENSE file in the root directory of the source tree.
44

55

6+
import logging
67
from typing import Any, Optional, Sequence
78

89
import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
111112

112113
unexpected = set(kwargs.keys()) - expected_keys
113114
if unexpected:
114-
raise ValueError(
115-
f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
115+
logging.warning(
116+
f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
117+
f"Expected parameters: {list(expected_keys)}"
116118
)
117119

118120
self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
121123

122124
def _get_expected_keys(self, recipe_type: RecipeType) -> set:
123125
"""Get expected parameter keys for a recipe type"""
124-
common_keys = {"minimum_deployment_target", "compute_unit"}
126+
common_keys = {
127+
"minimum_deployment_target",
128+
"compute_unit",
129+
"skip_ops_for_coreml_delegation",
130+
"lower_full_graph",
131+
"take_over_constant_data",
132+
}
125133

126134
if recipe_type in [
127135
CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
377385
if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
378386
take_over_mutable_buffer = False
379387

388+
# Extract additional partitioner parameters
389+
skip_ops_for_coreml_delegation = kwargs.get(
390+
"skip_ops_for_coreml_delegation", None
391+
)
392+
lower_full_graph = kwargs.get("lower_full_graph", False)
393+
take_over_constant_data = kwargs.get("take_over_constant_data", True)
394+
380395
partitioner = CoreMLPartitioner(
381396
compile_specs=compile_specs,
382397
take_over_mutable_buffer=take_over_mutable_buffer,
398+
skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
399+
lower_full_graph=lower_full_graph,
400+
take_over_constant_data=take_over_constant_data,
383401
)
384402

385403
edge_compile_config = EdgeCompileConfig(

backends/apple/coreml/test/test_coreml_recipes.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
185185
)
186186
self.assertIn("must be positive", str(cm.exception))
187187

188-
# Test unexpected parameter
189-
with self.assertRaises(ValueError) as cm:
190-
self.provider.create_recipe(
191-
CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
192-
group_size=32, # group_size not valid for per-channel
193-
)
194-
self.assertIn("unexpected parameters", str(cm.exception))
195-
196188
def test_int8_weight_only_per_channel(self):
197189
"""Test INT8 weight-only per-channel quantization"""
198190
model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
385377
self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
386378
self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
387379

388-
def test_pt2e_recipes_parameter_rejection(self):
389-
"""Test that PT2E recipes reject TorchAO-specific parameters"""
390-
# PT2E recipes should reject TorchAO-specific parameters
391-
pt2e_recipes = [
392-
CoreMLRecipeType.PT2E_INT8_STATIC,
393-
CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
394-
]
395-
torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
396-
397-
for recipe_type in pt2e_recipes:
398-
for param in torchao_params:
399-
with self.subTest(recipe=recipe_type.value, param=param):
400-
kwargs = {param: "dummy_value"}
401-
with self.assertRaises(ValueError) as cm:
402-
self.provider.create_recipe(recipe_type, **kwargs)
403-
self.assertIn("unexpected parameters", str(cm.exception).lower())
404-
405380
def test_filter_fn_comprehensive(self):
406381
"""Comprehensive test for filter_fn parameter functionality"""
407382

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727
class TestTorchOps(unittest.TestCase):
2828
edge_compile_config = executorch.exir.EdgeCompileConfig()
2929

30-
def _coreml_partitioner(self):
30+
def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
3131
compile_specs = CoreMLBackend.generate_compile_specs(
32-
minimum_deployment_target=ct.target.iOS18
32+
minimum_deployment_target=minimum_deployment_target
3333
)
3434
return CoreMLPartitioner(compile_specs=compile_specs)
3535

@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
158158
et_prog = delegated_program.to_executorch()
159159
self._compare_outputs(et_prog, model, example_inputs)
160160

161+
def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
162+
model, example_inputs = self._get_test_model()
163+
quantize_(
164+
model,
165+
IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
166+
lambda m, fqn: isinstance(m, torch.nn.Embedding),
167+
)
168+
quantize_(
169+
model,
170+
IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
171+
)
172+
ep = torch.export.export(model, example_inputs)
173+
delegated_program = executorch.exir.to_edge_transform_and_lower(
174+
ep,
175+
partitioner=[
176+
self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
177+
],
178+
)
179+
for node in delegated_program.exported_program().graph.nodes:
180+
if node.op == "call_function":
181+
assert node.target.__name__ in [
182+
"executorch_call_delegate",
183+
"getitem",
184+
], f"Got unexpected node target after delegation: {node.target.__name__}"
185+
et_prog = delegated_program.to_executorch()
186+
self._compare_outputs(et_prog, model, example_inputs)
187+
161188
def test_dequantize_codebook_linear_per_grouped_col(self):
162189
model, example_inputs = self._get_test_model()
163190
quantize_(
@@ -298,6 +325,7 @@ def forward(self, x):
298325
test_runner.test_dequantize_affine_c4w_embedding()
299326
test_runner.test_dequantize_affine_c4w_linear()
300327
test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
328+
test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
301329
test_runner.test_dequantize_codebook_linear_per_grouped_col()
302330
test_runner.test_dequantize_codebook_linear_per_grouped_row()
303331
test_runner.test_dequantize_codebook_embedding_per_grouped_col()

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa
3838
from .decompose_cumsum_pass import DecomposeCumsumPass # noqa
3939
from .decompose_div_pass import DecomposeDivPass # noqa
40+
from .decompose_div_tensor_mode import DecomposeDivTensorModePass # noqa
4041
from .decompose_elu_pass import DecomposeEluPass # noqa
4142
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa
4243
from .decompose_expm1_pass import DecomposeExpm1Pass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
DecomposeCosineSimilarityPass,
4343
DecomposeCumsumPass,
4444
DecomposeDivPass,
45+
DecomposeDivTensorModePass,
4546
DecomposeEluPass,
4647
DecomposeEmbeddingPass,
4748
DecomposeExpm1Pass,
@@ -211,6 +212,7 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
211212
DecomposeMeanDimPass(exported_program.graph_module, self.tosa_spec)
212213
)
213214
self.add_pass(DecomposeNotEqualPass())
215+
self.add_pass(DecomposeDivTensorModePass())
214216
self.add_pass(DecomposeDivPass())
215217
self.add_pass(DecomposeSoftmaxPass())
216218
self.add_pass(DecomposeGeluPass())
@@ -289,6 +291,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
289291
self.add_pass(DecomposeNotEqualPass())
290292
self.add_pass(DecomposeCosineSimilarityPass())
291293
self.add_pass(DecomposeGluPass())
294+
self.add_pass(DecomposeDivTensorModePass())
292295
self.add_pass(DecomposeDivPass())
293296
self.add_pass(DecomposeLeakyReLUPass())
294297
self.add_pass(DecomposeLinearVectorNormPass())

0 commit comments

Comments
 (0)