
Commit 0cbb5e0

Merge branch 'main' into add-dim-order-clone-transform
2 parents: 17f2e6c + b02db12

Some content is hidden (large commits have some content hidden by default).

43 files changed: +1395 −520 lines

.github/workflows/trunk.yml

Lines changed: 3 additions & 3 deletions

@@ -823,10 +823,10 @@ jobs:
           --tsv_path ${TSV_PATH}
         echo "::endgroup::"
 
-  test-huggingface-transformers-coreml:
+  test-huggingface-transformers-macos:
     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
     if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-coreml
+    name: test-huggingface-transformers-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     permissions:
       id-token: write
@@ -844,10 +844,10 @@ jobs:
           # phi4-mini|xnnpack|--quantize,
           # smollm2-135m|xnnpack|--quantize,
           # smollm3-3b|xnnpack|--quantize,
+          # qwen3-1.7b|xnnpack|--quantize,
           # CoreML.
           llama3.2-1b|coreml_fp32_gpu|--quantize,
           qwen3-0.6b|coreml_fp32_gpu|--quantize,
-          qwen3-1.7b|xnnpack|--quantize,
           smollm2-135m|coreml_fp32_gpu|--quantize,
           olmo-1b|coreml_fp32_gpu|--quantize,
           bert|coreml_fp32_gpu|--quantize,

backends/apple/coreml/TARGETS

Lines changed: 24 additions & 5 deletions

@@ -61,16 +61,21 @@ runtime.python_library(
 )
 
 runtime.python_library(
-    name = "recipes",
-    srcs = glob([
-        "recipes/*.py",
-    ]),
+    name = "coreml_recipes",
+    srcs = [
+        "recipes/__init__.py",
+        "recipes/coreml_recipe_provider.py"
+    ],
     visibility = [
         "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
     ],
     deps = [
         "fbsource//third-party/pypi/coremltools:coremltools",
+        ":coreml_recipe_types",
         ":backend",
+        ":partitioner",
+        ":quantizer",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
     ],
 )
 
+runtime.python_library(
+    name = "coreml_recipe_types",
+    srcs = [
+        "recipes/coreml_recipe_types.py",
+    ],
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
+    ],
+    deps = [
+        "//executorch/export:recipe",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
-        ":recipes",
+        ":coreml_recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
         "fbsource//third-party/pypi/scikit-learn:scikit-learn",

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 21 additions & 3 deletions

@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
 
+import logging
 from typing import Any, Optional, Sequence
 
 import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
         unexpected = set(kwargs.keys()) - expected_keys
         if unexpected:
-            raise ValueError(
-                f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+            logging.warning(
+                f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
+                f"Expected parameters: {list(expected_keys)}"
             )
 
         self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> Non
 
     def _get_expected_keys(self, recipe_type: RecipeType) -> set:
         """Get expected parameter keys for a recipe type"""
-        common_keys = {"minimum_deployment_target", "compute_unit"}
+        common_keys = {
+            "minimum_deployment_target",
+            "compute_unit",
+            "skip_ops_for_coreml_delegation",
+            "lower_full_graph",
+            "take_over_constant_data",
+        }
 
         if recipe_type in [
             CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
         if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
             take_over_mutable_buffer = False
 
+        # Extract additional partitioner parameters
+        skip_ops_for_coreml_delegation = kwargs.get(
+            "skip_ops_for_coreml_delegation", None
+        )
+        lower_full_graph = kwargs.get("lower_full_graph", False)
+        take_over_constant_data = kwargs.get("take_over_constant_data", True)
+
         partitioner = CoreMLPartitioner(
             compile_specs=compile_specs,
             take_over_mutable_buffer=take_over_mutable_buffer,
+            skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
+            lower_full_graph=lower_full_graph,
+            take_over_constant_data=take_over_constant_data,
         )
 
         edge_compile_config = EdgeCompileConfig(
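Net effect of these changes: unrecognized kwargs are now logged and ignored instead of raising ValueError, and the three new common keys are forwarded to CoreMLPartitioner by every recipe. A minimal usage sketch, assuming the provider class is exposed as CoreMLRecipeProvider and the recipe enum as CoreMLRecipeType (import paths inferred from this commit's file layout; the skipped op name is illustrative):

    from executorch.backends.apple.coreml.recipes.coreml_recipe_provider import (
        CoreMLRecipeProvider,
    )
    from executorch.backends.apple.coreml.recipes.coreml_recipe_types import (
        CoreMLRecipeType,
    )

    provider = CoreMLRecipeProvider()

    # The three new common keys flow through to CoreMLPartitioner.
    recipe = provider.create_recipe(
        CoreMLRecipeType.PT2E_INT8_STATIC,
        lower_full_graph=False,                            # default per this commit
        take_over_constant_data=True,                      # default per this commit
        skip_ops_for_coreml_delegation=["aten::softmax"],  # op name illustrative
    )

    # A typo'd kwarg now logs a warning listing the expected keys,
    # instead of raising ValueError.
    recipe = provider.create_recipe(CoreMLRecipeType.PT2E_INT8_STATIC, group_sze=32)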

backends/apple/coreml/test/test_coreml_recipes.py

Lines changed: 0 additions & 25 deletions

@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
             )
         self.assertIn("must be positive", str(cm.exception))
 
-        # Test unexpected parameter
-        with self.assertRaises(ValueError) as cm:
-            self.provider.create_recipe(
-                CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
-                group_size=32,  # group_size not valid for per-channel
-            )
-        self.assertIn("unexpected parameters", str(cm.exception))
-
     def test_int8_weight_only_per_channel(self):
         """Test INT8 weight-only per-channel quantization"""
         model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
         self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
         self._compare_eager_unquantized_model_outputs(session, model, example_inputs)
 
-    def test_pt2e_recipes_parameter_rejection(self):
-        """Test that PT2E recipes reject TorchAO-specific parameters"""
-        # PT2E recipes should reject TorchAO-specific parameters
-        pt2e_recipes = [
-            CoreMLRecipeType.PT2E_INT8_STATIC,
-            CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
-        ]
-        torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
-
-        for recipe_type in pt2e_recipes:
-            for param in torchao_params:
-                with self.subTest(recipe=recipe_type.value, param=param):
-                    kwargs = {param: "dummy_value"}
-                    with self.assertRaises(ValueError) as cm:
-                        self.provider.create_recipe(recipe_type, **kwargs)
-                    self.assertIn("unexpected parameters", str(cm.exception).lower())
-
     def test_filter_fn_comprehensive(self):
         """Comprehensive test for filter_fn parameter functionality"""
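Both deleted tests asserted the old raise-on-unexpected-kwargs behavior. Under the new warning behavior, an equivalent check could assert the log instead; a sketch (not part of this commit) using unittest's assertLogs with the same recipe and parameter as the deleted test:

    # Hypothetical replacement test body, assuming the same self.provider
    # fixture as the surrounding tests.
    with self.assertLogs(level="WARNING") as cm:
        self.provider.create_recipe(
            CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
            group_size=32,  # not valid for per-channel; now ignored with a warning
        )
    self.assertTrue(any("unexpected parameters" in msg for msg in cm.output))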

backends/cadence/aot/ref_implementations.py

Lines changed: 74 additions & 7 deletions

@@ -24,7 +24,7 @@
 
 @impl(m, "quantize_per_tensor")
 def quantize_per_tensor(
-    input: torch.Tensor,
+    input_tensor: torch.Tensor,
     scale: float,
     zero_point: int,
     quant_min: int,
@@ -35,10 +35,10 @@ def quantize_per_tensor(
     Quantizes a floating-point tensor to an integral tensor.
 
     Args:
-    - input (Tensor): input tensor
-    - scale (float): Quantization scale. Derived from the ratio
+    - input_tensor (Tensor): input tensor
+    - scale (float): Inverse of quantization scale. Derived from the ratio
         between the min/max of the floating-point tensor and the
-        min/max of the quantized range.
+        min/max of the quantized range, and then inverted.
     - zero_point (int): The point which represents 0 in the quantized
         range. For example, consider the floating point range [-1., 2.] and
         quantized integer range [-7, 7]. In this case, 0 is 1/3 of way from
@@ -61,7 +61,12 @@ def quantize_per_tensor(
         raise ValueError(
             f"Unsupported dtype to quantize to. Supported dtypes must be one of {supported_quant_types}"
         )
-    return torch.round(input / scale + zero_point).to(dtype)
+
+    dequantized = torch.round(input_tensor * scale + zero_point).to(dtype)
+    return torch.max(
+        torch.min(dequantized, torch.tensor(quant_max)),
+        torch.tensor(quant_min),
+    )
 
 
 @impl(m, "dequantize_per_tensor")
@@ -173,9 +178,16 @@ def quantized_add(
     dequant_X = X_scale * (X - X_zero_point)
     dequant_Y = Y_scale * (Y - Y_zero_point)
 
+    out_scale_inv = 1 / out_scale
+
     # q_min/q_max are unused args
     return quantize_per_tensor(
-        dequant_X + dequant_Y, out_scale, out_zero_point, -128, 127, dtype
+        dequant_X + dequant_Y,
+        out_scale_inv,
+        out_zero_point,
+        torch.iinfo(dtype).min,
+        torch.iinfo(dtype).max,
+        dtype,
     )
 
 
@@ -206,6 +218,7 @@ def quantized_linear(
     - offset (Tensor): Unused
     """
     out_scale = -out_multiplier * (1 / (1 << 31)) * (2 ** out_shift[0])
+    out_scale_inv = 1 / out_scale
 
     N, K = weight.shape
 
@@ -223,10 +236,64 @@ def quantized_linear(
         src - in_zero_point, weight - weight_zero_point, bias
     )
     return quantize_per_tensor(
-        out, out_scale, out_zero_point, -128, 127, dtype
+        out,
+        out_scale_inv,
+        out_zero_point,
+        torch.iinfo(dtype).min,
+        torch.iinfo(dtype).max,
+        dtype,
     ).reshape(*leading_dims, N)
 
 
+@impl(m, "quantized_layer_norm_per_tensor")
+def quantized_layer_norm_per_tensor(
+    input_tensor: torch.Tensor,
+    X_scale: float,
+    X_zero_point: int,
+    normalized_shape: int,
+    weight: torch.Tensor,
+    bias: torch.Tensor,
+    eps: float,
+    output_scale: float,
+    output_zero_point: int,
+) -> torch.Tensor:
+    """
+    Quantized layer norm operation.
+
+    Args:
+    - input_tensor (Tensor): The activations tensor
+    - X_scale (float): The scale of the input
+    - X_zero_point (int): The zero point of the input
+    - normalized_shape (int): The shape of the input
+    - weight (Tensor): The weight tensor
+    - bias (Tensor): The bias tensor
+    - eps (float): The epsilon value
+    - output_scale (float): The scale of the output
+    - output_zero_point (int): The zero point of the output
+    """
+    supported_dtypes = [torch.int8, torch.uint8]
+    if input_tensor.dtype not in supported_dtypes:
+        raise ValueError(
+            f"Input dtype must be one of {supported_dtypes}. Got {input_tensor.dtype}"
+        )
+
+    float_input_tensor = dequantize_per_tensor(
+        input_tensor, X_scale, X_zero_point, -128, 127, torch.float32
+    )
+    out = torch.nn.functional.layer_norm(
+        float_input_tensor, (normalized_shape,), weight, bias, eps=eps
+    )
+
+    return quantize_per_tensor(
+        out,
+        1 / output_scale,
+        output_zero_point,
+        torch.iinfo(input_tensor.dtype).min,
+        torch.iinfo(input_tensor.dtype).max,
+        input_tensor.dtype,
+    )
+
+
 @impl(m, "requantize")
 def requantize(
     input: torch.Tensor,
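Two semantic points in the updated quantize_per_tensor are worth spelling out: scale is the inverse of the quantization scale (the kernel multiplies by it rather than dividing), and the rounded result is now clamped to [quant_min, quant_max]. A small self-contained sketch of that arithmetic in plain PyTorch; the function name and values are illustrative, and it clamps before the dtype cast so out-of-range values cannot wrap:

    import torch

    def quantize_ref(x, scale_inv, zero_point, quant_min, quant_max, dtype):
        # Multiply by the inverse scale, shift by the zero point, round,
        # then clamp into the quantized range before casting.
        q = torch.round(x * scale_inv + zero_point)
        return torch.clamp(q, quant_min, quant_max).to(dtype)

    # Map the float range [-1.75, 1.75] onto the quantized range [-7, 7]:
    # scale = 0.25, so the op receives its inverse, scale_inv = 4.0.
    x = torch.tensor([-0.5, 0.3, 2.0])
    print(quantize_ref(x, 4.0, 0, -7, 7, torch.int8))
    # tensor([-2,  1,  7], dtype=torch.int8) -- 2.0 maps to 8, clamped to 7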
