Commit 8cae8d5

Author: ssjia
Update on "[ET-VK] Quantized Int8 Convolution"
This diff implements int8 quantized conv2d using the quantized linear implementation introduced in the diff below this one in the stack. Note that the current implementation doesn't yet support depthwise convs; a specialized implementation will need to be added for those.

Differential Revision: [D81330809](https://our.internmc.facebook.com/intern/diff/D81330809/)

[ghstack-poisoned]
2 parents: 1aa9a15 + dcc73ed
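For context on the approach: a conv2d can be lowered onto a matmul/linear kernel via an im2col transform, which is presumably how the quantized linear shader gets reused here. Below is a minimal eager-PyTorch sketch of that idea with symmetric per-tensor int8 quantization; the function name and quantization scheme are illustrative assumptions, not the actual Vulkan implementation.

import torch
import torch.nn.functional as F


def conv2d_as_int8_matmul(x, weight_q, w_scale, stride=1, padding=0):
    # Illustrative sketch only, not the ET-VK shader.
    # x: float (N, C_in, H, W); weight_q: int8 (C_out, C_in, KH, KW);
    # w_scale: per-tensor weight scale (symmetric quantization assumed).
    n, _, h, w = x.shape
    c_out, _, kh, kw = weight_q.shape

    # Quantize activations to int8 range (symmetric, per-tensor).
    x_scale = x.abs().max() / 127.0
    x_q = torch.clamp((x / x_scale).round(), -128, 127)

    # im2col: each output location becomes a column of C_in*KH*KW patch values.
    cols = F.unfold(x_q, (kh, kw), stride=stride, padding=padding)  # (N, K, L)

    # The conv is now a linear layer: (C_out, K) @ (N, K, L) -> (N, C_out, L).
    # Real int8 kernels accumulate in int32; float stands in for brevity here.
    acc = weight_q.float().reshape(c_out, -1) @ cols

    # Dequantize and fold the columns back into the output spatial grid.
    h_out = (h + 2 * padding - kh) // stride + 1
    w_out = (w + 2 * padding - kw) // stride + 1
    return (acc * (x_scale * w_scale)).reshape(n, c_out, h_out, w_out)

Depthwise convolutions do not map onto this single dense matmul (each channel has its own filter rather than a shared weight matrix), which is why the commit message calls out a specialized implementation as future work.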

File tree: 232 files changed, +4297 −992 lines. (Large commit; only a subset of the changed files is shown below.)

.github/workflows/trunk.yml

Lines changed: 3 additions & 3 deletions

@@ -823,10 +823,10 @@ jobs:
       --tsv_path ${TSV_PATH}
     echo "::endgroup::"

-  test-huggingface-transformers-coreml:
+  test-huggingface-transformers-macos:
     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
     if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-coreml
+    name: test-huggingface-transformers-macos
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     permissions:
       id-token: write
@@ -844,10 +844,10 @@ jobs:
           # phi4-mini|xnnpack|--quantize,
           # smollm2-135m|xnnpack|--quantize,
           # smollm3-3b|xnnpack|--quantize,
+          # qwen3-1.7b|xnnpack|--quantize,
           # CoreML.
           llama3.2-1b|coreml_fp32_gpu|--quantize,
           qwen3-0.6b|coreml_fp32_gpu|--quantize,
-          qwen3-1.7b|xnnpack|--quantize,
           smollm2-135m|coreml_fp32_gpu|--quantize,
           olmo-1b|coreml_fp32_gpu|--quantize,
           bert|coreml_fp32_gpu|--quantize,

README.md

Lines changed: 1 addition & 1 deletion

@@ -52,7 +52,7 @@ To get started you can:

 - Visit the [Step by Step Tutorial](https://pytorch.org/executorch/stable/getting-started.html) to get things running locally and deploy a model to a device
 - Use this [Colab Notebook](https://colab.research.google.com/drive/1qpxrXC3YdJQzly3mRg-4ayYiOjC6rue3?usp=sharing) to start playing around right away
-- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), and [Llava](examples/models/llava/README.md)
+- Jump straight into LLM use cases by following specific instructions for popular open-source models such as [Llama](examples/models/llama/README.md), [Qwen 3](examples/models/qwen3/README.md), [Phi-4-mini](examples/models/phi_4_mini/README.md), [Llava](examples/models/llava/README.md), [Voxtral](examples/models/voxtral/README.md), and [LFM2](examples/models/lfm2/README.md).

 ## Feedback and Engagement

backends/apple/coreml/TARGETS

Lines changed: 24 additions & 5 deletions

@@ -61,16 +61,21 @@ runtime.python_library(
 )

 runtime.python_library(
-    name = "recipes",
-    srcs = glob([
-        "recipes/*.py",
-    ]),
+    name = "coreml_recipes",
+    srcs = [
+        "recipes/__init__.py",
+        "recipes/coreml_recipe_provider.py"
+    ],
     visibility = [
         "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
     ],
     deps = [
         "fbsource//third-party/pypi/coremltools:coremltools",
+        ":coreml_recipe_types",
         ":backend",
+        ":partitioner",
+        ":quantizer",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
@@ -80,6 +85,20 @@ runtime.python_library(
     ],
 )

+runtime.python_library(
+    name = "coreml_recipe_types",
+    srcs = [
+        "recipes/coreml_recipe_types.py",
+    ],
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+        "//executorch/export/...",
+    ],
+    deps = [
+        "//executorch/export:recipe",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -124,7 +143,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
-        ":recipes",
+        ":coreml_recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
         "fbsource//third-party/pypi/scikit-learn:scikit-learn",

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 23 additions & 0 deletions

@@ -15,6 +15,7 @@
 from coremltools.converters.mil.frontend.torch.ops import (
     _get_inputs,
     _get_kwinputs,
+    noop,
     NUM_TO_NUMPY_DTYPE,
     NUM_TO_TORCH_DTYPE,
     split,
@@ -91,6 +92,28 @@ def _to_dim_order_copy(context, node):
     to(context, node)


+@register_torch_op(
+    torch_alias=[
+        "dim_order_ops::_clone_dim_order",
+        "dim_order_ops._clone_dim_order",
+    ],
+    override=False,
+)
+def _clone_dim_order(context, node):
+    dim_order = _get_kwinputs(context, node, "dim_order", default=[None])[0]
+    node.kwinputs.pop("dim_order")
+
+    # In CoreML, dim_order.val will be a ndarray, so we convert it to a list to check memory format.
+    dim_order = [int(d) for d in dim_order.val]
+    memory_format = get_memory_format(dim_order)
+    assert (
+        memory_format == _torch.contiguous_format
+    ), "Only contiguous memory format is supported in CoreML"
+
+    # Since CoreML only supports contiguous format, no dim_order preservation is needed. Treat this as a no-op clone.
+    noop(context, node)
+
+
 # https://github.com/apple/coremltools/pull/2558
 @register_torch_op(
     torch_alias=["torchao::dequantize_affine", "torchao.dequantize_affine"],

backends/apple/coreml/recipes/coreml_recipe_provider.py

Lines changed: 21 additions & 3 deletions

@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.


+import logging
 from typing import Any, Optional, Sequence

 import coremltools as ct
@@ -111,8 +112,9 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> None:

         unexpected = set(kwargs.keys()) - expected_keys
         if unexpected:
-            raise ValueError(
-                f"Recipe '{recipe_type.value}' received unexpected parameters: {list(unexpected)}"
+            logging.warning(
+                f"CoreML recipe '{recipe_type.value}' ignoring unexpected parameters: {list(unexpected)}. "
+                f"Expected parameters: {list(expected_keys)}"
             )

         self._validate_base_parameters(kwargs)
@@ -121,7 +123,13 @@ def _validate_recipe_kwargs(self, recipe_type: RecipeType, **kwargs: Any) -> None:

     def _get_expected_keys(self, recipe_type: RecipeType) -> set:
         """Get expected parameter keys for a recipe type"""
-        common_keys = {"minimum_deployment_target", "compute_unit"}
+        common_keys = {
+            "minimum_deployment_target",
+            "compute_unit",
+            "skip_ops_for_coreml_delegation",
+            "lower_full_graph",
+            "take_over_constant_data",
+        }

         if recipe_type in [
             CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,
@@ -377,9 +385,19 @@ def _get_coreml_lowering_recipe(
         if minimum_deployment_target and minimum_deployment_target < ct.target.iOS18:
             take_over_mutable_buffer = False

+        # Extract additional partitioner parameters
+        skip_ops_for_coreml_delegation = kwargs.get(
+            "skip_ops_for_coreml_delegation", None
+        )
+        lower_full_graph = kwargs.get("lower_full_graph", False)
+        take_over_constant_data = kwargs.get("take_over_constant_data", True)
+
         partitioner = CoreMLPartitioner(
             compile_specs=compile_specs,
             take_over_mutable_buffer=take_over_mutable_buffer,
+            skip_ops_for_coreml_delegation=skip_ops_for_coreml_delegation,
+            lower_full_graph=lower_full_graph,
+            take_over_constant_data=take_over_constant_data,
         )

         edge_compile_config = EdgeCompileConfig(
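Taken together: the three new keys are accepted on any recipe and forwarded to CoreMLPartitioner, and unknown kwargs now log a warning instead of raising ValueError. A hedged usage sketch follows; the class name and import paths are assumed from this commit's file layout, and the skip list value is purely illustrative.

from executorch.backends.apple.coreml.recipes import (
    CoreMLRecipeProvider,
    CoreMLRecipeType,
)

provider = CoreMLRecipeProvider()

# The new common keys reach the partitioner directly; any other unexpected
# kwarg would now only produce a logging.warning rather than an exception.
recipe = provider.create_recipe(
    CoreMLRecipeType.PT2E_INT8_STATIC,
    lower_full_graph=False,                # allow partial delegation
    take_over_constant_data=True,          # let the delegate own constant data
    skip_ops_for_coreml_delegation=["aten.mm.default"],  # illustrative op list
)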

backends/apple/coreml/test/test_coreml_recipes.py

Lines changed: 0 additions & 25 deletions

@@ -185,14 +185,6 @@ def test_int4_weight_only_per_group_validation(self):
             )
         self.assertIn("must be positive", str(cm.exception))

-        # Test unexpected parameter
-        with self.assertRaises(ValueError) as cm:
-            self.provider.create_recipe(
-                CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_CHANNEL,
-                group_size=32,  # group_size not valid for per-channel
-            )
-        self.assertIn("unexpected parameters", str(cm.exception))
-
     def test_int8_weight_only_per_channel(self):
         """Test INT8 weight-only per-channel quantization"""
         model = TestHelperModules.TwoLinearModule().eval()
@@ -385,23 +377,6 @@ def forward(self, x):
         self._compare_eager_quantized_model_outputs(session, example_inputs, atol=1e-2)
         self._compare_eager_unquantized_model_outputs(session, model, example_inputs)

-    def test_pt2e_recipes_parameter_rejection(self):
-        """Test that PT2E recipes reject TorchAO-specific parameters"""
-        # PT2E recipes should reject TorchAO-specific parameters
-        pt2e_recipes = [
-            CoreMLRecipeType.PT2E_INT8_STATIC,
-            CoreMLRecipeType.PT2E_INT8_WEIGHT_ONLY,
-        ]
-        torchao_params = ["filter_fn", "group_size", "bits", "block_size"]
-
-        for recipe_type in pt2e_recipes:
-            for param in torchao_params:
-                with self.subTest(recipe=recipe_type.value, param=param):
-                    kwargs = {param: "dummy_value"}
-                    with self.assertRaises(ValueError) as cm:
-                        self.provider.create_recipe(recipe_type, **kwargs)
-                    self.assertIn("unexpected parameters", str(cm.exception).lower())
-
     def test_filter_fn_comprehensive(self):
         """Comprehensive test for filter_fn parameter functionality"""

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 23 additions & 0 deletions

@@ -268,6 +268,28 @@
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)

+    def test__clone_dim_order_contiguous(self):
+        class Model(torch.nn.Module):
+            def forward(self, x):
+                return torch.ops.dim_order_ops._clone_dim_order(
+                    x, dim_order=[0, 1, 2, 3]
+                )
+
+        model, example_inputs = Model(), (torch.randn(1, 3, 8, 8),)
+        ep = torch.export.export(model, example_inputs)
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[self._coreml_partitioner()],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+

 if __name__ == "__main__":
     test_runner = TestTorchOps()
@@ -280,3 +302,4 @@ def test_dequantize_codebook_embedding_per_grouped_row(self):
     test_runner.test_dequantize_codebook_linear_per_grouped_row()
     test_runner.test_dequantize_codebook_embedding_per_grouped_col()
     test_runner.test_dequantize_codebook_embedding_per_grouped_row()
+    test_runner.test__clone_dim_order_contiguous()

backends/arm/_passes/arm_pass_manager.py

Lines changed: 2 additions & 1 deletion

@@ -91,7 +91,8 @@
     UnsqueezeBeforeRepeatPass,
     UnsqueezeScalarPlaceholdersPass,
 )
-from executorch.backends.arm.tosa_specification import (
+
+from executorch.backends.arm.tosa.specification import (
     TosaLoweringContext,
     TosaSpecification,
 )

backends/arm/_passes/decompose_meandim_pass.py

Lines changed: 5 additions & 1 deletion

@@ -9,7 +9,6 @@
 import torch
 from executorch.backends.arm._passes import ArmPass
 from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
-from executorch.backends.arm.operator_support.pool_2d_support import AvgPool2dSupported
 from executorch.exir.backend.utils import WhyNoPartitionReporter
 from executorch.exir.dialects._ops import ops as exir_ops

@@ -67,6 +66,11 @@ def __init__(self, graph_module, tosa_spec):
         super().__init__()
         self._graph_module = graph_module
         self._tosa_spec = tosa_spec
+        # Lazy import to avoid circular dependency with operator_support
+        from executorch.backends.arm.operator_support.pool_2d_support import (
+            AvgPool2dSupported,
+        )
+
         self._avg_pool_checker = AvgPool2dSupported(
             self._tosa_spec, WhyNoPartitionReporter()
         )

backends/arm/_passes/remove_clone_pass.py

Lines changed: 1 addition & 1 deletion

@@ -18,7 +18,7 @@ class RemoveClonePass(ExportPass):
     """Remove all clones from graph_module"""

     def call_operator(self, op, args, kwargs, meta):
-        if op != exir_ops.edge.aten.clone.default:
+        if op != exir_ops.edge.dim_order_ops._clone_dim_order.default:
             return super().call_operator(op, args, kwargs, meta)

         if len(args) != 1:
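For completeness, the hunk cuts off before the removal itself. A minimal sketch of what the full pass plausibly looks like after this change; the body past the guard is an assumption, not shown in the diff:

from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass


class RemoveClonePassSketch(ExportPass):
    """Sketch: drop no-op _clone_dim_order nodes by forwarding their input."""

    def call_operator(self, op, args, kwargs, meta):
        # aten.clone no longer appears at the edge level once dim-order ops
        # are enabled, so the pass now matches _clone_dim_order instead.
        if op != exir_ops.edge.dim_order_ops._clone_dim_order.default:
            return super().call_operator(op, args, kwargs, meta)

        assert len(args) == 1, "clone expects exactly one input"
        return args[0]  # assumed removal step: return the clone's input value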
