Commit 7e76ba2

Fix torchao quantization for deployment targets under iOS18 (#13896)
_construct_constexpr_dequant_op should use axis=None (the default) rather than -1, so that the axis is inferred from the shape of scales. On iOS18 this was not an issue because the axis is unused; on iOS16 the axis is used, and the hard-coded -1 typically caused failures during export.
1 parent accc65e commit 7e76ba2
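
As an illustration of that inference, here is a minimal sketch assuming per-axis (channelwise) scales; the helper name is hypothetical, and the real logic inside coremltools' _construct_constexpr_dequant_op may differ:

import numpy as np

def infer_quantization_axis(weight_shape, scales_shape):
    # Hypothetical sketch: with per-axis quantization, scales has size 1 on
    # every dimension except the quantization axis, where its size matches
    # the weight's. If every dimension is 1, quantization is per-tensor.
    for axis, (w, s) in enumerate(zip(weight_shape, scales_shape)):
        if s != 1 and s == w:
            return axis
    return None  # per-tensor: a single scale

# PerAxis(0)-quantized weights, as in the new test below:
weight = np.zeros((128, 64), dtype=np.int8)
scales = np.ones((128, 1), dtype=np.float32)
assert infer_quantization_axis(weight.shape, scales.shape) == 0  # not -1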

File tree: 2 files changed (+30, -3 lines)

backends/apple/coreml/compiler/torch_ops.py

Lines changed: 0 additions & 1 deletion
@@ -175,7 +175,6 @@ def dequantize_affine(context, node):
         int_data.astype(quantized_np_dtype),
         zero_point,
         scale,
-        axis=-1,
         name=node.name,
     )
     context.add(output, node.name)
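
This is also why the hard-coded axis failed for iOS16: a PerAxis(0)-quantized [out_channels, in_channels] weight carries scales of shape [out_channels, 1], which is inconsistent with axis=-1. A minimal sketch of the kind of shape check involved (illustrative only; coremltools' actual validation differs in detail):

import numpy as np

weight = np.zeros((128, 64), dtype=np.int8)   # [out_channels, in_channels]
scales = np.ones((128, 1), dtype=np.float32)  # one scale per output channel

def scales_match_axis(weight_shape, scales_shape, axis):
    # Per-axis scales must have size 1 on every dimension except `axis`.
    axis = axis % len(weight_shape)
    expected = [1] * len(weight_shape)
    expected[axis] = weight_shape[axis]
    return list(scales_shape) == expected

assert scales_match_axis(weight.shape, scales.shape, axis=0)       # inferred axis works
assert not scales_match_axis(weight.shape, scales.shape, axis=-1)  # the removed hard-coded value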

backends/apple/coreml/test/test_torch_ops.py

Lines changed: 30 additions & 2 deletions
@@ -27,9 +27,9 @@
 class TestTorchOps(unittest.TestCase):
     edge_compile_config = executorch.exir.EdgeCompileConfig()

-    def _coreml_partitioner(self):
+    def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
         compile_specs = CoreMLBackend.generate_compile_specs(
-            minimum_deployment_target=ct.target.iOS18
+            minimum_deployment_target=minimum_deployment_target
         )
         return CoreMLPartitioner(compile_specs=compile_specs)

@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)

+    def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+        )
+        ep = torch.export.export(model, example_inputs)
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[
+                self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
+            ],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
     def test_dequantize_codebook_linear_per_grouped_col(self):
         model, example_inputs = self._get_test_model()
         quantize_(
@@ -298,6 +325,7 @@ def forward(self, x):
     test_runner.test_dequantize_affine_c4w_embedding()
     test_runner.test_dequantize_affine_c4w_linear()
     test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+    test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
     test_runner.test_dequantize_codebook_linear_per_grouped_col()
     test_runner.test_dequantize_codebook_linear_per_grouped_row()
     test_runner.test_dequantize_codebook_embedding_per_grouped_col()
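
For reference, a minimal sketch of the torchao configuration exercised by the new test; the import paths are an assumption (the test file's own imports may differ):

import torch
from torchao.quantization.granularity import PerAxis
from torchao.quantization.quant_api import IntxWeightOnlyConfig, quantize_

# Quantize a linear layer channelwise to int8, as in the test above.
linear = torch.nn.Linear(64, 128, bias=False)
quantize_(linear, IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)))
# PerAxis(0) produces one scale per output channel, i.e. scales of shape
# [128, 1] for the [128, 64] weight, so the Core ML dequantize op must use
# axis 0 -- exactly the case the removed axis=-1 mishandled on iOS16.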
