diff --git a/backends/apple/coreml/compiler/torch_ops.py b/backends/apple/coreml/compiler/torch_ops.py
index ad696f50492..53ac436fe38 100644
--- a/backends/apple/coreml/compiler/torch_ops.py
+++ b/backends/apple/coreml/compiler/torch_ops.py
@@ -175,7 +175,6 @@ def dequantize_affine(context, node):
         int_data.astype(quantized_np_dtype),
         zero_point,
         scale,
-        axis=-1,
         name=node.name,
     )
     context.add(output, node.name)
diff --git a/backends/apple/coreml/test/test_torch_ops.py b/backends/apple/coreml/test/test_torch_ops.py
index 309db16928a..de54b684ee7 100644
--- a/backends/apple/coreml/test/test_torch_ops.py
+++ b/backends/apple/coreml/test/test_torch_ops.py
@@ -27,9 +27,9 @@ class TestTorchOps(unittest.TestCase):
 
     edge_compile_config = executorch.exir.EdgeCompileConfig()
 
-    def _coreml_partitioner(self):
+    def _coreml_partitioner(self, *, minimum_deployment_target=ct.target.iOS18):
         compile_specs = CoreMLBackend.generate_compile_specs(
-            minimum_deployment_target=ct.target.iOS18
+            minimum_deployment_target=minimum_deployment_target
         )
         return CoreMLPartitioner(compile_specs=compile_specs)
 
@@ -158,6 +158,33 @@ def test_dequantize_affine_c8w_embedding_b4w_linear(self):
         et_prog = delegated_program.to_executorch()
         self._compare_outputs(et_prog, model, example_inputs)
 
+    def test_dequantize_affine_c8w_embedding_c8w_linear_ios16(self):
+        model, example_inputs = self._get_test_model()
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+            lambda m, fqn: isinstance(m, torch.nn.Embedding),
+        )
+        quantize_(
+            model,
+            IntxWeightOnlyConfig(weight_dtype=torch.int8, granularity=PerAxis(0)),
+        )
+        ep = torch.export.export(model, example_inputs)
+        delegated_program = executorch.exir.to_edge_transform_and_lower(
+            ep,
+            partitioner=[
+                self._coreml_partitioner(minimum_deployment_target=ct.target.iOS16)
+            ],
+        )
+        for node in delegated_program.exported_program().graph.nodes:
+            if node.op == "call_function":
+                assert node.target.__name__ in [
+                    "executorch_call_delegate",
+                    "getitem",
+                ], f"Got unexpected node target after delegation: {node.target.__name__}"
+        et_prog = delegated_program.to_executorch()
+        self._compare_outputs(et_prog, model, example_inputs)
+
     def test_dequantize_codebook_linear_per_grouped_col(self):
         model, example_inputs = self._get_test_model()
         quantize_(
@@ -298,6 +325,7 @@ def forward(self, x):
    test_runner.test_dequantize_affine_c4w_embedding()
    test_runner.test_dequantize_affine_c4w_linear()
    test_runner.test_dequantize_affine_c8w_embedding_b4w_linear()
+    test_runner.test_dequantize_affine_c8w_embedding_c8w_linear_ios16()
    test_runner.test_dequantize_codebook_linear_per_grouped_col()
    test_runner.test_dequantize_codebook_linear_per_grouped_row()
    test_runner.test_dequantize_codebook_embedding_per_grouped_col()