diff --git a/backends/arm/operators/op_conv2d.py b/backends/arm/operators/op_conv2d.py
index 469e6613c1f..933e353387b 100644
--- a/backends/arm/operators/op_conv2d.py
+++ b/backends/arm/operators/op_conv2d.py
@@ -182,11 +182,11 @@ def define_node(
         acc_type = ts.DType.FP32

         tosa_graph.addConst(
-            [1], output.dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
+            [1], inputs[0].dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
         )
         tosa_graph.addConst(
             [1],
-            output.dtype,
+            inputs[1].dtype,
             weight_zp,
             name=f"{conv2d_output_name}_weight_zp",
         )
@@ -269,7 +269,7 @@ def define_node(

         # For quantized convolution, rescale the output value back to the same
         # integer value domain of the next op. Otherwise return float32 output.
-        if inputs[0].dtype == ts.DType.INT8 or inputs[0].dtype == ts.DType.INT16:
+        if output.dtype == ts.DType.INT8 or output.dtype == ts.DType.INT16:
             # Get scale_factor from input, weight, and output.
             input_scale = input_qparams[0].get_scale_per_tensor()  # type: ignore[possibly-undefined]  # pyre-ignore [61]
             per_channel_quant = input_qparams[1].per_channel  # pyre-ignore [61]
diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py
index bd719954ff5..4029fcef54e 100644
--- a/backends/arm/test/ops/test_linear.py
+++ b/backends/arm/test/ops/test_linear.py
@@ -8,8 +8,6 @@

 from typing import Tuple

-import pytest
-
 import torch
 from executorch.backends.arm.quantizer.arm_quantizer import (
     get_symmetric_a16w8_quantization_config,
@@ -313,12 +311,8 @@ def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
     pipeline.run()


-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.parametrize("test_data", test_data_all_16a8w)
 @common.XfailIfNoCorstone300
-@pytest.mark.xfail(
-    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
-    strict=False,
-)
 def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
     test_data, out_features, has_bias, per_channel_quantization = test_data()
@@ -347,12 +341,8 @@ def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     pipeline.run()


-@common.parametrize("test_data", test_data_rank1_INT | test_data_rank4_INT)
+@common.parametrize("test_data", test_data_all_16a8w)
 @common.XfailIfNoCorstone320
-@pytest.mark.xfail(
-    reason="Ethos-U55 A16W8 linear: int16 matmul not yet supported; pending backend support or linear->conv1x1 lowering. See: https://github.com/pytorch/executorch/issues/13947",
-    strict=False,
-)
 def test_linear_16a8w_u85_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     test_data, out_features, has_bias, per_channel_quantization = test_data()
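
Why the first hunk matters: in a mixed-precision (16A8W) convolution the activations are INT16 while the weights are INT8, so the input and weight zero-point constants must each carry the dtype of the tensor they belong to. Taking `output.dtype` for both gave the weight zero-point an INT16 dtype even though the weights are INT8. The snippet below is a minimal, hypothetical sketch of that rule — `FakeTensor` is a made-up stand-in for the backend's tensor-argument wrapper, not part of the ExecuTorch code:

```python
# Minimal sketch (hypothetical names) of the dtype-selection rule fixed in
# op_conv2d.py: each zero-point constant must match its own tensor's dtype.
from dataclasses import dataclass


@dataclass
class FakeTensor:  # stand-in for the TOSA argument wrapper, for illustration only
    name: str
    dtype: str


# 16A8W conv: INT16 activations, INT8 weights, INT16 output.
inputs = [FakeTensor("x", "INT16"), FakeTensor("w", "INT8")]
output = FakeTensor("y", "INT16")

# Old behavior: both zero-points took output.dtype, so weight_zp came out
# INT16 even though the weights are INT8.
buggy = {"input_zp": output.dtype, "weight_zp": output.dtype}

# Fixed behavior from the diff: each zero-point matches its tensor.
fixed = {"input_zp": inputs[0].dtype, "weight_zp": inputs[1].dtype}

assert fixed["weight_zp"] == inputs[1].dtype == "INT8"
assert buggy["weight_zp"] != inputs[1].dtype  # the mismatch the fix removes
```

The second hunk follows the same idea from the other side: whether the conv output needs to be rescaled back into the integer domain depends on the output dtype, not the input dtype, which diverge under mixed precision. With the conv path corrected, the `test_linear.py` changes drop the `pytest.mark.xfail` markers (and the now-unused `pytest` import) and widen the U55/U85 parametrization to `test_data_all_16a8w`, since the previously failing int16 cases are expected to pass.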