From 9953890714067ab66e9175fb6109029c716c1ca5 Mon Sep 17 00:00:00 2001
From: winskuo-quic
Date: Mon, 13 Oct 2025 16:40:53 +0800
Subject: [PATCH] Qualcomm AI Engine Direct - Suite operator fix part 3

---
 backends/qualcomm/builders/node_visitor.py   |  18 +-
 backends/qualcomm/builders/op_conv.py        |  52 ++++-
 backends/qualcomm/builders/op_elu.py         |   5 +-
 backends/qualcomm/quantizer/qconfig.py       |  28 ++-
 backends/qualcomm/quantizer/quantizer.py     |  85 +++++--
 backends/qualcomm/tests/models.py            |  23 +-
 backends/qualcomm/tests/test_qnn_delegate.py | 226 +++++++++++++++++--
 backends/test/suite/operators/test_div.py    |   6 +
 backends/test/suite/operators/test_log.py    |  28 ++-
 backends/test/suite/operators/test_rsqrt.py  |   7 +-
 backends/test/suite/operators/test_sqrt.py   |   7 +-
 11 files changed, 409 insertions(+), 76 deletions(-)

diff --git a/backends/qualcomm/builders/node_visitor.py b/backends/qualcomm/builders/node_visitor.py
index 8cbf3a50e22..f3dadb99129 100644
--- a/backends/qualcomm/builders/node_visitor.py
+++ b/backends/qualcomm/builders/node_visitor.py
@@ -153,6 +153,13 @@ def make_qnn_per_block_config(self, node: torch.fx.Node, quant_attrs: Dict):
         scales, scale_offset, quantized_scales = quant_attrs[QCOM_SCALE], [], []
         # channel in observers defaults to zero
         num_channels = node.meta["val"].shape[0]
+        user_0 = self.get_first_user(node)
+
+        ch_axis = 0
+        # args[6] to check if it is transpose conv
+        if user_0.target == exir_ops.edge.aten.convolution.default and user_0.args[6]:
+            num_channels = node.meta["val"].shape[1]
+            ch_axis = 1
         # TODO: expand this when QNN starts to support more configurations
         bitwidth_of_scale = 4
         quant_scales_dtype = torch.uint8
@@ -162,9 +169,10 @@ def make_qnn_per_block_config(self, node: torch.fx.Node, quant_attrs: Dict):
         )
 
         for ch in range(num_channels):
-            max_scale = scales[ch].reshape(1, -1).amax(dim=-1) / num_steps
+            candidates = scales[ch] if ch_axis == 0 else scales[:, ch, ...]
+            max_scale = candidates.reshape(1, -1).amax(dim=-1) / num_steps
             q_scales = torch.clamp(
-                input=torch.round(input=scales[ch] / max_scale),
+                input=torch.round(input=candidates / max_scale),
                 min=1,
                 max=2**bitwidth_of_scale,
             ).to(quant_scales_dtype)
@@ -174,11 +182,11 @@ def make_qnn_per_block_config(self, node: torch.fx.Node, quant_attrs: Dict):
 
         # skip dequantize op, e.g. frozen_param -> dq -> conv2d
         user_0 = self.get_first_user(node)
-        if "convolution" in user_0.target.__name__:
+        if user_0.target == exir_ops.edge.aten.convolution.default:
             # OIHW (pytorch) -> HWIO (QNN)
             quant_config[QCOM_AXIS] = node.meta["val"].dim() - 1
             quant_config[QCOM_AXIS_ORDER] = (2, 3, 1, 0)
-        elif "linear" in user_0.target.__name__:
+        elif user_0.target == exir_ops.edge.aten.linear.default:
             # OI (pytorch) -> OI (QNN)
             quant_config[QCOM_AXIS] = 0
             quant_config[QCOM_AXIS_ORDER] = (0, 1)
@@ -217,7 +225,7 @@ def make_qnn_per_channel_config(self, node: torch.fx.Node, quant_attrs: Dict):
         # skip dequantize op, e.g. frozen_param -> dq -> conv2d
         user_0 = self.get_first_user(node)
         # Memory layout of QNN conv weight always ends in Output. Like conv2d is HWIO
-        if "convolution" in user_0.target.__name__:
+        if user_0.target == exir_ops.edge.aten.convolution.default:
             quant_config[QCOM_AXIS] = node.meta["val"].dim() - 1
         else:
             quant_config[QCOM_AXIS] = quant_attrs[QCOM_AXIS]
diff --git a/backends/qualcomm/builders/op_conv.py b/backends/qualcomm/builders/op_conv.py
index 2bc0b41524d..317a3269ede 100644
--- a/backends/qualcomm/builders/op_conv.py
+++ b/backends/qualcomm/builders/op_conv.py
@@ -9,9 +9,9 @@
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
 import numpy as np
 import torch
-from executorch.backends.qualcomm.utils.constants import QCOM_DATA
+from executorch.backends.qualcomm.utils.constants import QCOM_DATA, QCOM_QUANT_ATTRS
 
-from .node_visitor import NodeVisitor
+from .node_visitor import NodeVisitor, PER_CHANNEL_ENCODING
 from .node_visitor_manager import register_node_visitor
 from .qnn_constants import (
     OpConv2d,
@@ -101,6 +101,29 @@ def _add_conv_op_parameter(
 
         return conv_op
 
+    def _reduce_bias_scales(
+        self,
+        node: torch.fx.Node,
+        filter_node: torch.fx.Node,
+        bias_node: torch.fx.Node,
+        groups: int,
+    ):
+        """Reduce the bias node's per-channel quant params for grouped transpose conv.
+        When a transpose conv has groups, the bias node's per-channel quantization needs special handling.
+        See _derived_bias_quant_spec under backends/qualcomm/quantizer/qconfig.py for more info.
+        """
+
+        filter_scales = filter_node.meta[QCOM_QUANT_ATTRS]["scales"]
+        bias_scales = bias_node.meta[QCOM_QUANT_ATTRS]["scales"]
+        bias_zero_points = bias_node.meta[QCOM_QUANT_ATTRS]["zero_points"]
+
+        # Guard against reducing twice, since this path runs in both op_validation and qnn_preprocess.
+        if filter_scales.numel() != bias_scales.numel():
+            bias_scales = bias_scales.view(-1, groups)[:, 0]
+            bias_zero_points = bias_zero_points.view(-1, groups)[:, 0]
+            bias_node.meta[QCOM_QUANT_ATTRS]["scales"] = bias_scales
+            bias_node.meta[QCOM_QUANT_ATTRS]["zero_points"] = bias_zero_points
+
     def define_node(
         self,
         node: torch.fx.Node,
@@ -127,8 +150,15 @@ def define_node(
         filter_node = self.get_node(node.args[1])
         filter_tensor = get_parameter(filter_node, self.edge_program)
+
+        stride = cast(List[int], node.args[3])
+        padding = cast(List[int], node.args[4])
+        dilation = cast(List[int], node.args[5])
+        output_padding = cast(List[int], node.args[7])
+        groups = cast(int, node.args[8])
+
         # weight of pytorch OIHW(conv2d) / OIDHW(conv3d) or IOHW(conv_transpose2d) / IODHW(conv_transpose3d),
-        # yet QNN is HWIO or DHWIO
+        # yet QNN is HWIO or DHWIO for both conv and conv_transpose.
         is_transpose_conv = cast(bool, node.args[6])
         if is_conv2d:
             filter_axis_order = (2, 3, 0, 1) if is_transpose_conv else (2, 3, 1, 0)
@@ -147,6 +177,16 @@ def define_node(
         conv_input_tensors = [input_tensor_wrapper, filter_tensor_wrapper]
         if node.args[2] is not None:
             bias_node = self.get_node(node.args[2])
+            # TODO: Double-check the condition below once QNN supports transpose_conv with block_quant.
+            # Checking node.args[1].target lets only per_channel_quant go through and bypasses block_quant.
+            if (
+                is_transpose_conv
+                and groups != 1
+                and bias_node.meta.get(QCOM_QUANT_ATTRS) is not None
+                and node.args[1].target in PER_CHANNEL_ENCODING
+            ):
+                self._reduce_bias_scales(node, filter_node, bias_node, groups)
+
             bias_tensor = get_parameter(bias_node, self.edge_program)
             bias_tensor_wrapper = self.define_tensor(
                 bias_node,
@@ -156,7 +196,6 @@ def define_node(
                 nodes_to_wrappers,
             )
             conv_input_tensors.append(bias_tensor_wrapper)
-
         output_tensor = self.get_tensor(node, node)
         output_tensor_wrapper = self.define_tensor(
             node,
@@ -167,11 +206,6 @@ def define_node(
         )
         conv_output_tensors = [output_tensor_wrapper]
 
-        stride = cast(List[int], node.args[3])
-        padding = cast(List[int], node.args[4])
-        dilation = cast(List[int], node.args[5])
-        output_padding = cast(List[int], node.args[7])
-        groups = cast(int, node.args[8])
         # Qnn filter tensor is (H, W, Cin, Cout) or (D, H, W, Cin, Cout)
         group_input_channels = filter_tensor.shape[-2]
         group_output_channels = int(filter_tensor.shape[-1] / groups)
diff --git a/backends/qualcomm/builders/op_elu.py b/backends/qualcomm/builders/op_elu.py
index 65e8d93f414..215fe654948 100644
--- a/backends/qualcomm/builders/op_elu.py
+++ b/backends/qualcomm/builders/op_elu.py
@@ -58,12 +58,11 @@ def define_node(
         )
         elu_op.AddInputTensors(elu_input_tensors)
         elu_op.AddOutputTensors(elu_output_tensors)
-
-        if len(node.args) == 2:
+        if len(node.args) > 1:
             elu_op.AddScalarParam(
                 OpElu.param_alpha,
                 PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_FLOAT_32,
-                {QCOM_DATA: np.uint32(node.args[1])},
+                {QCOM_DATA: np.float32(node.args[1])},
             )
 
         return elu_op
diff --git a/backends/qualcomm/quantizer/qconfig.py b/backends/qualcomm/quantizer/qconfig.py
index 3c8191dc57d..e22d5b30fa7 100644
--- a/backends/qualcomm/quantizer/qconfig.py
+++ b/backends/qualcomm/quantizer/qconfig.py
@@ -52,6 +52,22 @@ def _derive_bias_qparams_fn(
             act_scale, weight_scale
         )
         derived_scale = (broadcast_act_scale * broadcast_weight_scale).to(torch.float32)
+        # TransposeConv uses per-channel axis=1, and weight_shape[1] = out_channel / groups.
+        # E.g., out_channel = 6, groups = 2, weight_shape[1] = 3, which means there are 3 pairs of scale/offset.
+        # However, bias still has 6 values, so derived_scale must be repeat_interleaved by groups (2 here) to
+        # generate 6 pairs of scale/offset for per-channel quantization. For the bias node, the Conv op builder
+        # will later pass only 3 pairs of scale/offset to QNN.
+        if (
+            node.target
+            in {
+                torch.ops.aten.conv_transpose2d.input,
+                torch.ops.aten.conv_transpose3d.input,
+            }
+            and len(node.args) > 6
+            and node.args[6] != 1
+        ):
+            groups = node.args[6]
+            derived_scale = derived_scale.repeat_interleave(groups)
         derived_zero = torch.zeros(derived_scale.size(), device=weight_zp.device).to(
             torch.int32
         )
@@ -68,7 +84,6 @@ def _derive_bias_qparams_fn(
     assert isinstance(input_act, Node)
     weight = node.args[1]
     assert isinstance(weight, Node)
-
     return DerivedQuantizationSpec(
         derived_from=[(input_act, node), (weight, node)],
         derive_qparams_fn=_derive_bias_qparams_fn,
@@ -300,6 +315,7 @@ def get_ptq_per_channel_quant_config(
     weight_dtype=torch.int8,
     act_observer=MovingAverageMinMaxObserver,
     act_symmetric: bool = False,
+    ch_axis: int = 0,
 ) -> QuantizationConfig:
     extra_args: Dict[str, Any] = {"eps": 2**-12}
 
@@ -349,7 +365,7 @@ def get_ptq_per_channel_quant_config(
         ),
         quant_max=7 if weight_dtype == torch.int4 else torch.iinfo(weight_dtype).max,
         qscheme=torch.per_channel_symmetric,
-        ch_axis=0,
+        ch_axis=ch_axis,
         observer_or_fake_quant_ctr=PerChannelMinMaxObserver.with_args(**extra_args),
     )
 
@@ -370,6 +386,7 @@ def get_ptq_per_block_quant_config(
     weight_dtype=torch.int8,
     act_observer=MovingAverageMinMaxObserver,
     act_symmetric: bool = False,
+    ch_axis: int = 0,
 ) -> QuantizationConfig:
     extra_args: Dict[str, Any] = {"eps": 2**-12}
     quantization_config = get_ptq_per_channel_quant_config(
@@ -385,7 +402,7 @@ def get_ptq_per_block_quant_config(
         ),
         quant_max=7 if weight_dtype == torch.int4 else torch.iinfo(weight_dtype).max,
         qscheme=torch.per_channel_symmetric,
-        ch_axis=0,
+        ch_axis=ch_axis,
         observer_or_fake_quant_ctr=PerBlockParamObserver.with_args(**extra_args),
     )
     return QuantizationConfig(
@@ -522,6 +539,7 @@ def get_qat_per_channel_quant_config(
     weight_dtype=torch.int8,
     act_observer=MovingAverageMinMaxObserver,
     act_symmetric=False,
+    ch_axis: int = 0,
 ) -> QuantizationConfig:
     supported_act_types = {
         torch.uint8,
@@ -577,7 +595,7 @@ def get_qat_per_channel_quant_config(
         ),
         quant_max=7 if weight_dtype == torch.int4 else torch.iinfo(weight_dtype).max,
         qscheme=torch.per_channel_symmetric,
-        ch_axis=0,
+        ch_axis=ch_axis,
         observer=MovingAveragePerChannelMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -587,7 +605,7 @@ def get_qat_per_channel_quant_config(
         ),
         quant_max=7 if weight_dtype == torch.int4 else torch.iinfo(weight_dtype).max,
         qscheme=torch.per_channel_symmetric,
-        ch_axis=0,
+        ch_axis=ch_axis,
         observer_or_fake_quant_ctr=weight_fake_quant_ctr,
     )
diff --git a/backends/qualcomm/quantizer/quantizer.py b/backends/qualcomm/quantizer/quantizer.py
index 44d129d5544..4d0f1098a62 100644
--- a/backends/qualcomm/quantizer/quantizer.py
+++ b/backends/qualcomm/quantizer/quantizer.py
@@ -150,33 +150,62 @@ def __post_init__(self):
             if self.act_observer
             else quant_config_func()
         )
-        self.per_channel_quant_config = (
-            per_channel_quant_config_func(act_observer=self.act_observer)
-            if self.act_observer
-            else per_channel_quant_config_func()
-        )
-        self.use_per_channel_weight_quant_ops = set()
+
+        # Assume per_channel_quant/per_block_quant only happens on axis 0 or axis 1; increase this range if needed.
+        potential_axis = 2
+
+        self.per_channel_quant_config_list = []
+        for i in range(potential_axis):
+            self.per_channel_quant_config_list.append(
+                (
+                    per_channel_quant_config_func(
+                        act_observer=self.act_observer, ch_axis=i
+                    )
+                    if self.act_observer
+                    else per_channel_quant_config_func(ch_axis=i)
+                )
+            )
+
+        # Key is the node target, and the value is the axis along which to perform per-channel quantization.
+        self.op_axis_dict = {
+            torch.ops.aten.conv1d.default: 0,
+            torch.ops.aten.conv2d.default: 0,
+            torch.ops.aten.conv3d.default: 0,
+            torch.ops.aten.conv_transpose2d.input: 1,
+            torch.ops.aten.conv_transpose3d.input: 1,
+            torch.ops.aten.linear.default: 0,
+        }
+
+        self.use_per_channel_weight_quant_ops = {}
         if self.is_conv_per_channel:
+            conv_ops = [
+                torch.ops.aten.conv1d.default,
+                torch.ops.aten.conv2d.default,
+                torch.ops.aten.conv3d.default,
+                torch.ops.aten.conv_transpose2d.input,
+                torch.ops.aten.conv_transpose3d.input,
+            ]
             self.use_per_channel_weight_quant_ops.update(
-                {
-                    torch.ops.aten.conv1d.default,
-                    torch.ops.aten.conv2d.default,
-                    torch.ops.aten.conv3d.default,
-                    torch.ops.aten.conv_transpose2d.input,
-                }
+                {k: self.op_axis_dict[k] for k in conv_ops if k in self.op_axis_dict}
             )
 
         if self.is_linear_per_channel:
+            linear_ops = [torch.ops.aten.linear.default]
             self.use_per_channel_weight_quant_ops.update(
-                {
-                    torch.ops.aten.linear.default,
-                }
+                {k: self.op_axis_dict[k] for k in linear_ops if k in self.op_axis_dict}
            )
+
         if per_block_quant_config_func:
-            self.per_block_quant_config = (
-                per_block_quant_config_func(act_observer=self.act_observer)
-                if self.act_observer
-                else per_block_quant_config_func()
-            )
+            self.per_block_quant_config_list = []
+            for i in range(potential_axis):
+                self.per_block_quant_config_list.append(
+                    (
+                        per_block_quant_config_func(
+                            act_observer=self.act_observer, ch_axis=i
+                        )
+                        if self.act_observer
+                        else per_block_quant_config_func(ch_axis=i)
+                    )
+                )
 
 
 class QnnQuantizer(Quantizer):
@@ -269,16 +298,22 @@ def _get_quant_config(self, node: torch.fx.Node) -> Optional[QuantizationConfig]
         op = node.target
         if isinstance(op, str):
             return
-
+        config = self._get_submodule_qconfig(node)
         if block_size := self.block_size_map.get(node.name):
-            config = self.default_quant_config.per_block_quant_config
+            ch_axis = config.op_axis_dict.get(node.target, 0)
+            assert (
+                len(config.per_block_quant_config_list) > ch_axis
+            ), f"Unsupported per block quantization axis: {ch_axis}, please increase the range of per_block_quant_config_list"
+            config = config.per_block_quant_config_list[ch_axis]
             config.block_size = block_size
             return config
 
-        config = self._get_submodule_qconfig(node)
-
         if op in config.use_per_channel_weight_quant_ops:
-            return config.per_channel_quant_config
+            ch_axis = config.use_per_channel_weight_quant_ops[op]
+            assert (
+                len(config.per_channel_quant_config_list) > ch_axis
+            ), f"Unsupported per channel quantization axis: {ch_axis}, please increase the range of per_channel_quant_config_list"
+            return config.per_channel_quant_config_list[ch_axis]
 
         if op in self.quant_ops:
             return config.quant_config
diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py
index 5ea6caf54ad..58647441210 100644
--- a/backends/qualcomm/tests/models.py
+++ b/backends/qualcomm/tests/models.py
@@ -746,15 +746,26 @@ def forward(self, x):
 
 
 class ConvTranspose2dSingle(torch.nn.Module):
-    def __init__(self, bias=True, dilation=1):
+    def __init__(
+        self,
+        bias=True,
+        in_channels=1,
+        out_channels=3,
+        kernel_size=1,
+        stride=1,
+        padding=1,
+        dilation=1,
+        groups=1,
+    ):
         super().__init__()
         self.conv_transpose = torch.nn.ConvTranspose2d(
-            in_channels=1,
-            out_channels=3,
-            kernel_size=3,
-            stride=2,
-            padding=1,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
             dilation=dilation,
+            groups=groups,
             bias=bias,
         )
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index 2641acc5a2d..b1882a7deca 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -352,17 +352,98 @@ def test_qnn_backend_conv_transpose1d(self):
         self.lower_module_and_test_output(module, sample_input)
 
     def test_qnn_backend_conv_transpose2d(self):
-        modules = [
-            ConvTranspose2dSingle(),  # noqa: F405
-            ConvTranspose2dSingle(bias=False),  # noqa: F405
-            ConvTranspose2dSingle(dilation=2),  # noqa: F405
-            ConvTranspose2dSingle(dilation=(2, 3)),  # noqa: F405
-            ConvTranspose2dSingle(dilation=(2, 1)),  # noqa: F405
+        test_comb = [
+            {
+                QCOM_MODULE: [ConvTranspose2dSingle()],  # noqa: F405
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 1, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [ConvTranspose2dSingle(bias=False)],  # noqa: F405
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 1, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=2,
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=(2, 3),
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=(2, 1),
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=(2, 1),
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=6,
+                        out_channels=6,
+                        kernel_size=3,
+                        padding=0,
+                        groups=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(4, 6, 16, 16),),
+                ],
+            },
         ]
-        sample_input = (torch.randn([1, 1, 33, 33]),)
-        for i, module in enumerate(modules):
-            with self.subTest(i=i):
-                self.lower_module_and_test_output(module, sample_input)
+
+        index = 0
+        for comb in test_comb:
+            for module in comb[QCOM_MODULE]:
+                for sample_input in comb[QCOM_SAMPLE_INPUTS]:
+                    with self.subTest(i=index):
+                        index += 1
+                        self.lower_module_and_test_output(module, sample_input)
 
     def test_qnn_backend_conv_transpose3d(self):
         modules = [
@@ -611,7 +692,6 @@ def test_qnn_backend_element_wise_sub(self):
                     index += 1
                     self.lower_module_and_test_output(module, sample_input)
 
-    @unittest.expectedFailure
     def test_qnn_backend_elu(self):
         module = Elu()  # noqa: F405
         sample_input = (torch.randn(2, 5, 1, 3),)
@@ -2248,16 +2328,128 @@ def test_qnn_backend_conv_transpose1d(self):
         self.lower_module_and_test_output(module, sample_input)
 
     def test_qnn_backend_conv_transpose2d(self):
+        test_comb = [
+            {
+                QCOM_MODULE: [ConvTranspose2dSingle()],  # noqa: F405
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 1, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [ConvTranspose2dSingle(bias=False)],  # noqa: F405
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 1, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=2,
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=(2, 3),
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=(2, 1),
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=2,
+                        out_channels=3,
+                        dilation=(2, 1),
+                        kernel_size=3,
+                        stride=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(1, 2, 16, 16),),
+                ],
+            },
+            {
+                QCOM_MODULE: [
+                    ConvTranspose2dSingle(  # noqa: F405
+                        in_channels=6,
+                        out_channels=6,
+                        kernel_size=3,
+                        padding=0,
+                        groups=2,
+                    )
+                ],
+                QCOM_SAMPLE_INPUTS: [
+                    (torch.randn(4, 6, 16, 16),),
+                ],
+            },
+        ]
+
+        index = 0
+        for comb in test_comb:
+            for module in comb[QCOM_MODULE]:
+                for sample_input in comb[QCOM_SAMPLE_INPUTS]:
+                    with self.subTest(i=index):
+                        index += 1
+                        gm = self.get_qdq_module(module, sample_input)
+                        self.lower_module_and_test_output(gm, sample_input)
+
+    @unittest.skip("As of QNN 2.37, transpose conv block quant is not supported")
+    def test_qnn_backend_conv_transpose2d_block(self):
+        i_ch, o_ch, kernel, padding = 128, 32, (1, 1), 0
         modules = [
-            ConvTranspose2dSingle(),  # noqa: F405
-            ConvTranspose2dSingle(bias=False),  # noqa: F405
-            ConvTranspose2dSingle(dilation=(2, 3)),  # noqa: F405
-            ConvTranspose2dSingle(dilation=(2, 1)),  # noqa: F405
+            ConvTranspose2dSingle(  # noqa: F405
+                bias=False,
+                in_channels=i_ch,
+                out_channels=o_ch,
+                kernel_size=kernel,
+                padding=padding,
+            ),
+            ConvTranspose2dSingle(  # noqa: F405
+                in_channels=i_ch,
+                out_channels=o_ch,
+                kernel_size=kernel,
+                padding=padding,
+            ),
         ]
-        sample_input = (torch.randn([1, 1, 3, 3]),)
+
+        sample_input = (torch.randn(1, 128, 16, 16),)
         for i, module in enumerate(modules):
             with self.subTest(i=i):
-                module = self.get_qdq_module(module, sample_input)
+                module = self.get_qdq_module(
+                    module,
+                    sample_input,
+                    quant_dtype=QuantDtype.use_16a4w_block,
+                    block_size_map={"conv_transpose2d": (16, 1, 1, 1)},
+                )
                 self.lower_module_and_test_output(module, sample_input)
 
     def test_qnn_backend_conv_transpose3d(self):
diff --git a/backends/test/suite/operators/test_div.py b/backends/test/suite/operators/test_div.py
index 656d350585d..d493c97a20d 100644
--- a/backends/test/suite/operators/test_div.py
+++ b/backends/test/suite/operators/test_div.py
@@ -46,6 +46,7 @@ def test_divide_dtype(self, flow: TestFlow, dtype) -> None:
                 ),  # Adding 0.1 to avoid division by zero
             ),
             flow,
+            generate_random_test_inputs=False,
         )
 
     def test_divide_f32_bcast_first(self, flow: TestFlow) -> None:
@@ -57,6 +58,7 @@ def test_divide_f32_bcast_first(self, flow: TestFlow) -> None:
                 + 0.1,  # Using abs and adding 0.1 to avoid division by zero
             ),
             flow,
+            generate_random_test_inputs=False,
         )
 
     def test_divide_f32_bcast_second(self, flow: TestFlow) -> None:
@@ -68,6 +70,7 @@ def test_divide_f32_bcast_second(self, flow: TestFlow) -> None:
                 + 0.1,  # Using abs and adding 0.1 to avoid division by zero
             ),
             flow,
+            generate_random_test_inputs=False,
         )
 
     def test_divide_f32_bcast_unary(self, flow: TestFlow) -> None:
@@ -79,6 +82,7 @@ def test_divide_f32_bcast_unary(self, flow: TestFlow) -> None:
                 + 0.1,  # Using abs and adding 0.1 to avoid division by zero
             ),
             flow,
+            generate_random_test_inputs=False,
         )
 
     def test_divide_f32_trunc(self, flow: TestFlow) -> None:
@@ -90,6 +94,7 @@ def test_divide_f32_trunc(self, flow: TestFlow) -> None:
                 + 0.1,  # Using abs and adding 0.1 to avoid division by zero
             ),
             flow,
+            generate_random_test_inputs=False,
         )
 
     def test_divide_f32_floor(self, flow: TestFlow) -> None:
@@ -101,4 +106,5 @@ def test_divide_f32_floor(self, flow: TestFlow) -> None:
                 + 0.1,  # Using abs and adding 0.1 to avoid division by zero
             ),
             flow,
+            generate_random_test_inputs=False,
         )
diff --git a/backends/test/suite/operators/test_log.py b/backends/test/suite/operators/test_log.py
index c4af1fe442b..320f4fe463b 100644
--- a/backends/test/suite/operators/test_log.py
+++ b/backends/test/suite/operators/test_log.py
@@ -34,19 +34,39 @@ def test_log_dtype(self, flow: TestFlow, dtype) -> None:
         # Test with different dtypes
         model = LogModel().to(dtype)
         # Use positive values only for log
-        self._test_op(model, (torch.rand(10, 10).to(dtype) + 0.01,), flow)
+        self._test_op(
+            model,
+            (torch.rand(10, 10).to(dtype) + 0.01,),
+            flow,
+            generate_random_test_inputs=False,
+        )
 
     def test_log_shapes(self, flow: TestFlow) -> None:
         # Test with different tensor shapes
 
         # 1D tensor
-        self._test_op(LogModel(), (torch.rand(20) + 0.01,), flow)
+        self._test_op(
+            LogModel(),
+            (torch.rand(20) + 0.01,),
+            flow,
+            generate_random_test_inputs=False,
+        )
 
         # 2D tensor
-        self._test_op(LogModel(), (torch.rand(5, 10) + 0.01,), flow)
+        self._test_op(
+            LogModel(),
+            (torch.rand(5, 10) + 0.01,),
+            flow,
+            generate_random_test_inputs=False,
+        )
 
         # 3D tensor
-        self._test_op(LogModel(), (torch.rand(3, 4, 5) + 0.01,), flow)
+        self._test_op(
+            LogModel(),
+            (torch.rand(3, 4, 5) + 0.01,),
+            flow,
+            generate_random_test_inputs=False,
+        )
 
     @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_log_edge_cases(self, flow: TestFlow) -> None:
diff --git a/backends/test/suite/operators/test_rsqrt.py b/backends/test/suite/operators/test_rsqrt.py
index bb51b213dd4..0b7c9739cf7 100644
--- a/backends/test/suite/operators/test_rsqrt.py
+++ b/backends/test/suite/operators/test_rsqrt.py
@@ -33,7 +33,12 @@ def test_rsqrt_dtype(self, flow: TestFlow, dtype) -> None:
         # Test with different dtypes
         model = RsqrtModel().to(dtype)
         # Use positive values only for rsqrt to avoid division by zero
-        self._test_op(model, (torch.rand(10, 10).to(dtype) + 0.01,), flow)
+        self._test_op(
+            model,
+            (torch.rand(10, 10).to(dtype) + 0.01,),
+            flow,
+            generate_random_test_inputs=False,
+        )
 
     def test_rsqrt_shapes(self, flow: TestFlow) -> None:
         # Test with different tensor shapes
diff --git a/backends/test/suite/operators/test_sqrt.py b/backends/test/suite/operators/test_sqrt.py
index 92fbc64878e..4a3f931204d 100644
--- a/backends/test/suite/operators/test_sqrt.py
+++ b/backends/test/suite/operators/test_sqrt.py
@@ -33,7 +33,12 @@ def test_sqrt_dtype(self, flow: TestFlow, dtype) -> None:
         # Test with different dtypes
         model = SqrtModel().to(dtype)
         # Use non-negative values only for sqrt
-        self._test_op(model, (torch.rand(10, 10).to(dtype),), flow)
+        self._test_op(
+            model,
+            (torch.rand(10, 10).to(dtype),),
+            flow,
+            generate_random_test_inputs=False,
+        )
 
     def test_sqrt_shapes(self, flow: TestFlow) -> None:
         # Test with different tensor shapes