diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py index 1839b0e1a47..507ceb55465 100644 --- a/backends/arm/quantizer/arm_quantizer.py +++ b/backends/arm/quantizer/arm_quantizer.py @@ -208,7 +208,7 @@ def get_symmetric_a16w8_quantization_config( # 16-bit activation quantization spec act_quantization_spec = QuantizationSpec( dtype=torch.int16, - quant_min=torch.iinfo(torch.int16).min, # -32768 + quant_min=torch.iinfo(torch.int16).min + 1, # -32767 quant_max=torch.iinfo(torch.int16).max, # 32767 qscheme=torch.per_tensor_symmetric, is_dynamic=is_dynamic, diff --git a/backends/arm/quantizer/quantization_config.py b/backends/arm/quantizer/quantization_config.py index 36ab233bdb6..3e2939cff61 100644 --- a/backends/arm/quantizer/quantization_config.py +++ b/backends/arm/quantizer/quantization_config.py @@ -206,8 +206,8 @@ def _derive_qparams_fn( derived_from=[(input_act, node), (weight, node)], # type: ignore[list-item] derive_qparams_fn=_derive_qparams_fn, dtype=torch.int32, - quant_min=torch.iinfo(torch.int32).min, - quant_max=torch.iinfo(torch.int32).max - 1, + quant_min=torch.iinfo(torch.int32).min + 1, + quant_max=torch.iinfo(torch.int32).max, qscheme=qscheme, ch_axis=ch_axis, )