@@ -205,7 +205,6 @@ def get_16a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.uint16).min,
         quant_max=torch.iinfo(torch.uint16).max,
         qscheme=torch.per_tensor_affine,
-        reduce_range=True,
         observer=act_observer.with_args(**extra_args),
     )
     act_quantization_spec = QuantizationSpec(
@@ -220,7 +219,6 @@ def get_16a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int8).min + 1,
         quant_max=torch.iinfo(torch.int8).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -421,7 +419,6 @@ def get_8a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int8).min + 1,
         quant_max=torch.iinfo(torch.int8).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -438,7 +435,6 @@ def get_8a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int32).min,
         quant_max=torch.iinfo(torch.int32).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     bias_quantization_spec = QuantizationSpec(
@@ -467,7 +463,6 @@ def get_16a4w_qnn_qat_config(
         quant_min=torch.iinfo(torch.uint16).min,
         quant_max=torch.iinfo(torch.uint16).max,
         qscheme=torch.per_tensor_affine,
-        reduce_range=True,
         observer=act_observer,
     )
     act_quantization_spec = QuantizationSpec(
@@ -484,7 +479,6 @@ def get_16a4w_qnn_qat_config(
         quant_max=7,
         qscheme=torch.per_tensor_symmetric,
         ch_axis=0,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -501,7 +495,6 @@ def get_16a4w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int32).min,
         quant_max=torch.iinfo(torch.int32).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     bias_quantization_spec = QuantizationSpec(
@@ -551,7 +544,6 @@ def get_qat_per_channel_quant_config(
     act_fake_quant_ctr = FakeQuantize.with_args(
         dtype=torch.int32 if act_dtype == torch.uint16 else act_dtype,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=act_observer,
     )
     act_quantization_spec = QuantizationSpec(
@@ -566,7 +558,6 @@ def get_qat_per_channel_quant_config(
         quant_min=torch.iinfo(act_dtype).min,
         quant_max=torch.iinfo(act_dtype).max,
         qscheme=torch.per_tensor_affine,
-        reduce_range=True,
         observer=act_observer,
     )
     act_quantization_spec = QuantizationSpec(
0 commit comments