@@ -205,7 +205,6 @@ def get_16a8w_qnn_qat_config(
205
205
quant_min = torch .iinfo (torch .uint16 ).min ,
206
206
quant_max = torch .iinfo (torch .uint16 ).max ,
207
207
qscheme = torch .per_tensor_affine ,
208
- reduce_range = True ,
209
208
observer = act_observer .with_args (** extra_args ),
210
209
)
211
210
act_quantization_spec = QuantizationSpec (
@@ -220,7 +219,6 @@ def get_16a8w_qnn_qat_config(
220
219
quant_min = torch .iinfo (torch .int8 ).min + 1 ,
221
220
quant_max = torch .iinfo (torch .int8 ).max ,
222
221
qscheme = torch .per_tensor_symmetric ,
223
- reduce_range = True ,
224
222
observer = MovingAverageMinMaxObserver ,
225
223
)
226
224
weight_quantization_spec = QuantizationSpec (
@@ -421,7 +419,6 @@ def get_8a8w_qnn_qat_config(
421
419
quant_min = torch .iinfo (torch .int8 ).min + 1 ,
422
420
quant_max = torch .iinfo (torch .int8 ).max ,
423
421
qscheme = torch .per_tensor_symmetric ,
424
- reduce_range = True ,
425
422
observer = MovingAverageMinMaxObserver ,
426
423
)
427
424
weight_quantization_spec = QuantizationSpec (
@@ -438,7 +435,6 @@ def get_8a8w_qnn_qat_config(
438
435
quant_min = torch .iinfo (torch .int32 ).min ,
439
436
quant_max = torch .iinfo (torch .int32 ).max ,
440
437
qscheme = torch .per_tensor_symmetric ,
441
- reduce_range = True ,
442
438
observer = MovingAverageMinMaxObserver ,
443
439
)
444
440
bias_quantization_spec = QuantizationSpec (
@@ -467,7 +463,6 @@ def get_16a4w_qnn_qat_config(
467
463
quant_min = torch .iinfo (torch .uint16 ).min ,
468
464
quant_max = torch .iinfo (torch .uint16 ).max ,
469
465
qscheme = torch .per_tensor_affine ,
470
- reduce_range = True ,
471
466
observer = act_observer ,
472
467
)
473
468
act_quantization_spec = QuantizationSpec (
@@ -484,7 +479,6 @@ def get_16a4w_qnn_qat_config(
484
479
quant_max = 7 ,
485
480
qscheme = torch .per_tensor_symmetric ,
486
481
ch_axis = 0 ,
487
- reduce_range = True ,
488
482
observer = MovingAverageMinMaxObserver ,
489
483
)
490
484
weight_quantization_spec = QuantizationSpec (
@@ -501,7 +495,6 @@ def get_16a4w_qnn_qat_config(
501
495
quant_min = torch .iinfo (torch .int32 ).min ,
502
496
quant_max = torch .iinfo (torch .int32 ).max ,
503
497
qscheme = torch .per_tensor_symmetric ,
504
- reduce_range = True ,
505
498
observer = MovingAverageMinMaxObserver ,
506
499
)
507
500
bias_quantization_spec = QuantizationSpec (
@@ -551,7 +544,6 @@ def get_qat_per_channel_quant_config(
551
544
act_fake_quant_ctr = FakeQuantize .with_args (
552
545
dtype = torch .int32 if act_dtype == torch .uint16 else act_dtype ,
553
546
qscheme = torch .per_tensor_symmetric ,
554
- reduce_range = True ,
555
547
observer = act_observer ,
556
548
)
557
549
act_quantization_spec = QuantizationSpec (
@@ -566,7 +558,6 @@ def get_qat_per_channel_quant_config(
566
558
quant_min = torch .iinfo (act_dtype ).min ,
567
559
quant_max = torch .iinfo (act_dtype ).max ,
568
560
qscheme = torch .per_tensor_affine ,
569
- reduce_range = True ,
570
561
observer = act_observer ,
571
562
)
572
563
act_quantization_spec = QuantizationSpec (
0 commit comments