
Commit 07e6a73

navsud authored and facebook-github-bot committed
Remove reduce_range as it is not relevant for HTP
Summary: `reduce_range=True` reduces the available bit width by 1 in cases where `quant_min` and `quant_max` are not provided. It was originally intended for Intel `fbgemm` kernels, but I don't think this quantization setting is relevant for HTP. The PTQ quantization config also doesn't use it, so remove it from all the QAT configs. This helped improve QAT model quality.

Differential Revision: D82867843
1 parent 291121c · commit 07e6a73
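As background (not part of this commit), here is a minimal sketch of the effect being removed: with PyTorch's stock observers, leaving `quant_min`/`quant_max` unset and enabling `reduce_range` halves the default range, i.e. drops one bit. The `MinMaxObserver` usage below assumes a recent `torch.ao.quantization` API and is illustrative only.

```python
import torch
from torch.ao.quantization.observer import MinMaxObserver

# With no explicit quant_min/quant_max, reduce_range=True drops one bit
# from the default range: qint8 shrinks from [-128, 127] to [-64, 63].
full = MinMaxObserver(dtype=torch.qint8, reduce_range=False)
reduced = MinMaxObserver(dtype=torch.qint8, reduce_range=True)

print(full.quant_min, full.quant_max)        # -128 127
print(reduced.quant_min, reduced.quant_max)  # -64 63
```

On backends whose kernels don't need the headroom (as argued here for HTP), the reduced range simply costs quantization resolution, which is why dropping it can improve QAT model quality.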

File tree: 1 file changed (+0, -9 lines)


backends/qualcomm/quantizer/qconfig.py

Lines changed: 0 additions & 9 deletions
```diff
@@ -205,7 +205,6 @@ def get_16a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.uint16).min,
         quant_max=torch.iinfo(torch.uint16).max,
         qscheme=torch.per_tensor_affine,
-        reduce_range=True,
         observer=act_observer.with_args(**extra_args),
     )
     act_quantization_spec = QuantizationSpec(
@@ -220,7 +219,6 @@ def get_16a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int8).min + 1,
         quant_max=torch.iinfo(torch.int8).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -421,7 +419,6 @@ def get_8a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int8).min + 1,
         quant_max=torch.iinfo(torch.int8).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -438,7 +435,6 @@ def get_8a8w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int32).min,
         quant_max=torch.iinfo(torch.int32).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     bias_quantization_spec = QuantizationSpec(
@@ -467,7 +463,6 @@ def get_16a4w_qnn_qat_config(
         quant_min=torch.iinfo(torch.uint16).min,
         quant_max=torch.iinfo(torch.uint16).max,
         qscheme=torch.per_tensor_affine,
-        reduce_range=True,
         observer=act_observer,
     )
     act_quantization_spec = QuantizationSpec(
@@ -484,7 +479,6 @@ def get_16a4w_qnn_qat_config(
         quant_max=7,
         qscheme=torch.per_tensor_symmetric,
         ch_axis=0,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     weight_quantization_spec = QuantizationSpec(
@@ -501,7 +495,6 @@ def get_16a4w_qnn_qat_config(
         quant_min=torch.iinfo(torch.int32).min,
         quant_max=torch.iinfo(torch.int32).max,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=MovingAverageMinMaxObserver,
     )
     bias_quantization_spec = QuantizationSpec(
@@ -551,7 +544,6 @@ def get_qat_per_channel_quant_config(
     act_fake_quant_ctr = FakeQuantize.with_args(
         dtype=torch.int32 if act_dtype == torch.uint16 else act_dtype,
         qscheme=torch.per_tensor_symmetric,
-        reduce_range=True,
         observer=act_observer,
     )
     act_quantization_spec = QuantizationSpec(
@@ -566,7 +558,6 @@ def get_qat_per_channel_quant_config(
         quant_min=torch.iinfo(act_dtype).min,
         quant_max=torch.iinfo(act_dtype).max,
         qscheme=torch.per_tensor_affine,
-        reduce_range=True,
         observer=act_observer,
     )
     act_quantization_spec = QuantizationSpec(
```
