Commit a0e82c2

[Cherry-pick] Implemented AddQuantDequantPass in imperative quantization. (#26692) (#30525)

* Implemented AddQuantDequantPass in imperative quantization.
* Support 2.0 API layers such as Pool2D and ReLU.
1 parent: 3688d9e

File tree

5 files changed: +549 lines, -8 lines

python/paddle/fluid/contrib/slim/quantization/imperative/qat.py

Lines changed: 18 additions & 3 deletions
@@ -86,7 +86,7 @@ def __init__(self,
             'moving_average_abs_max', the static quantization scale will be calculated
             during training and used in inference.
         moving_rate(float): the parameter for 'moving_average_abs_max' quantization.
-        quantizable_op_type(list[str]): List the type of layers that will be quantized.
+        quantizable_layer_type(list[str]): List the type of layers that will be quantized.
             Default is ['Conv2D', 'Linear']. The quantizable_op_type in
             QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
         weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess

@@ -229,7 +229,17 @@ def forward(self, inputs):
                 "'abs_max' or 'moving_average_abs_max' or 'channel_wise_abs_max' now."
                 % (str(weight_quantize_type)))

-        self._quant_layers_map = {'Conv2D': Conv2D, 'Linear': Linear}
+        self._quant_layers_map = {
+            'Conv2D': Conv2D,
+            'Linear': Linear,
+            'Pool2D': Pool2D,
+            'ReLU': ReLU,
+            'LeakyReLU': LeakyReLU,
+            'ReLU6': ReLU6,
+            'Softmax': Softmax,
+            'Tanh': Tanh,
+            'Swish': Swish
+        }
         self._quantizable_layer_type = tuple(
             self._quant_layers_map[layer]
             if layer in self._quant_layers_map else layer

@@ -291,7 +301,12 @@ def _get_quantized_counterpart(self, layer):
                 layer.full_name()))
             sys.exit(-1)

-        quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
+        layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear']
+        if quantized_counterpart[index] not in layer_with_weight:
+            quant_layer_class_name = 'QuantizedNoweightLayer'
+        else:
+            quant_layer_class_name = quantized_counterpart[index]
+        quantized_layer = quant_nn.__dict__[quant_layer_class_name](
             layer, self._weight_bits, self._activation_bits, self._moving_rate,
             self._weight_quantize_type, self._activation_quantize_type,
             self._weight_pre_layer, self._act_pre_layer,
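
Taken together, these changes let callers list weight-free layers in quantizable_layer_type and have them wrapped by QuantizedNoweightLayer. A minimal usage sketch, assuming the imperative QAT API shown in this diff (the model class MyNet is hypothetical; only the quanter wiring comes from the patch):

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

quanter = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max',
    # New in this commit: weight-free layers such as ReLU and Pool2D can be
    # listed here; they receive a fake quant-dequant op on their input.
    quantizable_layer_type=['Conv2D', 'Linear', 'ReLU', 'Pool2D'])

model = MyNet()          # hypothetical paddle.nn.Layer subclass
quanter.quantize(model)  # replaces the listed layers in place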

python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py

Lines changed: 27 additions & 3 deletions
@@ -24,9 +24,9 @@
 from paddle.nn import functional as F

 __all__ = [
-    'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D',
-    'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax',
-    'MovingAverageAbsMaxScale'
+    'FakeQuantMovingAverage', 'FakeQuantAbsMax',
+    'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear',
+    'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale'
 ]

@@ -478,6 +478,30 @@ def forward(self, input):
         return out


+class QuantizedNoweightLayer(layers.Layer):
+    def __init__(self,
+                 layer,
+                 weight_bits=8,
+                 activation_bits=8,
+                 moving_rate=0.9,
+                 *args,
+                 **kwargs):
+
+        super(QuantizedNoweightLayer, self).__init__()
+        self._layer = layer
+        self._fake_quant_input = _get_fake_quant_type(
+            'moving_average_abs_max',
+            name=layer.full_name(),
+            moving_rate=moving_rate,
+            quant_bits=activation_bits,
+            dtype=self._dtype,
+            quant_on_weight=False)
+
+    def forward(self, input):
+        quant_input = self._fake_quant_input(input)
+        return self._layer.forward(quant_input)
+
+
 class MovingAverageAbsMaxScale(layers.Layer):
     def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
         r"""

python/paddle/fluid/contrib/slim/tests/CMakeLists.txt

Lines changed: 25 additions & 0 deletions
@@ -270,6 +270,30 @@ list(REMOVE_ITEM TEST_OPS
 LIST(REMOVE_ITEM TEST_OPS test_auto_pruning)
 LIST(REMOVE_ITEM TEST_OPS test_filter_pruning)

+# only tests on single GPU environment
+LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant)
+
+py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS
+    CUDA_VISIBLE_DEVICES=0)
+
+# fix
+if(WIN32)
+    SET(SINGLE_CARD_TEST_OPS
+        test_user_defined_quantization
+        test_quantization_scale_pass
+        test_quantization_pass
+        test_moving_average_abs_max_scale_op
+        test_imperative_qat_channelwise
+        test_imperative_qat
+        test_imperative_out_scale
+        test_graph)
+    LIST(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS})
+    foreach(src ${SINGLE_CARD_TEST_OPS})
+        py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0)
+    endforeach()
+endif()
+
 foreach(src ${TEST_OPS})
     py_test(${src} SRCS ${src}.py)
 endforeach()

@@ -288,6 +312,7 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
 set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
+set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
 if(LINUX AND WITH_MKLDNN)
     set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)
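
The new test is pinned to one card via CUDA_VISIBLE_DEVICES=0 in the test environment, matching the single-card pattern used on Windows. For context, a hedged sketch of how a test script sees that restriction from the Python side (standard CUDA device masking, not something this commit adds):

import paddle

# With CUDA_VISIBLE_DEVICES=0 exported by CMake, the process sees exactly
# one GPU, so device index 0 always refers to that single card.
if paddle.is_compiled_with_cuda():
    place = paddle.CUDAPlace(0)
else:
    place = paddle.CPUPlace()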
