Commit a0e82c2

[Cherry-pick] Implemented AddQuantDequantPass in imperative quantization. (#26692) (#30525)

* Implemented AddQuantDequantPass in imperative quantization.
* Support 2.0 API layers such as Pool2D and ReLU.
1 parent: 3688d9e

File tree

5 files changed: +549 lines, -8 lines

python/paddle/fluid/contrib/slim/quantization/imperative/qat.py

Lines changed: 18 additions & 3 deletions
@@ -86,7 +86,7 @@ def __init__(self,
             'moving_average_abs_max', the static quantization scale will be calculated
             during training and used in inference.
         moving_rate(float): the parameter for 'moving_average_abs_max' quantization.
-        quantizable_op_type(list[str]): List the type of layers that will be quantized.
+        quantizable_layer_type(list[str]): List the type of layers that will be quantized.
             Default is ['Conv2D', 'Linear']. The quantizable_op_type in
             QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
         weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess

@@ -229,7 +229,17 @@ def forward(self, inputs):
                 "'abs_max' or 'moving_average_abs_max' or 'channel_wise_abs_max' now."
                 % (str(weight_quantize_type)))

-        self._quant_layers_map = {'Conv2D': Conv2D, 'Linear': Linear}
+        self._quant_layers_map = {
+            'Conv2D': Conv2D,
+            'Linear': Linear,
+            'Pool2D': Pool2D,
+            'ReLU': ReLU,
+            'LeakyReLU': LeakyReLU,
+            'ReLU6': ReLU6,
+            'Softmax': Softmax,
+            'Tanh': Tanh,
+            'Swish': Swish
+        }
         self._quantizable_layer_type = tuple(
             self._quant_layers_map[layer]
             if layer in self._quant_layers_map else layer

@@ -291,7 +301,12 @@ def _get_quantized_counterpart(self, layer):
                 layer.full_name()))
             sys.exit(-1)

-        quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
+        layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear']
+        if quantized_counterpart[index] not in layer_with_weight:
+            quant_layer_class_name = 'QuantizedNoweightLayer'
+        else:
+            quant_layer_class_name = quantized_counterpart[index]
+        quantized_layer = quant_nn.__dict__[quant_layer_class_name](
             layer, self._weight_bits, self._activation_bits, self._moving_rate,
             self._weight_quantize_type, self._activation_quantize_type,
             self._weight_pre_layer, self._act_pre_layer,
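
Taken together, these changes let callers list weight-free layers in quantizable_layer_type and have them wrapped by QuantizedNoweightLayer. A minimal usage sketch, assuming the imperative QAT API shown in this diff (the model class MyNet is hypothetical; only the quanter wiring comes from the patch):

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

quanter = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max',
    # New in this commit: weight-free layers such as ReLU and Pool2D can be
    # listed here; they receive a fake quant-dequant op on their input.
    quantizable_layer_type=['Conv2D', 'Linear', 'ReLU', 'Pool2D'])

model = MyNet()          # hypothetical paddle.nn.Layer subclass
quanter.quantize(model)  # replaces the listed layers in place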

python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py

Lines changed: 27 additions & 3 deletions
@@ -24,9 +24,9 @@
 from paddle.nn import functional as F

 __all__ = [
-    'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D',
-    'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax',
-    'MovingAverageAbsMaxScale'
+    'FakeQuantMovingAverage', 'FakeQuantAbsMax',
+    'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear',
+    'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale'
 ]

@@ -478,6 +478,30 @@ def forward(self, input):
         return out


+class QuantizedNoweightLayer(layers.Layer):
+    def __init__(self,
+                 layer,
+                 weight_bits=8,
+                 activation_bits=8,
+                 moving_rate=0.9,
+                 *args,
+                 **kwargs):
+
+        super(QuantizedNoweightLayer, self).__init__()
+        self._layer = layer
+        self._fake_quant_input = _get_fake_quant_type(
+            'moving_average_abs_max',
+            name=layer.full_name(),
+            moving_rate=moving_rate,
+            quant_bits=activation_bits,
+            dtype=self._dtype,
+            quant_on_weight=False)
+
+    def forward(self, input):
+        quant_input = self._fake_quant_input(input)
+        return self._layer.forward(quant_input)
+
+
 class MovingAverageAbsMaxScale(layers.Layer):
     def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
         r"""

python/paddle/fluid/contrib/slim/tests/CMakeLists.txt

Lines changed: 25 additions & 0 deletions
@@ -270,6 +270,30 @@ list(REMOVE_ITEM TEST_OPS
 LIST(REMOVE_ITEM TEST_OPS test_auto_pruning)
 LIST(REMOVE_ITEM TEST_OPS test_filter_pruning)

+# only tests on single GPU environment
+LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant)
+
+py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS
+    CUDA_VISIBLE_DEVICES=0)
+
+# fix
+if(WIN32)
+    SET(SINGLE_CARD_TEST_OPS
+        test_user_defined_quantization
+        test_quantization_scale_pass
+        test_quantization_pass
+        test_moving_average_abs_max_scale_op
+        test_imperative_qat_channelwise
+        test_imperative_qat
+        test_imperative_out_scale
+        test_graph)
+    LIST(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS})
+    foreach(src ${SINGLE_CARD_TEST_OPS})
+        py_test(${src} SRCS ${src}.py ENVS CUDA_VISIBLE_DEVICES=0)
+    endforeach()
+endif()
+
 foreach(src ${TEST_OPS})
     py_test(${src} SRCS ${src}.py)
 endforeach()

@@ -288,6 +312,7 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
 set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
+set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120)
 set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
 if(LINUX AND WITH_MKLDNN)
     set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)
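
The new test is pinned to one card via CUDA_VISIBLE_DEVICES=0 in the test environment, matching the single-card pattern used on Windows. For context, a hedged sketch of how a test script sees that restriction from the Python side (standard CUDA device masking, not something this commit adds):

import paddle

# With CUDA_VISIBLE_DEVICES=0 exported by CMake, the process sees exactly
# one GPU, so device index 0 always refers to that single card.
if paddle.is_compiled_with_cuda():
    place = paddle.CUDAPlace(0)
else:
    place = paddle.CPUPlace()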
