
Commit 3c4f3b2

daverim authored and tensorflower-gardener committed
When using quantize_annotate_layer, the expectation is that the annotated layer will be quantized after conversion to TFLite. However, this does not hold when the layer producing its inputs is not quantized.

Add a wrapper config for layers which are not annotated but are consumed by an annotated layer.

PiperOrigin-RevId: 364984791
1 parent eaeb6e7 commit 3c4f3b2
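
For context, the pattern this change addresses looks like the following sketch. It mirrors the new Sequential test added in this commit; the `tfmot` import path is the public API and an assumption of this sketch, not part of the diff:

```python
# A minimal sketch of the motivating pattern: only the final Dense layer is
# annotated for quantization, so the Dense(10) feeding it is not annotated.
import tensorflow as tf
import tensorflow_model_optimization as tfmot

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 5, input_shape=(28, 28, 1), activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tfmot.quantization.keras.quantize_annotate_layer(
        tf.keras.layers.Dense(5, activation='softmax')),
])

# Previously, the unannotated Dense(10) kept float outputs, so the annotated
# Dense(5) received unquantized inputs after TFLite conversion. With this
# change, quantize_apply wraps Dense(10) with an output-only quantize config.
quantized_model = tfmot.quantization.keras.quantize_apply(model)
```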

File tree

2 files changed: +114 −7 lines


tensorflow_model_optimization/python/core/quantization/keras/quantize.py

Lines changed: 55 additions & 6 deletions
@@ -341,12 +341,22 @@ def _clone_model_with_weights(model_to_clone):
   def _extract_original_model(model_to_unwrap):
     """Extracts original model by removing wrappers."""
     layer_quantize_map = {}
+    requires_output_quantize = set()
 
     def _unwrap(layer):
       if not isinstance(layer, quantize_annotate_mod.QuantizeAnnotate):
         return layer
 
       annotate_wrapper = layer
+      # pylint: disable=protected-access
+      if layer._inbound_nodes and len(layer._inbound_nodes) == 1:
+        node = layer._inbound_nodes[0]
+        inbound_layers = tf.nest.flatten(node.inbound_layers)
+        if len(inbound_layers) == 1 and not isinstance(
+            inbound_layers[0], quantize_annotate_mod.QuantizeAnnotate):
+          requires_output_quantize.add(inbound_layers[0].name)
+      # pylint: enable=protected-access
+
       layer_quantize_map[annotate_wrapper.layer.name] = {
           'quantize_config': annotate_wrapper.quantize_config
       }
@@ -355,15 +365,53 @@ def _unwrap(layer):
     unwrapped_model = keras.models.clone_model(
         model_to_unwrap, input_tensors=None, clone_function=_unwrap)
 
-    return unwrapped_model, layer_quantize_map
+    return unwrapped_model, layer_quantize_map, requires_output_quantize
+
+  class OutputOnlyConfig(quantize_config_mod.QuantizeConfig):
+    """QuantizeConfig that only quantizes output."""
+
+    def __init__(self, quantize_config):
+      self.quantize_config = quantize_config
+
+    def get_weights_and_quantizers(self, layer):
+      return []
+
+    def set_quantize_weights(self, layer, quantize_weights):
+      pass
+
+    def get_activations_and_quantizers(self, layer):
+      return self.quantize_config.get_activations_and_quantizers(layer)
+
+    def set_quantize_activations(self, layer, quantize_activations):
+      return self.quantize_config.set_quantize_activations(
+          layer, quantize_activations)
+
+    def get_output_quantizers(self, layer):
+      return self.quantize_config.get_output_quantizers(layer)
+
+    def get_config(self):
+      return {'quantize_config': self.quantize_config}
+
+    @classmethod
+    def from_config(cls, config):
+      return cls(**config)
 
   def _quantize(layer):  # pylint: disable=missing-docstring
-    if layer.name not in layer_quantize_map:
+    if (layer.name not in layer_quantize_map and
+        layer.name not in requires_output_quantize):
       return layer
 
-    quantize_config = layer_quantize_map[layer.name].get('quantize_config')
-    if not quantize_config and quantize_registry.supports(layer):
-      quantize_config = quantize_registry.get_quantize_config(layer)
+    if layer.name in requires_output_quantize:
+      if not quantize_registry.supports(layer):
+        return layer
+      full_quantize_config = quantize_registry.get_quantize_config(layer)
+      if not full_quantize_config:
+        return layer
+      quantize_config = OutputOnlyConfig(full_quantize_config)
+    else:
+      quantize_config = layer_quantize_map[layer.name].get('quantize_config')
+      if not quantize_config and quantize_registry.supports(layer):
+        quantize_config = quantize_registry.get_quantize_config(layer)
 
     if not quantize_config:
       error_msg = (
@@ -395,7 +443,8 @@ def _quantize(layer):  # pylint: disable=missing-docstring
   # 2. Remove QuantizeAnnotate wrappers from the layers in the model. This
   # extracts the original model structure (easier to transform), and
   # stores relevant quantization information in a map.
-  unwrapped_model, layer_quantize_map = _extract_original_model(model_copy)
+  (unwrapped_model, layer_quantize_map,
+   requires_output_quantize) = _extract_original_model(model_copy)
   # Model cloning excludes input layers. Add input layers into the map
   # since they need to be matched for patterns as well.
   # pylint: disable=protected-access
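
To make the new behavior concrete, here is a hedged sketch of what `OutputOnlyConfig` does to the unannotated producer layer. It mirrors the `_assert_nonannotated_input_layer_quantized` helper added in the test diff below and assumes only public `tfmot` API names:

```python
# Hedged sketch: inspect the wrapper that quantize_apply now places around an
# unannotated layer that feeds an annotated one.
import tensorflow as tf
import tensorflow_model_optimization as tfmot

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 5, input_shape=(28, 28, 1), activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tfmot.quantization.keras.quantize_annotate_layer(
        tf.keras.layers.Dense(5, activation='softmax')),
])
quantized_model = tfmot.quantization.keras.quantize_apply(model)

# layers[1] is the unannotated Dense(10), wrapped because it feeds the
# annotated Dense(5).
wrapped = quantized_model.layers[1]
config = wrapped.quantize_config

# OutputOnlyConfig skips weight quantization entirely...
assert config.get_weights_and_quantizers(wrapped.layer) == []
# ...while output handling is delegated to the registry's full config.
full_config = config.get_config()['quantize_config']
assert (config.get_output_quantizers(wrapped.layer) ==
        full_config.get_output_quantizers(wrapped.layer))
```

The design point: the producer's weights never need quantizing for the consumer's sake; only the tensor crossing into the annotated layer matters, so the wrapper forwards activation and output handling to the underlying config and drops the rest.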

tensorflow_model_optimization/python/core/quantization/keras/quantize_test.py

Lines changed: 59 additions & 1 deletion
@@ -288,12 +288,44 @@ def _assert_model_quantized(
         zip(annotated_model.layers, quantized_model.layers):
 
       if not isinstance(layer_annotated, QuantizeAnnotate):
-        self.assertNotIsInstance(layer_quantized, QuantizeWrapper)
+        # Possibly wrapped for input quantization.
+        if isinstance(layer_quantized, QuantizeWrapper):
+          self.assertLen(layer_annotated._outbound_nodes, 1)
+          self.assertIsInstance(
+              layer_annotated._outbound_nodes[0].outbound_layer,
+              QuantizeAnnotate)
+
+          # Ensure that only outputs are quantized.
+          self.assertFalse(
+              layer_quantized.quantize_config.get_weights_and_quantizers(
+                  layer_quantized.layer))
         continue
 
       self._assert_layer_quantized(
           layer_annotated, layer_quantized, exclude_keys)
 
+  def _assert_nonannotated_input_layer_quantized(
+      self, quantized_model, layer_index):
+    output_quantized_layer = quantized_model.layers[layer_index]
+    self.assertIsInstance(output_quantized_layer, QuantizeWrapper)
+    output_quantized_config = output_quantized_layer.quantize_config
+    default_quantized_config = output_quantized_config.get_config()[
+        'quantize_config']
+    self.assertIsInstance(
+        default_quantized_config,
+        default_8bit_quantize_registry.Default8BitQuantizeConfig)
+    self.assertFalse(output_quantized_config.get_weights_and_quantizers(
+        output_quantized_layer.layer))
+    self.assertTrue(default_quantized_config.get_weights_and_quantizers(
+        output_quantized_layer.layer))
+    output_quantizers = output_quantized_config.get_output_quantizers(
+        output_quantized_layer.layer)
+    default_output_quantizers = default_quantized_config.get_output_quantizers(
+        output_quantized_layer.layer)
+    for output_quantizer, default_quantizer in zip(output_quantizers,
+                                                   default_output_quantizers):
+      self.assertEqual(output_quantizer, default_quantizer)
+
   # quantize_apply Tests
 
   class CustomLayer(keras.layers.Dense):
@@ -379,6 +411,18 @@ def testAppliesQuantizationToAnnotatedModel_Sequential(self):
 
     self._assert_model_quantized(model, quantized_model, ['activation'])
 
+  def testAppliesQuantizationToInputsToAnnotatedModel_Sequential(self):
+    model = keras.Sequential([
+        keras.layers.Conv2D(32, 5, input_shape=(28, 28, 1), activation='relu'),
+        keras.layers.Dense(10, activation='relu'),
+        quantize_annotate_layer(keras.layers.Dense(5, activation='softmax')),
+    ])
+    quantized_model = quantize_apply(model)
+    self._assert_model_quantized(model, quantized_model, ['activation'])
+    # Test that Dense layer has output only quantization config.
+    self._assert_nonannotated_input_layer_quantized(
+        quantized_model, layer_index=1)
+
   def testAppliesQuantizationToAnnotatedModel_PreservesBuiltState(self):
     model = keras_test_utils.build_simple_dense_model()
     annotated_model = quantize_annotate_model(model)
@@ -404,6 +448,20 @@ def testAppliesQuantizationToAnnotatedModel_Functional(self):
 
     self._assert_model_quantized(model, quantized_model, ['activation'])
 
+  def testAppliesQuantizationToInputsToAnnotatedModel_Functional(self):
+    inputs = keras.Input(shape=(28, 28, 1))
+    x = keras.layers.Conv2D(32, 5, activation='relu')(inputs)
+    x = keras.layers.Dense(10, activation='relu')(x)
+    results = quantize_annotate_layer(
+        keras.layers.Dense(5, activation='softmax'))(
+            x)
+    model = keras.Model(inputs=inputs, outputs=results)
+    quantized_model = quantize_apply(model)
+    self._assert_model_quantized(model, quantized_model, ['activation'])
+    # Test that Dense layer has output only quantization config.
+    self._assert_nonannotated_input_layer_quantized(
+        quantized_model, layer_index=2)
+
   def testDoesNotQuantizeInputLayer_OutboundLayerNotQuantized(self):
     model = self._get_simple_functional_model()
 
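Finally, the commit message's motivating scenario is TFLite conversion. A hedged sketch, continuing from `quantized_model` in the sketch above the test diff; the converter flags are the usual post-QAT recipe and an assumption, not part of this change:

```python
import tensorflow as tf

# `quantized_model` is from the earlier OutputOnlyConfig sketch. Standard
# QAT-to-TFLite conversion recipe (assumed; not part of this commit).
converter = tf.lite.TFLiteConverter.from_keras_model(quantized_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# With this change the unannotated Dense(10) quantizes its outputs, so the
# annotated Dense(5) sees quantized inputs in the converted graph, matching
# the expectation described in the commit message.
```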