Commit 031ef21

nutsiepully authored and tensorflower-gardener committed
quantize_apply function to implement quantization.
This adds support for applying QuantizeAwareActivation to annotated layers. Also adds support for implementing quantization by cloning the model with its existing annotations.

PiperOrigin-RevId: 255680020
1 parent d85b2ab commit 031ef21

File tree

6 files changed: +279 -9 lines changed

tensorflow_model_optimization/python/core/quantization/keras/BUILD
tensorflow_model_optimization/python/core/quantization/keras/quantize_annotate.py
tensorflow_model_optimization/python/core/quantization/keras/quantize_aware_activation.py
tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate.py
tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate_test.py
tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate_wrapper.py

tensorflow_model_optimization/python/core/quantization/keras/BUILD

Lines changed: 2 additions & 0 deletions
@@ -92,6 +92,7 @@ py_library(
     visibility = ["//visibility:public"],
     deps = [
         ":quantize_annotate",
+        ":quantize_aware_activation",
         ":quantize_emulate_wrapper",
         # tensorflow dep1,
         # python/keras tensorflow dep2,
@@ -174,6 +175,7 @@ py_test(
     srcs_version = "PY2AND3",
     visibility = ["//visibility:public"],
     deps = [
+        ":quantize_aware_activation",
         ":quantize_emulate",
         ":quantize_emulate_wrapper",
         # absl/testing:parameterized dep1,

tensorflow_model_optimization/python/core/quantization/keras/quantize_annotate.py

Lines changed: 6 additions & 3 deletions
@@ -61,13 +61,16 @@ def __init__(self,
   def call(self, inputs, training=None):
     return self.layer.call(inputs)
 
-  def get_config(self):
-    base_config = super(QuantizeAnnotate, self).get_config()
-    config = {
+  def get_quantize_params(self):
+    return {
         'num_bits': self._num_bits,
         'symmetric': self._symmetric,
         'narrow_range': self._narrow_range
     }
+
+  def get_config(self):
+    base_config = super(QuantizeAnnotate, self).get_config()
+    config = self.get_quantize_params()
     return dict(list(base_config.items()) + list(config.items()))
 
   @classmethod
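
With this split, `get_config` and the new `get_quantize_params` share one source of truth for the quantization arguments. The following is a minimal sketch of the intended contract, not part of this commit; the constructor keyword names are inferred from the tests added below.

# Hypothetical usage sketch; keyword names inferred from quantize_emulate_test.py.
from tensorflow.python import keras
from tensorflow_model_optimization.python.core.quantization.keras import quantize_annotate

annotated = quantize_annotate.QuantizeAnnotate(
    keras.layers.Dense(10), num_bits=8, symmetric=True, narrow_range=True)

params = annotated.get_quantize_params()
# params == {'num_bits': 8, 'symmetric': True, 'narrow_range': True}

# get_config() should still carry the same keys, so serialization is unchanged.
config = annotated.get_config()
assert all(config[key] == value for key, value in params.items())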

tensorflow_model_optimization/python/core/quantization/keras/quantize_aware_activation.py

Lines changed: 6 additions & 0 deletions
@@ -137,6 +137,12 @@ def call(self, inputs, training=None):
 
     return x
 
+  def get_quantize_params(self):
+    return {
+        'num_bits': self.num_bits,
+        'symmetric': self.symmetric,
+    }
+
   def compute_output_shape(self, input_shape):
     return input_shape
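
The new accessor mirrors the one on `QuantizeAnnotate`, minus `narrow_range`, which is not relevant to quantizing activations. A purely illustrative sketch of how `quantize_apply` (in quantize_emulate.py below) constructs this class:

# Illustrative sketch; mirrors the _quantize_activation call added in quantize_emulate.py.
from tensorflow.python import keras
from tensorflow_model_optimization.python.core.quantization.keras import quantize_aware_activation

# Wrap a 'relu' activation that lives inside a Conv2D layer.
quant_relu = quantize_aware_activation.QuantizeAwareActivation(
    'relu', keras.layers.Conv2D, num_bits=8, symmetric=True)

assert quant_relu.get_quantize_params() == {'num_bits': 8, 'symmetric': True}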

tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate.py

Lines changed: 103 additions & 0 deletions
@@ -17,6 +17,7 @@
 from tensorflow.python import keras
 
 from tensorflow_model_optimization.python.core.quantization.keras import quantize_annotate as quant_annotate
+from tensorflow_model_optimization.python.core.quantization.keras import quantize_aware_activation
 from tensorflow_model_optimization.python.core.quantization.keras.quantize_emulate_wrapper import QuantizeEmulateWrapper
 
 
@@ -132,3 +133,105 @@ def _add_quant_wrapper(layer):
   elif isinstance(to_quantize, keras.layers.Layer):
     quant_params.update(**kwargs)
     return quant_annotate.QuantizeAnnotate(to_quantize, **quant_params)
+
+
+def quantize_apply(model):
+  """Apply quantization operations to a keras model.
+
+  This function takes a keras model which has been annotated with
+  `quantize_annotate` and constructs a new keras model in which each of the
+  annotated layers has been quantized. The quantization process introduces
+  new quantization ops in the TensorFlow graph to appropriately emulate
+  quantization loss.
+
+  Note that to exactly emulate quantization loss, certain graph/model
+  transformations may be applied. This is required since the actual quantized
+  kernel implementations may apply similar transformations.
+
+  Args:
+    model: A keras Sequential or Functional model which has been annotated
+      with `quantize_annotate`.
+
+  Returns:
+    Returns a new cloned keras model in which the annotated layers have been
+    quantized. All the existing layers are cloned.
+  """
+
+  if not isinstance(model, keras.Model):
+    raise ValueError('Only a keras `Model` instance can be used.')
+
+  if not isinstance(model, keras.Sequential) \
+      and not model._is_graph_network:  # pylint: disable=protected-access
+    raise ValueError('model should be either a keras.Sequential or a '
+                     'keras functional model.')
+
+  # Have at least 1 layer annotated with QuantizeAnnotate.
+  if not any(isinstance(layer, quant_annotate.QuantizeAnnotate)
+             for layer in model.layers):
+    raise ValueError('model does not contain any layers which have been '
+                     'annotated with `quantize_annotate`. There are no layers '
+                     'to quantize.')
+
+  def _clone_layer(layer):
+    return layer.__class__.from_config(layer.get_config())
+
+  def _quantize_activation(activation, parent_class, quantize_params):
+    try:
+      return quantize_aware_activation.QuantizeAwareActivation(
+          activation.__name__, parent_class, **quantize_params)
+    except TypeError:
+      # Non-standard activation. Could be a custom callable, or an advanced
+      # activation. Simply return the original activation for now.
+      # TODO(pulkitb): Determine how to handle custom activations and advanced
+      # activations.
+      return activation
+
+  def _get_quantize_activation_params(layer):
+    quant_params = layer.get_quantize_params()
+    # narrow_range is not relevant to quantizing activations.
+    quant_params.pop('narrow_range')
+
+    return quant_params
+
+  def _apply_quantization(quant_annotate_layer):
+    layer_to_quantize = _clone_layer(quant_annotate_layer.layer)
+    quantize_params = quant_annotate_layer.get_quantize_params()
+
+    return QuantizeEmulateWrapper(layer_to_quantize, **quantize_params)
+
+  # Apply all graph level transformations.
+  replace_map = {}
+
+  # Replace activations in layers with QuantizeAwareActivation.
+  # Dense(activation='relu') -> Dense(activation=QuantizeAwareActivation('relu'))
+  # TODO(pulkitb): Not all layers (LSTMs) have just activation. Add
+  # generic handling for all layers.
+  for layer in model.layers:
+    if isinstance(layer, quant_annotate.QuantizeAnnotate) and \
+        (layer.layer.activation is not None and
+         layer.layer.activation != keras.activations.linear):
+      quantized_layer = _apply_quantization(layer)
+
+      quantized_layer.layer.activation = _quantize_activation(
+          layer.layer.activation, layer.layer.__class__,
+          _get_quantize_activation_params(layer))
+
+      replace_map[layer] = quantized_layer
+
+  # TODO(pulkitb): Transform [Dense(), ReLU()] to be quant aware.
+
+  def _add_quant_emulate_wrapper(layer):  # pylint: disable=missing-docstring
+    # Quantized layer has been constructed during graph transformation. Return.
+    if layer in replace_map:
+      return replace_map[layer]
+
+    # No need to quantize layer. Simply clone and return.
+    if not isinstance(layer, quant_annotate.QuantizeAnnotate):
+      return _clone_layer(layer)
+
+    # Use QuantizeEmulate wrapper on annotated layer which actually adds
+    # quantization ops.
+    return _apply_quantization(layer)
+
+  return keras.models.clone_model(
+      model, input_tensors=None, clone_function=_add_quant_emulate_wrapper)
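
Taken together with `quantize_annotate` above, the intended workflow is annotate-then-apply. Below is a minimal end-to-end sketch; it is illustrative only and mirrors the Sequential-model tests added further down rather than any documented public API.

# Illustrative sketch of the annotate-then-apply flow introduced by this commit.
from tensorflow.python import keras
from tensorflow_model_optimization.python.core.quantization.keras import quantize_emulate

quant_params = {'num_bits': 8, 'symmetric': True, 'narrow_range': True}

# Annotate the layers that should be quantized.
model = keras.Sequential([
    quantize_emulate.quantize_annotate(
        keras.layers.Conv2D(32, 5, activation='relu'),
        input_shape=(28, 28, 1), **quant_params),
    quantize_emulate.quantize_annotate(keras.layers.Dense(10), **quant_params),
])

# Clone the model, wrapping each annotated layer in QuantizeEmulateWrapper and
# replacing its activation with a QuantizeAwareActivation.
quantized_model = quantize_emulate.quantize_apply(model)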

tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate_test.py

Lines changed: 156 additions & 3 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests for keras pruning wrapper."""
+"""Tests for quantize API functions."""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -23,11 +23,14 @@
 from tensorflow.python import keras
 from tensorflow.python.platform import test
 from tensorflow_model_optimization.python.core.quantization.keras import quantize_annotate as quant_annotate
+from tensorflow_model_optimization.python.core.quantization.keras import quantize_aware_activation
 from tensorflow_model_optimization.python.core.quantization.keras import quantize_emulate
-from tensorflow_model_optimization.python.core.quantization.keras.quantize_emulate import QuantizeEmulate
-from tensorflow_model_optimization.python.core.quantization.keras.quantize_emulate_wrapper import QuantizeEmulateWrapper
+from tensorflow_model_optimization.python.core.quantization.keras import quantize_emulate_wrapper
 
 quantize_annotate = quantize_emulate.quantize_annotate
+QuantizeEmulate = quantize_emulate.QuantizeEmulate
+QuantizeEmulateWrapper = quantize_emulate_wrapper.QuantizeEmulateWrapper
+QuantizeAwareActivation = quantize_aware_activation.QuantizeAwareActivation
 
 
 class QuantizeEmulateTest(test.TestCase):
@@ -131,5 +134,155 @@ def testQuantizeAnnotateModel_HasAnnotatedLayers(self):
     self.assertAllEqual(model.predict(inputs), annotated_model.predict(inputs))
 
 
+class QuantizeApplyTest(test.TestCase):
+
+  def setUp(self):
+    self.quant_params1 = {
+        'num_bits': 8,
+        'narrow_range': True,
+        'symmetric': True
+    }
+    self.quant_params2 = {
+        'num_bits': 4,
+        'narrow_range': False,
+        'symmetric': False
+    }
+
+  # Validation tests
+
+  def testRaisesErrorIfNotKerasModel(self):
+    with self.assertRaises(ValueError):
+      quantize_emulate.quantize_apply(keras.layers.Dense(32))
+
+  def testRaisesErrorIfKerasSubclassedModel(self):
+    class MyModel(keras.Model):
+      def call(self, inputs, training=None, mask=None):  # pylint: disable=g-wrong-blank-lines
+        return inputs
+
+    with self.assertRaises(ValueError):
+      quantize_emulate.quantize_apply(MyModel())
+
+  def testRaisesErrorNoAnnotatedLayers_Sequential(self):
+    model = keras.Sequential([
+        keras.layers.Dense(10), keras.layers.Dropout(0.4)])
+
+    with self.assertRaises(ValueError):
+      quantize_emulate.quantize_apply(model)
+
+  def testRaisesErrorNoAnnotatedLayers_Functional(self):
+    inputs = keras.Input(shape=(10,))
+    x = keras.layers.Dense(32, activation='relu')(inputs)
+    results = keras.layers.Dense(5, activation='softmax')(x)
+    model = keras.Model(inputs=inputs, outputs=results)
+
+    with self.assertRaises(ValueError):
+      quantize_emulate.quantize_apply(model)
+
+  # Quantization Apply Tests
+
+  def _get_annotated_sequential_model(self):
+    return keras.Sequential([
+        quantize_annotate(keras.layers.Conv2D(32, 5), input_shape=(28, 28, 1),
+                          **self.quant_params1),
+        quantize_annotate(keras.layers.Dense(10), **self.quant_params2)
+    ])
+
+  def _get_annotated_functional_model(self):
+    inputs = keras.Input(shape=(28, 28, 1))
+    x = quantize_annotate(
+        keras.layers.Conv2D(32, 5), **self.quant_params1)(inputs)
+    results = quantize_annotate(keras.layers.Dense(10), **self.quant_params2)(x)
+
+    return keras.Model(inputs=inputs, outputs=results)
+
+  def _assert_layer_emulated(
+      self, annotated_layer, emulated_layer, exclude_keys=None):
+    self.assertIsInstance(emulated_layer, QuantizeEmulateWrapper)
+
+    self.assertEqual(annotated_layer.get_quantize_params(),
+                     emulated_layer.get_quantize_params())
+
+    # Extract configs of the inner layers they wrap.
+    annotated_config = annotated_layer.layer.get_config()
+    emulated_config = emulated_layer.layer.get_config()
+
+    # The underlying layers aren't always exactly the same. For example,
+    # activations in the underlying layers might be replaced. Exclude keys
+    # if required.
+    if exclude_keys:
+      for key in exclude_keys:
+        annotated_config.pop(key)
+        emulated_config.pop(key)
+
+    self.assertEqual(annotated_config, emulated_config)
+
+  def _assert_model_emulated(
+      self, annotated_model, emulated_model, exclude_keys=None):
+    for annotated_layer, emulated_layer in zip(annotated_model.layers,
+                                               emulated_model.layers):
+      if isinstance(emulated_layer, keras.layers.InputLayer):
+        continue
+
+      self._assert_layer_emulated(annotated_layer, emulated_layer, exclude_keys)
+
+  def testAppliesQuantizationToAnnotatedModel_Sequential(self):
+    model = self._get_annotated_sequential_model()
+
+    quantized_model = quantize_emulate.quantize_apply(model)
+
+    self._assert_model_emulated(model, quantized_model)
+
+  def testAppliesQuantizationToAnnotatedModel_Functional(self):
+    model = self._get_annotated_functional_model()
+
+    quantized_model = quantize_emulate.quantize_apply(model)
+
+    self._assert_model_emulated(model, quantized_model)
+
+  # Transformation Tests
+
+  def testQuantizesActivationsWithinLayer_Sequential(self):
+    quant_params = {'num_bits': 8, 'symmetric': True}
+    model = keras.Sequential([
+        quantize_annotate(
+            keras.layers.Conv2D(32, 5, activation='relu'),
+            input_shape=(28, 28, 1),
+            **quant_params)
+    ])
+
+    quantized_model = quantize_emulate.quantize_apply(model)
+
+    # We expect activation to be modified.
+    self._assert_model_emulated(model, quantized_model, ['activation'])
+
+    conv_layer = quantized_model.layers[0].layer
+    self.assertIsInstance(conv_layer.activation, QuantizeAwareActivation)
+    self.assertEqual(
+        keras.activations.get('relu'), conv_layer.activation.activation)
+    self.assertEqual(keras.layers.Conv2D, conv_layer.activation.parent_layer)
+    self.assertEqual(quant_params, conv_layer.activation.get_quantize_params())
+
+  def testQuantizesActivationsWithinLayer_Functional(self):
+    quant_params = {'num_bits': 8, 'symmetric': True}
+
+    inputs = keras.Input(shape=(28, 28, 1))
+    results = quantize_annotate(
+        keras.layers.Conv2D(32, 5, activation='relu'),
+        **self.quant_params1)(inputs)
+    model = keras.Model(inputs=inputs, outputs=results)
+
+    quantized_model = quantize_emulate.quantize_apply(model)
+
+    # We expect activation to be modified.
+    self._assert_model_emulated(model, quantized_model, ['activation'])
+
+    conv_layer = quantized_model.layers[1].layer
+    self.assertIsInstance(conv_layer.activation, QuantizeAwareActivation)
+    self.assertEqual(
+        keras.activations.get('relu'), conv_layer.activation.activation)
+    self.assertEqual(keras.layers.Conv2D, conv_layer.activation.parent_layer)
+    self.assertEqual(quant_params, conv_layer.activation.get_quantize_params())
+
+
 if __name__ == '__main__':
   test.main()

tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate_wrapper.py

Lines changed: 6 additions & 3 deletions
@@ -185,13 +185,16 @@ def fn():
 
     return outputs
 
-  def get_config(self):
-    base_config = super(QuantizeEmulateWrapper, self).get_config()
-    config = {
+  def get_quantize_params(self):
+    return {
         'num_bits': self._num_bits,
         'symmetric': self._symmetric,
         'narrow_range': self._narrow_range
     }
+
+  def get_config(self):
+    base_config = super(QuantizeEmulateWrapper, self).get_config()
+    config = self.get_quantize_params()
     return dict(list(base_config.items()) + list(config.items()))
 
   @classmethod
