
Commit 9d0ca18

nutsiepully authored and tensorflower-gardener committed
Clone weights of layers before quantizing.

Ensures that the variables/layers of cloned models carry over their weights, so training can proceed on the original layers independently of the quantized copy. Tests verify this as quantization is applied to annotated layers.

PiperOrigin-RevId: 255691355
1 parent 952fafd commit 9d0ca18
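
The fix builds on standard Keras behavior: `keras.models.clone_model` rebuilds each layer from its config, creating fresh variables with new initial values, and `set_weights` then copies the original values into them. A minimal sketch of that behavior (not part of the commit; the toy model and the public `tensorflow.keras` import are illustrative):

import numpy as np
from tensorflow import keras

# Toy model; any built Keras model behaves the same way.
model = keras.Sequential([keras.layers.Dense(4, input_shape=(8,))])

clone = keras.models.clone_model(model)  # fresh variables, newly initialized
clone.set_weights(model.get_weights())   # copy values from the original

# Same values, but distinct variable objects: training the clone leaves
# the original model's weights untouched.
for original_value, cloned_value in zip(model.get_weights(),
                                        clone.get_weights()):
  np.testing.assert_allclose(original_value, cloned_value)
assert all(id(a) != id(b) for a, b in zip(model.weights, clone.weights))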

File tree

2 files changed (+43, -9 lines)

tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate.py

Lines changed: 14 additions & 9 deletions
@@ -172,8 +172,11 @@ def quantize_apply(model):
         'annotated with `quantize_annotate`. There are no layers '
         'to quantize.')
 
-  def _clone_layer(layer):
-    return layer.__class__.from_config(layer.get_config())
+  def _clone_model_with_weights(model_to_clone):
+    cloned_model = keras.models.clone_model(model_to_clone)
+    cloned_model.set_weights(model_to_clone.get_weights())
+
+    return cloned_model
 
   def _quantize_activation(activation, parent_class, quantize_params):
     try:
@@ -194,10 +197,13 @@ def _get_quantize_activation_params(layer):
     return quant_params
 
   def _apply_quantization(quant_annotate_layer):
-    layer_to_quantize = _clone_layer(quant_annotate_layer.layer)
-    quantize_params = quant_annotate_layer.get_quantize_params()
+    return QuantizeEmulateWrapper(
+        quant_annotate_layer.layer,
+        **(quant_annotate_layer.get_quantize_params()))
 
-    return QuantizeEmulateWrapper(layer_to_quantize, **quantize_params)
+  # Create a copy of the model with the same weights. We can then quantize this
+  # model without modifying the weights of the original model.
+  model_copy = _clone_model_with_weights(model)
 
   # Apply all graph level transformations.
   replace_map = {}
@@ -206,7 +212,7 @@ def _apply_quantization(quant_annotate_layer):
   # Dense(activation='relu') -> Dense(activation=QuantAwareActivation('relu'))
   # TODO(pulkitb): Not all layers (LSTMs) have just activation. Add
   # generic handling for all layers.
-  for layer in model.layers:
+  for layer in model_copy.layers:
     if isinstance(layer, quant_annotate.QuantizeAnnotate) and \
         (layer.layer.activation is not None and
          layer.layer.activation != keras.activations.linear):
@@ -225,13 +231,12 @@ def _add_quant_emulate_wrapper(layer):  # pylint: disable=missing-docstring
     if layer in replace_map:
       return replace_map[layer]
 
-    # No need to quantize layer. Simply clone and return.
     if not isinstance(layer, quant_annotate.QuantizeAnnotate):
-      return _clone_layer(layer)
+      return layer
 
     # Use QuantizeEmulate wrapper on annotated layer which actually adds
     # quantization ops.
     return _apply_quantization(layer)
 
   return keras.models.clone_model(
-      model, input_tensors=None, clone_function=_add_quant_emulate_wrapper)
+      model_copy, input_tensors=None, clone_function=_add_quant_emulate_wrapper)
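
The rewritten `quantize_apply` relies on the `clone_function` hook of `keras.models.clone_model`: each layer of the copy is passed through `_add_quant_emulate_wrapper`, which returns unannotated layers unchanged and substitutes a `QuantizeEmulateWrapper` for annotated ones. A sketch of that hook with a hypothetical stand-in wrapper (the wrapper class and model below are illustrative, not the commit's code):

from tensorflow import keras

class PassthroughWrapper(keras.layers.Wrapper):
  """Hypothetical stand-in for QuantizeEmulateWrapper; just delegates."""

  def call(self, inputs):
    return self.layer(inputs)

def _wrap_dense(layer):
  # Same shape as _add_quant_emulate_wrapper: substitute matching layers,
  # return everything else unchanged.
  if isinstance(layer, keras.layers.Dense):
    return PassthroughWrapper(layer)
  return layer

model = keras.Sequential([keras.layers.Dense(4, input_shape=(8,))])
wrapped = keras.models.clone_model(model, clone_function=_wrap_dense)
assert isinstance(wrapped.layers[-1], PassthroughWrapper)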

tensorflow_model_optimization/python/core/quantization/keras/quantize_emulate_test.py

Lines changed: 29 additions & 0 deletions
@@ -21,6 +21,7 @@
 import numpy as np
 
 from tensorflow.python import keras
+from tensorflow.python.keras import backend as K
 from tensorflow.python.platform import test
 from tensorflow_model_optimization.python.core.quantization.keras import quantize_annotate as quant_annotate
 from tensorflow_model_optimization.python.core.quantization.keras import quantize_aware_activation
@@ -195,6 +196,20 @@ def _get_annotated_functional_model(self):
 
     return keras.Model(inputs=inputs, outputs=results)
 
+  def _assert_weights_equal_value(self, annotated_weights, emulated_weights):
+    annotated_weight_values = K.batch_get_value(annotated_weights)
+    emulated_weight_values = K.batch_get_value(emulated_weights)
+
+    self.assertEqual(len(annotated_weight_values), len(emulated_weight_values))
+    for aw, ew in zip(annotated_weight_values, emulated_weight_values):
+      self.assertAllClose(aw, ew)
+
+  def _assert_weights_different_objects(
+      self, annotated_weights, emulated_weights):
+    self.assertEqual(len(annotated_weights), len(emulated_weights))
+    for aw, ew in zip(annotated_weights, emulated_weights):
+      self.assertNotEqual(id(aw), id(ew))
+
   def _assert_layer_emulated(
       self, annotated_layer, emulated_layer, exclude_keys=None):
     self.assertIsInstance(emulated_layer, QuantizeEmulateWrapper)
@@ -216,6 +231,20 @@ def _assert_layer_emulated(
 
     self.assertEqual(annotated_config, emulated_config)
 
+    def _sort_weights(weights):
+      # Variables are named `quantize_annotate0/kernel:0` and
+      # `quantize_emulate0/kernel:0`. Strip layer name to sort.
+      return sorted(weights, key=lambda w: w.name.split('/')[1])
+
+    annotated_weights = _sort_weights(annotated_layer.trainable_weights)
+    emulated_weights = _sort_weights(emulated_layer.trainable_weights)
+
+    # Quantized model should pick the same weight values from the original
+    # model. However, they should not be the same weight objects. We don't
+    # want training the quantized model to change weights in the original model.
+    self._assert_weights_different_objects(annotated_weights, emulated_weights)
+    self._assert_weights_equal_value(annotated_weights, emulated_weights)
+
   def _assert_model_emulated(
       self, annotated_model, emulated_model, exclude_keys=None):
     for annotated_layer, emulated_layer in zip(annotated_model.layers,
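
The `_sort_weights` helper pairs weights across the two models by the part of the variable name after the layer prefix, since the annotated and emulated wrappers prefix variables differently. A small illustration of that key, using the hypothetical names from the code comment:

annotated_names = ['quantize_annotate0/kernel:0', 'quantize_annotate0/bias:0']
emulated_names = ['quantize_emulate0/bias:0', 'quantize_emulate0/kernel:0']

def sort_key(name):
  # Drop the differing layer prefix so both lists sort identically.
  return name.split('/')[1]

# After sorting, index i in one list pairs with index i in the other.
assert [sort_key(n) for n in sorted(annotated_names, key=sort_key)] == \
       [sort_key(n) for n in sorted(emulated_names, key=sort_key)]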
