Skip to content

Commit baf1bb8

Browse files
nutsiepully authored and
tensorflower-gardener committed
Implement AllValuesQuantizer
AllValuesQuantizer calculates the range based on the largest and smallest values seen by the Tensor. PiperOrigin-RevId: 320088710
1 parent ec636c9 commit baf1bb8

File tree

4 files changed

+176
-1
lines changed

4 files changed

+176
-1
lines changed

tensorflow_model_optimization/python/core/quantization/keras/quant_ops.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,75 @@ def FixedQuantize(inputs, init_min=-6.0, init_max=6.0, scope=None):
4444
inputs, min=init_min, max=init_max)
4545

4646

def AllValuesQuantize(inputs,
                      min_var,
                      max_var,
                      name_prefix='AllValuesQuantize',
                      is_training=True,
                      num_bits=8,
                      narrow_range=False,
                      symmetric=False):
  """Adds a layer that collects quantization ranges as min/max of tensor values.

  AllValuesQuantize creates variables called 'min' and 'max',
  representing the interval used for quantization and clamping.

  Args:
    inputs: a tensor containing values to be quantized.
    min_var: Variable which stores the min value of tensor.
    max_var: Variable which stores the max value of tensor.
    name_prefix: name_prefix for created nodes.
    is_training: Whether the op is applied to a training or eval graph.
    num_bits: Number of bits to use for quantization, must be between 2 and 8.
    narrow_range: Whether to use the narrow quantization range
      [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1].
    symmetric: If true, use symmetric quantization limits instead of training
      the minimum and maximum of each quantization range separately.

  Returns:
    a tensor containing quantized values.
  """
  with tf.name_scope(name_prefix):
    if not is_training:
      # Eval/inference: quantize with the ranges already stored in the
      # variables; do not update them.
      return _FakeQuantWithMinMaxVars(
          inputs,
          min_var,
          max_var,
          per_channel=False,
          num_bits=num_bits,
          narrow_range=narrow_range)

    batch_min = tf.math.reduce_min(inputs, name='BatchMin')
    batch_max = tf.math.reduce_max(inputs, name='BatchMax')

    if symmetric:
      if narrow_range:
        min_max_ratio = -1
      else:
        # In two's complement notation, the negative range is slightly larger
        # than the positive range.
        min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits)

      # TFLite requires that 0.0 is always in the [min; max] range. Because
      # batch_min <= batch_max, it follows that range_min <= 0 <= range_max.
      batch_min = tf.math.minimum(batch_min, batch_max / min_max_ratio)
      batch_max = tf.math.maximum(batch_max, batch_min * min_max_ratio)

    # TFLite requires that 0.0 is always in the [min; max] range.
    # "All values": the stored range only ever widens — new extremes are
    # folded into the existing min/max rather than replacing them.
    range_min = tf.math.minimum(tf.math.minimum(min_var, batch_min), 0.0)
    range_max = tf.math.maximum(tf.math.maximum(max_var, batch_max), 0.0)

    assign_min = tf_compat.assign(min_var, range_min, name='AssignMinAllValue')
    assign_max = tf_compat.assign(max_var, range_max, name='AssignMaxAllValue')

    # Quantize using the assign ops so the variable updates are part of the
    # graph's data dependencies.
    return _FakeQuantWithMinMaxVars(
        inputs,
        assign_min,
        assign_max,
        per_channel=False,
        num_bits=num_bits,
        narrow_range=narrow_range)
47116
def LastValueQuantize(inputs,
48117
min_var,
49118
max_var,

tensorflow_model_optimization/python/core/quantization/keras/quant_ops_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,31 @@
3232
@keras_parameterized.run_all_keras_modes
3333
class QuantOpsTest(tf.test.TestCase, parameterized.TestCase):
3434

35+
def testAllValuesQuantiize_TrainingAssign(self):
36+
min_value, max_value = self._GetMinMaxValues(
37+
quant_ops.AllValuesQuantize,
38+
[tf.constant([-5.0, 1.0]), tf.constant([-1.0, 5.0])])
39+
40+
self.assertEqual(min_value, -5.0)
41+
self.assertEqual(max_value, 5.0)
42+
43+
def testAllValuesQuantiize_SymmetricTrainingAssign(self):
44+
min_value, max_value = self._GetMinMaxValues(
45+
quant_ops.AllValuesQuantize,
46+
[tf.constant([-_SYMMETRIC_RANGE_RATIO, _SYMMETRIC_RANGE_RATIO])],
47+
symmetric=True,
48+
narrow_range=False)
49+
self.assertEqual(min_value, -1.0)
50+
self.assertEqual(max_value, _SYMMETRIC_RANGE_RATIO)
51+
52+
def testAllValuesQuantiize_SymmetricNarrowRangeTrainingAssign(self):
53+
min_value, max_value = self._GetMinMaxValues(
54+
quant_ops.AllValuesQuantize, [tf.constant([-1, 0.5])],
55+
symmetric=True,
56+
narrow_range=True)
57+
self.assertEqual(min_value, -1.0)
58+
self.assertEqual(max_value, 1)
59+
3560
def testLastValueQuantizeTrainingAssign(self):
3661
min_value, max_value = self._GetMinMaxValues(quant_ops.LastValueQuantize,
3762
[tf.constant([-1.0, 1.0])])

tensorflow_model_optimization/python/core/quantization/keras/quantizers.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,87 @@ def __ne__(self, other):
288288
return not self.__eq__(other)
289289

290290

291+
class AllValuesQuantizer(_QuantizeHelper, Quantizer):
  """Quantize tensor based on min/max of tensor values across all batches."""

  def __init__(self, num_bits, per_axis, symmetric, narrow_range):
    """Construct an AllValuesQuantizer.

    This is an experimental API not subject to backward compatibility.

    Args:
      num_bits: Number of bits for quantization
      per_axis: Whether to apply per_axis quantization. The last dimension is
        used as the axis. NOTE(review): this flag is stored and serialized in
        `get_config`, but `__call__` does not forward it to
        `quant_ops.AllValuesQuantize` — per-axis quantization appears to be
        unimplemented here; confirm intent.
      symmetric: If true, use symmetric quantization limits instead of training
        the minimum and maximum of each quantization range separately.
      narrow_range: In case of 8 bits, narrow_range nudges the quantized range
        to be [-127, 127] instead of [-128, 127]. This ensures symmetric
        range has 0 as the centre.
    """
    self.num_bits = num_bits
    self.per_axis = per_axis
    self.symmetric = symmetric
    self.narrow_range = narrow_range

  def build(self, tensor_shape, name, layer):
    """Creates the non-trainable min/max range variables on `layer`.

    Both variables start at 0.0, so the observed range always contains zero.

    Args:
      tensor_shape: Shape of the tensor to be quantized (unused; ranges are
        scalar).
      name: Prefix for the created weight names.
      layer: Keras layer the weights are added to.

    Returns:
      Dict of weights, consumed by `__call__` via its `weights` argument.
    """
    min_weight = layer.add_weight(
        name + '_min',
        initializer=keras.initializers.Constant(0.0),
        trainable=False)
    max_weight = layer.add_weight(
        name + '_max',
        initializer=keras.initializers.Constant(0.0),
        trainable=False)
    return {'min_var': min_weight, 'max_var': max_weight}

  def __call__(self, inputs, training, weights, **kwargs):
    """Quantize tensor.

    Args:
      inputs: Input tensor to be quantized.
      training: Whether the graph is currently training.
      weights: Dictionary of weights the quantizer can use to quantize the
        tensor. This contains the weights created in the `build` function.
      **kwargs: Additional variables which may be passed to the quantizer.

    Returns:
      Quantized tensor.
    """
    return quant_ops.AllValuesQuantize(
        inputs,
        weights['min_var'],
        weights['max_var'],
        is_training=training,
        num_bits=self.num_bits,
        symmetric=self.symmetric,
        narrow_range=self.narrow_range,
    )

  def get_config(self):
    """Returns the constructor arguments for serialization."""
    return {
        'num_bits': self.num_bits,
        'per_axis': self.per_axis,
        'symmetric': self.symmetric,
        'narrow_range': self.narrow_range
    }

  def __eq__(self, other):
    # Quantizers are equal iff they are the same type with identical config.
    if not isinstance(other, AllValuesQuantizer):
      return False

    return (self.num_bits == other.num_bits and
            self.per_axis == other.per_axis and
            self.symmetric == other.symmetric and
            self.narrow_range == other.narrow_range)

  def __ne__(self, other):
    return not self.__eq__(other)
291369
def _types_dict():
  """Maps serialized quantizer class names to their classes."""
  quantizer_classes = (
      AllValuesQuantizer,
      LastValueQuantizer,
      MovingAverageQuantizer,
  )
  # Each key equals the class's own name, so derive it from __name__.
  return {cls.__name__: cls for cls in quantizer_classes}

tensorflow_model_optimization/python/core/quantization/keras/quantizers_test.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333

3434
@keras_parameterized.run_all_keras_modes
3535
@parameterized.parameters(
36-
quantizers.LastValueQuantizer, quantizers.MovingAverageQuantizer)
36+
quantizers.LastValueQuantizer,
37+
quantizers.MovingAverageQuantizer,
38+
quantizers.AllValuesQuantizer)
3739
class QuantizersTest(tf.test.TestCase, parameterized.TestCase):
3840

3941
def setUp(self):

0 commit comments

Comments
 (0)