Adds power law entropy model for use with run-length gamma code.

Johannes Ballé · copybara-github · commit c89da5380d8a · 2022-05-10T08:43:00.000-07:00
PiperOrigin-RevId: 447742176
Change-Id: Ib35819497aed4fd5e34a9c03312e08783b1c1a75
diff --git a/tensorflow_compression/all_tests.py b/tensorflow_compression/all_tests.py
@@ -29,6 +29,7 @@
 
 from tensorflow_compression.python.entropy_models.continuous_batched_test import *
 from tensorflow_compression.python.entropy_models.continuous_indexed_test import *
+from tensorflow_compression.python.entropy_models.power_law_test import *
 from tensorflow_compression.python.entropy_models.universal_test import *
 
 from tensorflow_compression.python.layers.gdn_test import *
diff --git a/tensorflow_compression/python/entropy_models/BUILD b/tensorflow_compression/python/entropy_models/BUILD
@@ -10,6 +10,7 @@ py_library(
     deps = [
         ":continuous_batched",
         ":continuous_indexed",
+        ":power_law",
         ":universal",
     ],
 )
@@ -66,6 +67,21 @@ py_test(
     ],
 )
 
+py_library(
+    name = "power_law",
+    srcs = ["power_law.py"],
+    deps = [
+        "//tensorflow_compression/python/ops:gen_ops",
+        "//tensorflow_compression/python/ops:round_ops",
+    ],
+)
+
+py_test(
+    name = "power_law_test",
+    srcs = ["power_law_test.py"],
+    deps = [":power_law"],
+)
+
 py_library(
     name = "universal",
     srcs = ["universal.py"],
diff --git a/tensorflow_compression/python/entropy_models/__init__.py b/tensorflow_compression/python/entropy_models/__init__.py
@@ -16,4 +16,5 @@
 
 from tensorflow_compression.python.entropy_models.continuous_batched import *
 from tensorflow_compression.python.entropy_models.continuous_indexed import *
+from tensorflow_compression.python.entropy_models.power_law import *
 from tensorflow_compression.python.entropy_models.universal import *
diff --git a/tensorflow_compression/python/entropy_models/power_law.py b/tensorflow_compression/python/entropy_models/power_law.py
@@ -0,0 +1,210 @@
+# Copyright 2022 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""An entropy model for the run-length gamma code."""
+
+import tensorflow as tf
+from tensorflow_compression.python.ops import gen_ops
+from tensorflow_compression.python.ops import round_ops
+
+
+__all__ = [
+    "PowerLawEntropyModel",
+]
+
+
+class PowerLawEntropyModel(tf.Module):
+  """Entropy model for power-law distributed random variables.
+
+  This entropy model handles quantization of a bottleneck tensor and implements
+  a cross entropy penalty that is consistent with the Elias gamma code.
+
+  The gamma code has code lengths `1 + 2 floor(log_2(x))`, for `x` a positive
+  integer. For details on the gamma code, see:
+
+  > "Universal Codeword Sets and Representations of the Integers"<br />
+  > P. Elias<br />
+  > https://doi.org/10.1109/TIT.1975.1055349
+
+  Given a signed integer, `run_length_gamma_encode` encodes zeros using a
+  run-length code, the sign using a uniform bit, and applies the gamma code to
+  the magnitude.
+
+  The penalty applied by this class is given by:
+  ```
+  -log_2 p(x), with p(x) = alpha / 2 * (abs(x) + alpha) ** -2
+  ```
+  Like the gamma code, this follows a symmetrized power law, but only
+  approximately for `alpha > 0`. Without `alpha`, the distribution would not be
+  normalizable, and the penalty would have a singularity at zero. Setting
+  `alpha` to a small positive value ensures that the penalty is finite at zero,
+  and that its gradients are useful for optimization.
+  """
+
+  def __init__(self,
+               coding_rank,
+               alpha=1e-2,
+               bottleneck_dtype=None):
+    """Initializes the instance.
+
+    Args:
+      coding_rank: Integer. Number of innermost dimensions considered a coding
+        unit. Each coding unit is compressed to its own bit string, and the
+        estimated rate is summed over each coding unit in `penalty()`.
+      alpha: Float. Regularization parameter preventing gradient singularity
+        around zero.
+      bottleneck_dtype: `tf.dtypes.DType`. Data type of bottleneck tensor.
+        Defaults to `tf.keras.mixed_precision.global_policy().compute_dtype`.
+    """
+    self._coding_rank = int(coding_rank)
+    if self.coding_rank < 0:
+      raise ValueError("`coding_rank` must be at least 0.")
+    self._alpha = float(alpha)
+    if self.alpha <= 0:
+      raise ValueError("`alpha` must be greater than 0.")
+    if bottleneck_dtype is None:
+      bottleneck_dtype = tf.keras.mixed_precision.global_policy().compute_dtype
+    if bottleneck_dtype is None:
+      bottleneck_dtype = tf.keras.backend.floatx()
+    self._bottleneck_dtype = tf.as_dtype(bottleneck_dtype)
+    super().__init__()
+
+  @property
+  def alpha(self):
+    """Alpha parameter."""
+    return self._alpha
+
+  @property
+  def bottleneck_dtype(self):
+    """Data type of the bottleneck tensor."""
+    return self._bottleneck_dtype
+
+  @property
+  def coding_rank(self):
+    """Number of innermost dimensions considered a coding unit."""
+    return self._coding_rank
+
+  @tf.Module.with_name_scope
+  def __call__(self, bottleneck):
+    """Quantizes a tensor by rounding and computes the penalty.
+
+    Args:
+      bottleneck: `tf.Tensor` containing the data to be compressed. Must have at
+        least `self.coding_rank` dimensions.
+
+    Returns:
+      A tuple `(self.quantize(bottleneck), self.penalty(bottleneck))`.
+    """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
+    return self.quantize(bottleneck), self.penalty(bottleneck)
+
+  @tf.Module.with_name_scope
+  def penalty(self, bottleneck):
+    """Computes cross-entropy penalty.
+
+    Args:
+      bottleneck: `tf.Tensor` containing the data to be compressed. Must have at
+        least `self.coding_rank` dimensions.
+
+    Returns:
+      Penalty, which has the same shape as `bottleneck` without the
+      `self.coding_rank` innermost dimensions, and corresponds to a cross
+      entropy.
+    """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
+    log_alpha = tf.math.log(
+        tf.constant(self.alpha, dtype=self.bottleneck_dtype))
+    log_2 = tf.math.log(tf.constant(2, dtype=self.bottleneck_dtype))
+    penalty = ((1. - log_alpha / log_2) +
+               tf.math.log(abs(bottleneck) + self.alpha) * (2. / log_2))
+    return tf.reduce_sum(penalty, axis=tuple(range(-self.coding_rank, 0)))
+
+  @tf.Module.with_name_scope
+  def quantize(self, bottleneck):
+    """Quantizes a floating-point bottleneck tensor.
+
+    The tensor is rounded to integer values. The gradient of this rounding
+    operation is overridden with the identity (straight-through gradient
+    estimator).
+
+    Args:
+      bottleneck: `tf.Tensor` containing the data to be quantized.
+
+    Returns:
+      A `tf.Tensor` containing the quantized values.
+    """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
+    return round_ops.round_st(bottleneck)
+
+  @tf.Module.with_name_scope
+  def compress(self, bottleneck):
+    """Compresses a floating-point tensor.
+
+    Compresses the tensor to bit strings. `bottleneck` is first quantized
+    as in `quantize()`, and then compressed using the run-length gamma code. The
+    quantized tensor can later be recovered by calling `decompress()`.
+
+    The innermost `self.coding_rank` dimensions are treated as one coding unit,
+    i.e. are compressed into one string each. Any additional dimensions to the
+    left are treated as batch dimensions.
+
+    Args:
+      bottleneck: `tf.Tensor` containing the data to be compressed. Must have at
+        least `self.coding_rank` dimensions.
+
+    Returns:
+      A `tf.Tensor` having the same shape as `bottleneck` without the
+      `self.coding_rank` innermost dimensions, containing a string for each
+      coding unit.
+    """
+    bottleneck = tf.convert_to_tensor(bottleneck, dtype=self.bottleneck_dtype)
+
+    shape = tf.shape(bottleneck)
+    if self.coding_rank == 0:
+      flat_shape = [-1]
+      strings_shape = shape
+    else:
+      flat_shape = tf.concat([[-1], shape[-self.coding_rank:]], 0)
+      strings_shape = shape[:-self.coding_rank]
+
+    symbols = tf.cast(tf.round(bottleneck), tf.int32)
+    symbols = tf.reshape(symbols, flat_shape)
+
+    strings = tf.map_fn(
+        gen_ops.run_length_gamma_encode, symbols,
+        fn_output_signature=tf.TensorSpec((), dtype=tf.string))
+    return tf.reshape(strings, strings_shape)
+
+  @tf.Module.with_name_scope
+  def decompress(self, strings, code_shape):
+    """Decompresses a tensor.
+
+    Reconstructs the quantized tensor from bit strings produced by `compress()`.
+
+    Args:
+      strings: `tf.Tensor` containing the compressed bit strings.
+      code_shape: Shape of innermost dimensions of the output `tf.Tensor`.
+
+    Returns:
+      A `tf.Tensor` of shape `tf.concat([tf.shape(strings), code_shape], 0)`.
+    """
+    strings = tf.convert_to_tensor(strings, dtype=tf.string)
+    strings_shape = tf.shape(strings)
+    symbols = tf.map_fn(
+        lambda x: gen_ops.run_length_gamma_decode(x, code_shape),
+        tf.reshape(strings, [-1]),
+        fn_output_signature=tf.TensorSpec(
+            [None] * self.coding_rank, dtype=tf.int32))
+    symbols = tf.reshape(symbols, tf.concat([strings_shape, code_shape], 0))
+    return tf.cast(symbols, self.bottleneck_dtype)
diff --git a/tensorflow_compression/python/entropy_models/power_law_test.py b/tensorflow_compression/python/entropy_models/power_law_test.py
@@ -0,0 +1,122 @@
+# Copyright 2022 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests of power law entropy model."""
+
+import tensorflow as tf
+from tensorflow_compression.python.entropy_models.power_law import PowerLawEntropyModel
+
+
+class PowerLawEntropyModelTest(tf.test.TestCase):
+
+  def test_can_instantiate(self):
+    em = PowerLawEntropyModel(coding_rank=1)
+    self.assertEqual(em.coding_rank, 1)
+    self.assertEqual(em.bottleneck_dtype, tf.float32)
+
+  def test_requires_coding_rank_greater_equal_zero(self):
+    with self.assertRaises(ValueError):
+      PowerLawEntropyModel(coding_rank=-1)
+
+  def test_quantizes_to_integers(self):
+    em = PowerLawEntropyModel(coding_rank=1)
+    x = tf.range(-20., 20.)
+    x_perturbed = x + tf.random.uniform(x.shape, -.49, .49)
+    x_quantized = em.quantize(x_perturbed)
+    self.assertAllEqual(x, x_quantized)
+
+  def test_gradients_are_straight_through(self):
+    em = PowerLawEntropyModel(coding_rank=1)
+    x = tf.range(-20., 20.)
+    x_perturbed = x + tf.random.uniform(x.shape, -.49, .49)
+    with tf.GradientTape() as tape:
+      tape.watch(x_perturbed)
+      x_quantized = em.quantize(x_perturbed)
+    gradients = tape.gradient(x_quantized, x_perturbed)
+    self.assertAllEqual(gradients, tf.ones_like(gradients))
+
+  def test_compression_consistent_with_quantization(self):
+    em = PowerLawEntropyModel(coding_rank=1)
+    x = tf.range(-20., 20.)
+    x += tf.random.uniform(x.shape, -.49, .49)
+    x_quantized = em.quantize(x)
+    x_decompressed = em.decompress(em.compress(x), x.shape)
+    self.assertAllEqual(x_decompressed, x_quantized)
+
+  def test_penalty_is_proportional_to_code_length(self):
+    em = PowerLawEntropyModel(coding_rank=1)
+    # Sample some values from a Laplacian distribution.
+    u = tf.random.uniform((100, 1), minval=-1., maxval=1.)
+    values = 100. * tf.math.log(abs(u)) * tf.sign(u)
+    # Ensure there are some large values.
+    self.assertGreater(tf.reduce_sum(tf.cast(abs(values) > 100, tf.int32)), 0)
+    strings = em.compress(tf.broadcast_to(values, (100, 100)))
+    code_lengths = tf.cast(tf.strings.length(strings, unit="BYTE"), tf.float32)
+    code_lengths *= 8 / 100
+    penalties = em.penalty(values)
+    self.assertAllInRange(penalties - code_lengths, 4, 7)
+
+  def test_penalty_is_differentiable(self):
+    em = PowerLawEntropyModel(coding_rank=1)
+    # Sample some values from a Laplacian distribution.
+    u = tf.random.uniform((100, 1), minval=-1., maxval=1.)
+    values = 100. * tf.math.log(abs(u)) * tf.sign(u)
+    with tf.GradientTape() as tape:
+      tape.watch(values)
+      penalties = em.penalty(values)
+    gradients = tape.gradient(penalties, values)
+    self.assertAllEqual(tf.sign(gradients), tf.sign(values))
+
+  def test_compression_works_in_tf_function(self):
+    samples = tf.random.stateless_normal([100], (34, 232))
+
+    # Since tf.function traces each function twice, and only allows variable
+    # creation in the first call, we need to have a stateful object in which we
+    # create the entropy model only the first time the function is called, and
+    # store it for the second time.
+
+    class Compressor:
+
+      def compress(self, values):
+        if not hasattr(self, "em"):
+          self.em = PowerLawEntropyModel(coding_rank=1)
+        compressed = self.em.compress(values)
+        return self.em.decompress(compressed, [100])
+
+    values_eager = Compressor().compress(samples)
+    values_function = tf.function(Compressor().compress)(samples)
+    self.assertAllClose(samples, values_eager, rtol=0., atol=.5)
+    self.assertAllEqual(values_eager, values_function)
+
+  def test_dtypes_are_correct_with_mixed_precision(self):
+    tf.keras.mixed_precision.set_global_policy("mixed_float16")
+    try:
+      em = PowerLawEntropyModel(coding_rank=1)
+      self.assertEqual(em.bottleneck_dtype, tf.float16)
+      x = tf.random.stateless_normal((2, 5), seed=(0, 1), dtype=tf.float16)
+      x_tilde, penalty = em(x)
+      bitstring = em.compress(x)
+      x_hat = em.decompress(bitstring, (5,))
+      self.assertEqual(x_hat.dtype, tf.float16)
+      self.assertAllClose(x, x_hat, rtol=0, atol=.5)
+      self.assertEqual(x_tilde.dtype, tf.float16)
+      self.assertAllClose(x, x_tilde, rtol=0, atol=.5)
+      self.assertEqual(penalty.dtype, tf.float16)
+      self.assertEqual(penalty.shape, (2,))
+    finally:
+      tf.keras.mixed_precision.set_global_policy(None)
+
+
+if __name__ == "__main__":
+  tf.test.main()
diff --git a/tensorflow_compression/python/entropy_models/universal.py b/tensorflow_compression/python/entropy_models/universal.py
@@ -1,3 +1,5 @@
+# Copyright 2020 Google LLC. All Rights Reserved.
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+# Copyright 2020 Google LLC. All Rights Reserved.`
	`2`	`+#`
`1`	`3`	`# Licensed under the Apache License, Version 2.0 (the "License");`
`2`	`4`	`# you may not use this file except in compliance with the License.`
`3`	`5`	`# You may obtain a copy of the License at`