
Commit 424ef03

Johannes Ballé authored and copybara-github committed
Fixes for DeepFactorized.
- Implements forgotten survival function.
- Fixes incorrect prob() and log_prob(): we need to broadcast the inputs before calling tape.gradient(), or else the return value will be a reduced sum over the dimensions that happened to be broadcast.
- Removes an unused attribute and makes the code a little more readable.
- Adds custom lower_tail and upper_tail methods that are better behaved.

PiperOrigin-RevId: 340299486
Change-Id: I9c9287eb3a3eaca021bb85b0930377c84a7baf5b
1 parent eeb9a0f commit 424ef03

3 files changed: +62 additions, -64 deletions

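For context on the prob()/log_prob() fix: when a tensor is broadcast against the batch shape inside a tf.GradientTape, tape.gradient() returns the derivative summed over the broadcast dimensions rather than an elementwise derivative, which is why the inputs are now broadcast before the tape is opened. A minimal sketch of the failure mode (illustrative only, not part of the commit; x and loc are made-up stand-ins for the inputs and the batch dimensions):

import tensorflow.compat.v2 as tf

x = tf.constant([0.5])           # shape (1,); stands in for the inputs
loc = tf.constant([0., 1., 2.])  # stands in for a (3,) batch of distributions

with tf.GradientTape() as tape:
  tape.watch(x)
  y = tf.sigmoid(x - loc)        # x is broadcast to shape (3,) inside the tape
print(tape.gradient(y, x))       # shape (1,): the three derivatives, summed

x_b = tf.broadcast_to(x, tf.shape(loc))  # broadcast *before* recording instead
with tf.GradientTape() as tape:
  tape.watch(x_b)
  y = tf.sigmoid(x_b - loc)
print(tape.gradient(y, x_b))     # shape (3,): one derivative per element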

tensorflow_compression/python/distributions/BUILD

Lines changed: 4 additions & 1 deletion
@@ -20,7 +20,10 @@ py_library(
     name = "deep_factorized",
     srcs = ["deep_factorized.py"],
     srcs_version = "PY3",
-    deps = [":uniform_noise"],
+    deps = [
+        ":helpers",
+        ":uniform_noise",
+    ],
 )

 py_test(

tensorflow_compression/python/distributions/deep_factorized.py

Lines changed: 34 additions & 13 deletions
@@ -17,6 +17,7 @@
 import tensorflow.compat.v2 as tf
 import tensorflow_probability as tfp

+from tensorflow_compression.python.distributions import helpers
 from tensorflow_compression.python.distributions import uniform_noise


@@ -89,7 +90,6 @@ def __init__(self,
     self._batch_shape_tuple = tuple(int(s) for s in batch_shape)
     self._num_filters = tuple(int(f) for f in num_filters)
     self._init_scale = float(init_scale)
-    self._estimated_tail_mass = None
     super().__init__(
         dtype=dtype,
         reparameterization_type=tfp.distributions.NOT_REPARAMETERIZED,
@@ -130,10 +130,8 @@ def matrix_initializer(i=i):
       self._matrices.append(matrix)

       def bias_initializer(i=i):
-        return tf.random.uniform((channels, filters[i + 1], 1),
-                                 -.5,
-                                 .5,
-                                 dtype=self.dtype)
+        return tf.random.uniform(
+            (channels, filters[i + 1], 1), -.5, .5, dtype=self.dtype)

       bias = tf.Variable(bias_initializer, name="bias_{}".format(i))
       self._biases.append(bias)
@@ -158,6 +156,11 @@ def _event_shape_tensor(self):
   def _event_shape(self):
     return tf.TensorShape(())

+  def _broadcast_inputs(self, inputs):
+    shape = tf.broadcast_dynamic_shape(
+        tf.shape(inputs), self.batch_shape_tensor())
+    return tf.broadcast_to(inputs, shape)
+
   def _logits_cumulative(self, inputs):
     """Evaluate logits of the cumulative densities.

@@ -170,9 +173,6 @@ def _logits_cumulative(self, inputs):
     """
     # Convert to (channels, 1, batch) format by collapsing dimensions and then
     # commuting channels to front.
-    inputs = tf.broadcast_to(
-        inputs,
-        tf.broadcast_dynamic_shape(tf.shape(inputs), self.batch_shape_tensor()))
     shape = tf.shape(inputs)
     inputs = tf.reshape(inputs, (-1, 1, self.batch_shape.num_elements()))
     inputs = tf.transpose(inputs, (2, 1, 0))
@@ -191,35 +191,46 @@ def _logits_cumulative(self, inputs):
     return logits

   def _log_cdf(self, inputs):
+    inputs = self._broadcast_inputs(inputs)
     logits = self._logits_cumulative(inputs)
     return tf.math.log_sigmoid(logits)

   def _log_survival_function(self, inputs):
+    inputs = self._broadcast_inputs(inputs)
     logits = self._logits_cumulative(inputs)
     # 1-sigmoid(x) = sigmoid(-x)
     return tf.math.log_sigmoid(-logits)

   def _cdf(self, inputs):
+    inputs = self._broadcast_inputs(inputs)
     logits = self._logits_cumulative(inputs)
     return tf.math.sigmoid(logits)

+  def _survival_function(self, inputs):
+    inputs = self._broadcast_inputs(inputs)
+    logits = self._logits_cumulative(inputs)
+    # 1-sigmoid(x) = sigmoid(-x)
+    return tf.math.sigmoid(-logits)
+
   def _prob(self, inputs):
-    with tf.GradientTape() as tape:
+    inputs = self._broadcast_inputs(inputs)
+    with tf.GradientTape(watch_accessed_variables=False) as tape:
       tape.watch(inputs)
       cdf = self._cdf(inputs)
     prob = tape.gradient(cdf, inputs)
     return prob

   def _log_prob(self, inputs):
-    # let x=inputs and s(x)=sigmoid(x).
-    with tf.GradientTape() as tape:
+    inputs = self._broadcast_inputs(inputs)
+    with tf.GradientTape(watch_accessed_variables=False) as tape:
       tape.watch(inputs)
       logits = self._logits_cumulative(inputs)
-    # We have F(x) = s(logits(x))
+    # Let x=inputs and s(x)=sigmoid(x).
+    # We have F(x) = s(logits(x)),
     # so p(x) = F'(x)
     #         = s'(logits(x)) * logits'(x)
     #         = s(logits(x))*s(-logits(x)) * logits'(x)
-    # so log p(x) = log(s(logits(x)) + log(s(-logits(x)) + log(logits'(x))
+    # so log p(x) = log(s(logits(x)) + log(s(-logits(x)) + log(logits'(x)).
     log_s_logits = tf.math.log_sigmoid(logits)
     log_s_neg_logits = tf.math.log_sigmoid(-logits)
     dlogits = tape.gradient(logits, inputs)
@@ -228,6 +239,16 @@ def _log_prob(self, inputs):
   def _quantization_offset(self):
     return tf.constant(0, dtype=self.dtype)

+  def _lower_tail(self, tail_mass):
+    logits = tf.math.log(tail_mass / 2 / (1. - tail_mass / 2))
+    return helpers.estimate_tails(
+        self._logits_cumulative, logits, self.batch_shape_tensor(), self.dtype)
+
+  def _upper_tail(self, tail_mass):
+    logits = -tf.math.log(tail_mass / 2 / (1. - tail_mass / 2))
+    return helpers.estimate_tails(
+        self._logits_cumulative, logits, self.batch_shape_tensor(), self.dtype)
+

 class NoisyDeepFactorized(uniform_noise.UniformNoiseAdapter):
   """DeepFactorized that is convolved with uniform noise."""

tensorflow_compression/python/distributions/deep_factorized_test.py

Lines changed: 24 additions & 50 deletions
@@ -14,14 +14,15 @@
 # ==============================================================================
 """Tests of deep factorized distribution."""

+from absl.testing import parameterized
 import tensorflow.compat.v2 as tf
 import tensorflow_probability as tfp

 from tensorflow_compression.python.distributions import deep_factorized
 from tensorflow_compression.python.distributions import helpers


-class DeepFactorizedTest(tf.test.TestCase):
+class DeepFactorizedTest(tf.test.TestCase, parameterized.TestCase):

   def test_can_instantiate_scalar(self):
     df = deep_factorized.DeepFactorized()
@@ -37,56 +38,31 @@ def test_can_instantiate_batched(self):
     self.assertEqual(df.num_filters, (3, 3))
     self.assertEqual(df.init_scale, 10)

-  def test_logistic_is_special_case_prob(self):
+  @parameterized.parameters(
+      "prob", "log_prob",
+      "cdf", "log_cdf",
+      "survival_function", "log_survival_function",
+  )
+  def test_logistic_is_special_case(self, method):
     # With no hidden units, the density should collapse to a logistic
     # distribution.
     df = deep_factorized.DeepFactorized(num_filters=(), init_scale=1)
     logistic = tfp.distributions.Logistic(loc=-df._biases[0][0, 0], scale=1.)
     x = tf.linspace(-5., 5., 20)
-    prob_df = df.prob(x)
-    prob_logistic = logistic.prob(x)
-    self.assertAllClose(prob_df, prob_logistic)
-
-  def test_logistic_is_special_case_cdf(self):
-    # With no hidden units, the density should collapse to a logistic
-    # distribution.
-    df = deep_factorized.DeepFactorized(num_filters=(), init_scale=1)
-    logistic = tfp.distributions.Logistic(loc=-df._biases[0][0, 0], scale=1.)
-    x = tf.linspace(-5., 5., 20)
-    cdf_df = df.cdf(x)
-    cdf_logistic = logistic.cdf(x)
-    self.assertAllClose(cdf_df, cdf_logistic)
-
-  def test_logistic_is_special_case_log_prob(self):
-    # With no hidden units, the density should collapse to a logistic
-    # distribution.
-    df = deep_factorized.DeepFactorized(num_filters=(), init_scale=1)
-    logistic = tfp.distributions.Logistic(loc=-df._biases[0][0, 0], scale=1.)
-    x = tf.linspace(-5000., 5000., 1000)
-    log_prob_df = df.log_prob(x)
-    log_prob_logistic = logistic.log_prob(x)
-    self.assertAllClose(log_prob_df, log_prob_logistic)
-
-  def test_logistic_is_special_case_log_cdf(self):
-    # With no hidden units, the density should collapse to a logistic
-    # distribution.
-    df = deep_factorized.DeepFactorized(num_filters=(), init_scale=1)
-    logistic = tfp.distributions.Logistic(loc=-df._biases[0][0, 0], scale=1.)
-    x = tf.linspace(-5000., 5000., 1000)
-    log_cdf_df = df.log_cdf(x)
-    log_cdf_logistic = logistic.log_cdf(x)
-    self.assertAllClose(log_cdf_df, log_cdf_logistic)
-
-  def test_logistic_is_special_case_log_survival_function(self):
-    # With no hidden units, the density should collapse to a logistic
-    # distribution.
-    df = deep_factorized.DeepFactorized(num_filters=(), init_scale=1)
-    logistic = tfp.distributions.Logistic(loc=-df._biases[0][0, 0], scale=1.)
-    x = tf.linspace(-5000., 5000., 1000)
-    log_survival_function_df = df.log_survival_function(x)
-    log_survival_function_logistic = logistic.log_survival_function(x)
-    self.assertAllClose(log_survival_function_df,
-                        log_survival_function_logistic)
+    val_df = getattr(df, method)(x)
+    val_logistic = getattr(logistic, method)(x)
+    self.assertAllClose(val_df, val_logistic)
+
+  @parameterized.parameters(
+      "prob", "log_prob",
+      "cdf", "log_cdf",
+      "survival_function", "log_survival_function",
+  )
+  def test_broadcasts_correctly(self, method):
+    df = deep_factorized.DeepFactorized(batch_shape=(2, 3))
+    x = tf.reshape(tf.linspace(-5., 5., 20), (4, 5, 1, 1))
+    val = getattr(df, method)(x)
+    self.assertEqual(val.shape, (4, 5, 2, 3))


 class NoisyDeepFactorizedTest(tf.test.TestCase):
@@ -140,13 +116,11 @@ def test_quantization_offset_is_zero(self):
     df = deep_factorized.NoisyDeepFactorized()
     self.assertEqual(helpers.quantization_offset(df), 0)

-  def test_tails_and_offset_are_in_order(self):
+  def test_tails_are_in_order(self):
     df = deep_factorized.NoisyDeepFactorized()
-    offset = helpers.quantization_offset(df)
     lower_tail = helpers.lower_tail(df, 2**-8)
     upper_tail = helpers.upper_tail(df, 2**-8)
-    self.assertGreater(upper_tail, offset)
-    self.assertGreater(offset, lower_tail)
+    self.assertGreater(upper_tail, lower_tail)

   def test_stats_throw_error(self):
     df = deep_factorized.NoisyDeepFactorized()
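Below is a small usage sketch of the newly implemented survival function (assumes the tensorflow_compression package is installed; not part of the commit). Since survival_function(x) = 1 - cdf(x), the two should agree up to floating-point error:

import tensorflow.compat.v2 as tf
from tensorflow_compression.python.distributions import deep_factorized

df = deep_factorized.DeepFactorized()
x = tf.linspace(-5., 5., 20)
print(df.survival_function(x).numpy())
print((1. - df.cdf(x)).numpy())  # should match the line above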
