
Commit 12d1809

Googler authored and tensorflower-gardener committed
Correctly compute Categorical.entropy for large negative float32 logits.
PiperOrigin-RevId: 381039181
1 parent d3398a7 commit 12d1809

File tree

  tensorflow_probability/python/distributions/categorical.py
  tensorflow_probability/python/distributions/categorical_test.py

2 files changed: +23 -19 lines changed

tensorflow_probability/python/distributions/categorical.py

Lines changed: 11 additions & 19 deletions
@@ -303,31 +303,23 @@ def _entropy(self):
       return -tf.reduce_sum(
           tf.math.multiply_no_nan(tf.math.log(probs), probs),
           axis=-1)
-    # The following result can be derived as follows. Write log(p[i]) as:
-    # s[i]-m-lse(s[i]-m) where m=max(s), then you have:
-    # sum_i exp(s[i]-m-lse(s-m)) (s[i] - m - lse(s-m))
-    # = -m - lse(s-m) + sum_i s[i] exp(s[i]-m-lse(s-m))
-    # = -m - lse(s-m) + (1/exp(lse(s-m))) sum_i s[i] exp(s[i]-m)
-    # = -m - lse(s-m) + (1/sumexp(s-m)) sum_i s[i] exp(s[i]-m)
-    # Write x[i]=s[i]-m then you have:
-    # = -m - lse(x) + (1/sum_exp(x)) sum_i s[i] exp(x[i])
-    # Negating all of this result is the Shanon (discrete) entropy.
+    # The following result can be derived as follows. Let s[i] be a logit.
+    # The entropy is:
+    #   H = -sum_i(p[i] * log(p[i]))
+    #     = -sum_i(p[i] * (s[i] - logsumexp(s))
+    #     = logsumexp(s) - sum_i(p[i] * s[i])
     logits = tf.convert_to_tensor(self._logits)
-    m = tf.reduce_max(logits, axis=-1, keepdims=True)
-    x = logits - m
-    sum_exp_x = tf.reduce_sum(tf.math.exp(x), axis=-1)
-    lse_logits = m[..., 0] + tf.math.log(sum_exp_x)
+    logits = logits - tf.reduce_max(logits, axis=-1, keepdims=True)
+    lse_logits = tf.reduce_logsumexp(logits, axis=-1)
+
     # TODO(b/161014180): Workaround to support correct gradient calculations
     # with -inf logits.
-    is_inf_logits = tf.cast(tf.math.is_inf(logits), dtype=tf.float32)
-    is_negative_logits = tf.cast(logits < 0, dtype=tf.float32)
     masked_logits = tf.where(
-        tf.cast((is_inf_logits * is_negative_logits), dtype=bool),
+        (tf.math.is_inf(logits) & (logits < 0)),
         tf.cast(1.0, dtype=logits.dtype), logits)
-
     return lse_logits - tf.reduce_sum(
-        tf.math.multiply_no_nan(masked_logits, tf.math.exp(x)),
-        axis=-1) / sum_exp_x
+        tf.math.multiply_no_nan(masked_logits, tf.math.exp(logits)),
+        axis=-1) / tf.math.exp(lse_logits)

   def _mode(self):
     x = self._probs if self._logits is None else self._logits
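
Why this matters numerically (an illustrative sketch, not part of the commit): the old code reconstructed lse_logits as m + log(sum(exp(logits - m))). For logits on the order of -1e9, the log(sum(exp(...))) term is well below half a float32 ulp of m, so the addition rounds it away and the entropy-carrying information is lost. The new code never adds m back: it works entirely with max-shifted logits, which is safe because the entropy is unchanged when every logit is shifted by the same constant. A minimal NumPy reproduction of the absorption, assuming 11 equally likely categories:

    import numpy as np

    # Old construction: add the per-row max back before using lse_logits.
    m = np.float32(-1e9)                    # max of the logits
    log_sum_exp = np.float32(np.log(11.0))  # log(sum(exp(logits - m))) = log(11)
    print(m + log_sum_exp == m)             # True: log(11) is absorbed in float32

    # New construction: stay in the max-shifted frame (all shifted logits are 0).
    shifted = np.zeros(11, dtype=np.float32)          # logits - max(logits)
    lse = np.float32(np.log(np.exp(shifted).sum()))   # reduce_logsumexp analogue
    entropy = lse - (shifted * np.exp(shifted)).sum() / np.exp(lse)
    print(entropy, np.log(11.0))                      # both ~2.3979

The tf.where / tf.math.multiply_no_nan masking that survives the rewrite serves a separate purpose, per the TODO above it: it is a workaround so gradients are computed correctly when some logits are exactly -inf.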

tensorflow_probability/python/distributions/categorical_test.py

Lines changed: 12 additions & 0 deletions
@@ -393,6 +393,18 @@ def testEntropyWithNegInfLogits(self):
     ans = [-(0.5*np.log(0.5) + 0.5*np.log(0.5)), -(np.log(1))]
     self.assertAllClose(self.evaluate(dist_entropy), ans)

+  def testEntropyWithLargeNegLogits(self):
+    num_categories = 11
+    logits = np.array([
+        [-1e7] * num_categories,
+        [-1e8] * num_categories,
+        [-1e9] * num_categories], dtype=np.float32)
+    dist = tfd.Categorical(logits=logits, validate_args=True)
+    dist_entropy = dist.entropy()
+
+    ans = [np.log(num_categories)] * 3
+    self.assertAllClose(self.evaluate(dist_entropy), ans)
+
   def testSample(self):
     histograms = np.array([[[0.2, 0.8], [0.4, 0.6]]])
     dist = tfd.Categorical(tf.math.log(histograms) - 50., validate_args=True)
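
The new test pins down the expected behavior: identical logits define a uniform distribution, so the entropy must be log(num_categories) ≈ 2.398 for 11 categories no matter how negative the shared logit (-1e7, -1e8, -1e9) is; before this change the large float32 offset swamped the calculation. A usage sketch of the same check outside the test harness, assuming a TensorFlow Probability build that includes this commit:

    import numpy as np
    import tensorflow_probability as tfp
    tfd = tfp.distributions

    logits = np.full([3, 11], -1e9, dtype=np.float32)   # 3 batches, uniform over 11
    dist = tfd.Categorical(logits=logits, validate_args=True)
    print(dist.entropy())   # approximately [2.3979, 2.3979, 2.3979] == log(11)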
