Commit e0bc5c5

Johannes Ballé authored and copybara-github committed
Removes numerical tweaks for soft_round.
It seems they are mostly not necessary, except for one in soft_round_inverse. I changed this one to clip r instead of s, since it doesn't require picking constants (r is between -.5 and .5 by definition; we can simply enforce that).

PiperOrigin-RevId: 341053012
Change-Id: I8ec84b2eb5fa8e2c8d6292cab8ac69878fc0746b
1 parent c0e0fd5 commit e0bc5c5
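As a minimal standalone sketch (not part of this commit; assumes TensorFlow 2.x with eager execution), the instability the message refers to shows up at an exact integer input with a very large alpha: `s` saturates to -1 in floating point, `tf.atanh(s)` returns -inf, and clipping `r` to its defined range [-.5, .5] restores a finite forward value without picking an epsilon for `s`:

import tensorflow as tf

alpha = 1e6
y = tf.constant(0.)                       # exact integer: worst case for atanh
m = tf.floor(y) + .5                      # m = 0.5
s = (y - m) * (tf.tanh(alpha / 2.) * 2.)  # rounds to exactly -1.0 in float32
r = tf.atanh(s) / alpha                   # -inf before clipping
r = tf.clip_by_value(r, -.5, .5)          # r is in [-.5, .5] by definition
print(float(m + r))                       # 0.0, finite and equal to the input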

File tree: 2 files changed (+43 −27 lines)

2 files changed

+43
-27
lines changed

tensorflow_compression/python/ops/soft_round_ops.py

Lines changed: 14 additions & 26 deletions
@@ -20,7 +20,7 @@
__all__ = ["soft_round", "soft_round_inverse", "soft_round_conditional_mean"]


-def soft_round(x, alpha, eps=1e-12):
+def soft_round(x, alpha, eps=1e-3):
  """Differentiable approximation to round().

  Larger alphas correspond to closer approximations of the round function.
@@ -39,28 +39,19 @@ def soft_round(x, alpha, eps=1e-12):
  Returns:
    tf.Tensor
  """
-
  if isinstance(alpha, (float, int)) and alpha < eps:
    return tf.identity(x, name="soft_round")

-  m = tf.floor(x) + 0.5
+  m = tf.floor(x) + .5
  r = x - m
-  z = tf.maximum(tf.tanh(alpha / 2.0) * 2.0, eps)
+  z = tf.tanh(alpha / 2.) * 2.
  y = m + tf.tanh(alpha * r) / z

  # For very low alphas, soft_round behaves like identity
  return tf.where(alpha < eps, x, y, name="soft_round")


-@tf.custom_gradient
-def _clip_st(s):
-  """Clip s to [-1 + 1e-7, 1 - 1e-7] with straight-through gradients."""
-  s = tf.clip_by_value(s, -1 + 1e-7, 1 - 1e-7)
-  grad = lambda x: x
-  return s, grad
-
-
-def soft_round_inverse(y, alpha, eps=1e-12):
+def soft_round_inverse(y, alpha, eps=1e-3):
  """Inverse of soft_round().

  This is described in Sec. 4.1. in the paper
@@ -77,21 +68,19 @@ def soft_round_inverse(y, alpha, eps=1e-12):
  Returns:
    tf.Tensor
  """
-
  if isinstance(alpha, (float, int)) and alpha < eps:
    return tf.identity(y, name="soft_round_inverse")

-  m = tf.floor(y) + 0.5
-  s = (y - m) * (tf.tanh(alpha / 2.0) * 2.0)
-  # We have -0.5 <= (y-m) <= 0.5 and -1 < tanh < 1, so
-  # -1 <= s <= 1. However tf.atanh is only stable for inputs
-  # in the range [-1+1e-7, 1-1e-7], so we (safely) clip s to this range.
-  # In the rare case where `1-|s| < 1e-7`, we use straight-through for the
-  # gradient.
-  s = _clip_st(s)
-  r = tf.atanh(s) / tf.maximum(alpha, eps)
+  m = tf.floor(y) + .5
+  s = (y - m) * (tf.tanh(alpha / 2.) * 2.)
+  r = tf.atanh(s) / alpha
+  # `r` must be between -.5 and .5 by definition. In case atanh becomes +-inf
+  # due to numerical instability, this prevents the forward pass from yielding
+  # infinite values. Note that it doesn't prevent the backward pass from
+  # returning non-finite values.
+  r = tf.clip_by_value(r, -.5, .5)

-  # For very low alphas, soft_round behaves like identity
+  # For very low alphas, soft_round behaves like identity.
  return tf.where(alpha < eps, y, m + r, name="soft_round_inverse")


@@ -107,12 +96,11 @@ def soft_round_conditional_mean(inputs, alpha):
  > Eirikur Agustsson & Lucas Theis<br />
  > https://arxiv.org/abs/2006.09952

-
  Args:
    inputs: The input tensor.
    alpha: The softround alpha.

  Returns:
    The conditional mean, of same shape as `inputs`.
  """
-  return soft_round_inverse(inputs - 0.5, alpha) + 0.5
+  return soft_round_inverse(inputs - .5, alpha) + .5
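A quick usage sketch (not part of the commit; assumes tensorflow and tensorflow_compression are installed) showing that with the updated ops, soft_round_inverse approximately undoes soft_round and both stay finite across integers and half-integers for a moderate alpha:

import tensorflow as tf
from tensorflow_compression.python.ops import soft_round_ops

x = tf.linspace(-2., 2., 21)  # includes exact integers and half-integers
y = soft_round_ops.soft_round(x, alpha=5.)
x_hat = soft_round_ops.soft_round_inverse(y, alpha=5.)
print(bool(tf.reduce_all(tf.math.is_finite(y))))   # True
print(float(tf.reduce_max(tf.abs(x - x_hat))))     # small round-trip error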

tensorflow_compression/python/ops/soft_round_ops_test.py

Lines changed: 29 additions & 1 deletion
@@ -14,12 +14,13 @@
# ==============================================================================
"""Tests for soft round."""

+from absl.testing import parameterized
import tensorflow as tf

from tensorflow_compression.python.ops import soft_round_ops


-class SoftRoundTest(tf.test.TestCase):
+class SoftRoundTest(tf.test.TestCase, parameterized.TestCase):

  def test_soft_round_small_alpha_is_identity(self):
    x = tf.linspace(-2., 2., 50)
@@ -58,5 +59,32 @@ def test_conditional_mean_large_alpha_is_round(self):
    y = soft_round_ops.soft_round_conditional_mean(x, alpha=5000.0)
    self.assertAllClose(tf.math.round(x), y, atol=0.001)

+  @parameterized.parameters(0., 1e-6, 1e-2, 5., 1e6)
+  def test_soft_round_values_and_gradients_are_finite(self, alpha):
+    x = tf.linspace(0., 1., 11)  # covers exact integers and half-integers
+    with tf.GradientTape() as tape:
+      tape.watch(x)
+      y = soft_round_ops.soft_round(x, alpha=alpha)
+    dy = tape.gradient(y, x)
+    self.assertAllEqual(tf.math.is_finite(y), tf.ones(x.shape, dtype=bool))
+    self.assertAllEqual(tf.math.is_finite(dy), tf.ones(x.shape, dtype=bool))
+
+  @parameterized.parameters(0., 1e-6, 1e-2, 5., 1e6)
+  def test_soft_round_inverse_values_and_gradients_are_finite(self, alpha):
+    x = tf.linspace(-.5, .5, 11)  # covers exact integers and half-integers
+    with tf.GradientTape() as tape:
+      tape.watch(x)
+      y = soft_round_ops.soft_round_inverse(x, alpha=alpha)
+    dy = tape.gradient(y, x)
+    self.assertAllEqual(tf.math.is_finite(y), tf.ones(x.shape, dtype=bool))
+    if alpha > 15:
+      # We allow non-finite values for large alphas, since the function simply
+      # is extremely steep there.
+      expected_finite = tf.one_hot(5, 11, False, True)
+    else:
+      expected_finite = tf.ones(x.shape, dtype=bool)
+    self.assertAllEqual(tf.math.is_finite(dy), expected_finite)
+
+
if __name__ == "__main__":
  tf.test.main()
