Add epsilon to maximize_within_unit_norm to avoid division by 0.

csferng · tensorflow-copybara · commit c1fc41808e43 · 2020-02-11T15:33:19.000-08:00
PiperOrigin-RevId: 294535768
diff --git a/neural_structured_learning/lib/BUILD b/neural_structured_learning/lib/BUILD
@@ -137,6 +137,7 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":utils",
+        # package absl/testing:parameterized
         "//neural_structured_learning/configs",
         # package numpy
         # package tensorflow
diff --git a/neural_structured_learning/lib/utils.py b/neural_structured_learning/lib/utils.py
@@ -59,18 +59,20 @@ def normalize(tensor, norm_type, epsilon=1e-6):
     norm = tf.maximum(norm, epsilon)
     normalized_tensor = tensor / norm
   elif norm_type == configs.NormType.L2:
-    normalized_tensor = tf.nn.l2_normalize(tensor, axis=target_axes)
+    normalized_tensor = tf.nn.l2_normalize(
+        tensor, axis=target_axes, epsilon=epsilon**2)
   else:
     raise NotImplementedError('Unrecognized or unimplemented "norm_type": %s' %
                               norm_type)
   return normalized_tensor
 
 
 def _expand_to_rank(vector, rank):
+  """Reshapes `vector` to shape [-1, 1, ..., 1] with `rank` dimensions."""
   return tf.reshape(vector, shape=[-1] + [1] * (rank - 1))
 
 
-def maximize_within_unit_norm(weights, norm_type):
+def maximize_within_unit_norm(weights, norm_type, epsilon=1e-6):
   """Solves the maximization problem weights^T*x with the constraint norm(x)=1.
 
   This op solves a batch of maximization problems at one time. The first axis of
@@ -91,6 +93,7 @@ def maximize_within_unit_norm(weights, norm_type):
       size).
     norm_type: One of `nsl.configs.NormType`, the type of the norm in the
       constraint.
+    epsilon: A lower bound value for the norm to avoid division by 0.
 
   Returns:
     A `Tensor` or a collection of `Tensor` objects (with the same structure and
@@ -122,7 +125,7 @@ def reduce_across_tensors(reduce_fn, input_tensors):
   if norm_type == configs.NormType.L2:
     squared_norm = reduce_across_tensors(tf.reduce_sum,
                                          [tf.square(t) for t in tensors])
-    inv_global_norm = tf.math.rsqrt(squared_norm)
+    inv_global_norm = tf.math.rsqrt(tf.maximum(squared_norm, epsilon**2))
     normalized_tensors = [
         tensor * _expand_to_rank(inv_global_norm, rank)
         for tensor, rank in zip(tensors, tensor_ranks)
@@ -141,8 +144,9 @@ def reduce_across_tensors(reduce_fn, input_tensors):
         for t, rank in zip(abs_tensors, tensor_ranks)
     ]
     num_nonzero = reduce_across_tensors(tf.reduce_sum, is_max_elem)
+    denominator = tf.maximum(num_nonzero, epsilon)
     mask = [
-        is_max * tf.sign(t) / _expand_to_rank(num_nonzero, rank)
+        is_max * tf.sign(t) / _expand_to_rank(denominator, rank)
         for t, rank, is_max in zip(tensors, tensor_ranks, is_max_elem)
     ]
     return tf.nest.pack_sequence_as(weights, mask)
diff --git a/neural_structured_learning/lib/utils_test.py b/neural_structured_learning/lib/utils_test.py
@@ -19,13 +19,14 @@
 
 import math
 
+from absl.testing import parameterized
 import neural_structured_learning.configs as configs
 from neural_structured_learning.lib import utils
 import numpy as np
 import tensorflow as tf
 
 
-class UtilsTest(tf.test.TestCase):
+class UtilsTest(tf.test.TestCase, parameterized.TestCase):
 
   def testNormalizeInf(self):
     target_tensor = tf.constant([[1.0, 2.0, -4.0], [-1.0, 5.0, -3.0]])
@@ -109,6 +110,12 @@ def testMaximizeWithinUnitNormWithMultipleInputs(self):
     }
     self.assertAllClose(actual, expected)
 
+  @parameterized.parameters('l2', 'l1', 'infinity')
+  def testMaximizeWithinUnitNormWithZeroInputShouldReturnZero(self, norm):
+    weights = tf.constant([[0.0, 0.0]])  # All-zero batch: its norm is 0.
+    actual = self.evaluate(utils.maximize_within_unit_norm(weights, norm))
+    self.assertAllEqual(actual, weights)  # Must stay zero, never NaN/Inf.
+
   def testReplicateEmbeddingsWithConstant(self):
     """Test the replicate_embeddings function with constant replicate_times."""
     input_embeddings = tf.constant([