
Commit 1d651d6

jburnim authored and tensorflower-gardener committed
Add an is_missing argument to GaussianProcess.log_prob.
When `is_missing` is passed, `GaussianProcess.log_prob` returns the log-probability of the marginal distribution in which each event dimension for which `is_missing` is `True` is marginalized out.

PiperOrigin-RevId: 427487729
1 parent 93e0365 commit 1d651d6

6 files changed (+183, -55 lines)

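For illustration, a minimal usage sketch of the new argument (the kernel parameters, index points, and names like `pts` and `gp_sub` are invented for this example; the `is_missing` forwarding through `log_prob` is exercised by the tests below):

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
psd_kernels = tfp.math.psd_kernels

kernel = psd_kernels.ExponentiatedQuadratic(amplitude=1.1, length_scale=0.9)
pts = np.linspace(-1., 1., 4, dtype=np.float32)[..., np.newaxis]

gp = tfd.GaussianProcess(
    kernel=kernel, index_points=pts, observation_noise_variance=.05)
x = gp.sample()

# Marginalize out the second index point; its entry in `x` is ignored
# (the tests below even pass NaN there).
is_missing = np.array([False, True, False, False])
lp = gp.log_prob(x, is_missing=is_missing)

# Equivalent to the log-prob under a GP built only on the observed points:
gp_sub = tfd.GaussianProcess(
    kernel=kernel, index_points=pts[~is_missing],
    observation_noise_variance=.05)
lp_sub = gp_sub.log_prob(tf.boolean_mask(x, ~is_missing))  # == lp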

tensorflow_probability/python/distributions/BUILD

Lines changed: 2 additions & 0 deletions

@@ -724,6 +724,7 @@ multi_substrate_py_library(
         ":kullback_leibler",
         ":mvn_linear_operator",
         ":normal",
+        # numpy dep,
         # tensorflow dep,
         "//tensorflow_probability/python/bijectors:identity",
         "//tensorflow_probability/python/internal:distribution_util",
@@ -732,6 +733,7 @@ multi_substrate_py_library(
         "//tensorflow_probability/python/internal:reparameterization",
         "//tensorflow_probability/python/internal:tensor_util",
         "//tensorflow_probability/python/internal:tensorshape_util",
+        "//tensorflow_probability/python/math/psd_kernels/internal:util",
     ],
 )

tensorflow_probability/python/distributions/gaussian_process.py

Lines changed: 80 additions & 25 deletions

@@ -17,6 +17,7 @@
 import warnings
 
 # Dependency imports
+import numpy as np
 import tensorflow.compat.v2 as tf
 
 from tensorflow_probability.python.bijectors import identity as identity_bijector
@@ -33,6 +34,7 @@
 from tensorflow_probability.python.internal import reparameterization
 from tensorflow_probability.python.internal import tensor_util
 from tensorflow_probability.python.internal import tensorshape_util
+from tensorflow_probability.python.math.psd_kernels.internal import util as psd_kernels_util
 from tensorflow.python.util import deprecation  # pylint: disable=g-direct-tensorflow-import
 
 __all__ = [
@@ -425,30 +427,44 @@ def get_marginal_distribution(self, index_points=None):
         points, respectively.
     """
     with self._name_and_control_scope('get_marginal_distribution'):
-      # TODO(cgs): consider caching the result here, keyed on `index_points`.
-      index_points = self._get_index_points(index_points)
-      covariance = self._compute_covariance(index_points)
-      loc = self._mean_fn(index_points)
-      # If we're sure the number of index points is 1, we can just construct a
-      # scalar Normal. This has computational benefits and supports things like
-      # CDF that aren't otherwise straightforward to provide.
-      if self._is_univariate_marginal(index_points):
-        scale = tf.sqrt(covariance)
-        # `loc` has a trailing 1 in the shape; squeeze it.
-        loc = tf.squeeze(loc, axis=-1)
-        return normal.Normal(
-            loc=loc,
-            scale=scale,
-            validate_args=self._validate_args,
-            allow_nan_stats=self._allow_nan_stats,
-            name='marginal_distribution')
+      return self._get_marginal_distribution(index_points=index_points)
+
+  def _get_marginal_distribution(self, index_points=None, is_missing=None):
+    # TODO(cgs): consider caching the result here, keyed on `index_points`.
+    index_points = self._get_index_points(index_points)
+    covariance = self._compute_covariance(index_points)
+    is_univariate_marginal = self._is_univariate_marginal(index_points)
+
+    loc = self._mean_fn(index_points)
+    if is_univariate_marginal:
+      # `loc` has a trailing 1 in the shape; squeeze it.
+      loc = tf.squeeze(loc, axis=-1)
+
+    if is_missing is not None:
+      loc = tf.where(is_missing, 0., loc)
+      if is_univariate_marginal:
+        covariance = tf.where(is_missing, 1., covariance)
       else:
-        return self._marginal_fn(
-            loc=loc,
-            covariance=covariance,
-            validate_args=self._validate_args,
-            allow_nan_stats=self._allow_nan_stats,
-            name='marginal_distribution')
+        covariance = psd_kernels_util.mask_matrix(covariance, ~is_missing)  # pylint:disable=invalid-unary-operand-type
+
+    # If we're sure the number of index points is 1, we can just construct a
+    # scalar Normal. This has computational benefits and supports things like
+    # CDF that aren't otherwise straightforward to provide.
+    if is_univariate_marginal:
+      scale = tf.sqrt(covariance)
+      return normal.Normal(
+          loc=loc,
+          scale=scale,
+          validate_args=self._validate_args,
+          allow_nan_stats=self._allow_nan_stats,
+          name='marginal_distribution')
+    else:
+      return self._marginal_fn(
+          loc=loc,
+          covariance=covariance,
+          validate_args=self._validate_args,
+          allow_nan_stats=self._allow_nan_stats,
+          name='marginal_distribution')
 
   @property
   def mean_fn(self):
@@ -524,8 +540,47 @@ def _get_index_points(self, index_points=None):
     return tf.convert_to_tensor(
         index_points if index_points is not None else self._index_points)
 
-  def _log_prob(self, value, index_points=None):
-    return self.get_marginal_distribution(index_points).log_prob(value)
+  @distribution_util.AppendDocstring(kwargs_dict={
+      'index_points':
+          'optional `float` `Tensor` representing a finite (batch of) '
+          'points in the index set over which this GP is defined. The shape '
+          'has the form `[b1, ..., bB, e, f1, ..., fF]` where `F` is the '
+          'number of feature dimensions and must equal '
+          '`self.kernel.feature_ndims` and `e` is the number of index points '
+          'in each batch. Ultimately, this distribution corresponds to an '
+          '`e`-dimensional multivariate normal. The batch shape must be '
+          'broadcastable with `kernel.batch_shape` and any batch dims yielded '
+          'by `mean_fn`. If not specified, `self.index_points` is used. '
+          'Default value: `None`.',
+      'is_missing':
+          'optional `bool` `Tensor` of shape `[..., e]`, where `e` is the '
+          'number of index points in each batch. Represents a batch of '
+          'Boolean masks. When `is_missing` is not `None`, the returned '
+          'log-prob is for the *marginal* distribution, in which all '
+          'dimensions for which `is_missing` is `True` have been marginalized '
+          'out. The batch dimensions of `is_missing` must broadcast with the '
+          'sample and batch dimensions of `value` and of this `Distribution`. '
+          'Default value: `None`.'
+  })
+  def _log_prob(self, value, index_points=None, is_missing=None):
+    if is_missing is not None:
+      is_missing = tf.convert_to_tensor(is_missing)
+    index_points = self._get_index_points(index_points)
+    mvn = self._get_marginal_distribution(index_points, is_missing=is_missing)
+    if is_missing is None:
+      return mvn.log_prob(value)
+
+    # Subtract out the Normal distribution's log normalizer for each dimension
+    # that is masked out.
+    lp = mvn.log_prob(tf.where(is_missing, 0., value))
+    num_masked_dims = tf.cast(is_missing, mvn.dtype)
+    if not self._is_univariate_marginal(index_points):
+      event_shape = self._event_shape_tensor(index_points=index_points)
+      num_masked_dims = tf.reduce_sum(
+          num_masked_dims * tf.ones(event_shape, dtype=mvn.dtype),
+          axis=-1)
+    correction = num_masked_dims * -0.5 * np.log(2. * np.pi)
+    return lp - correction
 
   def _event_shape_tensor(self, index_points=None):
     index_points = self._get_index_points(index_points)
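To see why the correction in `_log_prob` is exactly right: masking the covariance to the identity on missing rows/cols (via `mask_matrix`) and zeroing the missing `loc` and `value` entries turns each missing dimension into an independent standard normal, so the joint log-prob exceeds the desired marginal log-prob by one standard-normal log-normalizer, 0.5 * log(2 * pi), per missing dimension. A self-contained NumPy/SciPy sketch of that identity (illustrative only; the covariance below is an arbitrary PSD matrix, not one built from a kernel):

import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
n = 4
a = rng.normal(size=(n, n))
cov = a @ a.T + n * np.eye(n)      # an arbitrary well-conditioned covariance
loc = rng.normal(size=n)
x = rng.normal(size=n)
is_missing = np.array([False, True, False, True])
keep = ~is_missing

# Ground truth: log-prob of the marginal over the kept dimensions.
expected = multivariate_normal(
    loc[keep], cov[np.ix_(keep, keep)]).logpdf(x[keep])

# The commit's approach: mask the covariance to the identity on missing
# rows/cols, zero the missing loc/value entries, then subtract one standard
# normal log-normalizer per missing dimension.
masked_cov = np.where(np.outer(keep, keep), cov, np.eye(n))
lp = multivariate_normal(
    np.where(keep, loc, 0.), masked_cov).logpdf(np.where(keep, x, 0.))
lp -= is_missing.sum() * (-0.5) * np.log(2. * np.pi)

np.testing.assert_allclose(lp, expected)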

tensorflow_probability/python/distributions/gaussian_process_test.py

Lines changed: 71 additions & 0 deletions

@@ -330,6 +330,77 @@ def testGPPosteriorPredictive(self):
         self.evaluate(expected_gprm.log_prob(samples)),
         self.evaluate(actual_gprm.log_prob(samples)))
 
+  def testLogProbWithIsMissing(self):
+    index_points = tf.Variable(
+        [[-1.0, 0.0], [-0.5, -0.5], [1.5, 0.0], [1.6, 1.5]],
+        shape=None if self.is_static else tf.TensorShape(None))
+    self.evaluate(index_points.initializer)
+    amplitude = tf.convert_to_tensor(1.1)
+    length_scale = tf.convert_to_tensor(0.9)
+
+    gp = tfd.GaussianProcess(
+        kernel=psd_kernels.ExponentiatedQuadratic(amplitude, length_scale),
+        index_points=index_points,
+        mean_fn=lambda x: tf.reduce_mean(x, axis=-1),
+        observation_noise_variance=.05,
+        jitter=0.0)
+
+    x = gp.sample(5, seed=test_util.test_seed())
+
+    is_missing = np.array([
+        [False, True, False, False],
+        [False, False, False, False],
+        [True, False, True, True],
+        [True, False, False, True],
+        [False, False, True, True],
+    ])
+
+    lp = gp.log_prob(tf.where(is_missing, np.nan, x), is_missing=is_missing)
+
+    # For each batch member, check that the log_prob is the same as for a
+    # GaussianProcess without the missing index points.
+    for i in range(5):
+      gp_i = tfd.GaussianProcess(
+          kernel=psd_kernels.ExponentiatedQuadratic(amplitude, length_scale),
+          index_points=tf.gather(index_points, (~is_missing[i]).nonzero()[0]),
+          mean_fn=lambda x: tf.reduce_mean(x, axis=-1),
+          observation_noise_variance=.05,
+          jitter=0.0)
+      lp_i = gp_i.log_prob(tf.gather(x[i], (~is_missing[i]).nonzero()[0]))
+      # NOTE: This reshape is necessary because lp_i has shape [1] when
+      # gp_i.index_points contains a single index point.
+      self.assertAllClose(tf.reshape(lp_i, []), lp[i])
+
+    # The log_prob should be zero when all points are missing.
+    self.assertAllClose(
+        tf.zeros((3, 2)),
+        gp.log_prob(tf.ones((3, 1, 4)) * np.nan,
+                    is_missing=tf.constant(True, shape=(2, 4))))
+
+  def testUnivariateLogProbWithIsMissing(self):
+    index_points = tf.convert_to_tensor([[[0.0, 0.0]], [[0.5, 1.0]]])
+    amplitude = tf.convert_to_tensor(1.1)
+    length_scale = tf.convert_to_tensor(0.9)
+
+    gp = tfd.GaussianProcess(
+        kernel=psd_kernels.ExponentiatedQuadratic(amplitude, length_scale),
+        index_points=index_points,
+        mean_fn=lambda x: tf.reduce_mean(x, axis=-1),
+        observation_noise_variance=.05,
+        jitter=0.0)
+
+    x = gp.sample(3, seed=test_util.test_seed())
+    lp = gp.log_prob(x)
+
+    self.assertAllClose(lp, gp.log_prob(x, is_missing=[False, False]))
+    self.assertAllClose(
+        tf.convert_to_tensor([np.zeros((3, 2)), lp]),
+        gp.log_prob(x, is_missing=[[[True]], [[False]]]))
+    self.assertAllClose(
+        tf.convert_to_tensor([[lp[0, 0], 0.0], [0.0, 0.0], [0., lp[2, 1]]]),
+        gp.log_prob(x, is_missing=[[False, True], [True, True], [True, False]]))
+
 
 @test_util.test_all_tf_execution_regimes
 class GaussianProcessStaticTest(_GaussianProcessTest, test_util.TestCase):

tensorflow_probability/python/math/psd_kernels/BUILD

Lines changed: 1 addition & 1 deletion

@@ -281,8 +281,8 @@ multi_substrate_py_library(
         "//tensorflow_probability/python/distributions:cholesky_util",
         "//tensorflow_probability/python/internal:dtype_util",
         "//tensorflow_probability/python/internal:parameter_properties",
-        "//tensorflow_probability/python/internal:prefer_static",
         "//tensorflow_probability/python/internal:tensorshape_util",
+        "//tensorflow_probability/python/math/psd_kernels/internal:util",
     ],
 )

tensorflow_probability/python/math/psd_kernels/internal/util.py

Lines changed: 27 additions & 0 deletions

@@ -22,6 +22,7 @@
 from tensorflow_probability.python.internal import tensorshape_util
 
 __all__ = [
+    'mask_matrix',
     'maybe_get_common_dtype',
     'pad_shape_with_ones',
     'pairwise_square_distance_matrix',
@@ -284,3 +285,29 @@ def pairwise_square_distance_tensor(
   # Now we need to undo the transformation.
   return tf.reshape(pairwise, tf.concat([
       tf.shape(pairwise)[:-2], x1_example_shape, x2_example_shape], axis=0))
+
+
+def mask_matrix(x, mask=None):
+  """Copies a matrix, replacing masked-out rows/cols from the identity matrix.
+
+  Args:
+    x: A Tensor of shape `[..., n, n]`, representing a batch of n-by-n
+      matrices.
+    mask: A boolean Tensor of shape `[..., n]`, representing a batch of masks.
+      If `mask` is None, `x` is returned.
+
+  Returns:
+    A Tensor of shape `[..., n, n]`, representing a batch of n-by-n matrices.
+    For each batch member `r`, element `r[i, j]` equals `eye(n)[i, j]` if
+    dimension `i` or `j` is False in the corresponding input mask. Otherwise,
+    `r[i, j]` equals the corresponding element from `x`.
+  """
+  if mask is None:
+    return x
+
+  x = tf.convert_to_tensor(x)
+  mask = tf.convert_to_tensor(mask, dtype=tf.bool)
+
+  n = ps.dimension_size(x, -1)
+
+  return tf.where(~mask[..., tf.newaxis] | ~mask[..., tf.newaxis, :],
+                  tf.eye(n, dtype=x.dtype),
+                  x)
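For concreteness, here is the masking semantics on a single 3x3 matrix, written with plain TF ops rather than the internal helper (a sketch of the same `tf.where` construction with `n` fixed at 3; the matrix values are made up):

import tensorflow as tf

x = tf.constant([[2., 5., 7.],
                 [5., 3., 9.],
                 [7., 9., 4.]])
mask = tf.constant([True, False, True])

# Any entry whose row or column is masked out comes from eye(3) instead.
out = tf.where(~mask[..., tf.newaxis] | ~mask[..., tf.newaxis, :],
               tf.eye(3), x)
# out ==
# [[2., 0., 7.],
#  [0., 1., 0.],
#  [7., 0., 4.]]

Because the masked rows/cols are exactly identity rows/cols, the masked matrix's log-determinant and Cholesky factor agree on the kept indices with those of the submatrix that drops the masked dimensions; this is what makes both the GP marginalization above and the `SchurComplement` divisor masking below work.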

tensorflow_probability/python/math/psd_kernels/schur_complement.py

Lines changed: 2 additions & 29 deletions

@@ -18,7 +18,6 @@
 from tensorflow_probability.python.internal import distribution_util
 from tensorflow_probability.python.internal import dtype_util
 from tensorflow_probability.python.internal import parameter_properties
-from tensorflow_probability.python.internal import prefer_static as ps
 from tensorflow_probability.python.internal import tensor_util
 from tensorflow_probability.python.math.psd_kernels import positive_semidefinite_kernel as psd_kernel
 from tensorflow_probability.python.math.psd_kernels.internal import util
@@ -58,32 +57,6 @@ def _compute_divisor_matrix(
   return divisor_matrix
 
 
-def _mask_matrix(x, mask=None):
-  """Copies a matrix, replacing masked-out rows/cols from the identity matrix.
-
-  Args:
-    x: A Tensor of shape `[..., n, n]`, representing a batch of n-by-n
-      matrices.
-    mask: A boolean Tensor of shape `[..., n]`, representing a batch of masks.
-      If `mask` is None, `x` is returned.
-
-  Returns:
-    A Tensor of shape `[..., n, n]`, representing a batch of n-by-n matrices.
-    For each batch member `r`, element `r[i, j]` equals `eye(n)[i, j]` if
-    dimension `i` or `j` is False in the corresponding input mask. Otherwise,
-    `r[i, j]` equals the corresponding element from `x`.
-  """
-  if mask is None:
-    return x
-
-  x = tf.convert_to_tensor(x)
-  mask = tf.convert_to_tensor(mask, dtype=tf.bool)
-
-  n = ps.dimension_size(x, -1)
-
-  return tf.where(~mask[..., tf.newaxis] | ~mask[..., tf.newaxis, :],
-                  tf.eye(n, dtype=x.dtype),
-                  x)
-
-
 class SchurComplement(psd_kernel.AutoCompositeTensorPsdKernel):
   """The SchurComplement kernel.
 
@@ -363,7 +336,7 @@ def with_precomputed_divisor(
 
     # TODO(b/196219597): Add a check to ensure that we have a `base_kernel`
     # that is explicitly concretized.
-    divisor_matrix_cholesky = cholesky_fn(_mask_matrix(
+    divisor_matrix_cholesky = cholesky_fn(util.mask_matrix(
         _compute_divisor_matrix(base_kernel,
                                 diag_shift=diag_shift,
                                 fixed_inputs=fixed_inputs),
@@ -529,7 +502,7 @@ def _divisor_matrix(self, fixed_inputs=None, fixed_inputs_mask=None):
     # NOTE: Replacing masked-out rows/columns of the divisor matrix with
     # rows/columns from the identity matrix is equivalent to using a divisor
     # matrix in which those rows and columns have been dropped.
-    return _mask_matrix(
+    return util.mask_matrix(
         _compute_divisor_matrix(self._base_kernel,
                                 diag_shift=self._diag_shift,
                                 fixed_inputs=fixed_inputs),
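A small NumPy check of the equivalence asserted in the NOTE above (illustrative only; a random PSD matrix stands in for the divisor matrix): the Cholesky factor of the masked matrix, restricted to the kept rows/columns, matches the Cholesky factor of the submatrix with the masked rows/columns dropped.

import numpy as np

rng = np.random.default_rng(1)
a = rng.normal(size=(4, 4))
m = a @ a.T + 4. * np.eye(4)       # stand-in for a divisor matrix
keep = np.array([True, False, True, True])

# Mask row/col 1 to the identity, as util.mask_matrix does.
masked = np.where(np.outer(keep, keep), m, np.eye(4))
np.testing.assert_allclose(
    np.linalg.cholesky(masked)[np.ix_(keep, keep)],
    np.linalg.cholesky(m[np.ix_(keep, keep)]))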
