Add tfp.math.psd_kernels.SpectralMixture.

srvasude · tensorflower-gardener · commit 273eba3166aa · 2022-02-09T00:15:30.000-08:00
PiperOrigin-RevId: 427396648
diff --git a/tensorflow_probability/python/math/psd_kernels/BUILD b/tensorflow_probability/python/math/psd_kernels/BUILD
@@ -47,6 +47,7 @@ multi_substrate_py_library(
         ":positive_semidefinite_kernel",
         ":rational_quadratic",
         ":schur_complement",
+        ":spectral_mixture",
         "//tensorflow_probability/python/internal:all_util",
         "//tensorflow_probability/python/internal:dtype_util",
         "//tensorflow_probability/python/math/psd_kernels/internal",
@@ -301,6 +302,37 @@ multi_substrate_py_test(
     ],
 )
 
+multi_substrate_py_library(
+    name = "spectral_mixture",
+    srcs = ["spectral_mixture.py"],
+    deps = [
+        ":positive_semidefinite_kernel",
+        # numpy dep,
+        # tensorflow dep,
+        "//tensorflow_probability/python/internal:assert_util",
+        "//tensorflow_probability/python/internal:dtype_util",
+        "//tensorflow_probability/python/internal:parameter_properties",
+        "//tensorflow_probability/python/internal:prefer_static",
+        "//tensorflow_probability/python/internal:tensor_util",
+        "//tensorflow_probability/python/math:generic",
+        "//tensorflow_probability/python/math/psd_kernels/internal:util",
+    ],
+)
+
+multi_substrate_py_test(
+    name = "spectral_mixture_test",
+    size = "small",
+    srcs = ["spectral_mixture_test.py"],
+    jax_size = "medium",
+    deps = [
+        # absl/testing:parameterized dep,
+        # numpy dep,
+        # tensorflow dep,
+        "//tensorflow_probability",
+        "//tensorflow_probability/python/internal:test_util",
+    ],
+)
+
 multi_substrate_py_library(
     name = "feature_scaled",
     srcs = ["feature_scaled.py"],
diff --git a/tensorflow_probability/python/math/psd_kernels/__init__.py b/tensorflow_probability/python/math/psd_kernels/__init__.py
@@ -35,6 +35,7 @@
 from tensorflow_probability.python.math.psd_kernels.positive_semidefinite_kernel import PositiveSemidefiniteKernel
 from tensorflow_probability.python.math.psd_kernels.rational_quadratic import RationalQuadratic
 from tensorflow_probability.python.math.psd_kernels.schur_complement import SchurComplement
+from tensorflow_probability.python.math.psd_kernels.spectral_mixture import SpectralMixture
 
 _allowed_symbols = [
     'AutoCompositeTensorPsdKernel',
@@ -57,6 +58,7 @@
     'PositiveSemidefiniteKernel',
     'RationalQuadratic',
     'SchurComplement',
+    'SpectralMixture',
 ]
 
 all_util.remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow_probability/python/math/psd_kernels/hypothesis_testlib.py b/tensorflow_probability/python/math/psd_kernels/hypothesis_testlib.py
@@ -38,7 +38,8 @@
     'FeatureScaled',
     'KumaraswamyTransformed',
     'PointwiseExponential',
-    'SchurComplement'
+    'SchurComplement',
+    'SpectralMixture',
 ]
 
 
@@ -751,6 +752,7 @@ def schur_complements(
       'fixed_inputs': fixed_inputs,
       'diag_shift': diag_shift
   }
+
   for param_name in schur_complement_params:
     if enable_vars and draw(hps.booleans()):
       kernel_variable_names.append(param_name)
@@ -768,6 +770,96 @@ def schur_complements(
   return result_kernel, kernel_variable_names
 
 
+@hps.composite
+def spectral_mixtures(
+    draw,
+    batch_shape=None,
+    event_dim=None,
+    feature_dim=None,
+    feature_ndims=None,
+    enable_vars=None,
+    depth=None):
+  """Strategy for drawing `SpectralMixture` kernels.
+
+  The kernel parameters are built from draws of the `kernel_input` strategy.
+
+  Args:
+    draw: Hypothesis strategy sampler supplied by `@hps.composite`.
+    batch_shape: An optional `TensorShape`.  The batch shape of the resulting
+      Kernel.  Hypothesis will pick a batch shape if omitted.
+    event_dim: Optional Python int giving the size of each of the
+      kernel's parameters' event dimensions.  This is shared across all
+      parameters, permitting square event matrices, compatible location and
+      scale Tensors, etc. If omitted, Hypothesis will choose one.
+    feature_dim: Optional Python int giving the size of each feature dimension.
+      If omitted, Hypothesis will choose one.
+    feature_ndims: Optional Python int stating the number of feature dimensions
+      inputs will have. If omitted, Hypothesis will choose one.
+    enable_vars: TODO(bjp): Make this `True` all the time and put variable
+      initialization in slicing_test.  If `False`, the returned parameters are
+      all Tensors, never Variables or DeferredTensor.
+    depth: Python `int` giving maximum nesting depth of compound kernel.
+
+  Returns:
+    kernels: A strategy for drawing `SpectralMixture` kernels with the specified
+      `batch_shape` (or an arbitrary one if omitted).
+  """
+  if depth is None:
+    depth = draw(depths())
+  if batch_shape is None:
+    batch_shape = draw(tfp_hps.shapes())
+  if event_dim is None:
+    event_dim = draw(hps.integers(min_value=2, max_value=6))
+  if feature_dim is None:
+    feature_dim = draw(hps.integers(min_value=2, max_value=6))
+  if feature_ndims is None:
+    feature_ndims = draw(hps.integers(min_value=2, max_value=6))
+
+  num_mixtures = draw(hps.integers(min_value=2, max_value=5))
+
+  logits = draw(kernel_input(
+      batch_shape=batch_shape,
+      example_ndims=0,
+      feature_dim=num_mixtures,
+      feature_ndims=1))
+
+  locs = draw(kernel_input(
+      batch_shape=batch_shape,
+      example_ndims=1,
+      example_dim=num_mixtures,
+      feature_dim=feature_dim,
+      feature_ndims=feature_ndims))
+
+  scales = tfp_hps.softplus_plus_eps()(draw(kernel_input(
+      batch_shape=batch_shape,
+      example_ndims=1,
+      example_dim=num_mixtures,
+      feature_dim=feature_dim,
+      feature_ndims=feature_ndims)))
+
+  hp.note(f'Forming SpectralMixture kernel with logits: {logits} '
+          f'locs: {locs} and scales: {scales}')
+
+  spectral_mixture_params = {'locs': locs, 'logits': logits, 'scales': scales}
+
+  kernel_variable_names = []
+  for param_name in spectral_mixture_params:
+    if enable_vars and draw(hps.booleans()):
+      kernel_variable_names.append(param_name)
+      spectral_mixture_params[param_name] = tf.Variable(
+          spectral_mixture_params[param_name], name=param_name)
+      if draw(hps.booleans()):
+        spectral_mixture_params[param_name] = tfp_hps.defer_and_count_usage(
+            spectral_mixture_params[param_name])
+  result_kernel = tfpk.SpectralMixture(
+      logits=spectral_mixture_params['logits'],
+      locs=spectral_mixture_params['locs'],
+      scales=spectral_mixture_params['scales'],
+      feature_ndims=feature_ndims,
+      validate_args=True)
+  return result_kernel, kernel_variable_names
+
+
 @hps.composite
 def base_kernels(
     draw,
@@ -932,6 +1024,14 @@ def kernels(
         feature_ndims=feature_ndims,
         enable_vars=enable_vars,
         depth=depth))
+  elif kernel_name == 'SpectralMixture':
+    return draw(spectral_mixtures(
+        batch_shape=batch_shape,
+        event_dim=event_dim,
+        feature_dim=feature_dim,
+        feature_ndims=feature_ndims,
+        enable_vars=enable_vars,
+        depth=depth))
 
   raise ValueError('Kernel name {} not found.'.format(kernel_name))
 
@@ -952,6 +1052,7 @@ def constrain_to_range(low, high):
     'concentration0': constrain_to_range(1., 2.),
     'concentration1': constrain_to_range(1., 2.),
     'df': constrain_to_range(2., 5.),
+    'scales': constrain_to_range(1., 2.),
     'slope_variance': constrain_to_range(0.1, 0.5),
     'exponent': lambda x: tf.math.floor(constrain_to_range(1, 4.)(x)),
     'length_scale': constrain_to_range(1., 6.),
diff --git a/tensorflow_probability/python/math/psd_kernels/spectral_mixture.py b/tensorflow_probability/python/math/psd_kernels/spectral_mixture.py
@@ -0,0 +1,193 @@
+# Copyright 2021 The TensorFlow Probability Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""The SpectralMixture kernel."""
+
+import numpy as np
+import tensorflow.compat.v2 as tf
+
+from tensorflow_probability.python.internal import assert_util
+from tensorflow_probability.python.internal import dtype_util
+from tensorflow_probability.python.internal import parameter_properties
+from tensorflow_probability.python.internal import prefer_static as ps
+from tensorflow_probability.python.internal import tensor_util
+from tensorflow_probability.python.math import generic as tfp_math
+from tensorflow_probability.python.math.psd_kernels import positive_semidefinite_kernel as psd_kernel
+from tensorflow_probability.python.math.psd_kernels.internal import util
+
+
+__all__ = ['SpectralMixture']
+
+
+class SpectralMixture(psd_kernel.AutoCompositeTensorPsdKernel):
+  """The SpectralMixture kernel.
+
+  This kernel is derived from parameterizing the spectral density of a
+  stationary kernel by a mixture of `m` diagonal multivariate normal
+  distributions [1].
+
+  This in turn parameterizes the following kernel:
+
+    ```none
+    k(x, y) = sum_j w[j] (prod_i
+        exp(-2 * (pi * (x[i] - y[i]) * s[j][i])**2) *
+        cos(2 * pi * (x[i] - y[i]) * m[j][i]))
+    ```
+
+  where:
+    * `j` indexes the mixture components (as mentioned above).
+    * `w[j]` are the mixture weights.
+    * `m[j]` and `s[j]` parameterize a `MultivariateNormalDiag(m[j], s[j])`.
+      In other words, they are the mean and diagonal scale for each mixture
+      component.
+
+  NOTE: This kernel can result in negative off-diagonal entries.
+
+  #### References
+  [1]: A. Wilson, R. P. Adams.
+       Gaussian Process Kernels for Pattern Discovery and Extrapolation.
+       https://arxiv.org/abs/1302.4245
+  """
+
+  def __init__(self,
+               logits,
+               locs,
+               scales,
+               feature_ndims=1,
+               validate_args=False,
+               name='SpectralMixture'):
+    """Construct a SpectralMixture kernel instance.
+
+    Args:
+      logits: Floating-point `Tensor` of shape `[..., M]`, whose exponential
+        gives the (unnormalized) mixture weights for the spectral density. Must
+        be broadcastable with `locs` and `scales`.
+      locs: Floating-point `Tensor` of shape `[..., M, F1, F2, ... FN]`, which
+        represents the location parameter of each of the `M` mixture components.
+        `N` is `feature_ndims`. Must be broadcastable with `logits` and
+        `scales`.
+      scales: Positive Floating-point `Tensor` of shape
+        `[..., M, F1, F2, ..., FN]`, which represents the scale parameter of
+        each of the `M` mixture components. `N` is `feature_ndims`. Must be
+        broadcastable with `locs` and `logits`. These parameters act like
+        inverse length scale parameters.
+      feature_ndims: Python `int` number of rightmost dims to include in the
+        squared difference norm in the exponential.
+      validate_args: If `True`, parameters are checked for validity despite
+        possibly degrading runtime performance.
+      name: Python `str` name prefixed to Ops created by this class.
+    """
+    parameters = dict(locals())
+    with tf.name_scope(name):
+      dtype = util.maybe_get_common_dtype([logits, locs, scales])
+      self._logits = tensor_util.convert_nonref_to_tensor(
+          logits, name='logits', dtype=dtype)
+      self._locs = tensor_util.convert_nonref_to_tensor(
+          locs, name='locs', dtype=dtype)
+      self._scales = tensor_util.convert_nonref_to_tensor(
+          scales, name='scales', dtype=dtype)
+      super(SpectralMixture, self).__init__(
+          feature_ndims,
+          dtype=dtype,
+          name=name,
+          validate_args=validate_args,
+          parameters=parameters)
+
+  @property
+  def logits(self):
+    """Logits parameter."""
+    return self._logits
+
+  @property
+  def locs(self):
+    """Location parameter."""
+    return self._locs
+
+  @property
+  def scales(self):
+    """Scale parameter."""
+    return self._scales
+
+  @classmethod
+  def _parameter_properties(cls, dtype):
+    from tensorflow_probability.python.bijectors import softplus  # pylint:disable=g-import-not-at-top
+    return dict(
+        logits=parameter_properties.ParameterProperties(event_ndims=1),
+        locs=parameter_properties.ParameterProperties(
+            event_ndims=lambda self: self.feature_ndims + 1),
+        scales=parameter_properties.ParameterProperties(
+            event_ndims=lambda self: self.feature_ndims + 1,
+            default_constraining_bijector_fn=(
+                lambda: softplus.Softplus(low=dtype_util.eps(dtype)))))
+
+  def _apply_with_distance(
+      self, x1, x2, pairwise_square_distance, example_ndims=0):
+    exponent = -2. * pairwise_square_distance
+    locs = util.pad_shape_with_ones(
+        self.locs, ndims=example_ndims, start=-(self.feature_ndims + 1))
+    cos_coeffs = tf.math.cos(2 * np.pi * (x1 - x2) * locs)
+    feature_ndims = ps.cast(self.feature_ndims, ps.rank(cos_coeffs).dtype)
+    reduction_axes = ps.range(
+        ps.rank(cos_coeffs) - feature_ndims, ps.rank(cos_coeffs))
+    coeff_sign = tf.math.reduce_prod(
+        tf.math.sign(cos_coeffs), axis=reduction_axes)
+    log_cos_coeffs = tf.math.reduce_sum(
+        tf.math.log(tf.math.abs(cos_coeffs)), axis=reduction_axes)
+
+    logits = util.pad_shape_with_ones(
+        self.logits, ndims=example_ndims, start=-1)
+
+    log_result, sign = tfp_math.reduce_weighted_logsumexp(
+        exponent + log_cos_coeffs + logits,
+        coeff_sign, return_sign=True, axis=-(example_ndims + 1))
+
+    return sign * tf.math.exp(log_result)
+
+  def _apply(self, x1, x2, example_ndims=0):
+    # Add an extra dimension to x1 and x2 so they broadcast with scales.
+    # Broadcast layout: [B1, ..., Bb, M, E1, ..., Ee, F1, ..., FN]
+    x1 = util.pad_shape_with_ones(
+        x1, ndims=1, start=-(self.feature_ndims + example_ndims + 1))
+    x2 = util.pad_shape_with_ones(
+        x2, ndims=1, start=-(self.feature_ndims + example_ndims + 1))
+    scales = util.pad_shape_with_ones(
+        self.scales, ndims=example_ndims, start=-(self.feature_ndims + 1))
+    pairwise_square_distance = util.sum_rightmost_ndims_preserving_shape(
+        tf.math.square(np.pi * (x1 - x2) * scales), ndims=self.feature_ndims)
+    return self._apply_with_distance(
+        x1, x2, pairwise_square_distance, example_ndims=example_ndims)
+
+  def _matrix(self, x1, x2):
+    # Add an extra dimension to x1 and x2 so it broadcasts with scales.
+    x1 = util.pad_shape_with_ones(x1, ndims=1, start=-(self.feature_ndims + 2))
+    x2 = util.pad_shape_with_ones(x2, ndims=1, start=-(self.feature_ndims + 2))
+    scales = util.pad_shape_with_ones(
+        self.scales, ndims=1, start=-(self.feature_ndims + 1))
+    pairwise_square_distance = util.pairwise_square_distance_matrix(
+        np.pi * x1 * scales, np.pi * x2 * scales, self.feature_ndims)
+    x1 = util.pad_shape_with_ones(x1, ndims=1, start=-(self.feature_ndims + 1))
+    x2 = util.pad_shape_with_ones(x2, ndims=1, start=-(self.feature_ndims + 2))
+    # Expand `x1` and `x2` so that they broadcast against each other.
+    return self._apply_with_distance(
+        x1, x2, pairwise_square_distance, example_ndims=2)
+
+  def _parameter_control_dependencies(self, is_init):
+    if not self.validate_args:
+      return []
+    assertions = []
+    if is_init != tensor_util.is_ref(self._scales):
+      assertions.append(assert_util.assert_positive(
+          self._scales,
+          message='`scales` must be positive.'))
+    return assertions
diff --git a/tensorflow_probability/python/math/psd_kernels/spectral_mixture_test.py b/tensorflow_probability/python/math/psd_kernels/spectral_mixture_test.py