Commit fd8be3c

davmre authored and tensorflower-gardener committed
Add an experimental_sample_and_log_prob method to TFP Distributions.
This is an alternative to bijector caching, intended to be useful (at least) in cases where the cache would otherwise miss. For example, autobatched `JointDistribution`s break caching (unavoidably?), because the values the bijector sees inside the vmap come from a different graph than those returned to the user. But `sample_and_log_prob` still works in this case, enabling efficient VI. Specialized implementations for specific distributions will be added in a future change.

PiperOrigin-RevId: 374990765
1 parent 4d8ce35 commit fd8be3c
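To make the new API concrete, here is a minimal usage sketch (not part of the commit; the surrogate, target, and seed are illustrative) showing the single-call sample-plus-density pattern that makes VI efficient:

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

# Illustrative VI surrogate: a softplus-transformed normal.
surrogate = tfd.TransformedDistribution(
    distribution=tfd.Normal(loc=0., scale=1.),
    bijector=tfb.Softplus())

# Draw samples and their log density in one call, with no reliance on the
# bijector cache to make the log_prob cheap.
z, log_q = surrogate.experimental_sample_and_log_prob([8], seed=42)

# Monte Carlo ELBO estimate against a stand-in target log density.
target_log_prob = tfd.Gamma(concentration=2., rate=2.).log_prob
elbo = tf.reduce_mean(target_log_prob(z) - log_q)
```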

5 files changed: +171 -36 lines


tensorflow_probability/python/distributions/distribution.py

Lines changed: 71 additions & 16 deletions
@@ -449,6 +449,7 @@ def _log_prob(self, value):
   - `_default_event_space_bijector`.
   - `_parameter_properties` (to support automatic batch shape derivation,
     batch slicing and other features).
+  - `_sample_and_log_prob`.
 
 Note that subclasses of existing Distributions that redefine `__init__` do
 *not* automatically inherit
@@ -1166,22 +1167,21 @@ def _sample_n(self, n, seed=None, **kwargs):
     raise NotImplementedError('sample_n is not implemented: {}'.format(
         type(self).__name__))
 
-  def _call_sample_n(self, sample_shape, seed, name, **kwargs):
+  def _call_sample_n(self, sample_shape, seed, **kwargs):
     """Wrapper around _sample_n."""
-    with self._name_and_control_scope(name):
-      if JAX_MODE and seed is None:
-        raise ValueError('Must provide JAX PRNGKey as `dist.sample(seed=.)`')
-      sample_shape = ps.convert_to_shape_tensor(
-          ps.cast(sample_shape, tf.int32), name='sample_shape')
-      sample_shape, n = self._expand_sample_shape_to_vector(
-          sample_shape, 'sample_shape')
-      samples = self._sample_n(
-          n, seed=seed() if callable(seed) else seed, **kwargs)
-      batch_event_shape = ps.shape(samples)[1:]
-      final_shape = ps.concat([sample_shape, batch_event_shape], 0)
-      samples = tf.reshape(samples, final_shape)
-      samples = self._set_sample_static_shape(samples, sample_shape)
-      return samples
+    if JAX_MODE and seed is None:
+      raise ValueError('Must provide JAX PRNGKey as `dist.sample(seed=.)`')
+    sample_shape = ps.convert_to_shape_tensor(
+        ps.cast(sample_shape, tf.int32), name='sample_shape')
+    sample_shape, n = self._expand_sample_shape_to_vector(
+        sample_shape, 'sample_shape')
+    samples = self._sample_n(
+        n, seed=seed() if callable(seed) else seed, **kwargs)
+    batch_event_shape = ps.shape(samples)[1:]
+    final_shape = ps.concat([sample_shape, batch_event_shape], 0)
+    samples = tf.reshape(samples, final_shape)
+    samples = self._set_sample_static_shape(samples, sample_shape)
+    return samples
 
   def sample(self, sample_shape=(), seed=None, name='sample', **kwargs):
     """Generate samples of the specified shape.
@@ -1198,7 +1198,62 @@ def sample(self, sample_shape=(), seed=None, name='sample', **kwargs):
     Returns:
       samples: a `Tensor` with prepended dimensions `sample_shape`.
     """
-    return self._call_sample_n(sample_shape, seed, name, **kwargs)
+    with self._name_and_control_scope(name):
+      return self._call_sample_n(sample_shape, seed, **kwargs)
+
+  def _call_sample_and_log_prob(self, sample_shape, seed, **kwargs):
+    """Wrapper around `_sample_and_log_prob`."""
+    if hasattr(self, '_sample_and_log_prob'):
+      sample_shape = ps.convert_to_shape_tensor(
+          ps.cast(sample_shape, tf.int32), name='sample_shape')
+      return self._sample_and_log_prob(
+          distribution_util.expand_to_vector(
+              sample_shape, tensor_name='sample_shape'),
+          seed=seed, **kwargs)
+
+    # Naive default implementation. This calls private, rather than public,
+    # methods, to avoid duplicating the name_and_control_scope.
+    value = self._call_sample_n(sample_shape, seed=seed, **kwargs)
+    if hasattr(self, '_log_prob'):
+      log_prob = self._log_prob(value, **kwargs)
+    elif hasattr(self, '_prob'):
+      log_prob = tf.math.log(self._prob(value, **kwargs))
+    else:
+      raise NotImplementedError('log_prob is not implemented: {}'.format(
+          type(self).__name__))
+    return value, log_prob
+
+  def experimental_sample_and_log_prob(self, sample_shape=(), seed=None,
+                                       name='sample_and_log_prob', **kwargs):
+    """Samples from this distribution and returns the log density of the sample.
+
+    The default implementation simply calls `sample` and `log_prob`:
+
+    ```
+    def _sample_and_log_prob(self, sample_shape, seed, **kwargs):
+      x = self.sample(sample_shape=sample_shape, seed=seed, **kwargs)
+      return x, self.log_prob(x, **kwargs)
+    ```
+
+    However, some subclasses may provide more efficient and/or numerically
+    stable implementations.
+
+    Args:
+      sample_shape: integer `Tensor` desired shape of samples to draw.
+        Default value: `()`.
+      seed: Python integer or `tfp.util.SeedStream` instance, for seeding PRNG.
+        Default value: `None`.
+      name: name to give to the op.
+        Default value: `'sample_and_log_prob'`.
+      **kwargs: Named arguments forwarded to subclass implementation.
+    Returns:
+      samples: a `Tensor`, or structure of `Tensor`s, with prepended dimensions
+        `sample_shape`.
+      log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
+        values of type `self.dtype`.
+    """
+    with self._name_and_control_scope(name):
+      return self._call_sample_and_log_prob(sample_shape, seed=seed, **kwargs)
 
   def _call_log_prob(self, value, name, **kwargs):
     """Wrapper around _log_prob."""

tensorflow_probability/python/distributions/distribution_properties_test.py

Lines changed: 26 additions & 0 deletions
@@ -288,6 +288,32 @@ def testDistribution(self, dist_name, data):
       self.assertAllEqual(s1, s2)
 
 
+@test_util.test_all_tf_execution_regimes
+class SampleAndLogProbTest(test_util.TestCase):
+
+  @parameterized.named_parameters(
+      {'testcase_name': dname, 'dist_name': dname}
+      for dname in sorted(list(dhps.INSTANTIABLE_BASE_DISTS.keys()) +
+                          list(dhps.INSTANTIABLE_META_DISTS)))
+  @hp.given(hps.data())
+  @tfp_hps.tfp_hp_settings()
+  def testDistribution(self, dist_name, data):
+    dist = data.draw(dhps.distributions(dist_name=dist_name, enable_vars=False,
+                                        validate_args=False))
+    seed = test_util.test_seed(sampler_type='stateless')
+    sample_shape = [2, 1]
+    with tfp_hps.no_tf_rank_errors(), kernel_hps.no_pd_errors():
+      s1, lp1 = dist.experimental_sample_and_log_prob(sample_shape, seed=seed)
+      s2 = dist.sample(sample_shape, seed=seed)
+      self.assertAllClose(s1, s2, atol=1e-4)
+
+      # Sanity-check the log prob. The actual values may differ arbitrarily (if
+      # the `sample_and_log_prob` implementation is more stable) or be NaN, but
+      # they should at least have the same shape.
+      lp2 = dist.log_prob(s1)
+      self.assertAllEqual(lp1.shape, lp2.shape)
+
+
 @test_util.test_all_tf_execution_regimes
 class NoNansTest(test_util.TestCase, dhps.TestCase):
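The test hinges on stateless seeds being deterministic: the same seed must yield the same draw from both code paths. A standalone sketch of that property (the distribution and seed are illustrative, and assume stateless-seed support in your TFP build):

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

dist = tfd.Gamma(concentration=2., rate=1.)
seed = (1, 2)  # stateless seed: same seed, same draw

s1, lp1 = dist.experimental_sample_and_log_prob([2, 1], seed=seed)
s2 = dist.sample([2, 1], seed=seed)
# s1 and s2 agree elementwise; lp1 has the same shape as dist.log_prob(s1).
```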

tensorflow_probability/python/distributions/joint_distribution.py

Lines changed: 7 additions & 8 deletions
@@ -727,14 +727,13 @@ def _flat_resolve_names(self, dummy_name='var'):
   # tactically implement the `_call_sample_n` redirector. We don't want to
   # override the public level because then tfp.layers can't take generic
   # `Distribution.sample` as argument for the `convert_to_tensor_fn` parameter.
-  def _call_sample_n(self, sample_shape, seed, name, value=None, **kwargs):
-    with self._name_and_control_scope(name):
-      return self._sample_n(
-          sample_shape,
-          seed=seed() if callable(seed) else seed,
-          value=self._resolve_value(value=value,
-                                    allow_partially_specified=True,
-                                    **kwargs))
+  def _call_sample_n(self, sample_shape, seed, value=None, **kwargs):
+    return self._sample_n(
+        sample_shape,
+        seed=seed() if callable(seed) else seed,
+        value=self._resolve_value(value=value,
+                                  allow_partially_specified=True,
+                                  **kwargs))
 
   def _execute_model(self,
                      sample_shape=(),
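This redirect is what lets autobatched joint distributions, the motivating case from the commit message, use the new method even though their vmap'd execution defeats bijector caching. A sketch (the two-variable model is illustrative):

```python
import tensorflow_probability as tfp

tfd = tfp.distributions

# Autobatched JDs run the model inside a vmap, so the tensors seen by any
# downstream bijector differ from those returned to the user, and the
# bijector cache misses. Sampling and scoring in one call sidesteps this.
jd = tfd.JointDistributionSequentialAutoBatched([
    tfd.Normal(loc=0., scale=1.),
    lambda z: tfd.LogNormal(loc=z, scale=0.5),
])
xs, lp = jd.experimental_sample_and_log_prob(seed=42)
```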

tensorflow_probability/python/distributions/transformed_distribution.py

Lines changed: 32 additions & 12 deletions
@@ -318,21 +318,41 @@ def _batch_shape(self):
         tf.broadcast_static_shape, tf.nest.flatten(batch_shape))
     return batch_shape
 
-  def _call_sample_n(self, sample_shape, seed, name, **kwargs):
+  def _call_sample_n(self, sample_shape, seed, **kwargs):
     # We override `_call_sample_n` rather than `_sample_n` so we can ensure that
     # the result of `self.bijector.forward` is not modified (and thus caching
     # works).
-    with self._name_and_control_scope(name):
-      distribution_kwargs, bijector_kwargs = self._kwargs_split_fn(kwargs)
-
-      # First, generate samples from the base distribution.
-      x = self.distribution.sample(sample_shape=sample_shape,
-                                   seed=seed,
-                                   **distribution_kwargs)
-      # Apply the bijector's forward transformation. For caching to
-      # work, it is imperative that this is the last modification to the
-      # returned result.
-      return self.bijector.forward(x, **bijector_kwargs)
+    distribution_kwargs, bijector_kwargs = self._kwargs_split_fn(kwargs)
+
+    # First, generate samples from the base distribution.
+    x = self.distribution.sample(sample_shape=sample_shape,
+                                 seed=seed,
+                                 **distribution_kwargs)
+    # Apply the bijector's forward transformation. For caching to
+    # work, it is imperative that this is the last modification to the
+    # returned result.
+    return self.bijector.forward(x, **bijector_kwargs)
+
+  def _sample_and_log_prob(self, sample_shape, seed, **kwargs):
+    if not self.bijector._is_injective:  # pylint: disable=protected-access
+      # Computing log_prob with a non-injective bijector requires an explicit
+      # inverse to get all points in the inverse image, so we can't get by
+      # with just doing the forward pass.
+      return super()._sample_and_log_prob(sample_shape, seed=seed, **kwargs)
+
+    distribution_kwargs, bijector_kwargs = self._kwargs_split_fn(kwargs)
+    x, base_distribution_log_prob = (
+        self.distribution.experimental_sample_and_log_prob(
+            sample_shape, seed, **distribution_kwargs))
+    y = self.bijector.forward(x, **bijector_kwargs)
+    fldj = self.bijector.forward_log_det_jacobian(
+        x,
+        event_ndims=tf.nest.map_structure(
+            ps.rank_from_shape,
+            self.distribution.event_shape_tensor()),
+        **bijector_kwargs)
+    return y, (base_distribution_log_prob -
+               tf.cast(fldj, base_distribution_log_prob.dtype))
 
   def _log_prob(self, y, **kwargs):
     distribution_kwargs, bijector_kwargs = self._kwargs_split_fn(kwargs)
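The specialized `_sample_and_log_prob` is the change-of-variables identity log p_Y(y) = log p_X(x) - FLDJ(x), evaluated at the freshly drawn x, so no inverse pass is ever needed. A quick check with a log-normal built from well-known pieces (values illustrative):

```python
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfb = tfp.bijectors

# y = exp(x) with x ~ Normal(0, 1), i.e. a standard log-normal.
log_normal = tfd.TransformedDistribution(tfd.Normal(0., 1.), tfb.Exp())

y, lp = log_normal.experimental_sample_and_log_prob([4], seed=42)

# Recompute the density the long way round, via the explicit inverse:
x = tf.math.log(y)
lp_manual = (tfd.Normal(0., 1.).log_prob(x)
             - tfb.Exp().forward_log_det_jacobian(x, event_ndims=0))
# lp and lp_manual agree up to floating-point error.
```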

tensorflow_probability/python/distributions/transformed_distribution_test.py

Lines changed: 35 additions & 0 deletions
@@ -259,6 +259,41 @@ def _forward_log_det_jacobian(self, x):
         identity_log_normal.log_prob(
             identity_log_normal.sample([2, 3], seed=test_util.test_seed()))
 
+  def testSampleAndLogprob(self):
+    class ExpForwardOnly(tfb.Bijector):
+
+      def __init__(self):
+        super(ExpForwardOnly, self).__init__(forward_min_event_ndims=0)
+
+      def _forward(self, x):
+        return tf.exp(x)
+
+      def _forward_log_det_jacobian(self, x):
+        return tf.convert_to_tensor(value=x)
+
+    exp_forward_only = ExpForwardOnly()
+
+    mu = 3.0
+    sigma = 0.02
+    log_normal = tfd.TransformedDistribution(
+        distribution=tfd.Normal(loc=mu, scale=sigma),
+        bijector=exp_forward_only)
+
+    sample, log_pdf = self.evaluate(log_normal.experimental_sample_and_log_prob(
+        [2, 3], seed=test_util.test_seed()))
+    expected_log_pdf = stats.lognorm.logpdf(
+        sample, s=sigma, scale=np.exp(mu))
+    self.assertAllClose(expected_log_pdf, log_pdf, rtol=1e-4, atol=0.)
+
+    sample, log_pdf = self.evaluate(
+        log_normal.experimental_sample_and_log_prob(seed=test_util.test_seed()))
+    expected_log_pdf = stats.lognorm.logpdf(
+        sample, s=sigma, scale=np.exp(mu))
+    self.assertAllClose(expected_log_pdf, log_pdf, rtol=1e-4, atol=0.)
+
+    sample2 = self.evaluate(log_normal.sample(seed=test_util.test_seed()))
+    self.assertAllClose(sample, sample2, rtol=1e-4)
+
   def testCachedSamplesInvert(self):
     class ExpInverseOnly(tfb.Bijector):
264299
