@@ -224,47 +224,90 @@ def _event_shape(self):
   def _event_shape_tensor(self):
     return self.distribution.event_shape_tensor()

-  def _sample_n(self, n, seed=None):
+  def _augment_sample_shape(self, sample_shape):
+    # Suppose we have:
+    # - sample shape of `[n]`,
+    # - underlying distribution batch shape of `[2, 1]`,
+    # - final broadcast batch shape of `[4, 2, 3]`.
+    # Then we must draw `sample_shape + [12]` samples, where
+    # `12 == n_batch // underlying_n_batch`.
     batch_shape = self.batch_shape_tensor()
-    batch_rank = ps.rank_from_shape(batch_shape)
     n_batch = ps.reduce_prod(batch_shape)
+    underlying_batch_shape = self.distribution.batch_shape_tensor()
+    underlying_n_batch = ps.reduce_prod(underlying_batch_shape)
+    return ps.concat(
+        [sample_shape,
+         [ps.maximum(0, n_batch // underlying_n_batch)]],
+        axis=0)
+
+  def _transpose_and_reshape_result(self, x, sample_shape, event_shape=None):
+    if event_shape is None:
+      event_shape = self.event_shape_tensor()
+
+    batch_shape = self.batch_shape_tensor()
+    batch_rank = ps.rank_from_shape(batch_shape)

     underlying_batch_shape = self.distribution.batch_shape_tensor()
     underlying_batch_rank = ps.rank_from_shape(underlying_batch_shape)
-    underlying_n_batch = ps.reduce_prod(underlying_batch_shape)

-    # Left pad underlying shape with any necessary ones.
+    # Continuing the example from `_augment_sample_shape`, suppose we have:
+    # - sample shape of `[n]`,
+    # - underlying distribution batch shape of `[2, 1]`,
+    # - final broadcast batch shape of `[4, 2, 3]`,
+    # and have drawn an `x` of shape `[n, 12, 2, 1] + event_shape`, which we
+    # ultimately want to have shape `[n, 4, 2, 3] + event_shape`.
+
+    # First, we reshape to expand out the batch elements:
+    # `shape_with_doubled_batch == [n] + [4, 1, 3] + [1, 2, 1] + event_shape`,
+    # where `[1, 2, 1]` is the fully-expanded underlying batch shape, and
+    # `[4, 1, 3]` is the shape of the elements being added by broadcasting.
     underlying_bcast_shp = ps.concat(
         [ps.ones([ps.maximum(batch_rank - underlying_batch_rank, 0)],
                  dtype=underlying_batch_shape.dtype),
          underlying_batch_shape],
         axis=0)
-
-    # Determine how many underlying samples to produce.
-    n_bcast_samples = ps.maximum(0, n_batch // underlying_n_batch)
-    samps = self.distribution.sample([n, n_bcast_samples], seed=seed)
-
     is_dim_bcast = ps.not_equal(batch_shape, underlying_bcast_shp)
+    x_with_doubled_batch = tf.reshape(
+        x,
+        ps.concat([sample_shape,
+                   ps.where(is_dim_bcast, batch_shape, 1),
+                   underlying_bcast_shp,
+                   event_shape], axis=0))
+
+    # Next, construct the permutation that interleaves the batch dimensions,
+    # resulting in samples with shape
+    # `[n] + [4, 1] + [1, 2] + [3, 1] + event_shape`.
+    # Note that each interleaved pair of batch dimensions contains exactly one
+    # dim of size `1` and one of size `>= 1`.
+    sample_ndims = ps.rank_from_shape(sample_shape)
+    x_with_interleaved_batch = tf.transpose(
+        x_with_doubled_batch,
+        perm=ps.concat([
+            ps.range(sample_ndims),
+            sample_ndims + ps.reshape(
+                ps.stack([ps.range(batch_rank),
+                          ps.range(batch_rank) + batch_rank], axis=-1),
+                [-1]),
+            sample_ndims + 2 * batch_rank + ps.range(
+                ps.rank_from_shape(event_shape))], axis=0))
+
+    # Final reshape to remove the spurious `1` dimensions.
+    return tf.reshape(
+        x_with_interleaved_batch,
+        ps.concat([sample_shape, batch_shape, event_shape], axis=0))

-    event_shape = self.event_shape_tensor()
-    event_rank = ps.rank_from_shape(event_shape)
-    shp = ps.concat([[n], ps.where(is_dim_bcast, batch_shape, 1),
-                     underlying_bcast_shp,
-                     event_shape], axis=0)
-    # Reshape to expand n_bcast_samples and ones-padded underlying_bcast_shp.
-    samps = tf.reshape(samps, shp)
-    # Interleave broadcast and underlying axis indices for transpose.
-    interleaved_batch_axes = ps.reshape(
-        ps.stack([ps.range(batch_rank),
-                  ps.range(batch_rank) + batch_rank],
-                 axis=-1),
-        [-1]) + 1
-
-    event_axes = ps.range(event_rank) + (1 + 2 * batch_rank)
-    perm = ps.concat([[0], interleaved_batch_axes, event_axes], axis=0)
-    samps = tf.transpose(samps, perm=perm)
-    # Finally, reshape to the fully-broadcast batch shape.
-    return tf.reshape(samps, ps.concat([[n], batch_shape, event_shape], axis=0))
+  def _sample_n(self, n, seed=None):
+    sample_shape = ps.reshape(n, [1])
+    x = self.distribution.sample(
+        self._augment_sample_shape(sample_shape), seed=seed)
+    return self._transpose_and_reshape_result(x, sample_shape=sample_shape)
+
+  def _sample_and_log_prob(self, sample_shape, seed):
+    x, lp = self.distribution.experimental_sample_and_log_prob(
+        self._augment_sample_shape(sample_shape), seed=seed)
+    return (self._transpose_and_reshape_result(x, sample_shape),
+            self._transpose_and_reshape_result(lp, sample_shape,
+                                               event_shape=()))

   _log_prob = _make_bcast_fn('log_prob', n_event_shapes=0)
   _prob = _make_bcast_fn('prob', n_event_shapes=0)
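
For intuition, below is a standalone NumPy sketch (not part of this commit, and not using TFP's `ps`/`tf` helpers) of the bookkeeping the new helpers perform: draw enough extra samples to fill the broadcast batch, expand the flat "extra samples" axis into the dimensions added by broadcasting, interleave broadcast and underlying batch axes, and collapse to the final batch shape. The shapes ([n] sample shape, [2, 1] underlying batch, [4, 2, 3] broadcast batch) are the ones used in the diff's comments; the variable names are illustrative only.

import numpy as np

# Illustrative shapes taken from the comments in the diff above.
sample_shape = [5]                       # a concrete `n`
underlying_batch_shape = [2, 1]          # batch shape of the wrapped distribution
batch_shape = [4, 2, 3]                  # final broadcast batch shape
event_shape = [7]

n_batch = int(np.prod(batch_shape))                        # 24
underlying_n_batch = int(np.prod(underlying_batch_shape))  # 2
n_extra = max(0, n_batch // underlying_n_batch)            # 12

# Stand-in for drawing `sample_shape + [n_extra]` samples from the underlying
# distribution: shape [5, 12, 2, 1, 7].
x = np.random.normal(
    size=sample_shape + [n_extra] + underlying_batch_shape + event_shape)

# Left-pad the underlying batch shape to the broadcast rank: [1, 2, 1].
batch_rank = len(batch_shape)
padded = ([1] * (batch_rank - len(underlying_batch_shape))
          + underlying_batch_shape)
# Dimensions contributed purely by broadcasting: [4, 1, 3].
bcast_dims = [b if b != u else 1 for b, u in zip(batch_shape, padded)]

# Expand the flat "extra samples" axis into the broadcast dims:
# [5] + [4, 1, 3] + [1, 2, 1] + [7].
doubled = x.reshape(sample_shape + bcast_dims + padded + event_shape)

# Interleave broadcast and underlying batch axes, then drop the size-1 halves.
sample_ndims = len(sample_shape)
perm = (list(range(sample_ndims))
        + [sample_ndims + i + half * batch_rank
           for i in range(batch_rank) for half in (0, 1)]
        + [sample_ndims + 2 * batch_rank + i
           for i in range(len(event_shape))])
result = doubled.transpose(perm).reshape(
    sample_shape + batch_shape + event_shape)
print(result.shape)  # (5, 4, 2, 3, 7)

Each of the 12 extra draws fills one of the batch cells created by broadcasting, so every cell of the [4, 2, 3] batch ends up with its own draw rather than a copy of its neighbor; routing both `_sample_n` and `_sample_and_log_prob` through `_augment_sample_shape` and `_transpose_and_reshape_result` keeps that shape bookkeeping in one place.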