BREAKING CHANGE: Stop unpacking seeds when splitting in JAX

sharadmv · tensorflower-gardener · commit 2c56628708c4 · 2021-07-23T16:59:10.000-07:00
Before this change, `tfp.random.split_seed` would return a list of seeds
instead of an array of seeds in JAX, causing a gather for each seed. With a
large number of seeds, this can cause a slowdown in both trace and compile
time. This change returns an array of seeds. If your code relies on using a
list of seeds instead of an array, you can wrap the call to
`tfp.random.split_seed` in a `list` or `jnp.unstack`.

PiperOrigin-RevId: 386558360
diff --git a/tensorflow_probability/python/internal/samplers.py b/tensorflow_probability/python/internal/samplers.py
@@ -160,12 +160,13 @@ def split_seed(seed, n=2, salt=None, name=None):
 
   See https://github.com/tensorflow/probability/blob/main/PRNGS.md
   for details.
-
   Args:
     seed: The seed to split; may be an `int`, an `(int, int) tuple`, or a
       `Tensor`. `int` seeds are converted to `Tensor` seeds using
       `tf.random.uniform` stateful sampling. Tuples are converted to `Tensor`.
-    n: The number of splits to return.
+    n: The number of splits to return. In TensorFlow, if `n` is an integer, this
+      function returns a list of seeds and otherwise returns a `Tensor` of
+      seeds.  In JAX, this function always returns an array of seeds.
     salt: Optional `str` salt to mix with the seed.
     name: Optional name to scope related ops.
 
@@ -184,7 +185,7 @@ def split_seed(seed, n=2, salt=None, name=None):
     seed = sanitize_seed(seed, salt=salt)
     if JAX_MODE:
       from jax import random as jaxrand  # pylint: disable=g-import-not-at-top
-      return list(jaxrand.split(seed, n))
+      return jaxrand.split(seed, n)
     seeds = tf.random.stateless_uniform(
         [n, 2], seed=seed, minval=None, maxval=None, dtype=SEED_DTYPE)
     if isinstance(n, six.integer_types):
diff --git a/tensorflow_probability/python/mcmc/hmc.py b/tensorflow_probability/python/mcmc/hmc.py
@@ -720,7 +720,7 @@ def one_step(self, current_state, previous_kernel_results, seed=None):
           state_gradients_are_stopped=self.state_gradients_are_stopped)
 
       seed = samplers.sanitize_seed(seed)  # Retain for diagnostics.
-      seeds = samplers.split_seed(seed, n=len(current_state_parts))
+      seeds = list(samplers.split_seed(seed, n=len(current_state_parts)))
       seeds = distribute_lib.fold_in_axis_index(
           seeds, self.experimental_shard_axis_names)
 
diff --git a/tensorflow_probability/python/mcmc/langevin.py b/tensorflow_probability/python/mcmc/langevin.py
@@ -455,8 +455,8 @@ def one_step(self, current_state, previous_kernel_results, seed=None):
             self.parallel_iterations)
 
         seed = samplers.sanitize_seed(seed)  # Retain for diagnostics.
-        seeds = samplers.split_seed(
-            seed, n=len(current_state_parts), salt='langevin.one_step')
+        seeds = list(samplers.split_seed(
+            seed, n=len(current_state_parts), salt='langevin.one_step'))
         seeds = distribute_lib.fold_in_axis_index(
             seeds, self.experimental_shard_axis_names)
 
diff --git a/tensorflow_probability/python/mcmc/nuts.py b/tensorflow_probability/python/mcmc/nuts.py
@@ -519,7 +519,7 @@ def experimental_with_shard_axes(self, shard_axis_names):
   def _start_trajectory_batched(self, state, target_log_prob, seed):
     """Computations needed to start a trajectory."""
     with tf.name_scope('start_trajectory_batched'):
-      seeds = samplers.split_seed(seed, n=len(state) + 1)
+      seeds = list(samplers.split_seed(seed, n=len(state) + 1))
       momentum_seeds = distribute_lib.fold_in_axis_index(
           seeds[:-1], self.experimental_shard_axis_names)
       momentum = [
diff --git a/tensorflow_probability/python/mcmc/random_walk_metropolis.py b/tensorflow_probability/python/mcmc/random_walk_metropolis.py
@@ -104,7 +104,7 @@ def _fn(state_parts, seed, experimental_shard_axis_names=None):
       if len(state_parts) != len(scales):
         raise ValueError('`scale` must broadcast with `state_parts`.')
 
-      part_seeds = samplers.split_seed(seed, n=len(state_parts))
+      part_seeds = list(samplers.split_seed(seed, n=len(state_parts)))
       part_seeds = distribute_lib.fold_in_axis_index(
           part_seeds, experimental_shard_axis_names)
 
diff --git a/tensorflow_probability/python/mcmc/slice_sampler_kernel.py b/tensorflow_probability/python/mcmc/slice_sampler_kernel.py
@@ -351,7 +351,7 @@ def experimental_with_shard_axes(self, shard_axis_names):
 def _choose_random_direction(current_state_parts, batch_rank, seed=None,
                              experimental_shard_axis_names=None):
   """Chooses a random direction in the event space."""
-  seeds = samplers.split_seed(seed, n=len(current_state_parts))
+  seeds = list(samplers.split_seed(seed, n=len(current_state_parts)))
   seeds = distribute_lib.fold_in_axis_index(
       seeds, experimental_shard_axis_names)
   # Sample random directions across each of the input components.