Allow partially specifying values for nested joint distributions.

SiegeLordEx · tensorflower-gardener · commit 40d9757ce66d · 2021-06-02T16:17:22.000-07:00
The issue was twofold. First, the various sample_and_value functions were over-eager in bypassing the `sample` method. This was fixed via a kludge-like change; it feels like the `value` kwarg should be moved up to `Distribution` so that we can just call `sample` unconditionally.

The second issue was that, in two places, we used `map_structure` to convert `value` to a nest of Tensors. This isn't a reasonable thing to do when there are `None`s present. Both uses were replaced with a recursive call to a utility implemented via `_model_flatten`, `_model_unflatten`, etc.

PiperOrigin-RevId: 377168479
diff --git a/tensorflow_probability/python/distributions/joint_distribution.py b/tensorflow_probability/python/distributions/joint_distribution.py
@@ -81,27 +81,36 @@ def trace_distributions_and_values(dist, sample_shape, seed, value=None):
   """Draws a sample, and traces both the distribution and sampled value."""
   if value is None:
     value = dist.sample(sample_shape, seed=seed)
+  elif tf.nest.is_nested(dist.dtype) and any(
+      v is None for v in tf.nest.flatten(value)):
+    # TODO(siege): This is making an assumption that nested dtype => partial
+    # value support, which is not necessarily reasonable.
+    value = dist.sample(sample_shape, seed=seed, value=value)
   return ValueWithTrace(value=value, traced=(dist, value))
 
 
 def trace_distributions_only(dist, sample_shape, seed, value=None):
   """Draws a sample, and traces the sampled value."""
-  if value is None:
-    value = dist.sample(sample_shape, seed=seed)
-  return ValueWithTrace(value=value, traced=dist)
+  ret = trace_distributions_and_values(dist, sample_shape, seed, value)
+  return ret._replace(traced=ret.traced[0])
 
 
 def trace_values_only(dist, sample_shape, seed, value=None):
   """Draws a sample, and traces the sampled value."""
-  if value is None:
-    value = dist.sample(sample_shape, seed=seed)
-  return ValueWithTrace(value=value, traced=value)
+  ret = trace_distributions_and_values(dist, sample_shape, seed, value)
+  return ret._replace(traced=ret.traced[1])
 
 
 def trace_values_and_log_probs(dist, sample_shape, seed, value=None):
   """Draws a sample, and traces both the sampled value and its log density."""
   if value is None:
     value, lp = dist.experimental_sample_and_log_prob(sample_shape, seed=seed)
+  elif tf.nest.is_nested(dist.dtype) and any(
+      v is None for v in tf.nest.flatten(value)):
+    # TODO(siege): This is making an assumption that nested dtype => partial
+    # value support, which is not necessarily reasonable.
+    value, lp = dist.experimental_sample_and_log_prob(
+        sample_shape, seed=seed, value=value)
   else:
     lp = dist.log_prob(value)
   return ValueWithTrace(value=value, traced=(value, lp))
@@ -210,7 +219,9 @@ class JointDistribution(distribution_lib.Distribution):
   - `_model_coroutine`: A generator that yields a sequence of
     `tfd.Distribution`-like instances.
 
-  - `_model_flatten`: takes a structured input and returns a sequence.
+  - `_model_flatten`: takes a structured input and returns a sequence. The
+    sequence order must match the order distributions are yielded from
+    `_model_coroutine`.
 
   - `_model_unflatten`: takes a sequence and returns a structure matching the
     semantics of the `JointDistribution` subclass.
@@ -613,33 +624,14 @@ def _map_attr_over_dists(self, attr, dists=None):
              if dists is None else dists)
     return (getattr(d, attr)() for d in dists)
 
-  def _sanitize_value(self, value):
-    """Ensures `value` matches `self.dtype` with `Tensor` or `None` elements."""
-    if value is None:
-      return value
-
-    if len(value) < len(self.dtype):
-      # Fill in missing entries with `None`.
-      if hasattr(self.dtype, 'keys'):
-        value = {k: value.get(k, None) for k in self.dtype.keys()}
-      else:  # dtype is a sequence.
-        value = [value[i] if i < len(value) else None
-                 for i in range(len(self.dtype))]
-
-    value = nest_util.cast_structure(value, self.dtype)
-    return nest.map_structure_up_to(
-        self.dtype,
-        lambda x, d: x if x is None else tf.convert_to_tensor(x, dtype_hint=d),
-        value, self.dtype)
-
   def _resolve_value(self, *args, allow_partially_specified=False, **kwargs):
     """Resolves a `value` structure from user-passed arguments."""
     value = kwargs.pop('value', None)
     if not (args or kwargs):
-       # Fast path when `value` is the only kwarg. The case where `value` is
-       # passed as a positional arg is handled by `_resolve_value_from_args`
-       # below.
-      return self._sanitize_value(value)
+      # Fast path when `value` is the only kwarg. The case where `value` is
+      # passed as a positional arg is handled by `_resolve_value_from_args`
+      # below.
+      return _sanitize_value(self, value)
     elif value is not None:
       raise ValueError('Supplied both `value` and keyword '
                        'arguments to parameterize sampling. Supplied keyword '
@@ -665,7 +657,7 @@ def _resolve_value(self, *args, allow_partially_specified=False, **kwargs):
           'Found unexpected keyword arguments. Distribution names '
           'are\n{}\nbut received\n{}\nThese names were '
           'invalid:\n{}'.format(dist_name_str, kwarg_names, unmatched_str))
-    return self._sanitize_value(value)
+    return _sanitize_value(self, value)
 
   def _call_execute_model(self,
                           sample_shape=(),
@@ -793,17 +785,7 @@ def _execute_model(self,
         value_at_index = None
         if (value is not None and len(value) > index and
             value[index] is not None):
-
-          def convert_tree_to_tensor(x, dtype_hint):
-            return tf.convert_to_tensor(x, dtype_hint=dtype_hint)
-
-          # This signature does not allow kwarg names. Applies
-          # `convert_to_tensor` on the next value.
-          value_at_index = nest.map_structure_up_to(
-              actual_distribution.dtype,  # shallow_tree
-              convert_tree_to_tensor,  # func
-              value[index],  # x
-              actual_distribution.dtype)  # dtype_hint
+          value_at_index = _sanitize_value(actual_distribution, value[index])
         try:
           next_value, traced_values = sample_and_trace_fn(
               actual_distribution,
@@ -1175,6 +1157,46 @@ def _inverse_log_det_jacobian(self, y, event_ndims, **kwargs):
         y, event_ndims, _jd_conditioning=y, **kwargs)
 
 
+def _sanitize_value(distribution, value):
+  """Ensures `value` matches `distribution.dtype`, adding `None`s as needed."""
+  if value is None:
+    return value
+
+  if not tf.nest.is_nested(distribution.dtype):
+    return tf.convert_to_tensor(value, dtype_hint=distribution.dtype)
+
+  if len(value) < len(distribution.dtype):
+    # Fill in missing entries with `None`.
+    if hasattr(distribution.dtype, 'keys'):
+      value = {k: value.get(k, None) for k in distribution.dtype.keys()}
+    else:  # dtype is a sequence.
+      value = [value[i] if i < len(value) else None
+               for i in range(len(distribution.dtype))]
+
+  value = nest_util.cast_structure(value, distribution.dtype)
+  jdlike_attrs = [
+      '_get_single_sample_distributions',
+      '_model_flatten',
+      '_model_unflatten',
+  ]
+  if all(hasattr(distribution, attr) for attr in jdlike_attrs):
+    flat_dists = distribution._get_single_sample_distributions()
+    flat_value = distribution._model_flatten(value)
+    flat_value = map(_sanitize_value, flat_dists, flat_value)
+    return distribution._model_unflatten(flat_value)
+  else:
+    # A joint distribution that isn't tfd.JointDistribution-like; assume it has
+    # some reasonable dtype semantics. We can't use this for
+    # tfd.JointDistribution because we might have a None standing in for a
+    # sub-tree (e.g. consider omitting a nested JD).
+    return nest.map_structure_up_to(
+        distribution.dtype,
+        lambda x, d: x if x is None else tf.convert_to_tensor(x, dtype_hint=d),
+        value,
+        distribution.dtype,
+    )
+
+
 @log_prob_ratio.RegisterLogProbRatio(JointDistribution)
 def _jd_log_prob_ratio(p, x, q, y, name=None):
   """Implements `log_prob_ratio` for tfd.JointDistribution*."""
diff --git a/tensorflow_probability/python/distributions/joint_distribution_coroutine_test.py b/tensorflow_probability/python/distributions/joint_distribution_coroutine_test.py
@@ -980,6 +980,49 @@ def _get_support_bijectors(dists, xs=None, ys=None):
           self.evaluate(bijectors[i].inverse_event_shape_tensor(
               event_shapes[i])))
 
+  @parameterized.named_parameters(
+      ('_sample', lambda d, **kwargs: d.sample(**kwargs)),
+      ('_sample_and_log_prob',
+       lambda d, **kwargs: d.experimental_sample_and_log_prob(**kwargs)[0]),
+  )
+  def test_nested_partial_value(self, sample_fn):
+    @tfd.JointDistributionCoroutine
+    def innermost():
+      a = yield Root(tfd.Exponential(1., name='a'))
+      yield tfd.Sample(tfd.LogNormal(a, a), [5], name='b')
+
+    @tfd.JointDistributionCoroutine
+    def inner():
+      yield Root(tfd.Exponential(1., name='c'))
+      yield Root(innermost.copy(name='d'))
+
+    @tfd.JointDistributionCoroutine
+    def outer():
+      yield Root(tfd.Exponential(1., name='e'))
+      yield Root(inner.copy(name='f'))
+
+    seed = test_util.test_seed(sampler_type='stateless')
+    true_xs = outer.sample(seed=seed)
+
+    # These asserts work because we advance the stateless seed inside the model
+    # whether or not a sample is actually generated.
+    partial_xs = true_xs._replace(f=None)
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = true_xs._replace(e=None)
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = true_xs._replace(f=true_xs.f._replace(d=None))
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = true_xs._replace(
+        f=true_xs.f._replace(d=true_xs.f.d._replace(a=None)))
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
   def test_default_event_space_bijector_nested(self):
     @tfd.JointDistributionCoroutine
     def inner():
diff --git a/tensorflow_probability/python/distributions/joint_distribution_named_test.py b/tensorflow_probability/python/distributions/joint_distribution_named_test.py
@@ -270,6 +270,53 @@ def test_can_call_log_prob_with_kwargs(self):
                                  "can't take positional args"):
       lp_kwargs = d.log_prob(e, a, x)
 
+  @parameterized.named_parameters(
+      ('_sample', lambda d, **kwargs: d.sample(**kwargs)),
+      ('_sample_and_log_prob',
+       lambda d, **kwargs: d.experimental_sample_and_log_prob(**kwargs)[0]),
+  )
+  def test_nested_partial_value(self, sample_fn):
+    innermost = tfd.JointDistributionNamed({
+        'a': tfd.Exponential(1.),
+        'b': lambda a: tfd.Sample(tfd.LogNormal(a, a), [5]),
+    })
+
+    inner = tfd.JointDistributionNamed({
+        'c': tfd.Exponential(1.),
+        'd': innermost,
+    })
+
+    outer = tfd.JointDistributionNamed({
+        'e': tfd.Exponential(1.),
+        'f': inner,
+    })
+
+    seed = test_util.test_seed(sampler_type='stateless')
+    true_xs = outer.sample(seed=seed)
+
+    def _update(dict_, **kwargs):
+      """Returns a copy of `dict_` with `kwargs` overriding its entries."""
+      return dict(dict_, **kwargs)
+
+    # These asserts work because we advance the stateless seed inside the model
+    # whether or not a sample is actually generated.
+    partial_xs = _update(true_xs, f=None)
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = _update(true_xs, e=None)
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = _update(true_xs, f=_update(true_xs['f'], d=None))
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = _update(
+        true_xs, f=_update(true_xs['f'], d=_update(true_xs['f']['d'], a=None)))
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
   @parameterized.named_parameters(
       ('basic', basic_ordered_model_fn),
       ('nested_lists', nested_lists_model_fn))
diff --git a/tensorflow_probability/python/distributions/joint_distribution_sequential_test.py b/tensorflow_probability/python/distributions/joint_distribution_sequential_test.py
@@ -326,6 +326,54 @@ def test_dist_fn_takes_varargs(self):
     lp = dist.log_prob(dist.sample(5, seed=test_util.test_seed()))
     self.assertAllEqual(lp.shape, [5])
 
+  @parameterized.named_parameters(
+      ('_sample', lambda d, **kwargs: d.sample(**kwargs)),
+      ('_sample_and_log_prob',
+       lambda d, **kwargs: d.experimental_sample_and_log_prob(**kwargs)[0]),
+  )
+  def test_nested_partial_value(self, sample_fn):
+    innermost = tfd.JointDistributionSequential((
+        tfd.Exponential(1.),
+        lambda a: tfd.Sample(tfd.LogNormal(a, a), [5]),
+    ))
+
+    inner = tfd.JointDistributionSequential((
+        tfd.Exponential(1.),
+        innermost,
+    ))
+
+    outer = tfd.JointDistributionSequential((
+        tfd.Exponential(1.),
+        inner,
+    ))
+
+    seed = test_util.test_seed(sampler_type='stateless')
+    true_xs = outer.sample(seed=seed)
+
+    def _update(tuple_, index, value):
+      res = list(tuple_)
+      res[index] = value
+      return tuple(res)
+
+    # These asserts work because we advance the stateless seed inside the model
+    # whether or not a sample is actually generated.
+    partial_xs = _update(true_xs, 1, None)
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = _update(true_xs, 0, None)
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = _update(true_xs, 1, _update(true_xs[1], 1, None))
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
+    partial_xs = _update(
+        true_xs, 1, _update(true_xs[1], 1, _update(true_xs[1][1], 0, None)))
+    xs = sample_fn(outer, value=partial_xs, seed=seed)
+    self.assertAllCloseNested(true_xs, xs)
+
   @parameterized.named_parameters(
       ('basic', basic_model_fn),
       ('nested_lists', nested_lists_model_fn))