BREAKING CHANGE: Make _TensorCoercible meta-distribution-like, such that instances no longer share the __class__ of their instantiating distribution, in preparation for converting most TFP distributions to CompositeTensor.

emilyfertig · tensorflower-gardener · commit 0fc11b30b30b · 2021-06-08T13:04:32.000-07:00
This means that distributions output by `tfp.layers` are instances of `_TensorCoercible`, not instances of the TFP distribution class with which they were constructed. The wrapped distribution is available as the `tensor_distribution` attribute. For example:

```
d = 4
p = tfpl.MultivariateNormalTriL.params_size(d)
layer = tfpl.MultivariateNormalTriL(d, tfd.Distribution.mean)
t = tfd.Normal(0, 1).sample([2, 3, p], seed=42)
x = layer(t)

# Newly fails; x is a `_TensorCoercible` instance. Check the wrapped
# distribution instead: `isinstance(x.tensor_distribution, ...)`.
assert isinstance(x, tfd.MultivariateNormalTriL)

# Still works: attributes not found on the wrapper itself are looked up on the
# inner `MultivariateNormalTriL`.
x.loc
```

PiperOrigin-RevId: 378224934
diff --git a/tensorflow_probability/python/layers/distribution_layer.py b/tensorflow_probability/python/layers/distribution_layer.py
@@ -193,10 +193,10 @@ def _fn(*fargs, **fkwargs):
         value.shape = value[-1].shape
         value.get_shape = value[-1].get_shape
         value.dtype = value[-1].dtype
-        distribution.shape = value[-1].shape
+        distribution._shape = value[-1].shape  # pylint: disable=protected-access
         distribution.get_shape = value[-1].get_shape
       else:
-        distribution.shape = value.shape
+        distribution._shape = value.shape  # pylint: disable=protected-access
         distribution.get_shape = value.get_shape
       return distribution, value
 
diff --git a/tensorflow_probability/python/layers/distribution_layer_test.py b/tensorflow_probability/python/layers/distribution_layer_test.py
@@ -50,6 +50,13 @@ def _vec_pad(x, value=0):
   return tf.pad(x, paddings=paddings, constant_values=value)
 
 
+def _unwrap_tensor_coercible(dist):
+  inner_dist = getattr(dist, 'tensor_distribution', dist)
+  if inner_dist is dist:
+    return inner_dist
+  return _unwrap_tensor_coercible(inner_dist)
+
+
 # TODO(b/143642032): Figure out how to solve issues with save/load, so that we
 # can decorate all of these tests with @test_util.test_all_tf_execution_regimes
 @test_util.test_graph_and_eager_modes
@@ -142,8 +149,8 @@ def accuracy(x, rv_x):
                   validation_data=(self.x_test, self.x_test),
                   shuffle=True)
     yhat = vae_model(tf.convert_to_tensor(self.x_test))
-    self.assertIsInstance(yhat, tfd.Independent)
-    self.assertIsInstance(yhat.distribution, tfd.Bernoulli)
+    self.assertIsInstance(yhat.tensor_distribution, tfd.Independent)
+    self.assertIsInstance(yhat.tensor_distribution.distribution, tfd.Bernoulli)
 
   def test_keras_functional_api(self):
     """Test `DistributionLambda`s are composable via Keras functional API."""
@@ -193,8 +200,8 @@ def test_keras_functional_api(self):
                   validation_data=(self.x_test, self.x_test),
                   shuffle=True)
     yhat = vae_model(tf.convert_to_tensor(self.x_test))
-    self.assertIsInstance(yhat, tfd.Independent)
-    self.assertIsInstance(yhat.distribution, tfd.Bernoulli)
+    self.assertIsInstance(yhat.tensor_distribution, tfd.Independent)
+    self.assertIsInstance(yhat.tensor_distribution.distribution, tfd.Bernoulli)
 
   def test_keras_model_api(self):
     """Test `DistributionLambda`s are composable via Keras `Model` API."""
@@ -249,8 +256,8 @@ def call(self, inputs):
                   epochs=1,
                   validation_data=(self.x_test, self.x_test))
     yhat = vae_model(tf.convert_to_tensor(self.x_test))
-    self.assertIsInstance(yhat, tfd.Independent)
-    self.assertIsInstance(yhat.distribution, tfd.Bernoulli)
+    self.assertIsInstance(yhat.tensor_distribution, tfd.Independent)
+    self.assertIsInstance(yhat.tensor_distribution.distribution, tfd.Bernoulli)
 
   def test_keras_sequential_api_multiple_draws(self):
     num_draws = 2
@@ -293,8 +300,8 @@ def test_keras_sequential_api_multiple_draws(self):
                   steps_per_epoch=1,  # Usually `n // batch_size`.
                   validation_data=(self.x_test, self.x_test))
     yhat = vae_model(tf.convert_to_tensor(self.x_test))
-    self.assertIsInstance(yhat, tfd.Independent)
-    self.assertIsInstance(yhat.distribution, tfd.Bernoulli)
+    self.assertIsInstance(yhat.tensor_distribution, tfd.Independent)
+    self.assertIsInstance(yhat.tensor_distribution.distribution, tfd.Bernoulli)
 
   def test_side_variable_is_auto_tracked(self):
     # `s` is the "side variable".
@@ -587,7 +594,7 @@ def test_layer(self):
     layer = tfpl.MultivariateNormalTriL(d, tfd.Distribution.mean)
     t = tfd.Normal(0, 1).sample([2, 3, p], seed=42)
     x = layer(t)
-    self._check_distribution(t, x)
+    self._check_distribution(t, x.tensor_distribution)
 
   def test_doc_string(self):
     # Load data.
@@ -654,7 +661,7 @@ def test_layer(self):
     layer = tfpl.OneHotCategorical(d, validate_args=True)
     t = tfd.Normal(0, 1).sample([2, 3, p], seed=42)
     x = layer(t)
-    self._check_distribution(t, x)
+    self._check_distribution(t, x.tensor_distribution)
 
   def test_doc_string(self):
     # Load data.
@@ -692,9 +699,11 @@ def test_doc_string(self):
 class CategoricalMixtureOfOneHotCategoricalTest(test_util.TestCase):
 
   def _check_distribution(self, t, x):
-    self.assertIsInstance(x, tfd.MixtureSameFamily)
-    self.assertIsInstance(x.mixture_distribution, tfd.Categorical)
-    self.assertIsInstance(x.components_distribution, tfd.OneHotCategorical)
+    self.assertIsInstance(_unwrap_tensor_coercible(x), tfd.MixtureSameFamily)
+    self.assertIsInstance(_unwrap_tensor_coercible(x.mixture_distribution),
+                          tfd.Categorical)
+    self.assertIsInstance(_unwrap_tensor_coercible(x.components_distribution),
+                          tfd.OneHotCategorical)
     t_back = tf.concat([
         x.mixture_distribution.logits,
         tf.reshape(x.components_distribution.logits, shape=[2, 3, -1]),
@@ -768,9 +777,12 @@ def test_doc_string(self):
               shuffle=True)
 
     yhat = model(x)
-    self.assertIsInstance(yhat, tfd.MixtureSameFamily)
-    self.assertIsInstance(yhat.mixture_distribution, tfd.Categorical)
-    self.assertIsInstance(yhat.components_distribution, tfd.OneHotCategorical)
+    self.assertIsInstance(_unwrap_tensor_coercible(yhat), tfd.MixtureSameFamily)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(yhat.mixture_distribution), tfd.Categorical)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(yhat.components_distribution),
+        tfd.OneHotCategorical)
     # TODO(b/120221303): For now we just check that the code executes and we get
     # back a distribution instance. Better would be to change the data
     # generation so the model becomes well-specified (and we can check correctly
@@ -834,7 +846,7 @@ def test_layer(self):
 
     layer = self.layer_class(validate_args=True, dtype=self.dtype)
     x = layer(t)
-    self._check_distribution(t, x, batch_shape)
+    self._check_distribution(t, x.tensor_distribution, batch_shape)
 
   def test_serialization(self):
     event_shape = []
@@ -1163,11 +1175,14 @@ def _build_tensor(self, ndarray, dtype=None):
         ndarray, shape=ndarray.shape if self.use_static_shape else None)
 
   def _check_distribution(self, t, x, batch_shape):
-    self.assertIsInstance(x, tfd.MixtureSameFamily)
-    self.assertIsInstance(x.mixture_distribution, tfd.Categorical)
-    self.assertIsInstance(x.components_distribution, tfd.Independent)
-    self.assertIsInstance(x.components_distribution.distribution,
-                          self.dist_class)
+    self.assertIsInstance(_unwrap_tensor_coercible(x), tfd.MixtureSameFamily)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(x.mixture_distribution), tfd.Categorical)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(x.components_distribution), tfd.Independent)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(x.components_distribution.distribution),
+        self.dist_class)
     self.assertEqual(self.dtype, x.dtype)
 
     t_back = self._distribution_to_params(x, batch_shape)
@@ -1413,9 +1428,12 @@ def _build_tensor(self, ndarray, dtype=None):
         ndarray, shape=ndarray.shape if self.use_static_shape else None)
 
   def _check_distribution(self, t, x, batch_shape):
-    self.assertIsInstance(x, tfd.MixtureSameFamily)
-    self.assertIsInstance(x.mixture_distribution, tfd.Categorical)
-    self.assertIsInstance(x.components_distribution, tfd.MultivariateNormalTriL)
+    self.assertIsInstance(_unwrap_tensor_coercible(x), tfd.MixtureSameFamily)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(x.mixture_distribution), tfd.Categorical)
+    self.assertIsInstance(
+        _unwrap_tensor_coercible(x.components_distribution),
+        tfd.MultivariateNormalTriL)
 
     shape = tf.concat([batch_shape, [-1]], axis=0)
     batch_and_n_shape = tf.concat(
diff --git a/tensorflow_probability/python/layers/internal/BUILD b/tensorflow_probability/python/layers/internal/BUILD
@@ -40,6 +40,7 @@ py_library(
     deps = [
         # tensorflow dep,
         "//tensorflow_probability/python/distributions:distribution",
+        "//tensorflow_probability/python/distributions:kullback_leibler",
         "//tensorflow_probability/python/internal:nest_util",
         "//tensorflow_probability/python/internal:parameter_properties",
         "//tensorflow_probability/python/util",
diff --git a/tensorflow_probability/python/layers/internal/distribution_tensor_coercible.py b/tensorflow_probability/python/layers/internal/distribution_tensor_coercible.py
@@ -18,20 +18,23 @@
 from __future__ import division
 from __future__ import print_function
 
-import copy
 import six
 
 import tensorflow.compat.v2 as tf
 
 from tensorflow_probability.python.distributions import distribution as tfd
+from tensorflow_probability.python.distributions import kullback_leibler
 from tensorflow_probability.python.internal import nest_util
 from tensorflow_probability.python.internal import parameter_properties
 from tensorflow_probability.python.util.deferred_tensor import TensorMetaClass
 from tensorflow.python.framework import composite_tensor  # pylint: disable=g-direct-tensorflow-import
+from tensorflow.python.training.tracking import data_structures  # pylint: disable=g-direct-tensorflow-import
 
 
 __all__ = []  # We intend nothing public.
 
+_NOT_FOUND = object()
+
 
 # Define mixin type because Distribution already has its own metaclass.
 class _DistributionAndTensorCoercibleMeta(type(tfd.Distribution),
@@ -43,43 +46,123 @@ class _DistributionAndTensorCoercibleMeta(type(tfd.Distribution),
 class _TensorCoercible(tfd.Distribution):
   """Docstring."""
 
-  registered_class_list = {}
-
-  def __new__(cls, distribution, convert_to_tensor_fn=tfd.Distribution.sample):
-    if isinstance(distribution, cls):
-      return distribution
-    if not isinstance(distribution, tfd.Distribution):
-      raise TypeError('`distribution` argument must be a '
-                      '`tfd.Distribution` instance; '
-                      'saw "{}" of type "{}".'.format(
-                          distribution, type(distribution)))
-    self = copy.copy(distribution)
-    distcls = distribution.__class__
-    self_class = _TensorCoercible.registered_class_list.get(distcls)
-    if not self_class:
-      self_class = type(distcls.__name__, (cls, distcls), {})
-      _TensorCoercible.registered_class_list[distcls] = self_class
-    self.__class__ = self_class
-    return self
-
   def __init__(self,
                distribution,
                convert_to_tensor_fn=tfd.Distribution.sample):
     self._concrete_value = None  # pylint: disable=protected-access
     self._convert_to_tensor_fn = convert_to_tensor_fn  # pylint: disable=protected-access
+    self.tensor_distribution = distribution
+    super(_TensorCoercible, self).__init__(
+        dtype=distribution.dtype,
+        reparameterization_type=distribution.reparameterization_type,
+        validate_args=distribution.validate_args,
+        allow_nan_stats=distribution.allow_nan_stats,
+        parameters=distribution.parameters)
+
+  def __setattr__(self, name, value):
+    """Support self.foo = trackable syntax.
+
+    Redefined from `tensorflow/python/training/tracking/tracking.py` to avoid
+    calling `getattr`, which causes an infinite loop.
+
+    Args:
+      name: str, name of the attribute to be set.
+      value: value to be set.
+    """
+    if vars(self).get(name, _NOT_FOUND) is value:
+      return
+
+    if vars(self).get('_self_setattr_tracking', True):
+      value = data_structures.sticky_attribute_assignment(
+          trackable=self, value=value, name=name)
+    object.__setattr__(self, name, value)
+
+  def __getattr__(self, name):
+    # If the attribute is set in the _TensorCoercible object, return it. This
+    # ensures that direct calls to `getattr` behave as expected.
+    if name in vars(self):
+      return vars(self)[name]
+    # Look for the attribute in `tensor_distribution`, unless it's a `_tracking`
+    # attribute accessed directly by `getattr` in the `Trackable` base class, in
+    # which case the default passed to `getattr` should be returned.
+    if 'tensor_distribution' in vars(self) and '_tracking' not in name:
+      return getattr(vars(self)['tensor_distribution'], name)
+    # Otherwise invoke `__getattribute__`, which will return the default passed
+    # to `getattr` if the attribute was not found.
+    return self.__getattribute__(name)
 
   @classmethod
   def _parameter_properties(cls, dtype, num_classes=None):
     return dict(distribution=parameter_properties.BatchedComponentProperties())
 
+  # pylint: disable=protected-access
   def _batch_shape_tensor(self, **parameter_kwargs):
-    # Any parameter kwargs are for the inner distribution, so pass them
-    # to its `_batch_shape_tensor` method instead of handling them directly.
-    return self.parameters['distribution']._batch_shape_tensor(  # pylint: disable=protected-access
-        **parameter_kwargs)
+    return self.tensor_distribution._batch_shape_tensor(**parameter_kwargs)
+
+  def _batch_shape(self):
+    return self.tensor_distribution._batch_shape()
+
+  def _event_shape_tensor(self):
+    return self.tensor_distribution._event_shape_tensor()
+
+  def _event_shape(self):
+    return self.tensor_distribution._event_shape()
+
+  def sample(self, sample_shape=(), seed=None, name='sample', **kwargs):
+    return self.tensor_distribution.sample(
+        sample_shape=sample_shape, seed=seed, name=name, **kwargs)
+
+  def _log_prob(self, value, **kwargs):
+    return self.tensor_distribution._log_prob(value, **kwargs)
+
+  def _prob(self, value, **kwargs):
+    return self.tensor_distribution._prob(value, **kwargs)
+
+  def _log_cdf(self, value, **kwargs):
+    return self.tensor_distribution._log_cdf(value, **kwargs)
+
+  def _cdf(self, value, **kwargs):
+    return self.tensor_distribution._cdf(value, **kwargs)
+
+  def _log_survival_function(self, value, **kwargs):
+    return self.tensor_distribution._log_survival_function(value, **kwargs)
+
+  def _survival_function(self, value, **kwargs):
+    return self.tensor_distribution._survival_function(value, **kwargs)
+
+  def _entropy(self, **kwargs):
+    return self.tensor_distribution._entropy(**kwargs)
+
+  def _mean(self, **kwargs):
+    return self.tensor_distribution._mean(**kwargs)
+
+  def _quantile(self, value, **kwargs):
+    return self.tensor_distribution._quantile(value, **kwargs)
+
+  def _variance(self, **kwargs):
+    return self.tensor_distribution._variance(**kwargs)
+
+  def _stddev(self, **kwargs):
+    return self.tensor_distribution._stddev(**kwargs)
+
+  def _covariance(self, **kwargs):
+    return self.tensor_distribution._covariance(**kwargs)
+
+  def _mode(self, **kwargs):
+    return self.tensor_distribution._mode(**kwargs)
+
+  def _default_event_space_bijector(self, *args, **kwargs):
+    return self.tensor_distribution._default_event_space_bijector(
+        *args, **kwargs)
+
+  def _parameter_control_dependencies(self, is_init):
+    return self.tensor_distribution._parameter_control_dependencies(is_init)
 
   @property
   def shape(self):
+    return self._shape
+
+  def _shape(self):
     return (tf.TensorShape(None) if self._concrete_value is None
             else self._concrete_value.shape)
 
@@ -130,15 +213,26 @@ def _value(self, dtype=None, name=None, as_ref=False):
             ' results in `tf.convert_to_tensor(x)` being identical to '
             '`x.mean()`.'.format(type(self), self))
       with self._name_and_control_scope('value'):
-        self._concrete_value = (self._convert_to_tensor_fn(self)
-                                if callable(self._convert_to_tensor_fn)
-                                else self._convert_to_tensor_fn)
+        self._concrete_value = (
+            self._convert_to_tensor_fn(self.tensor_distribution)
+            if callable(self._convert_to_tensor_fn)
+            else self._convert_to_tensor_fn)
         if (not tf.is_tensor(self._concrete_value) and
             not isinstance(self._concrete_value,
                            composite_tensor.CompositeTensor)):
           self._concrete_value = nest_util.convert_to_nested_tensor(  # pylint: disable=protected-access
               self._concrete_value,
               name=name or 'concrete_value',
               dtype=dtype,
-              dtype_hint=self.dtype)
+              dtype_hint=self.tensor_distribution.dtype)
     return self._concrete_value
+
+
+@kullback_leibler.RegisterKL(_TensorCoercible, tfd.Distribution)
+def _kl_tensor_coercible_distribution(a, b, name=None):
+  return kullback_leibler.kl_divergence(a.tensor_distribution, b, name=name)
+
+
+@kullback_leibler.RegisterKL(tfd.Distribution, _TensorCoercible)
+def _kl_distribution_tensor_coercible(a, b, name=None):
+  return kullback_leibler.kl_divergence(a, b.tensor_distribution, name=name)
diff --git a/tensorflow_probability/python/layers/internal/distribution_tensor_coercible_test.py b/tensorflow_probability/python/layers/internal/distribution_tensor_coercible_test.py
diff --git a/tensorflow_probability/python/layers/variable_input_test.py b/tensorflow_probability/python/layers/variable_input_test.py