When inflating a continuous distribution, change the log_prob

Googler · tensorflower-gardener · commit 50c549ab1c5d · 2022-09-09T12:01:06.000-07:00
For inflated continuous distributions, the log_prob implementation is changed
to one with better maximum likelihood estimation properties.

PiperOrigin-RevId: 473308814
diff --git a/tensorflow_probability/python/distributions/inflated.py b/tensorflow_probability/python/distributions/inflated.py
@@ -102,7 +102,7 @@ def __init__(self,
             lambda logit: tf.stack([logit, -logit], axis=-1),
             dtype=self._inflated_loc_logits.dtype,
             shape=self._inflated_loc_logits.shape + (2,))
-        categorical_dist = categorical.Categorical(
+        self._categorical_dist = categorical.Categorical(
             logits=cat_logits,
             validate_args=validate_args,
             allow_nan_stats=allow_nan_stats)
@@ -116,14 +116,14 @@ def __init__(self,
             dtype=self._inflated_loc_probs.dtype,
             shape=self._inflated_loc_probs.shape + (2,)
         )
-        categorical_dist = categorical.Categorical(
+        self._categorical_dist = categorical.Categorical(
             probs=cat_probs,
             validate_args=validate_args,
             allow_nan_stats=allow_nan_stats)
         probs_or_logits = self._inflated_loc_probs
 
       super(_Inflated, self).__init__(
-          cat=categorical_dist,
+          cat=self._categorical_dist,
           components=[
               deterministic.Deterministic(
                   DeferredTensor(
@@ -151,6 +151,24 @@ def _parameter_properties(cls, dtype, num_classes=None):
         ),
         inflated_loc=parameter_properties.ParameterProperties())
 
+  def _log_prob(self, x):
+    # We override the log_prob implementation from Mixture in the case
+    # where we are inflating a continuous distribution, because we have
+    # found that this "censored" version gives a good maximum likelihood
+    # estimate of the continuous distribution's parameters but the
+    # default implementation doesn't.  This follows the proposal in
+    # https://arxiv.org/pdf/2010.09647.pdf for summing distributions of
+    # different Hausdorff dimension.
+    if isinstance(self._distribution,
+                  distribution_lib.DiscreteDistributionMixin):
+      return super(_Inflated, self)._log_prob(x)
+    else:
+      return tf.where(
+          tf.equal(x, self._inflated_loc),
+          self._categorical_dist.log_prob(0),
+          self._categorical_dist.log_prob(1) +
+          self._distribution.log_prob(x))
+
   @property
   def distribution(self):
     """The distribution used for the non-inflated part."""
diff --git a/tensorflow_probability/python/distributions/inflated_test.py b/tensorflow_probability/python/distributions/inflated_test.py
@@ -59,6 +59,18 @@ def test_inflated_batched(self):
     samples = zinb.sample(seed=test_util.test_seed())
     self.assertEqual((5,), samples.shape)
 
+  def test_inflated_continuous_log_prob(self):
+    spike_and_slab = inflated.Inflated(
+        normal.Normal(loc=1.0, scale=2.0), inflated_loc_probs=0.1)
+    self.assertEqual(self.evaluate(tf.math.log(0.1)),
+                     self.evaluate(spike_and_slab.log_prob(0.0)))
+    self.assertNear(
+        self.evaluate(tf.math.log(0.9) + normal.Normal(
+            loc=1.0, scale=2.0).log_prob(2.0)),
+        self.evaluate(spike_and_slab.log_prob(2.0)),
+        1e-6
+    )
+
   def test_inflated_factory(self):
     spike_and_slab_class = inflated.inflated_factory('SpikeAndSlab',
                                                      normal.Normal, 0.0)