
Commit bc6c411

langmore authored and tensorflower-gardener committed
Add a perturbed_observations option to ensemble_kalman_filter_log_marginal_likelihood.
If False, the observation covariance is computed in a less-stochastic manner that guarantees an SPD result, even with small ensemble sizes. The name "perturbed observations" is chosen because this corresponds to the (well-known) "perturbed observation" *update* step. There is no well-known name for this technique as applied to marginal likelihood (as I've done here), but borrowing the same name seems appropriate.

PiperOrigin-RevId: 451771148
1 parent ac62542 commit bc6c411
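
In the notation used by the code comments in the diff below (G is the observation operator applied to the state ensemble X, and η ~ Normal(0, Γ) is the observation noise), the two covariance estimates are roughly:

    perturbed_observations=True:   Cov(G(X) + η)    (sample covariance of the sampled observation particles)
    perturbed_observations=False:  Cov(G(X)) + Γ    (sample covariance of the predictions, with Γ added exactly)

A sample covariance of n_ensemble vectors has rank at most n_ensemble - 1, so the first estimate cannot be SPD unless the ensemble is at least one member larger than the number of observations; the second is SPD whenever Γ is.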

2 files changed (+89 lines, -12 lines)

tensorflow_probability/python/experimental/sequential/ensemble_kalman_filter.py

Lines changed: 45 additions & 10 deletions
@@ -29,6 +29,10 @@
 ]
 
 
+class InsufficientEnsembleSizeError(Exception):
+  """Raise when the ensemble size is insufficient for a function."""
+
+
 # Sample covariance. Handles differing shapes.
 def _covariance(x, y=None):
   """Sample covariance, assuming samples are the leftmost axis."""
@@ -304,6 +308,7 @@ def ensemble_kalman_filter_log_marginal_likelihood(
     state,
     observation,
     observation_fn,
+    perturbed_observations=True,
     seed=None,
     name=None):
   """Ensemble Kalman filter log marginal likelihood.
@@ -332,6 +337,11 @@ def ensemble_kalman_filter_log_marginal_likelihood(
     observation_fn: callable returning an instance of
       `tfd.MultivariateNormalLinearOperator` along with an extra information
       to be returned in the `EnsembleKalmanFilterState`.
+    perturbed_observations: Whether the marginal distribution `p(Y[t] | ...)`
+      is estimated using samples from the `observation_fn`'s distribution. If
+      `False`, the distribution's covariance matrix is used directly. This
+      latter choice is less common in the literature, but works even if the
+      ensemble size is smaller than the number of observations.
     seed: PRNG seed; see `tfp.random.sanitize_seed` for details.
     name: Python `str` name for ops created by this method.
       Default value: `None`
@@ -340,6 +350,10 @@ def ensemble_kalman_filter_log_marginal_likelihood(
   Returns:
     log_marginal_likelihood: `Tensor` with same dtype as `state`.
 
+  Raises:
+    InsufficientEnsembleSizeError: If `perturbed_observations=True` and the
+      ensemble size is not at least one greater than the number of observations.
+
   #### References
 
   [1] Geir Evensen. Sequential data assimilation with a nonlinear
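
As an illustrative usage sketch of the new argument (not part of this commit; the toy observation_fn, shapes, and integer seed below are assumptions that mirror the test added later in this diff):

    import tensorflow as tf
    import tensorflow_probability as tfp

    tfd = tfp.distributions
    tfs = tfp.experimental.sequential

    event_size = 5
    # An ensemble of only 3 members, smaller than the observation dimension.
    particles = {'x': tf.random.normal([3, event_size])}
    state = tfs.EnsembleKalmanFilterState(step=0, particles=particles, extra={})

    def observation_fn(_, particles, extra):
      return tfd.MultivariateNormalDiag(
          loc=particles['x'], scale_diag=[1e-2] * event_size), extra

    # perturbed_observations=False keeps the estimated observation covariance SPD
    # even though the ensemble is smaller than the number of observations.
    log_ml = tfs.ensemble_kalman_filter_log_marginal_likelihood(
        state=state,
        observation=tf.random.normal([event_size]),
        observation_fn=observation_fn,
        perturbed_observations=False,
        seed=42)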
@@ -360,16 +374,37 @@ def ensemble_kalman_filter_log_marginal_likelihood(
 
     observation = tf.convert_to_tensor(observation, dtype=common_dtype)
 
-    if not isinstance(observation_particles_dist,
-                      distributions.MultivariateNormalLinearOperator):
-      raise ValueError('Expected `observation_fn` to return an instance of '
-                       '`MultivariateNormalLinearOperator`')
-
-    observation_particles = observation_particles_dist.sample(seed=seed)
-    observation_dist = distributions.MultivariateNormalTriL(
-        loc=tf.reduce_mean(observation_particles, axis=0),
-        scale_tril=tf.linalg.cholesky(_covariance(observation_particles)))
-
+    if perturbed_observations:
+      # With G the observation operator and B the batch shape,
+      # observation_particles = G(X) + η, where η ~ Normal(0, Γ).
+      # Both are shape [n_ensemble] + B + [n_observations]
+      observation_particles = observation_particles_dist.sample(seed=seed)
+      n_observations = observation_particles_dist.event_shape[0]
+      n_ensemble = observation_particles_dist.batch_shape[0]
+      if (n_ensemble is not None and n_observations is not None and
+          n_ensemble < n_observations + 1):
+        raise InsufficientEnsembleSizeError(
+            f'When `perturbed_observations=True`, ensemble size ({n_ensemble}) '
+            'must be at least one greater than the number of observations '
+            f'({n_observations}), but it was not.')
+      observation_dist = distributions.MultivariateNormalTriL(
+          loc=tf.reduce_mean(observation_particles, axis=0),
+          # Cholesky(Cov(G(X) + η)), where Cov(..) is the ensemble covariance.
+          scale_tril=tf.linalg.cholesky(_covariance(observation_particles)))
+    else:
+      # predicted_observation = G(X),
+      # and is shape [n_ensemble] + B.
+      predicted_observation = observation_particles_dist.mean()
+      observation_dist = distributions.MultivariateNormalTriL(
+          loc=tf.reduce_mean(predicted_observation, axis=0),  # ensemble mean
+          # Cholesky(Cov(G(X)) + Γ), where Cov(..) is the ensemble covariance.
+          scale_tril=tf.linalg.cholesky(
+              _covariance(predicted_observation) +
+              _linop_covariance(observation_particles_dist).to_dense()))
+
+    # Above we computed observation_dist, the distribution of observations given
+    # the predictive distribution of states (e.g. states from previous time).
+    # Here we evaluate the log_prob on the actual observations.
     return observation_dist.log_prob(observation)
 
 
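To see concretely why the `else` branch avoids the failure mode that `InsufficientEnsembleSizeError` guards against, here is a small NumPy sketch (an editor's illustration with assumed shapes, not code from the commit):

    import numpy as np

    rng = np.random.default_rng(0)
    n_ensemble, n_observations = 3, 5   # ensemble smaller than the observation dimension
    g_of_x = rng.normal(size=(n_ensemble, n_observations))  # plays the role of G(X)
    gamma = 1e-4 * np.eye(n_observations)                   # plays the role of Γ

    def sample_cov(z):
      # Sample covariance with samples on the leftmost axis.
      zc = z - z.mean(axis=0, keepdims=True)
      return zc.T @ zc / (z.shape[0] - 1)

    eta = rng.multivariate_normal(np.zeros(n_observations), gamma, size=n_ensemble)
    cov_perturbed = sample_cov(g_of_x + eta)  # rank <= n_ensemble - 1 = 2, hence singular
    cov_direct = sample_cov(g_of_x) + gamma   # SPD, because gamma is SPD

    print(np.linalg.matrix_rank(cov_perturbed))  # 2
    np.linalg.cholesky(cov_direct)               # succeeds
    # np.linalg.cholesky(cov_perturbed) would typically raise LinAlgError.
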
tensorflow_probability/python/experimental/sequential/ensemble_kalman_filter_test.py

Lines changed: 44 additions & 2 deletions
@@ -269,6 +269,7 @@ def observation_fn(_, particles, extra):
       self.assertAllEqual(particles_shape[1:-1], log_ml.shape)
       self.assertIn('observation_count', state.extra)
       self.assertEqual(3 * i + 1, state.extra['observation_count'])
+      self.assertFalse(np.any(np.isnan(self.evaluate(log_ml))))
 
     log_ml_krazy_obs = tfs.ensemble_kalman_filter_log_marginal_likelihood(
         state,
@@ -293,6 +294,38 @@ def observation_fn(_, particles, extra):
         self.evaluate(tf.reduce_mean(state.particles['x'], axis=0)),
         rtol=0.05)
 
+  def test_log_marginal_likelihood_with_small_ensemble_no_perturb_obs(self):
+    # With perturbed_observations=False, we should be able to handle the small
+    # ensemble without NaN.
+
+    # Initialize an ensemble that is smaller than the event size.
+    seed_stream = test_util.test_seed_stream()
+    n_ensemble = 3
+    event_size = 5
+    self.assertLess(n_ensemble, event_size)
+    particles_shape = (n_ensemble, event_size)
+
+    particles = {
+        'x':
+            self.evaluate(
+                tf.random.normal(shape=particles_shape, seed=seed_stream())),
+    }
+
+    def observation_fn(_, particles, extra):
+      return tfd.MultivariateNormalDiag(
+          loc=particles['x'], scale_diag=[1e-2] * event_size), extra
+
+    # Marginal likelihood.
+    log_ml = tfs.ensemble_kalman_filter_log_marginal_likelihood(
+        state=tfs.EnsembleKalmanFilterState(
+            step=0, particles=particles, extra={}),
+        observation=tf.random.normal(shape=(event_size,), seed=seed_stream()),
+        observation_fn=observation_fn,
+        perturbed_observations=False,
+        seed=test_util.test_seed())
+    self.assertAllEqual(particles_shape[1:-1], log_ml.shape)
+    self.assertFalse(np.any(np.isnan(self.evaluate(log_ml))))
+
 
 # Parameters defining a linear/Gaussian state space model.
 LinearModelParams = collections.namedtuple('LinearModelParams', [
@@ -484,8 +517,15 @@ def _enkf_solve(self, observation, enkf_params, predict_kwargs, update_kwargs,
           noise_level=[0.001, 0.1, 1.0],
           n_states=[2, 5],
           n_observations=[2, 5],
+          perturbed_observations=[False, True],
       ))
-  def test_same_solution(self, noise_level, n_states, n_observations):
+  def test_same_solution(
+      self,
+      noise_level,
+      n_states,
+      n_observations,
+      perturbed_observations,
+  ):
     """Check that the KF and EnKF solutions are the same."""
     # Tests pass with n_ensemble = 1e7. The KF vs. EnKF tolerance is
     # proportional to 1 / sqrt(n_ensemble), so this shows good agreement.
@@ -496,7 +536,9 @@ def test_same_solution(self, noise_level, n_states, n_observations):
     dtype = tf.float64
     predict_kwargs = {}
     update_kwargs = {}
-    log_marginal_likelihood_kwargs = {}
+    log_marginal_likelihood_kwargs = {
+        'perturbed_observations': perturbed_observations
+    }
 
     linear_model_params = self._get_linear_model_params(
         noise_level=noise_level,
