
Commit b19a63c

Merge pull request #584 from mrava87/dev
doc: updated bayesian tutorial
2 parents d1a341a + 3f271e2


tutorials/bayesian.py

Lines changed: 102 additions & 49 deletions
@@ -21,19 +21,26 @@
 
 Based on the above definition, we construct some prior models in the frequency
 domain, convert each of them to the time domain and use such an ensemble
-to estimate the prior mean :math:`\mu_\mathbf{x}` and model
-covariance :math:`\mathbf{C_x}`.
+to estimate the prior mean :math:`\mathbf{x}_0` and model
+covariance :math:`\mathbf{C}_{x_0}`.
 
 We then create our data by sampling the true signal at certain locations
 and solve the reconstruction problem within a Bayesian framework. Since we are
 assuming Gaussianity in our priors, the equations to obtain the posterior mean
-can be derived analytically:
+and covariance can be derived analytically:
 
 .. math::
-    \mathbf{x} = \mathbf{x_0} + \mathbf{C}_x \mathbf{R}^T
-    (\mathbf{R} \mathbf{C}_x \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
+    \mathbf{x} = \mathbf{x_0} + \mathbf{C}_{x_0} \mathbf{R}^T
+    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
     \mathbf{R} \mathbf{x_0})
 
+and
+
+.. math::
+    \mathbf{C}_x = \mathbf{C}_{x_0} - \mathbf{C}_{x_0} \mathbf{R}^T
+    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1}
+    \mathbf{R} \mathbf{C}_{x_0}
+
 """
 import matplotlib.pyplot as plt
 
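The two closed-form expressions above are easy to sanity-check on a tiny dense problem. A minimal NumPy sketch, separate from the tutorial itself (all names below are hypothetical, not part of the diff):

import numpy as np

rng = np.random.default_rng(0)
n, m = 4, 2                       # toy model and data sizes
R = rng.normal(size=(m, n))       # observation/restriction operator
Cx0 = np.eye(n)                   # prior model covariance
Cy = 0.1 * np.eye(m)              # data noise covariance
x0 = np.zeros(n)                  # prior mean
y = rng.normal(size=m)            # observed data

# Gain shared by both formulas: Cx0 R^T (R Cx0 R^T + Cy)^{-1}
K = Cx0 @ R.T @ np.linalg.inv(R @ Cx0 @ R.T + Cy)

x = x0 + K @ (y - R @ x0)         # posterior mean
Cx = Cx0 - K @ R @ Cx0            # posterior covariance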
@@ -80,22 +87,26 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 sigmaa = [0.1, 0.5, 0.6]
 phi0 = [-90.0, 0.0, 0.0]
 sigmaphi = [0.1, 0.2, 0.4]
-sigmad = 1e-2
+sigmad = 1
+scaling = 100  # Scale by a factor to allow noise std=1
 
 # Prior models
 nt = 200
 nfft = 2**11
 dt = 0.004
 t = np.arange(nt) * dt
-xs = np.array(
+xs = scaling * np.array(
     [
         prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft)
         for _ in range(nreals)
     ]
 )
 
 # True model (taken as one possible realization)
-x = prior_realization(f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft)
+x = scaling * prior_realization(
+    f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft
+)
+
 
 ###############################################################################
 # We now have a set of prior models in the time domain. We can easily use sample
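The body of `prior_realization` sits outside this diff; only its signature is visible in the hunk headers. As a rough, hypothetical sketch of what a function with that signature could do (per the docstring, the tutorial builds each realization in the frequency domain via an `nfft`-point spectrum; this guess takes a shortcut directly in the time domain):

import numpy as np

def prior_realization_sketch(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
    """Hypothetical stand-in: one realization of a sum of events whose
    frequency, amplitude, and phase are Gaussian-perturbed around
    (f0, a0, phi0). nfft is kept only for signature parity."""
    t = np.arange(nt) * dt
    x = np.zeros(nt)
    for f, a, phi, sf, sa, sphi in zip(f0, a0, phi0, sigmaf, sigmaa, sigmaphi):
        fr = np.random.normal(f, sf)
        ar = np.random.normal(a, sa)
        phir = np.random.normal(phi, sphi)
        x += ar * np.cos(2 * np.pi * fr * t + np.deg2rad(phir))
    return x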
@@ -110,7 +121,7 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 N = 30  # length of decorrelation
 diags = np.array([Cm[i, i - N : i + N + 1] for i in range(N, nt - N)])
 diag_ave = np.average(diags, axis=0)
-# add a taper at the end to avoid edge effects
+# add a taper at the start and end to avoid edge effects
 diag_ave *= np.hamming(2 * N + 1)
 
 fig, ax = plt.subplots(1, 1, figsize=(12, 4))
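From the averaged, tapered diagonal one can then assemble a stationary covariance. The diff does not show how the tutorial builds `Cm_op` from `diag_ave`, so the following is only a sketch of one reasonable choice, a symmetric Toeplitz construction (reusing `diag_ave`, `nt`, and `N` from the hunk above):

import numpy as np
from scipy.linalg import toeplitz

# First column of the covariance: central sample plus the right half of the
# tapered kernel, zero beyond the decorrelation length N
col = np.zeros(nt)
col[: N + 1] = diag_ave[N:]

# Stationary covariance: each row is a shifted copy of the same kernel
Cm_stat = toeplitz(col)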
@@ -157,65 +168,107 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 ynmask = Rop.mask(x + n)
 
 ###############################################################################
-# First we apply the Bayesian inversion equation
-xbayes = x0 + Cm_op * Rop.H * (
+# First, since the problem is rather small, we construct the dense version of
+# all our matrices and compute the analytical posterior mean and covariance
+
+Cm = Cm_op.todense()
+Cd = Cd_op.todense()
+R = Rop.todense()
+
+# Bayesian analytical solution
+xpost_ana = x0 + Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, yn - R @ x0))
+Cmpost_ana = Cm - Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, R @ Cm))
+
+###############################################################################
+# Next we solve the same Bayesian inversion equation iteratively. We will see
+# that, provided we use enough iterations, we can retrieve the same values of
+# the analytical posterior mean
+xpost_iter = x0 + Cm_op * Rop.H * (
     lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0]
 )
 
-# Visualize
+###############################################################################
+# But what if the problem did not allow creating dense matrices for both the
+# operator and the input covariance matrices? In this case, we can resort to the
+# Randomize-Then-Optimize algorithm of Bardsley et al., 2014, which simply solves
+# the same problem that we solved to find the MAP solution repeatedly, each time
+# adding random noise to the data. It can be shown that the sample mean and
+# covariance of the solutions of the different perturbed problems provide a good
+# approximation of the true posterior mean and covariance.
+
+# RTO number of solutions
+nreals = 1000
+
+xrto = []
+for ireal in range(nreals):
+    yreal = yn + Rop * np.random.normal(0, sigmad, nt)
+    xrto.append(
+        x0
+        + Cm_op
+        * Rop.H
+        * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yreal - Rop * x0, iter_lim=400))[0]
+    )
+
+xrto = np.array(xrto)
+xpost_rto = np.average(xrto, axis=0)
+Cmpost_rto = ((xrto - xpost_rto).T @ (xrto - xpost_rto)) / nreals
+
+###############################################################################
+# Finally we visualize the different results
+
+# Means
 fig, ax = plt.subplots(1, 1, figsize=(12, 5))
 ax.plot(t, x, "k", lw=6, label="true")
+ax.plot(t, xpost_ana, "r", lw=7, label="bayesian inverse (ana)")
+ax.plot(t, xpost_iter, "g", lw=5, label="bayesian inverse (iter)")
+ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
 ax.plot(t, ymask, ".k", ms=25, label="available samples")
 ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
-ax.plot(t, xbayes, "r", lw=3, label="bayesian inverse")
 ax.legend()
-ax.set_title("Signal")
+ax.set_title("Mean reconstruction")
 ax.set_xlim(0, 0.8)
-plt.tight_layout()
 
-###############################################################################
-# So far we have been able to estimate our posterior mean. What about its
-# uncertainties (i.e., posterior covariance)?
-#
-# In real-life applications it is very difficult (if not impossible)
-# to directly compute the posterior covariance matrix. It is much more
-# useful to create a set of models that sample the posterior probability.
-# We can do that by solving our problem several times using different prior
-# realizations as starting guesses:
-
-xpost = [
-    x0
-    + Cm_op
-    * Rop.H
-    * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0])
-    for x0 in xs[:30]
-]
-xpost = np.array(xpost)
-
-x0post = np.average(xpost, axis=0)
-Cm_post = ((xpost - x0post).T @ (xpost - x0post)) / nreals
-
-# Visualize
+# RTO realizations
 fig, ax = plt.subplots(1, 1, figsize=(12, 5))
 ax.plot(t, x, "k", lw=6, label="true")
-ax.plot(t, xpost.T, "--r", lw=1)
-ax.plot(t, x0post, "r", lw=3, label="bayesian inverse")
+ax.plot(t, xrto[::10].T, "--b", lw=0.5)
+ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
 ax.plot(t, ymask, ".k", ms=25, label="available samples")
 ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
 ax.legend()
-ax.set_title("Signal")
+ax.set_title("RTO realizations")
 ax.set_xlim(0, 0.8)
 
-fig, ax = plt.subplots(1, 1, figsize=(5, 4))
-im = ax.imshow(
-    Cm_post, interpolation="nearest", cmap="seismic", extent=(t[0], t[-1], t[-1], t[0])
+# Covariances
+fig, axs = plt.subplots(1, 2, figsize=(12, 4))
+axs[0].imshow(
+    Cmpost_ana,
+    interpolation="nearest",
+    cmap="seismic",
+    vmin=-5e-1,
+    vmax=2,
+    extent=(t[0], t[-1], t[-1], t[0]),
+)
+axs[0].set_title(r"$\mathbf{C}_m^{post,ANA}$")
+axs[0].axis("tight")
+
+axs[1].imshow(
+    Cmpost_rto,
+    interpolation="nearest",
+    cmap="seismic",
+    vmin=-5e-1,
+    vmax=2,
+    extent=(t[0], t[-1], t[-1], t[0]),
 )
-ax.set_title(r"$\mathbf{C}_m^{posterior}$")
-ax.axis("tight")
+axs[1].set_title(r"$\mathbf{C}_m^{post,RTO}$")
+axs[1].axis("tight")
 plt.tight_layout()
 
 ###############################################################################
 # Note that here we have been able to compute a sample posterior covariance
 # from its estimated samples. By displaying it we can see how both the overall
 # variances and the correlation between different parameters have become
-# narrower compared to their prior counterparts.
+# narrower compared to their prior counterparts. Moreover, whilst the RTO
+# covariance seems to be slightly underestimated, it represents an appealing
+# alternative to the closed-form solution for large-scale problems under
+# Gaussian assumptions.
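The same RTO recipe can be replayed on the toy problem sketched earlier (reusing the hypothetical `R`, `Cx0`, `Cy`, `x0`, `y`, `K`, `x`, `m`, and `rng` from the snippet after the first hunk). As in the tutorial, only the data are perturbed, which is also why the sample covariance tends to come out smaller than the analytical one, consistent with the underestimation remarked above:

# Repeat the MAP solve with noise-perturbed data (data-only RTO)
nreals = 5000
Ly = np.linalg.cholesky(Cy)  # draw noise with covariance Cy
sols = np.array(
    [x0 + K @ (y + Ly @ rng.normal(size=m) - R @ x0) for _ in range(nreals)]
)

xpost_rto_toy = sols.mean(axis=0)  # approximates the posterior mean x
# Sample covariance; tends to underestimate the analytical posterior Cx
Cpost_rto_toy = (sols - xpost_rto_toy).T @ (sols - xpost_rto_toy) / nreals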

0 commit comments
