
Commit b19a63c

Merge pull request #584 from mrava87/dev
doc: updated bayesian tutorial
2 parents d1a341a + 3f271e2


tutorials/bayesian.py

Lines changed: 102 additions & 49 deletions
@@ -21,19 +21,26 @@
 
 Based on the above definition, we construct some prior models in the frequency
 domain, convert each of them to the time domain and use such an ensemble
-to estimate the prior mean :math:`\mu_\mathbf{x}` and model
-covariance :math:`\mathbf{C_x}`.
+to estimate the prior mean :math:`\mathbf{x}_0` and model
+covariance :math:`\mathbf{C}_{x_0}`.
 
 We then create our data by sampling the true signal at certain locations
 and solve the reconstruction problem within a Bayesian framework. Since we are
 assuming Gaussianity in our priors, the equations to obtain the posterior mean
-can be derived analytically:
+and covariance can be derived analytically:
 
 .. math::
-    \mathbf{x} = \mathbf{x_0} + \mathbf{C}_x \mathbf{R}^T
-    (\mathbf{R} \mathbf{C}_x \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
+    \mathbf{x} = \mathbf{x_0} + \mathbf{C}_{x_0} \mathbf{R}^T
+    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1} (\mathbf{y} -
     \mathbf{R} \mathbf{x_0})
 
+and
+
+.. math::
+    \mathbf{C}_x = \mathbf{C}_{x_0} - \mathbf{C}_{x_0} \mathbf{R}^T
+    (\mathbf{R} \mathbf{C}_{x_0} \mathbf{R}^T + \mathbf{C}_y)^{-1}
+    \mathbf{R} \mathbf{C}_{x_0}
+
 """
 import matplotlib.pyplot as plt
 
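The two closed-form expressions above are easy to sanity-check on a tiny dense problem. A minimal NumPy sketch, separate from the tutorial itself (all names below are hypothetical, not part of the diff):

import numpy as np

rng = np.random.default_rng(0)
n, m = 4, 2                       # toy model and data sizes
R = rng.normal(size=(m, n))       # observation/restriction operator
Cx0 = np.eye(n)                   # prior model covariance
Cy = 0.1 * np.eye(m)              # data noise covariance
x0 = np.zeros(n)                  # prior mean
y = rng.normal(size=m)            # observed data

# Gain shared by both formulas: Cx0 R^T (R Cx0 R^T + Cy)^{-1}
K = Cx0 @ R.T @ np.linalg.inv(R @ Cx0 @ R.T + Cy)

x = x0 + K @ (y - R @ x0)         # posterior mean
Cx = Cx0 - K @ R @ Cx0            # posterior covariance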
@@ -80,22 +87,26 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 sigmaa = [0.1, 0.5, 0.6]
 phi0 = [-90.0, 0.0, 0.0]
 sigmaphi = [0.1, 0.2, 0.4]
-sigmad = 1e-2
+sigmad = 1
+scaling = 100  # Scale by a factor to allow noise std=1
 
 # Prior models
 nt = 200
 nfft = 2**11
 dt = 0.004
 t = np.arange(nt) * dt
-xs = np.array(
+xs = scaling * np.array(
     [
         prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft)
         for _ in range(nreals)
     ]
 )
 
 # True model (taken as one possible realization)
-x = prior_realization(f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft)
+x = scaling * prior_realization(
+    f0, a0, phi0, [0, 0, 0], [0, 0, 0], [0, 0, 0], dt, nt, nfft
+)
+
 
 ###############################################################################
 # We now have a set of prior models in the time domain. We can easily use sample
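The body of `prior_realization` sits outside this diff; only its signature is visible in the hunk headers. As a rough, hypothetical sketch of what a function with that signature could do (per the docstring, the tutorial builds each realization in the frequency domain via an `nfft`-point spectrum; this guess takes a shortcut directly in the time domain):

import numpy as np

def prior_realization_sketch(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
    """Hypothetical stand-in: one realization of a sum of events whose
    frequency, amplitude, and phase are Gaussian-perturbed around
    (f0, a0, phi0). nfft is kept only for signature parity."""
    t = np.arange(nt) * dt
    x = np.zeros(nt)
    for f, a, phi, sf, sa, sphi in zip(f0, a0, phi0, sigmaf, sigmaa, sigmaphi):
        fr = np.random.normal(f, sf)
        ar = np.random.normal(a, sa)
        phir = np.random.normal(phi, sphi)
        x += ar * np.cos(2 * np.pi * fr * t + np.deg2rad(phir))
    return x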
@@ -110,7 +121,7 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 N = 30  # length of decorrelation
 diags = np.array([Cm[i, i - N : i + N + 1] for i in range(N, nt - N)])
 diag_ave = np.average(diags, axis=0)
-# add a taper at the end to avoid edge effects
+# add a taper at the start and end to avoid edge effects
 diag_ave *= np.hamming(2 * N + 1)
 
 fig, ax = plt.subplots(1, 1, figsize=(12, 4))
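From the averaged, tapered diagonal one can then assemble a stationary covariance. The diff does not show how the tutorial builds `Cm_op` from `diag_ave`, so the following is only a sketch of one reasonable choice, a symmetric Toeplitz construction (reusing `diag_ave`, `nt`, and `N` from the hunk above):

import numpy as np
from scipy.linalg import toeplitz

# First column of the covariance: central sample plus the right half of the
# tapered kernel, zero beyond the decorrelation length N
col = np.zeros(nt)
col[: N + 1] = diag_ave[N:]

# Stationary covariance: each row is a shifted copy of the same kernel
Cm_stat = toeplitz(col)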
@@ -157,65 +168,107 @@ def prior_realization(f0, a0, phi0, sigmaf, sigmaa, sigmaphi, dt, nt, nfft):
 ynmask = Rop.mask(x + n)
 
 ###############################################################################
-# First we apply the Bayesian inversion equation
-xbayes = x0 + Cm_op * Rop.H * (
+# First, since the problem is rather small, we construct the dense version of
+# all our matrices and compute the analytical posterior mean and covariance
+
+Cm = Cm_op.todense()
+Cd = Cd_op.todense()
+R = Rop.todense()
+
+# Bayesian analytical solution
+xpost_ana = x0 + Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, yn - R @ x0))
+Cmpost_ana = Cm - Cm @ R.T @ (np.linalg.solve(R @ Cm @ R.T + Cd, R @ Cm))
+
+###############################################################################
+# Next we solve the same Bayesian inversion equation iteratively. We will see
+# that, provided we use enough iterations, we can retrieve the same values of
+# the analytical posterior mean
+xpost_iter = x0 + Cm_op * Rop.H * (
     lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0]
 )
 
-# Visualize
+###############################################################################
+# But what if the problem did not allow creating dense matrices for both the
+# operator and the input covariance matrices? In this case, we can resort to the
+# Randomize-Then-Optimize algorithm of Bardsley et al., 2014, which simply solves
+# the same problem that we solved to find the MAP solution repeatedly, each time
+# adding random noise to the data. It can be shown that the sample mean and
+# covariance of the solutions of the different perturbed problems provide a good
+# approximation of the true posterior mean and covariance.
+
+# RTO number of solutions
+nreals = 1000
+
+xrto = []
+for ireal in range(nreals):
+    yreal = yn + Rop * np.random.normal(0, sigmad, nt)
+    xrto.append(
+        x0
+        + Cm_op
+        * Rop.H
+        * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yreal - Rop * x0, iter_lim=400))[0]
+    )
+
+xrto = np.array(xrto)
+xpost_rto = np.average(xrto, axis=0)
+Cmpost_rto = ((xrto - xpost_rto).T @ (xrto - xpost_rto)) / nreals
+
+###############################################################################
+# Finally we visualize the different results
+
+# Means
 fig, ax = plt.subplots(1, 1, figsize=(12, 5))
 ax.plot(t, x, "k", lw=6, label="true")
+ax.plot(t, xpost_ana, "r", lw=7, label="bayesian inverse (ana)")
+ax.plot(t, xpost_iter, "g", lw=5, label="bayesian inverse (iter)")
+ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
 ax.plot(t, ymask, ".k", ms=25, label="available samples")
 ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
-ax.plot(t, xbayes, "r", lw=3, label="bayesian inverse")
 ax.legend()
-ax.set_title("Signal")
+ax.set_title("Mean reconstruction")
 ax.set_xlim(0, 0.8)
-plt.tight_layout()
 
-###############################################################################
-# So far we have been able to estimate our posterior mean. What about its
-# uncertainties (i.e., posterior covariance)?
-#
-# In real-life applications it is very difficult (if not impossible)
-# to directly compute the posterior covariance matrix. It is much more
-# useful to create a set of models that sample the posterior probability.
-# We can do that by solving our problem several times using different prior
-# realizations as starting guesses:
-
-xpost = [
-    x0
-    + Cm_op
-    * Rop.H
-    * (lsqr(Rop * Cm_op * Rop.H + Cd_op, yn - Rop * x0, iter_lim=400)[0])
-    for x0 in xs[:30]
-]
-xpost = np.array(xpost)
-
-x0post = np.average(xpost, axis=0)
-Cm_post = ((xpost - x0post).T @ (xpost - x0post)) / nreals
-
-# Visualize
+# RTO realizations
 fig, ax = plt.subplots(1, 1, figsize=(12, 5))
 ax.plot(t, x, "k", lw=6, label="true")
-ax.plot(t, xpost.T, "--r", lw=1)
-ax.plot(t, x0post, "r", lw=3, label="bayesian inverse")
+ax.plot(t, xrto[::10].T, "--b", lw=0.5)
+ax.plot(t, xpost_rto, "b", lw=3, label="bayesian inverse (rto)")
 ax.plot(t, ymask, ".k", ms=25, label="available samples")
 ax.plot(t, ynmask, ".r", ms=25, label="available noisy samples")
 ax.legend()
-ax.set_title("Signal")
+ax.set_title("RTO realizations")
 ax.set_xlim(0, 0.8)
 
-fig, ax = plt.subplots(1, 1, figsize=(5, 4))
-im = ax.imshow(
-    Cm_post, interpolation="nearest", cmap="seismic", extent=(t[0], t[-1], t[-1], t[0])
+# Covariances
+fig, axs = plt.subplots(1, 2, figsize=(12, 4))
+axs[0].imshow(
+    Cmpost_ana,
+    interpolation="nearest",
+    cmap="seismic",
+    vmin=-5e-1,
+    vmax=2,
+    extent=(t[0], t[-1], t[-1], t[0]),
+)
+axs[0].set_title(r"$\mathbf{C}_m^{post,ANA}$")
+axs[0].axis("tight")
+
+axs[1].imshow(
+    Cmpost_rto,
+    interpolation="nearest",
+    cmap="seismic",
+    vmin=-5e-1,
+    vmax=2,
+    extent=(t[0], t[-1], t[-1], t[0]),
 )
-ax.set_title(r"$\mathbf{C}_m^{posterior}$")
-ax.axis("tight")
+axs[1].set_title(r"$\mathbf{C}_m^{post,RTO}$")
+axs[1].axis("tight")
 plt.tight_layout()
 
 ###############################################################################
 # Note that here we have been able to compute a sample posterior covariance
 # from its estimated samples. By displaying it we can see how both the overall
 # variances and the correlation between different parameters have become
-# narrower compared to their prior counterparts.
+# narrower compared to their prior counterparts. Moreover, whilst the RTO
+# covariance seems to be slightly underestimated, it represents an appealing
+# alternative to the closed-form solution for large-scale problems under
+# Gaussian assumptions.
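The same RTO recipe can be replayed on the toy problem sketched earlier (reusing the hypothetical `R`, `Cx0`, `Cy`, `x0`, `y`, `K`, `x`, `m`, and `rng` from the snippet after the first hunk). As in the tutorial, only the data are perturbed, which is also why the sample covariance tends to come out smaller than the analytical one, consistent with the underestimation remarked above:

# Repeat the MAP solve with noise-perturbed data (data-only RTO)
nreals = 5000
Ly = np.linalg.cholesky(Cy)  # draw noise with covariance Cy
sols = np.array(
    [x0 + K @ (y + Ly @ rng.normal(size=m) - R @ x0) for _ in range(nreals)]
)

xpost_rto_toy = sols.mean(axis=0)  # approximates the posterior mean x
# Sample covariance; tends to underestimate the analytical posterior Cx
Cpost_rto_toy = (sols - xpost_rto_toy).T @ (sols - xpost_rto_toy) / nreals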

0 commit comments
