@@ -156,9 +156,9 @@ def plot_total(ax, mean_samples, var_samples=None, bootstrap=True, n_boots=100):
    # Estimate the aggregate behavior using samples from each normal distribution in the posterior
    samples = (
        rng.normal(
-            mean_samples.T[:, :, None],
-            np.sqrt(var_samples).T[:, :, None],
-            (*mean_samples.T.shape, n_boots),
+            mean_samples.values.T[..., None],
+            np.sqrt(var_samples.values).T[..., None],
+            (*mean_samples.values.T.shape, n_boots),
        )
        .reshape(len(Xnew_), -1)
        .T
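
For reference, the aggregation in this hunk boils down to the following NumPy sketch (the shapes are hypothetical stand-ins for the notebook's posterior draws, not values from the notebook itself):

```python
import numpy as np

rng = np.random.default_rng(0)

# hypothetical sizes: posterior draws of the mean/variance at each prediction point
n_samples, n_points, n_boots = 200, 100, 100
mean_samples = rng.normal(size=(n_samples, n_points))
var_samples = rng.uniform(0.1, 0.5, size=(n_samples, n_points))

# for each posterior draw and each point, draw n_boots values from Normal(mean, sd);
# flattening the draws mixes epistemic (spread of the means) and aleatoric (sampled
# noise) uncertainty into a single set of samples per prediction point
agg = (
    rng.normal(
        mean_samples.T[..., None],          # (n_points, n_samples, 1)
        np.sqrt(var_samples).T[..., None],  # broadcasts against n_boots
        (n_points, n_samples, n_boots),
    )
    .reshape(n_points, -1)
    .T
)  # -> (n_samples * n_boots, n_points)
```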
@@ -269,7 +269,7 @@ This approach captured slightly more nuance in the overall uncertainty than the

Now let's model the mean and the log of the variance as separate GPs through PyMC's `Latent` implementation, feeding both into a `Normal` likelihood. Note that we add a small amount of diagonal noise to the individual covariances in order to stabilize them for inversion.

-The `Latent` parameterization takes significantly longer to sample than the `Marginal` approach, so we are going to accelerate the sampling with the Numpyro NUTS sampler.
+The `Latent` parameterization takes significantly longer to sample than the `Marginal` model, so we are going to accelerate the sampling with the Numpyro NUTS sampler.

```{code-cell} ipython3
with pm.Model() as model_ht:
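
On the diagonal-noise remark in the paragraph above: `pm.gp.util.stabilize` adds a small jitter to the diagonal of a covariance matrix so that its Cholesky factorization succeeds. Conceptually it amounts to something like this NumPy sketch (not PyMC's actual implementation, and the jitter value here is just illustrative):

```python
import numpy as np


def stabilize(K, jitter=1e-6):
    # add a tiny diagonal shift so a nearly singular covariance stays positive definite
    return K + jitter * np.eye(K.shape[0])


K = np.ones((3, 3))  # rank-1: np.linalg.cholesky(K) would fail
L = np.linalg.cholesky(stabilize(K))  # succeeds after jittering
```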
@@ -295,7 +295,7 @@ with pm.Model() as model_ht:
    trace_ht = pm.sample(
        target_accept=0.95,
        chains=2,
-        nuts_sampler="nutpie",
+        nuts_sampler="numpyro",
        return_inferencedata=True,
        random_seed=SEED,
    )
@@ -311,15 +311,6 @@ with model_ht:
    )
```

-```{code-cell} ipython3
-_, axs = plt.subplots(1, 3, figsize=(18, 4))
-μ_samples = samples_ht["μ_pred_ht"]
-σ_samples = np.exp(samples_ht["lg_σ_pred_ht"])
-plot_mean(axs[0], μ_samples)
-plot_var(axs[1], σ_samples**2)
-plot_total(axs[2], μ_samples, σ_samples**2)
-```
-
```{code-cell} ipython3
_, axs = plt.subplots(1, 3, figsize=(18, 4))
mu_samples = az.extract(trace_ht.predictions["mu_pred_ht"])["mu_pred_ht"]
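
A note on the `az.extract(...)` calls introduced in this and the following hunks: `az.extract` stacks `(chain, draw)` into a single `sample` dimension, and xarray appends stacked dimensions last, which appears to be why the extracted arrays are transposed before being handed to the plotting helpers (which treat the first axis as the sampling dimension). A self-contained sketch of the dimension handling, using made-up sizes:

```python
import numpy as np
import xarray as xr

# hypothetical predictive variable with dims (chain, draw, X)
da = xr.DataArray(np.zeros((2, 500, 100)), dims=("chain", "draw", "X"))

stacked = da.stack(sample=("chain", "draw"))  # roughly what az.extract does internally
print(stacked.dims)    # ('X', 'sample') -- the new sample dimension ends up last
print(stacked.T.dims)  # ('sample', 'X') -- samples first, as the plot helpers expect
```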
@@ -330,7 +321,7 @@ plot_var(axs[1], sigma_samples.T**2)
plot_total(axs[2], mu_samples.T, sigma_samples.T**2)
```

-That looks much better! We've accurately captured the mean behavior of our system along with an understanding of the underlying trend in the variance, with appropriate uncertainty. Crucially, the aggregate behavior of the model integrates both epistemic *and* aleatoric uncertainty, and the ~5% of our observations that fall outside the 2σ band are more or less evenly distributed across the domain. However, that took *over two hours* to sample only 4k NUTS iterations. Due to the expense of the requisite matrix inversions, GPs are notoriously inefficient for large data sets. Let's reformulate this model using a sparse approximation.
+That looks much better! We've accurately captured the mean behavior of our system, as well as the underlying trend in the variance (with appropriate uncertainty). Crucially, the aggregate behavior of the model integrates both epistemic *and* aleatoric uncertainty, and the ~5% of our observations that fall outside the 2σ band are more or less evenly distributed across the domain. However, even with the Numpyro sampler, this took nearly an hour on a Ryzen 7040 laptop to sample only 4k NUTS iterations. Due to the expense of the requisite matrix inversions, GPs are notoriously inefficient for large data sets. Let's reformulate this model using a sparse approximation.

+++

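Before the `SparseLatent` hunks below, it may help to state the inducing-point construction they implement. Sketched from the code itself (with $\mathbf{u}$ the latent function values at the inducing inputs $X_u$), the prior and conditional follow the usual whitened, projected-process form:

$$
\begin{aligned}
K_{uu} &= LL^{\top}, \qquad \mathbf{u} = L\mathbf{v}, \quad \mathbf{v} \sim \mathcal{N}(0, I),\\
\boldsymbol{\mu}_f &= K_{fu}\,K_{uu}^{-1}\mathbf{u},\\
\mathbf{f}_* &\sim \mathcal{N}\!\left(K_{*u}\,K_{uu}^{-1}\mathbf{u},\;\; K_{**} - K_{*u}\,K_{uu}^{-1}K_{u*}\right).
\end{aligned}
$$
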
@@ -347,25 +338,25 @@ class SparseLatent:

    def prior(self, name, X, Xu):
        Kuu = self.cov(Xu)
-        self.L = pm.gp.util.cholesky(pm.gp.util.stabilize(Kuu))
+        self.L = pt.linalg.cholesky(pm.gp.util.stabilize(Kuu))

-        self.v = pm.Normal(f"u_rotated_{name}", mu=0.0, sd=1.0, shape=len(Xu))
-        self.u = pm.Deterministic(f"u_{name}", tt.dot(self.L, self.v))
+        self.v = pm.Normal(f"u_rotated_{name}", mu=0.0, sigma=1.0, shape=len(Xu))
+        self.u = pm.Deterministic(f"u_{name}", pt.dot(self.L, self.v))

        Kfu = self.cov(X, Xu)
-        self.Kuiu = tt.slinalg.solve_upper_triangular(
-            self.L.T, tt.slinalg.solve_lower_triangular(self.L, self.u)
+        self.Kuiu = pt.slinalg.solve_triangular(
+            self.L.T, pt.slinalg.solve_triangular(self.L, self.u, lower=True), lower=False
        )
-        self.mu = pm.Deterministic(f"mu_{name}", tt.dot(Kfu, self.Kuiu))
+        self.mu = pm.Deterministic(f"mu_{name}", pt.dot(Kfu, self.Kuiu))
        return self.mu

    def conditional(self, name, Xnew, Xu):
        Ksu = self.cov(Xnew, Xu)
-        mus = tt.dot(Ksu, self.Kuiu)
-        tmp = tt.slinalg.solve_lower_triangular(self.L, Ksu.T)
-        Qss = tt.dot(tmp.T, tmp)  # Qss = tt.dot(tt.dot(Ksu, tt.nlinalg.pinv(Kuu)), Ksu.T)
+        mus = pt.dot(Ksu, self.Kuiu)
+        tmp = pt.slinalg.solve_triangular(self.L, Ksu.T, lower=True)
+        Qss = pt.dot(tmp.T, tmp)
        Kss = self.cov(Xnew)
-        Lss = pm.gp.util.cholesky(pm.gp.util.stabilize(Kss - Qss))
+        Lss = pt.linalg.cholesky(pm.gp.util.stabilize(Kss - Qss))
        mu_pred = pm.MvNormal(name, mu=mus, chol=Lss, shape=len(Xnew))
        return mu_pred
```
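
The triangular-solve changes in this hunk are a one-to-one API migration: the Theano/Aesara-era `solve_lower_triangular(L, b)` and `solve_upper_triangular(U, b)` map onto PyTensor's single `solve_triangular` with `lower=True`/`lower=False`. A quick self-contained check of that equivalence (the matrix here is arbitrary, just made well conditioned):

```python
import numpy as np
from pytensor.tensor.slinalg import solve_triangular

rng = np.random.default_rng(0)
L = np.tril(rng.normal(size=(4, 4))) + 4.0 * np.eye(4)  # well-conditioned lower-triangular matrix
b = np.arange(4.0)

# lower=True replaces solve_lower_triangular; lower=False replaces solve_upper_triangular
x = solve_triangular(L, b, lower=True).eval()
np.testing.assert_allclose(L @ x, b, atol=1e-8)
```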
@@ -375,39 +366,51 @@ class SparseLatent:
Xu = X[1::2]

with pm.Model() as model_hts:
-    ℓ = pm.InverseGamma("ℓ", mu=ℓ_μ, sigma=ℓ_σ)
-    η = pm.Gamma("η", alpha=2, beta=1)
-    cov = η**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ℓ)
+    ell = pm.InverseGamma("ell", mu=ell_mu, sigma=ell_sigma)
+    eta = pm.Gamma("eta", alpha=2, beta=1)
+    cov = eta**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ell)

-    μ_gp = SparseLatent(cov)
-    μ_f = μ_gp.prior("μ", X_obs, Xu)
+    mu_gp = SparseLatent(cov)
+    mu_f = mu_gp.prior("mu", X_obs, Xu)

-    σ_ℓ = pm.InverseGamma("σ_ℓ", mu=ℓ_μ, sigma=ℓ_σ)
-    σ_η = pm.Gamma("σ_η", alpha=2, beta=1)
-    σ_cov = σ_η**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=σ_ℓ)
+    sigma_ell = pm.InverseGamma("sigma_ell", mu=ell_mu, sigma=ell_sigma)
+    sigma_η = pm.Gamma("sigma_η", alpha=2, beta=1)
+    sigma_cov = sigma_η**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=sigma_ell)

-    lg_σ_gp = SparseLatent(σ_cov)
-    lg_σ_f = lg_σ_gp.prior("lg_σ_f", X_obs, Xu)
-    σ_f = pm.Deterministic("σ_f", pm.math.exp(lg_σ_f))
+    lg_sigma_gp = SparseLatent(sigma_cov)
+    lg_sigma_f = lg_sigma_gp.prior("lg_sigma_f", X_obs, Xu)
+    sigma_f = pm.Deterministic("sigma_f", pm.math.exp(lg_sigma_f))

-    lik_hts = pm.Normal("lik_hts", mu=μ_f, sd=σ_f, observed=y_obs_)
-    trace_hts = pm.sample(target_accept=0.95, return_inferencedata=True, random_seed=SEED)
+    lik_hts = pm.Normal("lik_hts", mu=mu_f, sigma=sigma_f, observed=y_obs_)
+    trace_hts = pm.sample(
+        target_accept=0.95,
+        nuts_sampler="numpyro",
+        chains=2,
+        return_inferencedata=True,
+        random_seed=SEED,
+    )

with model_hts:
-    μ_pred = μ_gp.conditional("μ_pred", Xnew, Xu)
-    lg_σ_pred = lg_σ_gp.conditional("lg_σ_pred", Xnew, Xu)
-    samples_hts = pm.sample_posterior_predictive(trace_hts, var_names=["μ_pred", "lg_σ_pred"])
+    mu_pred = mu_gp.conditional("mu_pred", Xnew, Xu)
+    lg_sigma_pred = lg_sigma_gp.conditional("lg_sigma_pred", Xnew, Xu)
+    pm.sample_posterior_predictive(
+        trace_hts,
+        var_names=["mu_pred", "lg_sigma_pred"],
+        extend_inferencedata=True,
+        predictions=True,
+    )
```

```{code-cell} ipython3
_, axs = plt.subplots(1, 3, figsize=(18, 4))
-μ_samples = samples_hts["μ_pred"]
-σ_samples = np.exp(samples_hts["lg_σ_pred"])
-plot_mean(axs[0], μ_samples)
+mu_samples = az.extract(trace_hts.predictions["mu_pred"])["mu_pred"]
+sigma_samples = np.exp(az.extract(trace_hts.predictions["lg_sigma_pred"])["lg_sigma_pred"])
+
+plot_mean(axs[0], mu_samples.T)
plot_inducing_points(axs[0])
-plot_var(axs[1], σ_samples**2)
+plot_var(axs[1], sigma_samples.T**2)
plot_inducing_points(axs[1])
-plot_total(axs[2], μ_samples, σ_samples**2)
+plot_total(axs[2], mu_samples.T, sigma_samples.T**2)
plot_inducing_points(axs[2])
```

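For context on the next hunk: the `Coregion` kernel models the covariance between the two outputs (the mean and the log-variance) with the standard intrinsic-coregionalization form

$$
B = WW^{\top} + \operatorname{diag}(\kappa),
$$

and the full covariance is built as the product of `B` with the `ExpQuad` kernel over the inputs via `pm.gp.cov.Kron`. The final cells of this diff reconstruct `B` from the posterior draws of `W` and `kappa`.
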
@@ -429,31 +432,60 @@ def add_coreg_idx(x):
Xu_c, X_obs_c, Xnew_c = [add_coreg_idx(x) for x in [Xu, X_obs, Xnew]]

with pm.Model() as model_htsc:
-    ℓ = pm.InverseGamma("ℓ", mu=ℓ_μ, sigma=ℓ_σ)
-    η = pm.Gamma("η", alpha=2, beta=1)
-    EQcov = η**2 * pm.gp.cov.ExpQuad(input_dim=1, active_dims=[0], ls=ℓ)
+    ell = pm.InverseGamma("ell", mu=ell_mu, sigma=ell_sigma)
+    eta = pm.Gamma("eta", alpha=2, beta=1)
+    cov = eta**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ell)

    D_out = 2  # two output dimensions, mean and variance
    rank = 2  # two basis GPs
-    W = pm.Normal("W", mu=0, sd=3, shape=(D_out, rank), testval=np.full([D_out, rank], 0.1))
+    W = pm.Normal("W", mu=0, sigma=3, shape=(D_out, rank), initval=np.full([D_out, rank], 0.1))
    kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=D_out)
    coreg = pm.gp.cov.Coregion(input_dim=1, active_dims=[0], kappa=kappa, W=W)

-    cov = pm.gp.cov.Kron([EQcov, coreg])
+    cov = pm.gp.cov.Kron([cov, coreg])

    gp_LMC = SparseLatent(cov)
    LMC_f = gp_LMC.prior("LMC", X_obs_c, Xu_c)

-    μ_f = LMC_f[: len(y_obs_)]
-    lg_σ_f = LMC_f[len(y_obs_) :]
-    σ_f = pm.Deterministic("σ_f", pm.math.exp(lg_σ_f))
+    mu_f = LMC_f[: len(y_obs_)]
+    lg_sigma_f = LMC_f[len(y_obs_) :]
+    sigma_f = pm.Deterministic("sigma_f", pm.math.exp(lg_sigma_f))

-    lik_htsc = pm.Normal("lik_htsc", mu=μ_f, sd=σ_f, observed=y_obs_)
-    trace_htsc = pm.sample(target_accept=0.95, return_inferencedata=True, random_seed=SEED)
+    lik_htsc = pm.Normal("lik_htsc", mu=mu_f, sigma=sigma_f, observed=y_obs_)
+    trace_htsc = pm.sample(
+        target_accept=0.95,
+        chains=2,
+        nuts_sampler="numpyro",
+        return_inferencedata=True,
+        random_seed=SEED,
+    )

with model_htsc:
    c_mu_pred = gp_LMC.conditional("c_mu_pred", Xnew_c, Xu_c)
-    samples_htsc = pm.sample_posterior_predictive(trace_htsc, var_names=["c_mu_pred"])
+    pm.sample_posterior_predictive(
+        trace_htsc, var_names=["c_mu_pred"], extend_inferencedata=True, predictions=True
+    )
+```
+
+```{code-cell} ipython3
+sigma_samples.shape
+```
+
+```{code-cell} ipython3
+# μ_samples = samples_htsc["c_mu_pred"][:, : len(Xnew)]
+# σ_samples = np.exp(samples_htsc["c_mu_pred"][:, len(Xnew) :])
+mu_samples = az.extract(trace_htsc.predictions["c_mu_pred"])["c_mu_pred"][: len(Xnew)]
+sigma_samples = np.exp(az.extract(trace_htsc.predictions["c_mu_pred"])["c_mu_pred"])[len(Xnew) :]
+
+_, axs = plt.subplots(1, 3, figsize=(18, 4))
+plot_mean(axs[0], mu_samples.T)
+plot_inducing_points(axs[0])
+plot_var(axs[1], sigma_samples.T**2)
+axs[1].set_ylim(-0.01, 0.2)
+axs[1].legend(loc="upper left")
+plot_inducing_points(axs[1])
+plot_total(axs[2], mu_samples.T, sigma_samples.T**2)
+plot_inducing_points(axs[2])
```

```{code-cell} ipython3
@@ -478,13 +510,17 @@ with model_htsc:
    B_samples = pm.sample_posterior_predictive(trace_htsc, var_names=["W", "kappa"])
```

+```{code-cell} ipython3
+kappa.shape
+```
+
```{code-cell} ipython3
# Keep in mind that the first dimension in all arrays is the sampling dimension
-W = B_samples["W"]
+W = az.extract(B_samples.posterior_predictive["W"])["W"].values.T
W_T = np.swapaxes(W, 1, 2)
WW_T = np.matmul(W, W_T)

-kappa = B_samples["kappa"]
+kappa = az.extract(B_samples.posterior_predictive["kappa"])["kappa"].values.T
I = np.tile(np.identity(2), [kappa.shape[0], 1, 1])
# einsum is just a concise way of doing multiplication and summation over arbitrary axes
diag_kappa = np.einsum("ij,ijk->ijk", kappa, I)
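
A small self-contained check of the einsum in the lines above: `np.einsum("ij,ijk->ijk", kappa, I)` builds one diagonal matrix per posterior draw, i.e. `np.diag(kappa[s])` for each sample `s` (the values below are hypothetical):

```python
import numpy as np

kappa = np.array([[0.5, 2.0], [1.0, 3.0]])           # (n_samples, 2) hypothetical draws
I = np.tile(np.identity(2), [kappa.shape[0], 1, 1])  # (n_samples, 2, 2)

diag_kappa = np.einsum("ij,ijk->ijk", kappa, I)
np.testing.assert_allclose(diag_kappa[0], np.diag(kappa[0]))
```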