update notebook: Update Priors #692

Merged · merged 8 commits · Aug 5, 2024 · Changes from 1 commit
681 changes: 159 additions & 522 deletions examples/howto/updating_priors.ipynb

Large diffs are not rendered by default.

135 changes: 80 additions & 55 deletions examples/howto/updating_priors.myst.md
@@ -10,56 +10,61 @@ kernelspec:
  name: python3
---

-# Updating priors
+(updating-priors)=
+# Updating Priors

:::{post} January, 2017
:tags: priors
:category: intermediate, how-to
:::

+++

-In this notebook, I will show how it is possible to update the priors as new data becomes available. The example is a slightly modified version of the linear regression in the [Getting started with PyMC3](https://github.com/pymc-devs/pymc3/blob/master/docs/source/notebooks/getting_started.ipynb) notebook.
+In this notebook, we will show how it is possible to update the priors as new data becomes available.
> **Member:** I would add a note that this is actually a bit sketchy as the MC error can creep in. I think the last plot actually shows that as some posteriors jump.

> **Collaborator (author):** +1
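
An editorial aside on the reviewer's point, with a minimal sketch (illustrative only, not part of the PR; assumes nothing beyond NumPy and SciPy): each update fits a KDE to a finite set of posterior samples and then treats that estimate as exact, so Monte Carlo error compounds across iterations. The same effect shows up in isolation if you repeatedly fit a KDE and resample from it: the spread inflates slightly each round and the location drifts.

```python
# Minimal sketch of MC error creeping in across KDE-based updates.
# Illustrative only: names and settings here are not from the PR.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
samples = rng.normal(loc=0.0, scale=1.0, size=2_000)  # stand-in for posterior draws

for i in range(10):
    kde = stats.gaussian_kde(samples)           # fit an empirical density
    samples = kde.resample(2_000, seed=rng)[0]  # treat the KDE as the exact posterior
    print(f"round {i}: mean={samples.mean():+.3f}  std={samples.std():.3f}")
```

Each resampling step convolves the samples with the KDE kernel, so the variance grows a little every round even though no new data arrived; this is the same kind of error that can make the updated posteriors "jump".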


```{code-cell} ipython3
-%matplotlib inline
+import warnings

+import arviz as az
+import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
-import pymc3 as pm
-import theano.tensor as tt
+import pymc as pm
+import pytensor.tensor as pt

-from pymc3 import Model, Normal, Slice, sample
-from pymc3.distributions import Interpolated
from scipy import stats
-from theano import as_op
+from tqdm.notebook import trange
> **Member:** does pymc still use tqdm for progressbars? I thought it had been changed.

> **Collaborator (author):** It uses rich now, but tqdm is easier to work with for me.

+az.style.use("arviz-darkgrid")
+plt.rcParams["figure.figsize"] = [12, 7]
+plt.rcParams["figure.dpi"] = 100
+plt.rcParams["figure.facecolor"] = "white"

-plt.style.use("seaborn-darkgrid")
-print(f"Running on PyMC3 v{pm.__version__}")
+%load_ext autoreload
+%autoreload 2
+%config InlineBackend.figure_format = "retina"
```

+```{code-cell} ipython3
+warnings.filterwarnings("ignore")
+rng: np.random.Generator = np.random.default_rng(seed=42)
+```

## Generating data

```{code-cell} ipython3
-# Initialize random number generator
-np.random.seed(93457)
-
# True parameter values
alpha_true = 5
beta0_true = 7
beta1_true = 13
+sigma_true = 2

# Size of dataset
size = 100

# Predictor variable
-X1 = np.random.randn(size)
-X2 = np.random.randn(size) * 0.2
+X1 = rng.normal(size=size)
+X2 = rng.normal(size=size) * 0.2

# Simulate outcome variable
-Y = alpha_true + beta0_true * X1 + beta1_true * X2 + np.random.randn(size)
+Y = alpha_true + beta0_true * X1 + beta1_true * X2 + rng.normal(size=size, scale=sigma_true)
```

## Model specification
@@ -69,35 +74,48 @@ Y = alpha_true + beta0_true * X1 + beta1_true * X2 + np.random.randn(size)
Our initial beliefs about the parameters are quite informative (sigma=1) and a bit off the true values.

```{code-cell} ipython3
-basic_model = Model()
-
-with basic_model:
+with pm.Model() as model:
    # Priors for unknown model parameters
-    alpha = Normal("alpha", mu=0, sigma=1)
-    beta0 = Normal("beta0", mu=12, sigma=1)
-    beta1 = Normal("beta1", mu=18, sigma=1)
+    alpha = pm.Normal("alpha", mu=0, sigma=5)
+    beta0 = pm.Normal("beta0", mu=0, sigma=5)
+    beta1 = pm.Normal("beta1", mu=0, sigma=5)
+
+    sigma = pm.HalfNormal("sigma", sigma=1)

    # Expected value of outcome
    mu = alpha + beta0 * X1 + beta1 * X2

    # Likelihood (sampling distribution) of observations
-    Y_obs = Normal("Y_obs", mu=mu, sigma=1, observed=Y)
+    Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)

-    # draw 1000 posterior samples
-    trace = sample(1000)
+    # draw 2_000 posterior samples
+    trace = pm.sample(
+        tune=1_500, draws=2_000, target_accept=0.9, progressbar=False, random_seed=rng
+    )
```

```{code-cell} ipython3
-az.plot_trace(trace);
+axes = az.plot_trace(
+    data=trace,
+    compact=True,
+    lines=[
+        ("alpha", {}, alpha_true),
+        ("beta0", {}, beta0_true),
+        ("beta1", {}, beta1_true),
+        ("sigma", {}, sigma_true),
+    ],
+    backend_kwargs={"figsize": (12, 9), "layout": "constrained"},
+)
+plt.gcf().suptitle("Trace", fontsize=16);
```

In order to update our beliefs about the parameters, we use the posterior distributions, which will be used as the prior distributions for the next inference. The data used for each inference iteration has to be independent from the previous iterations, otherwise the same (possibly wrong) belief is injected over and over in the system, amplifying the errors and misleading the inference. By ensuring the data is independent, the system should converge to the true parameter values.

-Because we draw samples from the posterior distribution (shown on the right in the figure above), we need to estimate their probability density (shown on the left in the figure above). [Kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) (KDE) is a way to achieve this, and we will use this technique here. In any case, it is an empirical distribution that cannot be expressed analytically. Fortunately PyMC3 provides a way to use custom distributions, via `Interpolated` class.
+Because we draw samples from the posterior distribution (shown on the right in the figure above), we need to estimate their probability density (shown on the left in the figure above). [Kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) (KDE) is a way to achieve this, and we will use this technique here. In any case, it is an empirical distribution that cannot be expressed analytically. Fortunately PyMC provides a way to use custom distributions, via the {class}`~pymc.Interpolated` class.

```{code-cell} ipython3
def from_posterior(param, samples):
-    smin, smax = np.min(samples), np.max(samples)
+    smin, smax = samples.min().item(), samples.max().item()
    width = smax - smin
    x = np.linspace(smin, smax, 100)
    y = stats.gaussian_kde(samples)(x)

@@ -106,7 +124,7 @@ def from_posterior(param, samples):
    # so we'll extend the domain and use linear approximation of density on it
    x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    y = np.concatenate([[0], y, [0]])
-    return Interpolated(param, x, y)
+    return pm.Interpolated(param, x, y)
```
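
As an editorial aside (not part of the PR): a quick way to sanity-check a prior built this way is to construct a standalone `Interpolated` distribution from KDE points and draw from it. The values below are illustrative, assuming the PyMC v5 API:

```python
# Illustrative sanity check of an Interpolated prior built from KDE points.
x = np.linspace(-3, 3, 100)
y = stats.gaussian_kde(rng.normal(size=1_000))(x)  # density estimated from dummy samples
rv = pm.Interpolated.dist(x_points=x, pdf_points=y)
print(pm.draw(rv, draws=5, random_seed=rng))  # a few draws from the interpolated prior
```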

Now we just need to generate more data and build our Bayesian model so that the prior distributions for the current iteration are the posterior distributions from the previous iteration. It is still possible to continue using the NUTS sampling method, because the `Interpolated` class implements the gradient calculations that Hamiltonian Monte Carlo samplers require.
@@ -116,46 +134,50 @@ traces = [trace]
```
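
A small editorial sketch of that claim (assuming the PyMC v5 and PyTensor APIs; not part of the PR): the log-probability of an `Interpolated` variable can be differentiated symbolically, which is exactly what NUTS needs. Reusing `x` and `y` from the sketch above:

```python
# Illustrative only: differentiate the logp of an Interpolated RV with PyTensor.
import pytensor

value = pt.dscalar("value")
rv = pm.Interpolated.dist(x_points=x, pdf_points=y)
logp = pm.logp(rv, value)               # symbolic log-density at `value`
dlogp = pytensor.grad(logp, wrt=value)  # the gradient NUTS would rely on
logp_and_grad = pytensor.function([value], [logp, dlogp])
print(logp_and_grad(0.1))
```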

```{code-cell} ipython3
-for _ in range(10):
+n_iterations = 10
+
+for _ in trange(n_iterations):
    # generate more data
-    X1 = np.random.randn(size)
-    X2 = np.random.randn(size) * 0.2
-    Y = alpha_true + beta0_true * X1 + beta1_true * X2 + np.random.randn(size)
+    X1 = rng.normal(size=size)
+    X2 = rng.normal(size=size) * 0.2
+    Y = alpha_true + beta0_true * X1 + beta1_true * X2 + rng.normal(size=size, scale=sigma_true)

-    model = Model()
-    with model:
+    with pm.Model() as model:
        # Priors are posteriors from previous iteration
-        alpha = from_posterior("alpha", trace["alpha"])
-        beta0 = from_posterior("beta0", trace["beta0"])
-        beta1 = from_posterior("beta1", trace["beta1"])
+        alpha = from_posterior("alpha", az.extract(trace, group="posterior", var_names=["alpha"]))
+        beta0 = from_posterior("beta0", az.extract(trace, group="posterior", var_names=["beta0"]))
+        beta1 = from_posterior("beta1", az.extract(trace, group="posterior", var_names=["beta1"]))
+        sigma = from_posterior("sigma", az.extract(trace, group="posterior", var_names=["sigma"]))

        # Expected value of outcome
        mu = alpha + beta0 * X1 + beta1 * X2

        # Likelihood (sampling distribution) of observations
-        Y_obs = Normal("Y_obs", mu=mu, sigma=1, observed=Y)
+        Y_obs = pm.Normal("Y_obs", mu=mu, sigma=sigma, observed=Y)

-        # draw 10000 posterior samples
-        trace = sample(1000)
+        # draw 2_000 posterior samples
+        trace = pm.sample(
+            tune=1_500, draws=2_000, target_accept=0.9, progressbar=False, random_seed=rng
+        )
traces.append(trace)
```

```{code-cell} ipython3
print("Posterior distributions after " + str(len(traces)) + " iterations.")
cmap = mpl.cm.autumn
for param in ["alpha", "beta0", "beta1"]:
plt.figure(figsize=(8, 2))
fig, ax = plt.subplots(nrows=4, ncols=1, figsize=(12, 12), sharex=False, sharey=False)

cmap = mpl.cm.viridis

for i, (param, true_value) in enumerate(
zip(["alpha", "beta0", "beta1", "sigma"], [alpha_true, beta0_true, beta1_true, sigma_true])
):
for update_i, trace in enumerate(traces):
samples = trace[param]
samples = az.extract(trace, group="posterior", var_names=param)
smin, smax = np.min(samples), np.max(samples)
x = np.linspace(smin, smax, 100)
y = stats.gaussian_kde(samples)(x)
plt.plot(x, y, color=cmap(1 - update_i / len(traces)))
plt.axvline({"alpha": alpha_true, "beta0": beta0_true, "beta1": beta1_true}[param], c="k")
plt.ylabel("Frequency")
plt.title(param)

plt.tight_layout();
ax[i].plot(x, y, color=cmap(1 - update_i / len(traces)))
ax[i].axvline(true_value, c="k")
ax[i].set(title=param)
```

You can re-execute the last two cells to generate more updates.
@@ -166,3 +188,6 @@ What is interesting to note is that the posterior distributions for our parameters…
%load_ext watermark
%watermark -n -u -v -iv -w
```

+:::{include} ../page_footer.md
+:::