
Commit 263f612

WIP: MarginalLaplaceRV
1 parent 43fb626 commit 263f612

3 files changed: +72 -8 lines changed

pymc_extras/inference/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -14,7 +14,8 @@
 from pymc_extras.inference.find_map import find_MAP
 from pymc_extras.inference.fit import fit
+from pymc_extras.inference.inla import fit_INLA
 from pymc_extras.inference.laplace import fit_laplace
 from pymc_extras.inference.pathfinder.pathfinder import fit_pathfinder

-__all__ = ["fit", "fit_pathfinder", "fit_laplace", "find_MAP"]
+__all__ = ["fit", "fit_pathfinder", "fit_laplace", "find_MAP", "fit_INLA"]

pymc_extras/inference/inla.py

Lines changed: 13 additions & 7 deletions
@@ -92,7 +92,8 @@ def get_conditional_gaussian_approximation(

     # Currently x is passed both as the query point for f(x, args) = logp(x | y, params) AND as an initial guess for x0. This may cause issues if the query point is
     # far from the mode x0 or in a neighbourhood which results in poor convergence.
-    return x0, pm.MvNormal(f"{x.name}_laplace_approx", mu=x0, tau=tau)
+    _, logdetTau = pt.nlinalg.slogdet(tau)
+    return x0, 0.5 * logdetTau - 0.5 * x0.shape[0] * np.log(2 * np.pi)


 def get_log_marginal_likelihood(
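
Note: the new return value is the log-density of the Gaussian approximation N(x0, tau^-1) evaluated at its own mode x0; the quadratic term vanishes there, so only the normalisation constant 0.5*log|tau| - 0.5*d*log(2*pi) remains. A minimal standalone numpy/scipy check of that closed form (not part of the diff; all values below are illustrative):

import numpy as np
from scipy.stats import multivariate_normal

# Assumed toy inputs: an arbitrary positive-definite precision matrix tau and a mode x0.
rng = np.random.default_rng(0)
d = 3
A = rng.standard_normal((d, d))
tau = A @ A.T + d * np.eye(d)
x0 = rng.standard_normal(d)

# Closed form used in the diff: the Gaussian log-density evaluated at its own mean.
_, logdet_tau = np.linalg.slogdet(tau)
closed_form = 0.5 * logdet_tau - 0.5 * d * np.log(2 * np.pi)

# Reference value from scipy.
reference = multivariate_normal(mean=x0, cov=np.linalg.inv(tau)).logpdf(x0)
assert np.isclose(closed_form, reference)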
@@ -107,14 +108,17 @@ def get_log_marginal_likelihood(
 ) -> TensorVariable:
     model = pm.modelcontext(model)

-    x0, laplace_approx = get_conditional_gaussian_approximation(
+    x0, log_laplace_approx = get_conditional_gaussian_approximation(
         x, Q, mu, model, method, use_jac, use_hess, optimizer_kwargs
     )
-    log_laplace_approx = pm.logp(laplace_approx, model.rvs_to_values[x])
+    # log_laplace_approx = pm.logp(laplace_approx, x)  # model.rvs_to_values[x])

     _, logdetQ = pt.nlinalg.slogdet(Q)
+    # log_x_likelihood = (
+    #     -0.5 * (x - mu).T @ Q @ (x - mu) + 0.5 * logdetQ - 0.5 * x.shape[0] * np.log(2 * np.pi)
+    # )
     log_x_likelihood = (
-        -0.5 * (x - mu).T @ Q @ (x - mu) + 0.5 * logdetQ - 0.5 * x.shape[0] * np.log(2 * np.pi)
+        -0.5 * (x0 - mu).T @ Q @ (x0 - mu) + 0.5 * logdetQ - 0.5 * x0.shape[0] * np.log(2 * np.pi)
     )

     log_likelihood = (  # logp(y | params) =
@@ -123,7 +127,7 @@ def get_log_marginal_likelihood(
         - log_laplace_approx  # / logp(x | y, params)
     )

-    return log_likelihood
+    return x0, log_likelihood


 def fit_INLA(
@@ -139,23 +143,25 @@ def fit_INLA(
     model = pm.modelcontext(model)

     # logp(y | params)
-    log_likelihood = get_log_marginal_likelihood(
+    x0, log_likelihood = get_log_marginal_likelihood(
         x, Q, mu, model, method, use_jac, use_hess, optimizer_kwargs
     )

     # TODO How to obtain prior? It can parametrise Q, mu, y, etc. Not sure if we could extract from model.logp somehow. Otherwise simply specify as a user input
+    # Perhaps obtain as RVs which y depends on which aren't x?
     prior = None
     params = None
     log_prior = pm.logp(prior, model.rvs_to_values[params])

     # logp(params | y) = logp(y | params) + logp(params) + const
     log_posterior = log_likelihood + log_prior
+    log_posterior = pytensor.graph.replace.graph_replace(log_posterior, {x: x0})

     # TODO log_marginal_x_likelihood is almost the same as log_likelihood, but need to do some sampling?
     log_marginal_x_likelihood = None
     log_marginal_x_posterior = log_marginal_x_likelihood + log_prior

     # TODO can we sample over log likelihoods?
     # Marginalize params
     idata_params = log_posterior.sample()  # TODO something like NUTS, QMC, etc.?
     idata_x = log_marginal_x_posterior.sample()
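
Note: per the comments above, get_log_marginal_likelihood uses the identity logp(y | params) = logp(y | x, params) + logp(x | params) - logp(x | y, params), with the last term replaced by its Laplace approximation and everything evaluated at the mode x0. A standalone sketch on a one-dimensional conjugate Gaussian model, where the approximation is exact (scipy assumed available; q, sigma, mu and y are illustrative values, not quantities from the diff):

import numpy as np
from scipy.stats import norm

# Model: y ~ N(x, sigma^2), x ~ N(mu, 1/q). Assumed values for illustration only.
q, sigma, mu = 2.0, 0.5, 1.0
y = 1.7

# The posterior of x is Gaussian, so its mode x0 and precision tau have closed forms.
tau = q + 1.0 / sigma**2
x0 = (q * mu + y / sigma**2) / tau

# logp(y | x0) + logp(x0), minus the Laplace (here exact) log-density at the mode...
log_joint_at_mode = norm(x0, sigma).logpdf(y) + norm(mu, 1.0 / np.sqrt(q)).logpdf(x0)
log_laplace_at_mode = 0.5 * np.log(tau) - 0.5 * np.log(2 * np.pi)

# ...recovers the exact marginal likelihood logp(y).
exact = norm(mu, np.sqrt(sigma**2 + 1.0 / q)).logpdf(y)
assert np.isclose(log_joint_at_mode - log_laplace_at_mode, exact)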

pymc_extras/model/marginal/distributions.py

Lines changed: 57 additions & 0 deletions
@@ -132,6 +132,10 @@ class MarginalDiscreteMarkovChainRV(MarginalRV):
     """Base class for Marginalized Discrete Markov Chain RVs"""


+class MarginalLaplaceRV(MarginalRV):
+    """Base class for Marginalized Laplace-Approximated RVs"""
+
+
 def get_domain_of_finite_discrete_rv(rv: TensorVariable) -> tuple[int, ...]:
     op = rv.owner.op
     dist_params = rv.owner.op.dist_params(rv.owner)
@@ -371,3 +375,56 @@ def step_alpha(logp_emission, log_alpha, log_P):
     warn_non_separable_logp(values)
     dummy_logps = (DUMMY_ZERO,) * (len(values) - 1)
     return joint_logp, *dummy_logps
+
+
+@_logprob.register(MarginalLaplaceRV)
+def laplace_marginal_rv_logp(op: MarginalLaplaceRV, values, *inputs, **kwargs):
+    # Clone the inner RV graph of the Marginalized RV
+    x, *inner_rvs = inline_ofg_outputs(op, inputs)
+
+    # Obtain the joint_logp graph of the inner RV graph
+    inner_rv_values = dict(zip(inner_rvs, values))
+    marginalized_vv = x.clone()
+    rv_values = inner_rv_values | {x: marginalized_vv}
+    logps_dict = conditional_logp(rv_values=rv_values, **kwargs)
+
+    logp = pt.sum(
+        [pt.sum(logps_dict[k]) for k in logps_dict]
+    )  # TODO check this gives the proper p(y | x, params)
+
+    import pytensor
+
+    from pytensor.tensor.optimize import minimize
+
+    # Maximize log(p(x | y, params)) wrt x to find the mode x0
+    x0, _ = minimize(
+        objective=-logp,
+        x=marginalized_vv,
+        method="BFGS",
+        # jac=use_jac,
+        # hess=use_hess,
+        optimizer_kwargs={"tol": 1e-8},
+    )
+
+    # Require f''(x0) for the Laplace approximation
+    hess = pytensor.gradient.hessian(logp, marginalized_vv)
+    # hess = pytensor.graph.replace.graph_replace(hess, {marginalized_vv: x0})
+
+    # Could be made more efficient by adding diagonals only
+    rng = np.random.default_rng(12345)
+    d = 3
+    Q = np.diag(rng.random(d))
+    tau = Q - hess
+
+    # Currently x is passed both as the query point for f(x, args) = logp(x | y, params) AND as an initial guess for x0. This may cause issues if the query point is
+    # far from the mode x0 or in a neighbourhood which results in poor convergence.
+    _, logdetTau = pt.nlinalg.slogdet(tau)
+    log_laplace_approx = 0.5 * logdetTau - 0.5 * x0.shape[0] * np.log(2 * np.pi)
+
+    # Reduce logp dimensions corresponding to broadcasted variables
+    # marginalized_logp = logps_dict.pop(marginalized_vv)
+    joint_logp = logp - log_laplace_approx
+
+    joint_logp = pytensor.graph.replace.graph_replace(joint_logp, {marginalized_vv: x0})
+
+    return joint_logp  # TODO check if pm.sample adds on p(params). Otherwise this is p(y|params) not p(params|y)
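
Note: stripped of the pytensor graph machinery, laplace_marginal_rv_logp amounts to maximizing the joint log-density over the latent x, taking the negative Hessian at the mode as the precision tau, and subtracting the Laplace log-density at the mode from the joint. A hedged numpy/scipy sketch of those steps on a Poisson count model with independent Gaussian priors on the log-rates (everything below is illustrative and not part of the commit):

import numpy as np
from scipy.optimize import minimize
from scipy.special import gammaln

# Assumed toy data and prior: counts y, Gaussian prior N(mu, 1/q) on each latent log-rate x_i.
y = np.array([3.0, 0.0, 5.0])
mu, q = 0.0, 1.0

def joint_logp(x):
    # logp(y | x) + logp(x): Poisson likelihood plus independent Gaussian priors.
    loglik = np.sum(y * x - np.exp(x) - gammaln(y + 1))
    logprior = np.sum(-0.5 * q * (x - mu) ** 2 + 0.5 * np.log(q) - 0.5 * np.log(2 * np.pi))
    return loglik + logprior

# Step 1: find the mode x0 of the joint log-density (the diff does this with pytensor's minimize).
res = minimize(lambda x: -joint_logp(x), x0=np.zeros_like(y), method="BFGS")
x_mode = res.x

# Step 2: negative Hessian of the joint at the mode; diagonal here (exp(x) from the likelihood, q from the prior).
tau = np.diag(np.exp(x_mode) + q)

# Step 3: subtract the Laplace log-density at the mode to approximate logp(y).
_, logdet_tau = np.linalg.slogdet(tau)
log_laplace_at_mode = 0.5 * logdet_tau - 0.5 * len(y) * np.log(2 * np.pi)
log_marginal = joint_logp(x_mode) - log_laplace_at_mode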
