
Commit 19bc44d

Merge branch 'main' into implement-pmx.fit-option-for-INLA-+-marginalisation-routine
2 parents: b3a3351 + 07c6ab4

File tree

15 files changed: +4975 −25 lines

notebooks/DFM_Example_(Coincident_Index).ipynb

Lines changed: 2107 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/deterministic_advi_example.ipynb

Lines changed: 975 additions & 0 deletions
Large diffs are not rendered by default.

pymc_extras/inference/__init__.py

Lines changed: 9 additions & 1 deletion
@@ -12,10 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from pymc_extras.inference.dadvi.dadvi import fit_dadvi
 from pymc_extras.inference.fit import fit
 from pymc_extras.inference.INLA.inla import fit_INLA
 from pymc_extras.inference.laplace_approx.find_map import find_MAP
 from pymc_extras.inference.laplace_approx.laplace import fit_laplace
 from pymc_extras.inference.pathfinder.pathfinder import fit_pathfinder

-__all__ = ["fit", "fit_pathfinder", "fit_laplace", "find_MAP", "fit_INLA"]
+__all__ = [
+    "find_MAP",
+    "fit",
+    "fit_laplace",
+    "fit_pathfinder",
+    "fit_dadvi",
+    "fit_INLA"
+]

pymc_extras/inference/dadvi/__init__.py

Whitespace-only changes.
pymc_extras/inference/dadvi/dadvi.py

Lines changed: 261 additions & 0 deletions
@@ -0,0 +1,261 @@
import arviz as az
import numpy as np
import pymc
import pytensor
import pytensor.tensor as pt
import xarray

from better_optimize import minimize
from better_optimize.constants import minimize_method
from pymc import DictToArrayBijection, Model, join_nonshared_inputs
from pymc.backends.arviz import (
    PointFunc,
    apply_function_over_dataset,
    coords_and_dims_for_inferencedata,
)
from pymc.util import RandomSeed, get_default_varnames
from pytensor.tensor.variable import TensorVariable

from pymc_extras.inference.laplace_approx.laplace import unstack_laplace_draws
from pymc_extras.inference.laplace_approx.scipy_interface import (
    _compile_functions_for_scipy_optimize,
)


def fit_dadvi(
    model: Model | None = None,
    n_fixed_draws: int = 30,
    random_seed: RandomSeed = None,
    n_draws: int = 1000,
    keep_untransformed: bool = False,
    optimizer_method: minimize_method = "trust-ncg",
    use_grad: bool = True,
    use_hessp: bool = True,
    use_hess: bool = False,
    **minimize_kwargs,
) -> az.InferenceData:
    """
    Performs inference using deterministic ADVI (automatic differentiation
    variational inference), DADVI for short.

    For full details see the paper cited in the references:
    https://www.jmlr.org/papers/v25/23-1015.html

    Parameters
    ----------
    model : pm.Model
        The PyMC model to be fit. If None, the current model context is used.

    n_fixed_draws : int
        The number of fixed draws to use for the optimisation. More
        draws will result in more accurate estimates, but also
        increase inference time. Usually, the default of 30 is a good
        tradeoff between speed and accuracy.

    random_seed: int
        The random seed to use for the fixed draws. Running the optimisation
        twice with the same seed should arrive at the same result.

    n_draws: int
        The number of draws to return from the variational approximation.

    keep_untransformed: bool
        Whether or not to keep the unconstrained variables (such as
        logs of positive-constrained parameters) in the output.

    optimizer_method: str
        Which optimization method to use. The function calls
        ``scipy.optimize.minimize``, so any of the methods there can
        be used. The default is trust-ncg, which uses second-order
        information and is generally very reliable. Other methods such
        as L-BFGS-B might be faster but potentially more brittle and
        may not converge exactly to the optimum.

    use_grad:
        If True, pass the gradient function to
        ``scipy.optimize.minimize`` (where it is referred to as ``jac``).

    use_hessp:
        If True, pass the Hessian vector product to ``scipy.optimize.minimize``.

    use_hess:
        If True, pass the Hessian to ``scipy.optimize.minimize``. Note that
        this is generally not recommended since its computation can be slow
        and memory-intensive if there are many parameters.

    minimize_kwargs:
        Additional keyword arguments to pass to the
        ``scipy.optimize.minimize`` function. See the documentation of
        that function for details.

    Returns
    -------
    :class:`~arviz.InferenceData`
        The inference data containing the results of the DADVI algorithm.

    References
    ----------
    Giordano, R., Ingram, M., & Broderick, T. (2024). Black Box
    Variational Inference with a Deterministic Objective: Faster, More
    Accurate, and Even More Black Box. Journal of Machine Learning
    Research, 25(18), 1–39.
    """

    model = pymc.modelcontext(model) if model is None else model

    initial_point_dict = model.initial_point()
    n_params = DictToArrayBijection.map(initial_point_dict).data.shape[0]

    var_params, objective = create_dadvi_graph(
        model,
        n_fixed_draws=n_fixed_draws,
        random_seed=random_seed,
        n_params=n_params,
    )

    f_fused, f_hessp = _compile_functions_for_scipy_optimize(
        objective,
        [var_params],
        compute_grad=use_grad,
        compute_hessp=use_hessp,
        compute_hess=use_hess,
    )

    derivative_kwargs = {}

    if use_grad:
        derivative_kwargs["jac"] = True
    if use_hessp:
        derivative_kwargs["hessp"] = f_hessp
    if use_hess:
        derivative_kwargs["hess"] = True

    result = minimize(
        f_fused,
        np.zeros(2 * n_params),
        method=optimizer_method,
        **derivative_kwargs,
        **minimize_kwargs,
    )

    opt_var_params = result.x
    opt_means, opt_log_sds = np.split(opt_var_params, 2)

    # Make the draws:
    generator = np.random.default_rng(seed=random_seed)
    draws_raw = generator.standard_normal(size=(n_draws, n_params))

    draws = opt_means + draws_raw * np.exp(opt_log_sds)
    draws_arviz = unstack_laplace_draws(draws, model, chains=1, draws=n_draws)

    transformed_draws = transform_draws(draws_arviz, model, keep_untransformed=keep_untransformed)

    return transformed_draws


def create_dadvi_graph(
    model: Model,
    n_params: int,
    n_fixed_draws: int = 30,
    random_seed: RandomSeed = None,
) -> tuple[TensorVariable, TensorVariable]:
    """
    Sets up the DADVI graph in pytensor and returns it.

    Parameters
    ----------
    model : pm.Model
        The PyMC model to be fit.

    n_params: int
        The total number of parameters in the model.

    n_fixed_draws : int
        The number of fixed draws to use.

    random_seed: int
        The random seed to use for the fixed draws.

    Returns
    -------
    Tuple[TensorVariable, TensorVariable]
        A tuple whose first element contains the variational parameters,
        and whose second contains the DADVI objective.
    """

    # Make the fixed draws
    generator = np.random.default_rng(seed=random_seed)
    draws = generator.standard_normal(size=(n_fixed_draws, n_params))

    inputs = model.continuous_value_vars + model.discrete_value_vars
    initial_point_dict = model.initial_point()
    logp = model.logp()

    # Graph in terms of a flat input
    [logp], flat_input = join_nonshared_inputs(
        point=initial_point_dict, outputs=[logp], inputs=inputs
    )

    var_params = pt.vector(name="eta", shape=(2 * n_params,))

    means, log_sds = pt.split(var_params, axis=0, splits_size=[n_params, n_params], n_splits=2)

    draw_matrix = pt.constant(draws)
    samples = means + pt.exp(log_sds) * draw_matrix

    logp_vectorized_draws = pytensor.graph.vectorize_graph(logp, replace={flat_input: samples})

    mean_log_density = pt.mean(logp_vectorized_draws)
    entropy = pt.sum(log_sds)

    objective = -mean_log_density - entropy

    return var_params, objective


def transform_draws(
    unstacked_draws: xarray.Dataset,
    model: Model,
    keep_untransformed: bool = False,
):
    """
    Transforms the unconstrained draws back into the constrained space.

    Parameters
    ----------
    unstacked_draws : xarray.Dataset
        The draws to constrain back into the original space.

    model : Model
        The PyMC model the variables were derived from.

    keep_untransformed: bool
        Whether or not to keep the unconstrained variables in the output.

    Returns
    -------
    :class:`~arviz.InferenceData`
        Draws from the original constrained parameters.
    """

    filtered_var_names = model.unobserved_value_vars
    vars_to_sample = list(
        get_default_varnames(filtered_var_names, include_transformed=keep_untransformed)
    )
    fn = pytensor.function(model.value_vars, vars_to_sample)
    point_func = PointFunc(fn)

    coords, dims = coords_and_dims_for_inferencedata(model)

    transformed_result = apply_function_over_dataset(
        point_func,
        unstacked_draws,
        output_var_names=[x.name for x in vars_to_sample],
        coords=coords,
        dims=dims,
    )

    return transformed_result
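
For reference, the objective built by create_dadvi_graph is the fixed-draw estimate of the negative ELBO, up to an additive constant. Writing the variational means as μ, the log standard deviations as ω = log σ, and the K fixed standard-normal draws as z_1, …, z_K, the quantity being minimised is

    \mathcal{L}(\mu, \omega) = -\frac{1}{K} \sum_{k=1}^{K} \log p\left(\mu + e^{\omega} \odot z_k\right) - \sum_{d} \omega_d

where log p is the model's joint log-density on the unconstrained parameters. This matches ``objective = -mean_log_density - entropy`` above; the constant term of the Gaussian entropy is dropped because it does not affect the optimum.

A minimal usage sketch of the new entry point follows. The toy data and model are illustrative stand-ins, not part of this commit:

import numpy as np
import pymc as pm

from pymc_extras.inference import fit_dadvi

# Toy data and model, used only to show the call signature.
rng = np.random.default_rng(0)
y = rng.normal(loc=1.0, scale=2.0, size=200)

with pm.Model() as toy_model:
    mu = pm.Normal("mu", 0.0, 10.0)
    sigma = pm.HalfNormal("sigma", 5.0)
    pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    # 30 fixed draws define the deterministic objective; 1000 draws are then
    # taken from the fitted mean-field Gaussian approximation.
    approx = fit_dadvi(n_fixed_draws=30, n_draws=1000, random_seed=1)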

pymc_extras/inference/fit.py

Lines changed: 5 additions & 0 deletions
@@ -45,6 +45,11 @@ def fit(method: str, **kwargs) -> az.InferenceData:
         from pymc_extras.inference.INLA import fit_INLA

         return fit_INLA(**kwargs)
+
+    elif method == "dadvi":
+        from pymc_extras.inference import fit_dadvi
+
+        return fit_dadvi(**kwargs)

     else:
         raise ValueError(
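
The same code path is reachable through the generic dispatcher, assuming fit stays re-exported at the package top level as pmx.fit. A minimal sketch, with a stand-in toy model and the remaining keyword arguments forwarded to fit_dadvi:

import numpy as np
import pymc as pm
import pymc_extras as pmx

with pm.Model() as toy_model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=np.random.default_rng(0).normal(size=50))

# "dadvi" is the new method string handled above; kwargs go straight to fit_dadvi.
idata = pmx.fit(method="dadvi", model=toy_model, n_draws=500, random_seed=1)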

pymc_extras/inference/laplace_approx/find_map.py

Lines changed: 16 additions & 8 deletions
@@ -198,6 +198,7 @@ def find_MAP(
     include_transformed: bool = True,
     gradient_backend: GradientBackend = "pytensor",
     compile_kwargs: dict | None = None,
+    compute_hessian: bool = False,
     **optimizer_kwargs,
 ) -> (
     dict[str, np.ndarray]
@@ -239,6 +240,10 @@ def find_MAP(
         Whether to include transformed variable values in the returned dictionary. Defaults to True.
     gradient_backend: str, default "pytensor"
         Which backend to use to compute gradients. Must be one of "pytensor" or "jax".
+    compute_hessian: bool
+        If True, the inverse Hessian matrix at the optimum will be computed and included in the returned
+        InferenceData object. This is needed for the Laplace approximation, but can be computationally expensive for
+        high-dimensional problems. Defaults to False.
     compile_kwargs: dict, optional
         Additional options to pass to the ``pytensor.function`` function when compiling loss functions.
     **optimizer_kwargs
@@ -316,14 +321,17 @@ def find_MAP(
         **optimizer_kwargs,
     )

-    H_inv = _compute_inverse_hessian(
-        optimizer_result=optimizer_result,
-        optimal_point=None,
-        f_fused=f_fused,
-        f_hessp=f_hessp,
-        use_hess=use_hess,
-        method=method,
-    )
+    if compute_hessian:
+        H_inv = _compute_inverse_hessian(
+            optimizer_result=optimizer_result,
+            optimal_point=None,
+            f_fused=f_fused,
+            f_hessp=f_hessp,
+            use_hess=use_hess,
+            method=method,
+        )
+    else:
+        H_inv = None

     raveled_optimized = RaveledVars(optimizer_result.x, initial_params.point_map_info)
     unobserved_vars = get_default_varnames(model.unobserved_value_vars, include_transformed=True)
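
A short sketch of the new flag, assuming find_MAP's existing keyword interface and a stand-in toy model. Since compute_hessian defaults to False, a plain MAP fit now skips the inverse-Hessian computation; callers that need it (as fit_laplace does below) opt back in explicitly:

import numpy as np
import pymc as pm

from pymc_extras.inference import find_MAP

with pm.Model() as toy_model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=np.random.default_rng(0).normal(size=50))

    # Request the inverse Hessian at the optimum alongside the MAP point.
    map_fit = find_MAP(method="L-BFGS-B", compute_hessian=True)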

pymc_extras/inference/laplace_approx/idata.py

Lines changed: 5 additions & 2 deletions
@@ -136,7 +136,10 @@ def map_results_to_inference_data(


 def add_fit_to_inference_data(
-    idata: az.InferenceData, mu: RaveledVars, H_inv: np.ndarray, model: pm.Model | None = None
+    idata: az.InferenceData,
+    mu: RaveledVars,
+    H_inv: np.ndarray | None,
+    model: pm.Model | None = None,
 ) -> az.InferenceData:
     """
     Add the mean vector and covariance matrix of the Laplace approximation to an InferenceData object.
@@ -147,7 +150,7 @@ def add_fit_to_inference_data(
         An InferenceData object containing the approximated posterior samples.
     mu: RaveledVars
         The MAP estimate of the model parameters.
-    H_inv: np.ndarray
+    H_inv: np.ndarray, optional
         The inverse Hessian matrix of the log-posterior evaluated at the MAP estimate.
     model: Model, optional
         A PyMC model. If None, the model is taken from the current model context.

pymc_extras/inference/laplace_approx/laplace.py

Lines changed: 1 addition & 0 deletions
@@ -288,6 +288,7 @@ def fit_laplace(
         include_transformed=include_transformed,
         gradient_backend=gradient_backend,
         compile_kwargs=compile_kwargs,
+        compute_hessian=True,
         **optimizer_kwargs,
     )
