Commit b876114

Add more backend tests to fit_MAP/laplace
Parent: 1cef2e7

2 files changed (+57, -18 lines)

2 files changed

+57
-18
lines changed

pymc_extras/inference/find_map.py

Lines changed: 17 additions & 5 deletions
@@ -32,14 +32,26 @@ def set_optimizer_function_defaults(method, use_grad, use_hess, use_hessp):
 
     if use_hess and use_hessp:
         _log.warning(
-            'Both "use_hess" and "use_hessp" are set to True. scipy.optimize.minimize never uses both at the '
-            'same time. Setting "use_hess" to False.'
+            'Both "use_hess" and "use_hessp" are set to True, but scipy.optimize.minimize never uses both at the '
+            'same time. When possible "use_hessp" is preferred because it is computationally more efficient. '
+            'Setting "use_hess" to False.'
         )
         use_hess = False
 
     use_grad = use_grad if use_grad is not None else method_info["uses_grad"]
-    use_hess = use_hess if use_hess is not None else method_info["uses_hess"]
-    use_hessp = use_hessp if use_hessp is not None else method_info["uses_hessp"]
+
+    if use_hessp is not None and use_hess is None:
+        use_hess = not use_hessp
+
+    elif use_hess is not None and use_hessp is None:
+        use_hessp = not use_hess
+
+    elif use_hessp is None and use_hess is None:
+        use_hessp = method_info["uses_hessp"]
+        use_hess = method_info["uses_hess"]
+        if use_hessp and use_hess:
+            # If a method could use either hess or hessp, we default to using hessp
+            use_hess = False
 
     return use_grad, use_hess, use_hessp
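The new branching replaces the old independent fallbacks, which could leave both "use_hess" and "use_hessp" enabled for a method that supports both. A minimal sketch of the resulting truth table, runnable in isolation (resolve_hess_defaults and the inline method_info dicts are hypothetical stand-ins for illustration, not the library's API):

def resolve_hess_defaults(use_hess, use_hessp, method_info):
    # If the user set exactly one flag, default the other to its complement,
    # so at most one Hessian callable gets compiled.
    if use_hessp is not None and use_hess is None:
        use_hess = not use_hessp
    elif use_hess is not None and use_hessp is None:
        use_hessp = not use_hess
    elif use_hessp is None and use_hess is None:
        # Neither flag set: fall back to the optimizer's capabilities,
        # preferring hessp when the method could use either.
        use_hessp = method_info["uses_hessp"]
        use_hess = method_info["uses_hess"]
        if use_hessp and use_hess:
            use_hess = False
    return use_hess, use_hessp

# A method like trust-ncg accepts both, so hessp wins by default:
assert resolve_hess_defaults(None, None, {"uses_hess": True, "uses_hessp": True}) == (False, True)
# An explicit use_hess=True now implies use_hessp=False:
assert resolve_hess_defaults(True, None, {"uses_hess": True, "uses_hessp": True}) == (True, False)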

@@ -63,7 +75,7 @@ def get_nearest_psd(A: np.ndarray) -> np.ndarray:
         The nearest positive semi-definite matrix to the input matrix.
     """
     C = (A + A.T) / 2
-    eigval, eigvec = np.linalg.eig(C)
+    eigval, eigvec = np.linalg.eigh(C)
     eigval[eigval < 0] = 0
 
     return eigvec @ np.diag(eigval) @ eigvec.T
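The switch to np.linalg.eigh is a correctness as well as a performance fix: np.linalg.eig treats C as a general matrix and can return complex eigenpairs from round-off even though C = (A + A.T) / 2 is symmetric by construction, while eigh exploits the symmetry and guarantees real eigenvalues and orthonormal eigenvectors. A self-contained sketch of the same clip-negative-eigenvalues projection (nearest_psd here is an illustrative copy, not the module's function):

import numpy as np

def nearest_psd(A: np.ndarray) -> np.ndarray:
    # Symmetrize, then zero out negative eigenvalues.
    C = (A + A.T) / 2
    eigval, eigvec = np.linalg.eigh(C)  # real eigenvalues, orthonormal eigenvectors
    eigval[eigval < 0] = 0
    return eigvec @ np.diag(eigval) @ eigvec.T

A = np.array([[1.0, 2.0], [2.0, 1.0]])  # indefinite: eigenvalues are 3 and -1
B = nearest_psd(A)
assert np.linalg.eigvalsh(B).min() > -1e-12  # projected matrix is PSD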

tests/test_laplace.py

Lines changed: 40 additions & 13 deletions
@@ -19,7 +19,7 @@
 
 import pymc_extras as pmx
 
-from pymc_extras.inference.find_map import find_MAP
+from pymc_extras.inference.find_map import GradientBackend, find_MAP
 from pymc_extras.inference.laplace import (
     fit_laplace,
     fit_mvn_at_MAP,

@@ -37,7 +37,11 @@ def rng():
     "ignore:hessian will stop negating the output in a future version of PyMC.\n"
     + "To suppress this warning set `negate_output=False`:FutureWarning",
 )
-def test_laplace():
+@pytest.mark.parametrize(
+    "mode, gradient_backend",
+    [(None, "pytensor"), ("NUMBA", "pytensor"), ("JAX", "jax"), ("JAX", "pytensor")],
+)
+def test_laplace(mode, gradient_backend: GradientBackend):
     # Example originates from Bayesian Data Analyses, 3rd Edition
     # By Andrew Gelman, John Carlin, Hal Stern, David Dunson,
     # Aki Vehtari, and Donald Rubin.

@@ -55,7 +59,13 @@ def test_laplace():
         vars = [mu, logsigma]
 
         idata = pmx.fit(
-            method="laplace", optimize_method="trust-ncg", draws=draws, random_seed=173300, chains=1
+            method="laplace",
+            optimize_method="trust-ncg",
+            draws=draws,
+            random_seed=173300,
+            chains=1,
+            compile_kwargs={"mode": mode},
+            gradient_backend=gradient_backend,
         )
 
     assert idata.posterior["mu"].shape == (1, draws)

@@ -71,7 +81,11 @@ def test_laplace():
     np.testing.assert_allclose(idata.fit["covariance_matrix"].values, bda_cov, atol=1e-4)
 
 
-def test_laplace_only_fit():
+@pytest.mark.parametrize(
+    "mode, gradient_backend",
+    [(None, "pytensor"), ("NUMBA", "pytensor"), ("JAX", "jax"), ("JAX", "pytensor")],
+)
+def test_laplace_only_fit(mode, gradient_backend: GradientBackend):
     # Example originates from Bayesian Data Analyses, 3rd Edition
     # By Andrew Gelman, John Carlin, Hal Stern, David Dunson,
     # Aki Vehtari, and Donald Rubin.

@@ -90,8 +104,8 @@ def test_laplace_only_fit():
         method="laplace",
         optimize_method="BFGS",
         progressbar=True,
-        gradient_backend="jax",
-        compile_kwargs={"mode": "JAX"},
+        gradient_backend=gradient_backend,
+        compile_kwargs={"mode": mode},
         optimizer_kwargs=dict(maxiter=100_000, gtol=1e-100),
         random_seed=173300,
     )

@@ -111,8 +125,11 @@ def test_laplace_only_fit():
     [True, False],
     ids=["transformed", "untransformed"],
 )
-@pytest.mark.parametrize("mode", ["JAX", None], ids=["jax", "pytensor"])
-def test_fit_laplace_coords(rng, transform_samples, mode):
+@pytest.mark.parametrize(
+    "mode, gradient_backend",
+    [(None, "pytensor"), ("NUMBA", "pytensor"), ("JAX", "jax"), ("JAX", "pytensor")],
+)
+def test_fit_laplace_coords(rng, transform_samples, mode, gradient_backend: GradientBackend):
     coords = {"city": ["A", "B", "C"], "obs_idx": np.arange(100)}
     with pm.Model(coords=coords) as model:
         mu = pm.Normal("mu", mu=3, sigma=0.5, dims=["city"])

@@ -131,7 +148,7 @@ def test_fit_laplace_coords(rng, transform_samples, mode):
         use_hessp=True,
         progressbar=False,
         compile_kwargs=dict(mode=mode),
-        gradient_backend="jax" if mode == "JAX" else "pytensor",
+        gradient_backend=gradient_backend,
     )
 
     for value in optimized_point.values():

@@ -163,7 +180,11 @@ def test_fit_laplace_coords(rng, transform_samples, mode):
     ]
 
 
-def test_fit_laplace_ragged_coords(rng):
+@pytest.mark.parametrize(
+    "mode, gradient_backend",
+    [(None, "pytensor"), ("NUMBA", "pytensor"), ("JAX", "jax"), ("JAX", "pytensor")],
+)
+def test_fit_laplace_ragged_coords(mode, gradient_backend: GradientBackend, rng):
     coords = {"city": ["A", "B", "C"], "feature": [0, 1], "obs_idx": np.arange(100)}
     with pm.Model(coords=coords) as ragged_dim_model:
         X = pm.Data("X", np.ones((100, 2)), dims=["obs_idx", "feature"])

@@ -188,8 +209,8 @@ def test_fit_laplace_ragged_coords(rng):
         progressbar=False,
         use_grad=True,
         use_hessp=True,
-        gradient_backend="jax",
-        compile_kwargs={"mode": "JAX"},
+        gradient_backend=gradient_backend,
+        compile_kwargs={"mode": mode},
     )
 
     assert idata["posterior"].beta.shape[-2:] == (3, 2)

@@ -206,7 +227,11 @@ def test_fit_laplace_ragged_coords(rng):
     [True, False],
     ids=["transformed", "untransformed"],
 )
-def test_fit_laplace(fit_in_unconstrained_space):
+@pytest.mark.parametrize(
+    "mode, gradient_backend",
+    [(None, "pytensor"), ("NUMBA", "pytensor"), ("JAX", "jax"), ("JAX", "pytensor")],
+)
+def test_fit_laplace(fit_in_unconstrained_space, mode, gradient_backend: GradientBackend):
     with pm.Model() as simp_model:
         mu = pm.Normal("mu", mu=3, sigma=0.5)
         sigma = pm.Exponential("sigma", 1)

@@ -223,6 +248,8 @@ def test_fit_laplace(fit_in_unconstrained_space):
         use_hessp=True,
         fit_in_unconstrained_space=fit_in_unconstrained_space,
         optimizer_kwargs=dict(maxiter=100_000, tol=1e-100),
+        compile_kwargs={"mode": mode},
+        gradient_backend=gradient_backend,
     )
 
     np.testing.assert_allclose(np.mean(idata.posterior.mu, axis=1), np.full((2,), 3), atol=0.1)
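Taken together, the new parametrization runs every test under four compile/gradient pairings: the default backend with PyTensor gradients (None, "pytensor"), Numba compilation with PyTensor gradients ("NUMBA", "pytensor"), and JAX compilation with either JAX or PyTensor gradients. Outside the test suite, the JAX pairing would be invoked roughly as follows; the toy model and draw count are illustrative rather than taken from the diff, the keyword arguments mirror the pmx.fit calls above, and a working JAX installation is assumed:

import numpy as np
import pymc as pm
import pymc_extras as pmx

rng = np.random.default_rng(0)
with pm.Model():
    mu = pm.Normal("mu", mu=0, sigma=1)
    pm.Normal("obs", mu=mu, sigma=1, observed=rng.normal(loc=3, size=50))

    # Compile the logp through JAX and differentiate with JAX as well;
    # (None, "pytensor") would instead use the default backend end to end.
    idata = pmx.fit(
        method="laplace",
        optimize_method="trust-ncg",
        draws=500,
        chains=1,
        random_seed=173300,
        compile_kwargs={"mode": "JAX"},
        gradient_backend="jax",
    )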

0 commit comments