Updated LBFGS status handling and alpha_recover function

aphc14 · aphc14 · commit 32fa4139dc77 · 2025-04-20T03:12:53.000+10:00
- Corrected the condition for LOW_UPDATE_PCT in LBFGS status handling.
- Removed update_mask references from alpha_recover and inverse_hessian_factors.
- Adjusted test cases to reflect changes in status messages and function signatures.
diff --git a/pymc_extras/inference/pathfinder/lbfgs.py b/pymc_extras/inference/pathfinder/lbfgs.py
@@ -218,7 +218,7 @@ def minimize(self, x0) -> tuple[NDArray, NDArray, int, LBFGSStatus]:
         elif result.status == 2:
             # precision loss resulting to inf or nan
             lbfgs_status = LBFGSStatus.NON_FINITE
-        elif history.count < low_update_threshold * result.nit:
+        elif history.count * low_update_threshold < result.nit:
             lbfgs_status = LBFGSStatus.LOW_UPDATE_PCT
         else:
             lbfgs_status = LBFGSStatus.CONVERGED
diff --git a/pymc_extras/inference/pathfinder/pathfinder.py b/pymc_extras/inference/pathfinder/pathfinder.py
@@ -262,7 +262,7 @@ def alpha_recover(
     shapes: L=batch_size, N=num_params
     """
 
-    def compute_alpha_l(alpha_lm1, s_l, z_l) -> TensorVariable:
+    def compute_alpha_l(s_l, z_l, alpha_lm1) -> TensorVariable:
         # alpha_lm1: (N,)
         # s_l: (N,)
         # z_l: (N,)
@@ -290,7 +290,7 @@ def compute_alpha_l(alpha_lm1, s_l, z_l) -> TensorVariable:
     )
 
     # assert np.all(alpha.eval() > 0), "alpha cannot be negative"
-    # alpha: (L, N), update_mask: (L, N)
+    # alpha: (L, N)
     return alpha, s, z
 
 
@@ -368,8 +368,8 @@ def get_chi_matrix_2(diff: TensorVariable, J: TensorConstant) -> TensorVariable:
     L, N = alpha.shape
 
     # changed to get_chi_matrix_2 after removing update_mask
-    S = get_chi_matrix_1(s, J)
-    Z = get_chi_matrix_1(z, J)
+    S = get_chi_matrix_2(s, J)
+    Z = get_chi_matrix_2(z, J)
 
     # E: (L, J, J)
     Ij = pt.eye(J)[None, ...]
diff --git a/tests/test_pathfinder.py b/tests/test_pathfinder.py
@@ -106,8 +106,8 @@ def test_unstable_lbfgs_update_mask(capsys, jitter):
             )
         out, err = capsys.readouterr()
         status_pattern = [
-            r"INIT_FAILED_LOW_UPDATE_MASK\s+\d+",
-            r"LOW_UPDATE_MASK_RATIO\s+\d+",
+            r"INIT_FAILED_LOW_UPDATE_PCT\s+\d+",
+            r"LOW_UPDATE_PCT\s+\d+",
             r"LBFGS_FAILED\s+\d+",
             r"SUCCESS\s+\d+",
         ]
@@ -126,8 +126,8 @@ def test_unstable_lbfgs_update_mask(capsys, jitter):
             out, err = capsys.readouterr()
 
             status_pattern = [
-                r"INIT_FAILED_LOW_UPDATE_MASK\s+2",
-                r"LOW_UPDATE_MASK_RATIO\s+2",
+                r"INIT_FAILED_LOW_UPDATE_PCT\s+2",
+                r"LOW_UPDATE_PCT\s+2",
                 r"LBFGS_FAILED\s+4",
             ]
             for pattern in status_pattern:
@@ -232,12 +232,11 @@ def test_bfgs_sample():
     # get factors
     x_full = pt.as_tensor(x_data, dtype="float64")
     g_full = pt.as_tensor(g_data, dtype="float64")
-    epsilon = 1e-11
 
     x = x_full[1:]
     g = g_full[1:]
-    alpha, S, Z, update_mask = alpha_recover(x_full, g_full, epsilon)
-    beta, gamma = inverse_hessian_factors(alpha, S, Z, update_mask, J)
+    alpha, s, z = alpha_recover(x_full, g_full)
+    beta, gamma = inverse_hessian_factors(alpha, s, z, J)
 
     # sample
     phi, logq = bfgs_sample(
@@ -252,8 +251,8 @@ def test_bfgs_sample():
     # check shapes
     assert beta.eval().shape == (L, N, 2 * J)
     assert gamma.eval().shape == (L, 2 * J, 2 * J)
-    assert phi.eval().shape == (L, num_samples, N)
-    assert logq.eval().shape == (L, num_samples)
+    assert all(phi.shape.eval() == (L, num_samples, N))
+    assert all(logq.shape.eval() == (L, num_samples))
 
 
 @pytest.mark.parametrize("importance_sampling", ["psis", "psir", "identity", None])