Add Wolfe line search to Laplace approximation #3250
base: develop
Changes from all commits
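This PR adds a Wolfe line search to the Laplace approximation, seeded by the Barzilai–Borwein step-size helper in the diff below. For reference, a weak Wolfe line search accepts a step length \(\alpha\) along a descent direction \(p_k\) when both standard conditions below hold; the line-search implementation itself is not part of this excerpt, and the constants shown are common defaults rather than values confirmed by the PR:

```latex
% Weak Wolfe conditions with 0 < c_1 < c_2 < 1
% (c_1 = 1e-4, c_2 = 0.9 are common defaults, assumed here for illustration):
\begin{align*}
  f(x_k + \alpha p_k) &\le f(x_k) + c_1\,\alpha\,\nabla f(x_k)^\top p_k
    && \text{(sufficient decrease / Armijo)} \\
  \nabla f(x_k + \alpha p_k)^\top p_k &\ge c_2\,\nabla f(x_k)^\top p_k
    && \text{(curvature)}
\end{align*}
```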
New file (@@ -0,0 +1,124 @@): stan/math/mix/functor/barzilai_borwein_step_size.hpp

```cpp
#ifndef STAN_MATH_MIX_FUNCTOR_BARZILAI_BORWEIN_STEP_SIZE_HPP
#define STAN_MATH_MIX_FUNCTOR_BARZILAI_BORWEIN_STEP_SIZE_HPP

#include <stan/math/prim/fun/Eigen.hpp>
#include <algorithm>
#include <numeric>
#include <cmath>

namespace stan::math::internal {

/**
 * @brief Curvature-aware Barzilai–Borwein (BB) step length with robust
 * safeguards.
 *
 * Given successive parameter displacements \f$s = x_k - x_{k-1}\f$ and
 * gradients \f$g_k\f$, \f$g_{k-1}\f$, this routine forms
 * \f$y = g_k - g_{k-1}\f$ and computes the two classical BB candidates
 *
 * \f{align*}{
 *   \alpha_{\text{BB1}} &= \frac{\langle s,s\rangle}{\langle s,y\rangle},\\
 *   \alpha_{\text{BB2}} &= \frac{\langle s,y\rangle}{\langle y,y\rangle},
 * \f}
 *
 * then chooses between them using the **spectral cosine**
 * \f$r = \cos^2\!\angle(s,y)
 *      = \dfrac{\langle s,y\rangle^2}{\langle s,s\rangle\,\langle y,y\rangle}
 *      \in [0,1]\f$:
 *
 * - if \f$r > 0.9\f$ (well-aligned curvature) and the previous line search
 *   did **≤ 1** backtrack, prefer the "long" step \f$\alpha_{\text{BB1}}\f$;
 * - if \f$0.1 \le r \le 0.9\f$, take the neutral geometric mean
 *   \f$\sqrt{\alpha_{\text{BB1}}\alpha_{\text{BB2}}}\f$;
 * - otherwise default to the "short" step \f$\alpha_{\text{BB2}}\f$.
 *
 * All candidates are clamped into \f$[\text{min\_alpha},\,\text{max\_alpha}]\f$
 * and must be finite and positive.
 * If the curvature scalars are ill-posed (non-finite or too small),
 * \f$\langle s,y\rangle \le \varepsilon\f$, or if `last_backtracks == 99`
 * (explicitly disabling BB for this iteration), the function falls back to a
 * **safe** step: use `prev_step` when finite and positive, otherwise
 * \f$1.0\f$, then clamp to \f$[\text{min\_alpha},\,\text{max\_alpha}]\f$.
 *
 * @param s Displacement between consecutive iterates
 *          (\f$s = x_k - x_{k-1}\f$).
 * @param g_curr Gradient at the current iterate \f$g_k\f$.
 * @param g_prev Gradient at the previous iterate \f$g_{k-1}\f$.
 * @param prev_step Previously accepted step length (used by the fallback).
 * @param last_backtracks Number of backtracking contractions performed by the
 *        most recent line search; set to 99 to force the safe fallback.
 * @param min_alpha Lower bound for the returned step length.
 * @param max_alpha Upper bound for the returned step length.
 *
 * @return A finite, positive BB-style step length \f$\alpha \in
 *         [\text{min\_alpha},\,\text{max\_alpha}]\f$ suitable for seeding a
 *         line search or as a spectral preconditioner scale.
 *
 * @note Uses \f$\varepsilon = 10^{-16}\f$ to guard against division by very
 *       small curvature terms, and applies `std::abs` to BB ratios to avoid
 *       negative steps; descent is enforced by the line search.
 * @warning The vectors must have identical size. Non-finite inputs yield the
 *          safe fallback.
 */
inline double barzilai_borwein_step_size(const Eigen::VectorXd& s,
                                         const Eigen::VectorXd& g_curr,
                                         const Eigen::VectorXd& g_prev,
                                         double prev_step, int last_backtracks,
                                         double min_alpha, double max_alpha) {
  // Fallback: reuse the previous step when usable, else 1.0, then clamp.
  auto safe_fallback = [&]() -> double {
    double a = std::clamp(
        prev_step > 0.0 && std::isfinite(prev_step) ? prev_step : 1.0,
        min_alpha, max_alpha);
    return a;
  };

  const Eigen::VectorXd y = g_curr - g_prev;
  const double sty = s.dot(y);
  const double sts = s.squaredNorm();
  const double yty = y.squaredNorm();

  // Basic validity checks
  constexpr double eps = 1e-16;
  if (!(std::isfinite(sty) && std::isfinite(sts) && std::isfinite(yty))
      || sts <= eps || yty <= eps || sty <= eps || last_backtracks == 99) {
    return safe_fallback();
  }

  // BB candidates
  double alpha_bb1 = std::clamp(std::abs(sts / sty), min_alpha, max_alpha);
  double alpha_bb2 = std::clamp(std::abs(sty / yty), min_alpha, max_alpha);

  // Safeguard candidates
  if (!std::isfinite(alpha_bb1) || !std::isfinite(alpha_bb2) || alpha_bb1 <= 0.0
      || alpha_bb2 <= 0.0) {
    return safe_fallback();
  }

  // Spectral cosine r = cos^2(angle(s, y)) in [0, 1]
  const double r = (sty * sty) / (sts * yty);

  // Heuristic thresholds (robust defaults)
  constexpr double kLoose = 0.9;  // "nice" curvature
  constexpr double kTight = 0.1;  // "dodgy" curvature

  double alpha0 = alpha_bb2;  // default to short BB for robustness
  if (r > kLoose && last_backtracks <= 1) {
    // Spectrum looks friendly and line search was not harsh -> try long BB
    alpha0 = alpha_bb1;
  } else if (r >= kTight && r <= kLoose) {
    // Neither clearly friendly nor clearly dodgy -> neutral middle
    alpha0 = std::sqrt(alpha_bb1 * alpha_bb2);
  }  // else keep alpha_bb2

  // Clip to user bounds
  alpha0 = std::clamp(alpha0, min_alpha, max_alpha);

  if (!std::isfinite(alpha0) || alpha0 <= 0.0) {
    return safe_fallback();
  }
  return alpha0;
}

}  // namespace stan::math::internal
#endif
```
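A minimal sketch of how the seed might be exercised, assuming the header path implied by the include guard; the toy quadratic and all numeric values are illustrative, not taken from the PR:

```cpp
// Toy quadratic f(x) = 0.5 * x' H x with gradient g(x) = H x, so that
// y = g_curr - g_prev = H s and both BB ratios are inverse Rayleigh
// quotients of H (they land in [1/10, 1/2] for eigenvalues {2, 10}).
#include <stan/math/mix/functor/barzilai_borwein_step_size.hpp>  // assumed path
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::MatrixXd H(2, 2);
  H << 2.0, 0.0,
       0.0, 10.0;
  Eigen::VectorXd x_prev(2), x_curr(2);
  x_prev << 1.0, 1.0;
  x_curr << 0.9, 0.5;

  const Eigen::VectorXd s = x_curr - x_prev;  // s = x_k - x_{k-1}
  const Eigen::VectorXd g_prev = H * x_prev;  // g_{k-1}
  const Eigen::VectorXd g_curr = H * x_curr;  // g_k

  // Here s'y = 2.52, s's = 0.26, y'y = 25.04, so the spectral cosine is
  // r = 2.52^2 / (0.26 * 25.04) ~= 0.975 > 0.9; with last_backtracks = 0
  // the long step BB1 = s's / s'y ~= 0.103 is returned.
  const double alpha = stan::math::internal::barzilai_borwein_step_size(
      s, g_curr, g_prev, /*prev_step=*/1.0, /*last_backtracks=*/0,
      /*min_alpha=*/1e-8, /*max_alpha=*/1e2);

  std::cout << "BB seed step: " << alpha << "\n";
  return 0;
}
```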
New file (@@ -0,0 +1,106 @@): stan/math/mix/functor/conditional_copy_and_promote.hpp

```cpp
#ifndef STAN_MATH_MIX_FUNCTOR_CONDITIONAL_COPY_AND_PROMOTE_HPP
#define STAN_MATH_MIX_FUNCTOR_CONDITIONAL_COPY_AND_PROMOTE_HPP

#include <stan/math/mix/functor/hessian_block_diag.hpp>
#include <stan/math/prim/functor.hpp>
#include <stan/math/prim/fun.hpp>

namespace stan::math::internal {

/**
 * Decide if an object should be deep or shallow copied when
 * using @ref conditional_copy_and_promote .
 */
enum class COPY_TYPE : uint8_t { SHALLOW = 0, DEEP = 1 };

/**
 * Conditionally copy and promote a type's scalar type to a `PromotedType`.
 * @tparam Filter type trait with a static constexpr bool member `value` that
 *   is true if the type should be promoted. Otherwise, the type is left
 *   unchanged.
 * @tparam PromotedType type to promote the scalar to.
 * @tparam CopyType type of copy to perform.
 * @tparam Args variadic argument types.
 * @param args variadic arguments to conditionally copy and promote.
 * @return a tuple where each element is either a reference to the original
 *   argument or a promoted copy of the argument.
 */
template <template <typename...> class Filter,
          typename PromotedType = stan::math::var,
          COPY_TYPE CopyType = COPY_TYPE::DEEP, typename... Args>
inline auto conditional_copy_and_promote(Args&&... args) {
  return map_if<Filter>(
      [](auto&& arg) {
        if constexpr (is_tuple_v<decltype(arg)>) {
          // Recurse into tuples, rebuilding them element by element.
          return stan::math::apply(
              [](auto&&... inner_args) {
                return make_holder_tuple(
                    conditional_copy_and_promote<Filter, PromotedType,
                                                 CopyType>(
                        std::forward<decltype(inner_args)>(inner_args))...);
              },
              std::forward<decltype(arg)>(arg));
        } else if constexpr (is_std_vector_v<decltype(arg)>) {
          // Recurse into std::vector elements.
          std::vector<decltype(conditional_copy_and_promote<
                               Filter, PromotedType, CopyType>(arg[0]))>
              ret;
          for (std::size_t i = 0; i < arg.size(); ++i) {
            ret.push_back(
                conditional_copy_and_promote<Filter, PromotedType, CopyType>(
                    arg[i]));
          }
          return ret;
        } else {
          if constexpr (CopyType == COPY_TYPE::DEEP) {
            return stan::math::eval(promote_scalar<PromotedType>(
                value_of_rec(std::forward<decltype(arg)>(arg))));
          } else if constexpr (CopyType == COPY_TYPE::SHALLOW) {
            if constexpr (std::is_same_v<PromotedType,
                                         scalar_type_t<decltype(arg)>>) {
              return std::forward<decltype(arg)>(arg);
            } else {
              return stan::math::eval(promote_scalar<PromotedType>(
                  std::forward<decltype(arg)>(arg)));
            }
          }
        }
      },
      std::forward<Args>(args)...);
}
```
Member: q: should the
Author: It's only used in other internal functions, so I think it is better to have it in internal.
stan/math/mix/functor/conditional_copy_and_promote.hpp, continued:

```cpp
/**
 * Conditionally deep copy types with a `var` scalar type to `PromotedType`.
 * @tparam PromotedType type to promote the scalar to.
 * @tparam Args variadic argument types.
 * @param args variadic arguments to conditionally copy and promote.
 * @return a tuple where each element is either a reference to the original
 *   argument or a promoted copy of the argument.
 */
template <typename PromotedType, typename... Args>
inline auto deep_copy_vargs(Args&&... args) {
  return conditional_copy_and_promote<is_any_var_scalar, PromotedType,
                                      COPY_TYPE::DEEP>(
      std::forward<Args>(args)...);
}

/**
 * Conditionally shallow copy types with a `var` scalar type to `PromotedType`.
 * @note This function is useful whenever you are inside of nested autodiff
 *   and want to allow the input arguments from an outer autodiff to be used
 *   in an inner autodiff without making a hard copy of the input arguments.
 * @tparam PromotedType type to promote the scalar to.
 * @tparam Args variadic argument types.
 * @param args variadic arguments to conditionally copy and promote.
 * @return a tuple where each element is either a reference to the original
 *   argument or a promoted copy of the argument.
 */
template <typename PromotedType, typename... Args>
inline auto shallow_copy_vargs(Args&&... args) {
  return conditional_copy_and_promote<is_any_var_scalar, PromotedType,
                                      COPY_TYPE::SHALLOW>(
      std::forward<Args>(args)...);
}

}  // namespace stan::math::internal

#endif
```
Reviewer: @charlesm93 the C++ looks fine to me for this function, but I'd appreciate someone else putting eyes on the math.
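The helpers above manage argument copies across nested autodiff sweeps. A hypothetical call-pattern sketch, assuming the usual Stan Math headers; the values and the surrounding function are illustrative, not taken from the PR's tests:

```cpp
#include <stan/math/mix.hpp>

// Sketch: `theta` carries outer-sweep vars, `x` is plain data. Per the docs
// above, only arguments matched by is_any_var_scalar are touched; `x` comes
// back as a reference inside the returned tuple.
void nested_autodiff_sketch() {
  using stan::math::var;

  Eigen::Matrix<var, Eigen::Dynamic, 1> theta(2);
  theta << 1.0, 2.0;
  Eigen::VectorXd x(2);
  x << 0.5, 1.5;

  // Deep copy: theta's values are extracted (value_of_rec) and re-promoted
  // to fresh vars, detaching the copies from the outer tape.
  auto deep = stan::math::internal::deep_copy_vargs<var>(theta, x);

  // Shallow copy: theta's scalar type already is `var`, so it is forwarded
  // by reference instead of copied.
  auto shallow = stan::math::internal::shallow_copy_vargs<var>(theta, x);

  (void)deep;
  (void)shallow;
}
```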