Skip to content

Commit 3401cc5

Browse files
Fix false convergence when LBFGSB encounters Inf/NaN at bounds (issue #1094)
The L-BFGS-B algorithm evaluates the function and gradient at bounds during its Cauchy step. When a function has a singularity at the boundary (e.g., log(0)), this produces Inf/NaN values that corrupt the optimization state. The Fortran code would then incorrectly report convergence. This fix: - Tracks whether Inf/NaN values are encountered in the objective or gradient - Detects false convergence: when Inf/NaN was encountered, optimizer reports Success, but the solution hasn't moved from the starting point - Returns Failure instead of Success in such cases with a helpful warning Closes #1094 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 25ff63f commit 3401cc5

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

lib/OptimizationLBFGSB/src/OptimizationLBFGSB.jl

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,15 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
197197
stats = stats, retcode = opt_ret)
198198
else
199199
iter_count = Ref(0)
200+
encountered_inf_nan = Ref(false)
201+
200202
_loss = function (θ)
201203
x = cache.f(θ, cache.p)
202204
iter_count[] += 1
205+
# Track if we encounter Inf/NaN values in the objective
206+
if !isfinite(x[1])
207+
encountered_inf_nan[] = true
208+
end
203209
opt_state = OptimizationBase.OptimizationState(
204210
u = θ, objective = x[1])
205211
if cache.callback(opt_state, x...)
@@ -208,6 +214,15 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
208214
return x[1]
209215
end
210216

217+
# Wrap gradient function to track Inf/NaN values
218+
_grad! = function (G, θ)
219+
cache.f.grad(G, θ)
220+
# Track if we encounter Inf/NaN values in the gradient
221+
if !all(isfinite, G)
222+
encountered_inf_nan[] = true
223+
end
224+
end
225+
211226
n = length(cache.u0)
212227

213228
if cache.lb === nothing
@@ -225,14 +240,24 @@ function SciMLBase.__solve(cache::OptimizationCache{O}) where {O <: LBFGSB}
225240
t0 = time()
226241

227242
res = optimizer(
228-
_loss, cache.f.grad, cache.u0, bounds; m = cache.opt.m, solver_kwargs...)
243+
_loss, _grad!, cache.u0, bounds; m = cache.opt.m, solver_kwargs...)
229244

230245
# Extract the task message from the result
231246
stop_reason = task_message_to_string(optimizer.task)
232247

233248
# Deduce the return code from the stop reason
234249
opt_ret = deduce_retcode(stop_reason)
235250

251+
# Detect false convergence due to Inf/NaN values
252+
# If we encountered Inf/NaN and the optimizer claims success but the solution
253+
# is essentially unchanged from the starting point, this is a false convergence
254+
if encountered_inf_nan[] && opt_ret == ReturnCode.Success
255+
if isapprox(res[2], cache.u0; rtol = 1e-8, atol = 1e-12)
256+
@warn "LBFGSB encountered Inf/NaN values during optimization (likely due to function singularity at bounds). The solution has not moved from the initial point. Consider using bounds that exclude singularities."
257+
opt_ret = ReturnCode.Failure
258+
end
259+
end
260+
236261
t1 = time()
237262

238263
stats = OptimizationBase.OptimizationStats(; iterations = optimizer.isave[30],

lib/OptimizationLBFGSB/test/runtests.jl

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,4 +54,40 @@ using Test
5454
lb = [-10.0, -10.0, -10.0, -10.0, -10.0], ub = [10.0, 10.0, 10.0, 10.0, 10.0])
5555
opt1 = solve(prob, OptimizationLBFGSB.LBFGSB(), maxiters = 1000, callback = callback)
5656
@test opt1.objective < l0
57-
end
57+
58+
# Test for issue #1094: LBFGSB should return Failure when encountering Inf/NaN
59+
# at bounds (e.g., due to function singularity)
60+
@testset "Inf/NaN detection at bounds (issue #1094)" begin
61+
# Function with singularity at α = -1 (log(0) = -Inf)
62+
ne = [47.79, 54.64, 60.68, 65.85, 70.10]
63+
nt = [49.01, 56.09, 62.38, 67.80, 72.29]
64+
65+
function chi2_singular(alpha, p)
66+
n_th = (1 + alpha[1]) * nt
67+
total = 0.0
68+
for i in eachindex(ne)
69+
if ne[i] == 0.0
70+
total += 2 * n_th[i]
71+
else
72+
total += 2 * (n_th[i] - ne[i] + ne[i] * log(ne[i] / n_th[i]))
73+
end
74+
end
75+
return total
76+
end
77+
78+
# With bounds including singularity at -1, should fail
79+
optf_singular = OptimizationFunction(chi2_singular, OptimizationBase.AutoForwardDiff())
80+
prob_singular = OptimizationProblem(optf_singular, [0.0]; lb = [-1.0], ub = [1.0])
81+
res_singular = solve(prob_singular, OptimizationLBFGSB.LBFGSB())
82+
@test res_singular.retcode == ReturnCode.Failure
83+
84+
# With safe bounds (away from singularity), should succeed
85+
# The optimizer should find a minimum with a negative value of alpha
86+
prob_safe = OptimizationProblem(optf_singular, [0.0]; lb = [-0.9], ub = [1.0])
87+
res_safe = solve(prob_safe, OptimizationLBFGSB.LBFGSB())
88+
@test res_safe.retcode == ReturnCode.Success
89+
# The minimum should be negative (somewhere between -0.1 and 0)
90+
@test res_safe.u[1] < 0.0
91+
@test res_safe.u[1] > -0.5
92+
end
93+
end

0 commit comments

Comments
 (0)