diff --git a/src/backtracking.jl b/src/backtracking.jl
index 1eb98db..db36ff7 100644
--- a/src/backtracking.jl
+++ b/src/backtracking.jl
@@ -9,116 +9,120 @@ there exists a factor ρ = ρ(c₁) such that α' ≦ ρ α.
 This is a modification of the algorithm described in Nocedal Wright (2nd ed), Sec. 3.5.
 """
 @with_kw struct BackTracking{TF, TI} <: AbstractLineSearch
-    c_1::TF = 1e-4
-    ρ_hi::TF = 0.5
-    ρ_lo::TF = 0.1
-    iterations::TI = 1_000
-    order::TI = 3
-    maxstep::TF = Inf
-    cache::Union{Nothing,LineSearchCache{TF}} = nothing
+    c_1::TF = 1e-4
+    ρ_hi::TF = 0.5
+    ρ_lo::TF = 0.1
+    iterations::TI = 1_000
+    order::TI = 3
+    maxstep::TF = Inf
+    cache::Union{Nothing, LineSearchCache{TF}} = nothing
 end
-BackTracking{TF}(args...; kwargs...) where TF = BackTracking{TF,Int}(args...; kwargs...)
+BackTracking{TF}(args...; kwargs...) where TF = BackTracking{TF, Int}(args...; kwargs...)
 
 function (ls::BackTracking)(df::AbstractObjective, x::AbstractArray{T}, s::AbstractArray{T},
-                            α_0::Tα = real(T)(1), x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = convert(real(T), Inf)) where {T, Tα}
-    ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)
-
-    if ϕ_0 == nothing
-        ϕ_0 = ϕ(Tα(0))
-    end
-    if dϕ_0 == nothing
-        dϕ_0 = dϕ(Tα(0))
-    end
-
-    α_0 = min(α_0, min(alphamax, ls.maxstep / norm(s, Inf)))
-    ls(ϕ, α_0, ϕ_0, dϕ_0)
+                            α_0::Tα = real(T)(1), x_new::AbstractArray{T} = similar(x), ϕ_0 = nothing, dϕ_0 = nothing, alphamax = typemax(real(T))) where {T, Tα}
+    ϕ, dϕ = make_ϕ_dϕ(df, x_new, x, s)
+
+    if isnothing(ϕ_0)
+        ϕ_0 = ϕ(Tα(0))
+    end
+    if isnothing(dϕ_0)
+        dϕ_0 = dϕ(Tα(0))
+    end
+
+    α_0 = min(α_0, min(alphamax, ls.maxstep / norm(s, Inf)))
+    ls(ϕ, α_0, ϕ_0, dϕ_0)
 end
 
 (ls::BackTracking)(ϕ, dϕ, ϕdϕ, αinitial, ϕ_0, dϕ_0) = ls(ϕ, αinitial, ϕ_0, dϕ_0)
 
 # TODO: Should we deprecate the interface that only uses the ϕ argument?
 function (ls::BackTracking)(ϕ, αinitial::Tα, ϕ_0, dϕ_0) where Tα
-    @unpack c_1, ρ_hi, ρ_lo, iterations, order, cache = ls
-    emptycache!(cache)
-    pushcache!(cache, 0, ϕ_0, dϕ_0) # backtracking doesn't use the slope except here
-
-    iterfinitemax = -log2(eps(real(Tα)))
-
-    @assert order in (2,3)
-    # Check the input is valid, and modify otherwise
-    #backtrack_condition = 1.0 - 1.0/(2*ρ) # want guaranteed backtrack factor
-    #if c_1 >= backtrack_condition
-    #    warn("""The Armijo constant c_1 is too large; replacing it with
-    #            $(backtrack_condition)""")
-    #    c_1 = backtrack_condition
-    #end
-
-    # Count the total number of iterations
-    iteration = 0
-
-    ϕx_0, ϕx_1 = ϕ_0, ϕ_0
-
-    α_1, α_2 = αinitial, αinitial
-
-    ϕx_1 = ϕ(α_1)
-
-    # Hard-coded backtrack until we find a finite function value
-    iterfinite = 0
-    while !isfinite(ϕx_1) && iterfinite < iterfinitemax
-        iterfinite += 1
-        α_1 = α_2
-        α_2 = α_1/2
-
-        ϕx_1 = ϕ(α_2)
-    end
-    pushcache!(cache, αinitial, ϕx_1)
-    # TODO: check if value is finite (maybe iterfinite > iterfinitemax)
-
-    # Backtrack until we satisfy sufficient decrease condition
-    while ϕx_1 > ϕ_0 + c_1 * α_2 * dϕ_0
-        # Increment the number of steps we've had to perform
-        iteration += 1
-
-        # Ensure termination
-        if iteration > iterations
-            throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(iterations).",
-                                      α_2))
-        end
-
-        # Shrink proposed step-size:
-        if order == 2 || iteration == 1
-            # backtracking via quadratic interpolation:
-            # This interpolates the available data
-            #    f(0), f'(0), f(α)
-            # with a quadractic which is then minimised; this comes with a
-            # guaranteed backtracking factor 0.5 * (1-c_1)^{-1} which is < 1
-            # provided that c_1 < 1/2; the backtrack_condition at the beginning
-            # of the function guarantees at least a backtracking factor ρ.
-            α_tmp = - (dϕ_0 * α_2^2) / ( 2 * (ϕx_1 - ϕ_0 - dϕ_0*α_2) )
-        else
-            div = one(Tα) / (α_1^2 * α_2^2 * (α_2 - α_1))
-            a = (α_1^2*(ϕx_1 - ϕ_0 - dϕ_0*α_2) - α_2^2*(ϕx_0 - ϕ_0 - dϕ_0*α_1))*div
-            b = (-α_1^3*(ϕx_1 - ϕ_0 - dϕ_0*α_2) + α_2^3*(ϕx_0 - ϕ_0 - dϕ_0*α_1))*div
-
-            if isapprox(a, zero(a), atol=eps(real(Tα)))
-                α_tmp = dϕ_0 / (2*b)
-            else
-                # discriminant
-                d = max(b^2 - 3*a*dϕ_0, Tα(0))
-                # quadratic equation root
-                α_tmp = (-b + sqrt(d)) / (3*a)
-            end
-        end
-
-        α_1 = α_2
-
-        α_tmp = NaNMath.min(α_tmp, α_2*ρ_hi) # avoid too small reductions
-        α_2 = NaNMath.max(α_tmp, α_2*ρ_lo) # avoid too big reductions
-
-        # Evaluate f(x) at proposed position
-        ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
-        pushcache!(cache, α_2, ϕx_1)
-    end
-
-    return α_2, ϕx_1
+    @unpack c_1, ρ_hi, ρ_lo, iterations, order, cache = ls
+    emptycache!(cache)
+    pushcache!(cache, 0, ϕ_0, dϕ_0) # backtracking doesn't use the slope except here
+
+    iterfinitemax = -log2(eps(real(Tα)))
+
+    @assert order in (2, 3)
+    # Check the input is valid, and modify otherwise
+    #backtrack_condition = 1.0 - 1.0/(2*ρ) # want guaranteed backtrack factor
+    #if c_1 >= backtrack_condition
+    #    warn("""The Armijo constant c_1 is too large; replacing it with
+    #            $(backtrack_condition)""")
+    #    c_1 = backtrack_condition
+    #end
+
+    # Count the total number of iterations
+    iteration = 0
+
+    ϕx_0, ϕx_1 = ϕ_0, ϕ_0
+
+    α_1, α_2 = αinitial, αinitial
+
+    ϕx_1 = ϕ(α_1)
+
+    # Hard-coded backtrack until we find a finite function value
+    iterfinite = 0
+    while !isfinite(ϕx_1) && iterfinite < iterfinitemax
+        iterfinite += 1
+        α_1 = α_2
+        α_2 = α_1 / 2
+
+        ϕx_1 = ϕ(α_2)
+    end
+    pushcache!(cache, αinitial, ϕx_1)
+
+    if !isfinite(ϕx_1)
+        throw(LineSearchException("Could not find an α at which the function value is finite.", α_2))
+    end
+
+    # Backtrack until we satisfy sufficient decrease condition
+    while ϕx_1 > ϕ_0 + c_1 * α_2 * dϕ_0
+        # Increment the number of steps we've had to perform
+        iteration += 1
+
+        # Ensure termination
+        if iteration > iterations
+            throw(LineSearchException("Linesearch failed to converge, reached maximum iterations $(iterations).",
+                                      α_2))
+        end
+
+        # Shrink proposed step-size:
+        if order == 2 || iteration == 1
+            # backtracking via quadratic interpolation:
+            # This interpolates the available data
+            #    f(0), f'(0), f(α)
+            # with a quadratic which is then minimised; this comes with a
+            # guaranteed backtracking factor 0.5 * (1-c_1)^{-1} which is < 1
+            # provided that c_1 < 1/2; the backtrack_condition at the beginning
+            # of the function guarantees at least a backtracking factor ρ.
+            α_tmp = -(dϕ_0 * α_2^2) / (2 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2))
+        else
+            div = one(Tα) / (α_1^2 * α_2^2 * (α_2 - α_1))
+            a = (α_1^2 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2) - α_2^2 * (ϕx_0 - ϕ_0 - dϕ_0 * α_1)) * div
+            b = (-α_1^3 * (ϕx_1 - ϕ_0 - dϕ_0 * α_2) + α_2^3 * (ϕx_0 - ϕ_0 - dϕ_0 * α_1)) * div
+
+            if isapprox(a, zero(a), atol = eps(real(Tα)))
+                α_tmp = dϕ_0 / (2 * b)
+            else
+                # discriminant
+                d = max(b^2 - 3 * a * dϕ_0, Tα(0))
+                # quadratic equation root
+                α_tmp = (-b + sqrt(d)) / (3 * a)
+            end
+        end
+
+        α_1 = α_2
+
+        α_tmp = NaNMath.min(α_tmp, α_2 * ρ_hi) # avoid too small reductions
+        α_2 = NaNMath.max(α_tmp, α_2 * ρ_lo) # avoid too big reductions
+
+        # Evaluate f(x) at proposed position
+        ϕx_0, ϕx_1 = ϕx_1, ϕ(α_2)
+        pushcache!(cache, α_2, ϕx_1)
+    end
+
+    return α_2, ϕx_1
 end
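Reviewer note: for anyone skimming the hunks above, this is the safeguarded interpolation step of the Armijo backtracking loop. Below is a minimal, self-contained sketch of the order-2 branch on a toy quadratic. `backtrack_quadratic` and the toy `ϕ` are hypothetical names used for illustration only; the sketch omits the cache, the `NaNMath` safeguards, and the cubic (order-3) branch.

```julia
# Sketch of order-2 Armijo backtracking, mirroring the hunk above.
function backtrack_quadratic(ϕ, ϕ_0, dϕ_0; α = 1.0, c_1 = 1e-4, ρ_hi = 0.5, ρ_lo = 0.1)
    ϕα = ϕ(α)
    while ϕα > ϕ_0 + c_1 * α * dϕ_0          # Armijo sufficient-decrease test
        # minimiser of the quadratic interpolating ϕ(0), ϕ'(0), ϕ(α)
        α_q = -(dϕ_0 * α^2) / (2 * (ϕα - ϕ_0 - dϕ_0 * α))
        α = clamp(α_q, ρ_lo * α, ρ_hi * α)   # safeguard the reduction factor
        ϕα = ϕ(α)
    end
    return α, ϕα
end

ϕ(α) = (α - 2)^2 - 1                          # toy objective along the search direction
backtrack_quadratic(ϕ, ϕ(0.0), -4.0; α = 8.0) # returns (2.0, -1.0)
```

With `α = 8.0` the quadratic model's minimiser lands exactly on the true minimiser `α = 2`, so a single interpolation step satisfies the Armijo test.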
diff --git a/src/hagerzhang.jl b/src/hagerzhang.jl
index 9d1be56..31172ba 100644
--- a/src/hagerzhang.jl
+++ b/src/hagerzhang.jl
@@ -113,15 +113,14 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
         linesearchmax, psi3, display, mayterminate, cache = ls
     emptycache!(cache)
-    zeroT = convert(T, 0)
-    pushcache!(cache, zeroT, phi_0, dphi_0)
+    pushcache!(cache, zero(T), phi_0, dphi_0)
     if !(isfinite(phi_0) && isfinite(dphi_0))
         throw(LineSearchException("Value and slope at step length = 0 must be finite.", T(0)))
     end
     if dphi_0 >= eps(T) * abs(phi_0)
         throw(LineSearchException("Search direction is not a direction of descent.", T(0)))
     elseif dphi_0 >= 0
-        return zeroT, phi_0
+        return zero(T), phi_0
     end
 
     # Prevent values of x_new = x+αs that are likely to make
@@ -130,7 +129,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     if cache !== nothing
         @unpack alphas, values, slopes = cache
     else
-        alphas = [zeroT] # for bisection
+        alphas = [zero(T)] # for bisection
         values = [phi_0]
         slopes = [dphi_0]
     end
@@ -141,7 +140,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     phi_lim = phi_0 + epsilon * abs(phi_0)
     @assert c >= 0
-    c <= eps(T) && return zeroT, phi_0
+    c <= eps(T) && return zero(T), phi_0
     @assert isfinite(c) && c <= alphamax
     phi_c, dphi_c = ϕdϕ(c)
     iterfinite = 1
@@ -154,7 +153,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
     if !(isfinite(phi_c) && isfinite(dphi_c))
         @warn("Failed to achieve finite new evaluation point, using alpha=0")
         mayterminate[] = false # reset in case another initial guess is used next
-        return zeroT, phi_0
+        return zero(T), phi_0
     end
     push!(alphas, c)
     push!(values, phi_c)
@@ -185,7 +184,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
                     ", phi_c = ", phi_c,
                     ", dphi_c = ", dphi_c)
         end
-        if dphi_c >= zeroT
+        if dphi_c >= zero(T)
             # We've reached the upward slope, so we have b; examine
             # previous values to find a
             ib = length(alphas)
@@ -201,7 +200,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
             # have crested over the peak. Use bisection.
             ib = length(alphas)
             ia = 1
-            if c ≉ alphas[ib] || slopes[ib] >= zeroT
+            if c ≉ alphas[ib] || slopes[ib] >= zero(T)
                 error("c = ", c)
             end
             # ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
@@ -360,8 +359,7 @@ function secant2!(ϕdϕ,
     dphi_a = slopes[ia]
     dphi_b = slopes[ib]
     T = eltype(slopes)
-    zeroT = convert(T, 0)
-    if !(dphi_a < zeroT && dphi_b >= zeroT)
+    if !(dphi_a < zero(T) && dphi_b >= zero(T))
         error(string("Search direction is not a direction of descent; ",
                      "this error may indicate that user-provided derivatives are inaccurate. ",
                      @sprintf "(dphi_a = %f; dphi_b = %f)" dphi_a dphi_b))
@@ -445,11 +443,10 @@ function update!(ϕdϕ,
     a = alphas[ia]
     b = alphas[ib]
     T = eltype(slopes)
-    zeroT = convert(T, 0)
     # Debugging (HZ, eq. 4.4):
-    @assert slopes[ia] < zeroT
+    @assert slopes[ia] < zero(T)
     @assert values[ia] <= phi_lim
-    @assert slopes[ib] >= zeroT
+    @assert slopes[ib] >= zero(T)
     @assert b > a
     c = alphas[ic]
     phi_c = values[ic]
@@ -466,7 +463,7 @@ function update!(ϕdϕ,
     if c < a || c > b
         return ia, ib #, 0, 0 # it's out of the bracketing interval
     end
-    if dphi_c >= zeroT
+    if dphi_c >= zero(T)
         return ia, ic #, 0, 0 # replace b with a closer point
     end
     # We know dphi_c < 0. However, phi may not be monotonic between a
@@ -495,10 +492,9 @@ function bisect!(ϕdϕ,
     a = alphas[ia]
     b = alphas[ib]
     # Debugging (HZ, conditions shown following U3)
-    zeroT = convert(T, 0)
-    @assert slopes[ia] < zeroT
+    @assert slopes[ia] < zero(T)
     @assert values[ia] <= phi_lim
-    @assert slopes[ib] < zeroT # otherwise we wouldn't be here
+    @assert slopes[ib] < zero(T) # otherwise we wouldn't be here
     @assert values[ib] > phi_lim
     @assert b > a
     while b - a > eps(b)
@@ -514,7 +510,7 @@ function bisect!(ϕdϕ,
         push!(slopes, gphi)
         id = length(alphas)
-        if gphi >= zeroT
+        if gphi >= zero(T)
             return ia, id # replace b, return
         end
         if phi_d <= phi_lim
diff --git a/src/initialguess.jl b/src/initialguess.jl
index 74b334b..8373a4e 100644
--- a/src/initialguess.jl
+++ b/src/initialguess.jl
@@ -274,14 +274,13 @@ function _hzI0(x::AbstractArray{Tx},
                f_x::T,
                alphamax::T,
                psi0::T = convert(T, 1)/100) where {Tx,T}
-    zeroT = convert(T, 0)
-    alpha = convert(T, 1)
+    alpha = one(T)
     gr_max = maximum(abs, gr)
-    if gr_max != zeroT
+    if gr_max != zero(T)
         x_max = maximum(abs, x)
-        if x_max != zeroT
+        if x_max != zero(T)
             alpha = psi0 * x_max / gr_max
-        elseif f_x != zeroT
+        elseif f_x != zero(T)
             alpha = psi0 * abs(f_x) / norm(gr)^2
         end
     end
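Reviewer note: the `_hzI0` hunk above is the Hager–Zhang I0 heuristic for the very first trial step length. Here is a small standalone sketch of the same rule; `hz_initial_step` is a hypothetical name for illustration, not the package's API, and `psi0` defaults to `1/100` as in the signature above.

```julia
# Sketch of the HZ I0 initial-step rule: scale ψ₀ by |x|∞/|g|∞ when possible,
# fall back to |f|/‖g‖² when x == 0, and to one(T) otherwise.
function hz_initial_step(x, gr, f_x::T; psi0 = convert(T, 1) / 100) where {T}
    alpha = one(T)
    gr_max = maximum(abs, gr)
    if gr_max != zero(T)
        x_max = maximum(abs, x)
        if x_max != zero(T)
            alpha = psi0 * x_max / gr_max
        elseif f_x != zero(T)
            alpha = psi0 * abs(f_x) / sum(abs2, gr)  # == norm(gr)^2
        end
    end
    return alpha
end

hz_initial_step([1.0, -2.0], [0.5, 0.25], 4.0)  # psi0 * x_max / gr_max == 0.04
```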
diff --git a/src/morethuente.jl b/src/morethuente.jl
index 79576d9..fd7e68e 100644
--- a/src/morethuente.jl
+++ b/src/morethuente.jl
@@ -169,19 +169,18 @@ function (ls::MoreThuente)(ϕdϕ,
     info = 0
     info_cstep = 1 # Info from step
 
-    zeroT = convert(T, 0)
-    pushcache!(cache, zeroT, ϕ_0, dϕ_0)
+    pushcache!(cache, zero(T), ϕ_0, dϕ_0)
 
     #
     # Check the input parameters for errors.
     #
 
-    if alpha <= zeroT || f_tol < zeroT || gtol < zeroT ||
-        x_tol < zeroT || alphamin < zeroT || alphamax < alphamin || maxfev <= zeroT
+    if alpha <= zero(T) || f_tol < zero(T) || gtol < zero(T) ||
+        x_tol < zero(T) || alphamin < zero(T) || alphamax < alphamin || maxfev <= zero(T)
         throw(LineSearchException("Invalid parameters to MoreThuente.", 0))
     end
 
-    if dϕ_0 >= zeroT
+    if dϕ_0 >= zero(T)
         throw(LineSearchException("Search direction is not a direction of descent.", 0))
     end
 
@@ -209,10 +208,10 @@ function (ls::MoreThuente)(ϕdϕ,
     # function, and derivative at the current step.
     #
-    stx = zeroT
+    stx = zero(T)
     fx = finit
     dgx = dϕ_0
-    sty = zeroT
+    sty = zero(T)
     fy = finit
     dgy = dϕ_0
 
@@ -465,7 +464,6 @@ function cstep(stx::Real, fx::Real, dgx::Real,
                bracketed::Bool, alphamin::Real, alphamax::Real)
     T = promote_type(typeof(stx), typeof(fx), typeof(dgx),
                      typeof(sty), typeof(fy), typeof(dgy),
                      typeof(alpha), typeof(f), typeof(dg), typeof(alphamin), typeof(alphamax))
-    zeroT = convert(T, 0)
     info = 0
 
     #
@@ -473,7 +471,7 @@ function cstep(stx::Real, fx::Real, dgx::Real,
     #
     if (bracketed && (alpha <= min(stx, sty) || alpha >= max(stx, sty))) ||
-        dgx * (alpha - stx) >= zeroT || alphamax < alphamin
+        dgx * (alpha - stx) >= zero(T) || alphamax < alphamin
         throw(ArgumentError("Minimizer not bracketed"))
     end
 
@@ -519,7 +517,7 @@
     # the cubic step is taken, else the quadratic step is taken
     #
-elseif sgnd < zeroT
+elseif sgnd < zero(T)
     info = 2
     bound = false
     theta = 3 * (fx - f) / (alpha - stx) + dgx + dg
@@ -572,7 +570,7 @@ elseif sgnd < zeroT
         p = gamma - dg + theta
         q = gamma + dgx - dg + gamma
         r = p / q
-        if r < zeroT && gamma != zeroT
+        if r < zero(T) && gamma != zero(T)
            alphac = alpha + r * (stx - alpha)
         elseif alpha > stx
            alphac = alphamax
@@ -635,7 +633,7 @@ elseif sgnd < zeroT
             fy = f
             dgy = dg
         else
-            if sgnd < zeroT
+            if sgnd < zero(T)
                 sty = stx
                 fy = fx
                 dgy = dgx
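Reviewer note: across these hunks the pattern is the same, dropping the cached `zeroT = convert(T, 0)` local and comparing against `zero(T)` directly. A quick check that the two are interchangeable for the float types the test suite exercises; this snippet assumes `DoubleFloats` is installed, as `test/runtests.jl` below requires.

```julia
using DoubleFloats: Double64

for T in (Float32, Float64, Double64, BigFloat)
    z = zero(T)
    @assert z == convert(T, 0)   # same value as the old zeroT local
    @assert typeof(z) === T      # and no accidental promotion
end
```

For the isbits float types here, `zero(T)` compiles down to a constant, so hoisting it into a local bought nothing; reading it inline is clearer.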
""" function (ls::StrongWolfe)(ϕ, dϕ, ϕdϕ, - alpha0::T, ϕ_0, dϕ_0) where T<:Real - @unpack c_1, c_2, ρ, cache = ls - emptycache!(cache) - - zeroT = convert(T, 0) - pushcache!(cache, zeroT, ϕ_0, dϕ_0) - - # Step-sizes - a_0 = zeroT - a_iminus1 = a_0 - a_i = alpha0 - a_max = convert(T, 65536) - - # ϕ(alpha) = df.f(x + alpha * p) - ϕ_a_iminus1 = ϕ_0 - ϕ_a_i = convert(T, NaN) - - # ϕ'(alpha) = dot(g(x + alpha * p), p) - dϕ_a_i = convert(T, NaN) - - # Iteration counter - i = 1 - - while a_i < a_max - ϕ_a_i = ϕ(a_i) - pushcache!(cache, a_i, ϕ_a_i) - - # Test Wolfe conditions - if (ϕ_a_i > ϕ_0 + c_1 * a_i * dϕ_0) || - (ϕ_a_i >= ϕ_a_iminus1 && i > 1) - a_star = zoom(a_iminus1, a_i, - dϕ_0, ϕ_0, - ϕ, dϕ, ϕdϕ, cache) - return a_star, ϕ(a_star) - end - - dϕ_a_i = dϕ(a_i) - if cache !== nothing - push!(cache.slopes, dϕ_a_i) - end - - # Check condition 2 - if abs(dϕ_a_i) <= -c_2 * dϕ_0 - return a_i, ϕ_a_i - end - - # Check condition 3 - if dϕ_a_i >= zeroT # FIXME untested! - a_star = zoom(a_i, a_iminus1, - dϕ_0, ϕ_0, ϕ, dϕ, ϕdϕ, cache) - return a_star, ϕ(a_star) - end - - # Choose a_iplus1 from the interval (a_i, a_max) - a_iminus1 = a_i - a_i *= ρ - - # Update ϕ_a_iminus1 - ϕ_a_iminus1 = ϕ_a_i - - # Update iteration count - i += 1 - end - - # Quasi-error response TODO make this error instead - return a_max, ϕ(a_max) + alpha0::T, ϕ_0, dϕ_0) where T <: Real + @unpack c_1, c_2, ρ, cache = ls + emptycache!(cache) + + pushcache!(cache, zero(T), ϕ_0, dϕ_0) + + # Step-sizes + a_0 = zero(T) + a_iminus1 = a_0 + a_i = alpha0 + a_max = convert(T, 1000.0) + + # ϕ(alpha) = df.f(x + alpha * p) + ϕ_a_iminus1 = ϕ_0 + ϕ_a_i = convert(T, NaN) + + # ϕ'(alpha) = dot(g(x + alpha * p), p) + dϕ_a_i = convert(T, NaN) + i = 1 + while a_i < a_max + ϕ_a_i = ϕ(a_i) + + # Test Wolfe conditions + if (ϕ_a_i > ϕ_0 + c_1 * a_i * dϕ_0) || + (ϕ_a_i >= ϕ_a_iminus1 && i > 1) + a_star = zoom(a_iminus1, a_i, + dϕ_0, ϕ_0, + ϕ, dϕ, ϕdϕ, cache) + return a_star, ϕ(a_star) + end + + dϕ_a_i = dϕ(a_i) + pushcache!(cache, a_i, ϕ_a_i, dϕ_a_i) + + # Check condition 2 + if abs(dϕ_a_i) <= -c_2 * dϕ_0 + return a_i, ϕ_a_i + end + + # Check condition 3 + if dϕ_a_i >= zero(T) # FIXME untested! 
+            a_star = zoom(a_i, a_iminus1,
+                          dϕ_0, ϕ_0, ϕ, dϕ, ϕdϕ, cache)
+            return a_star, ϕ(a_star)
+        end
+
+        # Choose a_iplus1 from the interval (a_i, a_max)
+        a_iminus1 = a_i
+        a_i *= ρ
+
+        # Update ϕ_a_iminus1
+        ϕ_a_iminus1 = ϕ_a_i
+        i += 1
+    end
+
+    throw(LineSearchException("StrongWolfe failed to converge: step length exceeded a_max = $(a_max).", a_max))
 end
 
-function zoom(a_lo::T,
-              a_hi::T,
-              dϕ_0::Real,
-              ϕ_0::Real,
-              ϕ,
-              dϕ,
-              ϕdϕ,
-              cache,
-              c_1::Real = convert(T, 1)/10^4,
-              c_2::Real = convert(T, 9)/10) where T
-
-    zeroT = convert(T, 0)
-    # Step-size
-    a_j = convert(T, NaN)
-
-    # Count iterations
-    iteration = 0
-    max_iterations = 10
-
-    # Shrink bracket
-    while iteration < max_iterations
-        iteration += 1
-
-        ϕ_a_lo, ϕprime_a_lo = ϕdϕ(a_lo)
-        pushcache!(cache, a_lo, ϕ_a_lo, ϕprime_a_lo)
-
-        ϕ_a_hi, ϕprime_a_hi = ϕdϕ(a_hi)
-        pushcache!(cache, a_hi, ϕ_a_hi, ϕprime_a_hi)
-
-        # Interpolate a_j
-        if a_lo < a_hi
-            a_j = interpolate(a_lo, a_hi,
-                              ϕ_a_lo, ϕ_a_hi,
-                              ϕprime_a_lo, ϕprime_a_hi)
-        else
-            # TODO: Check if this is needed
-            a_j = interpolate(a_hi, a_lo,
-                              ϕ_a_hi, ϕ_a_lo,
-                              ϕprime_a_hi, ϕprime_a_lo)
-        end
-
-        # Evaluate ϕ(a_j)
-        ϕ_a_j = ϕ(a_j)
-        pushcache!(cache, a_j, ϕ_a_j)
-
-        # Check Armijo
-        if (ϕ_a_j > ϕ_0 + c_1 * a_j * dϕ_0) ||
-            (ϕ_a_j > ϕ_a_lo)
-            a_hi = a_j
-        else
-            # Evaluate ϕprime(a_j)
-            ϕprime_a_j = dϕ(a_j)
-            if cache !== nothing
-                push!(cache.slopes, ϕprime_a_j)
-            end
-
-            if abs(ϕprime_a_j) <= -c_2 * dϕ_0
-                return a_j
-            end
-
-            if ϕprime_a_j * (a_hi - a_lo) >= zeroT
-                a_hi = a_lo
-            end
-
-            a_lo = a_j
-        end
-    end
-
-    # Quasi-error response
-    return a_j
+function zoom(
+    a_lo::T,
+    a_hi::T,
+    dϕ_0::Real,
+    ϕ_0::Real,
+    ϕ,
+    dϕ,
+    ϕdϕ,
+    cache,
+    c_1::Real = convert(T, 1) / 10^4,
+    c_2::Real = convert(T, 9) / 10) where T
+
+    # Step-size
+    a_j = convert(T, NaN)
+
+    # Count iterations
+    iteration = 0
+    max_iterations = 10
+
+    # Shrink bracket
+    while iteration < max_iterations
+        iteration += 1
+
+        ϕ_a_lo, ϕprime_a_lo = ϕdϕ(a_lo)
+        pushcache!(cache, a_lo, ϕ_a_lo, ϕprime_a_lo)
+
+        ϕ_a_hi, ϕprime_a_hi = ϕdϕ(a_hi)
+        pushcache!(cache, a_hi, ϕ_a_hi, ϕprime_a_hi)
+
+        # Interpolate a_j
+        if a_lo < a_hi
+            a_j = interpolate(a_lo, a_hi,
+                              ϕ_a_lo, ϕ_a_hi,
+                              ϕprime_a_lo, ϕprime_a_hi)
+        else
+            # TODO: Check if this is needed
+            a_j = interpolate(a_hi, a_lo,
+                              ϕ_a_hi, ϕ_a_lo,
+                              ϕprime_a_hi, ϕprime_a_lo)
+        end
+
+        # Evaluate ϕ(a_j)
+        ϕ_a_j = ϕ(a_j)
+        pushcache!(cache, a_j, ϕ_a_j)
+
+        # Check Armijo
+        if (ϕ_a_j > ϕ_0 + c_1 * a_j * dϕ_0) ||
+            (ϕ_a_j > ϕ_a_lo)
+            a_hi = a_j
+        else
+            # Evaluate ϕprime(a_j)
+            ϕprime_a_j = dϕ(a_j)
+            if cache !== nothing
+                push!(cache.slopes, ϕprime_a_j)
+            end
+
+            if abs(ϕprime_a_j) <= -c_2 * dϕ_0
+                return a_j
+            end
+
+            if ϕprime_a_j * (a_hi - a_lo) >= zero(T)
+                a_hi = a_lo
+            end
+
+            a_lo = a_j
+        end
+    end
+
+    # Quasi-error response
+    return a_j
 end
 
 # a_lo = a_{i - 1}
 # a_hi = a_{i}
-function interpolate(a_i1::Real, a_i::Real,
-                     ϕ_a_i1::Real, ϕ_a_i::Real,
-                     dϕ_a_i1::Real, dϕ_a_i::Real)
-    d1 = dϕ_a_i1 + dϕ_a_i -
-        3 * (ϕ_a_i1 - ϕ_a_i) / (a_i1 - a_i)
-    d2 = sqrt(d1 * d1 - dϕ_a_i1 * dϕ_a_i)
-    return a_i - (a_i - a_i1) *
-        ((dϕ_a_i + d2 - d1) /
-         (dϕ_a_i - dϕ_a_i1 + 2 * d2))
+function interpolate(a_lo::Real, a_hi::Real,
+                     ϕ_a_lo::Real, ϕ_a_hi::Real,
+                     dϕ_a_lo::Real, dϕ_a_hi::Real)
+
+    d1 = dϕ_a_lo + dϕ_a_hi - 3 * (ϕ_a_lo - ϕ_a_hi) / (a_lo - a_hi)
+    d2 = sqrt(d1 * d1 - dϕ_a_lo * dϕ_a_hi)
+    return a_hi - (a_hi - a_lo) *
+        ((dϕ_a_hi + d2 - d1) /
+         (dϕ_a_hi - dϕ_a_lo + 2 * d2))
 end
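Reviewer note: the renamed `interpolate` is the standard cubic-interpolation minimiser (cf. Nocedal & Wright, 2nd ed., eq. 3.59). A quick sanity check, with `cubic_min` as a hypothetical standalone copy of the formula: on an exactly cubic ϕ it should recover the interior minimiser.

```julia
# Cubic-interpolation minimiser from ϕ and ϕ' at two bracket endpoints.
function cubic_min(a_lo, a_hi, ϕ_lo, ϕ_hi, dϕ_lo, dϕ_hi)
    d1 = dϕ_lo + dϕ_hi - 3 * (ϕ_lo - ϕ_hi) / (a_lo - a_hi)
    d2 = sqrt(d1 * d1 - dϕ_lo * dϕ_hi)
    return a_hi - (a_hi - a_lo) * (dϕ_hi + d2 - d1) / (dϕ_hi - dϕ_lo + 2 * d2)
end

ϕ(α)  = α^3 - 3α          # local minimiser at α = 1 inside the bracket [0, 2]
dϕ(α) = 3α^2 - 3
cubic_min(0.0, 2.0, ϕ(0.0), ϕ(2.0), dϕ(0.0), dϕ(2.0))  # returns exactly 1.0
```

Because the interpolant matches the cubic exactly, the formula returns the true minimiser in one step; inside `zoom` the result is then subjected to the Armijo and curvature tests before the bracket shrinks.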
diff --git a/test/runtests.jl b/test/runtests.jl
index 4910d53..5b47b5d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -5,7 +5,7 @@ using OptimTestProblems
 using DoubleFloats: Double64, Double32, Double16
 import NLSolversBase
 
-debug_printing = false
+debug_printing = false
 lstypes = (Static(), HagerZhang(), StrongWolfe(), MoreThuente(),
            BackTracking(), BackTracking(order=2) )
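Reviewer note: to exercise the reformatted code paths end to end, here is a small smoke test in the spirit of the suite above. It assumes this branch of LineSearches plus DoubleFloats are installed; it calls the ϕ-only `BackTracking` entry point and checks that the step length stays in `Double64` precision, which is exactly what the `zero(T)`-style changes are meant to preserve.

```julia
using LineSearches
using DoubleFloats: Double64

ϕ(α)  = (α - Double64(2))^2 - 1      # toy 1-D objective along the search direction
ϕ_0   = ϕ(zero(Double64))            # ϕ(0) = 3
dϕ_0  = Double64(-4)                 # ϕ'(0)

ls = BackTracking{Double64}()
α, ϕα = ls(ϕ, one(Double64), ϕ_0, dϕ_0)

@assert typeof(α) === Double64       # no silent promotion to Float64
@show α ϕα
```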