|
1 | 1 | export LBFGS, update! |
2 | 2 |
|
3 | | -# TODO make Ac_mul_B! |
4 | | -# Edit: Ac_mul_B! is not really needed for this operator |
5 | | -# Edit2: you never known! anyway for completeness would be cool to have it! |
6 | 3 | """ |
7 | | -`LBFGS(T::Type, dim::Tuple, Memory::Int)` |
| 4 | +`LBFGS(domainType::Type,dim_in::Tuple, M::Integer)` |
8 | 5 |
|
9 | | -`LBFGS{N}(T::NTuple{N,Type}, dim::NTuple{N,Tuple}, M::Int)` |
| 6 | +`LBFGS(dim_in::Tuple, M::Integer)` |
10 | 7 |
|
11 | | -`LBFGS(x::AbstractArray, Memory::Int)` |
| 8 | +`LBFGS(x::AbstractArray, M::Integer)` |
12 | 9 |
|
13 | 10 | Construct a Limited-Memory BFGS `LinearOperator` with memory `M`. The memory of `LBFGS` can be updated using the function `update!`, where the current iteration variable and gradient (`x`, `grad`) and the previous ones (`x_prev` and `grad_prev`) are needed: |
14 | 11 |
|
15 | 12 | ``` |
16 | 13 | julia> L = LBFGS(Float64,(4,),5) |
17 | 14 | LBFGS ℝ^4 -> ℝ^4 |
18 | 15 |
|
19 | | -julia> update!(L,x,x_prev,grad,grad_prev); #update memory |
| 16 | +julia> update!(L,x,x_prev,grad,grad_prev); # update memory |
20 | 17 |
|
21 | | -julia> d = L*x; #compute new direction |
| 18 | +julia> d = L*grad; # compute new direction |
22 | 19 |
|
23 | 20 | ``` |
24 | | -
|
25 | 21 | """ |
26 | 22 |
|
27 | | -mutable struct LBFGS{M, N, R <: Real, T <: Union{R, Complex{R}}, A<:AbstractArray{T,N}} <: LinearOperator |
28 | | - currmem::Int |
29 | | - curridx::Int |
30 | | - s::A |
31 | | - y::A |
32 | | - s_m::NTuple{M, A} |
33 | | - y_m::NTuple{M, A} |
34 | | - ys_m::Array{R, 1} |
| 23 | +mutable struct LBFGS{R, T <: BlockArray, M, I <: Integer} <: LinearOperator |
| 24 | + currmem::I |
| 25 | + curridx::I |
| 26 | + s::T |
| 27 | + y::T |
| 28 | + s_M::Array{T, 1} |
| 29 | + y_M::Array{T, 1} |
| 30 | + ys_M::Array{R, 1} |
35 | 31 | alphas::Array{R, 1} |
36 | 32 | H::R |
37 | 33 | end |
38 | 34 |
|
39 | | -# Constructors |
40 | | -#default |
41 | | -function LBFGS(T::Type, dim::NTuple{N,Int}, M::Int) where {N} |
42 | | - s_m = tuple([deepzeros(T,dim) for i = 1:M]...) |
43 | | - y_m = tuple([deepzeros(T,dim) for i = 1:M]...) |
44 | | - s = deepzeros(T,dim) |
45 | | - y = deepzeros(T,dim) |
46 | | - R = real(T) |
47 | | - ys_m = zeros(R, M) |
| 35 | +#default constructor |
| 36 | + |
| 37 | +function LBFGS(domainType, dim_in, M::I) where {I <: Integer} |
| 38 | + s_M = [blockzeros(domainType, dim_in) for i = 1:M] |
| 39 | + y_M = [blockzeros(domainType, dim_in) for i = 1:M] |
| 40 | + s = blockzeros(domainType, dim_in) |
| 41 | + y = blockzeros(domainType, dim_in) |
| 42 | + T = typeof(s) |
| 43 | + R = typeof(domainType) <: Tuple ? real(domainType[1]) : real(domainType) |
| 44 | + ys_M = zeros(R, M) |
48 | 45 | alphas = zeros(R, M) |
49 | | - LBFGS{M,N,R,T,typeof(s)}(0, 0, s, y, s_m, y_m, ys_m, alphas, one(R)) |
| 46 | + LBFGS{R, T, M, I}(0, 0, s, y, s_M, y_M, ys_M, alphas, one(R)) |
50 | 47 | end |
51 | 48 |
|
52 | | -LBFGS(x::AbstractArray,M::Int) = LBFGS(eltype(x),size(x),M) |
| 49 | +function LBFGS(dim_in, M::I) where {I <: Integer} |
| 50 | + domainType = eltype(dim_in) <: Integer ? Float64 : ([Float64 for i in eachindex(dim_in)]...) |
| 51 | + LBFGS(domainType, dim_in, M) |
| 52 | +end |
| 53 | + |
| 54 | +function LBFGS(x::T, M::I) where {T <: BlockArray, I <: Integer} |
| 55 | + domainType = blockeltype(x) |
| 56 | + dim_in = blocksize(x) |
| 57 | + LBFGS(domainType, dim_in, M) |
| 58 | +end |
53 | 59 |
|
54 | 60 | """ |
55 | 61 | `update!(L::LBFGS, x, x_prex, grad, grad_prev)` |
56 | 62 |
|
57 | | -See `LBFGS` documentation. |
58 | | -
|
| 63 | +See the documentation for `LBFGS`. |
59 | 64 | """ |
60 | 65 |
|
61 | | -function update!(L::LBFGS{M,N,R,T,A}, |
62 | | - x::A, |
63 | | - x_prev::A, |
64 | | - gradx::A, |
65 | | - gradx_prev::A) where {M,N,R,T,A} |
66 | | - |
67 | | - ys = update_s_y(L,x,x_prev,gradx,gradx_prev) |
68 | | - |
| 66 | +function update!(L::LBFGS{R, T, M, I}, x::T, x_prev::T, gradx::T, gradx_prev::T) where {R, T, M, I} |
| 67 | + L.s .= x .- x_prev |
| 68 | + L.y .= gradx .- gradx_prev |
| 69 | + ys = real(blockvecdot(L.s, L.y)) |
69 | 70 | if ys > 0 |
70 | 71 | L.curridx += 1 |
71 | 72 | if L.curridx > M L.curridx = 1 end |
72 | 73 | L.currmem += 1 |
73 | 74 | if L.currmem > M L.currmem = M end |
74 | | - |
75 | | - |
76 | | - yty = update_s_m_y_m(L,L.curridx) |
77 | | - L.ys_m[L.curridx] = ys |
| 75 | + L.ys_M[L.curridx] = ys |
| 76 | + blockcopy!(L.s_M[L.curridx], L.s) |
| 77 | + blockcopy!(L.y_M[L.curridx], L.y) |
| 78 | + yty = real(blockvecdot(L.y, L.y)) |
78 | 79 | L.H = ys/yty |
79 | 80 | end |
80 | 81 | return L |
81 | 82 | end |
82 | 83 |
|
83 | | -function update_s_y(L::LBFGS{M,N,R,T,A}, x::A, x_prev::A, gradx::A, gradx_prev::A) where {M,N,R,T,A} |
84 | | - L.s .= (-).(x, x_prev) |
85 | | - L.y .= (-).(gradx, gradx_prev) |
86 | | - ys = real(vecdot(L.s,L.y)) |
87 | | - return ys |
88 | | -end |
| 84 | +# LBFGS operators are symmetric |
89 | 85 |
|
90 | | -function update_s_m_y_m(L::LBFGS{M,N,R,T,A}, curridx::Int) where {M,N,R,T,A} |
91 | | - L.s_m[curridx] .= L.s |
92 | | - L.y_m[curridx] .= L.y |
| 86 | +Ac_mul_B!(x::T, L::LBFGS{R, T, M, I}, y::T) where {R, T, M, I} = A_mul_B!(x, L, y) |
93 | 87 |
|
94 | | - yty = real(vecdot(L.y,L.y)) |
95 | | - return yty |
96 | | -end |
| 88 | +# Two-loop recursion |
97 | 89 |
|
98 | | -function A_mul_B!(d::A, L::LBFGS{M,N,R,T,A}, gradx::A) where {M,N,R,T,A} |
99 | | - d .= (-).(gradx) |
| 90 | +function A_mul_B!(d::T, L::LBFGS{R, T, M, I}, gradx::T) where {R, T, M, I} |
| 91 | + d .= gradx |
100 | 92 | idx = loop1!(d,L) |
101 | 93 | d .= (*).(L.H, d) |
102 | 94 | d = loop2!(d,idx,L) |
103 | 95 | end |
104 | 96 |
|
105 | | -function loop1!(d::A, L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} |
| 97 | +function loop1!(d::T, L::LBFGS{R, T, M, I}) where {R, T, M, I} |
106 | 98 | idx = L.curridx |
107 | | - for i=1:L.currmem |
108 | | - L.alphas[idx] = real(vecdot(L.s_m[idx], d))/L.ys_m[idx] |
109 | | - d .-= L.alphas[idx].*L.y_m[idx] |
| 99 | + for i = 1:L.currmem |
| 100 | + L.alphas[idx] = real(blockvecdot(L.s_M[idx], d))/L.ys_M[idx] |
| 101 | + d .-= L.alphas[idx] .* L.y_M[idx] |
110 | 102 | idx -= 1 |
111 | 103 | if idx == 0 idx = M end |
112 | 104 | end |
113 | 105 | return idx |
114 | 106 | end |
115 | 107 |
|
116 | | -function loop2!(d::A, idx::Int, L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} |
117 | | - for i=1:L.currmem |
| 108 | +function loop2!(d::T, idx::Int, L::LBFGS{R, T, M, I}) where {R, T, M, I} |
| 109 | + for i = 1:L.currmem |
118 | 110 | idx += 1 |
119 | 111 | if idx > M idx = 1 end |
120 | | - beta = real(vecdot(L.y_m[idx], d))/L.ys_m[idx] |
121 | | - d .+= (L.alphas[idx].-beta).*L.s_m[idx] |
| 112 | + beta = real(blockvecdot(L.y_M[idx], d))/L.ys_M[idx] |
| 113 | + d .+= (L.alphas[idx] - beta) .* L.s_M[idx] |
122 | 114 | end |
123 | 115 | return d |
124 | 116 | end |
125 | 117 |
|
126 | 118 | # Properties |
127 | | - domainType(L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} = T |
128 | | -codomainType(L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} = T |
129 | 119 |
|
130 | | -size(A::LBFGS) = (size(A.s), size(A.s)) |
| 120 | +domainType(L::LBFGS{R, T, M, I}) where {R, T, M, I} = blockeltype(L.y_M[1]) |
| 121 | +codomainType(L::LBFGS{R, T, M, I}) where {R, T, M, I} = blockeltype(L.y_M[1]) |
| 122 | + |
| 123 | +size(A::LBFGS) = (blocksize(A.s), blocksize(A.s)) |
131 | 124 |
|
132 | 125 | fun_name(A::LBFGS) = "LBFGS" |
0 commit comments