|
1 | 1 | export LBFGS, update! |
2 | 2 |
|
"""
    LBFGS(x::Tuple, M::Integer)
    LBFGS(x::AbstractArray, M::Integer)

Construct a limited-memory BFGS `LinearOperator` with memory `M`, whose domain and
codomain have the same structure as `x`.

The memory of an `LBFGS` operator is updated with `update!`, which needs the current
iterate and gradient (`x`, `grad`) together with the previous ones (`x_prev`,
`grad_prev`):

```julia
julia> L = LBFGS(zeros(4), 5)
LBFGS ℝ^4 -> ℝ^4

julia> update!(L, x, x_prev, grad, grad_prev); # update memory

julia> d = L*grad; # compute new direction

```
"""
mutable struct LBFGS{R, T <: BlockArray, M} <: LinearOperator
    currmem::Int        # number of pairs currently stored (capped at M by `update!`)
    curridx::Int        # slot of the most recently stored pair (wraps around after M)
    s::T                # work buffer: x - x_prev of the latest `update!` call
    y::T                # work buffer: gradx - gradx_prev of the latest `update!` call
    s_M::Array{T, 1}    # stored `s` vectors, one per memory slot
    y_M::Array{T, 1}    # stored `y` vectors, one per memory slot
    ys_M::Array{R, 1}   # stored inner products <s, y>, one per memory slot
    alphas::Array{R, 1} # coefficients written by `loop1!` and read back by `loop2!`
    H::R                # scaling ys/yty applied between the two loops
end
34 | 32 |
|
35 | 33 | # Constructors |
36 | 34 |
|
# Build an empty operator (no stored pairs, unit scaling) shaped like `x`.
#
# - `x`: block array used only as a template for the internal buffers
#   (every buffer is created with `blockzeros(x)`).
# - `M`: number of memory slots.
function LBFGS(x::T, M::Integer) where {R, T <: BlockArray{R}}
    # Normalize the memory length to `Int` so that it is the same type as the
    # `Int` indices used by `loop1!`/`loop2!`, whatever integer type was passed.
    mem = Int(M)
    s = blockzeros(x)
    y = blockzeros(x)
    s_M = [blockzeros(x) for _ in 1:mem]
    y_M = [blockzeros(x) for _ in 1:mem]
    # Allocate with element type `R` directly instead of creating `Float64`
    # arrays that the struct constructor then has to convert.
    ys_M = zeros(R, mem)
    alphas = zeros(R, mem)
    return LBFGS{R, T, mem}(0, 0, s, y, s_M, y_M, ys_M, alphas, one(R))
end
51 | 44 |
|
"""
    update!(L::LBFGS, x, x_prev, grad, grad_prev)

Update the memory of `L` with the pair `s = x - x_prev`, `y = grad - grad_prev` and
return `L`.

The pair is stored (overwriting the oldest slot once the memory is full) only when
`real(<s, y>) > 0`; otherwise the stored memory and the scaling are left untouched.
The work buffers `L.s` and `L.y` are overwritten in either case.

See the documentation for `LBFGS`.
"""
function update!(
    L::LBFGS{R, T, M}, x::T, x_prev::T, gradx::T, gradx_prev::T
) where {R, T, M}
    L.s .= x .- x_prev
    L.y .= gradx .- gradx_prev
    ys = real(blockvecdot(L.s, L.y))
    if ys > 0
        # Advance the circular write position.
        L.curridx += 1
        if L.curridx > M
            L.curridx = 1
        end
        # The number of valid pairs grows until the memory is full.
        L.currmem = min(L.currmem + 1, M)
        L.ys_M[L.curridx] = ys
        blockcopy!(L.s_M[L.curridx], L.s)
        blockcopy!(L.y_M[L.curridx], L.y)
        # Same inner product as used for `ys`, so both agree on every block type.
        yty = real(blockvecdot(L.y, L.y))
        L.H = ys / yty
    end
    return L
end
80 | 68 |
|
# LBFGS operators are symmetric, so the adjoint product simply forwards to
# the forward product `A_mul_B!`.
function Ac_mul_B!(x::T, L::LBFGS{R, T, M}, y::T) where {R, T, M}
    return A_mul_B!(x, L, y)
end
# Two-loop recursion

# Overwrite `d` with the result of applying `L` to `gradx` and return `d`.
# `gradx` is copied into `d` first, so it is not modified; `L.alphas` is used
# as scratch space by the two loops.
function A_mul_B!(d::T, L::LBFGS{R, T, M}, gradx::T) where {R, T, M}
    d .= gradx
    # First pass walks the memory backwards and returns where it stopped.
    cursor = loop1!(d, L)
    # Scale by the current value of `L.H` between the two passes.
    d .= L.H .* d
    # Second pass walks the memory forwards again, starting from `cursor`.
    return loop2!(d, cursor, L)
end
102 | 81 |
|
# First loop of the two-loop recursion.
#
# Starting from the most recent slot `L.curridx`, visit the `L.currmem` stored pairs
# going backwards (wrapping from slot 1 to slot `M`). For each slot, store
# `alpha = real(<s, d>) / <s, y>` in `L.alphas` and subtract `alpha * y` from `d`
# in place.
#
# Returns the slot index reached after the last step, which `loop2!` takes as its
# starting point.
function loop1!(d::T, L::LBFGS{R, T, M}) where {R, T, M}
    idx = L.curridx
    for _ in 1:L.currmem
        # `blockvecdot` is the inner product `update!` uses on the same block types.
        L.alphas[idx] = real(blockvecdot(L.s_M[idx], d)) / L.ys_M[idx]
        d .-= L.alphas[idx] .* L.y_M[idx]
        idx -= 1
        if idx == 0
            idx = M
        end
    end
    return idx
end
113 | 92 |
|
# Second loop of the two-loop recursion.
#
# Starting from `idx` (the value returned by `loop1!`), visit the `L.currmem` stored
# pairs going forwards (wrapping from slot `M` to slot 1). For each slot, compute
# `beta = real(<y, d>) / <s, y>` and add `(alpha - beta) * s` to `d` in place, where
# `alpha` is the coefficient saved by `loop1!`.
#
# `idx` accepts any `Integer`: `loop1!` may return `M`, whose type is whatever
# integer type the operator was constructed with.
#
# Returns `d`.
function loop2!(d::T, idx::Integer, L::LBFGS{R, T, M}) where {R, T, M}
    for _ in 1:L.currmem
        idx += 1
        if idx > M
            idx = 1
        end
        # `blockvecdot` is the inner product `update!` uses on the same block types.
        beta = real(blockvecdot(L.y_M[idx], d)) / L.ys_M[idx]
        d .+= (L.alphas[idx] - beta) .* L.s_M[idx]
    end
    return d
end
|
# Domain and codomain types are both the block-array type `T` of the operator.
domainType(::LBFGS{R, T, M}) where {R, T, M} = T
codomainType(::LBFGS{R, T, M}) where {R, T, M} = T

# The operator is square: codomain and domain sizes are both the block size of
# the internal buffer `s`.
function size(A::LBFGS)
    dims = blocksize(A.s)
    return (dims, dims)
end

# Name used when displaying the operator.
fun_name(::LBFGS) = "LBFGS"
0 commit comments