Skip to content

Commit d8c0343

Browse files
Merge pull request #35 from SciML/lbfgs
lbfgs
2 parents b6113fa + e2a33fe commit d8c0343

File tree

8 files changed

+207
-26
lines changed

8 files changed

+207
-26
lines changed

Project.toml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,21 @@ authors = ["Utkarsh <[email protected]> and contributors"]
44
version = "1.0.0-DEV"
55

66
[deps]
7-
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
7+
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
8+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
9+
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
810
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
911
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
12+
NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
13+
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
1014
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
1115
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
1216
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
1317
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
1418
SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
19+
SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
1520
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
21+
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
1622

1723
[compat]
1824
julia = "1.6"

src/PSOGPU.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
module PSOGPU
22

33
using SciMLBase, StaticArrays, Setfield, KernelAbstractions
4-
using QuasiMonteCarlo
4+
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
5+
import Adapt
56

6-
import DiffEqGPU: GPUTsit5, vectorized_asolve, make_prob_compatible
7+
# import DiffEqGPU: GPUTsit5, vectorized_asolve, make_prob_compatible
78

89
## Use lb and ub either as StaticArray or pass them separately as CuArrays
910
## Passing as CuArrays makes more sense, or maybe SArray? The based on no. of dimension
@@ -47,10 +48,12 @@ end
4748

4849
include("./algorithms.jl")
4950
include("./utils.jl")
50-
include("./ode_pso.jl")
51+
# include("./ode_pso.jl")
5152
include("./kernels.jl")
5253
include("./lowerlevel_solve.jl")
5354
include("./solve.jl")
55+
include("./lbfgs.jl")
56+
include("./hybrid.jl")
5457

5558
export ParallelPSOKernel,
5659
ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO, OptimizationProblem, solve

src/algorithms.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
abstract type PSOAlogrithm end
2+
abstract type PSOAlgorithm end
33

44
"""
55
```julia
@@ -23,7 +23,7 @@ This is the price to be paid to fuse all the updates into a single kernel. Techn
2323
updates can be used to fix this.
2424
2525
"""
26-
struct ParallelPSOKernel{Backend, T, G, H} <: PSOAlogrithm
26+
struct ParallelPSOKernel{Backend, T, G, H} <: PSOAlgorithm
2727
num_particles::Int
2828
global_update::Bool
2929
backend::Backend
@@ -46,7 +46,7 @@ on a GPU. However, it requires a synchronization after each generation to calcul
4646
- backend: defaults to `CPU()`. The KernelAbstractions backend for performing the computation.
4747
4848
"""
49-
struct ParallelSyncPSOKernel{Backend, T, G, H} <: PSOAlogrithm
49+
struct ParallelSyncPSOKernel{Backend, T, G, H} <: PSOAlgorithm
5050
num_particles::Int
5151
backend::Backend
5252
θ::T
@@ -73,7 +73,7 @@ This is the price to be paid to fuse all the updates into a single kernel. Techn
7373
updates can be used to fix this.
7474
7575
"""
76-
struct ParallelPSOArray{T, G, H} <: PSOAlogrithm
76+
struct ParallelPSOArray{T, G, H} <: PSOAlgorithm
7777
num_particles::Int
7878
θ::T
7979
γ::G
@@ -91,7 +91,7 @@ Serial Particle Swarm Optimization on a CPU.
9191
- num_particles: Number of particles in the simulation
9292
9393
"""
94-
struct SerialPSO{T, G, H} <: PSOAlogrithm
94+
struct SerialPSO{T, G, H} <: PSOAlgorithm
9595
num_particles::Int
9696
θ::T
9797
γ::G
@@ -116,5 +116,5 @@ function SerialPSO(num_particles::Int; θ = θ_default, γ = γ_default, h = sqr
116116
SerialPSO(num_particles, θ, γ, h)
117117
end
118118

119-
SciMLBase.allowsbounds(::PSOAlogrithm) = true
120-
SciMLBase.allowsconstraints(::PSOAlogrithm) = true
119+
SciMLBase.allowsbounds(::PSOAlgorithm) = true
120+
SciMLBase.allowsconstraints(::PSOAlgorithm) = true

src/hybrid.jl

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
struct HybridPSOLBFGS
2+
pso::PSOAlgorithm
3+
lbfgs::LBFGS
4+
end
5+
6+
function HybridPSOLBFGS(; pso = PSOGPU.ParallelPSOKernel(100 ; global_update = false), lbfgs = LBFGS())
7+
HybridPSOLBFGS(pso, lbfgs)
8+
end
9+
10+
@kernel function lbfgs_run!(nlcaches, x0s, result)
11+
i = @index(Global, Linear)
12+
# nlcache = reinit!(nlcaches[i], x0s[i])
13+
# @show nlcache.u
14+
res = solve!(nlcaches[i])
15+
# @show res
16+
# @show res.resid
17+
result[i] = res
18+
end
19+
20+
@kernel function simplelbfgs_run!(nlprob, x0s, result)
21+
i = @index(Global, Linear)
22+
nlcache = remake(nlprob; u0 = x0s[i])
23+
result[i] = solve(nlcache, SimpleLimitedMemoryBroyden(; threshold = 10))
24+
end
25+
26+
@kernel function simplebfgs_run!(nlprob, x0s, result, opt, maxiters)
27+
i = @index(Global, Linear)
28+
nlcache = remake(nlprob; u0 = x0s[i])
29+
sol = SciMLBase.__solve(nlcache, opt, maxiters = maxiters, abstol = 1f-6, reltol = 1f-6)
30+
result[i] = sol.u
31+
end
32+
33+
function SciMLBase.__solve(prob::SciMLBase.OptimizationProblem, opt::HybridPSOLBFGS, args...; maxiters = 1000, kwargs...)
34+
t0 = time()
35+
psoalg = opt.pso
36+
lbfgsalg = opt.lbfgs
37+
38+
sol_pso = solve(prob, psoalg, args...; maxiters, kwargs...)
39+
40+
# @show sol_pso.u
41+
42+
x0s = sol_pso.original
43+
# @show prob.u0
44+
# @show x0s
45+
prob = remake(prob, lb = nothing, ub = nothing)
46+
# @show length(x0s)
47+
# f = Optimization.instantiate_function(prob.f, prob.u0, prob.f.adtype, prob.p, 0)
48+
f = Base.Fix2(prob.f.f, prob.p)
49+
function _g(θ, _p = nothing)
50+
return ForwardDiff.gradient(f , θ)
51+
end
52+
# @show prob.u0
53+
# nlcaches = [init(NonlinearProblem(NonlinearFunction(_g), x0), LimitedMemoryBroyden(; threshold = lbfgsalg.m, linesearch = LiFukushimaLineSearch()))
54+
# for x0 in x0s
55+
# ]
56+
# @show nlcaches[1]
57+
# @show ismutable(nlcaches[1])
58+
backend = lbfgsalg.backend
59+
# kernel = lbfgs_run!(backend)
60+
# result = KernelAbstractions.allocate(lbfgsalg.backend, SciMLBase.NonlinearSolution, length(x0s))
61+
62+
# kernel(nlcaches, x0s, result; ndrange = length(x0s))
63+
64+
# kernel = simplelbfgs_run!(backend)
65+
# result = KernelAbstractions.allocate(backend, SciMLBase.NonlinearSolution, length(x0s))
66+
# nlprob = NonlinearProblem(NonlinearFunction(_g), prob.u0)
67+
# kernel(nlprob, x0s, result; ndrange = length(x0s))
68+
69+
kernel = simplebfgs_run!(backend)
70+
result = KernelAbstractions.allocate(backend, typeof(prob.u0), length(x0s))
71+
nlprob = NonlinearProblem{false}(_g, prob.u0)
72+
73+
kernel(nlprob, x0s, result, SimpleLimitedMemoryBroyden(; threshold = lbfgsalg.m, linesearch = Val(true)), maxiters; ndrange = length(x0s))
74+
75+
# @show result
76+
t1 = time()
77+
sol_bfgs = (x -> prob.f(x, prob.p)).(result)
78+
# sol_bfgs = [prob.f(θ, prob.p) for θ in result]
79+
# @show typeof(sol_bfgs)
80+
81+
sol_bfgs = (x -> isnan(x) ? Inf32 : x).(sol_bfgs)
82+
# @show minimum(sol_bfgs)
83+
84+
minobj, ind = findmin(sol_bfgs)
85+
86+
SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
87+
view(result, ind) , minobj)
88+
end

src/lbfgs.jl

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
struct LBFGS{Backend}
2+
ϵ::Float64
3+
m::Int
4+
backend::Backend
5+
end
6+
7+
function LBFGS(; ϵ = 1e-8, m = 10, backend = CPU())
8+
LBFGS(ϵ, m, backend)
9+
end
10+
11+
@kernel function solve_lbfgs(nlprob::NonlinearProblem, opt, result, maxiters = 1000)
12+
result .= SciMLBase.__solve(nlprob, opt; maxiters = maxiters).u
13+
end
14+
15+
function SciMLBase.__solve(prob::SciMLBase.OptimizationProblem, opt::LBFGS, args...; maxiters = 1000, kwargs...)
16+
f = Base.Fix2(prob.f.f, prob.p)
17+
18+
function _g(θ, _p = nothing)
19+
return ForwardDiff.gradient(f , θ)
20+
end
21+
22+
kernel = solve_lbfgs(opt.backend)
23+
# @show cache.u0
24+
t0 = time()
25+
result = KernelAbstractions.allocate(opt.backend, eltype(prob.u0), size(prob.u0))
26+
27+
nlprob = NonlinearProblem{false}(_g, prob.u0)
28+
nlsol = kernel(nlprob, SimpleLimitedMemoryBroyden(; threshold = opt.m, linesearch = Val(true)), result, maxiters; ndrange = (1,))
29+
t1 = time()
30+
θ = result
31+
# @show nlsol.stats
32+
# @show nlsol.resid
33+
34+
SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt, θ, prob.f(θ, prob.p))
35+
end

src/lowerlevel_solve.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ function vectorized_solve!(prob,
2020
w = w * wdamp
2121
end
2222

23-
return gbest
23+
return gbest, gpu_particles
2424
end
2525

2626
function vectorized_solve!(prob,
@@ -43,7 +43,7 @@ function vectorized_solve!(prob,
4343
w = w * wdamp
4444
end
4545

46-
return Array(gbest)[1]
46+
return Array(gbest)[1], gpu_particles
4747
end
4848

4949
function vectorized_solve!(prob,
@@ -66,7 +66,7 @@ function vectorized_solve!(prob,
6666
ndrange = length(gpu_particles))
6767

6868
best_particle = minimum(gpu_particles)
69-
return SPSOGBest(best_particle.best_position, best_particle.best_cost)
69+
return SPSOGBest(best_particle.best_position, best_particle.best_cost), gpu_particles
7070
end
7171

7272
function vectorized_solve!(prob, gbest,
@@ -120,7 +120,7 @@ function vectorized_solve!(prob, gbest,
120120
end
121121
w = w * wdamp
122122
end
123-
gbest
123+
gbest, particles
124124
end
125125

126126
function update_particle_states_cpu!(prob, particles, gbest_ref, w, iter, opt;
@@ -155,5 +155,5 @@ function vectorized_solve!(prob,
155155
update_particle_states_cpu!(prob, particles, sol_ref, w, i, opt)
156156
w = w * wdamp
157157
end
158-
return sol_ref[]
158+
return sol_ref[], particles
159159
end

src/solve.jl

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1-
function SciMLBase.__solve(prob::OptimizationProblem, opt::PSOAlogrithm, args...; kwargs...)
1+
function get_pos(particle)
2+
return particle.position
3+
end
4+
5+
function SciMLBase.__solve(prob::OptimizationProblem, opt::PSOAlgorithm, args...; kwargs...)
6+
lb, ub = check_init_bounds(prob)
27
lb, ub = check_init_bounds(prob)
38
prob = remake(prob; lb = lb, ub = ub)
49

5-
gbest = pso_solve(prob, opt, args...; kwargs...)
6-
10+
gbest, particles = pso_solve(prob, opt)
11+
particles_positions = get_pos.(particles)
712
SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
8-
gbest.position, prob.f(gbest.position, prob.p))
13+
gbest.position, prob.f(gbest.position, prob.p), original = particles_positions)
914
end
1015

1116
function pso_solve(prob::OptimizationProblem,
@@ -24,29 +29,29 @@ function pso_solve(prob::OptimizationProblem,
2429
gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
2530
copyto!(gpu_init_gbest, [init_gbest])
2631

27-
gbest = vectorized_solve!(prob,
32+
gbest, particles = vectorized_solve!(prob,
2833
gpu_init_gbest,
2934
gpu_particles,
3035
opt,
3136
Val(opt.global_update),
3237
args...;
3338
kwargs...)
34-
gbest
39+
gbest, particles
3540
end
3641

3742
function pso_solve(prob::OptimizationProblem,
3843
opt::ParallelPSOArray,
3944
args...;
4045
kwargs...)
4146
init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
42-
gbest = vectorized_solve!(prob, init_gbest, particles, opt, args...; kwargs...)
43-
gbest
47+
gbest, particles = vectorized_solve!(prob, init_gbest, particles, opt, args...; kwargs...)
48+
gbest, particles
4449
end
4550

4651
function pso_solve(prob::OptimizationProblem, opt::SerialPSO, args...; kwargs...)
4752
init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
48-
gbest = vectorized_solve!(prob, init_gbest, particles, opt; kwargs...)
49-
gbest
53+
gbest, particles = vectorized_solve!(prob, init_gbest, particles, opt; kwargs...)
54+
gbest, particles
5055
end
5156

5257
function pso_solve(prob::OptimizationProblem,

test/lbfgs.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
using PSOGPU, Optimization, CUDA
2+
using Zygote, StaticArrays, KernelAbstractions
3+
using Optimization
4+
5+
function objf(x, p)
6+
return 1 - x[1]^2 - x[2]^2
7+
end
8+
9+
optprob = OptimizationFunction(objf, Optimization.AutoZygote())
10+
x0 = rand(2)
11+
x0 = SVector{2}(x0)
12+
prob = OptimizationProblem(optprob, x0)
13+
l1 = objf(x0, nothing)
14+
sol = Optimization.solve(prob,
15+
PSOGPU.LBFGS(;backend = CUDABackend()),
16+
maxiters = 10)
17+
18+
N = 10
19+
function rosenbrock(x, p)
20+
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
21+
end
22+
x0 = @SArray rand(Float32, N)
23+
p = @SArray Float32[1.0, 100.0]
24+
optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
25+
prob = OptimizationProblem(optf, x0, p)
26+
l0 = rosenbrock(x0, p)
27+
28+
@time sol = Optimization.solve(prob,
29+
PSOGPU.LBFGS(; m = 7, backend = CUDABackend()),
30+
maxiters = 20,
31+
)
32+
@show sol.objective
33+
@time sol = Optimization.solve(prob,
34+
PSOGPU.ParallelPSOKernel(100, backend = CUDABackend()),
35+
maxiters = 100,
36+
)
37+
@show sol.objective
38+
39+
@time sol = Optimization.solve(prob,
40+
PSOGPU.HybridPSOLBFGS(pso = PSOGPU.ParallelPSOKernel(100; backend = CUDABackend()), lbfgs = PSOGPU.LBFGS(; m = 7, backend = CUDABackend())),
41+
EnsembleThreads(),
42+
maxiters = 30,
43+
)
44+
@show sol.objective

0 commit comments

Comments
 (0)