Skip to content

Commit 3a3fc70

Browse files
authored
Merge pull request #45 from SciML/u/cache
Start towards caching and perf optimizations
2 parents 1641249 + be7d98f commit 3a3fc70

File tree

11 files changed

+531
-109
lines changed

11 files changed

+531
-109
lines changed

Project.toml

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,12 @@ NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
1515
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
1616
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
1717
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
18+
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
1819
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
1920
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
2021
SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
2122
SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
2223
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
2324

2425
[compat]
25-
julia = "1.6"
26-
27-
[extras]
28-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
29-
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
30-
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
31-
32-
[targets]
33-
test = ["Test", "StaticArrays", "LinearAlgebra", "Optimization", "ForwardDiff"]
26+
julia = "1.6"

src/PSOGPU.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,15 @@ module PSOGPU
33
using SciMLBase, StaticArrays, Setfield, KernelAbstractions
44
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
55
import Adapt
6+
import Adapt: adapt
67
import Enzyme: autodiff_deferred, Active, Reverse
78
import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
89
using QuasiMonteCarlo
910
import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve
1011

12+
using Reexport
13+
@reexport using SciMLBase
14+
1115
## Use lb and ub either as StaticArray or pass them separately as CuArrays
1216
## Passing as CuArrays makes more sense, or maybe SArray? The based on no. of dimension
1317
struct SPSOParticle{T1, T2 <: eltype(T1)}
@@ -63,10 +67,11 @@ include("./utils.jl")
6367
include("./ode_pso.jl")
6468
include("./kernels.jl")
6569
include("./lowerlevel_solve.jl")
70+
include("init.jl")
6671
include("./solve.jl")
6772
include("./bfgs.jl")
6873
include("./hybrid.jl")
6974

7075
export ParallelPSOKernel,
71-
ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO, OptimizationProblem, solve
76+
ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO
7277
end

src/algorithms.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11

22
abstract type PSOAlgorithm end
33
abstract type HybridPSOAlgorithm{LocalOpt} end
4+
abstract type GPUSamplingAlgorithm end
5+
6+
struct GPUUniformSampler <: GPUSamplingAlgorithm
7+
end
8+
9+
struct GPUUnboundedSampler <: GPUSamplingAlgorithm
10+
end
11+
412
"""
513
```julia
614
ParallelPSOKernel(num_particles = 100)

src/hybrid.jl

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,26 @@
55
result[i] = sol.u
66
end
77

8-
function SciMLBase.__solve(prob::SciMLBase.OptimizationProblem,
9-
opt::HybridPSO{Backend, LocalOpt},
10-
args...;
8+
function SciMLBase.solve!(
9+
cache::HybridPSOCache, opt::HybridPSO{Backend, LocalOpt}, args...;
1110
abstol = nothing,
1211
reltol = nothing,
13-
maxiters = 100,
14-
local_maxiters = 10,
15-
kwargs...) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}
16-
t0 = time()
17-
psoalg = opt.pso
18-
local_opt = opt.local_opt
19-
backend = opt.backend
12+
maxiters = 100, local_maxiters = 10, kwargs...) where {
13+
Backend, LocalOpt <: Union{LBFGS, BFGS}}
14+
pso_cache = cache.pso_cache
2015

21-
sol_pso = solve(prob, psoalg, args...; maxiters, kwargs...)
16+
sol_pso = solve!(pso_cache)
2217
x0s = sol_pso.original
23-
prob = remake(prob, lb = nothing, ub = nothing)
18+
19+
backend = opt.backend
20+
21+
prob = remake(cache.prob, lb = nothing, ub = nothing)
2422
f = Base.Fix2(prob.f.f, prob.p)
2523
∇f = instantiate_gradient(f, prob.f.adtype)
2624

2725
kernel = simplebfgs_run!(backend)
28-
result = KernelAbstractions.allocate(backend, typeof(prob.u0), length(x0s))
26+
result = cache.start_points
27+
copyto!(result, x0s)
2928
nlprob = NonlinearProblem{false}(∇f, prob.u0)
3029

3130
nlalg = LocalOpt isa LBFGS ?

src/init.jl

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
mutable struct PSOCache{TP, TAlg, TPart, TGbest, TSampler}
2+
prob::TP
3+
alg::TAlg
4+
particles::TPart
5+
gbest::TGbest
6+
sampler::TSampler
7+
end
8+
9+
struct HybridPSOCache{TPc, TSp, TAlg}
10+
pso_cache::TPc
11+
start_points::TSp
12+
alg::TAlg
13+
end
14+
15+
function __init!(particles, prob::OptimizationProblem,
16+
opt::Union{ParallelPSOKernel, ParallelSyncPSOKernel}, sampler::T,
17+
args...; kwargs...) where {T <: QuasiMonteCarlo.SamplingAlgorithm}
18+
backend = opt.backend
19+
20+
qmc_samples = QuasiMonteCarlo.sample(opt.num_particles, prob.lb, prob.ub, sampler)
21+
22+
qmc_samples = adapt(backend, qmc_samples)
23+
24+
kernel! = gpu_init_particles!(backend)
25+
26+
kernel!(
27+
particles, qmc_samples, prob, opt, typeof(prob.u0), T; ndrange = opt.num_particles)
28+
29+
best_particle = minimum(particles)
30+
init_gbest = SPSOGBest(best_particle.best_position, best_particle.best_cost)
31+
32+
return particles, init_gbest
33+
end
34+
35+
function __init!(particles, prob::OptimizationProblem,
36+
opt::Union{ParallelPSOKernel, ParallelSyncPSOKernel}, sampler::T,
37+
args...; kwargs...) where {T <: GPUSamplingAlgorithm}
38+
backend = opt.backend
39+
40+
kernel! = gpu_init_particles!(backend)
41+
42+
kernel!(particles, prob, opt, typeof(prob.u0), T; ndrange = opt.num_particles)
43+
44+
best_particle = minimum(particles)
45+
46+
init_gbest = SPSOGBest(best_particle.best_position, best_particle.best_cost)
47+
48+
particles, init_gbest
49+
end
50+
51+
function SciMLBase.init(
52+
prob::OptimizationProblem, opt::ParallelPSOKernel, args...; sampler = GPUUniformSampler(), kwargs...)
53+
@assert prob.u0 isa SArray
54+
55+
## Bounds check
56+
lb, ub = check_init_bounds(prob)
57+
lb, ub = check_init_bounds(prob)
58+
prob = remake(prob; lb = lb, ub = ub)
59+
60+
particles = KernelAbstractions.allocate(
61+
opt.backend, SPSOParticle{typeof(prob.u0), eltype(typeof(prob.u0))}, opt.num_particles)
62+
63+
_sampler = if lb === nothing || ub === nothing || (all(isinf, lb) && all(isinf, ub))
64+
GPUUnboundedSampler()
65+
else
66+
sampler
67+
end
68+
69+
particles, _init_gbest = __init!(particles, prob, opt, _sampler, args...; kwargs...)
70+
71+
init_gbest = KernelAbstractions.allocate(opt.backend, typeof(_init_gbest), (1,))
72+
copyto!(init_gbest, [_init_gbest])
73+
74+
return PSOCache{
75+
typeof(prob), typeof(opt), typeof(particles), typeof(init_gbest), typeof(_sampler)}(
76+
prob, opt, particles, init_gbest, _sampler)
77+
end
78+
79+
function SciMLBase.init(
80+
prob::OptimizationProblem, opt::ParallelSyncPSOKernel, args...; sampler = GPUUniformSampler(), kwargs...)
81+
@assert prob.u0 isa SArray
82+
83+
## Bounds check
84+
lb, ub = check_init_bounds(prob)
85+
lb, ub = check_init_bounds(prob)
86+
prob = remake(prob; lb = lb, ub = ub)
87+
88+
particles = KernelAbstractions.allocate(
89+
opt.backend, SPSOParticle{typeof(prob.u0), eltype(typeof(prob.u0))}, opt.num_particles)
90+
91+
_sampler = if lb === nothing || ub === nothing || (all(isinf, lb) && all(isinf, ub))
92+
GPUUnboundedSampler()
93+
else
94+
sampler
95+
end
96+
97+
particles, init_gbest = __init!(particles, prob, opt, _sampler, args...; kwargs...)
98+
99+
return PSOCache{
100+
typeof(prob), typeof(opt), typeof(particles), typeof(init_gbest), typeof(_sampler)}(
101+
prob, opt, particles, init_gbest, _sampler)
102+
end
103+
104+
function SciMLBase.reinit!(cache::Union{PSOCache, HybridPSOCache})
105+
reinit_cache!(cache, cache.alg)
106+
end
107+
108+
function reinit_cache!(cache::PSOCache, opt::ParallelPSOKernel)
109+
prob = cache.prob
110+
particles = cache.particles
111+
112+
particles, _init_gbest = __init!(particles, prob, opt, cache.sampler)
113+
114+
copyto!(cache.gbest, [_init_gbest])
115+
116+
return nothing
117+
end
118+
119+
function reinit_cache!(cache::PSOCache, opt::ParallelSyncPSOKernel)
120+
prob = cache.prob
121+
particles = cache.particles
122+
123+
particles, init_gbest = __init!(particles, prob, opt, cache.sampler)
124+
125+
cache.gbest = init_gbest
126+
127+
return nothing
128+
end
129+
130+
function SciMLBase.init(
131+
prob::OptimizationProblem, opt::HybridPSO{Backend, LocalOpt}, args...;
132+
kwargs...) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}
133+
psoalg = opt.pso
134+
backend = opt.backend
135+
136+
pso_cache = init(prob, psoalg, args...; kwargs...)
137+
138+
start_points = KernelAbstractions.allocate(
139+
backend, typeof(prob.u0), opt.pso.num_particles)
140+
141+
return HybridPSOCache{
142+
typeof(pso_cache), typeof(start_points), typeof(opt)}(pso_cache, start_points, opt)
143+
end
144+
145+
function reinit_cache!(cache::HybridPSOCache,
146+
opt::HybridPSO{Backend, LocalOpt}) where {Backend, LocalOpt <: Union{LBFGS, BFGS}}
147+
reinit!(cache.pso_cache)
148+
fill!(cache.start_points, zero(eltype(cache.start_points)))
149+
return nothing
150+
end
151+
152+
function Base.getproperty(cache::HybridPSOCache, name::Symbol)
153+
if name (:start_points, :pso_cache, :alg)
154+
return getfield(cache, name)
155+
else
156+
return getproperty(cache.pso_cache, name)
157+
end
158+
end
159+
160+
function Base.setproperty!(cache::HybridPSOCache, name::Symbol, val)
161+
if name (:start_points, :pso_cache, :alg)
162+
return setfield!(cache, name, val)
163+
else
164+
return setproperty!(cache.pso_cache, name, val)
165+
end
166+
end

src/solve.jl

Lines changed: 49 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,55 @@ function get_pos(particle)
22
return particle.position
33
end
44

5+
function SciMLBase.solve!(
6+
cache::Union{PSOCache, HybridPSOCache}, args...; maxiters = 100, kwargs...)
7+
solve!(cache, cache.alg, args...; maxiters, kwargs...)
8+
end
9+
10+
function SciMLBase.solve!(
11+
cache::PSOCache, opt::ParallelPSOKernel, args...; maxiters = 100, kwargs...)
12+
prob = cache.prob
13+
t0 = time()
14+
gbest, particles = vectorized_solve!(cache.prob,
15+
cache.gbest,
16+
cache.particles,
17+
opt,
18+
Val(opt.global_update),
19+
args...;
20+
maxiters, kwargs...)
21+
t1 = time()
22+
23+
particles_positions = get_pos.(particles)
24+
SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
25+
gbest.position, prob.f(gbest.position, prob.p), original = particles_positions,
26+
stats = Optimization.OptimizationStats(; time = t1 - t0))
27+
end
28+
29+
function SciMLBase.solve!(
30+
cache::PSOCache, opt::ParallelSyncPSOKernel, args...; maxiters = 100, kwargs...)
31+
prob = cache.prob
32+
t0 = time()
33+
gbest, particles = vectorized_solve!(prob,
34+
cache.gbest,
35+
cache.particles,
36+
opt,
37+
args...;
38+
maxiters,
39+
kwargs...)
40+
t1 = time()
41+
42+
particles_positions = get_pos.(particles)
43+
SciMLBase.build_solution(SciMLBase.DefaultOptimizationCache(prob.f, prob.p), opt,
44+
gbest.position, prob.f(gbest.position, prob.p), original = particles_positions,
45+
stats = Optimization.OptimizationStats(; time = t1 - t0))
46+
end
47+
48+
function SciMLBase.solve(prob::OptimizationProblem,
49+
opt::Union{ParallelPSOKernel, ParallelSyncPSOKernel, HybridPSO},
50+
args...; maxiters = 100, kwargs...)
51+
solve!(init(prob, opt, args...; kwargs...), opt, args...; maxiters, kwargs...)
52+
end
53+
554
function SciMLBase.__solve(prob::OptimizationProblem,
655
opt::PSOAlgorithm,
756
args...;
@@ -18,34 +67,6 @@ function SciMLBase.__solve(prob::OptimizationProblem,
1867
stats = Optimization.OptimizationStats(; time = solve_time))
1968
end
2069

21-
function pso_solve(prob::OptimizationProblem,
22-
opt::ParallelPSOKernel,
23-
args...;
24-
kwargs...)
25-
backend = opt.backend
26-
@assert prob.u0 isa SArray
27-
init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
28-
# TODO: Do the equivalent of cu()/roc()
29-
particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
30-
gpu_particles = KernelAbstractions.allocate(backend,
31-
particles_eltype,
32-
size(particles))
33-
copyto!(gpu_particles, particles)
34-
gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
35-
copyto!(gpu_init_gbest, [init_gbest])
36-
37-
t0 = time()
38-
gbest, particles = vectorized_solve!(prob,
39-
gpu_init_gbest,
40-
gpu_particles,
41-
opt,
42-
Val(opt.global_update),
43-
args...;
44-
kwargs...)
45-
t1 = time()
46-
gbest, particles, t1 - t0
47-
end
48-
4970
function pso_solve(prob::OptimizationProblem,
5071
opt::ParallelPSOArray,
5172
args...;
@@ -69,25 +90,3 @@ function pso_solve(prob::OptimizationProblem, opt::SerialPSO, args...; kwargs...
6990
t1 = time()
7091
gbest, particles, t1 - t0
7192
end
72-
73-
function pso_solve(prob::OptimizationProblem,
74-
opt::ParallelSyncPSOKernel,
75-
args...;
76-
kwargs...)
77-
backend = opt.backend
78-
init_gbest, particles = init_particles(prob, opt, typeof(prob.u0))
79-
particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
80-
gpu_particles = KernelAbstractions.allocate(backend, particles_eltype, size(particles))
81-
copyto!(gpu_particles, particles)
82-
init_gbest = init_gbest
83-
84-
t0 = time()
85-
gbest, particles = vectorized_solve!(prob,
86-
init_gbest,
87-
gpu_particles,
88-
opt,
89-
args...;
90-
kwargs...)
91-
t1 = time()
92-
gbest, particles, t1 - t0
93-
end

0 commit comments

Comments
 (0)