Skip to content

Commit 9b6f12c

Browse files
committed
Complete refactor of CPU and GPU PSO codes
1 parent 13172dd commit 9b6f12c

File tree

12 files changed

+175
-345
lines changed

12 files changed

+175
-345
lines changed

src/PSOGPU.jl

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,44 +6,51 @@ import DiffEqGPU: GPUTsit5, vectorized_asolve, make_prob_compatible
66

77
## Use lb and ub either as StaticArray or pass them separately as CuArrays
88
## Passing them as CuArrays makes more sense, or maybe as SArrays, depending on the number of dimensions
9-
struct PSOParticle{T1, T2 <: eltype(T1)}
9+
"""
    SPSOParticle{T1, T2}

Immutable particle record. `position`, `velocity` and `best_position` share the
container type `T1`; `cost` and `best_cost` share the scalar type `T2`.

NOTE(review): the bound `T2 <: eltype(T1)` is evaluated on the type variable `T1`
when the struct is defined, so it presumably does not tie `T2` to the element
type of the concrete `T1` -- confirm before relying on it.
"""
struct SPSOParticle{T1, T2 <: eltype(T1)}
    position::T1
    velocity::T1
    cost::T2
    best_position::T1
    best_cost::T2
end
16-
struct PSOGBest{T1, T2 <: eltype(T1)}
16+
"""
    SPSOGBest{T1, T2}

Immutable pair of a `position` (container type `T1`) and its `cost` (scalar
type `T2`), used to carry the best solution found so far.

NOTE(review): as written, `T2 <: eltype(T1)` is evaluated on the type variable
`T1` at definition time and presumably does not restrict `T2` -- confirm.
"""
struct SPSOGBest{T1, T2 <: eltype(T1)}
    position::T1
    cost::T2
end
2020

21+
"""
    MPSOParticle{T}

Mutable particle record whose array fields (`position`, `velocity`,
`best_position`) hold elements of type `T` and whose scalar fields (`cost`,
`best_cost`) are of type `T`.

NOTE(review): the array fields are declared with the abstract type
`AbstractArray{T}`, so their concrete type is not part of `MPSOParticle{T}`.
Parameterizing on the array type would make field access concretely typed, but
would change the type's parameters -- check the call sites before changing it.
"""
mutable struct MPSOParticle{T}
    position::AbstractArray{T}
    velocity::AbstractArray{T}
    cost::T
    best_position::AbstractArray{T}
    best_cost::T
end
28+
"""
    MPSOGBest{T}

Mutable holder for the best solution found so far: a `position` array with
elements of type `T` and its scalar `cost` of type `T`.

NOTE(review): `position` is declared as the abstract `AbstractArray{T}`, so its
concrete array type is not captured by `MPSOGBest{T}` -- check the call sites
before tightening it.
"""
mutable struct MPSOGBest{T}
    position::AbstractArray{T}
    cost::T
end
32+
2133
## required overloads for min or max computation on particles
22-
function Base.isless(a::PSOGPU.PSOParticle{T1, T2},
23-
b::PSOGPU.PSOParticle{T1, T2}) where {T1, T2}
34+
# Order two particles of the same concrete type by their `best_cost`, so that
# reductions such as `minimum` select the particle with the lowest best cost.
Base.isless(a::PSOGPU.SPSOParticle{T1, T2},
    b::PSOGPU.SPSOParticle{T1, T2}) where {T1, T2} = a.best_cost < b.best_cost
2638

27-
function Base.typemax(::Type{PSOGPU.PSOParticle{T1, T2}}) where {T1, T2}
28-
PSOGPU.PSOParticle{T1, T2}(similar(T1),
39+
# Build the "largest" particle of a given concrete type: `cost` and `best_cost`
# are `typemax(T2)`, and each array field gets its own `similar(T1)` container
# (its contents are whatever `similar` returns -- presumably uninitialized).
function Base.typemax(::Type{PSOGPU.SPSOParticle{T1, T2}}) where {T1, T2}
    worst_cost = typemax(T2)
    return PSOGPU.SPSOParticle{T1, T2}(similar(T1),
        similar(T1),
        worst_cost,
        similar(T1),
        worst_cost)
end
3446

3547
include("./algorithms.jl")
36-
include("./pso_cpu.jl")
37-
include("./pso_gpu.jl")
38-
include("./pso_async_gpu.jl")
3948
include("./utils.jl")
40-
include("./pso_sync_gpu.jl")
4149
include("./ode_pso.jl")
4250
include("./kernels.jl")
4351
include("./lowerlevel_solve.jl")
4452
include("./solve.jl")
4553

4654
export ParallelPSOKernel,
4755
ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO, OptimizationProblem, solve
48-
4956
end

src/algorithms.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ struct SerialPSO <: PSOAlogrithm
8787
end
8888

8989
function ParallelPSOKernel(num_particles::Int;
90-
async = false, backend = CPU())
91-
ParallelPSOKernel(num_particles, async, backend)
90+
global_update = false, backend = CPU())
91+
ParallelPSOKernel(num_particles, global_update, backend)
9292
end
9393

9494
function ParallelSyncPSOKernel(num_particles::Int;

src/kernels.jl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,30 @@ end
5656

5757
@inbounds gpu_particles[i] = particle
5858
end
59+
60+
# Kernel that advances one particle through all `maxiters` generations in a
# single launch.
#
# Each work-item:
#   1. loads `gbest_ref[1]` into a local copy `gbest`,
#   2. loads its own particle `gpu_particles[i]`,
#   3. repeatedly calls `update_particle_state`, replacing the local `gbest`
#      whenever the particle's `best_cost` is lower, and damping `w` by `wdamp`,
#   4. stores the particle back and writes its local `gbest` to `gbest_ref[1]`.
#
# Arguments:
#   prob           - problem passed through to `update_particle_state`
#   gpu_particles  - array of particles, one per work-item
#   gbest_ref      - array whose first element holds the global-best record
#   w, wdamp       - inertia weight and its per-generation damping factor
#   maxiters       - number of generations to run
#   c1, c2         - coefficients forwarded to `update_particle_state`
@kernel function update_particle_states_async!(prob,
        gpu_particles,
        gbest_ref,
        w, wdamp, maxiters;
        c1 = 1.4962f0,
        c2 = 1.4962f0)
    i = @index(Global, Linear)

    ## Local copy of the global best; updates below are private to this work-item
    gbest = gbest_ref[1]

    ## Access the particle
    @inbounds particle = gpu_particles[i]

    ## Run all generations. The counter is unused, so it is named `_` rather
    ## than `i` to avoid shadowing the work-item index used before and after.
    for _ in 1:maxiters
        particle = update_particle_state(particle, prob, gbest, w, c1, c2)
        if particle.best_cost < gbest.cost
            @set! gbest.position = particle.best_position
            @set! gbest.cost = particle.best_cost
        end
        w = w * wdamp
    end

    @inbounds gpu_particles[i] = particle
    ## NOTE(review): every work-item stores its own local `gbest` into the same
    ## slot without synchronization, so the value left in `gbest_ref[1]` is not
    ## guaranteed to be the overall minimum -- callers should reduce over
    ## `gpu_particles` instead of trusting this slot.
    @inbounds gbest_ref[1] = gbest
end

src/lowerlevel_solve.jl

Lines changed: 96 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ function vectorized_solve!(prob,
1616
w, opt;
1717
ndrange = length(gpu_particles))
1818
best_particle = minimum(gpu_particles)
19-
gbest = PSOGBest(best_particle.position, best_particle.best_cost)
19+
gbest = SPSOGBest(best_particle.position, best_particle.best_cost)
2020
w = w * wdamp
2121
end
2222

@@ -25,7 +25,7 @@ end
2525

2626
function vectorized_solve!(prob,
2727
gbest,
28-
gpu_particles, opt::ParallelPSOKernel;
28+
gpu_particles, opt::ParallelPSOKernel, ::Val{true};
2929
maxiters = 100,
3030
w = 0.7298f0,
3131
wdamp = 1.0f0,
@@ -45,3 +45,97 @@ function vectorized_solve!(prob,
4545

4646
return Array(gbest)[1]
4747
end
48+
49+
"""
    vectorized_solve!(prob, gbest, gpu_particles, opt::ParallelPSOKernel, ::Val{false};
                      maxiters = 100, w = 0.7298f0, wdamp = 1.0f0, debug = false)

Launch `update_particle_states_async!` once over all particles (the kernel runs
the `maxiters` generations itself), then reduce the particles with `minimum`
and return the winner's `best_position`/`best_cost` as an `SPSOGBest`.

`gbest` is handed to the kernel as its global-best buffer. `opt` only selects
this method, and `debug` is accepted but not used here.
"""
function vectorized_solve!(prob,
        gbest,
        gpu_particles, opt::ParallelPSOKernel, ::Val{false};
        maxiters = 100,
        w = 0.7298f0,
        wdamp = 1.0f0,
        debug = false)
    # Instantiate the kernel for whichever backend owns the particle array.
    run_generations! = update_particle_states_async!(get_backend(gpu_particles))
    run_generations!(prob, gpu_particles, gbest, w, wdamp, maxiters;
        ndrange = length(gpu_particles))

    # The particle ordering is by `best_cost`, so `minimum` yields the winner.
    winner = minimum(gpu_particles)
    return SPSOGBest(winner.best_position, winner.best_cost)
end
64+
65+
"""
    vectorized_solve!(prob, gbest, particles, opt::ParallelPSOArray;
                      maxiters = 100, w = 0.7298f0, wdamp = 1.0f0,
                      c1 = 1.4962f0, c2 = 1.4962f0, verbose = false)

Multi-threaded PSO over an array of mutable particles.

For each of `maxiters` generations, every particle is updated in a
`Threads.@threads` loop: its velocity is recomputed from its own best position
and the shared `gbest.position`, its position is advanced and clamped to
`prob.lb`/`prob.ub`, and its cost is evaluated with `prob.f(position, prob.p)`.
A particle that improves on its own best updates `best_position`/`best_cost`,
and, if it also beats `gbest.cost`, updates the shared `gbest` in place.
After each generation `w` is multiplied by `wdamp`.

Returns the (mutated) `gbest`. `opt` only selects this method and `verbose` is
accepted but not used here.

The update of the shared `gbest` is serialized with a lock and re-checked under
that lock, so concurrent threads cannot overwrite a better global best with a
worse one or leave `gbest.position` and `gbest.cost` out of step. Reads of
`gbest.position` in the velocity update remain unlocked.
"""
function vectorized_solve!(prob, gbest,
        particles, opt::ParallelPSOArray;
        maxiters = 100,
        w = 0.7298f0,
        wdamp = 1.0f0,
        c1 = 1.4962f0,
        c2 = 1.4962f0,
        verbose = false)
    cost_func = prob.f
    num_particles = length(particles)
    rand_eltype = eltype(particles[1].velocity)
    # Guards the check-and-write of the shared global best across threads.
    gbest_lock = ReentrantLock()

    # main loop
    for _ in 1:maxiters
        Threads.@threads for i in 1:num_particles
            particle = particles[i]

            # `rand.(rand_eltype)` sits inside the fused broadcast, so it is
            # evaluated per element rather than once per particle.
            particle.velocity .= w .* particle.velocity .+
                                 c1 .* rand.(rand_eltype) .*
                                 (particle.best_position .-
                                  particle.position) .+
                                 c2 .* rand.(rand_eltype) .*
                                 (gbest.position .- particle.position)

            particle.position .= particle.position .+ particle.velocity
            particle.position .= max.(particle.position, prob.lb)
            particle.position .= min.(particle.position, prob.ub)

            particle.cost = cost_func(particle.position, prob.p)

            if particle.cost < particle.best_cost
                copy!(particle.best_position, particle.position)
                particle.best_cost = particle.cost

                # Cheap unlocked pre-check, then re-check under the lock:
                # another thread may have improved `gbest` in between.
                if particle.best_cost < gbest.cost
                    lock(gbest_lock) do
                        if particle.best_cost < gbest.cost
                            copy!(gbest.position, particle.best_position)
                            gbest.cost = particle.best_cost
                        end
                    end
                end
            end
        end
        w = w * wdamp
    end
    return gbest
end
108+
109+
"""
    update_particle_states_cpu!(prob, particles, gbest_ref, w; c1 = 1.4962f0, c2 = 1.4962f0)

Run one sequential generation over `particles`.

Each particle is replaced by the result of `update_particle_state`, called with
the current best record. Whenever an updated particle's `best_cost` is lower
than the best record's `cost`, the record is replaced immediately, so later
particles in the same sweep see the improved value. The final record is written
back to `gbest_ref[]`. Returns `nothing`.
"""
function update_particle_states_cpu!(prob, particles, gbest_ref, w; c1 = 1.4962f0,
        c2 = 1.4962f0)
    best_so_far = gbest_ref[]

    for idx in eachindex(particles)
        @inbounds current = particles[idx]
        updated = update_particle_state(current, prob, best_so_far, w, c1, c2)

        if updated.best_cost < best_so_far.cost
            @set! best_so_far.position = updated.best_position
            @set! best_so_far.cost = updated.best_cost
        end

        particles[idx] = updated
    end

    gbest_ref[] = best_so_far
    return nothing
end
127+
128+
"""
    vectorized_solve!(prob, gbest, particles, opt::SerialPSO;
                      maxiters = 100, w = 0.7298f0, wdamp = 1.0f0, debug = false)

Serial PSO driver: wrap `gbest` in a `Ref`, call `update_particle_states_cpu!`
`maxiters` times while multiplying the inertia weight by `wdamp` after each
sweep, and return the record held in the `Ref` at the end.

`opt` only selects this method, and `debug` is accepted but not used here.
"""
function vectorized_solve!(prob,
        gbest,
        particles, opt::SerialPSO;
        maxiters = 100,
        w = 0.7298f0,
        wdamp = 1.0f0,
        debug = false)
    best_ref = Ref(gbest)
    inertia = w
    for _ in 1:maxiters
        update_particle_states_cpu!(prob, particles, best_ref, inertia)
        inertia = inertia * wdamp
    end
    return best_ref[]
end

src/ode_pso.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,13 @@ function parameter_estim_ode!(prob::ODEProblem,
8383
update_costs!(losses, gpu_particles; ndrange = length(losses))
8484

8585
best_particle = minimum(gpu_particles,
86-
init = PSOGPU.PSOParticle(gbest.position,
86+
init = PSOGPU.SPSOParticle(gbest.position,
8787
gbest.position,
8888
gbest.cost,
8989
gbest.position,
9090
gbest.cost))
9191

92-
gbest = PSOGPU.PSOGBest(best_particle.best_position, best_particle.best_cost)
92+
gbest = PSOGPU.SPSOGBest(best_particle.best_position, best_particle.best_cost)
9393
w = w * wdamp
9494
end
9595
return gbest

src/pso_async_gpu.jl

Lines changed: 0 additions & 69 deletions
This file was deleted.

0 commit comments

Comments
 (0)