Skip to content

Commit 1469841

Browse files
committed
update algorithms to use backend
1 parent 0b4fc8f commit 1469841

File tree

8 files changed

+60
-51
lines changed

8 files changed

+60
-51
lines changed

examples/neural_network/nn.jl

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ using SimpleChains
22
using IterTools
33
using MLDatasets
44
using Random
5-
dataset = MLDatasets.Iris().dataframe
5+
dataset = MLDatasets.Iris().dataframe
66

77
data = Array(dataset)
88
data = data[shuffle(1:end), :]
@@ -17,11 +17,9 @@ function mapstrtoclass(flower)
1717
end
1818
end
1919
ytrain = map(mapstrtoclass, data[:, 5])
20-
lenet = SimpleChain(
21-
static(4),
20+
lenet = SimpleChain(static(4),
2221
TurboDense{true}(tanh, 20),
23-
TurboDense{true}(identity, 3),
24-
)
22+
TurboDense{true}(identity, 3))
2523
lenetloss = SimpleChains.add_loss(lenet, LogitCrossEntropyLoss(ytrain))
2624

2725
p = SimpleChains.init_params(lenet);
@@ -30,18 +28,16 @@ G = SimpleChains.alloc_threaded_grad(lenet);
3028

3129
lenetloss(xtrain, p)
3230

33-
report = let mlpdloss = lenetloss, X=xtrain
34-
p -> begin
35-
let train = mlpdloss(X, p)
36-
@info "Loss:" train
37-
end
31+
report = let mlpdloss = lenetloss, X = xtrain
32+
p -> begin
33+
let train = mlpdloss(X, p)
34+
@info "Loss:" train
35+
end
3836
end
3937
end
4038

4139
for _ in 1:3
42-
@time SimpleChains.train_unbatched!(
43-
G, p, lenetloss, xtrain, SimpleChains.ADAM(), 5000
44-
);
40+
@time SimpleChains.train_unbatched!(G, p, lenetloss, xtrain, SimpleChains.ADAM(), 5000)
4541
report(p)
4642
end
4743

@@ -53,10 +49,10 @@ using Optimization, PSOGPU
5349

5450
lb = -ones(length(p)) .* 10
5551
ub = ones(length(p)) .* 10
56-
prob = OptimizationProblem((u,data) -> lenetloss(data, u), p, xtrain; lb = lb, ub = ub)
52+
prob = OptimizationProblem((u, data) -> lenetloss(data, u), p, xtrain; lb = lb, ub = ub)
5753

5854
n_particles = 1000
5955

6056
sol = solve(prob,
61-
ParallelPSOKernel(n_particles; gpu = false, threaded = true),
62-
maxiters = 1000)
57+
ParallelPSOKernel(n_particles; threaded = true),
58+
maxiters = 1000)

src/PSOGPU.jl

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,20 @@ struct PSOGBest{T1, T2 <: eltype(T1)}
1919
cost::T2
2020
end
2121

22-
struct ParallelPSOKernel
22+
struct ParallelPSOKernel{Backend}
2323
num_particles::Int
2424
async::Bool
25-
gpu::Bool
2625
threaded::Bool
26+
backend::Backend
2727
end
28-
struct ParallelSyncPSO
28+
struct ParallelSyncPSO{Backend}
2929
num_particles::Int
30+
backend::Backend
3031
end
3132

3233
function ParallelPSOKernel(num_particles::Int;
33-
async = false,
34-
gpu = false, threaded = false)
35-
ParallelPSOKernel(num_particles, async, gpu, threaded)
34+
async = false, threaded = false, backend = CPU())
35+
ParallelPSOKernel(num_particles, async, threaded, backend)
3636
end
3737

3838
SciMLBase.allowsbounds(::ParallelPSOKernel) = true
@@ -55,7 +55,8 @@ function SciMLBase.__solve(prob::OptimizationProblem,
5555

5656
prob = remake(prob; lb = lb, ub = ub)
5757

58-
if !(opt.gpu)
58+
## TODO: Compare the performance of KA kernels with CPU backend with CPU implementations
59+
if opt.backend isa CPU
5960
if opt.threaded
6061
gbest = PSO(prob; population = opt.num_particles, kwargs...)
6162
else
@@ -67,7 +68,9 @@ function SciMLBase.__solve(prob::OptimizationProblem,
6768
init_gbest, particles = init_particles(prob, opt.num_particles)
6869
# TODO: Do the equivalent of cu()/roc()
6970
particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
70-
gpu_particles = KernelAbstractions.allocate(backend, particles_eltype, size(particles))
71+
gpu_particles = KernelAbstractions.allocate(backend,
72+
particles_eltype,
73+
size(particles))
7174
copyto!(gpu_particles, particles)
7275
gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
7376
copyto!(gpu_init_gbest, [init_gbest])
@@ -90,9 +93,11 @@ function SciMLBase.__solve(prob::OptimizationProblem,
9093
ub = prob.ub === nothing ? fill(eltype(prob.u0)(Inf), length(prob.u0)) : prob.ub
9194

9295
prob = remake(prob; lb = lb, ub = ub)
93-
96+
backend = opt.backend
9497
init_gbest, particles = init_particles(prob, opt.num_particles)
95-
gpu_particles = cu(particles)
98+
particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)
99+
gpu_particles = KernelAbstractions.allocate(backend, particles_eltype, size(particles))
100+
copyto!(gpu_particles, particles)
96101
init_gbest = init_gbest
97102
gbest = pso_solve_sync_gpu!(prob, init_gbest, gpu_particles; kwargs...)
98103

src/ode_pso.jl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
@inbounds particle = gpu_particles[i]
66

77
updated_velocity = w .* particle.velocity .+
8-
c1 .* rand(typeof(particle.velocity)) .* (particle.best_position -
8+
c1 .* rand(typeof(particle.velocity)) .*
9+
(particle.best_position -
910
particle.position) .+
1011
c2 .* rand(typeof(particle.velocity)) .*
1112
(gbest.position - particle.position)
@@ -54,8 +55,8 @@ function parameter_estim_ode!(prob::ODEProblem,
5455
prob_func = default_prob_func,
5556
w = 0.72980f0,
5657
wdamp = 1.0f0,
57-
maxiters = 100,
58-
backend = CPU(), kwargs...)
58+
maxiters = 100, kwargs...)
59+
backend = get_backend(gpu_particles)
5960
update_states! = PSOGPU._update_particle_states!(backend)
6061

6162
losses = KernelAbstractions.ones(backend, 1, length(gpu_particles))
@@ -69,7 +70,7 @@ function parameter_estim_ode!(prob::ODEProblem,
6970
ub,
7071
gbest,
7172
w;
72-
ndrange=length(gpu_particles))
73+
ndrange = length(gpu_particles))
7374

7475
probs = prob_func.(Ref(improb), gpu_particles)
7576

@@ -79,7 +80,7 @@ function parameter_estim_ode!(prob::ODEProblem,
7980

8081
sum!(losses, (map(x -> sum(x .^ 2), data .- us)))
8182

82-
update_costs!(losses, gpu_particles; ndrange=length(losses))
83+
update_costs!(losses, gpu_particles; ndrange = length(losses))
8384

8485
best_particle = minimum(gpu_particles,
8586
init = PSOGPU.PSOParticle(gbest.position,

src/pso_async_gpu.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,14 @@ function pso_solve_async_gpu!(prob,
5555
maxiters = 100,
5656
w = 0.7298f0,
5757
wdamp = 1.0f0,
58-
debug = false,
59-
backend = CPU())
58+
debug = false)
6059

6160
## Initialize stuff
6261

62+
backend = get_backend(gpu_particles)
63+
6364
kernel = update_particle_states_async!(backend)
64-
kernel(prob, gpu_particles, gbest, w, wdamp, maxiters; ndrange=length(gpu_particles))
65+
kernel(prob, gpu_particles, gbest, w, wdamp, maxiters; ndrange = length(gpu_particles))
6566

6667
best_particle = minimum(gpu_particles)
6768
return PSOGBest(best_particle.best_position, best_particle.best_cost)

src/pso_gpu.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
## Update velocity
1515

1616
updated_velocity = w .* particle.velocity .+
17-
c1 .* rand(typeof(particle.velocity)) .* (particle.best_position -
17+
c1 .* rand(typeof(particle.velocity)) .*
18+
(particle.best_position -
1819
particle.position) .+
1920
c2 .* rand(typeof(particle.velocity)) .*
2021
(gbest.position - particle.position)
@@ -54,16 +55,17 @@ function pso_solve_gpu!(prob,
5455
maxiters = 100,
5556
w = 0.7298f0,
5657
wdamp = 1.0f0,
57-
debug = false,
58-
backend = CPU())
58+
debug = false)
5959

6060
## Initialize stuff
6161

62+
backend = get_backend(gpu_particles)
63+
6264
kernel = update_particle_states!(backend)
6365

6466
for i in 1:maxiters
6567
## Invoke GPU Kernel here
66-
kernel(prob, gpu_particles, gbest, w; ndrange=length(gpu_particles))
68+
kernel(prob, gpu_particles, gbest, w; ndrange = length(gpu_particles))
6769
w = w * wdamp
6870
end
6971

src/pso_sync_gpu.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
## Update velocity
99

1010
updated_velocity = w .* particle.velocity .+
11-
c1 .* rand(typeof(particle.velocity)) .* (particle.best_position -
11+
c1 .* rand(typeof(particle.velocity)) .*
12+
(particle.best_position -
1213
particle.position) .+
1314
c2 .* rand(typeof(particle.velocity)) .*
1415
(gbest.position - particle.position)
@@ -38,14 +39,17 @@ function pso_solve_sync_gpu!(prob,
3839
maxiters = 100,
3940
w = 0.7298f0,
4041
wdamp = 1.0f0,
41-
debug = false,
42-
backend = CPU())
43-
@show minimum(gpu_particles)
42+
debug = false)
43+
backend = get_backend(gpu_particles)
4444

4545
update_particle_kernel = _update_particle_states!(backend)
4646

4747
for i in 1:maxiters
48-
update_particle_kernel(prob, gpu_particles, gbest, w; ndrange=length(gpu_particles))
48+
update_particle_kernel(prob,
49+
gpu_particles,
50+
gbest,
51+
w;
52+
ndrange = length(gpu_particles))
4953
best_particle = minimum(gpu_particles)
5054
gbest = PSOGBest(best_particle.position, best_particle.best_cost)
5155
w = w * wdamp

test/gpu.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ prob = OptimizationProblem(rosenbrock, x0, p; lb = lb, ub = ub)
1616

1717
n_particles = 1000
1818

19-
sol = solve(prob, ParallelPSOKernel(n_particles; gpu = true), maxiters = 500)
19+
sol = solve(prob, ParallelPSOKernel(n_particles; backend = CUDABackend()), maxiters = 500)
2020

2121
@test sol.objective < 1e-4
2222

@@ -25,7 +25,7 @@ prob = OptimizationProblem(rosenbrock, x0, p)
2525
n_particles = 2000
2626

2727
sol = solve(prob,
28-
ParallelPSOKernel(n_particles; gpu = false, threaded = true),
28+
ParallelPSOKernel(n_particles; threaded = true),
2929
maxiters = 500)
3030

3131
@test sol.objective < 1e-4

test/regression.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ prob = OptimizationProblem(rosenbrock, x0, p; lb = lb, ub = ub)
1717
n_particles = 1000
1818

1919
sol = solve(prob,
20-
ParallelPSOKernel(n_particles; gpu = false, threaded = true),
20+
ParallelPSOKernel(n_particles; threaded = true),
2121
maxiters = 500)
2222

2323
@test sol.objective < 1e-4
2424

2525
sol = solve(prob,
26-
ParallelPSOKernel(n_particles; gpu = false, threaded = false),
26+
ParallelPSOKernel(n_particles; threaded = false),
2727
maxiters = 500)
2828

2929
@test sol.objective < 1e-4
@@ -35,13 +35,13 @@ prob = OptimizationProblem(rosenbrock, x0, p; lb = lb, ub = ub, N)
3535
n_particles = 2000
3636

3737
sol = solve(prob,
38-
ParallelPSOKernel(n_particles; gpu = false, threaded = true),
38+
ParallelPSOKernel(n_particles; threaded = true),
3939
maxiters = 500)
4040

4141
@test sol.objective < 1e-4
4242

4343
sol = solve(prob,
44-
ParallelPSOKernel(n_particles; gpu = false, threaded = false),
44+
ParallelPSOKernel(n_particles; threaded = false),
4545
maxiters = 500)
4646

4747
@test sol.objective < 1e-4
@@ -51,13 +51,13 @@ prob = OptimizationProblem(rosenbrock, x0, p)
5151
n_particles = 2000
5252

5353
sol = solve(prob,
54-
ParallelPSOKernel(n_particles; gpu = false, threaded = true),
54+
ParallelPSOKernel(n_particles; threaded = true),
5555
maxiters = 500)
5656

5757
@test sol.objective < 1e-4
5858

5959
sol = solve(prob,
60-
ParallelPSOKernel(n_particles; gpu = false, threaded = false),
60+
ParallelPSOKernel(n_particles; threaded = false),
6161
maxiters = 500)
6262

6363
@test sol.objective < 1e-4

0 commit comments

Comments
 (0)