@@ -19,20 +19,20 @@ struct PSOGBest{T1, T2 <: eltype(T1)}
1919 cost:: T2
2020end
2121
22- struct ParallelPSOKernel
22+ struct ParallelPSOKernel{Backend}
2323 num_particles:: Int
2424 async:: Bool
25- gpu:: Bool
2625 threaded:: Bool
26+ backend:: Backend
2727end
28- struct ParallelSyncPSO
28+ struct ParallelSyncPSO{Backend}
2929 num_particles:: Int
30+ backend:: Backend
3031end
3132
3233function ParallelPSOKernel (num_particles:: Int ;
33- async = false ,
34- gpu = false , threaded = false )
35- ParallelPSOKernel (num_particles, async, gpu, threaded)
34+ async = false , threaded = false , backend = CPU ())
35+ ParallelPSOKernel (num_particles, async, threaded, backend)
3636end
3737
3838SciMLBase. allowsbounds (:: ParallelPSOKernel ) = true
@@ -55,7 +55,8 @@ function SciMLBase.__solve(prob::OptimizationProblem,
5555
5656 prob = remake (prob; lb = lb, ub = ub)
5757
58- if ! (opt. gpu)
58+ # # TODO : Compare the performance of KA kernels with CPU backend with CPU implementations
59+ if opt. backend isa CPU
5960 if opt. threaded
6061 gbest = PSO (prob; population = opt. num_particles, kwargs... )
6162 else
@@ -67,7 +68,9 @@ function SciMLBase.__solve(prob::OptimizationProblem,
6768 init_gbest, particles = init_particles (prob, opt. num_particles)
6869 # TODO : Do the equivalent of cu()/roc()
6970 particles_eltype = eltype (particles) === Float64 ? Float32 : eltype (particles)
70- gpu_particles = KernelAbstractions. allocate (backend, particles_eltype, size (particles))
71+ gpu_particles = KernelAbstractions. allocate (backend,
72+ particles_eltype,
73+ size (particles))
7174 copyto! (gpu_particles, particles)
7275 gpu_init_gbest = KernelAbstractions. allocate (backend, typeof (init_gbest), (1 ,))
7376 copyto! (gpu_init_gbest, [init_gbest])
@@ -90,9 +93,11 @@ function SciMLBase.__solve(prob::OptimizationProblem,
9093 ub = prob. ub === nothing ? fill (eltype (prob. u0)(Inf ), length (prob. u0)) : prob. ub
9194
9295 prob = remake (prob; lb = lb, ub = ub)
93-
96+ backend = opt . backend
9497 init_gbest, particles = init_particles (prob, opt. num_particles)
95- gpu_particles = cu (particles)
98+ particles_eltype = eltype (particles) === Float64 ? Float32 : eltype (particles)
99+ gpu_particles = KernelAbstractions. allocate (backend, particles_eltype, size (particles))
100+ copyto! (gpu_particles, particles)
96101 init_gbest = init_gbest
97102 gbest = pso_solve_sync_gpu! (prob, init_gbest, gpu_particles; kwargs... )
98103
0 commit comments