@@ -6,120 +6,51 @@ import DiffEqGPU: GPUTsit5, vectorized_asolve, make_prob_compatible
66
77# # Use lb and ub either as StaticArray or pass them separately as CuArrays
88# # Passing as CuArrays makes more sense, or maybe SArray? The based on no. of dimension
9- struct PSOParticle {T1, T2 <: eltype (T1)}
9+ struct SPSOParticle {T1, T2 <: eltype (T1)}
1010 position:: T1
1111 velocity:: T1
1212 cost:: T2
1313 best_position:: T1
1414 best_cost:: T2
1515end
16-
17- struct PSOGBest{T1, T2 <: eltype (T1)}
16+ struct SPSOGBest{T1, T2 <: eltype (T1)}
1817 position:: T1
1918 cost:: T2
2019end
2120
22- struct ParallelPSOKernel{Backend}
23- num_particles:: Int
24- async:: Bool
25- threaded:: Bool
26- backend:: Backend
27- end
28- struct ParallelSyncPSO{Backend}
29- num_particles:: Int
30- backend:: Backend
31- end
32-
33- function ParallelPSOKernel (num_particles:: Int ;
34- async = false , threaded = false , backend = CPU ())
35- ParallelPSOKernel (num_particles, async, threaded, backend)
36- end
37-
38- SciMLBase. allowsbounds (:: ParallelPSOKernel ) = true
39- SciMLBase. allowsbounds (:: ParallelSyncPSO ) = true
40- # SciMLBase.requiresbounds(::ParallelPSOKernel) = true
41-
42- include (" ./pso_cpu.jl" )
43- include (" ./pso_gpu.jl" )
44- include (" ./pso_async_gpu.jl" )
45- include (" ./utils.jl" )
46- include (" ./pso_sync_gpu.jl" )
47- include (" ./ode_pso.jl" )
48-
49- function SciMLBase. __solve (prob:: OptimizationProblem ,
50- opt:: ParallelPSOKernel ,
51- args... ;
52- kwargs... )
53- lb = prob. lb === nothing ? fill (eltype (prob. u0)(- Inf ), length (prob. u0)) : prob. lb
54- ub = prob. ub === nothing ? fill (eltype (prob. u0)(Inf ), length (prob. u0)) : prob. ub
55-
56- prob = remake (prob; lb = lb, ub = ub)
57-
58- # # TODO : Compare the performance of KA kernels with CPU backend with CPU implementations
59- if opt. backend isa CPU
60- if opt. threaded
61- gbest = PSO (prob; population = opt. num_particles, kwargs... )
62- else
63- init_gbest, particles = init_particles (prob, opt. num_particles)
64- gbest = pso_solve_cpu! (prob, init_gbest, particles; kwargs... )
65- end
66- else
67- backend = opt. backend
68- init_gbest, particles = init_particles (prob, opt. num_particles)
69- # TODO : Do the equivalent of cu()/roc()
70- particles_eltype = eltype (particles) === Float64 ? Float32 : eltype (particles)
71- gpu_particles = KernelAbstractions. allocate (backend,
72- particles_eltype,
73- size (particles))
74- copyto! (gpu_particles, particles)
75- gpu_init_gbest = KernelAbstractions. allocate (backend, typeof (init_gbest), (1 ,))
76- copyto! (gpu_init_gbest, [init_gbest])
77- if opt. async
78- gbest = pso_solve_async_gpu! (prob, gpu_init_gbest, gpu_particles; kwargs... )
79- else
80- gbest = pso_solve_gpu! (prob, gpu_init_gbest, gpu_particles; kwargs... )
81- end
82- end
83-
84- SciMLBase. build_solution (SciMLBase. DefaultOptimizationCache (prob. f, prob. p), opt,
85- gbest. position, gbest. cost)
21+ mutable struct MPSOParticle{T}
22+ position:: AbstractArray{T}
23+ velocity:: AbstractArray{T}
24+ cost:: T
25+ best_position:: AbstractArray{T}
26+ best_cost:: T
8627end
87-
88- function SciMLBase. __solve (prob:: OptimizationProblem ,
89- opt:: ParallelSyncPSO ,
90- args... ;
91- kwargs... )
92- lb = prob. lb === nothing ? fill (eltype (prob. u0)(- Inf ), length (prob. u0)) : prob. lb
93- ub = prob. ub === nothing ? fill (eltype (prob. u0)(Inf ), length (prob. u0)) : prob. ub
94-
95- prob = remake (prob; lb = lb, ub = ub)
96- backend = opt. backend
97- init_gbest, particles = init_particles (prob, opt. num_particles)
98- particles_eltype = eltype (particles) === Float64 ? Float32 : eltype (particles)
99- gpu_particles = KernelAbstractions. allocate (backend, particles_eltype, size (particles))
100- copyto! (gpu_particles, particles)
101- init_gbest = init_gbest
102- gbest = pso_solve_sync_gpu! (prob, init_gbest, gpu_particles; kwargs... )
103-
104- SciMLBase. build_solution (SciMLBase. DefaultOptimizationCache (prob. f, prob. p), opt,
105- gbest. position, gbest. cost)
28+ mutable struct MPSOGBest{T}
29+ position:: AbstractArray{T}
30+ cost:: T
10631end
10732
108- using Base
109-
11033# # required overloads for min or max computation on particles
111- function Base. isless (a:: PSOGPU.PSOParticle {T1, T2} ,
112- b:: PSOGPU.PSOParticle {T1, T2} ) where {T1, T2}
34+ function Base. isless (a:: PSOGPU.SPSOParticle {T1, T2} ,
35+ b:: PSOGPU.SPSOParticle {T1, T2} ) where {T1, T2}
11336 a. best_cost < b. best_cost
11437end
11538
116- function Base. typemax (:: Type{PSOGPU.PSOParticle {T1, T2}} ) where {T1, T2}
117- PSOGPU. PSOParticle {T1, T2} (similar (T1),
39+ function Base. typemax (:: Type{PSOGPU.SPSOParticle {T1, T2}} ) where {T1, T2}
40+ PSOGPU. SPSOParticle {T1, T2} (similar (T1),
11841 similar (T1),
11942 typemax (T2),
12043 similar (T1),
12144 typemax (T2))
12245end
12346
124- export ParallelPSOKernel, ParallelSyncPSO, OptimizationProblem, solve
47+ include (" ./algorithms.jl" )
48+ include (" ./utils.jl" )
49+ include (" ./ode_pso.jl" )
50+ include (" ./kernels.jl" )
51+ include (" ./lowerlevel_solve.jl" )
52+ include (" ./solve.jl" )
53+
54+ export ParallelPSOKernel,
55+ ParallelSyncPSOKernel, ParallelPSOArray, SerialPSO, OptimizationProblem, solve
12556end
0 commit comments