1- function _update_particle_states! (gpu_particles, lb, ub, gbest, w; c1 = 1.4962f0 ,
1+ @kernel function _update_particle_states! (gpu_particles, lb, ub, gbest, w; c1 = 1.4962f0 ,
22 c2 = 1.4962f0 )
3- i = (blockIdx (). x - 1 ) * blockDim (). x + threadIdx (). x
4- i > length (gpu_particles) && return
3+ i = @index (Global, Linear)
4+ if i <= length (gpu_particles)
5+ @inbounds particle = gpu_particles[i]
56
6- @inbounds particle = gpu_particles[i]
7+ updated_velocity = w .* particle. velocity .+
8+ c1 .* rand (typeof (particle. velocity)) .*
9+ (particle. best_position -
10+ particle. position) .+
11+ c2 .* rand (typeof (particle. velocity)) .*
12+ (gbest. position - particle. position)
713
8- updated_velocity = w .* particle. velocity .+
9- c1 .* rand (typeof (particle. velocity)) .* (particle. best_position -
10- particle. position) .+
11- c2 .* rand (typeof (particle. velocity)) .*
12- (gbest. position - particle. position)
14+ @set! particle. velocity = updated_velocity
1315
14- @set! particle. velocity = updated_velocity
16+ @set! particle. position = particle . position + particle . velocity
1517
16- @set! particle. position = particle. position + particle. velocity
18+ update_pos = max (particle. position, lb)
19+ update_pos = min (update_pos, ub)
1720
18- update_pos = max (particle. position, lb)
19- update_pos = min (update_pos, ub)
21+ @set! particle. position = update_pos
2022
21- @set! particle. position = update_pos
22-
23- @inbounds gpu_particles[i] = particle
24-
25- return nothing
23+ @inbounds gpu_particles[i] = particle
24+ end
2625end
2726
28- function _update_particle_costs! (losses, gpu_particles)
29- i = (blockIdx (). x - 1 ) * blockDim (). x + threadIdx (). x
30- i > length (losses) && return
27+ @kernel function _update_particle_costs! (losses, gpu_particles)
28+ i = @index (Global, Linear)
29+ if i <= length (losses)
30+ @inbounds particle = gpu_particles[i]
31+ @inbounds loss = losses[i]
3132
32- @inbounds particle = gpu_particles[i]
33- @inbounds loss = losses[i]
33+ @set! particle. cost = loss
3434
35- @set! particle. cost = loss
35+ if particle. cost < particle. best_cost
36+ @set! particle. best_position = particle. position
37+ @set! particle. best_cost = particle. cost
38+ end
3639
37- if particle. cost < particle. best_cost
38- @set! particle. best_position = particle. position
39- @set! particle. best_cost = particle. cost
40+ @inbounds gpu_particles[i] = particle
4041 end
41-
42- @inbounds gpu_particles[i] = particle
43-
44- return nothing
4542end
4643
4744function default_prob_func (prob, gpu_particle)
@@ -59,16 +56,11 @@ function parameter_estim_ode!(prob::ODEProblem,
5956 w = 0.72980f0 ,
6057 wdamp = 1.0f0 ,
6158 maxiters = 100 , kwargs... )
62- update_states! = @cuda launch= false PSOGPU. _update_particle_states! (gpu_particles, lb,
63- ub,
64- gbest,
65- w)
66-
67- losses = CUDA. ones (1 , length (gpu_particles))
68- update_costs! = @cuda launch= false PSOGPU. _update_particle_costs! (losses, gpu_particles)
59+ backend = get_backend (gpu_particles)
60+ update_states! = PSOGPU. _update_particle_states! (backend)
6961
70- config_states = launch_configuration (update_states! . fun )
71- config_costs = launch_configuration (update_costs! . fun )
62+ losses = KernelAbstractions . ones (backend, 1 , length (gpu_particles) )
63+ update_costs! = PSOGPU . _update_particle_costs! (backend )
7264
7365 improb = make_prob_compatible (prob)
7466
@@ -78,8 +70,7 @@ function parameter_estim_ode!(prob::ODEProblem,
7870 ub,
7971 gbest,
8072 w;
81- config_states. threads,
82- config_states... )
73+ ndrange = length (gpu_particles))
8374
8475 probs = prob_func .(Ref (improb), gpu_particles)
8576
@@ -89,7 +80,7 @@ function parameter_estim_ode!(prob::ODEProblem,
8980
9081 sum! (losses, (map (x -> sum (x .^ 2 ), data .- us)))
9182
92- update_costs! (losses, gpu_particles; config_costs . threads, config_costs ... )
83+ update_costs! (losses, gpu_particles; ndrange = length (losses) )
9384
9485 best_particle = minimum (gpu_particles,
9586 init = PSOGPU. PSOParticle (gbest. position,
0 commit comments