|
33 | 33 | particle |
34 | 34 | end |
35 | 35 |
|
# update_particle_states!(prob, gpu_particles, gbest_ref, w, opt, lock; c1, c2)
#
# One work-item per particle:
#   1. advance particle `i` with `update_particle_state` and store it back in
#      `gpu_particles`;
#   2. if its personal best beats the global best read from `gbest_ref[1]`, append
#      it to a workgroup-local candidate queue;
#   3. the first work-item of each workgroup reduces that queue to a single
#      candidate and, while holding the spin lock in `lock[1]`, publishes it to
#      `gbest_ref[1]` if it is still an improvement.
#
# Arguments
#   prob, w, opt, c1, c2 : forwarded unchanged to `update_particle_state`.
#   gpu_particles        : array of `SPSOParticle{T1, T2}`, read and written at index `i`.
#   gbest_ref            : one-element container holding the current global best.
#   lock                 : one-element container used as a spin lock; the acquire below
#                          swaps `UInt32(0)` for `UInt32(1)`, so it must hold `UInt32(0)`
#                          when free.
@kernel function update_particle_states!(prob,
        gpu_particles::AbstractArray{SPSOParticle{T1, T2}}, gbest_ref, w,
        opt::ParallelPSOKernel, lock; c1 = 1.4962f0,
        c2 = 1.4962f0) where {T1, T2}
    i = @index(Global, Linear)
    tidx = @index(Local, Linear)

    @uniform gs = @groupsize()[1]

    # Per-workgroup queue of candidates for the global best, plus its fill count.
    best_queue = @localmem SPSOGBest{T1, T2} (gs)
    queue_num = @localmem UInt32 1

    particle = @private SPSOParticle{T1, T2} 1

    # Seed every queue slot with an infinite cost so an unused slot can never win.
    @inbounds particle[1] = gpu_particles[i]
    @inbounds best_queue[tidx] = SPSOGBest(particle[1].best_position,
        convert(T2, Inf))
    queue_num[1] = UInt32(0)

    # The queue must be fully initialised before any work-item appends to it.
    @synchronize

    gbest = @inbounds gbest_ref[1]
    @inbounds particle[1] = update_particle_state(particle[1], prob, gbest, w, c1, c2,
        i, opt)

    # Persist the advanced particle; without this store the update is lost.
    @inbounds gpu_particles[i] = particle[1]

    # Queue this particle only if it improves on the global best seen by this work-item.
    if particle[1].best_cost < gbest.cost
        # The atomic increment hands each improving work-item its own queue slot.
        queue_idx = @atomic queue_num[1] += UInt32(1)
        @inbounds best_queue[queue_idx] = SPSOGBest(particle[1].best_position,
            particle[1].best_cost)
    end

    # All candidates must be queued before the reduction below reads them.
    @synchronize

    if tidx == 1
        n_queued = queue_num[1]

        # `> 0` rather than `> 1`: a single queued candidate must still be published.
        if n_queued > 0
            # Find best fit in block (the loop is empty when only one candidate exists).
            for j in 2:n_queued
                @inbounds if best_queue[j].cost < best_queue[1].cost
                    best_queue[1] = best_queue[j]
                end
            end

            # Take lock: spin until the 0 -> 1 exchange succeeds.
            while true
                res = @atomicreplace lock[1] UInt32(0)=>UInt32(1)
                if res.success
                    break
                end
            end

            # Re-read the global best under the lock; another workgroup may have
            # improved it since this work-item last looked.
            gbest = @inbounds gbest_ref[1]
            @inbounds if best_queue[1].cost < gbest.cost
                gbest_ref[1] = best_queue[1]
            end

            # Release lock, using the same UInt32 type as the acquire.
            @atomicswap lock[1] = UInt32(0)
        end
    end
end
56 | 102 |
|
57 | 103 | @kernel function update_particle_states!(prob, |
|
0 commit comments