     @inbounds gpu_particles[i] = particle
 end

-@kernel function update_particle_states!(prob, gpu_particles, gbest, w,
+@kernel function update_particle_states!(prob,
+        gpu_particles::AbstractArray{SPSOParticle{T1, T2}}, block_particles, gbest, w,
         opt::ParallelSyncPSOKernel; c1 = 1.4962f0,
-        c2 = 1.4962f0)
+        c2 = 1.4962f0) where {T1, T2}
     i = @index(Global, Linear)
+    tidx = @index(Local, Linear)
+    gidx = @index(Group, Linear)
+
+    @uniform gs = @groupsize()[1]
+
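+    # Workgroup-local (shared-memory) scratch: one candidate-best slot per lane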
+    group_particles = @localmem SPSOGBest{T1, T2} (gs)
+
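+    # Lane 1 seeds every slot with an Inf-cost sentinel so slots belonging to
+    # inactive lanes (ndrange not a multiple of the groupsize) cannot win the reduction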
+    if tidx == 1
+        fill!(group_particles, SPSOGBest(gbest.position, convert(typeof(gbest.cost), Inf)))
+    end
+
+    @synchronize

     @inbounds particle = gpu_particles[i]

     particle = update_particle_state(particle, prob, gbest, w, c1, c2, i, opt)

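+    # Publish this lane's updated personal best into the shared buffer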
+    @inbounds group_particles[tidx] = SPSOGBest(particle.best_position, particle.best_cost)
+
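+    # Tree reduction over the workgroup: halve the stride each round, keeping the
+    # lower-cost candidate in the lower slot; assumes the groupsize is a power of two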
+    stride = gs ÷ 2
+
+    while stride >= 1
+        @synchronize
+        if tidx <= stride
+            @inbounds if group_particles[tidx].cost > group_particles[tidx + stride].cost
+                group_particles[tidx] = group_particles[tidx + stride]
+            end
+        end
+        stride = stride ÷ 2
+    end
+
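+    # Make the final stride-1 writes visible before lane 1 reads the winner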
+    @synchronize
+
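+    # Lane 1 writes this workgroup's winner out for a second-stage global reduction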
+    if tidx == 1
+        @inbounds block_particles[gidx] = group_particles[tidx]
+    end
+
     @inbounds gpu_particles[i] = particle
 end

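The per-workgroup winners written to block_particles still need a second-stage reduction down to a single global best. A minimal host-side sketch of how this kernel could be launched and its output folded, assuming KernelAbstractions.jl; the function name launch_and_reduce, the groupsize default, and the allocation of block_particles here are illustrative assumptions, not this PR's API:

using KernelAbstractions

# Hedged sketch of the launch plus second reduction stage; names and launch
# parameters are illustrative, not part of this PR.
function launch_and_reduce(prob, gpu_particles, gbest, w, opt; groupsize = 256)
    backend = get_backend(gpu_particles)
    n = length(gpu_particles)
    nblocks = cld(n, groupsize)

    # One output slot per workgroup, matching block_particles[gidx] in the kernel
    block_particles = KernelAbstractions.allocate(backend, typeof(gbest), nblocks)

    kernel! = update_particle_states!(backend, groupsize)
    kernel!(prob, gpu_particles, block_particles, gbest, w, opt; ndrange = n)
    KernelAbstractions.synchronize(backend)

    # Fold the per-block winners on the host into a single global best
    reduce((a, b) -> a.cost <= b.cost ? a : b, Array(block_particles))
end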