Skip to content

Commit 04fa7ce

Browse files
committed
ParallelPSOKernel fixups
1 parent fc99855 commit 04fa7ce

File tree

1 file changed

+35
-34
lines changed

1 file changed

+35
-34
lines changed

src/kernels.jl

Lines changed: 35 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -38,63 +38,64 @@ end
3838
opt::ParallelPSOKernel, lock; c1 = 1.4962f0,
3939
c2 = 1.4962f0) where {T1, T2}
4040
i = @index(Global, Linear)
41-
# FIXME: Determine the right amount of shmem to use
41+
tidx = @index(Local, Linear)
4242

4343
@uniform gs = @groupsize()[1]
4444

4545
best_queue = @localmem SPSOGBest{T1, T2} (gs)
4646
queue_num = @localmem UInt32 1
4747

48-
@inbounds gbest = gbest_ref[1]
49-
@inbounds particle = gpu_particles[i]
48+
particle = @private SPSOParticle{T1, T2} 1
5049

5150
# Initialize cost to be Inf
52-
for bq_idx in 1:gs
53-
best_queue[bq_idx] = SPSOGBest(particle.best_position,
54-
convert(typeof(particle.cost), Inf))
55-
end
51+
@inbounds particle[1] = gpu_particles[i]
52+
best_queue[tidx] = SPSOGBest(particle[1].best_position,
53+
convert(T2, Inf))
54+
queue_num[1] = UInt32(0)
5655

5756
@synchronize
5857

59-
particle = update_particle_state(particle, prob, gbest, w, c1, c2, i, opt)
60-
@inbounds gpu_particles[i] = particle
58+
@inbounds particle[1] = gpu_particles[i]
59+
gbest = @inbounds gbest_ref[1]
60+
@inbounds particle[1] = update_particle_state(particle[1], prob, gbest, w, c1, c2, i, opt)
6161

6262
@synchronize
6363

64-
if particle.best_cost < gbest.cost
64+
@inbounds particle[1] = gpu_particles[i]
65+
gbest = @inbounds gbest_ref[1]
66+
if particle[1].best_cost < gbest.cost
6567
queue_idx = @atomic queue_num[1] += UInt32(1)
66-
@inbounds best_queue[queue_idx] = SPSOGBest(particle.best_position,
67-
particle.best_cost)
68+
@inbounds best_queue[queue_idx] = SPSOGBest(particle[1].best_position,
69+
particle[1].best_cost)
6870
end
71+
6972
@synchronize
70-
if i <= first(@ndrange())
71-
tidx = @index(Local, Linear)
72-
if tidx == 1
73-
if queue_num[1] > 1
74-
# Find best fit in block
75-
for j in 2:queue_num[1]
76-
@inbounds if best_queue[j].cost < best_queue[1].cost
77-
best_queue[1] = best_queue[j]
78-
end
79-
end
8073

81-
# Take lock
82-
while true
83-
res = @atomicreplace lock[1] UInt32(0)=>UInt32(1)
84-
if res.success
85-
break
86-
end
74+
if tidx == 1
75+
if queue_num[1] > 1
76+
# Find best fit in block
77+
for j in 2:queue_num[1]
78+
@inbounds if best_queue[j].cost < best_queue[1].cost
79+
best_queue[1] = best_queue[j]
8780
end
81+
end
8882

89-
# Update global best fit
90-
gbest = @inbounds gbest_ref[1]
91-
@inbounds if best_queue[1].cost < gbest.cost
92-
gbest_ref[1] = best_queue[1]
83+
# Take lock
84+
while true
85+
res = @atomicreplace lock[1] UInt32(0)=>UInt32(1)
86+
if res.success
87+
break
9388
end
89+
end
9490

95-
# Release lock
96-
@atomicswap lock[1] = 0
91+
# Update global best fit
92+
gbest = @inbounds gbest_ref[1]
93+
@inbounds if best_queue[1].cost < gbest.cost
94+
gbest_ref[1] = best_queue[1]
9795
end
96+
97+
# Release lock
98+
@atomicswap lock[1] = 0
9899
end
99100
end
100101
end

0 commit comments

Comments
 (0)