1+ using Pkg
2+
3+ Pkg. activate (@__DIR__ )
4+
15using PSOGPU, StaticArrays, KernelAbstractions, Optimization
26using CUDA
37
48device! (2 )
59
6- N = 3
10+ N = 10
"""
    rosenbrock(x, p)

Evaluate the generalized Rosenbrock objective at the point `x` with parameters `p`.

The value is the sum over `i = 1:(length(x) - 1)` of
`p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2`.

A single-element `x` has no consecutive pairs; the result is then a zero of the
term type rather than an empty-reduction error.
"""
function rosenbrock(x, p)
    # Derive the additive identity from an actual term so that heterogeneous `p`
    # containers (e.g. mixed tuples) still produce a concrete zero.
    zero_term = zero(p[2] * first(x)^2 + (p[1] - first(x))^2)
    # An explicit `init` keeps the reduction defined when the index range is empty.
    return sum((p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
                for i in 1:(length(x) - 1));
        init = zero_term)
end
@@ -35,9 +39,33 @@ sol = solve(prob,
3539@show sol. objective
3640@show sol. stats. time
3741
# Solve once with the `global_update = true` variant of `ParallelPSOKernel`
# (the variant this script later labels "Queue-lock") and report the result.
sol = let alg = ParallelPSOKernel(n_particles; backend = CUDABackend(),
        global_update = true)
    solve(prob, alg, maxiters = 500)
end

@show sol.objective
@show sol.stats.time

# Solve once with the hybrid method: a `global_update = false` PSO kernel
# combined with `PSOGPU.LBFGS()` as the local optimizer.
sol = let swarm = PSOGPU.ParallelPSOKernel(n_particles;
        global_update = false,
        backend = CUDABackend()),
    alg = PSOGPU.HybridPSO(; backend = CUDABackend(),
        pso = swarm,
        local_opt = PSOGPU.LBFGS())

    solve(prob, alg, maxiters = 500, local_maxiters = 30)
end

@show sol.objective
@show sol.stats.time
59+
3860cpu_times = Float64[]
3961gpu_sync_times = Float64[]
4062gpu_async_times = Float64[]
63+ gpu_queue_lock_times = Float64[]
64+
65+ using Random
66+ rng = Random. default_rng ()
67+
68+ Random. seed! (rng, 0 )
4169
4270Ns = [2 ^ i for i in 3 : 2 : 20 ]
4371for n_particles in Ns
@@ -108,17 +136,61 @@ for n_particles in Ns
108136 opt, Val (opt. global_update); maxiters = 500 )
109137
110138 push! (gpu_async_times, el_time)
139+
140+ opt = ParallelPSOKernel (n_particles; backend, global_update = true )
141+
142+ gpu_init_gbest = KernelAbstractions. allocate (backend, typeof (init_gbest), (1 ,))
143+ copyto! (gpu_init_gbest, [init_gbest])
144+
145+ PSOGPU. vectorized_solve! (prob,
146+ gpu_init_gbest,
147+ backend_particles,
148+ opt, Val (opt. global_update); maxiters = 500 )
149+
150+ el_time = @elapsed PSOGPU. vectorized_solve! (prob,
151+ gpu_init_gbest,
152+ backend_particles,
153+ opt, Val (opt. global_update); maxiters = 500 )
154+
155+ push! (gpu_queue_lock_times, el_time)
156+ end
157+
158+ gpu_hybrid_times = Float64[]
159+
160+ Random. seed! (rng, 0 )
161+
# Record the hybrid PSO + L-BFGS solve time on the GPU for every swarm size in `Ns`.
for n_particles in Ns
    @info n_particles

    # `sol` already exists as a top-level variable in this script. Assigning to that
    # name from inside a top-level loop is ambiguous when the file is run
    # non-interactively (Julia warns and creates a new local), so the result is kept
    # in an explicitly declared loop-local instead.
    local timed_sol

    # Two identical solves per swarm size; only the second result is recorded
    # (the first is presumably a warm-up run — confirm).
    for _ in 1:2
        timed_sol = solve(prob,
            PSOGPU.HybridPSO(; backend = CUDABackend(),
                pso = PSOGPU.ParallelPSOKernel(n_particles;
                    global_update = false,
                    backend = CUDABackend()),
                local_opt = PSOGPU.LBFGS()), maxiters = 500,
            local_maxiters = 30)
    end

    # The time stored is the one reported in the solution's own stats.
    push!(gpu_hybrid_times, timed_sol.stats.time)
end
112183
113184@show cpu_times
114185@show gpu_sync_times
115186@show gpu_async_times
187+ @show gpu_queue_lock_times
116188
117189using Plots
118190
119191xticks = 10 .^ round .(range (1 , 7 , length = 13 ), digits = 2 )
120192
121- yticks = 10 .^ round .(range (1 , - 3 , length = 11 ), digits = 2 )
193+ yticks = 10 .^ round .(range (1 , - 3 , length = 9 ), digits = 2 )
122194
123195plt = plot (Ns,
124196 gpu_sync_times,
@@ -127,14 +199,15 @@ plt = plot(Ns,
127199 linewidth = 2 ,
128200 label = " ParallelSyncPSOKernel: GPU" ,
129201 ylabel = " Time (s)" ,
130- xlabel = " Trajectories " ,
131- title = " Bechmarking the 10D Rosenbrock Problem" ,
202+ xlabel = " No. of Particles " ,
203+ title = " Benchmarking the 10D Rosenbrock Problem" ,
132204 legend = :topleft ,
133205 xticks = xticks,
134206 yticks = yticks,
135207 marker = :circle ,
136- dpi = 600 ,
137- color = :Green )
208+ dpi = 600
209+ # color = :Green
210+ )
138211
139212plt = plot! (Ns,
140213 cpu_times,
@@ -143,17 +216,168 @@ plt = plot!(Ns,
143216 linewidth = 2 ,
144217 label = " ParallelSyncPSOKernel: CPU" ,
145218 marker = :circle ,
146- color = :Orange )
219+ ls = :dash
220+ # color = :Orange
221+ )
147222
# Overlay the remaining GPU timing series on `plt`. All three share the same
# log-log axes, line width and marker, so they are drawn from one table of
# (timings, legend label) pairs instead of three copy-pasted calls.
for (series_times, series_label) in (
    (gpu_async_times, "ParallelPSOKernel (Async): GPU"),
    (gpu_queue_lock_times, "ParallelPSOKernel (Queue-lock): GPU"),
    (gpu_hybrid_times, "HybridPSO-LBFGS: GPU"))
    # `plot!` mutates `plt` in place, so no reassignment is needed inside the loop.
    plot!(plt, Ns, series_times;
        xaxis = :log,
        yaxis = :log,
        linewidth = 2,
        label = series_label,
        marker = :circle)
end
252+
253+ savefig (" benchmark_hybrid.svg" )
254+
255+ using Statistics
156256
157257@show mean (cpu_times ./ gpu_sync_times)
158258
159259@show mean (cpu_times ./ gpu_async_times)
260+
261+ @show mean (cpu_times ./ gpu_queue_lock_times)
262+
263+ @show mean (cpu_times ./ gpu_hybrid_times)
264+
265+ @show mean (gpu_sync_times ./ gpu_queue_lock_times)
266+
267+ # cpu_times_total = Float64[]
268+ # gpu_sync_times_total = Float64[]
269+ # gpu_async_times_total = Float64[]
270+ # gpu_queue_lock_times_total = Float64[]
271+ # gpu_hybrid_times_total = Float64[]
272+
273+ # for n_particles in Ns
274+ # @info n_particles
275+
276+ # sol = solve(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()), maxiters = 500)
277+
278+ # el_time = @elapsed solve(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()), maxiters = 500)
279+
280+ # push!(cpu_times_total, el_time)
281+
282+ # sol = solve(prob,
283+ # ParallelSyncPSOKernel(n_particles; backend = CUDABackend()),
284+ # maxiters = 500)
285+
286+ # el_time = @elapsed solve(prob,
287+ # ParallelSyncPSOKernel(n_particles; backend = CUDABackend()),
288+ # maxiters = 500)
289+
290+ # push!(gpu_sync_times_total, el_time)
291+
292+ # sol = solve(prob,
293+ # ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = false),
294+ # maxiters = 500)
295+
296+ # el_time = @elapsed solve(prob,
297+ # ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = false),
298+ # maxiters = 500)
299+
300+ # push!(gpu_async_times_total, el_time)
301+
302+ # sol = solve(prob,
303+ # ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = true),
304+ # maxiters = 500)
305+
306+ # el_time = @elapsed solve(prob,
307+ # ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = true),
308+ # maxiters = 500)
309+
310+ # push!(gpu_queue_lock_times_total, el_time)
311+
312+ # sol = solve(prob,
313+ # PSOGPU.HybridPSO(; backend = CUDABackend(),
314+ # pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
315+ # local_opt = PSOGPU.LBFGS()), maxiters = 500,
316+ # local_maxiters = 30)
317+
318+ # el_time = @elapsed solve(prob,
319+ # PSOGPU.HybridPSO(; backend = CUDABackend(),
320+ # pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
321+ # local_opt = PSOGPU.LBFGS()), maxiters = 500,
322+ # local_maxiters = 30)
323+
324+ # push!(gpu_hybrid_times_total, el_time)
325+
326+ # end
327+
328+ # plt = plot(Ns,
329+ # gpu_sync_times_total,
330+ # xaxis = :log,
331+ # yaxis = :log,
332+ # linewidth = 2,
333+ # label = "ParallelSyncPSOKernel: GPU",
334+ # ylabel = "Time (s)",
335+ # xlabel = "No. of Particles",
336+ # title = "Benchmarking the 10D Rosenbrock Problem",
337+ # legend = :topleft,
338+ # xticks = xticks,
339+ # yticks = yticks,
340+ # marker = :circle,
341+ # dpi = 600,
342+ # # color = :Green
343+ # )
344+
345+ # plt = plot!(Ns,
346+ # cpu_times_total,
347+ # xaxis = :log,
348+ # yaxis = :log,
349+ # linewidth = 2,
350+ # label = "ParallelSyncPSOKernel: CPU",
351+ # marker = :circle,
352+ # # color = :Orange
353+ # )
354+
355+ # plt = plot!(Ns,
356+ # gpu_async_times_total,
357+ # xaxis = :log,
358+ # yaxis = :log,
359+ # linewidth = 2,
360+ # label = "ParallelPSOKernel (Async): GPU",
361+ # marker = :circle,
362+ # # color = :Green
363+ # )
364+
365+ # plt = plot!(Ns,
366+ # gpu_queue_lock_times_total,
367+ # xaxis = :log,
368+ # yaxis = :log,
369+ # linewidth = 2,
370+ # label = "ParallelPSOKernel (Queue-lock): GPU",
371+ # marker = :circle,
372+ # # color = :Green
373+ # )
374+
375+ # plt = plot!(Ns,
376+ # gpu_hybrid_times_total,
377+ # xaxis = :log,
378+ # yaxis = :log,
379+ # linewidth = 2,
380+ # label = "HybridPSO-LBFGS: GPU",
381+ # marker = :circle,
382+ # # color = :Green
383+ # )
0 commit comments