Skip to content

Commit a441d2c

Browse files
committed
Update benchmarks with final scripts
1 parent c3762e0 commit a441d2c

File tree

15 files changed

+4879
-120
lines changed

15 files changed

+4879
-120
lines changed

benchmarks/CPU_vs_GPU/Manifest.toml

Lines changed: 2274 additions & 0 deletions
Large diffs are not rendered by default.

benchmarks/CPU_vs_GPU/Project.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[deps]
2+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
3+
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
4+
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
5+
OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
6+
OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86"
7+
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
8+
OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
9+
PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
10+
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

benchmarks/CPU_vs_GPU/benchmark.jl

Lines changed: 233 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1+
using Pkg
2+
3+
Pkg.activate(@__DIR__)
4+
15
using PSOGPU, StaticArrays, KernelAbstractions, Optimization
26
using CUDA
37

48
device!(2)
59

6-
N = 3
10+
N = 10
711
function rosenbrock(x, p)
812
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
913
end
@@ -35,9 +39,33 @@ sol = solve(prob,
3539
@show sol.objective
3640
@show sol.stats.time
3741

42+
sol = solve(prob,
43+
ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = true),
44+
maxiters = 500)
45+
46+
@show sol.objective
47+
@show sol.stats.time
48+
49+
sol = solve(prob,
50+
PSOGPU.HybridPSO(; backend = CUDABackend(),
51+
pso = PSOGPU.ParallelPSOKernel(n_particles;
52+
global_update = false,
53+
backend = CUDABackend()),
54+
local_opt = PSOGPU.LBFGS()), maxiters = 500,
55+
local_maxiters = 30)
56+
57+
@show sol.objective
58+
@show sol.stats.time
59+
3860
cpu_times = Float64[]
3961
gpu_sync_times = Float64[]
4062
gpu_async_times = Float64[]
63+
gpu_queue_lock_times = Float64[]
64+
65+
using Random
66+
rng = Random.default_rng()
67+
68+
Random.seed!(rng, 0)
4169

4270
Ns = [2^i for i in 3:2:20]
4371
for n_particles in Ns
@@ -108,17 +136,61 @@ for n_particles in Ns
108136
opt, Val(opt.global_update); maxiters = 500)
109137

110138
push!(gpu_async_times, el_time)
139+
140+
opt = ParallelPSOKernel(n_particles; backend, global_update = true)
141+
142+
gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
143+
copyto!(gpu_init_gbest, [init_gbest])
144+
145+
PSOGPU.vectorized_solve!(prob,
146+
gpu_init_gbest,
147+
backend_particles,
148+
opt, Val(opt.global_update); maxiters = 500)
149+
150+
el_time = @elapsed PSOGPU.vectorized_solve!(prob,
151+
gpu_init_gbest,
152+
backend_particles,
153+
opt, Val(opt.global_update); maxiters = 500)
154+
155+
push!(gpu_queue_lock_times, el_time)
156+
end
157+
158+
gpu_hybrid_times = Float64[]
159+
160+
Random.seed!(rng, 0)
161+
162+
for n_particles in Ns
163+
@info n_particles
164+
165+
sol = solve(prob,
166+
PSOGPU.HybridPSO(; backend = CUDABackend(),
167+
pso = PSOGPU.ParallelPSOKernel(n_particles;
168+
global_update = false,
169+
backend = CUDABackend()),
170+
local_opt = PSOGPU.LBFGS()), maxiters = 500,
171+
local_maxiters = 30)
172+
173+
sol = solve(prob,
174+
PSOGPU.HybridPSO(; backend = CUDABackend(),
175+
pso = PSOGPU.ParallelPSOKernel(n_particles;
176+
global_update = false,
177+
backend = CUDABackend()),
178+
local_opt = PSOGPU.LBFGS()), maxiters = 500,
179+
local_maxiters = 30)
180+
181+
push!(gpu_hybrid_times, sol.stats.time)
111182
end
112183

113184
@show cpu_times
114185
@show gpu_sync_times
115186
@show gpu_async_times
187+
@show gpu_queue_lock_times
116188

117189
using Plots
118190

119191
xticks = 10 .^ round.(range(1, 7, length = 13), digits = 2)
120192

121-
yticks = 10 .^ round.(range(1, -3, length = 11), digits = 2)
193+
yticks = 10 .^ round.(range(1, -3, length = 9), digits = 2)
122194

123195
plt = plot(Ns,
124196
gpu_sync_times,
@@ -127,14 +199,15 @@ plt = plot(Ns,
127199
linewidth = 2,
128200
label = "ParallelSyncPSOKernel: GPU",
129201
ylabel = "Time (s)",
130-
xlabel = "Trajectories",
131-
title = "Bechmarking the 10D Rosenbrock Problem",
202+
xlabel = "No. of Particles",
203+
title = "Benchmarking the 10D Rosenbrock Problem",
132204
legend = :topleft,
133205
xticks = xticks,
134206
yticks = yticks,
135207
marker = :circle,
136-
dpi = 600,
137-
color = :Green)
208+
dpi = 600
209+
# color = :Green
210+
)
138211

139212
plt = plot!(Ns,
140213
cpu_times,
@@ -143,17 +216,168 @@ plt = plot!(Ns,
143216
linewidth = 2,
144217
label = "ParallelSyncPSOKernel: CPU",
145218
marker = :circle,
146-
color = :Orange)
219+
ls = :dash
220+
# color = :Orange
221+
)
147222

148223
plt = plot!(Ns,
149224
gpu_async_times,
150225
xaxis = :log,
151226
yaxis = :log,
152227
linewidth = 2,
153228
label = "ParallelPSOKernel (Async): GPU",
154-
marker = :circle,
155-
color = :Green)
229+
marker = :circle
230+
# color = :Green
231+
)
232+
233+
plt = plot!(Ns,
234+
gpu_queue_lock_times,
235+
xaxis = :log,
236+
yaxis = :log,
237+
linewidth = 2,
238+
label = "ParallelPSOKernel (Queue-lock): GPU",
239+
marker = :circle
240+
# color = :Green
241+
)
242+
243+
plt = plot!(Ns,
244+
gpu_hybrid_times,
245+
xaxis = :log,
246+
yaxis = :log,
247+
linewidth = 2,
248+
label = "HybridPSO-LBFGS: GPU",
249+
marker = :circle
250+
# color = :Green
251+
)
252+
253+
savefig("benchmark_hybrid.svg")
254+
255+
using Statistics
156256

157257
@show mean(cpu_times ./ gpu_sync_times)
158258

159259
@show mean(cpu_times ./ gpu_async_times)
260+
261+
@show mean(cpu_times ./ gpu_queue_lock_times)
262+
263+
@show mean(cpu_times ./ gpu_hybrid_times)
264+
265+
@show mean(gpu_sync_times ./ gpu_queue_lock_times)
266+
267+
# cpu_times_total = Float64[]
268+
# gpu_sync_times_total = Float64[]
269+
# gpu_async_times_total = Float64[]
270+
# gpu_queue_lock_times_total = Float64[]
271+
# gpu_hybrid_times_total = Float64[]
272+
273+
# for n_particles in Ns
274+
# @info n_particles
275+
276+
# sol = solve(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()), maxiters = 500)
277+
278+
# el_time = @elapsed solve(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()), maxiters = 500)
279+
280+
# push!(cpu_times_total, el_time)
281+
282+
# sol = solve(prob,
283+
# ParallelSyncPSOKernel(n_particles; backend = CUDABackend()),
284+
# maxiters = 500)
285+
286+
# el_time = @elapsed solve(prob,
287+
# ParallelSyncPSOKernel(n_particles; backend = CUDABackend()),
288+
# maxiters = 500)
289+
290+
# push!(gpu_sync_times_total, el_time)
291+
292+
# sol = solve(prob,
293+
# ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = false),
294+
# maxiters = 500)
295+
296+
# el_time = @elapsed solve(prob,
297+
# ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = false),
298+
# maxiters = 500)
299+
300+
# push!(gpu_async_times_total, el_time)
301+
302+
# sol = solve(prob,
303+
# ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = true),
304+
# maxiters = 500)
305+
306+
# el_time = @elapsed solve(prob,
307+
# ParallelPSOKernel(n_particles; backend = CUDABackend(), global_update = true),
308+
# maxiters = 500)
309+
310+
# push!(gpu_queue_lock_times_total, el_time)
311+
312+
# sol = solve(prob,
313+
# PSOGPU.HybridPSO(; backend = CUDABackend(),
314+
# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
315+
# local_opt = PSOGPU.LBFGS()), maxiters = 500,
316+
# local_maxiters = 30)
317+
318+
# el_time = @elapsed solve(prob,
319+
# PSOGPU.HybridPSO(; backend = CUDABackend(),
320+
# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
321+
# local_opt = PSOGPU.LBFGS()), maxiters = 500,
322+
# local_maxiters = 30)
323+
324+
# push!(gpu_hybrid_times_total, el_time)
325+
326+
# end
327+
328+
# plt = plot(Ns,
329+
# gpu_sync_times_total,
330+
# xaxis = :log,
331+
# yaxis = :log,
332+
# linewidth = 2,
333+
# label = "ParallelSyncPSOKernel: GPU",
334+
# ylabel = "Time (s)",
335+
# xlabel = "No. of Particles",
336+
# title = "Benchmarking the 10D Rosenbrock Problem",
337+
# legend = :topleft,
338+
# xticks = xticks,
339+
# yticks = yticks,
340+
# marker = :circle,
341+
# dpi = 600,
342+
# # color = :Green
343+
# )
344+
345+
# plt = plot!(Ns,
346+
# cpu_times_total,
347+
# xaxis = :log,
348+
# yaxis = :log,
349+
# linewidth = 2,
350+
# label = "ParallelSyncPSOKernel: CPU",
351+
# marker = :circle,
352+
# # color = :Orange
353+
# )
354+
355+
# plt = plot!(Ns,
356+
# gpu_async_times_total,
357+
# xaxis = :log,
358+
# yaxis = :log,
359+
# linewidth = 2,
360+
# label = "ParallelPSOKernel (Async): GPU",
361+
# marker = :circle,
362+
# # color = :Green
363+
# )
364+
365+
# plt = plot!(Ns,
366+
# gpu_queue_lock_times_total,
367+
# xaxis = :log,
368+
# yaxis = :log,
369+
# linewidth = 2,
370+
# label = "ParallelPSOKernel (Queue-lock): GPU",
371+
# marker = :circle,
372+
# # color = :Green
373+
# )
374+
375+
# plt = plot!(Ns,
376+
# gpu_hybrid_times_total,
377+
# xaxis = :log,
378+
# yaxis = :log,
379+
# linewidth = 2,
380+
# label = "HybridPSO-LBFGS: GPU",
381+
# marker = :circle,
382+
# # color = :Green
383+
# )

0 commit comments

Comments
 (0)