Skip to content

Commit 9960c48

Browse files
authored
Merge pull request #40 from SciML/u/benchmarks
[WIP] Add benchmark scripts
2 parents 4305d3a + 811f1bb commit 9960c48

File tree

8 files changed

+3209
-18
lines changed

8 files changed

+3209
-18
lines changed

.github/workflows/CI.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ jobs:
1919
fail-fast: false
2020
matrix:
2121
version:
22-
- '1.6'
2322
- '1'
2423
os:
2524
- ubuntu-latest

Project.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "1.0.0-DEV"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
8+
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
89
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
910
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
1011
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -29,4 +30,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
2930
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
3031

3132
[targets]
32-
test = ["Test", "StaticArrays", "LinearAlgebra", "Optimization", "ForwardDiff", ]
33+
test = ["Test", "StaticArrays", "LinearAlgebra", "Optimization", "ForwardDiff"]

benchmarks/CPU_vs_GPU/benchmark.jl

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
using PSOGPU, StaticArrays, KernelAbstractions, Optimization
2+
using CUDA
3+
4+
# Select the GPU to benchmark on. The benchmark prefers device ordinal 2, but a
# hard-coded ordinal throws on hosts with fewer GPUs, so clamp it to the last
# available device. Hosts that do have a device 2 behave exactly as before.
# NOTE(review): assumes CUDA.jl device ordinals start at 0 - confirm.
device!(min(2, length(CUDA.devices()) - 1))

# Dimension of the Rosenbrock problem being optimized.
N = 10
7+
"""
    rosenbrock(x, p)

Evaluate the generalized Rosenbrock objective at `x`.

For every consecutive pair `(x[i], x[i + 1])` the term
`p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2` is accumulated, with `i`
running over `1:(length(x) - 1)`.
"""
function rosenbrock(x, p)
    return sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
               for i in 1:(length(x) - 1))
end
10+
# Initial guess: the origin, stored as a static Float32 array of length N.
x0 = @SArray zeros(Float32, N)

# Rosenbrock parameters, read by `rosenbrock` as p[1] and p[2].
p = @SArray Float32[1.0, 100.0]

# Box constraints: every coordinate is limited to [-1, 10].
lb = @SArray fill(-1.0f0, N)
ub = @SArray fill(10.0f0, N)

# Wrap the objective with the ForwardDiff AD choice and build the bounded problem.
optf = OptimizationFunction(rosenbrock, Optimization.AutoForwardDiff())
prob = OptimizationProblem(optf, x0, p; lb, ub)
16+
17+
n_particles = 10_000

"""
    report_solve(opt)

Solve the global `prob` with optimizer `opt` for 500 iterations, print the
objective value and the recorded solve time, and return the solution.
"""
function report_solve(opt)
    sol = solve(prob, opt, maxiters = 500)
    @show sol.objective
    @show sol.stats.time
    return sol
end

# One-off solves with each optimizer/backend combination that the sweep below
# benchmarks: synchronous kernel on CPU, synchronous kernel on GPU, and
# ParallelPSOKernel on GPU with `global_update = false`.
sol = report_solve(ParallelSyncPSOKernel(n_particles; backend = CPU()))

sol = report_solve(ParallelSyncPSOKernel(n_particles; backend = CUDABackend()))

sol = report_solve(ParallelPSOKernel(n_particles;
    backend = CUDABackend(),
    global_update = false))
37+
38+
# Wall-clock times (seconds) per particle count, one vector per configuration.
cpu_times = Float64[]
gpu_sync_times = Float64[]
gpu_async_times = Float64[]

"""
    bench_upload(backend, particles)

Allocate an array on `backend` with the same size as `particles` and copy the
particles into it. An element type of `Float64` is replaced by `Float32`; any
other element type is kept as is.
"""
function bench_upload(backend, particles)
    T = eltype(particles) === Float64 ? Float32 : eltype(particles)
    device_particles = KernelAbstractions.allocate(backend, T, size(particles))
    copyto!(device_particles, particles)
    return device_particles
end

"""
    bench_solve!(backend, prob, gbest, particles, opt, extra...; maxiters = 500)

Run `PSOGPU.vectorized_solve!` once as a warm-up (so compilation is excluded),
then run it again and return the elapsed wall-clock time of that second call in
seconds. `extra...` is forwarded as additional positional arguments.

The backend is synchronized after the warm-up and inside the timed region.
`vectorized_solve!` may already block until completion; the explicit
synchronization is a safeguard so that asynchronous launches can neither leak
warm-up work into the measurement nor return before the timed work is done.
"""
function bench_solve!(backend, prob, gbest, particles, opt, extra...; maxiters = 500)
    PSOGPU.vectorized_solve!(prob, gbest, particles, opt, extra...; maxiters = maxiters)
    KernelAbstractions.synchronize(backend)
    return @elapsed begin
        PSOGPU.vectorized_solve!(prob, gbest, particles, opt, extra...;
            maxiters = maxiters)
        KernelAbstractions.synchronize(backend)
    end
end

# Particle counts to sweep: 2^3, 2^5, ..., 2^19.
Ns = [2^i for i in 3:2:20]
for n_particles in Ns
    @info n_particles

    ## CPU solve
    cpu = CPU()
    cpu_opt = ParallelSyncPSOKernel(n_particles; backend = cpu)
    init_gbest, particles = PSOGPU.init_particles(prob, cpu_opt, typeof(prob.u0))
    cpu_particles = bench_upload(cpu, particles)
    push!(cpu_times, bench_solve!(cpu, prob, init_gbest, cpu_particles, cpu_opt))

    ## GPU solve (ParallelSyncPSOKernel), starting from the same host particles
    gpu = CUDABackend()
    sync_opt = ParallelSyncPSOKernel(n_particles; backend = gpu)
    gpu_particles = bench_upload(gpu, particles)
    push!(gpu_sync_times,
        bench_solve!(gpu, prob, init_gbest, gpu_particles, sync_opt))

    ## GPU solve (ParallelPSOKernel, global_update = false)
    # This variant takes the initial global best as a one-element device array
    # and reuses the particle buffer left behind by the solves above.
    async_opt = ParallelPSOKernel(n_particles; backend = gpu, global_update = false)
    gpu_init_gbest = KernelAbstractions.allocate(gpu, typeof(init_gbest), (1,))
    copyto!(gpu_init_gbest, [init_gbest])
    push!(gpu_async_times,
        bench_solve!(gpu, prob, gpu_init_gbest, gpu_particles, async_opt,
            Val(async_opt.global_update)))
end

@show cpu_times
@show gpu_sync_times
@show gpu_async_times
116+
117+
using Plots

# Tick positions for the log-log axes, named distinctly from the `xticks` /
# `yticks` keyword arguments they are passed to.
tick_xs = 10 .^ round.(range(1, 7, length = 13), digits = 2)
tick_ys = 10 .^ round.(range(1, -3, length = 11), digits = 2)

# First series creates the figure and carries all axis-level attributes.
# The x axis shows the particle counts in `Ns`.
plt = plot(Ns,
    gpu_sync_times;
    xaxis = :log,
    yaxis = :log,
    linewidth = 2,
    label = "ParallelSyncPSOKernel: GPU",
    ylabel = "Time (s)",
    xlabel = "Particles",
    title = "Benchmarking the 10D Rosenbrock Problem",
    legend = :topleft,
    xticks = tick_xs,
    yticks = tick_ys,
    marker = :circle,
    dpi = 600,
    color = :Green)

plt = plot!(Ns,
    cpu_times;
    xaxis = :log,
    yaxis = :log,
    linewidth = 2,
    label = "ParallelSyncPSOKernel: CPU",
    marker = :circle,
    color = :Orange)

# Give the async GPU series its own color so it can be told apart from the
# synchronous GPU series drawn in green above.
plt = plot!(Ns,
    gpu_async_times;
    xaxis = :log,
    yaxis = :log,
    linewidth = 2,
    label = "ParallelPSOKernel (Async): GPU",
    marker = :circle,
    color = :Blue)
156+
157+
# `mean` lives in the Statistics standard library, which the script never loads
# explicitly; import it here so the summary does not depend on another package
# happening to export it.
using Statistics: mean

# Average CPU-over-GPU time ratio across all particle counts.
@show mean(cpu_times ./ gpu_sync_times)

@show mean(cpu_times ./ gpu_async_times)

0 commit comments

Comments
 (0)