2 changes: 1 addition & 1 deletion Project.toml
@@ -1,4 +1,4 @@
name = "PSOGPU"
name = "ParallelParticleSwarms"
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
authors = ["Utkarsh <[email protected]> and contributors"]
version = "0.1.0"
8 changes: 4 additions & 4 deletions README.md
@@ -1,15 +1,15 @@
# PSOGPU
# ParallelParticleSwarms

[![Build Status](https://github.com/utkarsh530/PSOGPU.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/utkarsh530/PSOGPU.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![codecov](https://codecov.io/gh/utkarsh530/PSOGPU.jl/graph/badge.svg?token=H5U5UAIRXX)](https://codecov.io/gh/utkarsh530/PSOGPU.jl)
[![Build Status](https://github.com/utkarsh530/ParallelParticleSwarms.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/utkarsh530/ParallelParticleSwarms.jl/actions/workflows/CI.yml?query=branch%3Amain)
[![codecov](https://codecov.io/gh/utkarsh530/ParallelParticleSwarms.jl/graph/badge.svg?token=H5U5UAIRXX)](https://codecov.io/gh/utkarsh530/ParallelParticleSwarms.jl)

Accelerating convex and non-convex optimization on GPUs using particle-swarm-based methods

Supports the generic Julia SciML interface

```julia

using PSOGPU, StaticArrays, CUDA
using ParallelParticleSwarms, StaticArrays, CUDA

lb = @SArray [-1.0f0, -1.0f0, -1.0f0]
ub = @SArray [10.0f0, 10.0f0, 10.0f0]
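# --- Illustrative continuation (not part of the diff above): a minimal sketch of a full
# solve under assumptions. The objective, initial guess, particle count, and maxiters
# below are placeholders chosen for illustration, not taken from the truncated README.
rosenbrock(x, p) = sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))

x0 = @SArray zeros(Float32, 3)   # initial guess inside [lb, ub]
p = @SArray [1.0f0, 100.0f0]     # objective parameters

# OptimizationProblem is part of the SciML interface (if it is not re-exported here,
# add `using Optimization` or `using SciMLBase`).
prob = OptimizationProblem(rosenbrock, x0, p; lb = lb, ub = ub)

# Launch the GPU-parallel PSO kernel; CUDABackend() is provided by CUDA.jl.
sol = solve(prob, ParallelPSOKernel(1000; backend = CUDABackend()), maxiters = 500)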
4 changes: 2 additions & 2 deletions benchmarks/CPU_vs_GPU/Manifest.toml
@@ -1484,9 +1484,9 @@ git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541"
uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab"
version = "4.2.7+0"

[[deps.PSOGPU]]
[[deps.ParallelParticleSwarms]]
deps = ["Adapt", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"]
path = "/home/utkarsh530/.julia/dev/PSOGPU"
path = "/home/utkarsh530/.julia/dev/ParallelParticleSwarms"
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
version = "1.0.0-DEV"

2 changes: 1 addition & 1 deletion benchmarks/CPU_vs_GPU/Project.toml
@@ -6,5 +6,5 @@ OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b"
OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86"
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1"
PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
51 changes: 26 additions & 25 deletions benchmarks/CPU_vs_GPU/benchmark.jl
@@ -2,7 +2,7 @@ using Pkg

Pkg.activate(@__DIR__)

using PSOGPU, StaticArrays, KernelAbstractions, Optimization
using ParallelParticleSwarms, StaticArrays, KernelAbstractions, Optimization
using CUDA

device!(2)
@@ -51,11 +51,11 @@ sol = solve(prob,
@show sol.stats.time

sol = solve(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), maxiters = 500,
local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
local_maxiters = 30)

@show sol.objective
@@ -77,7 +77,8 @@ for n_particles in Ns
## CPU solve
backend = CPU()
opt = ParallelSyncPSOKernel(n_particles; backend)
init_gbest, particles = PSOGPU.init_particles(prob, opt, typeof(prob.u0))
init_gbest, particles = ParallelParticleSwarms.init_particles(
prob, opt, typeof(prob.u0))

particles_eltype = eltype(particles) === Float64 ? Float32 : eltype(particles)

@@ -87,12 +88,12 @@

copyto!(backend_particles, particles)

PSOGPU.vectorized_solve!(prob,
ParallelParticleSwarms.vectorized_solve!(prob,
init_gbest,
backend_particles,
opt; maxiters = 500)

el_time = @elapsed PSOGPU.vectorized_solve!(prob,
el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob,
init_gbest,
backend_particles,
opt; maxiters = 500)
@@ -112,12 +113,12 @@

copyto!(backend_particles, particles)

PSOGPU.vectorized_solve!(prob,
ParallelParticleSwarms.vectorized_solve!(prob,
init_gbest,
backend_particles,
opt; maxiters = 500)

el_time = @elapsed PSOGPU.vectorized_solve!(prob,
el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob,
init_gbest,
backend_particles,
opt; maxiters = 500)
@@ -129,12 +130,12 @@
gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
copyto!(gpu_init_gbest, [init_gbest])

PSOGPU.vectorized_solve!(prob,
ParallelParticleSwarms.vectorized_solve!(prob,
gpu_init_gbest,
backend_particles,
opt, Val(opt.global_update); maxiters = 500)

el_time = @elapsed PSOGPU.vectorized_solve!(prob,
el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob,
gpu_init_gbest,
backend_particles,
opt, Val(opt.global_update); maxiters = 500)
@@ -146,12 +147,12 @@
gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,))
copyto!(gpu_init_gbest, [init_gbest])

PSOGPU.vectorized_solve!(prob,
ParallelParticleSwarms.vectorized_solve!(prob,
gpu_init_gbest,
backend_particles,
opt, Val(opt.global_update); maxiters = 500)

el_time = @elapsed PSOGPU.vectorized_solve!(prob,
el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob,
gpu_init_gbest,
backend_particles,
opt, Val(opt.global_update); maxiters = 500)
@@ -167,19 +168,19 @@
@info n_particles

sol = solve(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), maxiters = 500,
local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
local_maxiters = 30)

sol = solve(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), maxiters = 500,
local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
local_maxiters = 30)

push!(gpu_hybrid_times, sol.stats.time)
@@ -309,15 +310,15 @@ using Statistics
# push!(gpu_queue_lock_times_total, el_time)

# sol = solve(prob,
# PSOGPU.HybridPSO(; backend = CUDABackend(),
# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = PSOGPU.LBFGS()), maxiters = 500,
# ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
# pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
# local_maxiters = 30)

# el_time = @elapsed solve(prob,
# PSOGPU.HybridPSO(; backend = CUDABackend(),
# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = PSOGPU.LBFGS()), maxiters = 500,
# ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
# pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
# local_maxiters = 30)

# push!(gpu_hybrid_times_total, el_time)
42 changes: 21 additions & 21 deletions benchmarks/CPU_vs_GPU/wp_algs.jl
@@ -2,7 +2,7 @@ using Pkg

Pkg.activate(@__DIR__)

using PSOGPU, StaticArrays, KernelAbstractions, Optimization
using ParallelParticleSwarms, StaticArrays, KernelAbstractions, Optimization
using CUDA

device!(2)
@@ -55,11 +55,11 @@ sol = solve(prob,
@show sol.stats.time

sol = solve(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), maxiters = 500,
local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
local_maxiters = 30)

@show sol.objective
@@ -91,7 +91,7 @@ function solve_run(prob, alg, maxiters; runs = 10, kwargs...)
# 4 was a good candidate
Random.seed!(rng, 1)
for run in 1:runs
sol = if alg isa PSOGPU.HybridPSO
sol = if alg isa ParallelParticleSwarms.HybridPSO
solve(prob, alg; maxiters, local_maxiters = 30)
else
solve(prob, alg; maxiters, kwargs...)
@@ -135,11 +135,11 @@ for n_particles in Ns
push!(gpu_queue_lock_times, sol_time)

obj, solve_time = solve_run(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), 500)
local_opt = ParallelParticleSwarms.LBFGS()), 500)

push!(gpu_hybrid_loss, obj)
push!(gpu_hybrid_times, solve_time)
@@ -256,11 +256,11 @@ sol = solve(prob,
@show sol.stats.time

sol = solve(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), maxiters = 500,
local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500,
local_maxiters = 30)

@show sol.objective
@@ -300,7 +300,7 @@ function solve_run(prob, alg, maxiters; runs = 10, kwargs...)
# 4 was a good candidate
Random.seed!(rng, 9)
for run in 1:runs
sol = if alg isa PSOGPU.HybridPSO
sol = if alg isa ParallelParticleSwarms.HybridPSO
solve(prob, alg; maxiters, local_maxiters = maxiters)
else
solve(prob, alg; maxiters, kwargs...)
@@ -377,23 +377,23 @@ begin
push!(pso_cpu_time, sol_time)

# sol = solve(prob,
# PSOGPU.HybridPSO(; backend = CUDABackend(),
# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = PSOGPU.LBFGS()), maxiters = iters,
# ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
# pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = ParallelParticleSwarms.LBFGS()), maxiters = iters,
# local_maxiters = iters)

# sol = solve(prob,
# PSOGPU.HybridPSO(; backend = CUDABackend(),
# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = PSOGPU.LBFGS()), maxiters = iters,
# ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
# pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()),
# local_opt = ParallelParticleSwarms.LBFGS()), maxiters = iters,
# local_maxiters = iters)

obj, solve_time = solve_run(prob,
PSOGPU.HybridPSO(; backend = CUDABackend(),
pso = PSOGPU.ParallelPSOKernel(n_particles;
ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(),
pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles;
global_update = false,
backend = CUDABackend()),
local_opt = PSOGPU.LBFGS()), iters)
local_opt = ParallelParticleSwarms.LBFGS()), iters)

push!(hybrid_losses, obj)
push!(hybrid_time, solve_time)
2 changes: 1 addition & 1 deletion benchmarks/Fitzhugh_Nagumo/Manifest.toml
@@ -1515,7 +1515,7 @@ git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541"
uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab"
version = "4.2.7+0"

[[deps.PSOGPU]]
[[deps.ParallelParticleSwarms]]
deps = ["Adapt", "CUDA", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"]
path = "../.."
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
2 changes: 1 addition & 1 deletion benchmarks/Fitzhugh_Nagumo/Project.toml
@@ -4,6 +4,6 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
10 changes: 5 additions & 5 deletions benchmarks/Fitzhugh_Nagumo/ode_estimation.jl
@@ -1,6 +1,6 @@
using Pkg
Pkg.activate(@__DIR__)
using PSOGPU, OrdinaryDiffEq, StaticArrays
using ParallelParticleSwarms, OrdinaryDiffEq, StaticArrays
using SciMLSensitivity, Optimization

function f(u, p, t)
@@ -91,7 +91,7 @@ using OptimizationOptimJL

optprob = OptimizationProblem(loss, prob.p, (prob, t_short); lb = lb, ub = ub)

using PSOGPU
using ParallelParticleSwarms
using CUDA

using Random
@@ -101,7 +101,7 @@ rng = Random.default_rng()
Random.seed!(rng, 0)

opt = ParallelPSOKernel(n_particles)
gbest, particles = PSOGPU.init_particles(optprob, opt, typeof(prob.u0))
gbest, particles = ParallelParticleSwarms.init_particles(optprob, opt, typeof(prob.u0))

gpu_data = cu([SVector{length(prob.u0), eltype(prob.u0)}(@view data_short[:, i])
for i in 1:length(t_short)])
@@ -118,7 +118,7 @@ solver_cache = (; losses, gpu_particles, gpu_data, gbest)

adaptive = false

@time gsol = PSOGPU.parameter_estim_ode!(prob,
@time gsol = ParallelParticleSwarms.parameter_estim_ode!(prob,
solver_cache,
lb,
ub, Val(adaptive);
@@ -128,7 +128,7 @@ adaptive = false

using BenchmarkTools

@benchmark PSOGPU.parameter_estim_ode!($prob,
@benchmark ParallelParticleSwarms.parameter_estim_ode!($prob,
$(deepcopy(solver_cache)),
$lb,
$ub, $Val(adaptive);
2 changes: 1 addition & 1 deletion benchmarks/Lotka_Volterra/Manifest.toml
@@ -1527,7 +1527,7 @@ git-tree-sha1 = "949347156c25054de2db3b166c52ac4728cbad65"
uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
version = "0.11.31"

[[deps.PSOGPU]]
[[deps.ParallelParticleSwarms]]
deps = ["Adapt", "CUDA", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"]
path = "../.."
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
2 changes: 1 addition & 1 deletion benchmarks/Lotka_Volterra/Project.toml
@@ -5,7 +5,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86"
OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e"
OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"