From 378b20fb83bf924c28ba1426fa427f4808beb282 Mon Sep 17 00:00:00 2001
From: Utkarsh
Date: Tue, 1 Apr 2025 18:33:32 -0400
Subject: [PATCH 1/2] Rename PSOGPU->ParallelParticleSwarms

---
 Project.toml                                 |  2 +-
 README.md                                    |  8 +--
 benchmarks/CPU_vs_GPU/Manifest.toml          |  4 +-
 benchmarks/CPU_vs_GPU/Project.toml           |  2 +-
 benchmarks/CPU_vs_GPU/benchmark.jl           | 50 +++++++++----------
 benchmarks/CPU_vs_GPU/wp_algs.jl             | 42 ++++++++--------
 benchmarks/Fitzhugh_Nagumo/Manifest.toml     |  2 +-
 benchmarks/Fitzhugh_Nagumo/Project.toml      |  2 +-
 benchmarks/Fitzhugh_Nagumo/ode_estimation.jl | 10 ++--
 benchmarks/Lotka_Volterra/Manifest.toml      |  2 +-
 benchmarks/Lotka_Volterra/Project.toml       |  2 +-
 benchmarks/Lotka_Volterra/lotka_volterra.jl  |  8 +--
 benchmarks/NeuralODE/Manifest.toml           |  4 +-
 benchmarks/NeuralODE/Project.toml            |  2 +-
 benchmarks/NeuralODE/adam_opt.jl             | 10 ++--
 benchmarks/PINN/pinn.jl                      |  2 +-
 benchmarks/adam_opt.jl                       | 10 ++--
 examples/neural_network/nn.jl                |  2 +-
 .../ode_estimation/Neural_ODE/neural_ode.jl  |  6 +--
 src/{PSOGPU.jl => ParallelParticleSwarms.jl} | 18 +++----
 src/algorithms.jl                            |  2 +-
 src/ode_pso.jl                               | 12 ++---
 test/constraints.jl                          |  2 +-
 test/gpu.jl                                  |  2 +-
 test/lbfgs.jl                                | 20 ++++----
 test/regression.jl                           |  2 +-
 test/reinit.jl                               |  6 +--
 27 files changed, 117 insertions(+), 117 deletions(-)
 rename src/{PSOGPU.jl => ParallelParticleSwarms.jl} (72%)

diff --git a/Project.toml b/Project.toml
index 4630218..978d830 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,4 +1,4 @@
-name = "PSOGPU"
+name = "ParallelParticleSwarms"
 uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419"
 authors = ["Utkarsh and contributors"]
 version = "0.1.0"
diff --git a/README.md b/README.md
index 9ee86f3..f15f3ea 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
-# PSOGPU
+# ParallelParticleSwarms

-[![Build Status](https://github.com/utkarsh530/PSOGPU.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/utkarsh530/PSOGPU.jl/actions/workflows/CI.yml?query=branch%3Amain)
-[![codecov](https://codecov.io/gh/utkarsh530/PSOGPU.jl/graph/badge.svg?token=H5U5UAIRXX)](https://codecov.io/gh/utkarsh530/PSOGPU.jl)
+[![Build Status](https://github.com/utkarsh530/ParallelParticleSwarms.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/utkarsh530/ParallelParticleSwarms.jl/actions/workflows/CI.yml?query=branch%3Amain)
+[![codecov](https://codecov.io/gh/utkarsh530/ParallelParticleSwarms.jl/graph/badge.svg?token=H5U5UAIRXX)](https://codecov.io/gh/utkarsh530/ParallelParticleSwarms.jl)

 Accelerating convex/non-convex optimization with GPUs using Particle-Swarm based methods

@@ -9,7 +9,7 @@ Supports generic Julia's SciML interface

 ```julia
-using PSOGPU, StaticArrays, CUDA
+using ParallelParticleSwarms, StaticArrays, CUDA

 lb = @SArray [-1.0f0, -1.0f0, -1.0f0]
 ub = @SArray [10.0f0, 10.0f0, 10.0f0]
diff --git a/benchmarks/CPU_vs_GPU/Manifest.toml b/benchmarks/CPU_vs_GPU/Manifest.toml
index dfcd7b2..51acb91 100644
--- a/benchmarks/CPU_vs_GPU/Manifest.toml
+++ b/benchmarks/CPU_vs_GPU/Manifest.toml
@@ -1484,9 +1484,9 @@ git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541"
 uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab"
 version = "4.2.7+0"

-[[deps.PSOGPU]]
+[[deps.ParallelParticleSwarms]]
 deps = ["Adapt", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"]
-path = "/home/utkarsh530/.julia/dev/PSOGPU"
+path = "/home/utkarsh530/.julia/dev/ParallelParticleSwarms"
 uuid =
"ab63da0c-63b4-40fa-a3b7-d2cba5be6419" version = "1.0.0-DEV" diff --git a/benchmarks/CPU_vs_GPU/Project.toml b/benchmarks/CPU_vs_GPU/Project.toml index 9fb6965..ac76bb8 100644 --- a/benchmarks/CPU_vs_GPU/Project.toml +++ b/benchmarks/CPU_vs_GPU/Project.toml @@ -6,5 +6,5 @@ OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b" OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OptimizationOptimisers = "42dfb2eb-d2b4-4451-abcd-913932933ac1" -PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" +ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/benchmarks/CPU_vs_GPU/benchmark.jl b/benchmarks/CPU_vs_GPU/benchmark.jl index 3421e90..3247bf8 100644 --- a/benchmarks/CPU_vs_GPU/benchmark.jl +++ b/benchmarks/CPU_vs_GPU/benchmark.jl @@ -2,7 +2,7 @@ using Pkg Pkg.activate(@__DIR__) -using PSOGPU, StaticArrays, KernelAbstractions, Optimization +using ParallelParticleSwarms, StaticArrays, KernelAbstractions, Optimization using CUDA device!(2) @@ -51,11 +51,11 @@ sol = solve(prob, @show sol.stats.time sol = solve(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), maxiters = 500, + local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, local_maxiters = 30) @show sol.objective @@ -77,7 +77,7 @@ for n_particles in Ns ## CPU solve backend = CPU() opt = ParallelSyncPSOKernel(n_particles; backend) - init_gbest, particles = PSOGPU.init_particles(prob, opt, typeof(prob.u0)) + init_gbest, particles = ParallelParticleSwarms.init_particles(prob, opt, typeof(prob.u0)) particles_eltype = eltype(particles) === Float64 ? 
Float32 : eltype(particles) @@ -87,12 +87,12 @@ for n_particles in Ns copyto!(backend_particles, particles) - PSOGPU.vectorized_solve!(prob, + ParallelParticleSwarms.vectorized_solve!(prob, init_gbest, backend_particles, opt; maxiters = 500) - el_time = @elapsed PSOGPU.vectorized_solve!(prob, + el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob, init_gbest, backend_particles, opt; maxiters = 500) @@ -112,12 +112,12 @@ for n_particles in Ns copyto!(backend_particles, particles) - PSOGPU.vectorized_solve!(prob, + ParallelParticleSwarms.vectorized_solve!(prob, init_gbest, backend_particles, opt; maxiters = 500) - el_time = @elapsed PSOGPU.vectorized_solve!(prob, + el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob, init_gbest, backend_particles, opt; maxiters = 500) @@ -129,12 +129,12 @@ for n_particles in Ns gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,)) copyto!(gpu_init_gbest, [init_gbest]) - PSOGPU.vectorized_solve!(prob, + ParallelParticleSwarms.vectorized_solve!(prob, gpu_init_gbest, backend_particles, opt, Val(opt.global_update); maxiters = 500) - el_time = @elapsed PSOGPU.vectorized_solve!(prob, + el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob, gpu_init_gbest, backend_particles, opt, Val(opt.global_update); maxiters = 500) @@ -146,12 +146,12 @@ for n_particles in Ns gpu_init_gbest = KernelAbstractions.allocate(backend, typeof(init_gbest), (1,)) copyto!(gpu_init_gbest, [init_gbest]) - PSOGPU.vectorized_solve!(prob, + ParallelParticleSwarms.vectorized_solve!(prob, gpu_init_gbest, backend_particles, opt, Val(opt.global_update); maxiters = 500) - el_time = @elapsed PSOGPU.vectorized_solve!(prob, + el_time = @elapsed ParallelParticleSwarms.vectorized_solve!(prob, gpu_init_gbest, backend_particles, opt, Val(opt.global_update); maxiters = 500) @@ -167,19 +167,19 @@ for n_particles in Ns @info n_particles sol = solve(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), maxiters = 500, + local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, local_maxiters = 30) sol = solve(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), maxiters = 500, + local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, local_maxiters = 30) push!(gpu_hybrid_times, sol.stats.time) @@ -309,15 +309,15 @@ using Statistics # push!(gpu_queue_lock_times_total, el_time) # sol = solve(prob, -# PSOGPU.HybridPSO(; backend = CUDABackend(), -# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), -# local_opt = PSOGPU.LBFGS()), maxiters = 500, +# ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), +# pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), +# local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, # local_maxiters = 30) # el_time = @elapsed solve(prob, -# PSOGPU.HybridPSO(; backend = CUDABackend(), -# pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), -# local_opt = PSOGPU.LBFGS()), maxiters 
= 500, +# ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), +# pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), +# local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, # local_maxiters = 30) # push!(gpu_hybrid_times_total, el_time) diff --git a/benchmarks/CPU_vs_GPU/wp_algs.jl b/benchmarks/CPU_vs_GPU/wp_algs.jl index ade5392..f822e74 100644 --- a/benchmarks/CPU_vs_GPU/wp_algs.jl +++ b/benchmarks/CPU_vs_GPU/wp_algs.jl @@ -2,7 +2,7 @@ using Pkg Pkg.activate(@__DIR__) -using PSOGPU, StaticArrays, KernelAbstractions, Optimization +using ParallelParticleSwarms, StaticArrays, KernelAbstractions, Optimization using CUDA device!(2) @@ -55,11 +55,11 @@ sol = solve(prob, @show sol.stats.time sol = solve(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), maxiters = 500, + local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, local_maxiters = 30) @show sol.objective @@ -91,7 +91,7 @@ function solve_run(prob, alg, maxiters; runs = 10, kwargs...) # 4 was a good candidate Random.seed!(rng, 1) for run in 1:runs - sol = if alg isa PSOGPU.HybridPSO + sol = if alg isa ParallelParticleSwarms.HybridPSO solve(prob, alg; maxiters, local_maxiters = 30) else solve(prob, alg; maxiters, kwargs...) @@ -135,11 +135,11 @@ for n_particles in Ns push!(gpu_queue_lock_times, sol_time) obj, solve_time = solve_run(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), 500) + local_opt = ParallelParticleSwarms.LBFGS()), 500) push!(gpu_hybrid_loss, obj) push!(gpu_hybrid_times, solve_time) @@ -256,11 +256,11 @@ sol = solve(prob, @show sol.stats.time sol = solve(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), maxiters = 500, + local_opt = ParallelParticleSwarms.LBFGS()), maxiters = 500, local_maxiters = 30) @show sol.objective @@ -300,7 +300,7 @@ function solve_run(prob, alg, maxiters; runs = 10, kwargs...) # 4 was a good candidate Random.seed!(rng, 9) for run in 1:runs - sol = if alg isa PSOGPU.HybridPSO + sol = if alg isa ParallelParticleSwarms.HybridPSO solve(prob, alg; maxiters, local_maxiters = maxiters) else solve(prob, alg; maxiters, kwargs...) 
@@ -377,23 +377,23 @@ begin push!(pso_cpu_time, sol_time) # sol = solve(prob, - # PSOGPU.HybridPSO(; backend = CUDABackend(), - # pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - # local_opt = PSOGPU.LBFGS()), maxiters = iters, + # ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + # pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), + # local_opt = ParallelParticleSwarms.LBFGS()), maxiters = iters, # local_maxiters = iters) # sol = solve(prob, - # PSOGPU.HybridPSO(; backend = CUDABackend(), - # pso = PSOGPU.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - # local_opt = PSOGPU.LBFGS()), maxiters = iters, + # ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + # pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), + # local_opt = ParallelParticleSwarms.LBFGS()), maxiters = iters, # local_maxiters = iters) obj, solve_time = solve_run(prob, - PSOGPU.HybridPSO(; backend = CUDABackend(), - pso = PSOGPU.ParallelPSOKernel(n_particles; + ParallelParticleSwarms.HybridPSO(; backend = CUDABackend(), + pso = ParallelParticleSwarms.ParallelPSOKernel(n_particles; global_update = false, backend = CUDABackend()), - local_opt = PSOGPU.LBFGS()), iters) + local_opt = ParallelParticleSwarms.LBFGS()), iters) push!(hybrid_losses, obj) push!(hybrid_time, solve_time) diff --git a/benchmarks/Fitzhugh_Nagumo/Manifest.toml b/benchmarks/Fitzhugh_Nagumo/Manifest.toml index cec2b8e..1691595 100644 --- a/benchmarks/Fitzhugh_Nagumo/Manifest.toml +++ b/benchmarks/Fitzhugh_Nagumo/Manifest.toml @@ -1515,7 +1515,7 @@ git-tree-sha1 = "8b3b19351fa24791f94d7ae85faf845ca1362541" uuid = "32165bc3-0280-59bc-8c0b-c33b6203efab" version = "4.2.7+0" -[[deps.PSOGPU]] +[[deps.ParallelParticleSwarms]] deps = ["Adapt", "CUDA", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"] path = "../.." 
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" diff --git a/benchmarks/Fitzhugh_Nagumo/Project.toml b/benchmarks/Fitzhugh_Nagumo/Project.toml index 32cc022..910779f 100644 --- a/benchmarks/Fitzhugh_Nagumo/Project.toml +++ b/benchmarks/Fitzhugh_Nagumo/Project.toml @@ -4,6 +4,6 @@ BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" +ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/benchmarks/Fitzhugh_Nagumo/ode_estimation.jl b/benchmarks/Fitzhugh_Nagumo/ode_estimation.jl index 1e032ec..89a751e 100644 --- a/benchmarks/Fitzhugh_Nagumo/ode_estimation.jl +++ b/benchmarks/Fitzhugh_Nagumo/ode_estimation.jl @@ -1,6 +1,6 @@ using Pkg Pkg.activate(@__DIR__) -using PSOGPU, OrdinaryDiffEq, StaticArrays +using ParallelParticleSwarms, OrdinaryDiffEq, StaticArrays using SciMLSensitivity, Optimization function f(u, p, t) @@ -91,7 +91,7 @@ using OptimizationOptimJL optprob = OptimizationProblem(loss, prob.p, (prob, t_short); lb = lb, ub = ub) -using PSOGPU +using ParallelParticleSwarms using CUDA using Random @@ -101,7 +101,7 @@ rng = Random.default_rng() Random.seed!(rng, 0) opt = ParallelPSOKernel(n_particles) -gbest, particles = PSOGPU.init_particles(optprob, opt, typeof(prob.u0)) +gbest, particles = ParallelParticleSwarms.init_particles(optprob, opt, typeof(prob.u0)) gpu_data = cu([SVector{length(prob.u0), eltype(prob.u0)}(@view data_short[:, i]) for i in 1:length(t_short)]) @@ -118,7 +118,7 @@ solver_cache = (; losses, gpu_particles, gpu_data, gbest) adaptive = false -@time gsol = PSOGPU.parameter_estim_ode!(prob, +@time gsol = ParallelParticleSwarms.parameter_estim_ode!(prob, solver_cache, lb, ub, Val(adaptive); @@ -128,7 +128,7 @@ adaptive = false using BenchmarkTools -@benchmark PSOGPU.parameter_estim_ode!($prob, +@benchmark ParallelParticleSwarms.parameter_estim_ode!($prob, $(deepcopy(solver_cache)), $lb, $ub, $Val(adaptive); diff --git a/benchmarks/Lotka_Volterra/Manifest.toml b/benchmarks/Lotka_Volterra/Manifest.toml index 91d8d9b..45b2d92 100644 --- a/benchmarks/Lotka_Volterra/Manifest.toml +++ b/benchmarks/Lotka_Volterra/Manifest.toml @@ -1527,7 +1527,7 @@ git-tree-sha1 = "949347156c25054de2db3b166c52ac4728cbad65" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" version = "0.11.31" -[[deps.PSOGPU]] +[[deps.ParallelParticleSwarms]] deps = ["Adapt", "CUDA", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"] path = "../.." 
uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" diff --git a/benchmarks/Lotka_Volterra/Project.toml b/benchmarks/Lotka_Volterra/Project.toml index fd37329..7890032 100644 --- a/benchmarks/Lotka_Volterra/Project.toml +++ b/benchmarks/Lotka_Volterra/Project.toml @@ -5,7 +5,7 @@ CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" +ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/benchmarks/Lotka_Volterra/lotka_volterra.jl b/benchmarks/Lotka_Volterra/lotka_volterra.jl index ad11bd5..db51464 100644 --- a/benchmarks/Lotka_Volterra/lotka_volterra.jl +++ b/benchmarks/Lotka_Volterra/lotka_volterra.jl @@ -83,7 +83,7 @@ using OptimizationOptimJL optprob = OptimizationProblem(loss, prob.p, (prob, t_short); lb = lb, ub = ub) -using PSOGPU +using ParallelParticleSwarms using CUDA CUDA.allowscalar(false) @@ -91,7 +91,7 @@ CUDA.allowscalar(false) n_particles = 10_000 opt = ParallelPSOKernel(n_particles) -gbest, particles = PSOGPU.init_particles(optprob, opt, typeof(prob.u0)) +gbest, particles = ParallelParticleSwarms.init_particles(optprob, opt, typeof(prob.u0)) @show gbest @@ -114,13 +114,13 @@ solver_cache = (; losses, gpu_particles, gpu_data, gbest) adaptive = false -@time gsol = PSOGPU.parameter_estim_ode!(prob, solver_cache, +@time gsol = ParallelParticleSwarms.parameter_estim_ode!(prob, solver_cache, lb, ub, Val(adaptive); saveat = t_short, dt = 0.1, maxiters = 100) using BenchmarkTools -@benchmark PSOGPU.parameter_estim_ode!($prob, $(deepcopy(solver_cache)), +@benchmark ParallelParticleSwarms.parameter_estim_ode!($prob, $(deepcopy(solver_cache)), $lb, $ub, Val(adaptive); saveat = t_short, dt = 0.1, maxiters = 100) diff --git a/benchmarks/NeuralODE/Manifest.toml b/benchmarks/NeuralODE/Manifest.toml index 9052b9c..64e8bdd 100644 --- a/benchmarks/NeuralODE/Manifest.toml +++ b/benchmarks/NeuralODE/Manifest.toml @@ -1723,9 +1723,9 @@ git-tree-sha1 = "949347156c25054de2db3b166c52ac4728cbad65" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" version = "0.11.31" -[[deps.PSOGPU]] +[[deps.ParallelParticleSwarms]] deps = ["Adapt", "CUDA", "DiffEqGPU", "Enzyme", "ForwardDiff", "KernelAbstractions", "MLDatasets", "NonlinearSolve", "Optimization", "QuasiMonteCarlo", "Random", "SciMLBase", "Setfield", "SimpleChains", "SimpleNonlinearSolve", "StaticArrays"] -path = "/home/utkarsh530/.julia/dev/PSOGPU" +path = "/home/utkarsh530/.julia/dev/ParallelParticleSwarms" uuid = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" version = "1.0.0-DEV" diff --git a/benchmarks/NeuralODE/Project.toml b/benchmarks/NeuralODE/Project.toml index b187a9c..5cd02e5 100644 --- a/benchmarks/NeuralODE/Project.toml +++ b/benchmarks/NeuralODE/Project.toml @@ -6,7 +6,7 @@ Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" OptimizationFlux = "253f991c-a7b2-45f8-8852-8b9a9df78a86" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" -PSOGPU = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" +ParallelParticleSwarms = "ab63da0c-63b4-40fa-a3b7-d2cba5be6419" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1" SimpleChains = 
"de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5" diff --git a/benchmarks/NeuralODE/adam_opt.jl b/benchmarks/NeuralODE/adam_opt.jl index 08e38be..986354d 100644 --- a/benchmarks/NeuralODE/adam_opt.jl +++ b/benchmarks/NeuralODE/adam_opt.jl @@ -82,7 +82,7 @@ moptprob = OptimizationProblem(optf, MArray{Tuple{size(p_nn)...}}(p_nn...)) @benchmark Optimization.solve(moptprob, LBFGS(), maxiters = 100) -## PSOGPU stuff +## ParallelParticleSwarms stuff function nn_fn(u::T, p, t)::T where {T} nn, ps = p @@ -114,7 +114,7 @@ ub = @SArray fill(Float32(Inf), length(p_static)) soptprob = OptimizationProblem(loss, prob_nn.p[2], (prob_nn, tsteps); lb = lb, ub = ub) -using PSOGPU +using ParallelParticleSwarms using CUDA using KernelAbstractions using Adapt @@ -126,7 +126,7 @@ backend = CUDABackend() Random.seed!(rng, 0) opt = ParallelPSOKernel(n_particles) -gbest, particles = PSOGPU.init_particles(soptprob, opt, typeof(prob.u0)) +gbest, particles = ParallelParticleSwarms.init_particles(soptprob, opt, typeof(prob.u0)) gpu_data = adapt(backend, [SVector{length(prob_nn.u0), eltype(prob_nn.u0)}(@view data[:, i]) @@ -146,7 +146,7 @@ solver_cache = (; losses, gpu_particles, gpu_data, gbest) adaptive = true -@time gsol = PSOGPU.parameter_estim_ode!(prob_nn, +@time gsol = ParallelParticleSwarms.parameter_estim_ode!(prob_nn, solver_cache, lb, ub, Val(adaptive); @@ -155,7 +155,7 @@ adaptive = true prob_func = prob_func, maxiters = 100) -@benchmark PSOGPU.parameter_estim_ode!($prob_nn, +@benchmark ParallelParticleSwarms.parameter_estim_ode!($prob_nn, $(deepcopy(solver_cache)), $lb, $ub, Val(adaptive); diff --git a/benchmarks/PINN/pinn.jl b/benchmarks/PINN/pinn.jl index 373dbe7..c36dc93 100644 --- a/benchmarks/PINN/pinn.jl +++ b/benchmarks/PINN/pinn.jl @@ -1,4 +1,4 @@ -using NeuralPDE, Lux, Optimization, OptimizationOptimJL, PSOGPU +using NeuralPDE, Lux, Optimization, OptimizationOptimJL, ParallelParticleSwarms import ModelingToolkit: Interval @parameters t, x diff --git a/benchmarks/adam_opt.jl b/benchmarks/adam_opt.jl index e8879de..9483061 100644 --- a/benchmarks/adam_opt.jl +++ b/benchmarks/adam_opt.jl @@ -60,7 +60,7 @@ optprob = Optimization.OptimizationProblem(optf, p_nn) @benchmark Optimization.solve(optprob, ADAM(0.05), maxiters = 100) -## PSOGPU stuff +## ParallelParticleSwarms stuff function nn_fn(u::T, p, t)::T where {T} nn, ps = p @@ -92,7 +92,7 @@ ub = @SArray fill(Float32(Inf), length(p_static)) soptprob = OptimizationProblem(loss, prob_nn.p[2], (prob_nn, tsteps); lb = lb, ub = ub) -using PSOGPU +using ParallelParticleSwarms using CUDA using KernelAbstractions using Adapt @@ -100,7 +100,7 @@ using Adapt backend = CUDABackend() ## Initialize Particles -gbest, particles = PSOGPU.init_particles(soptprob, +gbest, particles = ParallelParticleSwarms.init_particles(soptprob, ParallelSyncPSOKernel(n_particles; backend = CUDABackend()), typeof(prob.u0)) @@ -120,7 +120,7 @@ losses = adapt(backend, ones(eltype(prob.u0), (1, n_particles))) solver_cache = (; losses, gpu_particles, gpu_data, gbest) -@time gsol = PSOGPU.parameter_estim_ode!(prob_nn, +@time gsol = ParallelParticleSwarms.parameter_estim_ode!(prob_nn, solver_cache, lb, ub; @@ -129,7 +129,7 @@ solver_cache = (; losses, gpu_particles, gpu_data, gbest) prob_func = prob_func, maxiters = 100) -@benchmark PSOGPU.parameter_estim_ode!($prob_nn, +@benchmark ParallelParticleSwarms.parameter_estim_ode!($prob_nn, $(deepcopy(solver_cache)), $lb, $ub; diff --git a/examples/neural_network/nn.jl b/examples/neural_network/nn.jl index 76c3d70..0858dba 100644 --- 
a/examples/neural_network/nn.jl
+++ b/examples/neural_network/nn.jl
@@ -45,7 +45,7 @@ p = SimpleChains.init_params(lenet);

 lenetloss(xtrain, p)

-using Optimization, PSOGPU
+using Optimization, ParallelParticleSwarms

 lb = -ones(length(p)) .* 10
 ub = ones(length(p)) .* 10
diff --git a/examples/ode_estimation/Neural_ODE/neural_ode.jl b/examples/ode_estimation/Neural_ODE/neural_ode.jl
index 0449db0..210b191 100644
--- a/examples/ode_estimation/Neural_ODE/neural_ode.jl
+++ b/examples/ode_estimation/Neural_ODE/neural_ode.jl
@@ -47,11 +47,11 @@ ub = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(10),

 optprob = OptimizationProblem(loss, prob_nn.p[2], (prob_nn, tsteps); lb = lb, ub = ub)

-using PSOGPU
+using ParallelParticleSwarms
 using CUDA

 opt = ParallelPSOKernel(n_particles)
-gbest, particles = PSOGPU.init_particles(optprob, opt, typeof(prob.u0))
+gbest, particles = ParallelParticleSwarms.init_particles(optprob, opt, typeof(prob.u0))

 gpu_data = cu([SVector{length(prob_nn.u0), eltype(prob_nn.u0)}(@view data[:, i])
                for i in 1:length(tsteps)])
@@ -64,7 +64,7 @@ function prob_func(prob, gpu_particle)
     return remake(prob, p = (prob.p[1], gpu_particle.position))
 end

-@time gsol = PSOGPU.parameter_estim_ode!(prob_nn,
+@time gsol = ParallelParticleSwarms.parameter_estim_ode!(prob_nn,
     gpu_particles,
     gbest,
     gpu_data,
diff --git a/src/PSOGPU.jl b/src/ParallelParticleSwarms.jl
similarity index 72%
rename from src/PSOGPU.jl
rename to src/ParallelParticleSwarms.jl
index 4a30f28..9b86942 100644
--- a/src/PSOGPU.jl
+++ b/src/ParallelParticleSwarms.jl
@@ -1,4 +1,4 @@
-module PSOGPU
+module ParallelParticleSwarms

 using SciMLBase, StaticArrays, Setfield, KernelAbstractions
 using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
@@ -39,26 +39,26 @@ mutable struct MPSOGBest{T}
 end

 ## required overloads for min or max computation on particles
-function Base.isless(a::PSOGPU.SPSOParticle{T1, T2},
-        b::PSOGPU.SPSOParticle{T1, T2}) where {T1, T2}
+function Base.isless(a::ParallelParticleSwarms.SPSOParticle{T1, T2},
+        b::ParallelParticleSwarms.SPSOParticle{T1, T2}) where {T1, T2}
     a.best_cost < b.best_cost
 end

-function Base.isless(a::PSOGPU.SPSOGBest{T1, T2},
-        b::PSOGPU.SPSOGBest{T1, T2}) where {T1, T2}
+function Base.isless(a::ParallelParticleSwarms.SPSOGBest{T1, T2},
+        b::ParallelParticleSwarms.SPSOGBest{T1, T2}) where {T1, T2}
     a.cost < b.cost
 end

-function Base.typemax(::Type{PSOGPU.SPSOParticle{T1, T2}}) where {T1, T2}
-    PSOGPU.SPSOParticle{T1, T2}(similar(T1),
+function Base.typemax(::Type{ParallelParticleSwarms.SPSOParticle{T1, T2}}) where {T1, T2}
+    ParallelParticleSwarms.SPSOParticle{T1, T2}(similar(T1),
         similar(T1),
         typemax(T2),
         similar(T1),
         typemax(T2))
 end

-function Base.typemax(::Type{PSOGPU.SPSOGBest{T1, T2}}) where {T1, T2}
-    PSOGPU.SPSOGBest{T1, T2}(similar(T1),
+function Base.typemax(::Type{ParallelParticleSwarms.SPSOGBest{T1, T2}}) where {T1, T2}
+    ParallelParticleSwarms.SPSOGBest{T1, T2}(similar(T1),
         typemax(T2))
 end
diff --git a/src/algorithms.jl b/src/algorithms.jl
index 20e514a..290b8f5 100644
--- a/src/algorithms.jl
+++ b/src/algorithms.jl
@@ -145,7 +145,7 @@ end

 function HybridPSO(; backend = CPU(),
-        pso = PSOGPU.ParallelPSOKernel(100; global_update = false, backend),
+        pso = ParallelParticleSwarms.ParallelPSOKernel(100; global_update = false, backend),
         local_opt = LBFGS())
     HybridPSO(pso, local_opt, backend)
 end
diff --git a/src/ode_pso.jl b/src/ode_pso.jl
index 14ca2e9..42c4f2d 100644
--- a/src/ode_pso.jl
+++ b/src/ode_pso.jl
@@ -55,8 +55,8 @@ function parameter_estim_ode!(prob::ODEProblem, cache,
         maxiters = 100, kwargs...)
     (losses, gpu_particles, gpu_data, gbest) = cache
     backend = get_backend(gpu_particles)
-    update_states! = PSOGPU._update_particle_states!(backend)
-    update_costs! = PSOGPU._update_particle_costs!(backend)
+    update_states! = ParallelParticleSwarms._update_particle_states!(backend)
+    update_costs! = ParallelParticleSwarms._update_particle_costs!(backend)

     improb = make_prob_compatible(prob)
@@ -92,7 +92,7 @@ function parameter_estim_ode!(prob::ODEProblem, cache,

         KernelAbstractions.synchronize(backend)

-        gbest = PSOGPU.SPSOGBest(best_particle.best_position, best_particle.best_cost)
+        gbest = ParallelParticleSwarms.SPSOGBest(best_particle.best_position, best_particle.best_cost)
         w = w * wdamp
     end
     return gbest
@@ -108,8 +108,8 @@ function parameter_estim_ode!(prob::ODEProblem, cache,
         maxiters = 100, kwargs...)
     (losses, gpu_particles, gpu_data, gbest) = cache
     backend = get_backend(gpu_particles)
-    update_states! = PSOGPU._update_particle_states!(backend)
-    update_costs! = PSOGPU._update_particle_costs!(backend)
+    update_states! = ParallelParticleSwarms._update_particle_states!(backend)
+    update_costs! = ParallelParticleSwarms._update_particle_costs!(backend)

     improb = make_prob_compatible(prob)
@@ -145,7 +145,7 @@ function parameter_estim_ode!(prob::ODEProblem, cache,

         KernelAbstractions.synchronize(backend)

-        gbest = PSOGPU.SPSOGBest(best_particle.best_position, best_particle.best_cost)
+        gbest = ParallelParticleSwarms.SPSOGBest(best_particle.best_position, best_particle.best_cost)
         w = w * wdamp
     end
     return gbest
diff --git a/test/constraints.jl b/test/constraints.jl
index 35cf6e0..b25432d 100644
--- a/test/constraints.jl
+++ b/test/constraints.jl
@@ -1,4 +1,4 @@
-using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random
+using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random

 include("./utils.jl")

diff --git a/test/gpu.jl b/test/gpu.jl
index c072517..70ab4fb 100644
--- a/test/gpu.jl
+++ b/test/gpu.jl
@@ -1,4 +1,4 @@
-using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random
+using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random

 include("./utils.jl")

diff --git a/test/lbfgs.jl b/test/lbfgs.jl
index 59817d6..2da82ea 100644
--- a/test/lbfgs.jl
+++ b/test/lbfgs.jl
@@ -1,4 +1,4 @@
-using PSOGPU, Optimization, StaticArrays
+using ParallelParticleSwarms, Optimization, StaticArrays

 include("./utils.jl")

@@ -12,7 +12,7 @@ x0 = SVector{2}(x0)
 prob = OptimizationProblem(optprob, x0)
 l1 = objf(x0, nothing)
 sol = Optimization.solve(prob,
-    PSOGPU.LBFGS(),
+    ParallelParticleSwarms.LBFGS(),
     maxiters = 10)

 N = 10
@@ -30,21 +30,21 @@ prob = OptimizationProblem(optf, x0, p)
 l0 = rosenbrock(x0, p)

 @time sol = Optimization.solve(prob,
-    PSOGPU.LBFGS(; threshold = 7),
+    ParallelParticleSwarms.LBFGS(; threshold = 7),
     maxiters = 20)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    PSOGPU.ParallelPSOKernel(100; backend),
+    ParallelParticleSwarms.ParallelPSOKernel(100; backend),
     maxiters = 100)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    PSOGPU.HybridPSO(; backend),
+    ParallelParticleSwarms.HybridPSO(; backend),
     maxiters = 30)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = backend),
+    ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.BFGS(), backend = backend),
     maxiters = 30)
 @show sol.objective
@@ -53,20 +53,20 @@ prob = OptimizationProblem(optf, x0, p)
 l0 = rosenbrock(x0, p)

 @time sol = Optimization.solve(prob,
-    PSOGPU.LBFGS(; threshold = 7),
+    ParallelParticleSwarms.LBFGS(; threshold = 7),
     maxiters = 20)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    PSOGPU.ParallelPSOKernel(100, backend = backend),
+    ParallelParticleSwarms.ParallelPSOKernel(100, backend = backend),
     maxiters = 100)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    PSOGPU.HybridPSO(; backend = backend),
+    ParallelParticleSwarms.HybridPSO(; backend = backend),
     local_maxiters = 30)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = backend),
+    ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.BFGS(), backend = backend),
     local_maxiters = 30)
 @show sol.objective
diff --git a/test/regression.jl b/test/regression.jl
index ccf1963..18d58fe 100644
--- a/test/regression.jl
+++ b/test/regression.jl
@@ -1,4 +1,4 @@
-using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random, KernelAbstractions
+using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random, KernelAbstractions
 using QuasiMonteCarlo

 @testset "Rosenbrock test dimension = $(N)" for N in 2:3
diff --git a/test/reinit.jl b/test/reinit.jl
index 6c60e0c..6264347 100644
--- a/test/reinit.jl
+++ b/test/reinit.jl
@@ -1,4 +1,4 @@
-using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random, KernelAbstractions
+using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random, KernelAbstractions
 using QuasiMonteCarlo

 ## Solving the rosenbrock problem
@@ -28,10 +28,10 @@ cache = init(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()))

 reinit!(cache)

-cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = CPU()))
+cache = init(prob, ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.BFGS(), backend = CPU()))

 reinit!(cache)

-cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.LBFGS(), backend = CPU()))
+cache = init(prob, ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.LBFGS(), backend = CPU()))

 reinit!(cache)

From 3cfa812300fcf0b8b2aa08ed2e3986601cdd21c3 Mon Sep 17 00:00:00 2001
From: Utkarsh
Date: Tue, 1 Apr 2025 18:39:01 -0400
Subject: [PATCH 2/2] format

---
 benchmarks/CPU_vs_GPU/benchmark.jl |  3 ++-
 src/ode_pso.jl                     |  6 ++++--
 test/lbfgs.jl                      |  6 ++++--
 test/regression.jl                 |  3 ++-
 test/reinit.jl                     | 11 ++++++++---
 5 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/benchmarks/CPU_vs_GPU/benchmark.jl b/benchmarks/CPU_vs_GPU/benchmark.jl
index 3247bf8..3202a24 100644
--- a/benchmarks/CPU_vs_GPU/benchmark.jl
+++ b/benchmarks/CPU_vs_GPU/benchmark.jl
@@ -77,7 +77,8 @@ for n_particles in Ns
     ## CPU solve
     backend = CPU()
     opt = ParallelSyncPSOKernel(n_particles; backend)
-    init_gbest, particles = ParallelParticleSwarms.init_particles(prob, opt, typeof(prob.u0))
+    init_gbest, particles = ParallelParticleSwarms.init_particles(
+        prob, opt, typeof(prob.u0))

     particles_eltype = eltype(particles) === Float64 ?
                        Float32 : eltype(particles)
diff --git a/src/ode_pso.jl b/src/ode_pso.jl
index 42c4f2d..318671d 100644
--- a/src/ode_pso.jl
+++ b/src/ode_pso.jl
@@ -92,7 +92,8 @@ function parameter_estim_ode!(prob::ODEProblem, cache,

         KernelAbstractions.synchronize(backend)

-        gbest = ParallelParticleSwarms.SPSOGBest(best_particle.best_position, best_particle.best_cost)
+        gbest = ParallelParticleSwarms.SPSOGBest(
+            best_particle.best_position, best_particle.best_cost)
         w = w * wdamp
     end
     return gbest
@@ -145,7 +146,8 @@ function parameter_estim_ode!(prob::ODEProblem, cache,

         KernelAbstractions.synchronize(backend)

-        gbest = ParallelParticleSwarms.SPSOGBest(best_particle.best_position, best_particle.best_cost)
+        gbest = ParallelParticleSwarms.SPSOGBest(
+            best_particle.best_position, best_particle.best_cost)
         w = w * wdamp
     end
     return gbest
diff --git a/test/lbfgs.jl b/test/lbfgs.jl
index 2da82ea..1eedb19 100644
--- a/test/lbfgs.jl
+++ b/test/lbfgs.jl
@@ -44,7 +44,8 @@ l0 = rosenbrock(x0, p)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.BFGS(), backend = backend),
+    ParallelParticleSwarms.HybridPSO(;
+        local_opt = ParallelParticleSwarms.BFGS(), backend = backend),
     maxiters = 30)
 @show sol.objective

@@ -67,6 +68,7 @@ l0 = rosenbrock(x0, p)
 @show sol.objective

 @time sol = Optimization.solve(prob,
-    ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.BFGS(), backend = backend),
+    ParallelParticleSwarms.HybridPSO(;
+        local_opt = ParallelParticleSwarms.BFGS(), backend = backend),
     local_maxiters = 30)
 @show sol.objective
diff --git a/test/regression.jl b/test/regression.jl
index 18d58fe..a50b7a8 100644
--- a/test/regression.jl
+++ b/test/regression.jl
@@ -1,4 +1,5 @@
-using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random, KernelAbstractions
+using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random,
+      KernelAbstractions
 using QuasiMonteCarlo

 @testset "Rosenbrock test dimension = $(N)" for N in 2:3
diff --git a/test/reinit.jl b/test/reinit.jl
index 6264347..3fb0ae5 100644
--- a/test/reinit.jl
+++ b/test/reinit.jl
@@ -1,4 +1,5 @@
-using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random, KernelAbstractions
+using ParallelParticleSwarms, StaticArrays, SciMLBase, Test, LinearAlgebra, Random,
+      KernelAbstractions
 using QuasiMonteCarlo

 ## Solving the rosenbrock problem
@@ -28,10 +29,14 @@ cache = init(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()))

 reinit!(cache)

-cache = init(prob, ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.BFGS(), backend = CPU()))
+cache = init(prob,
+    ParallelParticleSwarms.HybridPSO(;
+        local_opt = ParallelParticleSwarms.BFGS(), backend = CPU()))

 reinit!(cache)

-cache = init(prob, ParallelParticleSwarms.HybridPSO(; local_opt = ParallelParticleSwarms.LBFGS(), backend = CPU()))
+cache = init(prob,
+    ParallelParticleSwarms.HybridPSO(;
+        local_opt = ParallelParticleSwarms.LBFGS(), backend = CPU()))

 reinit!(cache)
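
Usage sketch (reviewer aid, not part of either patch): the README hunk in PATCH 1/2 only shows the imports and the bounds, so below is a minimal end-to-end example of the renamed API stitched together from the README and the `ParallelPSOKernel`/`solve` calls exercised in test/gpu.jl and the benchmarks. The Rosenbrock objective, initial guess, parameters, and particle count are illustrative assumptions, not code from this repository.

```julia
# Minimal sketch of the renamed package in use (assumes a CUDA-capable GPU).
# The objective, x0, p, and n_particles below are illustrative, not from the patch.
using ParallelParticleSwarms, StaticArrays, CUDA
using Optimization

# Generalized Rosenbrock objective (hypothetical stand-in for the test problems).
rosenbrock(x, p) = sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
                       for i in 1:(length(x) - 1))

lb = @SArray [-1.0f0, -1.0f0, -1.0f0]
ub = @SArray [10.0f0, 10.0f0, 10.0f0]
x0 = @SArray [3.0f0, 3.0f0, 3.0f0]
p = @SArray [1.0f0, 100.0f0]

prob = OptimizationProblem(rosenbrock, x0, p; lb = lb, ub = ub)

# GPU-kernel PSO, mirroring the ParallelPSOKernel calls in the benchmarks above.
sol = solve(prob, ParallelPSOKernel(1000; backend = CUDABackend()), maxiters = 500)
@show sol.objective
```

If no GPU is available, `ParallelSyncPSOKernel(n_particles; backend = CPU())`, as used in test/reinit.jl and the CPU baseline of benchmarks/CPU_vs_GPU/benchmark.jl, is the CPU-side variant of the same interface.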