3 changes: 2 additions & 1 deletion .buildkite/runtests.yml
@@ -4,6 +4,7 @@ steps:
setup:
version:
- "1"
- "1.10"
env:
GROUP: CUDA
plugins:
@@ -30,7 +31,7 @@ steps:
matrix:
setup:
version:
- "1"
- "1.10"
env:
GROUP: AMDGPU
plugins:
18 changes: 13 additions & 5 deletions Project.toml
@@ -5,22 +5,30 @@ version = "1.0.0-DEV"

[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[compat]
julia = "1.6"
Adapt = "4.3"
DiffEqGPU = "3.4"
Enzyme = "<0.13.35"
ForwardDiff = "0.10"
KernelAbstractions = "<0.9.30"
Optimization = "4.1"
QuasiMonteCarlo = "0.3"
Reexport = "1.2"
SciMLBase = "2.79"
Setfield = "1.1"
SimpleNonlinearSolve = "2.2"
StaticArrays = "1.9"
julia = "1.10"
6 changes: 5 additions & 1 deletion benchmarks/CPU_vs_GPU/benchmark.jl
@@ -9,7 +9,11 @@ device!(2)

N = 10
function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end
x0 = @SArray zeros(Float32, N)
p = @SArray Float32[1.0, 100.0]
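The generator-based sum in rosenbrock is replaced by an explicit accumulation loop here and in every test below, likely because the generator creates a closure that Enzyme and the GPU kernel compilers struggle with, while the plain loop over a static array compiles cleanly. A minimal sketch of the equivalence, assuming StaticArrays inputs as in the diff:

using StaticArrays

x = @SArray Float32[1.0, 2.0, 3.0]
p = @SArray Float32[1.0, 100.0]

# generator/closure form (removed by this PR):
rosen_gen(x, p) = sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))

# explicit-loop form (added by this PR) — same value, kernel-friendly:
function rosen_loop(x, p)
    res = zero(eltype(x))
    for i in 1:(length(x) - 1)
        res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
    end
    res
end

rosen_gen(x, p) == rosen_loop(x, p)  # true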
6 changes: 5 additions & 1 deletion benchmarks/CPU_vs_GPU/wp_algs.jl
@@ -9,7 +9,11 @@ device!(2)

N = 10
function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end
# x0 = @SArray zeros(Float32, N)

2 changes: 1 addition & 1 deletion src/PSOGPU.jl
@@ -4,7 +4,7 @@ using SciMLBase, StaticArrays, Setfield, KernelAbstractions
using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff
import Adapt
import Adapt: adapt
import Enzyme: autodiff_deferred, Active, Reverse
import Enzyme: autodiff_deferred, Active, Reverse, Const
import KernelAbstractions: @atomic, @atomicreplace, @atomicswap
using QuasiMonteCarlo
import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve
11 changes: 6 additions & 5 deletions src/hybrid.jl
@@ -2,7 +2,7 @@
i = @index(Global, Linear)
nlcache = remake(nlprob; u0 = x0s[i])
sol = solve(nlcache, opt; maxiters, abstol, reltol)
result[i] = sol.u
@inbounds result[i] = sol.u
end

function SciMLBase.solve!(
@@ -19,13 +19,14 @@ function SciMLBase.solve!(
backend = opt.backend

prob = remake(cache.prob, lb = nothing, ub = nothing)
f = Base.Fix2(prob.f.f, prob.p)
∇f = instantiate_gradient(f, prob.f.adtype)

kernel = simplebfgs_run!(backend)
result = cache.start_points
copyto!(result, x0s)
nlprob = NonlinearProblem{false}(∇f, prob.u0)

∇f = instantiate_gradient(prob.f.f, prob.f.adtype)

kernel = simplebfgs_run!(backend)
nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p)

nlalg = LocalOpt isa LBFGS ?
SimpleLimitedMemoryBroyden(;
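The refinement path now builds the gradient from prob.f.f directly and threads the parameters through an ImmutableNonlinearProblem, rather than capturing them in a Base.Fix2 closure. A rough CPU-side sketch of the per-particle step — solving ∇f(θ, p) = 0 from one particle's position — with an illustrative solver rather than the PR's exact nlalg:

using SciMLBase, SimpleNonlinearSolve, ForwardDiff, StaticArrays

f(x, p) = p[2] * (x[2] - x[1]^2)^2 + (p[1] - x[1])^2
∇f(θ, p) = ForwardDiff.gradient(x -> f(x, p), θ)

p = @SArray Float32[1.0, 100.0]
x0 = @SArray Float32[0.5, 0.5]             # one particle's position
nlprob = NonlinearProblem{false}(∇f, x0, p)
sol = solve(nlprob, SimpleNewtonRaphson())  # stationary point near the particle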
11 changes: 9 additions & 2 deletions src/utils.jl
@@ -1,3 +1,8 @@
import SciMLBase: @add_kwonly, AbstractNonlinearProblem, AbstractNonlinearFunction,
AbstractODEFunction, AbstractODEProblem, warn_paramtype, ConstructionBase,
NullParameters, StandardNonlinearProblem, @reset, updated_u0_p,
remake_initialization_data, maybe_eager_initialize_problem

@inbounds function uniform_itr(
dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T}
(rand(T) * (ub[i] - lb[i]) + lb[i] for i in 1:dim)
@@ -342,10 +347,12 @@ Based on the paper: Particle swarm optimization method for constrained optimization
penalty
end

#TODO: Possible migration to DifferentiationInterface.jl;
# however, I cannot compile GPU-compatible gradients with Enzyme as of March 2025
@inline function instantiate_gradient(f, adtype::AutoForwardDiff)
(θ, p) -> ForwardDiff.gradient(f, θ)
(θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ)
end

@inline function instantiate_gradient(f, adtype::AutoEnzyme)
(θ, p) -> autodiff_deferred(Reverse, f, Active, Active(θ))[1][1]
(θ, p) -> autodiff_deferred(Reverse, Const(x -> f(x, p)), Active, Active(θ))[1][1]
end
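Both gradient constructors now take the raw two-argument objective and close over the parameters as x -> f(x, p); the Enzyme variant additionally wraps the closure in Const so the function itself is not treated as differentiable data. A hedged usage sketch of the ForwardDiff path, with made-up values:

using ForwardDiff, StaticArrays

f(x, p) = p[2] * (x[2] - x[1]^2)^2 + (p[1] - x[1])^2
∇f = (θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ)  # as instantiate_gradient builds it

θ = @SArray Float32[0.5, 0.5]
p = @SArray Float32[1.0, 100.0]
∇f(θ, p)  # gradient with respect to θ only; p is held fixed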
1 change: 0 additions & 1 deletion test/Project.toml
@@ -1,5 +1,4 @@
[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
10 changes: 1 addition & 9 deletions test/constraints.jl
@@ -1,14 +1,6 @@
using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random

DEVICE = get(ENV, "GROUP", "CUDA")

@eval using $(Symbol(DEVICE))

if DEVICE == "CUDA"
backend = CUDABackend()
elseif DEVICE == "AMDGPU"
backend = ROCBackend()
end
include("./utils.jl")

Random.seed!(1234)

16 changes: 6 additions & 10 deletions test/gpu.jl
@@ -1,14 +1,6 @@
using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random

DEVICE = get(ENV, "GROUP", "CUDA")

@eval using $(Symbol(DEVICE))

if DEVICE == "CUDA"
backend = CUDABackend()
elseif DEVICE == "AMDGPU"
backend = ROCBackend()
end
include("./utils.jl")

@testset "Rosenbrock GPU tests $(N)" for N in 2:4
Random.seed!(1234)
@@ -19,7 +11,11 @@ end
ub = @SArray fill(Float32(10.0), N)

function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end

x0 = @SArray zeros(Float32, N)
16 changes: 6 additions & 10 deletions test/lbfgs.jl
@@ -1,14 +1,6 @@
using PSOGPU, Optimization, StaticArrays

DEVICE = get(ENV, "GROUP", "CUDA")

@eval using $(Symbol(DEVICE))

if DEVICE == "CUDA"
backend = CUDABackend()
elseif DEVICE == "AMDGPU"
backend = ROCBackend()
end
include("./utils.jl")

function objf(x, p)
return 1 - x[1]^2 - x[2]^2
@@ -25,7 +17,11 @@ sol = Optimization.solve(prob,

N = 10
function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end
x0 = @SArray rand(Float32, N)
p = @SArray Float32[1.0, 100.0]
12 changes: 10 additions & 2 deletions test/regression.jl
@@ -9,7 +9,11 @@ using QuasiMonteCarlo
ub = @SArray fill(Float32(10.0), N)

function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end

x0 = @SArray zeros(Float32, N)
@@ -157,7 +161,11 @@ end
ub = @SArray fill(Float32(10.0), N)

function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end

x0 = @SArray zeros(Float32, N)
11 changes: 7 additions & 4 deletions test/reinit.jl
@@ -7,9 +7,12 @@ lb = @SArray fill(Float32(-1.0), 3)
ub = @SArray fill(Float32(10.0), 3)

function rosenbrock(x, p)
sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1))
res = zero(eltype(x))
for i in 1:(length(x) - 1)
res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
end
res
end

x0 = @SArray zeros(Float32, 3)
p = @SArray Float32[1.0, 100.0]

@@ -25,10 +28,10 @@ cache = init(prob, ParallelSyncPSOKernel(n_particles; backend = CPU()))

reinit!(cache)

cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = backend))
cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = CPU()))

reinit!(cache)

cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.LBFGS(), backend = backend))
cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.LBFGS(), backend = CPU()))

reinit!(cache)
13 changes: 8 additions & 5 deletions test/runtests.jl
@@ -1,12 +1,15 @@
using SafeTestsets
using Test

const GROUP = get(ENV, "GROUP", "CPU")
global CI_GROUP = get(ENV, "GROUP", "CPU")

@safetestset "Regression tests" include("./regression.jl")
@safetestset "Reinitialization tests" include("./reinit.jl")

if GROUP != "CPU"
@safetestset "GPU optimizers tests" include("./gpu.jl")
@safetestset "GPU optimizers with constraints tests" include("./constraints.jl")
@safetestset "GPU hybrid optimizers" include("./lbfgs.jl")
#TODO: Currently throws a warning for redefinition when @testset is used multiple times. Migrate to TestItemRunner.jl
@testset for BACKEND in unique(("CPU", CI_GROUP))
global GROUP = BACKEND
@testset "$(BACKEND) optimizers tests" include("./gpu.jl")
@testset "$(BACKEND) optimizers with constraints tests" include("./constraints.jl")
@testset "$(BACKEND) hybrid optimizers" include("./lbfgs.jl")
end
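
With this layout the CPU group always runs and a GPU group is layered on top when requested. A hedged example of driving the selection locally — the environment variable comes from the diff; the package name is assumed from src/PSOGPU.jl:

ENV["GROUP"] = "CUDA"   # or "AMDGPU"; unset/"CPU" runs only the CPU group
using Pkg
Pkg.test("PSOGPU")      # the env var is inherited by the test process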
10 changes: 10 additions & 0 deletions test/utils.jl
@@ -0,0 +1,10 @@
global backend = if GROUP == "CUDA"
using CUDA
CUDA.CUDABackend()
elseif GROUP == "AMDGPU"
using AMDGPU
AMDGPU.ROCBackend()
else
using KernelAbstractions
CPU()
end
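
Each test file now obtains its compute device from this one snippet instead of duplicating the selection logic. A hedged sketch of how a test consumes it, assuming GROUP is already defined (as runtests.jl arranges) and an illustrative particle count:

using PSOGPU, StaticArrays, SciMLBase
include("./utils.jl")   # sets `backend` from GROUP

function rosenbrock(x, p)
    res = zero(eltype(x))
    for i in 1:(length(x) - 1)
        res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2
    end
    res
end

x0 = @SArray zeros(Float32, 3)
p = @SArray Float32[1.0, 100.0]
lb = @SArray fill(-1.0f0, 3)
ub = @SArray fill(10.0f0, 3)

prob = OptimizationProblem(rosenbrock, x0, p; lb, ub)
sol = solve(prob, ParallelSyncPSOKernel(100; backend))  # 100 particles, illustrative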