From 84dd97e13f6cb6546a0df82b686bbd8ba12c96da Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Tue, 25 Mar 2025 11:26:35 -0400 Subject: [PATCH 1/8] Prepare the package for release --- Project.toml | 5 ++--- benchmarks/CPU_vs_GPU/benchmark.jl | 6 +++++- benchmarks/CPU_vs_GPU/wp_algs.jl | 6 +++++- src/PSOGPU.jl | 2 +- src/hybrid.jl | 11 ++++++----- src/utils.jl | 11 +++++++++-- test/constraints.jl | 10 +--------- test/gpu.jl | 16 ++++++---------- test/lbfgs.jl | 16 ++++++---------- test/regression.jl | 12 ++++++++++-- test/reinit.jl | 11 +++++++---- test/runtests.jl | 10 ++++++---- test/utils.jl | 11 +++++++++++ 13 files changed, 75 insertions(+), 52 deletions(-) create mode 100644 test/utils.jl diff --git a/Project.toml b/Project.toml index df16254..70b2a20 100644 --- a/Project.toml +++ b/Project.toml @@ -5,13 +5,11 @@ version = "1.0.0-DEV" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" -NonlinearSolve = "8913a72c-1f9b-4ce2-8d82-65094dcecaec" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -23,4 +21,5 @@ SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] -julia = "1.6" \ No newline at end of file +KernelAbstractions = "<0.9.30" +julia = "1.6" diff --git a/benchmarks/CPU_vs_GPU/benchmark.jl b/benchmarks/CPU_vs_GPU/benchmark.jl index 533ad91..3421e90 100644 --- a/benchmarks/CPU_vs_GPU/benchmark.jl +++ b/benchmarks/CPU_vs_GPU/benchmark.jl @@ -9,7 +9,11 @@ device!(2) N = 10 function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end x0 = @SArray zeros(Float32, N) p = @SArray Float32[1.0, 100.0] diff --git a/benchmarks/CPU_vs_GPU/wp_algs.jl b/benchmarks/CPU_vs_GPU/wp_algs.jl index 3df3a1e..ade5392 100644 --- a/benchmarks/CPU_vs_GPU/wp_algs.jl +++ b/benchmarks/CPU_vs_GPU/wp_algs.jl @@ -9,7 +9,11 @@ device!(2) N = 10 function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end # x0 = @SArray zeros(Float32, N) diff --git a/src/PSOGPU.jl b/src/PSOGPU.jl index c1ee642..4a30f28 100644 --- a/src/PSOGPU.jl +++ b/src/PSOGPU.jl @@ -4,7 +4,7 @@ using SciMLBase, StaticArrays, Setfield, KernelAbstractions using QuasiMonteCarlo, Optimization, SimpleNonlinearSolve, ForwardDiff import Adapt import Adapt: adapt -import Enzyme: autodiff_deferred, Active, Reverse +import Enzyme: autodiff_deferred, Active, Reverse, Const import KernelAbstractions: @atomic, @atomicreplace, @atomicswap using QuasiMonteCarlo import DiffEqGPU: GPUTsit5, make_prob_compatible, vectorized_solve, vectorized_asolve diff --git a/src/hybrid.jl b/src/hybrid.jl index af0502d..5f6d67d 100644 --- a/src/hybrid.jl +++ b/src/hybrid.jl @@ -2,7 +2,7 @@ i = @index(Global, Linear) nlcache = remake(nlprob; u0 = x0s[i]) sol = solve(nlcache, opt; maxiters, abstol, reltol) - result[i] = sol.u + @inbounds result[i] = sol.u end function SciMLBase.solve!( @@ -19,13 +19,14 @@ function SciMLBase.solve!( backend = opt.backend prob = remake(cache.prob, lb = nothing, ub = nothing) - f = Base.Fix2(prob.f.f, prob.p) - ∇f = instantiate_gradient(f, prob.f.adtype) - kernel = simplebfgs_run!(backend) result = cache.start_points copyto!(result, x0s) - nlprob = NonlinearProblem{false}(∇f, prob.u0) + + ∇f = instantiate_gradient(prob.f.f, prob.f.adtype) + + kernel = simplebfgs_run!(backend) + nlprob = SimpleNonlinearSolve.ImmutableNonlinearProblem{false}(∇f, prob.u0, prob.p) nlalg = LocalOpt isa LBFGS ? SimpleLimitedMemoryBroyden(; diff --git a/src/utils.jl b/src/utils.jl index 59565f0..e47bb7a 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,3 +1,8 @@ +import SciMLBase: @add_kwonly, AbstractNonlinearProblem, AbstractNonlinearFunction, + AbstractODEFunction, AbstractODEProblem, warn_paramtype, ConstructionBase, + NullParameters, StandardNonlinearProblem, @reset, updated_u0_p, + remake_initialization_data, maybe_eager_initialize_problem + @inbounds function uniform_itr( dim::Int, lb::AbstractArray{T}, ub::AbstractArray{T}) where {T} (rand(T) * (ub[i] - lb[i]) + lb[i] for i in 1:dim) @@ -342,10 +347,12 @@ Based on the paper: Particle swarm optimization method for constrained optimizat penalty end +#TODO: Possible migration to DifferentiationInterface.jl, +# however I cannot compile GPU-compatible gradients with Enzyme as Mar 2025 @inline function instantiate_gradient(f, adtype::AutoForwardDiff) - (θ, p) -> ForwardDiff.gradient(f, θ) + (θ, p) -> ForwardDiff.gradient(x -> f(x, p), θ) end @inline function instantiate_gradient(f, adtype::AutoEnzyme) - (θ, p) -> autodiff_deferred(Reverse, f, Active, Active(θ))[1][1] + (θ, p) -> autodiff_deferred(Reverse, Const(x -> f(x, p)), Active, Active(θ))[1][1] end diff --git a/test/constraints.jl b/test/constraints.jl index ab31e52..35cf6e0 100644 --- a/test/constraints.jl +++ b/test/constraints.jl @@ -1,14 +1,6 @@ using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random -DEVICE = get(ENV, "GROUP", "CUDA") - -@eval using $(Symbol(DEVICE)) - -if DEVICE == "CUDA" - backend = CUDABackend() -elseif DEVICE == "AMDGPU" - backend = ROCBackend() -end +include("./utils.jl") Random.seed!(1234) diff --git a/test/gpu.jl b/test/gpu.jl index 4292b63..c072517 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -1,14 +1,6 @@ using PSOGPU, StaticArrays, SciMLBase, Test, LinearAlgebra, Random -DEVICE = get(ENV, "GROUP", "CUDA") - -@eval using $(Symbol(DEVICE)) - -if DEVICE == "CUDA" - backend = CUDABackend() -elseif DEVICE == "AMDGPU" - backend = ROCBackend() -end +include("./utils.jl") @testset "Rosenbrock GPU tests $(N)" for N in 2:4 Random.seed!(1234) @@ -19,7 +11,11 @@ end ub = @SArray fill(Float32(10.0), N) function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end x0 = @SArray zeros(Float32, N) diff --git a/test/lbfgs.jl b/test/lbfgs.jl index b15e7aa..59817d6 100644 --- a/test/lbfgs.jl +++ b/test/lbfgs.jl @@ -1,14 +1,6 @@ using PSOGPU, Optimization, StaticArrays -DEVICE = get(ENV, "GROUP", "CUDA") - -@eval using $(Symbol(DEVICE)) - -if DEVICE == "CUDA" - backend = CUDABackend() -elseif DEVICE == "AMDGPU" - backend = ROCBackend() -end +include("./utils.jl") function objf(x, p) return 1 - x[1]^2 - x[2]^2 @@ -25,7 +17,11 @@ sol = Optimization.solve(prob, N = 10 function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end x0 = @SArray rand(Float32, N) p = @SArray Float32[1.0, 100.0] diff --git a/test/regression.jl b/test/regression.jl index 27f767d..ccf1963 100644 --- a/test/regression.jl +++ b/test/regression.jl @@ -9,7 +9,11 @@ using QuasiMonteCarlo ub = @SArray fill(Float32(10.0), N) function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end x0 = @SArray zeros(Float32, N) @@ -157,7 +161,11 @@ end ub = @SArray fill(Float32(10.0), N) function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end x0 = @SArray zeros(Float32, N) diff --git a/test/reinit.jl b/test/reinit.jl index 1e0e27b..6c60e0c 100644 --- a/test/reinit.jl +++ b/test/reinit.jl @@ -7,9 +7,12 @@ lb = @SArray fill(Float32(-1.0), 3) ub = @SArray fill(Float32(10.0), 3) function rosenbrock(x, p) - sum(p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 for i in 1:(length(x) - 1)) + res = zero(eltype(x)) + for i in 1:(length(x) - 1) + res += p[2] * (x[i + 1] - x[i]^2)^2 + (p[1] - x[i])^2 + end + res end - x0 = @SArray zeros(Float32, 3) p = @SArray Float32[1.0, 100.0] @@ -25,10 +28,10 @@ cache = init(prob, ParallelSyncPSOKernel(n_particles; backend = CPU())) reinit!(cache) -cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = backend)) +cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.BFGS(), backend = CPU())) reinit!(cache) -cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.LBFGS(), backend = backend)) +cache = init(prob, PSOGPU.HybridPSO(; local_opt = PSOGPU.LBFGS(), backend = CPU())) reinit!(cache) diff --git a/test/runtests.jl b/test/runtests.jl index 3925374..643891f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,9 +4,11 @@ using Test const GROUP = get(ENV, "GROUP", "CPU") @safetestset "Regression tests" include("./regression.jl") +@safetestset "Reinitialization tests" include("./reinit.jl") -if GROUP != "CPU" - @safetestset "GPU optimizers tests" include("./gpu.jl") - @safetestset "GPU optimizers with constraints tests" include("./constraints.jl") - @safetestset "GPU hybrid optimizers" include("./lbfgs.jl") +#TODO: Curent throws warning for redefinition with the use of @testset multiple times. Migrate to TestItemRunners.jl +@testset for GROUP in unique(("CPU", GROUP)) + @testset "$(GROUP) optimizers tests" include("./gpu.jl") + @testset "$(GROUP) optimizers with constraints tests" include("./constraints.jl") + @testset "$(GROUP) hybrid optimizers" include("./lbfgs.jl") end diff --git a/test/utils.jl b/test/utils.jl new file mode 100644 index 0000000..d5886d2 --- /dev/null +++ b/test/utils.jl @@ -0,0 +1,11 @@ +const GROUP = get(ENV, "GROUP", "CPU") +const backend = if GROUP == "CUDA" + using CUDA + CUDA.CUDABackend() +elseif GROUP == "AMDGPU" + using AMDGPU + AMDGPU.ROCBackend() +else + using KernelAbstractions + backend = CPU() +end From 01fc0ae0b1c9edf31d26071d9271b67a12c50aee Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Tue, 25 Mar 2025 12:04:26 -0400 Subject: [PATCH 2/8] Update runtests.yml --- .buildkite/runtests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index bc95451..b3a6203 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -3,7 +3,7 @@ steps: matrix: setup: version: - - "1" + - "1.10" env: GROUP: CUDA plugins: From 95e07d1539c52c7b70d354d5d1f2de46831ff5fb Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Tue, 25 Mar 2025 12:04:53 -0400 Subject: [PATCH 3/8] rm CUDA from test/Project.toml --- test/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 93b17e3..acaf70a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,4 @@ [deps] -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" From 3e768fcbdb6e77ddd692e885e182b3a62ee7e992 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Wed, 26 Mar 2025 00:34:54 -0400 Subject: [PATCH 4/8] Add compat entry for Enzyme --- Project.toml | 2 +- test/runtests.jl | 11 ++++++----- test/utils.jl | 3 +-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 70b2a20..cd359ca 100644 --- a/Project.toml +++ b/Project.toml @@ -22,4 +22,4 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] KernelAbstractions = "<0.9.30" -julia = "1.6" +Enzyme = "<0.13.35" diff --git a/test/runtests.jl b/test/runtests.jl index 643891f..2649148 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,15 @@ using SafeTestsets using Test -const GROUP = get(ENV, "GROUP", "CPU") +global CI_GROUP = get(ENV, "GROUP", "CPU") @safetestset "Regression tests" include("./regression.jl") @safetestset "Reinitialization tests" include("./reinit.jl") #TODO: Curent throws warning for redefinition with the use of @testset multiple times. Migrate to TestItemRunners.jl -@testset for GROUP in unique(("CPU", GROUP)) - @testset "$(GROUP) optimizers tests" include("./gpu.jl") - @testset "$(GROUP) optimizers with constraints tests" include("./constraints.jl") - @testset "$(GROUP) hybrid optimizers" include("./lbfgs.jl") +@testset for BACKEND in unique(("CPU", CI_GROUP)) + global GROUP = BACKEND + @testset "$(BACKEND) optimizers tests" include("./gpu.jl") + @testset "$(BACKEND) optimizers with constraints tests" include("./constraints.jl") + @testset "$(BACKEND) hybrid optimizers" include("./lbfgs.jl") end diff --git a/test/utils.jl b/test/utils.jl index d5886d2..c841b9d 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,5 +1,4 @@ -const GROUP = get(ENV, "GROUP", "CPU") -const backend = if GROUP == "CUDA" +global backend = if GROUP == "CUDA" using CUDA CUDA.CUDABackend() elseif GROUP == "AMDGPU" From 21be905c4f6e8f2f32660b76de35734d62112fe3 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Wed, 26 Mar 2025 00:35:30 -0400 Subject: [PATCH 5/8] Test both on lts and 1 --- .buildkite/runtests.yml | 1 + test/Project.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index b3a6203..2ea273d 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -3,6 +3,7 @@ steps: matrix: setup: version: + - "1" - "1.10" env: GROUP: CUDA diff --git a/test/Project.toml b/test/Project.toml index acaf70a..93b17e3 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" From 00966cf781243605edb265f7eb884a7a52994d8a Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Wed, 26 Mar 2025 01:16:32 -0400 Subject: [PATCH 6/8] format --- test/Project.toml | 1 - test/runtests.jl | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 93b17e3..acaf70a 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,4 @@ [deps] -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" diff --git a/test/runtests.jl b/test/runtests.jl index 2649148..e6454c6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,7 +7,7 @@ global CI_GROUP = get(ENV, "GROUP", "CPU") @safetestset "Reinitialization tests" include("./reinit.jl") #TODO: Curent throws warning for redefinition with the use of @testset multiple times. Migrate to TestItemRunners.jl -@testset for BACKEND in unique(("CPU", CI_GROUP)) +@testset for BACKEND in unique(("CPU", CI_GROUP)) global GROUP = BACKEND @testset "$(BACKEND) optimizers tests" include("./gpu.jl") @testset "$(BACKEND) optimizers with constraints tests" include("./constraints.jl") From e252f230869e3ea7bb27958d8733116fd4ad009c Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 27 Mar 2025 01:20:17 -0400 Subject: [PATCH 7/8] Add compat entries --- Project.toml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index cd359ca..a2842c5 100644 --- a/Project.toml +++ b/Project.toml @@ -9,17 +9,26 @@ DiffEqGPU = "071ae1c0-96b5-11e9-1965-c90190d839ea" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" -MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" -SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5" SimpleNonlinearSolve = "727e6d20-b764-4bd8-a329-72de5adea6c7" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" [compat] -KernelAbstractions = "<0.9.30" +Adapt = "4.3" +DiffEqGPU = "3.4" Enzyme = "<0.13.35" +ForwardDiff = "0.10" +KernelAbstractions = "<0.9.30" +Optimization = "4.1" +QuasiMonteCarlo = "0.3" +Reexport = "1.2" +SciMLBase = "2.79" +Setfield = "1.1" +SimpleNonlinearSolve = "2.2" +StaticArrays = "1.9" +julia = "1.10" From b8b0b3c47fbbff688359f6729d7185ce0501d3ce Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Thu, 27 Mar 2025 01:31:01 -0400 Subject: [PATCH 8/8] Update runtests.yml --- .buildkite/runtests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/runtests.yml b/.buildkite/runtests.yml index 2ea273d..e6e39e3 100644 --- a/.buildkite/runtests.yml +++ b/.buildkite/runtests.yml @@ -31,7 +31,7 @@ steps: matrix: setup: version: - - "1" + - "1.10" env: GROUP: AMDGPU plugins: