Skip to content

Investigate better initialization strategies for higher dimensional problems #21

@utkarsh530

Description

@utkarsh530

From the Neural ODE example:

using SimpleChains, StaticArrays, OrdinaryDiffEq

# Ground-truth setup: static initial state, integration window, and the
# sampling grid the data is saved on.
u0 = @SArray [2.0, 0.0]     # initial condition (static for isbits speed)
tspan = (0.0, 1.5)          # integration window
datasize = 30               # number of saved time points
tsteps = range(tspan[1], tspan[2]; length = datasize)

# True dynamics used to generate the training trajectory: the cubed state
# pushed through a fixed 2x2 linear operator.
function trueODE(u, p, t)
    A = @SMatrix [-0.1 2.0; -2.0 -0.1]
    return ((u .^ 3)' * A)'
end

# Integrate the true dynamics and materialize the reference trajectory.
prob = ODEProblem(trueODE, u0, tspan)
data = Array(solve(prob, Tsit5(); saveat = tsteps))

# Small static-size network: input scaling, one tanh hidden layer, and a
# linear output layer, all of width 2.
sc = SimpleChain(static(2),
    Activation(x -> x .* 3),
    TurboDense{true}(tanh, static(2)),
    TurboDense{true}(identity, static(2)))

# Flat Float64 parameter vector for the chain.
p_nn = SimpleChains.init_params(sc, Float64)

# ODE right-hand side driven by the network: `p` is a (chain, parameters)
# tuple. The `::T` annotation pins the output to the state type.
function nn_fn(u::T, p, t)::T where {T}
    chain, weights = p
    return chain(u, weights)
end

# Closure form of the RHS from the transcript. NOTE(review): nothing below
# uses `nn` — the ODEProblem is built on `nn_fn` instead.
nn = (u, p, t) -> sc(u, p)

# Static copy of the parameter vector so the problem's `p` stays isbits.
p_static = SArray{Tuple{size(p_nn)...}}(p_nn...)

# Neural-ODE problem: the parameter slot carries both the chain and weights.
prob_nn = ODEProblem(nn_fn, u0, tspan, (sc, p_static))

n_particles = 10_000  # swarm size for the PSO run

# Sum-of-squares loss between the reference trajectory (`data`, a global)
# and the trajectory produced by candidate parameters `u`.
function loss(u, p)
    baseprob, savetimes = p
    candidate = remake(baseprob; p = u)
    pred = Array(solve(candidate, Tsit5(); saveat = savetimes))
    return sum(abs2, data .- pred)
end

# Box bounds for the PSO search, one entry per network parameter.
# BUG FIX 1: the original built these from `optprob.u0`, but `optprob` is
# only constructed below, so this threw an UndefVarError (circular order).
# `p_static` is the very vector that becomes `optprob.u0`, so use it here.
lb = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(-1),
    length(p_static))...)
ub = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(1),
    length(p_static))...)

# BUG FIX 2: the loss's parameter tuple referenced `prob_n`, a typo for
# `prob_nn` (another UndefVarError at solve time).
optprob = OptimizationProblem(loss, prob_nn.p[2], (prob_nn, tsteps); lb = lb, ub = ub)

using PSOGPU
using CUDA

# Initialize the swarm on the CPU, then move particles and data to the GPU.
gbest, particles = PSOGPU.init_particles(optprob, n_particles)

# Reference trajectory as a device vector of static state vectors, one per
# saved time point (columns of `data`).
gpu_data = cu([SVector{length(prob_nn.u0), eltype(prob_nn.u0)}(@view data[:, i])
               for i in eachindex(tsteps)])

gpu_particles = cu(particles)

# Fail fast on accidental scalar indexing of device arrays.
CUDA.allowscalar(false)

# Working copy so the original particle initialization stays untouched.
gg = copy(gpu_particles)

# Rebuild the ODE problem for one particle: keep the chain (`p[1]`) and
# swap in that particle's candidate weight vector.
prob_func(prob, gpu_particle) = remake(prob, p = (prob.p[1], gpu_particle.position))

# Run the GPU particle-swarm parameter estimation against the reference
# trajectory; `@time` reports wall time and allocations for the call.
@time gsol = PSOGPU.parameter_estim_ode!(prob_nn,
    gg,
    gbest,
    gpu_data,
    lb,
    ub; saveat = tsteps, dt = 0.1, prob_func = prob_func, maxiters = 1000)

The best solution that I was able to get was:

gbest = PSOGPU.PSOGBest{SVector{12, Float64}, Float64}([0.6874401673017222, -15.593770810494114, 0.46391816314374695, -0.9466586408201431, -0.7460369202292048, 0.05240032443001442, -4.078687202392302, 1.1292140007987868, 1.3178086552221244, 0.3031322864356441, -0.96809309999136, 0.09967222862683514], 75.45484892026886)
gbest.cost #75.45484892026886

Compared to ADAM:

julia> res.objective
7.6828628f0

There's a lot of scope for improvement 😓
image

A decent-sized neural network should work (as shown in the docs of the Python PSO library). I think we should try to get this working for starters: https://pyswarms.readthedocs.io/en/latest/examples/usecases/train_neural_network.html

cc @ChrisRackauckas

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions