Skip to content

Investigate better initialization strategies for higher dimensional problems #21

@utkarsh530

Description

@utkarsh530

From the Neural ODE example:

using SimpleChains, StaticArrays, OrdinaryDiffEq

# Ground-truth setup: static initial state, integration window, and the
# sampling grid the data is saved on.
u0 = @SArray [2.0, 0.0]     # initial condition (static for isbits speed)
tspan = (0.0, 1.5)          # integration window
datasize = 30               # number of saved time points
tsteps = range(tspan[1], tspan[2]; length = datasize)

# True dynamics used to generate the training trajectory: the cubed state
# pushed through a fixed 2x2 linear operator.
function trueODE(u, p, t)
    A = @SMatrix [-0.1 2.0; -2.0 -0.1]
    return ((u .^ 3)' * A)'
end

# Integrate the true dynamics and materialize the reference trajectory.
prob = ODEProblem(trueODE, u0, tspan)
data = Array(solve(prob, Tsit5(); saveat = tsteps))

# Small static-size network: input scaling, one tanh hidden layer, and a
# linear output layer, all of width 2.
sc = SimpleChain(static(2),
    Activation(x -> x .* 3),
    TurboDense{true}(tanh, static(2)),
    TurboDense{true}(identity, static(2)))

# Flat Float64 parameter vector for the chain.
p_nn = SimpleChains.init_params(sc, Float64)

# ODE right-hand side driven by the network: `p` is a (chain, parameters)
# tuple. The `::T` annotation pins the output to the state type.
function nn_fn(u::T, p, t)::T where {T}
    chain, weights = p
    return chain(u, weights)
end

# Closure form of the RHS from the transcript. NOTE(review): nothing below
# uses `nn` — the ODEProblem is built on `nn_fn` instead.
nn = (u, p, t) -> sc(u, p)

# Static copy of the parameter vector so the problem's `p` stays isbits.
p_static = SArray{Tuple{size(p_nn)...}}(p_nn...)

# Neural-ODE problem: the parameter slot carries both the chain and weights.
prob_nn = ODEProblem(nn_fn, u0, tspan, (sc, p_static))

n_particles = 10_000  # swarm size for the PSO run

# Sum-of-squares loss between the reference trajectory (`data`, a global)
# and the trajectory produced by candidate parameters `u`.
function loss(u, p)
    baseprob, savetimes = p
    candidate = remake(baseprob; p = u)
    pred = Array(solve(candidate, Tsit5(); saveat = savetimes))
    return sum(abs2, data .- pred)
end

# Box bounds for the PSO search, one entry per network parameter.
# BUG FIX 1: the original built these from `optprob.u0`, but `optprob` is
# only constructed below, so this threw an UndefVarError (circular order).
# `p_static` is the very vector that becomes `optprob.u0`, so use it here.
lb = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(-1),
    length(p_static))...)
ub = SVector{length(p_static), eltype(p_static)}(fill(eltype(p_static)(1),
    length(p_static))...)

# BUG FIX 2: the loss's parameter tuple referenced `prob_n`, a typo for
# `prob_nn` (another UndefVarError at solve time).
optprob = OptimizationProblem(loss, prob_nn.p[2], (prob_nn, tsteps); lb = lb, ub = ub)

using PSOGPU
using CUDA

# Initialize the swarm on the CPU, then move particles and data to the GPU.
gbest, particles = PSOGPU.init_particles(optprob, n_particles)

# Reference trajectory as a device vector of static state vectors, one per
# saved time point (columns of `data`).
gpu_data = cu([SVector{length(prob_nn.u0), eltype(prob_nn.u0)}(@view data[:, i])
               for i in eachindex(tsteps)])

gpu_particles = cu(particles)

# Fail fast on accidental scalar indexing of device arrays.
CUDA.allowscalar(false)

# Working copy so the original particle initialization stays untouched.
gg = copy(gpu_particles)

# Rebuild the ODE problem for one particle: keep the chain (`p[1]`) and
# swap in that particle's candidate weight vector.
prob_func(prob, gpu_particle) = remake(prob, p = (prob.p[1], gpu_particle.position))

# Run the GPU particle-swarm parameter estimation against the reference
# trajectory; `@time` reports wall time and allocations for the call.
@time gsol = PSOGPU.parameter_estim_ode!(prob_nn,
    gg,
    gbest,
    gpu_data,
    lb,
    ub; saveat = tsteps, dt = 0.1, prob_func = prob_func, maxiters = 1000)

The best solution that I was able to get was:

gbest = PSOGPU.PSOGBest{SVector{12, Float64}, Float64}([0.6874401673017222, -15.593770810494114, 0.46391816314374695, -0.9466586408201431, -0.7460369202292048, 0.05240032443001442, -4.078687202392302, 1.1292140007987868, 1.3178086552221244, 0.3031322864356441, -0.96809309999136, 0.09967222862683514], 75.45484892026886)
gbest.cost #75.45484892026886

Compared to ADAM:

julia> res.objective
7.6828628f0

There's a lot of scope for improvement 😓
image

A decent-sized neural network should work (as shown in the docs of the Python PSO library). I think we should try to get this working for starters: https://pyswarms.readthedocs.io/en/latest/examples/usecases/train_neural_network.html

cc @ChrisRackauckas

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions