From f17686c013816036088e561d11b29ddfbc01d383 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sat, 1 Apr 2023 12:18:03 +0530 Subject: [PATCH 1/9] T1 some implementations v1 --- Project.toml | 2 ++ dummy/basic_eg.jl | 57 ++++++++++++++++++++++++++++++++++++++++++++ dummy/basic_nnode.jl | 17 +++++++++++++ dummy/flux_eg.jl | 0 dummy/t1.jl | 38 +++++++++++++++++++++++++++++ dummy/t2.jl | 5 ++++ dummy/t3.jl | 6 +++++ 7 files changed, 125 insertions(+) create mode 100644 dummy/basic_eg.jl create mode 100644 dummy/basic_nnode.jl create mode 100644 dummy/flux_eg.jl create mode 100644 dummy/t1.jl create mode 100644 dummy/t2.jl create mode 100644 dummy/t3.jl diff --git a/Project.toml b/Project.toml index 5890628584..aa145eec3a 100644 --- a/Project.toml +++ b/Project.toml @@ -23,10 +23,12 @@ ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/dummy/basic_eg.jl b/dummy/basic_eg.jl new file mode 100644 index 0000000000..b99c5419fc --- /dev/null +++ b/dummy/basic_eg.jl @@ -0,0 +1,57 @@ +using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers +import ModelingToolkit: Interval, infimum, supremum + +@parameters x y +@variables u(..) 
+Dxx = Differential(x)^2 +Dyy = Differential(y)^2 + +# 2D PDE +eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) + +# Boundary conditions +bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, + u(x, 0) ~ 0.0, u(x, 1) ~ 0] +# Space and time domains +domains = [x ∈ Interval(0.0, 1.0), + y ∈ Interval(0.0, 1.0)] +# Discretization +dx = 0.1 + +# Neural network +dim = 2 # number of dimensions +chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) + +discretization = PhysicsInformedNN(chain, QuadratureTraining()) + +@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) +prob = discretize(pde_system, discretization) + +callback = function (p, l) + println("Current loss is: $l") + return false +end + +res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) +prob = remake(prob, u0 = res.minimizer) +res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) +phi = discretization.phi + + + + + +xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] +analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) + +u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], + (length(xs), length(ys))) +u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], + (length(xs), length(ys))) +diff_u = abs.(u_predict .- u_real) + +using Plots +p1 = plot(xs, ys, u_real, linetype = :contourf, title = "analytic"); +p2 = plot(xs, ys, u_predict, linetype = :contourf, title = "predict"); +p3 = plot(xs, ys, diff_u, linetype = :contourf, title = "error"); +plot(p1, p2, p3) \ No newline at end of file diff --git a/dummy/basic_nnode.jl b/dummy/basic_nnode.jl new file mode 100644 index 0000000000..0b8cb5eec1 --- /dev/null +++ b/dummy/basic_nnode.jl @@ -0,0 +1,17 @@ +using NeuralPDE, Flux, OptimizationOptimisers + +linear(u, p, t) = cos(2pi * t) +tspan = (0.0f0, 1.0f0) +u0 = 0.0f0 +prob = ODEProblem(linear, u0, tspan) + + +chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) + + +opt = OptimizationOptimisers.Adam(0.1) +alg = NeuralPDE.NNODE(chain, opt) + + +sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, abstol = 1.0f-6, + maxiters = 200) \ No newline at end of file diff --git a/dummy/flux_eg.jl b/dummy/flux_eg.jl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dummy/t1.jl b/dummy/t1.jl new file mode 100644 index 0000000000..a9baff0506 --- /dev/null +++ b/dummy/t1.jl @@ -0,0 +1,38 @@ +using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers +import ModelingToolkit: Interval, infimum, supremum + +@parameters x y +@variables u(..) 
+Dxx = Differential(x)^2 +Dyy = Differential(y)^2 + +# 2D PDE +eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) + +# Boundary conditions +bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, + u(x, 0) ~ 0.0, u(x, 1) ~ 0] +# Space and time domains +domains = [x ∈ Interval(0.0, 1.0), + y ∈ Interval(0.0, 1.0)] +# Discretization +dx = 0.1 + +# Neural network +dim = 2 # number of dimensions +chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) + +discretization = PhysicsInformedNN(chain, QuadratureTraining()) + +@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) +prob = discretize(pde_system, discretization) + +callback = function (p, l) + println("Current loss is: $l") + return false +end + +res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) +prob = remake(prob, u0 = res.minimizer) +res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) +phi = discretization.phi \ No newline at end of file diff --git a/dummy/t2.jl b/dummy/t2.jl new file mode 100644 index 0000000000..37e6201ebd --- /dev/null +++ b/dummy/t2.jl @@ -0,0 +1,5 @@ +using Flux, Tracker +x = [0.8; 0.8] +ann = Chain(Dense(2, 10, tanh), Dense(10, 1)) +p, re = Flux.destructure(ann) +z = re(Float64(p)) \ No newline at end of file diff --git a/dummy/t3.jl b/dummy/t3.jl new file mode 100644 index 0000000000..0545e777a8 --- /dev/null +++ b/dummy/t3.jl @@ -0,0 +1,6 @@ +using NeuralPDE, Flux, OptimizationOptimisers + +linear(u, p, t) = cos(2pi * t) +tspan = (0.0f0, 1.0f0) +u0 = 0.0f0 +prob = ODEProblem(linear, u0, tspan) \ No newline at end of file From 17e791f7671d7fc3bc0d96f7b5ccc679bb10bd4c Mon Sep 17 00:00:00 2001 From: Sagar Date: Sat, 1 Apr 2023 19:06:00 +0530 Subject: [PATCH 2/9] Added Inverse Dirichlet Loss and Neural Tangent Loss implementations --- dummy/basic_eg.jl | 57 ----------- dummy/basic_nnode.jl | 17 ---- dummy/flux_eg.jl | 0 dummy/t1.jl | 38 -------- dummy/t2.jl | 5 - dummy/t3.jl | 6 -- src/NeuralPDE.jl | 2 + src/adaptive_losses.jl | 188 ++++++++++++++++++++++++++++++++++++ test/adaptive_loss_tests.jl | 8 +- 9 files changed, 196 insertions(+), 125 deletions(-) delete mode 100644 dummy/basic_eg.jl delete mode 100644 dummy/basic_nnode.jl delete mode 100644 dummy/flux_eg.jl delete mode 100644 dummy/t1.jl delete mode 100644 dummy/t2.jl delete mode 100644 dummy/t3.jl diff --git a/dummy/basic_eg.jl b/dummy/basic_eg.jl deleted file mode 100644 index b99c5419fc..0000000000 --- a/dummy/basic_eg.jl +++ /dev/null @@ -1,57 +0,0 @@ -using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers -import ModelingToolkit: Interval, infimum, supremum - -@parameters x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 - -# 2D PDE -eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) - -# Boundary conditions -bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, - u(x, 0) ~ 0.0, u(x, 1) ~ 0] -# Space and time domains -domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] -# Discretization -dx = 0.1 - -# Neural network -dim = 2 # number of dimensions -chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) - -discretization = PhysicsInformedNN(chain, QuadratureTraining()) - -@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) -prob = discretize(pde_system, discretization) - -callback = function (p, l) - println("Current loss is: $l") - return false -end - -res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) -phi = discretization.phi - - - - - -xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] -analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) - -u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], - (length(xs), length(ys))) -u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) -diff_u = abs.(u_predict .- u_real) - -using Plots -p1 = plot(xs, ys, u_real, linetype = :contourf, title = "analytic"); -p2 = plot(xs, ys, u_predict, linetype = :contourf, title = "predict"); -p3 = plot(xs, ys, diff_u, linetype = :contourf, title = "error"); -plot(p1, p2, p3) \ No newline at end of file diff --git a/dummy/basic_nnode.jl b/dummy/basic_nnode.jl deleted file mode 100644 index 0b8cb5eec1..0000000000 --- a/dummy/basic_nnode.jl +++ /dev/null @@ -1,17 +0,0 @@ -using NeuralPDE, Flux, OptimizationOptimisers - -linear(u, p, t) = cos(2pi * t) -tspan = (0.0f0, 1.0f0) -u0 = 0.0f0 -prob = ODEProblem(linear, u0, tspan) - - -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) - - -opt = OptimizationOptimisers.Adam(0.1) -alg = NeuralPDE.NNODE(chain, opt) - - -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, abstol = 1.0f-6, - maxiters = 200) \ No newline at end of file diff --git a/dummy/flux_eg.jl b/dummy/flux_eg.jl deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/dummy/t1.jl b/dummy/t1.jl deleted file mode 100644 index a9baff0506..0000000000 --- a/dummy/t1.jl +++ /dev/null @@ -1,38 +0,0 @@ -using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers -import ModelingToolkit: Interval, infimum, supremum - -@parameters x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 - -# 2D PDE -eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) - -# Boundary conditions -bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, - u(x, 0) ~ 0.0, u(x, 1) ~ 0] -# Space and time domains -domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] -# Discretization -dx = 0.1 - -# Neural network -dim = 2 # number of dimensions -chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) - -discretization = PhysicsInformedNN(chain, QuadratureTraining()) - -@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) -prob = discretize(pde_system, discretization) - -callback = function (p, l) - println("Current loss is: $l") - return false -end - -res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) -phi = discretization.phi \ No newline at end of file diff --git a/dummy/t2.jl b/dummy/t2.jl deleted file mode 100644 index 37e6201ebd..0000000000 --- a/dummy/t2.jl +++ /dev/null @@ -1,5 +0,0 @@ -using Flux, Tracker -x = [0.8; 0.8] -ann = Chain(Dense(2, 10, tanh), Dense(10, 1)) -p, re = Flux.destructure(ann) -z = re(Float64(p)) \ No newline at end of file diff --git a/dummy/t3.jl b/dummy/t3.jl deleted file mode 100644 index 0545e777a8..0000000000 --- a/dummy/t3.jl +++ /dev/null @@ -1,6 +0,0 @@ -using NeuralPDE, Flux, OptimizationOptimisers - -linear(u, p, t) = cos(2pi * t) -tspan = (0.0f0, 1.0f0) -u0 = 0.0f0 -prob = ODEProblem(linear, u0, tspan) \ No newline at end of file diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 81051eb916..6fdd6d9c6c 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -17,6 +17,7 @@ using RuntimeGeneratedFunctions using SciMLBase using Statistics using ArrayInterface +using LinearAlgebra import Optim using DomainSets using Symbolics @@ -46,6 +47,7 @@ include("transform_inf_integral.jl") include("discretize.jl") include("neural_adapter.jl") + export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem, KolmogorovParamDomain, NNParamKolmogorov, diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index b37023da7c..e471be0dbc 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -257,3 +257,191 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, nothing end end + + + + +""" +NTK Adaptive Loss +""" + +mutable struct NTKAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_every::Int64 + weight_change_inertia::T + pde_loss_weights::Vector{T} + bc_loss_weights::Vector{T} + additional_loss_weights::Vector{T} + SciMLBase.@add_kwonly function NTKAdaptiveLoss{T}(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real + } + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), + vectorify(additional_loss_weights, T)) + end +end +# default to Float64 +SciMLBase.@add_kwonly function NTKAdaptiveLoss(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) + NTKAdaptiveLoss{Float64}(reweight_every; + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) +end + +function 
generate_adaptive_loss_function(pinnrep::PINNRepresentation, + adaloss::NTKAdaptiveLoss, + pde_loss_functions, bc_loss_functions) + weight_change_inertia = adaloss.weight_change_inertia + iteration = pinnrep.iteration + adaloss_T = eltype(adaloss.pde_loss_weights) + + function run_loss_ntkl_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % adaloss.reweight_every == 0 + # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function + # pde_grads_maxes = [maximum(abs.(Zygote.gradient(pde_loss_function, θ)[1])) + # for pde_loss_function in pde_loss_functions] + # pde_grads_max = maximum(pde_grads_maxes) + # bc_grads_mean = [mean(abs.(Zygote.gradient(bc_loss_function, θ)[1])) + # for bc_loss_function in bc_loss_functions] + + # nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : + # convert(adaloss_T, 1e-7) + # bc_loss_weights_proposed = pde_grads_max ./ + # (bc_grads_mean .+ nonzero_divisor_eps) + # adaloss.bc_loss_weights .= weight_change_inertia .* + # adaloss.bc_loss_weights .+ + # (1 .- weight_change_inertia) .* + # bc_loss_weights_proposed + + + bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) + for bc_loss_function in bc_loss_functions] + bc_trace = sum(bc_vec) + pde_vec = [dot(vec(Zygote.gradient(pde_loss_function, θ)[1]), vec(Zygote.gradient(pde_loss_function, θ)[1])) + for pde_loss_function in pde_loss_functions] + pde_trace = sum(pde_vec) + + adaloss.bc_loss_weights .= ( bc_trace .+ pde_trace ) ./ (bc_trace) + adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) + + + # logscalar(pinnrep.logger, pde_trace, "adaptive_loss/pde_grad_max", + # iteration[1]) + # logvector(pinnrep.logger, bc_trace, "adaptive_loss/bc_trace", + # iteration[1]) + # logvector(pinnrep.logger, adaloss.pde_loss_weights, + # "adaptive_loss/pde_loss_weights", + # iteration[1]) + # logvector(pinnrep.logger, adaloss.bc_loss_weights, + # "adaptive_loss/bc_loss_weights", + # iteration[1]) + end + nothing + end +end + + + + + + + + + +""" +Inverse Dirichlet Adaptive Loss +""" + +mutable struct IDAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_every::Int64 + weight_change_inertia::T + pde_loss_weights::Vector{T} + bc_loss_weights::Vector{T} + additional_loss_weights::Vector{T} + SciMLBase.@add_kwonly function IDAdaptiveLoss{T}(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real + } + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), + vectorify(additional_loss_weights, T)) + end +end +# default to Float64 +SciMLBase.@add_kwonly function IDAdaptiveLoss(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) + IDAdaptiveLoss{Float64}(reweight_every; + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) +end + +function generate_adaptive_loss_function(pinnrep::PINNRepresentation, + adaloss::IDAdaptiveLoss, + pde_loss_functions, bc_loss_functions) + weight_change_inertia = adaloss.weight_change_inertia + iteration = pinnrep.iteration + adaloss_T = eltype(adaloss.pde_loss_weights) + + function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % 
adaloss.reweight_every == 0 + # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function + pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) + for pde_loss_function in pde_loss_functions] + pde_std_max = maximum(pde_stds) + bc_stds = [std(Zygote.gradient(bc_loss_function, θ)[1]) + for bc_loss_function in bc_loss_functions] + bc_std_max = maximum(bc_stds) + gamma = max(pde_std_max, bc_std_max) + + bc_loss_weights_proposed = gamma ./ (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* bc_loss_weights_proposed + + pde_loss_weights_proposed = gamma ./ (pde_stds) + adaloss.pde_loss_weights .= weight_change_inertia .* adaloss.pde_loss_weights .+ + (1 .- weight_change_inertia) .* pde_loss_weights_proposed + nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : + convert(adaloss_T, 1e-7) + bc_loss_weights_proposed = gamma ./ + (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* + adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* + bc_loss_weights_proposed + # logscalar(pinnrep.logger, pde_grads_max, "adaptive_loss/pde_grad_max", + # iteration[1]) + # logvector(pinnrep.logger, pde_grads_maxes, "adaptive_loss/pde_grad_maxes", + # iteration[1]) + # logvector(pinnrep.logger, bc_grads_mean, "adaptive_loss/bc_grad_mean", + # iteration[1]) + # logvector(pinnrep.logger, adaloss.bc_loss_weights, + # "adaptive_loss/bc_loss_weights", + # iteration[1]) + end + nothing + end +end + + + + + + diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index bd49783ec4..59e55f671f 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -10,8 +10,12 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss] -maxiters = 4000 +ntk_loss = NeuralPDE.NTKAdaptiveLoss(100; pde_loss_weights = 1, + bc_loss_weights = 1) +id_loss = NeuralPDE.IDAdaptiveLoss(100, pde_loss_weights = 1e3, + bc_loss_weights = 1) +adaptive_losses = [nonadaptive_loss, id_loss, ntk_loss] +maxiters = 400 seed = 60 ## 2D Poisson equation From c3223c8461edbf6be84e1fefa5ea9857ee6827e0 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sun, 2 Apr 2023 18:12:21 +0530 Subject: [PATCH 3/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss Functions --- src/adaptive_losses.jl | 41 ++----------------------------------- test/adaptive_loss_tests.jl | 3 ++- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index e471be0dbc..80f9887dd8 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -262,7 +262,7 @@ end """ -NTK Adaptive Loss +Neural Tangent Kernel Adaptive Loss """ mutable struct NTKAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss @@ -306,23 +306,6 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, function run_loss_ntkl_adaptive_loss(θ, pde_losses, bc_losses) if iteration[1] % adaloss.reweight_every == 0 - # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function - # pde_grads_maxes = [maximum(abs.(Zygote.gradient(pde_loss_function, θ)[1])) - # for pde_loss_function in pde_loss_functions] - # pde_grads_max = maximum(pde_grads_maxes) - # bc_grads_mean = 
[mean(abs.(Zygote.gradient(bc_loss_function, θ)[1])) - # for bc_loss_function in bc_loss_functions] - - # nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : - # convert(adaloss_T, 1e-7) - # bc_loss_weights_proposed = pde_grads_max ./ - # (bc_grads_mean .+ nonzero_divisor_eps) - # adaloss.bc_loss_weights .= weight_change_inertia .* - # adaloss.bc_loss_weights .+ - # (1 .- weight_change_inertia) .* - # bc_loss_weights_proposed - - bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) for bc_loss_function in bc_loss_functions] bc_trace = sum(bc_vec) @@ -332,18 +315,7 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss.bc_loss_weights .= ( bc_trace .+ pde_trace ) ./ (bc_trace) adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) - - - # logscalar(pinnrep.logger, pde_trace, "adaptive_loss/pde_grad_max", - # iteration[1]) - # logvector(pinnrep.logger, bc_trace, "adaptive_loss/bc_trace", - # iteration[1]) - # logvector(pinnrep.logger, adaloss.pde_loss_weights, - # "adaptive_loss/pde_loss_weights", - # iteration[1]) - # logvector(pinnrep.logger, adaloss.bc_loss_weights, - # "adaptive_loss/bc_loss_weights", - # iteration[1]) + end nothing end @@ -426,15 +398,6 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss.bc_loss_weights .+ (1 .- weight_change_inertia) .* bc_loss_weights_proposed - # logscalar(pinnrep.logger, pde_grads_max, "adaptive_loss/pde_grad_max", - # iteration[1]) - # logvector(pinnrep.logger, pde_grads_maxes, "adaptive_loss/pde_grad_maxes", - # iteration[1]) - # logvector(pinnrep.logger, bc_grads_mean, "adaptive_loss/bc_grad_mean", - # iteration[1]) - # logvector(pinnrep.logger, adaloss.bc_loss_weights, - # "adaptive_loss/bc_loss_weights", - # iteration[1]) end nothing end diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 59e55f671f..d334ad2f9a 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -3,6 +3,7 @@ using Test, NeuralPDE import ModelingToolkit: Interval, infimum, supremum using DomainSets using Random +using Plots, Revise import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) @@ -15,7 +16,7 @@ ntk_loss = NeuralPDE.NTKAdaptiveLoss(100; pde_loss_weights = 1, id_loss = NeuralPDE.IDAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) adaptive_losses = [nonadaptive_loss, id_loss, ntk_loss] -maxiters = 400 +maxiters = 4000 seed = 60 ## 2D Poisson equation From 27a3b93c040d7268fc2328aee4e8186bbebb0e94 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sun, 2 Apr 2023 19:37:55 +0530 Subject: [PATCH 4/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss functions --- src/adaptive_losses.jl | 102 +++++++++++++++++++++++------------- test/adaptive_loss_tests.jl | 6 +-- 2 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 80f9887dd8..1bd786d3ed 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -263,49 +263,75 @@ end """ Neural Tangent Kernel Adaptive Loss + +Adapted from the paper +When and Why PINNs Fail to Train: A Neural Tangent kernel perspective +https://arxiv.org/pdf/2007.14527.pdf """ -mutable struct NTKAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_every::Int64 - weight_change_inertia::T +mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, + PDE_OPT <: Flux.Optimise.AbstractOptimiser, + BC_OPT <: Flux.Optimise.AbstractOptimiser} <: 
AbstractAdaptiveLoss + reweight_steps::Int64 + pde_max_optimiser::PDE_OPT + bc_max_optimiser::BC_OPT pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function NTKAdaptiveLoss{T}(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) where { - T <: - Real - } - new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss{T, + PDE_OPT, BC_OPT}(reweight_steps; + pde_max_optimiser = Flux.ADAM(1e-4), + bc_max_optimiser = Flux.ADAM(0.5), + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real, + PDE_OPT <: + Flux.Optimise.AbstractOptimiser, + BC_OPT <: + Flux.Optimise.AbstractOptimiser + } + new(convert(Int64, reweight_steps), convert(PDE_OPT, pde_max_optimiser), + convert(BC_OPT, bc_max_optimiser), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end -# default to Float64 -SciMLBase.@add_kwonly function NTKAdaptiveLoss(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) - NTKAdaptiveLoss{Float64}(reweight_every; - weight_change_inertia = weight_change_inertia, - pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) + +SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_xteps; + pde_max_optimiser = Flux.ADAM(1e-4), + bc_max_optimiser = Flux.ADAM(0.5), + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real, + PDE_OPT <: + Flux.Optimise.AbstractOptimiser, + BC_OPT <: + Flux.Optimise.AbstractOptimiser + } + NeuralTangentKernelAdaptiveLoss{Float64, typeof(pde_max_optimiser), + typeof(bc_max_optimiser)}(reweight_steps; + pde_max_optimiser = pde_max_optimiser, + bc_max_optimiser = bc_max_optimiser, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::NTKAdaptiveLoss, + adaloss::NeuralTangentKernelAdaptiveLoss, pde_loss_functions, bc_loss_functions) - weight_change_inertia = adaloss.weight_change_inertia + + pde_max_optimiser = adaloss.pde_max_optimiser + bc_max_optimiser = adaloss.bc_max_optimiser iteration = pinnrep.iteration adaloss_T = eltype(adaloss.pde_loss_weights) - function run_loss_ntkl_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_every == 0 + function run_loss_ntk_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % adaloss.reweight_steps == 0 bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) for bc_loss_function in bc_loss_functions] bc_trace = sum(bc_vec) @@ -331,15 +357,21 @@ end """ Inverse Dirichlet Adaptive Loss + +Inverse Dirichlet weighting enables reliable training of physics informed neural networks +Suryanarayana Maddu, Dominik Sturm, Christian L Müller, and Ivo F Sbalzarini +https://iopscience.iop.org/article/10.1088/2632-2153/ac3712/pdf +with code reference +https://github.com/mosaic-group/inverse-dirichlet-pinn """ -mutable struct IDAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_every::Int64 +mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_steps::Int64 
weight_change_inertia::T pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function IDAdaptiveLoss{T}(reweight_every; + SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_steps; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, @@ -347,18 +379,18 @@ mutable struct IDAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss T <: Real } - new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + new(convert(Int64, reweight_steps), convert(T, weight_change_inertia), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end # default to Float64 -SciMLBase.@add_kwonly function IDAdaptiveLoss(reweight_every; +SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_steps; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, additional_loss_weights = 1) - IDAdaptiveLoss{Float64}(reweight_every; + InverseDirichletAdaptiveLoss{Float64}(reweight_steps; weight_change_inertia = weight_change_inertia, pde_loss_weights = pde_loss_weights, bc_loss_weights = bc_loss_weights, @@ -366,14 +398,14 @@ SciMLBase.@add_kwonly function IDAdaptiveLoss(reweight_every; end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::IDAdaptiveLoss, + adaloss::InverseDirichletAdaptiveLoss, pde_loss_functions, bc_loss_functions) weight_change_inertia = adaloss.weight_change_inertia iteration = pinnrep.iteration adaloss_T = eltype(adaloss.pde_loss_weights) function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_every == 0 + if iteration[1] % adaloss.reweight_steps == 0 # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) for pde_loss_function in pde_loss_functions] diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index d334ad2f9a..f0b6403605 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -11,11 +11,11 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -ntk_loss = NeuralPDE.NTKAdaptiveLoss(100; pde_loss_weights = 1, +ntk_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -id_loss = NeuralPDE.IDAdaptiveLoss(100, pde_loss_weights = 1e3, +id_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, id_loss, ntk_loss] +adaptive_losses = [nonadaptive_loss, ntk_loss] maxiters = 4000 seed = 60 From 33d5ad2fd3959bb47b686286a821494560e4e828 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sun, 2 Apr 2023 20:37:47 +0530 Subject: [PATCH 5/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss functions --- src/adaptive_losses.jl | 42 +++++++++++++++++++++++++------------ test/adaptive_loss_tests.jl | 8 +++---- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 1bd786d3ed..825152667d 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -272,14 +272,14 @@ https://arxiv.org/pdf/2007.14527.pdf mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, PDE_OPT <: Flux.Optimise.AbstractOptimiser, BC_OPT <: Flux.Optimise.AbstractOptimiser} 
<: AbstractAdaptiveLoss - reweight_steps::Int64 + reweight_every::Int64 pde_max_optimiser::PDE_OPT bc_max_optimiser::BC_OPT pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss{T, - PDE_OPT, BC_OPT}(reweight_steps; + PDE_OPT, BC_OPT}(reweight_every; pde_max_optimiser = Flux.ADAM(1e-4), bc_max_optimiser = Flux.ADAM(0.5), pde_loss_weights = 1, @@ -292,14 +292,14 @@ mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, BC_OPT <: Flux.Optimise.AbstractOptimiser } - new(convert(Int64, reweight_steps), convert(PDE_OPT, pde_max_optimiser), + new(convert(Int64, reweight_every), convert(PDE_OPT, pde_max_optimiser), convert(BC_OPT, bc_max_optimiser), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end -SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_xteps; +SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_every; pde_max_optimiser = Flux.ADAM(1e-4), bc_max_optimiser = Flux.ADAM(0.5), pde_loss_weights = 1, @@ -313,7 +313,7 @@ SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_xteps; Flux.Optimise.AbstractOptimiser } NeuralTangentKernelAdaptiveLoss{Float64, typeof(pde_max_optimiser), - typeof(bc_max_optimiser)}(reweight_steps; + typeof(bc_max_optimiser)}(reweight_every; pde_max_optimiser = pde_max_optimiser, bc_max_optimiser = bc_max_optimiser, pde_loss_weights = pde_loss_weights, @@ -331,7 +331,7 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss_T = eltype(adaloss.pde_loss_weights) function run_loss_ntk_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_steps == 0 + if iteration[1] % adaloss.reweight_every == 0 bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) for bc_loss_function in bc_loss_functions] bc_trace = sum(bc_vec) @@ -340,8 +340,13 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, pde_trace = sum(pde_vec) adaloss.bc_loss_weights .= ( bc_trace .+ pde_trace ) ./ (bc_trace) - adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) + adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) + logvector(pinnrep.logger, adaloss.pde_loss_weights, + "adaptive_loss/pde_loss_weights", iteration[1]) + logvector(pinnrep.logger, adaloss.bc_loss_weights, + "adaptive_loss/bc_loss_weights", + iteration[1]) end nothing end @@ -366,12 +371,12 @@ https://github.com/mosaic-group/inverse-dirichlet-pinn """ mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_steps::Int64 + reweight_every::Int64 weight_change_inertia::T pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_steps; + SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_every; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, @@ -379,18 +384,18 @@ mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss T <: Real } - new(convert(Int64, reweight_steps), convert(T, weight_change_inertia), + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end # default to Float64 -SciMLBase.@add_kwonly function 
InverseDirichletAdaptiveLoss(reweight_steps; +SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_every; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, additional_loss_weights = 1) - InverseDirichletAdaptiveLoss{Float64}(reweight_steps; + InverseDirichletAdaptiveLoss{Float64}(reweight_every; weight_change_inertia = weight_change_inertia, pde_loss_weights = pde_loss_weights, bc_loss_weights = bc_loss_weights, @@ -405,7 +410,7 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss_T = eltype(adaloss.pde_loss_weights) function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_steps == 0 + if iteration[1] % adaloss.reweight_every == 0 # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) for pde_loss_function in pde_loss_functions] @@ -430,6 +435,17 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss.bc_loss_weights .+ (1 .- weight_change_inertia) .* bc_loss_weights_proposed + + logscalar(pinn.logger, gamma, + "adaptive_loss/gamma", iteration[1]) + logvector(pinn.logger, pde_stds, + "adaptive_loss/pde_stds", iteration[1]) + logvector(pinn.logger, bc_stds, + "adaptive_loss/bc_stds", iteration[1]) + logvector(pinn.logger, adaloss.bc_loss_weights, + "adaptive_loss/bc_loss_weights", iteration[1]) + logvector(pinn.logger, adaloss.pde_loss_weights, + "adaptive_loss/pde_loss_weights", iteration[1]) end nothing end diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index f0b6403605..24f74885fe 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -9,13 +9,13 @@ import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) -adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, +minimaxadaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -ntk_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, +neuraltangentkernel_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -id_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, +inversedirichlet_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, ntk_loss] +adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkernel_loss, inversedirichlet_loss] maxiters = 4000 seed = 60 From 96f3bb594a7eb4673d7d9b8f95e5ef91864a94d8 Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 08:50:09 +0530 Subject: [PATCH 6/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss functions --- src/adaptive_losses.jl | 10 +++++----- test/adaptive_loss_tests.jl | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 825152667d..9956341073 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -436,15 +436,15 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, (1 .- weight_change_inertia) .* bc_loss_weights_proposed - logscalar(pinn.logger, gamma, + logscalar(pinnrep.logger, gamma, "adaptive_loss/gamma", iteration[1]) - 
logvector(pinn.logger, pde_stds, + logvector(pinnrep.logger, pde_stds, "adaptive_loss/pde_stds", iteration[1]) - logvector(pinn.logger, bc_stds, + logvector(pinnrep.logger, bc_stds, "adaptive_loss/bc_stds", iteration[1]) - logvector(pinn.logger, adaloss.bc_loss_weights, + logvector(pinnrep.logger, adaloss.bc_loss_weights, "adaptive_loss/bc_loss_weights", iteration[1]) - logvector(pinn.logger, adaloss.pde_loss_weights, + logvector(pinnrep.logger, adaloss.pde_loss_weights, "adaptive_loss/pde_loss_weights", iteration[1]) end nothing diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 24f74885fe..136b5137a8 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -16,6 +16,7 @@ neuraltangentkernel_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_lo inversedirichlet_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkernel_loss, inversedirichlet_loss] +# adaptive_losses = [neuraltangentkernel_loss, inversedirichlet_loss] maxiters = 4000 seed = 60 @@ -94,11 +95,16 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @show error_results_no_logs[1][:total_diff_rel] @show error_results_no_logs[2][:total_diff_rel] @show error_results_no_logs[3][:total_diff_rel] +@show error_results_no_logs[4][:total_diff_rel] +@show error_results_no_logs[5][:total_diff_rel] + # accuracy tests, these work for this specific seed but might not for others # note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent @test error_results_no_logs[1][:total_diff_rel] < 0.4 @test error_results_no_logs[2][:total_diff_rel] < 0.4 @test error_results_no_logs[3][:total_diff_rel] < 0.4 +@test error_results_no_logs[4][:total_diff_rel] < 0.4 +@test error_results_no_logs[5][:total_diff_rel] < 0.4 #plots_diffs[1][:plot] #plots_diffs[2][:plot] From 10da18afef99a1fd719faf4a0112124f70e4622c Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 09:23:51 +0530 Subject: [PATCH 7/9] Added Neural Tangent Kernel Adaptive Loss function --- src/adaptive_losses.jl | 121 +++++------------------------------- test/adaptive_loss_tests.jl | 9 +-- 2 files changed, 16 insertions(+), 114 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 9956341073..c07d9e6d78 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -263,12 +263,24 @@ end """ Neural Tangent Kernel Adaptive Loss +``` julla +NeuralTangentKernelAdaptiveLoss(reweight_every; + pde_max_optimiser = Flux.ADAM(1e-4), + bc_max_optimiser = Flux.ADAM(0.5), + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) +``` + +A way of adaptively reweighing the components of the loss function by using the +values of the Jacobian of the +NTK predictions at the current point (infinite width assumption). 
+ +#References : -Adapted from the paper When and Why PINNs Fail to Train: A Neural Tangent kernel perspective https://arxiv.org/pdf/2007.14527.pdf """ - mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, PDE_OPT <: Flux.Optimise.AbstractOptimiser, BC_OPT <: Flux.Optimise.AbstractOptimiser} <: AbstractAdaptiveLoss @@ -351,108 +363,3 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, nothing end end - - - - - - - - - -""" -Inverse Dirichlet Adaptive Loss - -Inverse Dirichlet weighting enables reliable training of physics informed neural networks -Suryanarayana Maddu, Dominik Sturm, Christian L Müller, and Ivo F Sbalzarini -https://iopscience.iop.org/article/10.1088/2632-2153/ac3712/pdf -with code reference -https://github.com/mosaic-group/inverse-dirichlet-pinn -""" - -mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_every::Int64 - weight_change_inertia::T - pde_loss_weights::Vector{T} - bc_loss_weights::Vector{T} - additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) where { - T <: - Real - } - new(convert(Int64, reweight_every), convert(T, weight_change_inertia), - vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), - vectorify(additional_loss_weights, T)) - end -end -# default to Float64 -SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) - InverseDirichletAdaptiveLoss{Float64}(reweight_every; - weight_change_inertia = weight_change_inertia, - pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) -end - -function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::InverseDirichletAdaptiveLoss, - pde_loss_functions, bc_loss_functions) - weight_change_inertia = adaloss.weight_change_inertia - iteration = pinnrep.iteration - adaloss_T = eltype(adaloss.pde_loss_weights) - - function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_every == 0 - # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function - pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) - for pde_loss_function in pde_loss_functions] - pde_std_max = maximum(pde_stds) - bc_stds = [std(Zygote.gradient(bc_loss_function, θ)[1]) - for bc_loss_function in bc_loss_functions] - bc_std_max = maximum(bc_stds) - gamma = max(pde_std_max, bc_std_max) - - bc_loss_weights_proposed = gamma ./ (bc_stds) - adaloss.bc_loss_weights .= weight_change_inertia .* adaloss.bc_loss_weights .+ - (1 .- weight_change_inertia) .* bc_loss_weights_proposed - - pde_loss_weights_proposed = gamma ./ (pde_stds) - adaloss.pde_loss_weights .= weight_change_inertia .* adaloss.pde_loss_weights .+ - (1 .- weight_change_inertia) .* pde_loss_weights_proposed - nonzero_divisor_eps = adaloss_T isa Float64 ? 
Float64(1e-11) : - convert(adaloss_T, 1e-7) - bc_loss_weights_proposed = gamma ./ - (bc_stds) - adaloss.bc_loss_weights .= weight_change_inertia .* - adaloss.bc_loss_weights .+ - (1 .- weight_change_inertia) .* - bc_loss_weights_proposed - - logscalar(pinnrep.logger, gamma, - "adaptive_loss/gamma", iteration[1]) - logvector(pinnrep.logger, pde_stds, - "adaptive_loss/pde_stds", iteration[1]) - logvector(pinnrep.logger, bc_stds, - "adaptive_loss/bc_stds", iteration[1]) - logvector(pinnrep.logger, adaloss.bc_loss_weights, - "adaptive_loss/bc_loss_weights", iteration[1]) - logvector(pinnrep.logger, adaloss.pde_loss_weights, - "adaptive_loss/pde_loss_weights", iteration[1]) - end - nothing - end -end - - - - - - diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 136b5137a8..069ae6d073 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -11,12 +11,9 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) minimaxadaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -neuraltangentkernel_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, +neuraltangentkerneladaptive_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -inversedirichlet_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, - bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkernel_loss, inversedirichlet_loss] -# adaptive_losses = [neuraltangentkernel_loss, inversedirichlet_loss] +adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkerneladaptive_loss] maxiters = 4000 seed = 60 @@ -96,7 +93,6 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @show error_results_no_logs[2][:total_diff_rel] @show error_results_no_logs[3][:total_diff_rel] @show error_results_no_logs[4][:total_diff_rel] -@show error_results_no_logs[5][:total_diff_rel] # accuracy tests, these work for this specific seed but might not for others # note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent @@ -104,7 +100,6 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @test error_results_no_logs[2][:total_diff_rel] < 0.4 @test error_results_no_logs[3][:total_diff_rel] < 0.4 @test error_results_no_logs[4][:total_diff_rel] < 0.4 -@test error_results_no_logs[5][:total_diff_rel] < 0.4 #plots_diffs[1][:plot] #plots_diffs[2][:plot] From bfb7c6226203e3b7bb8012e281666225360a9e2c Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 10:02:16 +0530 Subject: [PATCH 8/9] Formatted to SciML codebase style --- Project.toml | 2 -- src/NeuralPDE.jl | 1 - test/adaptive_loss_tests.jl | 1 - 3 files changed, 4 deletions(-) diff --git a/Project.toml b/Project.toml index aa145eec3a..5890628584 100644 --- a/Project.toml +++ b/Project.toml @@ -23,12 +23,10 @@ ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" Reexport = 
"189a3867-3050-52da-a836-e630ba90ab69" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 6fdd6d9c6c..a2ae343e5e 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -47,7 +47,6 @@ include("transform_inf_integral.jl") include("discretize.jl") include("neural_adapter.jl") - export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem, KolmogorovParamDomain, NNParamKolmogorov, diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 069ae6d073..dbbdea6f1b 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -3,7 +3,6 @@ using Test, NeuralPDE import ModelingToolkit: Interval, infimum, supremum using DomainSets using Random -using Plots, Revise import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) From 1f405403054a00f6b83a904ecd299449351f90a3 Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 10:38:58 +0530 Subject: [PATCH 9/9] Added Inverse Dirichlet Loss Adaptive function --- src/adaptive_losses.jl | 96 ++++++++++++++++++++++++++++++++++++- test/adaptive_loss_tests.jl | 7 ++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index c07d9e6d78..5e02caeb46 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -258,8 +258,102 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, end end +""" +Inverse Dirichlet Adaptive Loss +```julia +function InverseDirichletAdaptiveLoss(reweight_every; + weight_change_inertia = 0.5, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) +``` + +Inverse Dirichlet weighting enables reliable training of physics informed neural networks +Suryanarayana Maddu, Dominik Sturm, Christian L Müller, and Ivo F Sbalzarini +https://iopscience.iop.org/article/10.1088/2632-2153/ac3712/pdf +with code reference +https://github.com/mosaic-group/inverse-dirichlet-pinn +""" +mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_every::Int64 + weight_change_inertia::T + pde_loss_weights::Vector{T} + bc_loss_weights::Vector{T} + additional_loss_weights::Vector{T} + SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_every; + weight_change_inertia = 0.5, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real + } + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), + vectorify(additional_loss_weights, T)) + end +end +# default to Float64 +SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_every; + weight_change_inertia = 0.5, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) + InverseDirichletAdaptiveLoss{Float64}(reweight_every; + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) +end +function generate_adaptive_loss_function(pinnrep::PINNRepresentation, + adaloss::InverseDirichletAdaptiveLoss, + pde_loss_functions, bc_loss_functions) + weight_change_inertia = adaloss.weight_change_inertia + iteration = pinnrep.iteration + 
adaloss_T = eltype(adaloss.pde_loss_weights) + function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % adaloss.reweight_every == 0 + # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function + pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) + for pde_loss_function in pde_loss_functions] + pde_std_max = maximum(pde_stds) + bc_stds = [std(Zygote.gradient(bc_loss_function, θ)[1]) + for bc_loss_function in bc_loss_functions] + bc_std_max = maximum(bc_stds) + gamma = max(pde_std_max, bc_std_max) + + bc_loss_weights_proposed = gamma ./ (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* bc_loss_weights_proposed + + pde_loss_weights_proposed = gamma ./ (pde_stds) + adaloss.pde_loss_weights .= weight_change_inertia .* adaloss.pde_loss_weights .+ + (1 .- weight_change_inertia) .* pde_loss_weights_proposed + nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : + convert(adaloss_T, 1e-7) + bc_loss_weights_proposed = gamma ./ + (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* + adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* + bc_loss_weights_proposed + + logscalar(pinnrep.logger, gamma, + "adaptive_loss/gamma", iteration[1]) + logvector(pinnrep.logger, pde_stds, + "adaptive_loss/pde_stds", iteration[1]) + logvector(pinnrep.logger, bc_stds, + "adaptive_loss/bc_stds", iteration[1]) + logvector(pinnrep.logger, adaloss.bc_loss_weights, + "adaptive_loss/bc_loss_weights", iteration[1]) + logvector(pinnrep.logger, adaloss.pde_loss_weights, + "adaptive_loss/pde_loss_weights", iteration[1]) + end + nothing + end +end """ Neural Tangent Kernel Adaptive Loss @@ -362,4 +456,4 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, end nothing end -end +end \ No newline at end of file diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index dbbdea6f1b..02b6443a84 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -10,9 +10,12 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) minimaxadaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) +inversedirichletadaptive_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, + bc_loss_weights = 1) neuraltangentkerneladaptive_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkerneladaptive_loss] +adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, + inversedirichletadaptive_loss, neuraltangentkerneladaptive_loss, ] maxiters = 4000 seed = 60 @@ -92,6 +95,7 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @show error_results_no_logs[2][:total_diff_rel] @show error_results_no_logs[3][:total_diff_rel] @show error_results_no_logs[4][:total_diff_rel] +@show error_results_no_logs[5][:total_diff_rel] # accuracy tests, these work for this specific seed but might not for others # note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent @@ -99,6 +103,7 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @test 
error_results_no_logs[2][:total_diff_rel] < 0.4
 @test error_results_no_logs[3][:total_diff_rel] < 0.4
 @test error_results_no_logs[4][:total_diff_rel] < 0.4
+@test error_results_no_logs[5][:total_diff_rel] < 0.4
 #plots_diffs[1][:plot]
 #plots_diffs[2][:plot]
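
Usage note (a minimal sketch, not part of the patches above): this shows how the two adaptive losses added in this series, NeuralTangentKernelAdaptiveLoss and InverseDirichletAdaptiveLoss, are constructed and attached to a PINN discretization. The constructor calls mirror test/adaptive_loss_tests.jl; the `adaptive_loss` keyword of `PhysicsInformedNN` is an assumption carried over from the existing adaptive-loss workflow and is not shown in the hunks above.

```julia
using NeuralPDE, Lux

# Recompute the loss weights every 100 iterations, starting from the given values
# (same constructor calls as in test/adaptive_loss_tests.jl).
ntk_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1,
                                                     bc_loss_weights = 1)
id_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100; pde_loss_weights = 1e3,
                                                 bc_loss_weights = 1)

# Small network for a 2D problem, as in the 2D Poisson example earlier in the series.
chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 16, Lux.σ), Lux.Dense(16, 1))

# Assumed keyword: `adaptive_loss` selects which reweighting scheme the generated
# loss function uses during training.
discretization = PhysicsInformedNN(chain, QuadratureTraining();
                                   adaptive_loss = ntk_loss)
```

Swapping `ntk_loss` for `id_loss` (or any other AbstractAdaptiveLoss) changes only the reweighting rule; the rest of the discretize/solve pipeline from the earlier examples stays the same.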