From f17686c013816036088e561d11b29ddfbc01d383 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sat, 1 Apr 2023 12:18:03 +0530 Subject: [PATCH 1/9] T1 some implementations v1 --- Project.toml | 2 ++ dummy/basic_eg.jl | 57 ++++++++++++++++++++++++++++++++++++++++++++ dummy/basic_nnode.jl | 17 +++++++++++++ dummy/flux_eg.jl | 0 dummy/t1.jl | 38 +++++++++++++++++++++++++++++ dummy/t2.jl | 5 ++++ dummy/t3.jl | 6 +++++ 7 files changed, 125 insertions(+) create mode 100644 dummy/basic_eg.jl create mode 100644 dummy/basic_nnode.jl create mode 100644 dummy/flux_eg.jl create mode 100644 dummy/t1.jl create mode 100644 dummy/t2.jl create mode 100644 dummy/t3.jl diff --git a/Project.toml b/Project.toml index 5890628584..aa145eec3a 100644 --- a/Project.toml +++ b/Project.toml @@ -23,10 +23,12 @@ ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/dummy/basic_eg.jl b/dummy/basic_eg.jl new file mode 100644 index 0000000000..b99c5419fc --- /dev/null +++ b/dummy/basic_eg.jl @@ -0,0 +1,57 @@ +using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers +import ModelingToolkit: Interval, infimum, supremum + +@parameters x y +@variables u(..) 
+Dxx = Differential(x)^2 +Dyy = Differential(y)^2 + +# 2D PDE +eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) + +# Boundary conditions +bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, + u(x, 0) ~ 0.0, u(x, 1) ~ 0] +# Space and time domains +domains = [x ∈ Interval(0.0, 1.0), + y ∈ Interval(0.0, 1.0)] +# Discretization +dx = 0.1 + +# Neural network +dim = 2 # number of dimensions +chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) + +discretization = PhysicsInformedNN(chain, QuadratureTraining()) + +@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) +prob = discretize(pde_system, discretization) + +callback = function (p, l) + println("Current loss is: $l") + return false +end + +res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) +prob = remake(prob, u0 = res.minimizer) +res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) +phi = discretization.phi + + + + + +xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] +analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) + +u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], + (length(xs), length(ys))) +u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], + (length(xs), length(ys))) +diff_u = abs.(u_predict .- u_real) + +using Plots +p1 = plot(xs, ys, u_real, linetype = :contourf, title = "analytic"); +p2 = plot(xs, ys, u_predict, linetype = :contourf, title = "predict"); +p3 = plot(xs, ys, diff_u, linetype = :contourf, title = "error"); +plot(p1, p2, p3) \ No newline at end of file diff --git a/dummy/basic_nnode.jl b/dummy/basic_nnode.jl new file mode 100644 index 0000000000..0b8cb5eec1 --- /dev/null +++ b/dummy/basic_nnode.jl @@ -0,0 +1,17 @@ +using NeuralPDE, Flux, OptimizationOptimisers + +linear(u, p, t) = cos(2pi * t) +tspan = (0.0f0, 1.0f0) +u0 = 0.0f0 +prob = ODEProblem(linear, u0, tspan) + + +chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) + + +opt = OptimizationOptimisers.Adam(0.1) +alg = NeuralPDE.NNODE(chain, opt) + + +sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, abstol = 1.0f-6, + maxiters = 200) \ No newline at end of file diff --git a/dummy/flux_eg.jl b/dummy/flux_eg.jl new file mode 100644 index 0000000000..e69de29bb2 diff --git a/dummy/t1.jl b/dummy/t1.jl new file mode 100644 index 0000000000..a9baff0506 --- /dev/null +++ b/dummy/t1.jl @@ -0,0 +1,38 @@ +using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers +import ModelingToolkit: Interval, infimum, supremum + +@parameters x y +@variables u(..) 
+Dxx = Differential(x)^2 +Dyy = Differential(y)^2 + +# 2D PDE +eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) + +# Boundary conditions +bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, + u(x, 0) ~ 0.0, u(x, 1) ~ 0] +# Space and time domains +domains = [x ∈ Interval(0.0, 1.0), + y ∈ Interval(0.0, 1.0)] +# Discretization +dx = 0.1 + +# Neural network +dim = 2 # number of dimensions +chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) + +discretization = PhysicsInformedNN(chain, QuadratureTraining()) + +@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) +prob = discretize(pde_system, discretization) + +callback = function (p, l) + println("Current loss is: $l") + return false +end + +res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) +prob = remake(prob, u0 = res.minimizer) +res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) +phi = discretization.phi \ No newline at end of file diff --git a/dummy/t2.jl b/dummy/t2.jl new file mode 100644 index 0000000000..37e6201ebd --- /dev/null +++ b/dummy/t2.jl @@ -0,0 +1,5 @@ +using Flux, Tracker +x = [0.8; 0.8] +ann = Chain(Dense(2, 10, tanh), Dense(10, 1)) +p, re = Flux.destructure(ann) +z = re(Float64(p)) \ No newline at end of file diff --git a/dummy/t3.jl b/dummy/t3.jl new file mode 100644 index 0000000000..0545e777a8 --- /dev/null +++ b/dummy/t3.jl @@ -0,0 +1,6 @@ +using NeuralPDE, Flux, OptimizationOptimisers + +linear(u, p, t) = cos(2pi * t) +tspan = (0.0f0, 1.0f0) +u0 = 0.0f0 +prob = ODEProblem(linear, u0, tspan) \ No newline at end of file From 17e791f7671d7fc3bc0d96f7b5ccc679bb10bd4c Mon Sep 17 00:00:00 2001 From: Sagar Date: Sat, 1 Apr 2023 19:06:00 +0530 Subject: [PATCH 2/9] Added Inverse Dirichlet Loss and Neural Tangent Loss implementations --- dummy/basic_eg.jl | 57 ----------- dummy/basic_nnode.jl | 17 ---- dummy/flux_eg.jl | 0 dummy/t1.jl | 38 -------- dummy/t2.jl | 5 - dummy/t3.jl | 6 -- src/NeuralPDE.jl | 2 + src/adaptive_losses.jl | 188 ++++++++++++++++++++++++++++++++++++ test/adaptive_loss_tests.jl | 8 +- 9 files changed, 196 insertions(+), 125 deletions(-) delete mode 100644 dummy/basic_eg.jl delete mode 100644 dummy/basic_nnode.jl delete mode 100644 dummy/flux_eg.jl delete mode 100644 dummy/t1.jl delete mode 100644 dummy/t2.jl delete mode 100644 dummy/t3.jl diff --git a/dummy/basic_eg.jl b/dummy/basic_eg.jl deleted file mode 100644 index b99c5419fc..0000000000 --- a/dummy/basic_eg.jl +++ /dev/null @@ -1,57 +0,0 @@ -using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers -import ModelingToolkit: Interval, infimum, supremum - -@parameters x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 - -# 2D PDE -eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) - -# Boundary conditions -bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, - u(x, 0) ~ 0.0, u(x, 1) ~ 0] -# Space and time domains -domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] -# Discretization -dx = 0.1 - -# Neural network -dim = 2 # number of dimensions -chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) - -discretization = PhysicsInformedNN(chain, QuadratureTraining()) - -@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) -prob = discretize(pde_system, discretization) - -callback = function (p, l) - println("Current loss is: $l") - return false -end - -res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) -phi = discretization.phi - - - - - -xs, ys = [infimum(d.domain):(dx / 10):supremum(d.domain) for d in domains] -analytic_sol_func(x, y) = (sin(pi * x) * sin(pi * y)) / (2pi^2) - -u_predict = reshape([first(phi([x, y], res.minimizer)) for x in xs for y in ys], - (length(xs), length(ys))) -u_real = reshape([analytic_sol_func(x, y) for x in xs for y in ys], - (length(xs), length(ys))) -diff_u = abs.(u_predict .- u_real) - -using Plots -p1 = plot(xs, ys, u_real, linetype = :contourf, title = "analytic"); -p2 = plot(xs, ys, u_predict, linetype = :contourf, title = "predict"); -p3 = plot(xs, ys, diff_u, linetype = :contourf, title = "error"); -plot(p1, p2, p3) \ No newline at end of file diff --git a/dummy/basic_nnode.jl b/dummy/basic_nnode.jl deleted file mode 100644 index 0b8cb5eec1..0000000000 --- a/dummy/basic_nnode.jl +++ /dev/null @@ -1,17 +0,0 @@ -using NeuralPDE, Flux, OptimizationOptimisers - -linear(u, p, t) = cos(2pi * t) -tspan = (0.0f0, 1.0f0) -u0 = 0.0f0 -prob = ODEProblem(linear, u0, tspan) - - -chain = Flux.Chain(Dense(1, 5, σ), Dense(5, 1)) - - -opt = OptimizationOptimisers.Adam(0.1) -alg = NeuralPDE.NNODE(chain, opt) - - -sol = solve(prob, NeuralPDE.NNODE(chain, opt), verbose = true, abstol = 1.0f-6, - maxiters = 200) \ No newline at end of file diff --git a/dummy/flux_eg.jl b/dummy/flux_eg.jl deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/dummy/t1.jl b/dummy/t1.jl deleted file mode 100644 index a9baff0506..0000000000 --- a/dummy/t1.jl +++ /dev/null @@ -1,38 +0,0 @@ -using NeuralPDE, Lux, ModelingToolkit, Optimization, OptimizationOptimisers -import ModelingToolkit: Interval, infimum, supremum - -@parameters x y -@variables u(..) 
-Dxx = Differential(x)^2 -Dyy = Differential(y)^2 - -# 2D PDE -eq = Dxx(u(x, y)) + Dyy(u(x, y)) ~ -sin(pi * x) * sin(pi * y) - -# Boundary conditions -bcs = [u(0, y) ~ 0.0, u(1, y) ~ 0, - u(x, 0) ~ 0.0, u(x, 1) ~ 0] -# Space and time domains -domains = [x ∈ Interval(0.0, 1.0), - y ∈ Interval(0.0, 1.0)] -# Discretization -dx = 0.1 - -# Neural network -dim = 2 # number of dimensions -chain = Lux.Chain(Dense(dim, 16, Lux.σ), Dense(16, 16, Lux.σ), Dense(16, 1)) - -discretization = PhysicsInformedNN(chain, QuadratureTraining()) - -@named pde_system = PDESystem(eq, bcs, domains, [x, y], [u(x, y)]) -prob = discretize(pde_system, discretization) - -callback = function (p, l) - println("Current loss is: $l") - return false -end - -res = Optimization.solve(prob, ADAM(0.1); callback = callback, maxiters = 4000) -prob = remake(prob, u0 = res.minimizer) -res = Optimization.solve(prob, ADAM(0.01); callback = callback, maxiters = 2000) -phi = discretization.phi \ No newline at end of file diff --git a/dummy/t2.jl b/dummy/t2.jl deleted file mode 100644 index 37e6201ebd..0000000000 --- a/dummy/t2.jl +++ /dev/null @@ -1,5 +0,0 @@ -using Flux, Tracker -x = [0.8; 0.8] -ann = Chain(Dense(2, 10, tanh), Dense(10, 1)) -p, re = Flux.destructure(ann) -z = re(Float64(p)) \ No newline at end of file diff --git a/dummy/t3.jl b/dummy/t3.jl deleted file mode 100644 index 0545e777a8..0000000000 --- a/dummy/t3.jl +++ /dev/null @@ -1,6 +0,0 @@ -using NeuralPDE, Flux, OptimizationOptimisers - -linear(u, p, t) = cos(2pi * t) -tspan = (0.0f0, 1.0f0) -u0 = 0.0f0 -prob = ODEProblem(linear, u0, tspan) \ No newline at end of file diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 81051eb916..6fdd6d9c6c 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -17,6 +17,7 @@ using RuntimeGeneratedFunctions using SciMLBase using Statistics using ArrayInterface +using LinearAlgebra import Optim using DomainSets using Symbolics @@ -46,6 +47,7 @@ include("transform_inf_integral.jl") include("discretize.jl") include("neural_adapter.jl") + export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem, KolmogorovParamDomain, NNParamKolmogorov, diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index b37023da7c..e471be0dbc 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -257,3 +257,191 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, nothing end end + + + + +""" +NTK Adaptive Loss +""" + +mutable struct NTKAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_every::Int64 + weight_change_inertia::T + pde_loss_weights::Vector{T} + bc_loss_weights::Vector{T} + additional_loss_weights::Vector{T} + SciMLBase.@add_kwonly function NTKAdaptiveLoss{T}(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real + } + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), + vectorify(additional_loss_weights, T)) + end +end +# default to Float64 +SciMLBase.@add_kwonly function NTKAdaptiveLoss(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) + NTKAdaptiveLoss{Float64}(reweight_every; + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) +end + +function 
generate_adaptive_loss_function(pinnrep::PINNRepresentation, + adaloss::NTKAdaptiveLoss, + pde_loss_functions, bc_loss_functions) + weight_change_inertia = adaloss.weight_change_inertia + iteration = pinnrep.iteration + adaloss_T = eltype(adaloss.pde_loss_weights) + + function run_loss_ntkl_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % adaloss.reweight_every == 0 + # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function + # pde_grads_maxes = [maximum(abs.(Zygote.gradient(pde_loss_function, θ)[1])) + # for pde_loss_function in pde_loss_functions] + # pde_grads_max = maximum(pde_grads_maxes) + # bc_grads_mean = [mean(abs.(Zygote.gradient(bc_loss_function, θ)[1])) + # for bc_loss_function in bc_loss_functions] + + # nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : + # convert(adaloss_T, 1e-7) + # bc_loss_weights_proposed = pde_grads_max ./ + # (bc_grads_mean .+ nonzero_divisor_eps) + # adaloss.bc_loss_weights .= weight_change_inertia .* + # adaloss.bc_loss_weights .+ + # (1 .- weight_change_inertia) .* + # bc_loss_weights_proposed + + + bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) + for bc_loss_function in bc_loss_functions] + bc_trace = sum(bc_vec) + pde_vec = [dot(vec(Zygote.gradient(pde_loss_function, θ)[1]), vec(Zygote.gradient(pde_loss_function, θ)[1])) + for pde_loss_function in pde_loss_functions] + pde_trace = sum(pde_vec) + + adaloss.bc_loss_weights .= ( bc_trace .+ pde_trace ) ./ (bc_trace) + adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) + + + # logscalar(pinnrep.logger, pde_trace, "adaptive_loss/pde_grad_max", + # iteration[1]) + # logvector(pinnrep.logger, bc_trace, "adaptive_loss/bc_trace", + # iteration[1]) + # logvector(pinnrep.logger, adaloss.pde_loss_weights, + # "adaptive_loss/pde_loss_weights", + # iteration[1]) + # logvector(pinnrep.logger, adaloss.bc_loss_weights, + # "adaptive_loss/bc_loss_weights", + # iteration[1]) + end + nothing + end +end + + + + + + + + + +""" +Inverse Dirichlet Adaptive Loss +""" + +mutable struct IDAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_every::Int64 + weight_change_inertia::T + pde_loss_weights::Vector{T} + bc_loss_weights::Vector{T} + additional_loss_weights::Vector{T} + SciMLBase.@add_kwonly function IDAdaptiveLoss{T}(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real + } + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), + vectorify(additional_loss_weights, T)) + end +end +# default to Float64 +SciMLBase.@add_kwonly function IDAdaptiveLoss(reweight_every; + weight_change_inertia = 0.9, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) + IDAdaptiveLoss{Float64}(reweight_every; + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) +end + +function generate_adaptive_loss_function(pinnrep::PINNRepresentation, + adaloss::IDAdaptiveLoss, + pde_loss_functions, bc_loss_functions) + weight_change_inertia = adaloss.weight_change_inertia + iteration = pinnrep.iteration + adaloss_T = eltype(adaloss.pde_loss_weights) + + function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % 
adaloss.reweight_every == 0 + # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function + pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) + for pde_loss_function in pde_loss_functions] + pde_std_max = maximum(pde_stds) + bc_stds = [std(Zygote.gradient(bc_loss_function, θ)[1]) + for bc_loss_function in bc_loss_functions] + bc_std_max = maximum(bc_stds) + gamma = max(pde_std_max, bc_std_max) + + bc_loss_weights_proposed = gamma ./ (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* bc_loss_weights_proposed + + pde_loss_weights_proposed = gamma ./ (pde_stds) + adaloss.pde_loss_weights .= weight_change_inertia .* adaloss.pde_loss_weights .+ + (1 .- weight_change_inertia) .* pde_loss_weights_proposed + nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : + convert(adaloss_T, 1e-7) + bc_loss_weights_proposed = gamma ./ + (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* + adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* + bc_loss_weights_proposed + # logscalar(pinnrep.logger, pde_grads_max, "adaptive_loss/pde_grad_max", + # iteration[1]) + # logvector(pinnrep.logger, pde_grads_maxes, "adaptive_loss/pde_grad_maxes", + # iteration[1]) + # logvector(pinnrep.logger, bc_grads_mean, "adaptive_loss/bc_grad_mean", + # iteration[1]) + # logvector(pinnrep.logger, adaloss.bc_loss_weights, + # "adaptive_loss/bc_loss_weights", + # iteration[1]) + end + nothing + end +end + + + + + + diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index bd49783ec4..59e55f671f 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -10,8 +10,12 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, adaptive_loss] -maxiters = 4000 +ntk_loss = NeuralPDE.NTKAdaptiveLoss(100; pde_loss_weights = 1, + bc_loss_weights = 1) +id_loss = NeuralPDE.IDAdaptiveLoss(100, pde_loss_weights = 1e3, + bc_loss_weights = 1) +adaptive_losses = [nonadaptive_loss, id_loss, ntk_loss] +maxiters = 400 seed = 60 ## 2D Poisson equation From c3223c8461edbf6be84e1fefa5ea9857ee6827e0 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sun, 2 Apr 2023 18:12:21 +0530 Subject: [PATCH 3/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss Functions --- src/adaptive_losses.jl | 41 ++----------------------------------- test/adaptive_loss_tests.jl | 3 ++- 2 files changed, 4 insertions(+), 40 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index e471be0dbc..80f9887dd8 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -262,7 +262,7 @@ end """ -NTK Adaptive Loss +Neural Tangent Kernel Adaptive Loss """ mutable struct NTKAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss @@ -306,23 +306,6 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, function run_loss_ntkl_adaptive_loss(θ, pde_losses, bc_losses) if iteration[1] % adaloss.reweight_every == 0 - # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function - # pde_grads_maxes = [maximum(abs.(Zygote.gradient(pde_loss_function, θ)[1])) - # for pde_loss_function in pde_loss_functions] - # pde_grads_max = maximum(pde_grads_maxes) - # bc_grads_mean = 
[mean(abs.(Zygote.gradient(bc_loss_function, θ)[1])) - # for bc_loss_function in bc_loss_functions] - - # nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : - # convert(adaloss_T, 1e-7) - # bc_loss_weights_proposed = pde_grads_max ./ - # (bc_grads_mean .+ nonzero_divisor_eps) - # adaloss.bc_loss_weights .= weight_change_inertia .* - # adaloss.bc_loss_weights .+ - # (1 .- weight_change_inertia) .* - # bc_loss_weights_proposed - - bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) for bc_loss_function in bc_loss_functions] bc_trace = sum(bc_vec) @@ -332,18 +315,7 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss.bc_loss_weights .= ( bc_trace .+ pde_trace ) ./ (bc_trace) adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) - - - # logscalar(pinnrep.logger, pde_trace, "adaptive_loss/pde_grad_max", - # iteration[1]) - # logvector(pinnrep.logger, bc_trace, "adaptive_loss/bc_trace", - # iteration[1]) - # logvector(pinnrep.logger, adaloss.pde_loss_weights, - # "adaptive_loss/pde_loss_weights", - # iteration[1]) - # logvector(pinnrep.logger, adaloss.bc_loss_weights, - # "adaptive_loss/bc_loss_weights", - # iteration[1]) + end nothing end @@ -426,15 +398,6 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss.bc_loss_weights .+ (1 .- weight_change_inertia) .* bc_loss_weights_proposed - # logscalar(pinnrep.logger, pde_grads_max, "adaptive_loss/pde_grad_max", - # iteration[1]) - # logvector(pinnrep.logger, pde_grads_maxes, "adaptive_loss/pde_grad_maxes", - # iteration[1]) - # logvector(pinnrep.logger, bc_grads_mean, "adaptive_loss/bc_grad_mean", - # iteration[1]) - # logvector(pinnrep.logger, adaloss.bc_loss_weights, - # "adaptive_loss/bc_loss_weights", - # iteration[1]) end nothing end diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 59e55f671f..d334ad2f9a 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -3,6 +3,7 @@ using Test, NeuralPDE import ModelingToolkit: Interval, infimum, supremum using DomainSets using Random +using Plots, Revise import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) @@ -15,7 +16,7 @@ ntk_loss = NeuralPDE.NTKAdaptiveLoss(100; pde_loss_weights = 1, id_loss = NeuralPDE.IDAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) adaptive_losses = [nonadaptive_loss, id_loss, ntk_loss] -maxiters = 400 +maxiters = 4000 seed = 60 ## 2D Poisson equation From 27a3b93c040d7268fc2328aee4e8186bbebb0e94 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sun, 2 Apr 2023 19:37:55 +0530 Subject: [PATCH 4/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss functions --- src/adaptive_losses.jl | 102 +++++++++++++++++++++++------------- test/adaptive_loss_tests.jl | 6 +-- 2 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 80f9887dd8..1bd786d3ed 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -263,49 +263,75 @@ end """ Neural Tangent Kernel Adaptive Loss + +Adapted from the paper +When and Why PINNs Fail to Train: A Neural Tangent kernel perspective +https://arxiv.org/pdf/2007.14527.pdf """ -mutable struct NTKAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_every::Int64 - weight_change_inertia::T +mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, + PDE_OPT <: Flux.Optimise.AbstractOptimiser, + BC_OPT <: Flux.Optimise.AbstractOptimiser} <: 
AbstractAdaptiveLoss + reweight_steps::Int64 + pde_max_optimiser::PDE_OPT + bc_max_optimiser::BC_OPT pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function NTKAdaptiveLoss{T}(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) where { - T <: - Real - } - new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss{T, + PDE_OPT, BC_OPT}(reweight_steps; + pde_max_optimiser = Flux.ADAM(1e-4), + bc_max_optimiser = Flux.ADAM(0.5), + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real, + PDE_OPT <: + Flux.Optimise.AbstractOptimiser, + BC_OPT <: + Flux.Optimise.AbstractOptimiser + } + new(convert(Int64, reweight_steps), convert(PDE_OPT, pde_max_optimiser), + convert(BC_OPT, bc_max_optimiser), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end -# default to Float64 -SciMLBase.@add_kwonly function NTKAdaptiveLoss(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) - NTKAdaptiveLoss{Float64}(reweight_every; - weight_change_inertia = weight_change_inertia, - pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) + +SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_xteps; + pde_max_optimiser = Flux.ADAM(1e-4), + bc_max_optimiser = Flux.ADAM(0.5), + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real, + PDE_OPT <: + Flux.Optimise.AbstractOptimiser, + BC_OPT <: + Flux.Optimise.AbstractOptimiser + } + NeuralTangentKernelAdaptiveLoss{Float64, typeof(pde_max_optimiser), + typeof(bc_max_optimiser)}(reweight_steps; + pde_max_optimiser = pde_max_optimiser, + bc_max_optimiser = bc_max_optimiser, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::NTKAdaptiveLoss, + adaloss::NeuralTangentKernelAdaptiveLoss, pde_loss_functions, bc_loss_functions) - weight_change_inertia = adaloss.weight_change_inertia + + pde_max_optimiser = adaloss.pde_max_optimiser + bc_max_optimiser = adaloss.bc_max_optimiser iteration = pinnrep.iteration adaloss_T = eltype(adaloss.pde_loss_weights) - function run_loss_ntkl_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_every == 0 + function run_loss_ntk_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % adaloss.reweight_steps == 0 bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) for bc_loss_function in bc_loss_functions] bc_trace = sum(bc_vec) @@ -331,15 +357,21 @@ end """ Inverse Dirichlet Adaptive Loss + +Inverse Dirichlet weighting enables reliable training of physics informed neural networks +Suryanarayana Maddu, Dominik Sturm, Christian L Müller, and Ivo F Sbalzarini +https://iopscience.iop.org/article/10.1088/2632-2153/ac3712/pdf +with code reference +https://github.com/mosaic-group/inverse-dirichlet-pinn """ -mutable struct IDAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_every::Int64 +mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_steps::Int64 
weight_change_inertia::T pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function IDAdaptiveLoss{T}(reweight_every; + SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_steps; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, @@ -347,18 +379,18 @@ mutable struct IDAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss T <: Real } - new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + new(convert(Int64, reweight_steps), convert(T, weight_change_inertia), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end # default to Float64 -SciMLBase.@add_kwonly function IDAdaptiveLoss(reweight_every; +SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_steps; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, additional_loss_weights = 1) - IDAdaptiveLoss{Float64}(reweight_every; + InverseDirichletAdaptiveLoss{Float64}(reweight_steps; weight_change_inertia = weight_change_inertia, pde_loss_weights = pde_loss_weights, bc_loss_weights = bc_loss_weights, @@ -366,14 +398,14 @@ SciMLBase.@add_kwonly function IDAdaptiveLoss(reweight_every; end function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::IDAdaptiveLoss, + adaloss::InverseDirichletAdaptiveLoss, pde_loss_functions, bc_loss_functions) weight_change_inertia = adaloss.weight_change_inertia iteration = pinnrep.iteration adaloss_T = eltype(adaloss.pde_loss_weights) function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_every == 0 + if iteration[1] % adaloss.reweight_steps == 0 # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) for pde_loss_function in pde_loss_functions] diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index d334ad2f9a..f0b6403605 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -11,11 +11,11 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -ntk_loss = NeuralPDE.NTKAdaptiveLoss(100; pde_loss_weights = 1, +ntk_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -id_loss = NeuralPDE.IDAdaptiveLoss(100, pde_loss_weights = 1e3, +id_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, id_loss, ntk_loss] +adaptive_losses = [nonadaptive_loss, ntk_loss] maxiters = 4000 seed = 60 From 33d5ad2fd3959bb47b686286a821494560e4e828 Mon Sep 17 00:00:00 2001 From: Sagar Date: Sun, 2 Apr 2023 20:37:47 +0530 Subject: [PATCH 5/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss functions --- src/adaptive_losses.jl | 42 +++++++++++++++++++++++++------------ test/adaptive_loss_tests.jl | 8 +++---- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 1bd786d3ed..825152667d 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -272,14 +272,14 @@ https://arxiv.org/pdf/2007.14527.pdf mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, PDE_OPT <: Flux.Optimise.AbstractOptimiser, BC_OPT <: Flux.Optimise.AbstractOptimiser} 
<: AbstractAdaptiveLoss - reweight_steps::Int64 + reweight_every::Int64 pde_max_optimiser::PDE_OPT bc_max_optimiser::BC_OPT pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss{T, - PDE_OPT, BC_OPT}(reweight_steps; + PDE_OPT, BC_OPT}(reweight_every; pde_max_optimiser = Flux.ADAM(1e-4), bc_max_optimiser = Flux.ADAM(0.5), pde_loss_weights = 1, @@ -292,14 +292,14 @@ mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, BC_OPT <: Flux.Optimise.AbstractOptimiser } - new(convert(Int64, reweight_steps), convert(PDE_OPT, pde_max_optimiser), + new(convert(Int64, reweight_every), convert(PDE_OPT, pde_max_optimiser), convert(BC_OPT, bc_max_optimiser), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end -SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_xteps; +SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_every; pde_max_optimiser = Flux.ADAM(1e-4), bc_max_optimiser = Flux.ADAM(0.5), pde_loss_weights = 1, @@ -313,7 +313,7 @@ SciMLBase.@add_kwonly function NeuralTangentKernelAdaptiveLoss(reweight_xteps; Flux.Optimise.AbstractOptimiser } NeuralTangentKernelAdaptiveLoss{Float64, typeof(pde_max_optimiser), - typeof(bc_max_optimiser)}(reweight_steps; + typeof(bc_max_optimiser)}(reweight_every; pde_max_optimiser = pde_max_optimiser, bc_max_optimiser = bc_max_optimiser, pde_loss_weights = pde_loss_weights, @@ -331,7 +331,7 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss_T = eltype(adaloss.pde_loss_weights) function run_loss_ntk_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_steps == 0 + if iteration[1] % adaloss.reweight_every == 0 bc_vec = [dot(vec(Zygote.gradient(bc_loss_function, θ)[1]), vec(Zygote.gradient(bc_loss_function, θ)[1])) for bc_loss_function in bc_loss_functions] bc_trace = sum(bc_vec) @@ -340,8 +340,13 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, pde_trace = sum(pde_vec) adaloss.bc_loss_weights .= ( bc_trace .+ pde_trace ) ./ (bc_trace) - adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) + adaloss.pde_loss_weights .= ( bc_trace .+ pde_trace ) ./ (pde_trace) + logvector(pinnrep.logger, adaloss.pde_loss_weights, + "adaptive_loss/pde_loss_weights", iteration[1]) + logvector(pinnrep.logger, adaloss.bc_loss_weights, + "adaptive_loss/bc_loss_weights", + iteration[1]) end nothing end @@ -366,12 +371,12 @@ https://github.com/mosaic-group/inverse-dirichlet-pinn """ mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_steps::Int64 + reweight_every::Int64 weight_change_inertia::T pde_loss_weights::Vector{T} bc_loss_weights::Vector{T} additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_steps; + SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_every; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, @@ -379,18 +384,18 @@ mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss T <: Real } - new(convert(Int64, reweight_steps), convert(T, weight_change_inertia), + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), vectorify(additional_loss_weights, T)) end end # default to Float64 -SciMLBase.@add_kwonly function 
InverseDirichletAdaptiveLoss(reweight_steps; +SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_every; weight_change_inertia = 0.9, pde_loss_weights = 1, bc_loss_weights = 1, additional_loss_weights = 1) - InverseDirichletAdaptiveLoss{Float64}(reweight_steps; + InverseDirichletAdaptiveLoss{Float64}(reweight_every; weight_change_inertia = weight_change_inertia, pde_loss_weights = pde_loss_weights, bc_loss_weights = bc_loss_weights, @@ -405,7 +410,7 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss_T = eltype(adaloss.pde_loss_weights) function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_steps == 0 + if iteration[1] % adaloss.reweight_every == 0 # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) for pde_loss_function in pde_loss_functions] @@ -430,6 +435,17 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, adaloss.bc_loss_weights .+ (1 .- weight_change_inertia) .* bc_loss_weights_proposed + + logscalar(pinn.logger, gamma, + "adaptive_loss/gamma", iteration[1]) + logvector(pinn.logger, pde_stds, + "adaptive_loss/pde_stds", iteration[1]) + logvector(pinn.logger, bc_stds, + "adaptive_loss/bc_stds", iteration[1]) + logvector(pinn.logger, adaloss.bc_loss_weights, + "adaptive_loss/bc_loss_weights", iteration[1]) + logvector(pinn.logger, adaloss.pde_loss_weights, + "adaptive_loss/pde_loss_weights", iteration[1]) end nothing end diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index f0b6403605..24f74885fe 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -9,13 +9,13 @@ import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) -adaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, +minimaxadaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -ntk_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, +neuraltangentkernel_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -id_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, +inversedirichlet_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, ntk_loss] +adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkernel_loss, inversedirichlet_loss] maxiters = 4000 seed = 60 From 96f3bb594a7eb4673d7d9b8f95e5ef91864a94d8 Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 08:50:09 +0530 Subject: [PATCH 6/9] Added Neural Tangent Kernel and Inverse Dirichlet Adaptive Loss functions --- src/adaptive_losses.jl | 10 +++++----- test/adaptive_loss_tests.jl | 6 ++++++ 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 825152667d..9956341073 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -436,15 +436,15 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, (1 .- weight_change_inertia) .* bc_loss_weights_proposed - logscalar(pinn.logger, gamma, + logscalar(pinnrep.logger, gamma, "adaptive_loss/gamma", iteration[1]) - 
logvector(pinn.logger, pde_stds, + logvector(pinnrep.logger, pde_stds, "adaptive_loss/pde_stds", iteration[1]) - logvector(pinn.logger, bc_stds, + logvector(pinnrep.logger, bc_stds, "adaptive_loss/bc_stds", iteration[1]) - logvector(pinn.logger, adaloss.bc_loss_weights, + logvector(pinnrep.logger, adaloss.bc_loss_weights, "adaptive_loss/bc_loss_weights", iteration[1]) - logvector(pinn.logger, adaloss.pde_loss_weights, + logvector(pinnrep.logger, adaloss.pde_loss_weights, "adaptive_loss/pde_loss_weights", iteration[1]) end nothing diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 24f74885fe..136b5137a8 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -16,6 +16,7 @@ neuraltangentkernel_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_lo inversedirichlet_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, bc_loss_weights = 1) adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkernel_loss, inversedirichlet_loss] +# adaptive_losses = [neuraltangentkernel_loss, inversedirichlet_loss] maxiters = 4000 seed = 60 @@ -94,11 +95,16 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @show error_results_no_logs[1][:total_diff_rel] @show error_results_no_logs[2][:total_diff_rel] @show error_results_no_logs[3][:total_diff_rel] +@show error_results_no_logs[4][:total_diff_rel] +@show error_results_no_logs[5][:total_diff_rel] + # accuracy tests, these work for this specific seed but might not for others # note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent @test error_results_no_logs[1][:total_diff_rel] < 0.4 @test error_results_no_logs[2][:total_diff_rel] < 0.4 @test error_results_no_logs[3][:total_diff_rel] < 0.4 +@test error_results_no_logs[4][:total_diff_rel] < 0.4 +@test error_results_no_logs[5][:total_diff_rel] < 0.4 #plots_diffs[1][:plot] #plots_diffs[2][:plot] From 10da18afef99a1fd719faf4a0112124f70e4622c Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 09:23:51 +0530 Subject: [PATCH 7/9] Added Neural Tangent Kernel Adaptive Loss function --- src/adaptive_losses.jl | 121 +++++------------------------------- test/adaptive_loss_tests.jl | 9 +-- 2 files changed, 16 insertions(+), 114 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index 9956341073..c07d9e6d78 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -263,12 +263,24 @@ end """ Neural Tangent Kernel Adaptive Loss +``` julla +NeuralTangentKernelAdaptiveLoss(reweight_every; + pde_max_optimiser = Flux.ADAM(1e-4), + bc_max_optimiser = Flux.ADAM(0.5), + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) +``` + +A way of adaptively reweighing the components of the loss function by using the +values of the Jacobian of the +NTK predictions at the current point (infinite width assumption). 
+ +#References : -Adapted from the paper When and Why PINNs Fail to Train: A Neural Tangent kernel perspective https://arxiv.org/pdf/2007.14527.pdf """ - mutable struct NeuralTangentKernelAdaptiveLoss{T <: Real, PDE_OPT <: Flux.Optimise.AbstractOptimiser, BC_OPT <: Flux.Optimise.AbstractOptimiser} <: AbstractAdaptiveLoss @@ -351,108 +363,3 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, nothing end end - - - - - - - - - -""" -Inverse Dirichlet Adaptive Loss - -Inverse Dirichlet weighting enables reliable training of physics informed neural networks -Suryanarayana Maddu, Dominik Sturm, Christian L Müller, and Ivo F Sbalzarini -https://iopscience.iop.org/article/10.1088/2632-2153/ac3712/pdf -with code reference -https://github.com/mosaic-group/inverse-dirichlet-pinn -""" - -mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss - reweight_every::Int64 - weight_change_inertia::T - pde_loss_weights::Vector{T} - bc_loss_weights::Vector{T} - additional_loss_weights::Vector{T} - SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) where { - T <: - Real - } - new(convert(Int64, reweight_every), convert(T, weight_change_inertia), - vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), - vectorify(additional_loss_weights, T)) - end -end -# default to Float64 -SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_every; - weight_change_inertia = 0.9, - pde_loss_weights = 1, - bc_loss_weights = 1, - additional_loss_weights = 1) - InverseDirichletAdaptiveLoss{Float64}(reweight_every; - weight_change_inertia = weight_change_inertia, - pde_loss_weights = pde_loss_weights, - bc_loss_weights = bc_loss_weights, - additional_loss_weights = additional_loss_weights) -end - -function generate_adaptive_loss_function(pinnrep::PINNRepresentation, - adaloss::InverseDirichletAdaptiveLoss, - pde_loss_functions, bc_loss_functions) - weight_change_inertia = adaloss.weight_change_inertia - iteration = pinnrep.iteration - adaloss_T = eltype(adaloss.pde_loss_weights) - - function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) - if iteration[1] % adaloss.reweight_every == 0 - # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function - pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) - for pde_loss_function in pde_loss_functions] - pde_std_max = maximum(pde_stds) - bc_stds = [std(Zygote.gradient(bc_loss_function, θ)[1]) - for bc_loss_function in bc_loss_functions] - bc_std_max = maximum(bc_stds) - gamma = max(pde_std_max, bc_std_max) - - bc_loss_weights_proposed = gamma ./ (bc_stds) - adaloss.bc_loss_weights .= weight_change_inertia .* adaloss.bc_loss_weights .+ - (1 .- weight_change_inertia) .* bc_loss_weights_proposed - - pde_loss_weights_proposed = gamma ./ (pde_stds) - adaloss.pde_loss_weights .= weight_change_inertia .* adaloss.pde_loss_weights .+ - (1 .- weight_change_inertia) .* pde_loss_weights_proposed - nonzero_divisor_eps = adaloss_T isa Float64 ? 
Float64(1e-11) : - convert(adaloss_T, 1e-7) - bc_loss_weights_proposed = gamma ./ - (bc_stds) - adaloss.bc_loss_weights .= weight_change_inertia .* - adaloss.bc_loss_weights .+ - (1 .- weight_change_inertia) .* - bc_loss_weights_proposed - - logscalar(pinnrep.logger, gamma, - "adaptive_loss/gamma", iteration[1]) - logvector(pinnrep.logger, pde_stds, - "adaptive_loss/pde_stds", iteration[1]) - logvector(pinnrep.logger, bc_stds, - "adaptive_loss/bc_stds", iteration[1]) - logvector(pinnrep.logger, adaloss.bc_loss_weights, - "adaptive_loss/bc_loss_weights", iteration[1]) - logvector(pinnrep.logger, adaloss.pde_loss_weights, - "adaptive_loss/pde_loss_weights", iteration[1]) - end - nothing - end -end - - - - - - diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 136b5137a8..069ae6d073 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -11,12 +11,9 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) minimaxadaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -neuraltangentkernel_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, +neuraltangentkerneladaptive_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -inversedirichlet_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, - bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkernel_loss, inversedirichlet_loss] -# adaptive_losses = [neuraltangentkernel_loss, inversedirichlet_loss] +adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkerneladaptive_loss] maxiters = 4000 seed = 60 @@ -96,7 +93,6 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @show error_results_no_logs[2][:total_diff_rel] @show error_results_no_logs[3][:total_diff_rel] @show error_results_no_logs[4][:total_diff_rel] -@show error_results_no_logs[5][:total_diff_rel] # accuracy tests, these work for this specific seed but might not for others # note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent @@ -104,7 +100,6 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @test error_results_no_logs[2][:total_diff_rel] < 0.4 @test error_results_no_logs[3][:total_diff_rel] < 0.4 @test error_results_no_logs[4][:total_diff_rel] < 0.4 -@test error_results_no_logs[5][:total_diff_rel] < 0.4 #plots_diffs[1][:plot] #plots_diffs[2][:plot] From bfb7c6226203e3b7bb8012e281666225360a9e2c Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 10:02:16 +0530 Subject: [PATCH 8/9] Formatted to SciML codebase style --- Project.toml | 2 -- src/NeuralPDE.jl | 1 - test/adaptive_loss_tests.jl | 1 - 3 files changed, 4 deletions(-) diff --git a/Project.toml b/Project.toml index aa145eec3a..5890628584 100644 --- a/Project.toml +++ b/Project.toml @@ -23,12 +23,10 @@ ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" QuasiMonteCarlo = "8a4e6c94-4038-4cdc-81c3-7e6ffdb2a71b" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RecursiveArrayTools = "731186ca-8d62-57ce-b412-fbd966d074cd" Reexport = 
"189a3867-3050-52da-a836-e630ba90ab69" -Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" RuntimeGeneratedFunctions = "7e49a35a-f44a-4d26-94aa-eba1b4ca6b47" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/src/NeuralPDE.jl b/src/NeuralPDE.jl index 6fdd6d9c6c..a2ae343e5e 100644 --- a/src/NeuralPDE.jl +++ b/src/NeuralPDE.jl @@ -47,7 +47,6 @@ include("transform_inf_integral.jl") include("discretize.jl") include("neural_adapter.jl") - export NNODE, TerminalPDEProblem, NNPDEHan, NNPDENS, NNRODE, KolmogorovPDEProblem, NNKolmogorov, NNStopping, ParamKolmogorovPDEProblem, KolmogorovParamDomain, NNParamKolmogorov, diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index 069ae6d073..dbbdea6f1b 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -3,7 +3,6 @@ using Test, NeuralPDE import ModelingToolkit: Interval, infimum, supremum using DomainSets using Random -using Plots, Revise import Lux nonadaptive_loss = NeuralPDE.NonAdaptiveLoss(pde_loss_weights = 1, bc_loss_weights = 1) From 1f405403054a00f6b83a904ecd299449351f90a3 Mon Sep 17 00:00:00 2001 From: Sagar Date: Mon, 3 Apr 2023 10:38:58 +0530 Subject: [PATCH 9/9] Added Inverse Dirichlet Loss Adaptive function --- src/adaptive_losses.jl | 96 ++++++++++++++++++++++++++++++++++++- test/adaptive_loss_tests.jl | 7 ++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/src/adaptive_losses.jl b/src/adaptive_losses.jl index c07d9e6d78..5e02caeb46 100644 --- a/src/adaptive_losses.jl +++ b/src/adaptive_losses.jl @@ -258,8 +258,102 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, end end +""" +Inverse Dirichlet Adaptive Loss +```julia +function InverseDirichletAdaptiveLoss(reweight_every; + weight_change_inertia = 0.5, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) +``` + +Inverse Dirichlet weighting enables reliable training of physics informed neural networks +Suryanarayana Maddu, Dominik Sturm, Christian L Müller, and Ivo F Sbalzarini +https://iopscience.iop.org/article/10.1088/2632-2153/ac3712/pdf +with code reference +https://github.com/mosaic-group/inverse-dirichlet-pinn +""" +mutable struct InverseDirichletAdaptiveLoss{T <: Real} <: AbstractAdaptiveLoss + reweight_every::Int64 + weight_change_inertia::T + pde_loss_weights::Vector{T} + bc_loss_weights::Vector{T} + additional_loss_weights::Vector{T} + SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss{T}(reweight_every; + weight_change_inertia = 0.5, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) where { + T <: + Real + } + new(convert(Int64, reweight_every), convert(T, weight_change_inertia), + vectorify(pde_loss_weights, T), vectorify(bc_loss_weights, T), + vectorify(additional_loss_weights, T)) + end +end +# default to Float64 +SciMLBase.@add_kwonly function InverseDirichletAdaptiveLoss(reweight_every; + weight_change_inertia = 0.5, + pde_loss_weights = 1, + bc_loss_weights = 1, + additional_loss_weights = 1) + InverseDirichletAdaptiveLoss{Float64}(reweight_every; + weight_change_inertia = weight_change_inertia, + pde_loss_weights = pde_loss_weights, + bc_loss_weights = bc_loss_weights, + additional_loss_weights = additional_loss_weights) +end +function generate_adaptive_loss_function(pinnrep::PINNRepresentation, + adaloss::InverseDirichletAdaptiveLoss, + pde_loss_functions, bc_loss_functions) + weight_change_inertia = adaloss.weight_change_inertia + iteration = pinnrep.iteration + 
adaloss_T = eltype(adaloss.pde_loss_weights) + function run_loss_inverse_dirichlet_gradients_adaptive_loss(θ, pde_losses, bc_losses) + if iteration[1] % adaloss.reweight_every == 0 + # the paper assumes a single pde loss function, so here we grab the maximum of the maximums of each pde loss function + pde_stds = [std(Zygote.gradient(pde_loss_function, θ)[1]) + for pde_loss_function in pde_loss_functions] + pde_std_max = maximum(pde_stds) + bc_stds = [std(Zygote.gradient(bc_loss_function, θ)[1]) + for bc_loss_function in bc_loss_functions] + bc_std_max = maximum(bc_stds) + gamma = max(pde_std_max, bc_std_max) + + bc_loss_weights_proposed = gamma ./ (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* bc_loss_weights_proposed + + pde_loss_weights_proposed = gamma ./ (pde_stds) + adaloss.pde_loss_weights .= weight_change_inertia .* adaloss.pde_loss_weights .+ + (1 .- weight_change_inertia) .* pde_loss_weights_proposed + nonzero_divisor_eps = adaloss_T isa Float64 ? Float64(1e-11) : + convert(adaloss_T, 1e-7) + bc_loss_weights_proposed = gamma ./ + (bc_stds) + adaloss.bc_loss_weights .= weight_change_inertia .* + adaloss.bc_loss_weights .+ + (1 .- weight_change_inertia) .* + bc_loss_weights_proposed + + logscalar(pinnrep.logger, gamma, + "adaptive_loss/gamma", iteration[1]) + logvector(pinnrep.logger, pde_stds, + "adaptive_loss/pde_stds", iteration[1]) + logvector(pinnrep.logger, bc_stds, + "adaptive_loss/bc_stds", iteration[1]) + logvector(pinnrep.logger, adaloss.bc_loss_weights, + "adaptive_loss/bc_loss_weights", iteration[1]) + logvector(pinnrep.logger, adaloss.pde_loss_weights, + "adaptive_loss/pde_loss_weights", iteration[1]) + end + nothing + end +end """ Neural Tangent Kernel Adaptive Loss @@ -362,4 +456,4 @@ function generate_adaptive_loss_function(pinnrep::PINNRepresentation, end nothing end -end +end \ No newline at end of file diff --git a/test/adaptive_loss_tests.jl b/test/adaptive_loss_tests.jl index dbbdea6f1b..02b6443a84 100644 --- a/test/adaptive_loss_tests.jl +++ b/test/adaptive_loss_tests.jl @@ -10,9 +10,12 @@ gradnormadaptive_loss = NeuralPDE.GradientScaleAdaptiveLoss(100, pde_loss_weight bc_loss_weights = 1) minimaxadaptive_loss = NeuralPDE.MiniMaxAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) +inversedirichletadaptive_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100, pde_loss_weights = 1e3, + bc_loss_weights = 1) neuraltangentkerneladaptive_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1, bc_loss_weights = 1) -adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, neuraltangentkerneladaptive_loss] +adaptive_losses = [nonadaptive_loss, gradnormadaptive_loss, minimaxadaptive_loss, + inversedirichletadaptive_loss, neuraltangentkerneladaptive_loss, ] maxiters = 4000 seed = 60 @@ -92,6 +95,7 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @show error_results_no_logs[2][:total_diff_rel] @show error_results_no_logs[3][:total_diff_rel] @show error_results_no_logs[4][:total_diff_rel] +@show error_results_no_logs[5][:total_diff_rel] # accuracy tests, these work for this specific seed but might not for others # note that this doesn't test that the adaptive losses are outperforming the nonadaptive loss, which is not guaranteed, and seed/arch/hyperparam/pde etc dependent @@ -99,6 +103,7 @@ error_results_no_logs = map(test_2d_poisson_equation_adaptive_loss_no_logs_run_s @test 
error_results_no_logs[2][:total_diff_rel] < 0.4
 @test error_results_no_logs[3][:total_diff_rel] < 0.4
 @test error_results_no_logs[4][:total_diff_rel] < 0.4
+@test error_results_no_logs[5][:total_diff_rel] < 0.4
 #plots_diffs[1][:plot]
 #plots_diffs[2][:plot]
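
Usage note (a minimal sketch, not part of the patches above): this shows how the two adaptive losses added in this series, NeuralTangentKernelAdaptiveLoss and InverseDirichletAdaptiveLoss, are constructed and attached to a PINN discretization. The constructor calls mirror test/adaptive_loss_tests.jl; the `adaptive_loss` keyword of `PhysicsInformedNN` is an assumption carried over from the existing adaptive-loss workflow and is not shown in the hunks above.

```julia
using NeuralPDE, Lux

# Recompute the loss weights every 100 iterations, starting from the given values
# (same constructor calls as in test/adaptive_loss_tests.jl).
ntk_loss = NeuralPDE.NeuralTangentKernelAdaptiveLoss(100; pde_loss_weights = 1,
                                                     bc_loss_weights = 1)
id_loss = NeuralPDE.InverseDirichletAdaptiveLoss(100; pde_loss_weights = 1e3,
                                                 bc_loss_weights = 1)

# Small network for a 2D problem, as in the 2D Poisson example earlier in the series.
chain = Lux.Chain(Lux.Dense(2, 16, Lux.σ), Lux.Dense(16, 16, Lux.σ), Lux.Dense(16, 1))

# Assumed keyword: `adaptive_loss` selects which reweighting scheme the generated
# loss function uses during training.
discretization = PhysicsInformedNN(chain, QuadratureTraining();
                                   adaptive_loss = ntk_loss)
```

Swapping `ntk_loss` for `id_loss` (or any other AbstractAdaptiveLoss) changes only the reweighting rule; the rest of the discretize/solve pipeline from the earlier examples stays the same.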