EarthyScience
diff --git a/‎Project.toml‎
Lines changed: 5 additions & 3 deletions b/‎Project.toml‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎dev/doubleMM.jl‎
Lines changed: 1 addition & 1 deletion b/‎dev/doubleMM.jl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ext/HybridVariationalInferenceFluxExt.jl‎
Lines changed: 5 additions & 3 deletions b/‎ext/HybridVariationalInferenceFluxExt.jl‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎ext/HybridVariationalInferenceSimpleChainsExt.jl‎
Lines changed: 8 additions & 6 deletions b/‎ext/HybridVariationalInferenceSimpleChainsExt.jl‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎src/AbstractHybridProblem.jl‎
Lines changed: 7 additions & 0 deletions b/‎src/AbstractHybridProblem.jl‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/DoubleMM/DoubleMM.jl‎
Lines changed: 1 addition & 1 deletion b/‎src/DoubleMM/DoubleMM.jl‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/DoubleMM/f_doubleMM.jl‎
Lines changed: 13 additions & 0 deletions b/‎src/DoubleMM/f_doubleMM.jl‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎src/HybridProblem.jl‎
Lines changed: 12 additions & 5 deletions b/‎src/HybridProblem.jl‎
Lines changed: 12 additions & 5 deletions
diff --git a/‎src/HybridSolver.jl‎
Lines changed: 43 additions & 32 deletions b/‎src/HybridSolver.jl‎
Lines changed: 43 additions & 32 deletions
diff --git a/‎src/HybridVariationalInference.jl‎
Lines changed: 2 additions & 0 deletions b/‎src/HybridVariationalInference.jl‎
Lines changed: 2 additions & 0 deletions
@@ -20,29 +20,30 @@ MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
 Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 
 [weakdeps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 SimpleChains = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 
 [extensions]
+HybridVariationalInferenceCUDAExt = "CUDA"
 HybridVariationalInferenceFluxExt = "Flux"
 HybridVariationalInferenceLuxExt = "Lux"
 HybridVariationalInferenceSimpleChainsExt = "SimpleChains"
-HybridVariationalInferenceCUDAExt = "CUDA"
 
 [compat]
 Bijectors = "0.14, 0.15"
 BlockDiagonals = "0.1.42, 0.2"
+CUDA = "5.7"
 ChainRulesCore = "1.25"
 Combinatorics = "1.0.2"
 CommonSolve = "0.2.4"
 ComponentArrays = "0.15.19"
-CUDA = "5.7"
 DistributionFits = "0.3.9"
 Distributions = "0.25.117"
 Flux = "0.14, 0.15, 0.16"
@@ -56,6 +57,7 @@ Optimization = "3.19.3, 4"
 Random = "1.10.0"
 SimpleChains = "0.4"
 StableRNGs = "1.0.2"
+StaticArrays = "1.9.13"
 StatsBase = "0.34.4"
 StatsFuns = "1.3.2"
 julia = "1.10"
 
@@ -129,7 +129,7 @@ end
     () -> begin # optimized loss is indeed lower than with true parameters
         int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector(
             ϕg = 1:length(prob0.ϕg), θP = prob0.θP))
-        loss_gf = get_loss_gf(prob0.g, prob0.transM, prob0.f, Float32[], int_ϕθP)
+        loss_gf = get_loss_gf(prob0.g, prob0.transM, prob0.transP, prob0.f, Float32[], int_ϕθP)
         loss_gf(vcat(prob3.ϕg, prob3.θP), xM, xP, y_o, y_unc, i_sites)[1]
         loss_gf(vcat(prob3o.ϕg, prob3o.θP), xM, xP, y_o, y_unc, i_sites)[1]
         #
 
@@ -42,16 +42,18 @@ function HVI.construct_3layer_MLApplicator(
     (;θM) = get_hybridproblem_par_templates(prob; scenario)
     n_out = length(θM)
     n_covar = get_hybridproblem_n_covar(prob; scenario)
+    n_pbm_covars = length(get_hybridproblem_pbmpar_covars(prob; scenario))
+    n_input = n_covar + n_pbm_covars
     #(; n_covar, n_θM) = get_hybridproblem_sizes(prob; scenario)
     float_type = get_hybridproblem_float_type(prob; scenario)
     is_using_dropout = :use_dropout ∈ scenario
     is_using_dropout && error("dropout scenario not supported with Flux yet.")
     g_chain = Flux.Chain(
         # dense layer with bias that maps to 8 outputs and applies `tanh` activation
-        Flux.Dense(n_covar => n_covar * 4, tanh),
-        Flux.Dense(n_covar * 4 => n_covar * 4, tanh),
+        Flux.Dense(n_input => n_input * 4, tanh),
+        Flux.Dense(n_input * 4 => n_input * 4, tanh),
         # dense layer without bias that maps to n outputs and `logistic` activation
-        Flux.Dense(n_covar * 4 => n_out, logistic, bias = false)
+        Flux.Dense(n_input * 4 => n_out, logistic, bias = false)
     )
     construct_ChainsApplicator(rng, g_chain, float_type)
 end
 
@@ -21,27 +21,29 @@ function HVI.construct_3layer_MLApplicator(
     rng::AbstractRNG, prob::HVI.AbstractHybridProblem, ::Val{:SimpleChains};
     scenario::NTuple = ())
     n_covar = get_hybridproblem_n_covar(prob; scenario)
+    n_pbm_covars = length(get_hybridproblem_pbmpar_covars(prob; scenario))
+    n_input = n_covar + n_pbm_covars
     FloatType = get_hybridproblem_float_type(prob; scenario)
     (;θM) = get_hybridproblem_par_templates(prob; scenario)
     n_out = length(θM)
     is_using_dropout = :use_dropout ∈ scenario
     g_chain = if is_using_dropout
         SimpleChain(
-            static(n_covar), # input dimension (optional)
+            static(n_input), # input dimension (optional)
             # dense layer with bias that maps to 8 outputs and applies `tanh` activation
-            TurboDense{true}(tanh, n_covar * 4),
+            TurboDense{true}(tanh, n_input * 4),
             SimpleChains.Dropout(0.2), # dropout layer
-            TurboDense{true}(tanh, n_covar * 4),
+            TurboDense{true}(tanh, n_input * 4),
             SimpleChains.Dropout(0.2),
             # dense layer without bias that maps to n outputs and `logistic` activation
             TurboDense{false}(logistic, n_out)
         )
     else
         SimpleChain(
-            static(n_covar), # input dimension (optional)
+            static(n_input), # input dimension (optional)
             # dense layer with bias that maps to 8 outputs and applies `tanh` activation
-            TurboDense{true}(tanh, n_covar * 4),
-            TurboDense{true}(tanh, n_covar * 4),
+            TurboDense{true}(tanh, n_input * 4),
+            TurboDense{true}(tanh, n_input * 4),
             # dense layer without bias that maps to n outputs and `logistic` activation
             TurboDense{false}(logistic, n_out)
         )
 
@@ -18,6 +18,8 @@ optionally
 - `get_hybridproblem_float_type` (defaults to `eltype(θM)`)
 - `get_hybridproblem_cor_ends` (defaults to include all correlations: 
   `(P = [length(θP)], M = [length(θM)])`)
+- `get_hybridproblem_pbmpar_covars` (defaults to empty tuple)
+
 
 The initial value of parameters to estimate is spread
 - `ϕg`: parameter of the MLapplicator: returned by `get_hybridproblem_MLapplicator`
@@ -117,6 +119,11 @@ function get_hybridproblem_n_covar(::AbstractHybridProblem; scenario) end
 #     return (n_covar)
 # end
 
+
+function get_hybridproblem_pbmpar_covars(::AbstractHybridProblem; scenario) 
+    ()
+end
+
 """
     get_hybridproblem_n_site(::AbstractHybridProblem; scenario)
 
 
@@ -12,7 +12,7 @@ using Bijectors
 using Distributions, DistributionFits
 using MLDataDevices
 import GPUArraysCore # used in conditional breakpoints
-
+import StableRNGs
 
 export f_doubleMM, xP_S1, xP_S2
 include("f_doubleMM.jl")
 
@@ -41,6 +41,11 @@ function HVI.get_hybridproblem_priors(::DoubleMMCase; scenario = ())
     Dict(keys(θall) .=> fit.(LogNormal, θall, QuantilePoint.(θall .* 3, 0.95)))
 end
 
+function HVI.get_hybridproblem_MLapplicator(prob::HVI.DoubleMM.DoubleMMCase; scenario = ())
+    rng = StableRNGs.StableRNG(111)
+    get_hybridproblem_MLapplicator(rng, prob; scenario)
+end
+
 function HVI.get_hybridproblem_MLapplicator(
         rng::AbstractRNG, prob::HVI.DoubleMM.DoubleMMCase; scenario = ())
     ml_engine = select_ml_engine(; scenario)
@@ -53,6 +58,14 @@ function HVI.get_hybridproblem_MLapplicator(
     return g, ϕ_g0
 end
 
+function HVI.get_hybridproblem_pbmpar_covars(::DoubleMMCase; scenario) 
+    if (:covarK2 ∈ scenario)
+        return (:K2,)
+    end
+    ()
+end
+
+
 function HVI.get_hybridproblem_transforms(::DoubleMMCase; scenario::NTuple = ())
     if (:stackedMS ∈ scenario)
         return ((; transP, transM = transMS))
 
@@ -13,6 +13,7 @@ struct HybridProblem <: AbstractHybridProblem
     get_train_loader::Any
     n_covar::Int
     n_site::Int
+    pbm_covars::NTuple
     # inner constructor to constrain the types
     function HybridProblem(
             θP::CA.ComponentVector, θM::CA.ComponentVector,
@@ -28,10 +29,11 @@ struct HybridProblem <: AbstractHybridProblem
             n_covar::Int,
             n_site::Int,
             cor_ends::NamedTuple = (P = [length(θP)], M = [length(θM)]),
-    )
+            pbm_covars::NTuple{N,Symbol} = (),
+    ) where N
         new(
             θP, θM, f, g, ϕg, ϕunc, priors, py, transM, transP, cor_ends, get_train_loader,
-            n_covar, n_site)
+            n_covar, n_site, pbm_covars)
     end
 end
 
@@ -57,11 +59,12 @@ function HybridProblem(prob::AbstractHybridProblem; scenario = ())
         end
     end
     cor_ends = get_hybridproblem_cor_ends(prob; scenario)
+    pbm_covars = get_hybridproblem_pbmpar_covars(prob; scenario)
     priors = get_hybridproblem_priors(prob; scenario)
     n_covar = get_hybridproblem_n_covar(prob; scenario)
     n_site = get_hybridproblem_n_site(prob; scenario)
     HybridProblem(θP, θM, g, ϕg, ϕunc, f, priors, py, transP, transM, get_train_loader,
-        n_covar, n_site, cor_ends)
+        n_covar, n_site, cor_ends, pbm_covars)
 end
 
 function update(prob::HybridProblem;
@@ -76,12 +79,13 @@ function update(prob::HybridProblem;
         transM::Union{Function, Bijectors.Transform} = prob.transM,
         transP::Union{Function, Bijectors.Transform} = prob.transP,
         cor_ends::NamedTuple = prob.cor_ends,
+        pbm_covars::NTuple{N,Symbol} = prob.pbm_covars,
         get_train_loader::Function = prob.get_train_loader,
         n_covar::Integer = prob.n_covar,
         n_site::Integer = prob.n_site
-)
+) where N
     HybridProblem(θP, θM, g, ϕg, ϕunc, f, priors, py, transP, transM, get_train_loader,
-        n_covar, n_site, cor_ends)
+        n_covar, n_site, cor_ends, pbm_covars)
 end
 
 function get_hybridproblem_par_templates(prob::HybridProblem; scenario::NTuple = ())
@@ -121,6 +125,9 @@ end
 function get_hybridproblem_cor_ends(prob::HybridProblem; scenario = ())
     prob.cor_ends
 end
+function get_hybridproblem_pbmpar_covars(prob::HybridProblem; scenario = ()) 
+    prob.pbm_covars
+end
 function get_hybridproblem_n_covar(prob::HybridProblem; scenario = ())
     prob.n_covar
 end
 
@@ -17,35 +17,38 @@ function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPointSolve
     g, ϕg0 = get_hybridproblem_MLapplicator(prob; scenario)
     FT = get_hybridproblem_float_type(prob; scenario)
     (; transP, transM) = get_hybridproblem_transforms(prob; scenario)
-    int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector(
-        ϕg = 1:length(ϕg0), θP = par_templates.θP))
-    #p0_cpu = vcat(ϕg0, par_templates.θP .* FT(0.9))  # slightly disturb θP_true
-    p0_cpu = vcat(ϕg0, par_templates.θP)
-    p0 = p0_cpu
-    g_dev = g
+    intϕ = ComponentArrayInterpreter(CA.ComponentVector(
+        ϕg = 1:length(ϕg0), ϕP = par_templates.θP))
+    #ϕ0_cpu = vcat(ϕg0, par_templates.θP .* FT(0.9))  # slightly disturb θP_true
+    ϕ0_cpu = vcat(ϕg0, apply_preserve_axes(inverse(transP),par_templates.θP))
     if gdev isa MLDataDevices.AbstractGPUDevice
-        p0 = gdev(p0_cpu)
+        ϕ0_dev = gdev(ϕ0_cpu)
         g_dev = gdev(g)
+    else
+        ϕ0_dev = ϕ0_cpu
+        g_dev = g
     end
     train_loader = get_hybridproblem_train_dataloader(
         prob; scenario, n_batch = solver.n_batch)
     f = get_hybridproblem_PBmodel(prob; scenario)
     y_global_o = FT[] # TODO
-    loss_gf = get_loss_gf(g_dev, transM, f, y_global_o, int_ϕθP; cdev)
+    pbm_covars = get_hybridproblem_pbmpar_covars(prob; scenario)
+    #intP = ComponentArrayInterpreter(par_templates.θP)
+    loss_gf = get_loss_gf(g_dev, transM, transP, f, y_global_o, intϕ; cdev, pbm_covars)
     # call loss function once
-    l1 = loss_gf(p0, first(train_loader)...)[1]
+    l1 = loss_gf(ϕ0_dev, first(train_loader)...)[1]
     # and gradient
     # xMg, xP, y_o, y_unc = first(train_loader)
     # gr1 = Zygote.gradient(
     #             p -> loss_gf(p, xMg, xP, y_o, y_unc)[1],
-    #             p0)
+    #             ϕ0_dev)
     # data1 = first(train_loader)
-    # Zygote.gradient(p0 -> loss_gf(p0, data1...)[1], p0)
+    # Zygote.gradient(ϕ0_dev -> loss_gf(ϕ0_dev, data1...)[1], ϕ0_dev)
     optf = Optimization.OptimizationFunction((ϕ, data) -> loss_gf(ϕ, data...)[1],
         Optimization.AutoZygote())
-    optprob = OptimizationProblem(optf, CA.getdata(p0), train_loader)
+    optprob = OptimizationProblem(optf, CA.getdata(ϕ0_dev), train_loader)
     res = Optimization.solve(optprob, solver.alg; kwargs...)
-    (; ϕ = int_ϕθP(res.u), resopt = res)
+    (; ϕ = intϕ(res.u), resopt = res)
 end
 
 struct HybridPosteriorSolver{A} <: AbstractHybridSolver
@@ -77,6 +80,7 @@ function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPosteriorS
     g, ϕg0 = get_hybridproblem_MLapplicator(prob; scenario)
     ϕunc0 = get_hybridproblem_ϕunc(prob; scenario)
     (; transP, transM) = get_hybridproblem_transforms(prob; scenario)
+    pbm_covars = get_hybridproblem_pbmpar_covars(prob; scenario)
     (; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params(
         θP, θM, cor_ends, ϕg0, solver.n_batch; transP, transM, ϕunc0)
     if gdev isa MLDataDevices.AbstractGPUDevice
@@ -90,12 +94,12 @@ function CommonSolve.solve(prob::AbstractHybridProblem, solver::HybridPosteriorS
     f = get_hybridproblem_PBmodel(prob; scenario)
     py = get_hybridproblem_neg_logden_obs(prob; scenario)
     priors_θ_mean = construct_priors_θ_mean(
-        prob, ϕ0_dev.ϕg, keys(θM), θP, θmean_quant, g_dev, transM;
-        scenario, get_ca_int_PMs, cdev)
+        prob, ϕ0_dev.ϕg, keys(θM), θP, θmean_quant, g_dev, transM, transP;
+        scenario, get_ca_int_PMs, cdev, pbm_covars)
     y_global_o = Float32[] # TODO
     loss_elbo = get_loss_elbo(
         g_dev, transPMs_batch, f, py, y_global_o, interpreters;
-        solver.n_MC, solver.n_MC_cap, cor_ends, priors_θ_mean, cdev)
+        solver.n_MC, solver.n_MC_cap, cor_ends, priors_θ_mean, cdev, pbm_covars, θP)
     # test loss function once
     l0 = loss_elbo(ϕ0_dev, rng, first(train_loader)...)
     optf = Optimization.OptimizationFunction((ϕ, data) -> loss_elbo(ϕ, rng, data...)[1],
@@ -116,28 +120,32 @@ end
 
 """
 Create a loss function for parameter vector ϕ, given 
-- g(x, ϕ): machine learning model 
-- transPMS: transformation from unconstrained space to parameter space
-- f(θMs, θP): mechanistic model 
-- interpreters: assigning structure to pure vectors, see neg_elbo_gtf
-- n_MC: number of Monte-Carlo sample to approximate the expected value across distribution
+- `g(x, ϕ)`: machine learning model 
+- `transPMs`: transformation from unconstrained space to parameter space
+- `f(θMs, θP)`: mechanistic model 
+- `interpreters`: assigning structure to pure vectors, see `neg_elbo_gtf`
+- `n_MC`: number of Monte-Carlo samples to approximate the expected value across distribution
+- `pbm_covars`: tuple of symbols of process-based parameters provided to the ML model
+- `θP`: ComponentVector as a template to select indices of pbm_covars
 
 The loss function takes in addition to ϕ, data that changes with minibatch
-- rng: random generator
-- xM: matrix of covariates, sites in columns
-- xP: drivers for the processmodel: Iterator of size n_site
-- y_o, y_unc: matrix of observations and uncertainties, sites in columns
+- `rng`: random generator
+- `xM`: matrix of covariates, sites in columns
+- `xP`: drivers for the processmodel: Iterator of size n_site
+- `y_o`, `y_unc`: matrix of observations and uncertainties, sites in columns
 """
 function get_loss_elbo(g, transPMs, f, py, y_o_global, interpreters;
-        n_MC, n_MC_cap = n_MC, cor_ends, priors_θ_mean, cdev)
+        n_MC, n_MC_cap = n_MC, cor_ends, priors_θ_mean, cdev, pbm_covars, θP,
+        )
     let g = g, transPMs = transPMs, f = f, py = py, y_o_global = y_o_global, n_MC = n_MC,
         cor_ends = cor_ends, interpreters = map(get_concrete, interpreters),
-        priors_θ_mean = priors_θ_mean, cdev = cdev
+        priors_θ_mean = priors_θ_mean, cdev = cdev, 
+        pbm_covar_indices = get_pbm_covar_indices(θP, pbm_covars)
 
         function loss_elbo(ϕ, rng, xM, xP, y_o, y_unc, i_sites)
             neg_elbo_gtf(
                 rng, ϕ, g, transPMs, f, py, xM, xP, y_o, y_unc, i_sites, interpreters;
-                n_MC, n_MC_cap, cor_ends, priors_θ_mean, cdev)
+                n_MC, n_MC_cap, cor_ends, priors_θ_mean, cdev, pbm_covar_indices)
         end
     end
 end
@@ -183,16 +191,19 @@ end
 In order to let the mean of θ stay close to the initial point parameter estimates,
 construct a prior on the mean of θ as a Normal around the initial prediction.
 """
-function construct_priors_θ_mean(prob, ϕg, keysθM, θP, θmean_quant, g_dev, transM;
-        scenario, get_ca_int_PMs, cdev)
+function construct_priors_θ_mean(prob, ϕg, keysθM, θP, θmean_quant, g_dev, transM, transP;
+        scenario, get_ca_int_PMs, cdev, pbm_covars)
     iszero(θmean_quant) ? [] :
     begin
         n_site = get_hybridproblem_n_site(prob; scenario)
         all_loader = get_hybridproblem_train_dataloader(prob; scenario, n_batch = n_site)
         xM_all = first(all_loader)[1]
-        θMs = gtrans(g_dev, transM, xM_all, CA.getdata(ϕg); cdev)
-        priors_dict = get_hybridproblem_priors(prob; scenario)
         #Main.@infiltrate_main
+        ζP = apply_preserve_axes(inverse(transP), θP)
+        pbm_covar_indices = get_pbm_covar_indices(θP, pbm_covars)
+        xMP_all = _append_each_covars(xM_all, CA.getdata(ζP), pbm_covar_indices) 
+        θMs = gtrans(g_dev, transM, xMP_all, CA.getdata(ϕg); cdev)
+        priors_dict = get_hybridproblem_priors(prob; scenario)
         priorsP = [priors_dict[k] for k in keys(θP)]
         priors_θP_mean = map(priorsP, θP) do priorsP, θPi
             fit_narrow_normal(θPi, priorsP, θmean_quant)
 
@@ -17,6 +17,7 @@ using CommonSolve
 #using OptimizationOptimisers # default alg=Adam(0.02)
 using Optimization
 using Distributions, DistributionFits
+using StaticArrays: StaticArrays as SA
 using Functors
 
 export ComponentArrayInterpreter, flatten1, get_concrete
@@ -40,6 +41,7 @@ export AbstractHybridProblem, get_hybridproblem_MLapplicator, get_hybridproblem_
        get_hybridproblem_n_site,
        get_hybridproblem_cor_ends,
        get_hybridproblem_priors,
+       get_hybridproblem_pbmpar_covars,
 #update,
        gen_cov_pred,
        construct_dataloader_from_synthetic,
Original file line number	Diff line number	Diff line change
`@@ -129,7 +129,7 @@ end`
`129`	`129`	`() -> begin # optimized loss is indeed lower than with true parameters`
`130`	`130`	`int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector(`
`131`	`131`	`ϕg = 1:length(prob0.ϕg), θP = prob0.θP))`
`132`		`- loss_gf = get_loss_gf(prob0.g, prob0.transM, prob0.f, Float32[], int_ϕθP)`
	`132`	`+ loss_gf = get_loss_gf(prob0.g, prob0.transM, prob0.transP, prob0.f, Float32[], int_ϕθP)`
`133`	`133`	`loss_gf(vcat(prob3.ϕg, prob3.θP), xM, xP, y_o, y_unc, i_sites)[1]`
`134`	`134`	`loss_gf(vcat(prob3o.ϕg, prob3o.θP), xM, xP, y_o, y_unc, i_sites)[1]`
`135`	`135`	`#`