move S1 and S2 in doubleMM problem to drivers

bgctw · bgctw · commit e73130505721 · 2025-01-21T09:46:18.000+01:00
diff --git a/src/DoubleMM/DoubleMM.jl b/src/DoubleMM/DoubleMM.jl
@@ -9,8 +9,8 @@ using StatsFuns: logistic
 using Bijectors
 
 
+export f_doubleMM, xP_S1, xP_S2
 include("f_doubleMM.jl")
 
-export f_doubleMM, S1, S2
 
 end
diff --git a/src/DoubleMM/f_doubleMM.jl b/src/DoubleMM/f_doubleMM.jl
@@ -1,22 +1,20 @@
 struct DoubleMMCase <: AbstractHybridCase end
 
-const S1 = [1.0, 1.0, 1.0, 1.0, 0.4, 0.3, 0.1]
-const S2 = [1.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0]
 
-θP = CA.ComponentVector(r0 = 0.3, K2 = 2.0)
-θM = CA.ComponentVector(r1 = 0.5, K1 = 0.2)
+θP = CA.ComponentVector{Float32}(r0 = 0.3, K2 = 2.0)
+θM = CA.ComponentVector{Float32}(r1 = 0.5, K1 = 0.2)
 
 transP = elementwise(exp)
 transM = Stacked(elementwise(identity), elementwise(exp))
 
 
 const int_θdoubleMM = ComponentArrayInterpreter(flatten1(CA.ComponentVector(; θP, θM)))
 
-function f_doubleMM(θ::AbstractVector)
+function f_doubleMM(θ::AbstractVector, x)
     # extract parameters not depending on order, i.e whether they are in θP or θM
     θc = int_θdoubleMM(θ)
     r0, r1, K1, K2 = θc[(:r0, :r1, :K1, :K2)]
-    y = r0 .+ r1 .* S1 ./ (K1 .+ S1) .* S2 ./ (K2 .+ S2)
+    y = r0 .+ r1 .* x.S1 ./ (K1 .+ x.S1) .* x.S2 ./ (K2 .+ x.S2)
     return (y)
 end
 
@@ -40,17 +38,20 @@ function HVI.get_hybridcase_sizes(::DoubleMMCase; scenario = ())
 end
 
 function HVI.get_hybridcase_PBmodel(::DoubleMMCase; scenario::NTuple = ())
-    fsite = (θ, x_site) -> f_doubleMM(θ)  # omit x_site drivers
+    #fsite = (θ, x_site) -> f_doubleMM(θ)  # omit x_site drivers
     function f_doubleMM_with_global(θP::AbstractVector, θMs::AbstractMatrix, x)
-        pred_sites = applyf(fsite, θMs, θP, x)
+        pred_sites = applyf(f_doubleMM, θMs, θP, x)
         pred_global = eltype(pred_sites)[]
         return pred_global, pred_sites
     end
 end
 
-function HVI.get_hybridcase_FloatType(::DoubleMMCase; scenario)
-    return Float32
-end
+# function HVI.get_hybridcase_FloatType(::DoubleMMCase; scenario)
+#     return Float32
+# end
+
+const xP_S1 = Float32[1.0, 1.0, 1.0, 1.0, 0.4, 0.3, 0.1]
+const xP_S2 = Float32[1.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0]
 
 function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG;
         scenario = ())
@@ -62,14 +63,14 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG;
         rhodec = 8, is_using_dropout = false)
     int_θMs_sites = ComponentArrayInterpreter(θM, (n_site,))
     # normalize to be distributed around the prescribed true values
-    θMs_true = int_θMs_sites(scale_centered_at(θMs_true0, θM, 0.1))
+    θMs_true = int_θMs_sites(scale_centered_at(θMs_true0, θM, FloatType(0.1)))
     f = get_hybridcase_PBmodel(case; scenario)
-    xP = fill((), n_site)
-    y_global_true, y_true = f(θP, θMs_true, zip())
-    σ_o = 0.01
+    xP = fill((;S1=xP_S1, S2=xP_S2), n_site)
+    y_global_true, y_true = f(θP, θMs_true, xP)
+    σ_o = FloatType(0.01)
     #σ_o = 0.002
-    y_global_o = y_global_true .+ randn(rng, size(y_global_true)) .* σ_o
-    y_o = y_true .+ randn(rng, size(y_true)) .* σ_o
+    y_global_o = y_global_true .+ randn(rng, FloatType, size(y_global_true)) .* σ_o
+    y_o = y_true .+ randn(rng, FloatType, size(y_true)) .* σ_o
     (;
         xM,
         n_site,
@@ -83,3 +84,4 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG;
         σ_o = fill(σ_o, size(y_true,1)),
     )
 end
+
diff --git a/src/HybridProblem.jl b/src/HybridProblem.jl
@@ -37,9 +37,9 @@ function get_hybridcase_MLapplicator(prob::HybridProblem, ml_engine; scenario::N
     prob.g, prob.ϕg
 end
 
-function get_hybridcase_FloatType(prob::HybridProblem; scenario::NTuple = ()) 
-    eltype(prob.θM)
-end
+# function get_hybridcase_FloatType(prob::HybridProblem; scenario::NTuple = ()) 
+#     eltype(prob.θM)
+# end
 
 
 
diff --git a/src/elbo.jl b/src/elbo.jl
@@ -12,22 +12,23 @@ expected value of the likelihood of observations.
   including parameter of f (ϕ_P), of g (ϕ_Ms), and of VI (ϕ_unc),
   interpreted by interpreters.μP_ϕg_unc and interpreters.PMs
 - y_ob: matrix of observations (n_obs x n_site_batch)
-- x: matrix of covariates (n_cov x n_site_batch)
+- xM: matrix of covariates (n_cov x n_site_batch)
+- xP: model drivers, iterable with one entry per site in the batch (length n_site_batch)
 - transPMs: Transformations as generated by get_transPMs returned from init_hybrid_params
 - n_MC: number of MonteCarlo samples from the distribution of parameters to simulate
   using the mechanistic model f.
 - logσ2y: observation uncertainty (log of the variance)
 """
-function neg_elbo_transnorm_gf(rng, g, f, ϕ::AbstractVector, y_ob, x::AbstractMatrix,
-    transPMs, interpreters::NamedTuple; 
+function neg_elbo_transnorm_gf(rng, g, f, ϕ::AbstractVector, y_ob, xM::AbstractMatrix,
+    xP, transPMs, interpreters::NamedTuple; 
     n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler(),
     entropyN = 0.0,
     )
-    ζs, σ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC)
+    ζs, σ = generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC)
     ζs_cpu = gpu_data_handler(ζs) # differentiable fetch to CPU in Flux package extension
     #ζi = first(eachcol(ζs_cpu))
     nLy = reduce(+, map(eachcol(ζs_cpu)) do ζi
-        y_pred_i, logjac = predict_y(ζi, f, transPMs, interpreters.PMs)
+        y_pred_i, logjac = predict_y(ζi, xP, f, transPMs, interpreters.PMs)
         nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, logσ2y)
         nLy1 - logjac
     end) / n_MC
@@ -45,7 +46,7 @@ end
 
 Prediction function for hybrid model. Returns an Array `(n_obs, n_site, n_sample_pred)`.
 """
-function predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, interpreters;
+function predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, xP, interpreters;
     get_transPMs, get_ca_int_PMs, n_sample_pred=200, 
     gpu_data_handler=get_default_GPUHandler())
     n_site = size(xM, 2)
@@ -56,7 +57,7 @@ function predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, interpret
     interpreters_gen; n_MC = n_sample_pred)
     ζs_cpu = gpu_data_handler(ζs) #
     y_pred = stack(map(ζ -> first(predict_y(
-        ζ, f, trans_PMs_gen, interpreters_gen.PMs)), eachcol(ζs_cpu)));
+        ζ, xP, f, trans_PMs_gen, interpreters_gen.PMs)), eachcol(ζs_cpu)));
     y_pred
 end
 
@@ -68,19 +69,19 @@ Adds the MV-normally distributed residuals, retrieved by `sample_ζ_norm0`
 to the means extracted from parameters and predicted by the machine learning
 model. 
 """
-function generate_ζ(rng, g, f, ϕ::AbstractVector, x::AbstractMatrix,
+function generate_ζ(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix,
     interpreters::NamedTuple; n_MC=3)
     # see documentation of neg_elbo_transnorm_gf
     ϕc = interpreters.μP_ϕg_unc(CA.getdata(ϕ))
     μ_ζP = ϕc.μP
     ϕg = ϕc.ϕg
-    μ_ζMs0 = g(x, ϕg) # TODO provide μ_ζP to g
+    μ_ζMs0 = g(xM, ϕg) # TODO provide μ_ζP to g
     ζ_resid, σ = sample_ζ_norm0(rng, μ_ζP, μ_ζMs0, ϕc.unc; n_MC)
     #ζ_resid, σ = sample_ζ_norm0(rng, ϕ[1:2], reshape(ϕ[2 .+ (1:20)],2,:), ϕ[(end-length(interpreters.unc)+1):end], interpreters.unc; n_MC)
     ζ = stack(map(eachcol(ζ_resid)) do r
         rc = interpreters.PMs(r)
         ζP = μ_ζP .+ rc.P
-        μ_ζMs = μ_ζMs0 # g(x, ϕc.ϕ) # TODO provide ζP to g
+        μ_ζMs = μ_ζMs0 # g(xM, ϕc.ϕ) # TODO provide ζP to g
         ζMs = μ_ζMs .+ rc.Ms
         vcat(ζP, vec(ζMs))
     end)
@@ -168,13 +169,13 @@ Steps:
 - transform the parameters to original constrained space
 - Applies the mechanistic model for each site
 """
-function predict_y(ζi, f, transPMs::Bijectors.Transform, int_PMs::AbstractComponentArrayInterpreter)
+function predict_y(ζi, xP, f, transPMs::Bijectors.Transform, int_PMs::AbstractComponentArrayInterpreter)
     # θtup, logjac = transform_and_logjac(transPMs, ζi) # both allocating
     # θc = CA.ComponentVector(θtup)
     θ, logjac = Bijectors.with_logabsdet_jacobian(transPMs, ζi) # both allocating
     θc = int_PMs(θ)
-    # TODO provide xP
+    # xP (per-site model drivers) is now supplied by the caller
-    xP = fill((), size(θc.Ms,2))
+    # xP = fill((), size(θc.Ms,2))
     y_pred_global, y_pred = f(θc.P, θc.Ms, xP) # TODO parallelize on CPU
     # TODO take care of y_pred_global
     y_pred, logjac
diff --git a/src/gf.jl b/src/gf.jl
@@ -1,5 +1,5 @@
 function applyf(f, θMs::AbstractMatrix, θP::AbstractVector, x)
-    # predict several sites with same physical parameters
+    # predict several sites with same global parameters θP
     yv = map(eachcol(θMs), x) do θM, x_site
         f(vcat(θP, θM), x_site)
     end
diff --git a/src/hybrid_case.jl b/src/hybrid_case.jl
@@ -10,7 +10,7 @@ For a specific case, provide functions that specify details
 - get_hybridcase_PBmodel
 optionally
 - gen_hybridcase_synthetic
-- get_hybridcase_FloatType (if it should differ from Float32)
+- get_hybridcase_FloatType (defaults to eltype(θM))
 """
 abstract type AbstractHybridCase end;
 
@@ -92,8 +92,8 @@ function gen_hybridcase_synthetic end
 
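-Determine the FloatType for given Case and scenario, defaults to Float32
+Determine the FloatType for given Case and scenario, defaults to `eltype(θM)` of the parameter templates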
 """
-function get_hybridcase_FloatType(::AbstractHybridCase; scenario)
-    return Float32
+function get_hybridcase_FloatType(case::AbstractHybridCase; scenario)
+    return eltype(get_hybridcase_par_templates(case; scenario).θM)
 end
 
 
diff --git a/src/init_hybrid_params.jl b/src/init_hybrid_params.jl
@@ -27,12 +27,13 @@ function init_hybrid_params(θP, θM, ϕg, n_batch;
     # check translating parameters - can match length?
     _ = Bijectors.inverse(transP)(θP)
     _ = Bijectors.inverse(transM)(θM)
+    FT = eltype(θM)
     # zero correlation matrices
-    ρsP = zeros(sum(1:(n_θP - 1)))
-    ρsM = zeros(sum(1:(n_θM - 1)))
+    ρsP = zeros(FT, sum(1:(n_θP - 1)))
+    ρsM = zeros(FT, sum(1:(n_θM - 1)))
     ϕunc0 = CA.ComponentVector(;
-        logσ2_logP = fill(-10.0, n_θP),
-        coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)),
+        logσ2_logP = fill(FT(-10.0), n_θP),
+        coef_logσ2_logMs = reduce(hcat, (FT[-10.0, 0.0] for _ in 1:n_θM)),
         ρsP,
         ρsM)
     ϕ = CA.ComponentVector(;
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -33,6 +33,7 @@ end
     if GROUP == "All" || GROUP == "Aqua"
         #@safetestset "test" include("test/test_aqua.jl")
         if VERSION >= VersionNumber("1.11.2")
+            #@safetestset "test" include("test/test_aqua.jl")
             @time @safetestset "test_aqua" include("test_aqua.jl")
         end
     end
diff --git a/test/test_HybridProblem.jl b/test/test_HybridProblem.jl
@@ -14,42 +14,38 @@ using OptimizationOptimisers
 
 const MLengine = Val(nameof(SimpleChains))
 
-
 construct_problem = () -> begin
-    S1 = [1.0, 1.0, 1.0, 1.0, 0.4, 0.3, 0.1]
-    S2 = [1.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0]
     θP = CA.ComponentVector{Float32}(r0 = 0.3, K2 = 2.0)
-    θM = CA.ComponentVector{Float32}(r1 = 0.5, K1 = 0.2)    
+    θM = CA.ComponentVector{Float32}(r1 = 0.5, K1 = 0.2)
     transP = elementwise(exp)
     transM = Stacked(elementwise(identity), elementwise(exp))
     n_covar = 5
     n_batch = 10
     int_θdoubleMM = get_concrete(ComponentArrayInterpreter(
         flatten1(CA.ComponentVector(; θP, θM))))
-    function f_doubleMM(θ::AbstractVector)
+    function f_doubleMM(θ::AbstractVector, x)
         # extract parameters not depending on order, i.e whether they are in θP or θM
         θc = int_θdoubleMM(θ)
         r0, r1, K1, K2 = θc[(:r0, :r1, :K1, :K2)]
-        y = r0 .+ r1 .* S1 ./ (K1 .+ S1) .* S2 ./ (K2 .+ S2)
+        y = r0 .+ r1 .* x.S1 ./ (K1 .+ x.S1) .* x.S2 ./ (K2 .+ x.S2)
         return (y)
     end
-    fsite = (θ, x_site) -> f_doubleMM(θ)  # omit x_site drivers
     function f_doubleMM_with_global(θP::AbstractVector, θMs::AbstractMatrix, x)
-        pred_sites = applyf(fsite, θMs, θP, x)
+        pred_sites = applyf(f_doubleMM, θMs, θP, x)
         pred_global = eltype(pred_sites)[]
         return pred_global, pred_sites
-    end    
+    end
     n_out = length(θM)
     g_chain = SimpleChain(
-            static(n_covar), # input dimension (optional)
-            # dense layer with bias that maps to 8 outputs and applies `tanh` activation
-            TurboDense{true}(tanh, n_covar * 4),
-            TurboDense{true}(tanh, n_covar * 4),
-            # dense layer without bias that maps to n outputs and `identity` activation
-            TurboDense{false}(identity, n_out),
-        )
+        static(n_covar), # input dimension (optional)
+        # dense layer with bias that maps to `n_covar * 4` outputs and applies `tanh` activation
+        TurboDense{true}(tanh, n_covar * 4),
+        TurboDense{true}(tanh, n_covar * 4),
+        # dense layer without bias that maps to n outputs and `identity` activation
+        TurboDense{false}(identity, n_out)
+    )
     g = construct_SimpleChainsApplicator(g_chain)
-    ϕg = SimpleChains.init_params(g_chain, eltype(θM));
+    ϕg = SimpleChains.init_params(g_chain, eltype(θM))
     HybridProblem(θP, θM, transM, transP, n_covar, n_batch, f_doubleMM_with_global, g, ϕg)
 end
 prob = construct_problem();
@@ -65,7 +61,7 @@ rng = StableRNG(111)
 ) = gen_hybridcase_synthetic(case_syn, rng; scenario);
 
 @testset "loss_g" begin
-    g, ϕg0 = get_hybridcase_MLapplicator(prob, MLengine; scenario);
+    g, ϕg0 = get_hybridcase_MLapplicator(prob, MLengine; scenario)
 
     function loss_g(ϕg, x, g)
         ζMs = g(x, ϕg) # predict the log of the parameters
@@ -74,15 +70,15 @@ rng = StableRNG(111)
         return loss, θMs
     end
     loss_g(ϕg0, xM, g)
-    Zygote.gradient(x -> loss_g(x, xM, g)[1], ϕg0);
+    Zygote.gradient(x -> loss_g(x, xM, g)[1], ϕg0)
 
     optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g)[1],
         Optimization.AutoZygote())
-    optprob = Optimization.OptimizationProblem(optf, ϕg0);
+    optprob = Optimization.OptimizationProblem(optf, ϕg0)
     #res = Optimization.solve(optprob, Adam(0.02), callback = callback_loss(100), maxiters = 600);
-    res = Optimization.solve(optprob, Adam(0.02), maxiters = 600);
+    res = Optimization.solve(optprob, Adam(0.02), maxiters = 600)
 
-    ϕg_opt1 = res.u;
+    ϕg_opt1 = res.u
     pred = loss_g(ϕg_opt1, xM, g)
     θMs_pred = pred[2]
     #scatterplot(vec(θMs_true), vec(θMs_pred))
@@ -91,12 +87,12 @@ end
 
 @testset "loss_gf" begin
     #----------- fit g and θP to y_o
-    g, ϕg0 = get_hybridcase_MLapplicator(prob, MLengine; scenario);
+    g, ϕg0 = get_hybridcase_MLapplicator(prob, MLengine; scenario)
     f = get_hybridcase_PBmodel(prob; scenario)
 
     int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector(
         ϕg = 1:length(ϕg0), θP = par_templates.θP))
-    p = p0 = vcat(ϕg0, par_templates.θP .* 0.8);  # slightly disturb θP_true
+    p = p0 = vcat(ϕg0, par_templates.θP .* 0.8)  # slightly disturb θP_true
 
     # Pass the site-data for the batches as separate vectors wrapped in a tuple
     train_loader = MLUtils.DataLoader((xM, xP, y_o), batchsize = n_batch)
@@ -109,8 +105,8 @@ end
     optprob = OptimizationProblem(optf, p0, train_loader)
 
     res = Optimization.solve(
-#        optprob, Adam(0.02), callback = callback_loss(100), maxiters = 1000);
-        optprob, Adam(0.02), maxiters = 1000);
+        #        optprob, Adam(0.02), callback = callback_loss(100), maxiters = 1000);
+        optprob, Adam(0.02), maxiters = 1000)
 
     l1, y_pred_global, y_pred, θMs_pred = loss_gf(res.u, train_loader.data...)
     @test isapprox(par_templates.θP, int_ϕθP(res.u).θP, rtol = 0.11)
diff --git a/test/test_doubleMM.jl b/test/test_doubleMM.jl
@@ -61,7 +61,7 @@ end
 end
 
 @testset "loss_gf" begin
-    #----------- fit g and θP to y_o
+    #----------- fit g and θP to y_o  (without transformations)
     g, ϕg0 = get_hybridcase_MLapplicator(case, MLengine; scenario);
     f = get_hybridcase_PBmodel(case; scenario)
 
diff --git a/test/test_elbo.jl b/test/test_elbo.jl