
Commit fa44a26

Provide a logdensity_obs computing function via the case, and pass observation uncertainty through the dataloader
1 parent 151551d commit fa44a26

19 files changed: +325 −331 lines

dev/doubleMM.jl

Lines changed: 35 additions & 65 deletions
@@ -26,32 +26,32 @@ par_templates = get_hybridcase_par_templates(case; scenario)
 
 (; n_covar, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario)
 
-(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, σ_o
+(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, y_unc
 ) = gen_hybridcase_synthetic(case, rng; scenario);
 
 #----- fit g to θMs_true
 g, ϕg0 = get_hybridcase_MLapplicator(case, MLengine; scenario);
+(; transP, transM) = get_hybridcase_transforms(case; scenario)
 
-function loss_g(ϕg, x, g)
+function loss_g(ϕg, x, g, transM)
     ζMs = g(x, ϕg) # predict the log of the parameters
-    θMs = exp.(ζMs)
+    θMs = reduce(hcat, map(transM, eachcol(ζMs))) # transform each column
     loss = sum(abs2, θMs .- θMs_true)
     return loss, θMs
 end
-loss_g(ϕg0, xM, g)
-Zygote.gradient(x -> loss_g(x, xM, g)[1], ϕg0);
+loss_g(ϕg0, xM, g, transM)
 
-optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g)[1],
+optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g, transM)[1],
     Optimization.AutoZygote())
 optprob = Optimization.OptimizationProblem(optf, ϕg0);
 res = Optimization.solve(optprob, Adam(0.02), callback = callback_loss(100), maxiters = 800);
 
 ϕg_opt1 = res.u;
-loss_g(ϕg_opt1, xM, g)
-scatterplot(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2]))
-@test cor(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2])) > 0.9
+l1, θMs_pred = loss_g(ϕg_opt1, xM, g, transM)
+scatterplot(vec(θMs_true), vec(θMs_pred))
 
 f = get_hybridcase_PBmodel(case; scenario)
+py = get_hybridcase_neg_logden_obs(case; scenario)
 
 #----------- fit g and θP to y_o
 () -> begin
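
A note on the loss_g change: the hard-coded exp is replaced by the case-supplied transM, applied column by column. A minimal sketch of what the reduce(hcat, map(...)) line computes, using a plain exp-based stand-in (transM_demo is hypothetical; for an elementwise(exp) transform like the one the removed lines built by hand, this reduces to the old behavior):

transM_demo = ζ -> exp.(ζ)                           # stand-in for the case's transM
ζMs = [0.0 1.0; -1.0 2.0]                            # n_θM × n_site, unconstrained scale
θMs = reduce(hcat, map(transM_demo, eachcol(ζMs)))   # n_θM × n_site, native scale
θMs == exp.(ζMs)                                     # true for this elementwise transform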
@@ -82,13 +82,12 @@ f = get_hybridcase_PBmodel(case; scenario)
 end
 
 #---------- HVI
-logσ2y = 2 .* log.(σ_o)
 n_MC = 3
-transP = elementwise(exp)
-transM = Stacked(elementwise(identity), elementwise(exp))
+(; transP, transM) = get_hybridcase_transforms(case; scenario)
+FT = get_hybridcase_float_type(case; scenario)
 
 (; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params(
-    θP_true, θMs_true[:, 1], ϕg_opt1, n_batch; transP = asℝ₊, transM = asℝ₊);
+    θP_true, θMs_true[:, 1], ϕg_opt1, n_batch; transP, transM);
 ϕ_true = ϕ
 
 () -> begin
@@ -149,49 +148,21 @@ transM = Stacked(elementwise(identity), elementwise(exp))
     ϕ_true = inverse_ca(trans_gu, ϕt_true)
 end
 
-ϕ_ini0 = ζ = vcat(ϕ_true[:μP] .* 0.0, ϕg0, ϕ_true[[:unc]]); # scratch
+ϕ_ini0 = ζ = reduce(
+    vcat, (
+        ϕ_true[[:μP]] .* FT(0.001), CA.ComponentVector(ϕg = ϕg0), ϕ_true[[:unc]])) # scratch
 #
-# true values
-ϕ_ini = ζ = vcat(ϕ_true[[:μP, :ϕg]] .* 1.2, ϕ_true[[:unc]]); # slight disturbance
+ϕ_ini = ζ = reduce(
+    vcat, (
+        ϕ_true[[:μP]] .- FT(0.1), ϕ_true[[:ϕg]] .* FT(1.1), ϕ_true[[:unc]])) # slight disturbance
 # hardcoded from HMC inversion
 ϕ_ini.unc.coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951]
 ϕ_ini.unc.logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893)
 mean_σ_o_MC = 0.006042
 
-# test cost function and gradient
-() -> begin
-    neg_elbo_transnorm_gf(rng, g, f, ϕ_true, y_o[:, 1:n_batch], xM[:, 1:n_batch],
-        transPMs_batch, map(get_concrete, interpreters);
-        n_MC = 8, logσ2y)
-    Zygote.gradient(
-        ϕ -> neg_elbo_transnorm_gf(
-            rng, g, f, ϕ, y_o[:, 1:n_batch], xM[:, 1:n_batch],
-            transPMs_batch, interpreters; n_MC = 8, logσ2y),
-        CA.getdata(ϕ_true))
-end
-
-# optimize using SimpleChains
-() -> begin
-    train_loader = MLUtils.DataLoader((xM, y_o), batchsize = n_batch)
-
-    optf = Optimization.OptimizationFunction(
-        (ϕ, data) -> begin
-            xM, y_o = data
-            neg_elbo_transnorm_gf(
-                rng, g_flux, f, ϕ, y_o, xM, transPMs_batch,
-                map(get_concrete, interpreters_g); n_MC = 5, logσ2y)
-        end,
-        Optimization.AutoZygote())
-    optprob = Optimization.OptimizationProblem(optf, CA.getdata(ϕ_ini), train_loader)
-    res = Optimization.solve(
-        optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 800)
-    #optprob = Optimization.OptimizationProblem(optf, ϕ_ini0);
-    #res = Optimization.solve(optprob, Adam(0.02), callback=callback_loss(50), maxiters=1_400);
-end
-
-ϕ = ϕ_ini |> Flux.gpu;
+ϕ = CA.getdata(ϕ_ini) |> Flux.gpu;
 xM_gpu = xM |> Flux.gpu;
-g_flux, ϕg0_flux_cpu = get_hybridcase_MLapplicator(case, FluxMLengine; scenario);
+g_flux, _ = get_hybridcase_MLapplicator(case, FluxMLengine; scenario);
 
 # optimize using LUX
 () -> begin
@@ -216,27 +187,25 @@ g_flux, ϕg0_flux_cpu = get_hybridcase_MLapplicator(case, FluxMLengine; scenario
     g_flux = g_luxs
 end
 
-function fcost(ϕ, xM, y_o)
-    neg_elbo_transnorm_gf(rng, g_flux, f, CA.getdata(ϕ), y_o,
-        xM, transPMs_batch, map(get_concrete, interpreters);
-        n_MC = 8, logσ2y = logσ2y)
+function fcost(ϕ, xM, y_o, y_unc)
+    neg_elbo_transnorm_gf(rng, g_flux, f, py, CA.getdata(ϕ), y_o, y_unc,
+        xM, xP, transPMs_batch, map(get_concrete, interpreters);
+        n_MC = 8)
 end
-fcost(ϕ, xM_gpu[:, 1:n_batch], y_o[:, 1:n_batch])
+fcost(ϕ, xM_gpu[:, 1:n_batch], y_o[:, 1:n_batch], y_unc[:, 1:n_batch])
 #Zygote.gradient(fcost, ϕ) |> cpu;
 gr = Zygote.gradient(fcost,
-    CA.getdata(ϕ), CA.getdata(xM_gpu[:, 1:n_batch]), CA.getdata(y_o[:, 1:n_batch]));
-gr_c = CA.ComponentArray(gr[1] |> Flux.cpu, CA.getaxes(ϕ)...)
+    CA.getdata(ϕ), CA.getdata(xM_gpu[:, 1:n_batch]),
+    CA.getdata(y_o[:, 1:n_batch]), CA.getdata(y_unc[:, 1:n_batch]));
+gr_c = CA.ComponentArray(gr[1] |> Flux.cpu, CA.getaxes(ϕ_ini)...)
 
-train_loader = MLUtils.DataLoader((xM_gpu, xP, y_o), batchsize = n_batch)
-train_loader = get_hybridcase_train_dataloader(case, rng; scenario = (scenario..., :use_flux))
+train_loader = MLUtils.DataLoader((xM_gpu, xP, y_o, y_unc), batchsize = n_batch)
+#train_loader = get_hybridcase_train_dataloader(case, rng; scenario = (scenario..., :use_flux))
 
 optf = Optimization.OptimizationFunction(
     (ϕ, data) -> begin
-        xM, y_o = data
-        fcost(ϕ, xM, y_o)
-        # neg_elbo_transnorm_gf(
-        #     rng, g_flux, f, ϕ, y_o, xM, transPMs_batch,
-        #     map(get_concrete, interpreters); n_MC = 5, logσ2y)
+        xM, xP, y_o, y_unc = data
+        fcost(ϕ, xM, y_o, y_unc)
     end,
     Optimization.AutoZygote())
 optprob = Optimization.OptimizationProblem(
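
Batches from the new loader are 4-tuples, which the Optimization closure destructures as xM, xP, y_o, y_unc. A minimal sketch of that layout under MLUtils semantics (shapes are illustrative only; in the dev script xP is a vector of per-site NamedTuples, which DataLoader slices the same way):

using MLUtils
xM, xP = rand(Float32, 5, 20), rand(Float32, 3, 20)     # illustrative covariates/drivers
y_o, y_unc = rand(Float32, 2, 20), fill(-9.2f0, 2, 20)  # observations and log-variances
loader = MLUtils.DataLoader((xM, xP, y_o, y_unc), batchsize = 10)
for (xM_b, xP_b, y_o_b, y_unc_b) in loader
    @assert size(xM_b, 2) == size(y_unc_b, 2) == 10     # all parts sliced consistently
end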
@@ -256,7 +225,7 @@ end
 ζMs_VI = g_flux(xM_gpu, ζ_VIc.ϕg |> Flux.gpu) |> Flux.cpu
 ϕunc_VI = interpreters.unc(ζ_VIc.unc)
 
-hcat(θP_true, exp.(ζ_VIc.μP))
+hcat(log.(θP_true), ϕ_ini.μP, ζ_VIc.μP)
 plt = scatterplot(vec(θMs_true), vec(exp.(ζMs_VI)))
 #lineplot!(plt, 0.0, 1.1, identity)
 #
@@ -266,11 +235,12 @@ hcat(ϕ_ini.unc, ϕunc_VI) # need to compare to MC sample
 # test predicting correct obs-uncertainty of predictive posterior
 n_sample_pred = 200
 
-y_pred = predict_gf(rng, g_flux, f, res.u, xM_gpu, interpreters;
+y_pred = predict_gf(rng, g_flux, f, res.u, xM_gpu, xP, interpreters;
     get_transPMs, get_ca_int_PMs, n_sample_pred);
 size(y_pred) # n_obs x n_site, n_sample_pred
 
 σ_o_post = dropdims(std(y_pred; dims = 3), dims = 3);
+σ_o = exp.(y_unc[:,1] / 2)
 
 #describe(σ_o_post)
 hcat(σ_o, fill(mean_σ_o_MC, length(σ_o)),
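
The σ_o line inverts the uncertainty encoding this commit introduces: y_unc stores log-variances (y_unc = log σ² = 2 log σ), so the observation standard deviation is recovered as exp(y_unc / 2). A one-line check:

σ_o = 0.01
y_unc1 = 2 * log(σ_o)     # encoding, as in gen_hybridcase_synthetic
exp(y_unc1 / 2) ≈ σ_o     # decoding, as in the line above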

ext/HybridVariationalInferenceFluxExt.jl

Lines changed: 12 additions & 10 deletions
@@ -3,12 +3,14 @@ module HybridVariationalInferenceFluxExt
 using HybridVariationalInference, Flux
 using HybridVariationalInference: HybridVariationalInference as HVI
 using ComponentArrays: ComponentArrays as CA
+using Random
 
 struct FluxApplicator{RT} <: AbstractModelApplicator
     rebuild::RT
 end
 
-function HVI.construct_FluxApplicator(m::Chain)
+function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::Chain, float_type::DataType)
+    # TODO: care for rng and float_type
     ϕ, rebuild = destructure(m)
     FluxApplicator(rebuild), ϕ
 end
@@ -26,17 +28,17 @@ function __init__()
     HVI.set_default_GPUHandler(FluxGPUDataHandler())
 end
 
-function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Flux.Chain,
-        args...; kwargs...)
-    # constructor with Flux.Chain
-    g, ϕg = construct_FluxApplicator(g_chain)
-    HybridProblem(θP, θM, g, ϕg, args...; kwargs...)
-end
+# function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Flux.Chain,
+#         args...; kwargs...)
+#     # constructor with Flux.Chain
+#     g, ϕg = construct_FluxApplicator(g_chain)
+#     HybridProblem(θP, θM, g, ϕg, args...; kwargs...)
+# end
 
-function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{:Flux};
+function HVI.get_hybridcase_MLapplicator(rng::AbstractRNG, case::HVI.DoubleMM.DoubleMMCase, ::Val{:Flux};
         scenario::NTuple = ())
     (; n_covar, n_θM) = get_hybridcase_sizes(case; scenario)
-    FloatType = get_hybridcase_FloatType(case; scenario)
+    float_type = get_hybridcase_float_type(case; scenario)
     n_out = n_θM
     is_using_dropout = :use_dropout ∈ scenario
     is_using_dropout && error("dropout scenario not supported with Flux yet.")
@@ -47,7 +49,7 @@ function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{
         # dense layer without bias that maps to n outputs and `identity` activation
         Flux.Dense(n_covar * 4 => n_out, identity, bias = false)
     )
-    construct_FluxApplicator(g_chain)
+    construct_ChainsApplicator(rng, g_chain, float_type)
 end
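
Background on the FluxApplicator pattern (apply_model itself is untouched by this commit): Flux.destructure splits a Chain into a flat parameter vector plus a rebuild closure, which is what lets the applicator evaluate the network for any candidate ϕ. A self-contained sketch:

using Flux
m = Chain(Dense(4 => 8, tanh), Dense(8 => 2))
ϕ, rebuild = Flux.destructure(m)       # flat parameters and a rebuild closure
x = rand(Float32, 4, 3)
y = rebuild(ϕ)(x)                      # rebuild the chain from ϕ, then apply it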

ext/HybridVariationalInferenceLuxExt.jl

Lines changed: 9 additions & 8 deletions
@@ -10,8 +10,8 @@ struct LuxApplicator{MT, IT} <: AbstractModelApplicator
     int_ϕ::IT
 end
 
-function HVI.construct_LuxApplicator(m::Chain, float_type=Float32; device = gpu_device())
-    ps, st = Lux.setup(Random.default_rng(), m)
+function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::Chain, float_type=Float32; device = gpu_device())
+    ps, st = Lux.setup(rng, m)
     ps_ca = float_type.(CA.ComponentArray(ps))
     st = st |> device
     stateful_layer = StatefulLuxLayer{true}(m, nothing, st)
@@ -25,11 +25,12 @@ function HVI.apply_model(app::LuxApplicator, x, ϕ)
     app.stateful_layer(x, ϕc)
 end
 
-function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Chain,
-        args...; device = gpu_device(), kwargs...)
-    # constructor with SimpleChain
-    g, ϕg = construct_LuxApplicator(g_chain, eltype(θM); device)
-    HybridProblem(θP, θM, g, ϕg, args...; kwargs...)
-end
+# function HVI.HybridProblem(rng::AbstractRNG,
+#         θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Chain,
+#         args...; device = gpu_device(), kwargs...)
+#     # constructor with SimpleChain
+#     g, ϕg = construct_ChainsApplicator(rng, g_chain, eltype(θM); device)
+#     HybridProblem(θP, θM, g, ϕg, args...; kwargs...)
+# end
 
 end # module

ext/HybridVariationalInferenceSimpleChainsExt.jl

Lines changed: 6 additions & 12 deletions
@@ -4,31 +4,25 @@ using HybridVariationalInference, SimpleChains
 using HybridVariationalInference: HybridVariationalInference as HVI
 using StatsFuns: logistic
 using ComponentArrays: ComponentArrays as CA
+using Random
 
 
 
 struct SimpleChainsApplicator{MT} <: AbstractModelApplicator
     m::MT
 end
 
-function HVI.construct_SimpleChainsApplicator(m::SimpleChain, FloatType=Float32)
-    ϕ = SimpleChains.init_params(m, FloatType);
+function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::SimpleChain, FloatType=Float32)
+    ϕ = SimpleChains.init_params(m, FloatType; rng);
     SimpleChainsApplicator(m), ϕ
 end
 
 HVI.apply_model(app::SimpleChainsApplicator, x, ϕ) = app.m(x, ϕ)
 
-function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::SimpleChain,
-        args...; kwargs...)
-    # constructor with SimpleChain
-    g, ϕg = construct_SimpleChainsApplicator(g_chain)
-    HybridProblem(θP, θM, g, ϕg, args...; kwargs...)
-end
-
-function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{:SimpleChains};
+function HVI.get_hybridcase_MLapplicator(rng::AbstractRNG, case::HVI.DoubleMM.DoubleMMCase, ::Val{:SimpleChains};
        scenario::NTuple=())
    (;n_covar, n_θM) = get_hybridcase_sizes(case; scenario)
-    FloatType = get_hybridcase_FloatType(case; scenario)
+    FloatType = get_hybridcase_float_type(case; scenario)
    n_out = n_θM
    is_using_dropout = :use_dropout ∈ scenario
    g_chain = if is_using_dropout
@@ -52,7 +46,7 @@ function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{
         TurboDense{false}(identity, n_out)
     )
     end
-    construct_SimpleChainsApplicator(g_chain, FloatType)
+    construct_ChainsApplicator(rng, g_chain, FloatType)
 end
 
 end # module
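
After this commit, all three extensions implement the single HVI.construct_ChainsApplicator entry point, so the backend is chosen by the chain type alone. A hedged usage sketch (the chain variables are placeholders, not names from the repo; the returned applicator is called like a function, as in the dev script):

using Random
rng = Random.default_rng()
# g, ϕg = construct_ChainsApplicator(rng, flux_chain, Float32)    # Flux backend
# g, ϕg = construct_ChainsApplicator(rng, lux_chain, Float32)     # Lux backend
# g, ϕg = construct_ChainsApplicator(rng, simple_chain, Float32)  # SimpleChains backend
# ζMs = g(xM, ϕg)  # dispatches to HVI.apply_model for the chosen backend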

src/DoubleMM/f_doubleMM.jl

Lines changed: 11 additions & 4 deletions
@@ -22,10 +22,14 @@ function HVI.get_hybridcase_par_templates(::DoubleMMCase; scenario::NTuple = ())
     (; θP, θM)
 end
 
-function HVI.get_hybridcase_transforms(::AbstractHybridCase; scenario::NTuple = ())
+function HVI.get_hybridcase_transforms(::DoubleMMCase; scenario::NTuple = ())
     (; transP, transM)
 end
 
+function HVI.get_hybridcase_neg_logden_obs(::DoubleMMCase; scenario::NTuple = ())
+    neg_logden_indep_normal
+end
+
 function HVI.get_hybridcase_sizes(::DoubleMMCase; scenario = ())
     n_covar_pc = 2
     n_covar = n_covar_pc + 3 # linear dependent
@@ -46,7 +50,7 @@ function HVI.get_hybridcase_PBmodel(::DoubleMMCase; scenario::NTuple = ())
     end
 end
 
-# function HVI.get_hybridcase_FloatType(::DoubleMMCase; scenario)
+# function HVI.get_hybridcase_float_type(::DoubleMMCase; scenario)
 #     return Float32
 # end
 
@@ -58,7 +62,7 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG;
     n_covar_pc = 2
     n_site = 200
     (; n_covar, n_θM, n_θP) = get_hybridcase_sizes(case; scenario)
-    FloatType = get_hybridcase_FloatType(case; scenario)
+    FloatType = get_hybridcase_float_type(case; scenario)
     xM, θMs_true0 = gen_cov_pred(rng, FloatType, n_covar_pc, n_covar, n_site, n_θM;
         rhodec = 8, is_using_dropout = false)
     int_θMs_sites = ComponentArrayInterpreter(θM, (n_site,))
@@ -68,6 +72,7 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG;
     xP = fill((;S1=xP_S1, S2=xP_S2), n_site)
     y_global_true, y_true = f(θP, θMs_true, xP)
     σ_o = FloatType(0.01)
+    logσ2_o = FloatType(2) .* log.(σ_o)
     #σ_o = 0.002
     y_global_o = y_global_true .+ randn(rng, FloatType, size(y_global_true)) .* σ_o
     y_o = y_true .+ randn(rng, FloatType, size(y_true)) .* σ_o
@@ -81,9 +86,11 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG;
         y_true,
         y_global_o,
         y_o,
-        σ_o = fill(σ_o, size(y_true,1)),
+        y_unc = fill(logσ2_o, size(y_o)),
     )
 end
 
 
 
+
+
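
get_hybridcase_neg_logden_obs returns neg_logden_indep_normal, whose body is not part of this diff. A minimal sketch of what a negative log-density of independent normal observations with per-observation log-variances would compute (assumed form with constants dropped; the package's actual function may differ):

function neg_logden_indep_normal_sketch(y_o, y_pred, y_unc)
    # -log p(y_o | y_pred, σ²) with σ² = exp(y_unc), summed over observations;
    # the per-observation constant log(2π)/2 is omitted
    sum(@. y_unc / 2 + abs2(y_o - y_pred) / (2 * exp(y_unc)))
end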
