diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ae44819..796183d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -49,7 +49,8 @@ jobs: with: files: lcov.info token: ${{ secrets.CODECOV_TOKEN }} - fail_ci_if_error: false + #fail_ci_if_error: false + fail_ci_if_error: true docs: name: Documentation runs-on: ubuntu-latest diff --git a/dev/doubleMM.jl b/dev/doubleMM.jl index 5cec624..cc70fa2 100644 --- a/dev/doubleMM.jl +++ b/dev/doubleMM.jl @@ -7,7 +7,7 @@ using Statistics using ComponentArrays: ComponentArrays as CA using SimpleChains -import Flux # to allow for FluxMLEngine and cpu() +import Flux using MLUtils import Zygote @@ -17,41 +17,43 @@ using Bijectors using UnicodePlots const case = DoubleMM.DoubleMMCase() -const MLengine = Val(nameof(SimpleChains)) -const FluxMLengine = Val(nameof(Flux)) scenario = (:default,) rng = StableRNG(111) par_templates = get_hybridcase_par_templates(case; scenario) -(; n_covar, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) +#n_covar = get_hybridcase_n_covar(case; scenario) +#, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) + +(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, y_unc +) = gen_hybridcase_synthetic(rng, case; scenario); + +n_covar = size(xM,1) -(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, σ_o -) = gen_hybridcase_synthetic(case, rng; scenario); #----- fit g to θMs_true -g, ϕg0 = get_hybridcase_MLapplicator(case, MLengine; scenario); +g, ϕg0 = get_hybridcase_MLapplicator(case; scenario); +(; transP, transM) = get_hybridcase_transforms(case; scenario) -function loss_g(ϕg, x, g) +function loss_g(ϕg, x, g, transM) ζMs = g(x, ϕg) # predict the log of the parameters - θMs = exp.(ζMs) + θMs = reduce(hcat, map(transM, eachcol(ζMs))) # transform each column loss = sum(abs2, θMs .- θMs_true) return loss, θMs end -loss_g(ϕg0, xM, g) -Zygote.gradient(x -> loss_g(x, xM, g)[1], ϕg0); +loss_g(ϕg0, xM, g, transM) -optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g)[1], +optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g, transM)[1], Optimization.AutoZygote()) optprob = Optimization.OptimizationProblem(optf, ϕg0); res = Optimization.solve(optprob, Adam(0.02), callback = callback_loss(100), maxiters = 800); ϕg_opt1 = res.u; -loss_g(ϕg_opt1, xM, g) -scatterplot(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2])) -@test cor(vec(θMs_true), vec(loss_g(ϕg_opt1, xM, g)[2])) > 0.9 +l1, θMs_pred = loss_g(ϕg_opt1, xM, g, transM) +scatterplot(vec(θMs_true), vec(θMs_pred)) f = get_hybridcase_PBmodel(case; scenario) +py = get_hybridcase_neg_logden_obs(case; scenario) #----------- fit g and θP to y_o () -> begin @@ -62,7 +64,7 @@ f = get_hybridcase_PBmodel(case; scenario) p = p0 = vcat(ϕg0, par_templates.θP .* 0.9) # slightly disturb θP_true # Pass the site-data for the batches as separate vectors wrapped in a tuple - train_loader = MLUtils.DataLoader((xM, xP, y_o), batchsize = n_batch) + train_loader = MLUtils.DataLoader((xM, xP, y_o, y_unc), batchsize = n_batch) loss_gf = get_loss_gf(g, f, y_global_o, int_ϕθP) l1 = loss_gf(p0, train_loader.data...)[1] @@ -82,15 +84,16 @@ f = get_hybridcase_PBmodel(case; scenario) end #---------- HVI -logσ2y = 2 .* log.(σ_o) n_MC = 3 -transP = elementwise(exp) -transM = Stacked(elementwise(identity), elementwise(exp)) +(; transP, transM) = get_hybridcase_transforms(case; scenario) +FT = get_hybridcase_float_type(case; scenario) (; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params( - θP_true, θMs_true[:, 1], ϕg_opt1, n_batch; transP = asℝ₊, transM = asℝ₊); + θP_true, θMs_true[:, 1], ϕg_opt1, n_batch; transP, transM); ϕ_true = ϕ + + () -> begin coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) @@ -149,49 +152,22 @@ transM = Stacked(elementwise(identity), elementwise(exp)) ϕ_true = inverse_ca(trans_gu, ϕt_true) end -ϕ_ini0 = ζ = vcat(ϕ_true[:μP] .* 0.0, ϕg0, ϕ_true[[:unc]]); # scratch +ϕ_ini0 = ζ = reduce( + vcat, ( + ϕ_true[[:μP]] .* FT(0.001), CA.ComponentVector(ϕg = ϕg0), ϕ_true[[:unc]])) # scratch # -# true values -ϕ_ini = ζ = vcat(ϕ_true[[:μP, :ϕg]] .* 1.2, ϕ_true[[:unc]]); # slight disturbance +ϕ_ini = ζ = reduce( + vcat, ( + ϕ_true[[:μP]] .- FT(0.1), ϕ_true[[:ϕg]] .* FT(1.1), ϕ_true[[:unc]])) # slight disturbance # hardcoded from HMC inversion ϕ_ini.unc.coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] ϕ_ini.unc.logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) mean_σ_o_MC = 0.006042 -# test cost function and gradient -() -> begin - neg_elbo_transnorm_gf(rng, g, f, ϕ_true, y_o[:, 1:n_batch], xM[:, 1:n_batch], - transPMs_batch, map(get_concrete, interpreters); - n_MC = 8, logσ2y) - Zygote.gradient( - ϕ -> neg_elbo_transnorm_gf( - rng, g, f, ϕ, y_o[:, 1:n_batch], xM[:, 1:n_batch], - transPMs_batch, interpreters; n_MC = 8, logσ2y), - CA.getdata(ϕ_true)) -end - -# optimize using SimpleChains -() -> begin - train_loader = MLUtils.DataLoader((xM, y_o), batchsize = n_batch) - - optf = Optimization.OptimizationFunction( - (ϕ, data) -> begin - xM, y_o = data - neg_elbo_transnorm_gf( - rng, g, f, ϕ, y_o, xM, transPMs_batch, - map(get_concrete, interpreters_g); n_MC = 5, logσ2y) - end, - Optimization.AutoZygote()) - optprob = Optimization.OptimizationProblem(optf, CA.getdata(ϕ_ini), train_loader) - res = Optimization.solve( - optprob, Optimisers.Adam(0.02), callback = callback_loss(50), maxiters = 800) - #optprob = Optimization.OptimizationProblem(optf, ϕ_ini0); - #res = Optimization.solve(optprob, Adam(0.02), callback=callback_loss(50), maxiters=1_400); -end - -ϕ = ϕ_ini |> Flux.gpu; +ϕ = CA.getdata(ϕ_ini) |> Flux.gpu; xM_gpu = xM |> Flux.gpu; -g_flux, ϕg0_flux_cpu = get_hybridcase_MLapplicator(case, FluxMLengine; scenario); +scenario_flux = (scenario..., :use_Flux) +g_flux, _ = get_hybridcase_MLapplicator(case; scenario = scenario_flux); # otpimize using LUX () -> begin @@ -216,27 +192,25 @@ g_flux, ϕg0_flux_cpu = get_hybridcase_MLapplicator(case, FluxMLengine; scenario g_flux = g_luxs end -function fcost(ϕ, xM, y_o) - neg_elbo_transnorm_gf(rng, g_flux, f, CA.getdata(ϕ), y_o, - xM, transPMs_batch, map(get_concrete, interpreters); - n_MC = 8, logσ2y = logσ2y) +function fcost(ϕ, xM, y_o, y_unc) + neg_elbo_transnorm_gf(rng, CA.getdata(ϕ), g_flux, transPMs_batch, f, py, + xM, xP, y_o, y_unc, map(get_concrete, interpreters); + n_MC = 8) end -fcost(ϕ, xM_gpu[:, 1:n_batch], y_o[:, 1:n_batch]) +fcost(ϕ, xM_gpu[:, 1:n_batch], y_o[:, 1:n_batch], y_unc[:, 1:n_batch]) #Zygote.gradient(fcost, ϕ) |> cpu; gr = Zygote.gradient(fcost, - CA.getdata(ϕ), CA.getdata(xM_gpu[:, 1:n_batch]), CA.getdata(y_o[:, 1:n_batch])); -gr_c = CA.ComponentArray(gr[1] |> Flux.cpu, CA.getaxes(ϕ)...) + CA.getdata(ϕ), CA.getdata(xM_gpu[:, 1:n_batch]), + CA.getdata(y_o[:, 1:n_batch]), CA.getdata(y_unc[:, 1:n_batch])); +gr_c = CA.ComponentArray(gr[1] |> Flux.cpu, CA.getaxes(ϕ_ini)...) -train_loader = MLUtils.DataLoader((xM_gpu, xP, y_o), batchsize = n_batch) -train_loader = get_hybridcase_train_dataloader(case, rng; scenario = (scenario..., :use_flux)) +train_loader = MLUtils.DataLoader((xM_gpu, xP, y_o, y_unc), batchsize = n_batch) +#train_loader = get_hybridcase_train_dataloader(case, rng; scenario = (scenario..., :use_Flux)) optf = Optimization.OptimizationFunction( (ϕ, data) -> begin - xM, y_o = data - fcost(ϕ, xM, y_o) - # neg_elbo_transnorm_gf( - # rng, g_flux, f, ϕ, y_o, xM, transPMs_batch, - # map(get_concrete, interpreters); n_MC = 5, logσ2y) + xM, xP, y_o, y_unc = data + fcost(ϕ, xM, y_o, y_unc) end, Optimization.AutoZygote()) optprob = Optimization.OptimizationProblem( @@ -256,7 +230,7 @@ end ζMs_VI = g_flux(xM_gpu, ζ_VIc.ϕg |> Flux.gpu) |> Flux.cpu ϕunc_VI = interpreters.unc(ζ_VIc.unc) -hcat(θP_true, exp.(ζ_VIc.μP)) +hcat(log.(θP_true), ϕ_ini.μP, ζ_VIc.μP) plt = scatterplot(vec(θMs_true), vec(exp.(ζMs_VI))) #lineplot!(plt, 0.0, 1.1, identity) # @@ -266,11 +240,12 @@ hcat(ϕ_ini.unc, ϕunc_VI) # need to compare to MC sample # test predicting correct obs-uncertainty of predictive posterior n_sample_pred = 200 -y_pred = predict_gf(rng, g_flux, f, res.u, xM_gpu, interpreters; +y_pred = predict_gf(rng, g_flux, f, res.u, xM_gpu, xP, interpreters; get_transPMs, get_ca_int_PMs, n_sample_pred); size(y_pred) # n_obs x n_site, n_sample_pred σ_o_post = dropdims(std(y_pred; dims = 3), dims = 3); +σ_o = exp.(y_unc[:,1] / 2) #describe(σ_o_post) hcat(σ_o, fill(mean_σ_o_MC, length(σ_o)), @@ -282,7 +257,7 @@ histogram(vec(mean_y_pred - y_true)) # predictions centered around y_o (or y_tru # look at θP, θM1 of first site intm_PMs_gen = get_ca_int_PMs(n_site) -ζs, _σ = HVI.generate_ζ(rng, g_flux, f, res.u, xM_gpu, +ζs, _σ = HVI.generate_ζ(rng, g_flux, res.u, xM_gpu, (; interpreters..., PMs = intm_PMs_gen); n_MC = n_sample_pred); ζs = ζs |> Flux.cpu; θPM = vcat(θP_true, θMs_true[:, 1]) diff --git a/ext/HybridVariationalInferenceFluxExt.jl b/ext/HybridVariationalInferenceFluxExt.jl index 1d639bb..02b9fa4 100644 --- a/ext/HybridVariationalInferenceFluxExt.jl +++ b/ext/HybridVariationalInferenceFluxExt.jl @@ -3,12 +3,14 @@ module HybridVariationalInferenceFluxExt using HybridVariationalInference, Flux using HybridVariationalInference: HybridVariationalInference as HVI using ComponentArrays: ComponentArrays as CA +using Random struct FluxApplicator{RT} <: AbstractModelApplicator rebuild::RT end -function HVI.construct_FluxApplicator(m::Chain) +function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::Chain, float_type::DataType) + # TODO: care fore rng and float_type ϕ, rebuild = destructure(m) FluxApplicator(rebuild), ϕ end @@ -26,18 +28,21 @@ function __init__() HVI.set_default_GPUHandler(FluxGPUDataHandler()) end -function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Flux.Chain, - args...; kwargs...) - # constructor with Flux.Chain - g, ϕg = construct_FluxApplicator(g_chain) - HybridProblem(θP, θM, g, ϕg, args...; kwargs...) -end +# function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Flux.Chain, +# args...; kwargs...) +# # constructor with Flux.Chain +# g, ϕg = construct_FluxApplicator(g_chain) +# HybridProblem(θP, θM, g, ϕg, args...; kwargs...) +# end -function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{:Flux}; +function HVI.construct_3layer_MLApplicator( + rng::AbstractRNG, case::HVI.AbstractHybridCase, ::Val{:Flux}; scenario::NTuple = ()) - (; n_covar, n_θM) = get_hybridcase_sizes(case; scenario) - FloatType = get_hybridcase_FloatType(case; scenario) - n_out = n_θM + (;θM) = get_hybridcase_par_templates(case; scenario) + n_out = length(θM) + n_covar = get_hybridcase_n_covar(case; scenario) + #(; n_covar, n_θM) = get_hybridcase_sizes(case; scenario) + float_type = get_hybridcase_float_type(case; scenario) is_using_dropout = :use_dropout ∈ scenario is_using_dropout && error("dropout scenario not supported with Flux yet.") g_chain = Flux.Chain( @@ -47,7 +52,7 @@ function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{ # dense layer without bias that maps to n outputs and `identity` activation Flux.Dense(n_covar * 4 => n_out, identity, bias = false) ) - construct_FluxApplicator(g_chain) + construct_ChainsApplicator(rng, g_chain, float_type) end diff --git a/ext/HybridVariationalInferenceLuxExt.jl b/ext/HybridVariationalInferenceLuxExt.jl index bfcf6cb..0b54fe5 100644 --- a/ext/HybridVariationalInferenceLuxExt.jl +++ b/ext/HybridVariationalInferenceLuxExt.jl @@ -10,8 +10,8 @@ struct LuxApplicator{MT, IT} <: AbstractModelApplicator int_ϕ::IT end -function HVI.construct_LuxApplicator(m::Chain, float_type=Float32; device = gpu_device()) - ps, st = Lux.setup(Random.default_rng(), m) +function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::Chain, float_type=Float32; device = gpu_device()) + ps, st = Lux.setup(rng, m) ps_ca = float_type.(CA.ComponentArray(ps)) st = st |> device stateful_layer = StatefulLuxLayer{true}(m, nothing, st) @@ -25,11 +25,12 @@ function HVI.apply_model(app::LuxApplicator, x, ϕ) app.stateful_layer(x, ϕc) end -function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Chain, - args...; device = gpu_device(), kwargs...) - # constructor with SimpleChain - g, ϕg = construct_LuxApplicator(g_chain, eltype(θM); device) - HybridProblem(θP, θM, g, ϕg, args...; kwargs...) -end +# function HVI.HybridProblem(rng::AbstractRNG, +# θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::Chain, +# args...; device = gpu_device(), kwargs...) +# # constructor with SimpleChain +# g, ϕg = construct_ChainsApplicator(rng, g_chain, eltype(θM); device) +# HybridProblem(θP, θM, g, ϕg, args...; kwargs...) +# end end # module diff --git a/ext/HybridVariationalInferenceSimpleChainsExt.jl b/ext/HybridVariationalInferenceSimpleChainsExt.jl index f95caa9..df1a122 100644 --- a/ext/HybridVariationalInferenceSimpleChainsExt.jl +++ b/ext/HybridVariationalInferenceSimpleChainsExt.jl @@ -4,6 +4,7 @@ using HybridVariationalInference, SimpleChains using HybridVariationalInference: HybridVariationalInference as HVI using StatsFuns: logistic using ComponentArrays: ComponentArrays as CA +using Random @@ -11,25 +12,20 @@ struct SimpleChainsApplicator{MT} <: AbstractModelApplicator m::MT end -function HVI.construct_SimpleChainsApplicator(m::SimpleChain, FloatType=Float32) - ϕ = SimpleChains.init_params(m, FloatType); +function HVI.construct_ChainsApplicator(rng::AbstractRNG, m::SimpleChain, FloatType=Float32) + ϕ = SimpleChains.init_params(m, FloatType; rng); SimpleChainsApplicator(m), ϕ end HVI.apply_model(app::SimpleChainsApplicator, x, ϕ) = app.m(x, ϕ) -function HVI.HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, g_chain::SimpleChain, - args...; kwargs...) - # constructor with SimpleChain - g, ϕg = construct_SimpleChainsApplicator(g_chain) - HybridProblem(θP, θM, g, ϕg, args...; kwargs...) -end - -function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{:SimpleChains}; - scenario::NTuple=()) - (;n_covar, n_θM) = get_hybridcase_sizes(case; scenario) - FloatType = get_hybridcase_FloatType(case; scenario) - n_out = n_θM +function HVI.construct_3layer_MLApplicator( + rng::AbstractRNG, case::HVI.AbstractHybridCase, ::Val{:SimpleChains}; + scenario::NTuple = ()) + n_covar = get_hybridcase_n_covar(case; scenario) + FloatType = get_hybridcase_float_type(case; scenario) + (;θM) = get_hybridcase_par_templates(case; scenario) + n_out = length(θM) is_using_dropout = :use_dropout ∈ scenario g_chain = if is_using_dropout SimpleChain( @@ -52,7 +48,7 @@ function HVI.get_hybridcase_MLapplicator(case::HVI.DoubleMM.DoubleMMCase, ::Val{ TurboDense{false}(identity, n_out) ) end - construct_SimpleChainsApplicator(g_chain, FloatType) + construct_ChainsApplicator(rng, g_chain, FloatType) end end # module diff --git a/src/DoubleMM/f_doubleMM.jl b/src/DoubleMM/f_doubleMM.jl index c69680d..ee6c21f 100644 --- a/src/DoubleMM/f_doubleMM.jl +++ b/src/DoubleMM/f_doubleMM.jl @@ -22,21 +22,25 @@ function HVI.get_hybridcase_par_templates(::DoubleMMCase; scenario::NTuple = ()) (; θP, θM) end -function HVI.get_hybridcase_transforms(::AbstractHybridCase; scenario::NTuple = ()) +function HVI.get_hybridcase_transforms(::DoubleMMCase; scenario::NTuple = ()) (; transP, transM) end -function HVI.get_hybridcase_sizes(::DoubleMMCase; scenario = ()) - n_covar_pc = 2 - n_covar = n_covar_pc + 3 # linear dependent - #n_site = 10^n_covar_pc - n_batch = 10 - n_θM = length(θM) - n_θP = length(θP) - #(; n_covar, n_site, n_batch, n_θM, n_θP) - (; n_covar, n_batch, n_θM, n_θP) +function HVI.get_hybridcase_neg_logden_obs(::DoubleMMCase; scenario::NTuple = ()) + neg_logden_indep_normal end +# function HVI.get_hybridcase_sizes(::DoubleMMCase; scenario = ()) +# n_covar_pc = 2 +# n_covar = n_covar_pc + 3 # linear dependent +# #n_site = 10^n_covar_pc +# n_batch = 10 +# n_θM = length(θM) +# n_θP = length(θP) +# #(; n_covar, n_site, n_batch, n_θM, n_θP) +# (; n_covar, n_batch, n_θM, n_θP) +# end + function HVI.get_hybridcase_PBmodel(::DoubleMMCase; scenario::NTuple = ()) #fsite = (θ, x_site) -> f_doubleMM(θ) # omit x_site drivers function f_doubleMM_with_global(θP::AbstractVector, θMs::AbstractMatrix, x) @@ -46,19 +50,20 @@ function HVI.get_hybridcase_PBmodel(::DoubleMMCase; scenario::NTuple = ()) end end -# function HVI.get_hybridcase_FloatType(::DoubleMMCase; scenario) +# function HVI.get_hybridcase_float_type(::DoubleMMCase; scenario) # return Float32 # end const xP_S1 = Float32[1.0, 1.0, 1.0, 1.0, 0.4, 0.3, 0.1] const xP_S2 = Float32[1.0, 3.0, 4.0, 5.0, 5.0, 5.0, 5.0] -function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG; +function HVI.gen_hybridcase_synthetic(rng::AbstractRNG, case::DoubleMMCase; scenario = ()) n_covar_pc = 2 n_site = 200 - (; n_covar, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) - FloatType = get_hybridcase_FloatType(case; scenario) + n_covar = 5 + n_θM = length(θM) + FloatType = get_hybridcase_float_type(case; scenario) xM, θMs_true0 = gen_cov_pred(rng, FloatType, n_covar_pc, n_covar, n_site, n_θM; rhodec = 8, is_using_dropout = false) int_θMs_sites = ComponentArrayInterpreter(θM, (n_site,)) @@ -68,6 +73,7 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG; xP = fill((;S1=xP_S1, S2=xP_S2), n_site) y_global_true, y_true = f(θP, θMs_true, xP) σ_o = FloatType(0.01) + logσ2_o = FloatType(2) .* log.(σ_o) #σ_o = 0.002 y_global_o = y_global_true .+ randn(rng, FloatType, size(y_global_true)) .* σ_o y_o = y_true .+ randn(rng, FloatType, size(y_true)) .* σ_o @@ -81,9 +87,18 @@ function HVI.gen_hybridcase_synthetic(case::DoubleMMCase, rng::AbstractRNG; y_true, y_global_o, y_o, - σ_o = fill(σ_o, size(y_true,1)), + y_unc = fill(logσ2_o, size(y_o)), ) end +function HVI.get_hybridcase_MLapplicator( + rng::AbstractRNG, case::HVI.DoubleMM.DoubleMMCase; scenario = ()) + ml_engine = select_ml_engine(; scenario) + construct_3layer_MLApplicator(rng, case, ml_engine; scenario) +end + + + + diff --git a/src/HybridProblem.jl b/src/HybridProblem.jl index 65c48c6..9f89801 100644 --- a/src/HybridProblem.jl +++ b/src/HybridProblem.jl @@ -1,53 +1,72 @@ struct HybridProblem <: AbstractHybridCase θP θM - transP - transM - n_covar - n_batch f g ϕg + py + transP + transM + cor_starts # = (P=(1,),M=(1,)) train_loader # inner constructor to constrain the types function HybridProblem( θP::CA.ComponentVector, θM::CA.ComponentVector, - g::AbstractModelApplicator, ϕg, + g::AbstractModelApplicator, ϕg::AbstractVector, f::Function, + py::Function, transM::Union{Function, Bijectors.Transform}, transP::Union{Function, Bijectors.Transform}, - n_covar::Integer, n_batch::Integer, - train_loader::DataLoader) - new(θP, θM, transM, transP, n_covar, n_batch, f, g, ϕg, train_loader) + train_loader::DataLoader, + cor_starts::NamedTuple = (P=(1,), M=(1,))) + new(θP, θM, f, g, ϕg, py, transM, transP, cor_starts, train_loader) end end +function HybridProblem(θP::CA.ComponentVector, θM::CA.ComponentVector, + # note no ϕg argument and g_chain unconstrained + g_chain, f::Function, + args...; rng = Random.default_rng(), kwargs...) + # dispatches on type of g_chain + g, ϕg = construct_ChainsApplicator(rng, g_chain, eltype(θM)) + HybridProblem(θP, θM, g, ϕg, f, args...; kwargs...) +end + function get_hybridcase_par_templates(prob::HybridProblem; scenario::NTuple = ()) (; θP = prob.θP, θM = prob.θM) end -function get_hybridcase_sizes(prob::HybridProblem; scenario::NTuple = ()) - n_θM = length(prob.θM) - n_θP = length(prob.θP) - (; n_covar=prob.n_covar, n_batch=prob.n_batch, n_θM, n_θP) +function get_hybridcase_neg_logden_obs(prob::HybridProblem; scenario::NTuple = ()) + prob.py end +function get_hybridcase_transforms(prob::HybridProblem; scenario::NTuple = ()) + (; transP = prob.transP, transM = prob.transM) +end + +# function get_hybridcase_sizes(prob::HybridProblem; scenario::NTuple = ()) +# n_θM = length(prob.θM) +# n_θP = length(prob.θP) +# (; n_covar=prob.n_covar, n_batch=prob.n_batch, n_θM, n_θP) +# end + function get_hybridcase_PBmodel(prob::HybridProblem; scenario::NTuple = ()) prob.f end -function get_hybridcase_MLapplicator(prob::HybridProblem, ml_engine; scenario::NTuple = ()); +function get_hybridcase_MLapplicator(prob::HybridProblem; scenario::NTuple = ()); prob.g, prob.ϕg end -function get_hybridcase_train_dataloader( - prob::HybridProblem, rng::AbstractRNG = Random.default_rng(); - scenario = ()) +function get_hybridcase_train_dataloader(rng::AbstractRNG, prob::HybridProblem; scenario = ()) return(prob.train_loader) end +function get_hybridcase_cor_starts(prob::HybridProblem; scenario = ()) + prob.cor_starts +end -# function get_hybridcase_FloatType(prob::HybridProblem; scenario::NTuple = ()) +# function get_hybridcase_float_type(prob::HybridProblem; scenario::NTuple = ()) # eltype(prob.θM) # end diff --git a/src/HybridVariationalInference.jl b/src/HybridVariationalInference.jl index a156030..6a56427 100644 --- a/src/HybridVariationalInference.jl +++ b/src/HybridVariationalInference.jl @@ -16,15 +16,18 @@ using MLUtils # dataloader export ComponentArrayInterpreter, flatten1, get_concrete include("ComponentArrayInterpreter.jl") -export AbstractModelApplicator, construct_SimpleChainsApplicator, construct_FluxApplicator, - construct_LuxApplicator +export AbstractModelApplicator, construct_ChainsApplicator +export construct_3layer_MLApplicator, select_ml_engine include("ModelApplicator.jl") export AbstractGPUDataHandler, NullGPUDataHandler, get_default_GPUHandler include("GPUDataHandler.jl") -export AbstractHybridCase, get_hybridcase_MLapplicator, get_hybridcase_PBmodel, get_hybridcase_sizes, get_hybridcase_FloatType, gen_hybridcase_synthetic, +export AbstractHybridCase, get_hybridcase_MLapplicator, get_hybridcase_PBmodel, + get_hybridcase_float_type, gen_hybridcase_synthetic, get_hybridcase_par_templates, get_hybridcase_transforms, get_hybridcase_train_dataloader, + get_hybridcase_neg_logden_obs, + get_hybridcase_n_covar, gen_cov_pred include("hybrid_case.jl") @@ -43,7 +46,7 @@ include("util_opt.jl") export neg_logden_indep_normal, entropy_MvNormal include("logden_normal.jl") -#export - all internal +export get_ca_starts include("cholesky.jl") export neg_elbo_transnorm_gf, predict_gf diff --git a/src/ModelApplicator.jl b/src/ModelApplicator.jl index 1ada30e..de14697 100644 --- a/src/ModelApplicator.jl +++ b/src/ModelApplicator.jl @@ -22,8 +22,45 @@ function apply_model end (app::AbstractModelApplicator)(x, ϕ) = apply_model(app, x, ϕ) -function construct_SimpleChainsApplicator end -function construct_FluxApplicator end -function construct_LuxApplicator end +""" + construct_ChainsApplicator([rng::AbstractRNG,] chain, float_type) +""" +function construct_ChainsApplicator end + +function construct_ChainsApplicator(chain, float_type::DataType; kwargs...) + construct_ChainsApplicator(Random.default_rng(), chain, float_type; kwargs...) +end + +# function construct_SimpleChainsApplicator end +# function construct_FluxApplicator end +# function construct_LuxApplicator end + + +""" + construct_3layer_MLApplicator( + rng::AbstractRNG, case::HVI.AbstractHybridCase, ; + scenario::NTuple = ()) + +`ml_engine` usually is of type `Val{Symbol}`, e.g. Val(:Flux). See `select_ml_engine`. +""" +function construct_3layer_MLApplicator end +""" + select_ml_engine(;scenario) + +Returns a value type `Val{:Symbol}` to dispatch on the machine learning engine to use. +- defaults to `Val(:SimpleChains)` +- `:use_Lux ∈ scenario -> Val(:Lux)` +- `:use_Flux ∈ scenario -> Val(:Flux)` +""" +function select_ml_engine(;scenario) + if :use_Lux ∈ scenario + return Val(:Lux) + elseif :use_Flux ∈ scenario + return Val(:Flux) + else + # default + return Val(:SimpleChains) + end +end diff --git a/src/cholesky.jl b/src/cholesky.jl index 5432d3a..3e3efb9 100644 --- a/src/cholesky.jl +++ b/src/cholesky.jl @@ -252,6 +252,55 @@ function transformU_cholesky1(v::GPUArraysCore.AbstractGPUVector; n=invsumn(leng return U end +# function transformU_block_cholesky1(v::CA.ComponentVector; +# ns=(invsumn(length(v[k])) + 1 for k in keys(v)) # may pass for efficiency +# ) +# blocks = [transformU_cholesky1(v[k]; n) for (k, n) in zip(keys(v), ns)] +# U = _create_blockdiag(v[first(keys(v))], blocks) # v only for dispatch: plain matrix for gpu +# end + + +""" + get_ca_starts(vc::ComponentVector) + +Return a tuple with starting positions of components in vc. +Useful for providing information on correlactions among subranges in a vector. +""" +function get_ca_starts(vc::CA.ComponentVector) + (1, (1 .+ cumsum((length(vc[k]) for k in front(keys(vc)))))...) +end +"omit the last n elements of an iterator" +front(itr, n=1) = Iterators.take(itr, length(itr)-n) + +""" + transformU_block_cholesky1(v::AbstractVector, cor_starts = (1,)) + +Transform a parameterization v of a blockdiagonal of upper triangular matrices +into the this matrix. +`cor_starts` is a NTuple of Integeres specifying the first column of each block. +E.g. For a matrix with a 3x3, a 2x2, and another block, +the blocks start at columns (1,4,6). It defaults to a single entire block. +""" +function transformU_block_cholesky1(v::AbstractVector, cor_starts = (1,)) + cor_starts_end = (cor_starts..., length(v)+1) + ranges = ChainRulesCore.@ignore_derivatives ( + cor_starts_end[i]:(cor_starts_end[i+1]-1) for i in 1:length(cor_starts)) + blocks = [transformU_cholesky1(v[r]) for r in ranges] + U = _create_blockdiag(v, blocks) # v only for dispatch: plain matrix for gpu + return(U) +end + +function _create_blockdiag(::AbstractArray, blocks) + BlockDiagonal(blocks) +end + +function _create_blockdiag(::GPUArraysCore.AbstractGPUArray, blocks) + # impose no special structure + cat(blocks...; dims=(1, 2)) +end + + + () -> begin tmp = sqrt.(sum(abs2, U_scaled, dims=1)) tmp2 = sum(abs2, U_scaled, dims=1) .^ (-1 / 2) diff --git a/src/elbo.jl b/src/elbo.jl index e0cb2f9..3ac23b7 100644 --- a/src/elbo.jl +++ b/src/elbo.jl @@ -5,31 +5,39 @@ It generates n_MC samples for each site, and uses these to compute the expected value of the likelihood of observations. ## Arguments -- rng: random number generator (ignored on CUDA, if ϕ is a AbstractGPUArray) -- g: machine learning model -- f: mechanistic model -- ϕ: flat vector of parameters - including parameter of f (ϕ_P), of g (ϕ_Ms), and of VI (ϕ_unc), +- `rng`: random number generator (ignored on CUDA, if ϕ is a AbstractGPUArray) +- `ϕ`: flat vector of parameters, interpreted by interpreters interpreted by interpreters.μP_ϕg_unc and interpreters.PMs -- y_ob: matrix of observations (n_obs x n_site_batch) -- xM: matrix of covariates (n_cov x n_site_batch) -- xP: model drivers, iterable of (n_site_batch) -- transPMs: Transformations as generated by get_transPMs returned from init_hybrid_params -- n_MC: number of MonteCarlo samples from the distribution of parameters to simulate +- `g`: machine learning model +- `transPMs`: Transformations as generated by get_transPMs returned from init_hybrid_params +- `f`: mechanistic model +- `py`: negative log-likelihood of observations given predictions: + `function(y_ob, y_pred, y_unc)` +- `xM`: matrix of covariates (n_cov x n_site_batch) +- `xP`: model drivers, iterable of (n_site_batch) +- `y_ob`: matrix of observations (n_obs x n_site_batch) +- `y_unc`: observation uncertainty provided to py (same size as y_ob) +- `interpreters`: NamedTuple as generated by `gen_hybridcase_synthetic` with entries: + - `μP_ϕg_unc`: extract components of parameter of + 1) means of global PBM, 2) ML-weights, and 3) additional parameters of approximation q + - `PMs`: assign components to PBM parameters 1 global, 2 matrix of n_site column vectors + - `int_unc` (can be omitted, if `μP_ϕg_unc(ϕ).unc` is already a ComponentVector) +- `n_MC`: number of MonteCarlo samples from the distribution of parameters to simulate using the mechanistic model f. -- logσ2y: observation uncertainty (log of the variance) """ -function neg_elbo_transnorm_gf(rng, g, f, ϕ::AbstractVector, y_ob, xM::AbstractMatrix, - xP, transPMs, interpreters::NamedTuple; - n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler(), - entropyN = 0.0, +function neg_elbo_transnorm_gf(rng, ϕ::AbstractVector, g, transPMs, f, py, + xM::AbstractMatrix, xP, y_ob, y_unc, + interpreters::NamedTuple; + n_MC=3, gpu_data_handler = get_default_GPUHandler(), + cor_starts=(P=(1,),M=(1,)) ) - ζs, σ = generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC) + ζs, σ = generate_ζ(rng, g, ϕ, xM, interpreters; n_MC, cor_starts) ζs_cpu = gpu_data_handler(ζs) # differentiable fetch to CPU in Flux package extension #ζi = first(eachcol(ζs_cpu)) nLy = reduce(+, map(eachcol(ζs_cpu)) do ζi y_pred_i, logjac = predict_y(ζi, xP, f, transPMs, interpreters.PMs) - nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, logσ2y) + #nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, y_unc) + nLy1 = py(y_ob, y_pred_i, y_unc) nLy1 - logjac end) / n_MC #sum_log_σ = sum(log.(σ)) @@ -48,13 +56,14 @@ Prediction function for hybrid model. Returns an Array `(n_obs, n_site, n_sample """ function predict_gf(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, xP, interpreters; get_transPMs, get_ca_int_PMs, n_sample_pred=200, - gpu_data_handler=get_default_GPUHandler()) + gpu_data_handler=get_default_GPUHandler(), + cor_starts=(P=(1,),M=(1,))) n_site = size(xM, 2) intm_PMs_gen = get_ca_int_PMs(n_site) trans_PMs_gen = get_transPMs(n_site) interpreters_gen = (; interpreters..., PMs = intm_PMs_gen) - ζs, _ = generate_ζ(rng, g, f, CA.getdata(ϕ), CA.getdata(xM), - interpreters_gen; n_MC = n_sample_pred) + ζs, _ = generate_ζ(rng, g, CA.getdata(ϕ), CA.getdata(xM), + interpreters_gen; n_MC = n_sample_pred, cor_starts) ζs_cpu = gpu_data_handler(ζs) # y_pred = stack(map(ζ -> first(predict_y( ζ, xP, f, trans_PMs_gen, interpreters_gen.PMs)), eachcol(ζs_cpu))); @@ -69,14 +78,14 @@ Adds the MV-normally distributed residuals, retrieved by `sample_ζ_norm0` to the means extracted from parameters and predicted by the machine learning model. """ -function generate_ζ(rng, g, f, ϕ::AbstractVector, xM::AbstractMatrix, - interpreters::NamedTuple; n_MC=3) +function generate_ζ(rng, g, ϕ::AbstractVector, xM::AbstractMatrix, + interpreters::NamedTuple; n_MC=3, cor_starts=(P=(1,),M=(1,))) # see documentation of neg_elbo_transnorm_gf ϕc = interpreters.μP_ϕg_unc(CA.getdata(ϕ)) μ_ζP = ϕc.μP ϕg = ϕc.ϕg μ_ζMs0 = g(xM, ϕg) # TODO provide μ_ζP to g - ζ_resid, σ = sample_ζ_norm0(rng, μ_ζP, μ_ζMs0, ϕc.unc; n_MC) + ζ_resid, σ = sample_ζ_norm0(rng, μ_ζP, μ_ζMs0, ϕc.unc; n_MC, cor_starts) #ζ_resid, σ = sample_ζ_norm0(rng, ϕ[1:2], reshape(ϕ[2 .+ (1:20)],2,:), ϕ[(end-length(interpreters.unc)+1):end], interpreters.unc; n_MC) ζ = stack(map(eachcol(ζ_resid)) do r rc = interpreters.PMs(r) @@ -92,27 +101,25 @@ end Extract relevant parameters from θ and return n_MC generated draws together with the vector of standard deviations, σ. -Necessary typestable information on number of compponents are provided with -ComponentMarshellers -- marsh_pmu(n_θP, n_θMs, Unc=n_θUnc) -- marsh_batch(n_batch) -- marsh_unc(n_UncP, n_UncM, n_UncCorr) +## Arguments +`int_unc`: Interpret vector as ComponentVector with components + ρsP, ρsM, logσ2_logP, coef_logσ2_logMs(intercept + slope), """ -function sample_ζ_norm0(rng::Random.AbstractRNG, ζP::AbstractVector, ζMs::AbstractMatrix, ϕunc::AbstractVector, args...; - n_MC=3) +function sample_ζ_norm0(rng::Random.AbstractRNG, ζP::AbstractVector, ζMs::AbstractMatrix, + args...; n_MC, cor_starts) n_θP, n_θMs = length(ζP), length(ζMs) urand = _create_random(rng, CA.getdata(ζP), n_θP + n_θMs, n_MC) - sample_ζ_norm0(urand, ζP, ζMs, ϕunc, args...) + sample_ζ_norm0(urand, ζP, ζMs, args...; cor_starts) end function sample_ζ_norm0(urand::AbstractMatrix, ζP::AbstractVector{T}, ζMs::AbstractMatrix, - ϕunc::AbstractVector, int_unc = ComponentArrayInterpreter(ϕunc); + ϕunc::AbstractVector, int_unc = ComponentArrayInterpreter(ϕunc); cor_starts ) where {T} ϕuncc = int_unc(CA.getdata(ϕunc)) n_θP, n_θMs, (n_θM, n_batch) = length(ζP), length(ζMs), size(ζMs) # make sure to not create a UpperTriangular Matrix of an CuArray in transformU_cholesky1 - UP = transformU_cholesky1(ϕuncc.ρsP) - UM = transformU_cholesky1(ϕuncc.ρsM) + UP = transformU_block_cholesky1(ϕuncc.ρsP, cor_starts.P) + UM = transformU_block_cholesky1(ϕuncc.ρsM, cor_starts.M) cf = ϕuncc.coef_logσ2_logMs logσ2_logMs = vec(cf[1, :] .+ cf[2, :] .* ζMs) logσ2_logP = vec(CA.getdata(ϕuncc.logσ2_logP)) diff --git a/src/gf.jl b/src/gf.jl index c86098e..e23f10d 100644 --- a/src/gf.jl +++ b/src/gf.jl @@ -17,11 +17,14 @@ end #applyf(f_double, θMs_true, stack(Iterators.repeated(CA.getdata(θP_true), size(θMs_true,2)))) """ -composition f ∘ g: mechanistic model after machine learning parameter prediction +composition f ∘ transM ∘ g: mechanistic model after machine learning parameter prediction """ -function gf(g, f, x, xP, ϕg, θP) - ζMs = g(x, ϕg) # predict the log of the parameters - θMs = exp.(ζMs) +function gf(g, transM, f, xM, xP, ϕg, θP) + # @show first(xM,5) + # @show first(ϕg,5) + ζMs = g(xM, ϕg) # predict the log of the parameters + # @show first(ζMs,5) + θMs = reduce(hcat, map(transM, eachcol(ζMs))) # transform each column y_pred_global, y_pred = f(θP, θMs, xP) return y_pred_global, y_pred, θMs end @@ -30,23 +33,23 @@ end Create a loss function for parameter vector p, given - g(x, ϕ): machine learning model - f(θMs, θP): mechanistic model -- x_o: matrix of covariates, sites in columns +- xM: matrix of covariates, sites in columns - y_o: matrix of observations, sites in columns - int_ϕθP: interpreter attachin axis with compponents ϕg and pc.θP """ -function get_loss_gf(g, f, y_o_global, int_ϕθP::AbstractComponentArrayInterpreter) - let g = g, f = f, int_ϕθP = int_ϕθP - function loss_gf(p, x_o, xP, y_o) +function get_loss_gf(g, transM, f, y_o_global, int_ϕθP::AbstractComponentArrayInterpreter) + let g = g, transM = transM, f = f, int_ϕθP = int_ϕθP + function loss_gf(p, xM, xP, y_o, y_unc) + σ = exp.(y_unc ./ 2) pc = int_ϕθP(p) - y_pred_global, y_pred, θMs = gf(g, f, x_o, xP, pc.ϕg, pc.θP) - #Main.@infiltrate_main - loss = sum(abs2, y_pred .- y_o) + sum(abs2, y_pred_global .- y_o_global) + y_pred_global, y_pred, θMs = gf(g, transM, f, xM, xP, pc.ϕg, pc.θP) + loss = sum(abs2, (y_pred .- y_o) ./ σ) + sum(abs2, y_pred_global .- y_o_global) return loss, y_pred_global, y_pred, θMs end end end () -> begin - loss_gf(p, x_o, y_o) - Zygote.gradient(x -> loss_gf(x, x_o, y_o)[1], p) + loss_gf(p, xM, y_o) + Zygote.gradient(x -> loss_gf(x, xM, y_o)[1], p) end diff --git a/src/hybrid_case.jl b/src/hybrid_case.jl index 92b3ed1..585a266 100644 --- a/src/hybrid_case.jl +++ b/src/hybrid_case.jl @@ -3,61 +3,37 @@ Type to dispatch constructing data and network structures for different cases of hybrid problem setups For a specific case, provide functions that specify details -- `get_hybridcase_par_templates` -- `get_hybridcase_transforms` -- `get_hybridcase_sizes` - `get_hybridcase_MLapplicator` - `get_hybridcase_PBmodel` +- `get_hybridcase_neg_logden_obs` +- `get_hybridcase_par_templates` +- `get_hybridcase_transforms` - `get_hybridcase_train_dataloader` (default depends on `gen_hybridcase_synthetic`) optionally - `gen_hybridcase_synthetic` -- `get_hybridcase_FloatType` (defaults to eltype(θM)) +- `get_hybridcase_n_covar` (defaults to number of rows in xM in train_dataloader ) +- `get_hybridcase_float_type` (defaults to `eltype(θM)`) +- `get_hybridcase_cor_starts` (defaults to include all correlations: `(P=(1,), M=(1,))`) """ abstract type AbstractHybridCase end; -""" - get_hybridcase_par_templates(::AbstractHybridCase; scenario) -Provide tuple of templates of ComponentVectors `θP` and `θM`. """ -function get_hybridcase_par_templates end - - -""" - get_hybridcase_transforms(::AbstractHybridCase; scenario) - -Return a NamedTupe of -- `transP`: Bijectors.Transform for the global PBM parameters, θP -- `transM`: Bijectors.Transform for the single-site PBM parameters, θM -""" -function get_hybridcase_transforms end - -""" - get_hybridcase_par_templates(::AbstractHybridCase; scenario) - -Provide a NamedTuple of number of -- n_covar: covariates xM -- n_site: all sites in the data -- n_batch: sites in one minibatch during fitting -- n_θM, n_θP: entries in parameter vectors -""" -function get_hybridcase_sizes end - -""" - get_hybridcase_MLapplicator(::AbstractHybridCase, MLEngine, n_covar, n_out; scenario=()) + get_hybridcase_MLapplicator([rng::AbstractRNG,] ::AbstractHybridCase; scenario=()) Construct the machine learning model fro given problem case and ML-Framework and scenario. -The MLEngine is a value type of a Symbol, usually the name of the module, e.g. -`const MLengine = Val(nameof(SimpleChains))`. - returns a Tuple of - AbstractModelApplicator - initial parameter vector """ function get_hybridcase_MLapplicator end +function get_hybridcase_MLapplicator(case::AbstractHybridCase; scenario=()) + get_hybridcase_MLapplicator(Random.default_rng(), case; scenario) +end + """ get_hybridcase_PBmodel(::AbstractHybridCase; scenario::NTuple=()) @@ -75,7 +51,56 @@ returns a tuple of predictions with components function get_hybridcase_PBmodel end """ - gen_hybridcase_synthetic(::AbstractHybridCase, rng; scenario) + get_hybridcase_neg_logden_obs(::AbstractHybridCase; scenario) + +Provide a `function(y_obs, ypred) -> Real` that computes the negative logdensity +of the observations, given the predictions. +""" +function get_hybridcase_neg_logden_obs end + + +""" + get_hybridcase_par_templates(::AbstractHybridCase; scenario) + +Provide tuple of templates of ComponentVectors `θP` and `θM`. +""" +function get_hybridcase_par_templates end + + +""" + get_hybridcase_transforms(::AbstractHybridCase; scenario) + +Return a NamedTupe of +- `transP`: Bijectors.Transform for the global PBM parameters, θP +- `transM`: Bijectors.Transform for the single-site PBM parameters, θM +""" +function get_hybridcase_transforms end + +# """ +# get_hybridcase_par_templates(::AbstractHybridCase; scenario) +# Provide a NamedTuple of number of +# - n_covar: covariates xM +# - n_site: all sites in the data +# - n_batch: sites in one minibatch during fitting +# - n_θM, n_θP: entries in parameter vectors +# """ +# function get_hybridcase_sizes end + +""" + get_hybridcase_n_covar(::AbstractHybridCase; scenario) + +Provide the number of covariates. Default returns the number of rows in `xM` from +`get_hybridcase_train_dataloader`. +""" +function get_hybridcase_n_covar(case::AbstractHybridCase; scenario) + train_loader = get_hybridcase_train_dataloader(Random.default_rng(), case; scenario) + (xM, xP, y_o, y_unc) = first(train_loader) + n_covar = size(xM, 1) + return(n_covar) +end + +""" + gen_hybridcase_synthetic([rng,] ::AbstractHybridCase; scenario) Setup synthetic data, a NamedTuple of - xM: matrix of covariates, with one column per site @@ -89,30 +114,58 @@ Setup synthetic data, a NamedTuple of function gen_hybridcase_synthetic end """ - get_hybridcase_FloatType(::AbstractHybridCase; scenario) + get_hybridcase_float_type(::AbstractHybridCase; scenario) Determine the FloatType for given Case and scenario, defaults to Float32 """ -function get_hybridcase_FloatType(case::AbstractHybridCase; scenario) +function get_hybridcase_float_type(case::AbstractHybridCase; scenario=()) return eltype(get_hybridcase_par_templates(case; scenario).θM) end """ - get_hybridcase_train_dataloader(::AbstractHybridCase, rng; scenario) + get_hybridcase_train_dataloader([rng,] ::AbstractHybridCase; scenario) Return a DataLoader that provides a tuple of - `xM`: matrix of covariates, with one column per site - `xP`: Iterator of process-model drivers, with one element per site - `y_o`: matrix of observations with added noise, with one column per site +- `y_unc`: matrix `sizeof(y_o)` of uncertainty information """ -function get_hybridcase_train_dataloader(case::AbstractHybridCase, rng::AbstractRNG; +function get_hybridcase_train_dataloader(rng::AbstractRNG, case::AbstractHybridCase; scenario = ()) - (; xM, xP, y_o) = gen_hybridcase_synthetic(case, rng; scenario) - (; n_batch) = get_hybridcase_sizes(case; scenario) - xM_gpu = :use_flux ∈ scenario ? CuArray(xM) : xM - train_loader = MLUtils.DataLoader((xM_gpu, xP, y_o), batchsize = n_batch) + (; xM, xP, y_o, y_unc) = gen_hybridcase_synthetic(rng, case; scenario) + n_batch = 10 + xM_gpu = :use_Flux ∈ scenario ? CuArray(xM) : xM + train_loader = MLUtils.DataLoader((xM_gpu, xP, y_o, y_unc), batchsize = n_batch) return(train_loader) end +function get_hybridcase_train_dataloader(case::AbstractHybridCase; scenario = ()) + rng::AbstractRNG = Random.default_rng() + get_hybridcase_train_dataloader(rng, case; scenario) +end + + +""" + get_hybridcase_cor_starts(case::AbstractHybridCase; scenario) + +Specify blocks in correlation matrices among parameters. +Returns a NamedTuple. +- `P`: correlations among global parameters +- `M`: correlations among ML-predicted parameters + +Subsets ofparameters that are correlated with other but not correlated with +parameters of other subranges are specified by indicating the starting position +of each subrange. +E.g. if within global parameter vector `(p1, p2, p3)`, `p1` and `p2` are correlated, +but parameter `p3` is not correlated with them, +then the first subrange starts at position 1 and the second subrange starts at position 3. +If there is only single block of all ML-predicted parameters being correlated +with each other then this block starts at position 1: `(P=(1,3), M=(1,))`. +""" +function get_hybridcase_cor_starts(case::AbstractHybridCase; scenario = ()) + (P=(1,), M=(1,)) +end + diff --git a/src/logden_normal.jl b/src/logden_normal.jl index b4d4dfe..88fb2ce 100644 --- a/src/logden_normal.jl +++ b/src/logden_normal.jl @@ -13,7 +13,7 @@ a low uncertainty estimate and means closer to the observations to help an initial fit. The obtained parameters then can be used as starting values for a the proper fit with `σfac=1.0`. """ -function neg_logden_indep_normal(obs::AbstractVector, μ::AbstractVector, logσ2::AbstractVector; σfac=1.0) +function neg_logden_indep_normal(obs::AbstractArray, μ::AbstractArray, logσ2::AbstractArray; σfac=1.0) # log of independent Normal distributions # estimate independent uncertainty of each θM, rather than full covariance #nlogL = sum(σfac .* log.(σs) .+ 1 / 2 .* abs2.((obs .- μ) ./ σs)) @@ -26,21 +26,22 @@ function neg_logden_indep_normal(obs::AbstractVector, μ::AbstractVector, logσ2 nlogL = sum(σfac .* logσ2 .+ abs2.(obs .- μ) .* exp.(.-logσ2)) / 2 return (nlogL) end -function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2::AbstractVector; kwargs...) - nlogLs = map(eachcol(obss), eachcol(preds)) do obs, μ - neg_logden_indep_normal(obs, μ, logσ2; kwargs...) - end - nlogL = sum(nlogLs) - return nlogL -end +# function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2::AbstractVector; kwargs...) +# nlogLs = map(eachcol(obss), eachcol(preds)) do obs, μ +# neg_logden_indep_normal(obs, μ, logσ2; kwargs...) +# end +# nlogL = sum(nlogLs) +# return nlogL +# end + +# function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2s::AbstractMatrix; kwargs...) +# nlogLs = map(eachcol(obss), eachcol(preds), eachcol(logσ2s)) do obs, μ, logσ2 +# neg_logden_indep_normal(obs, μ, logσ2; kwargs...) +# end +# nlogL = sum(nlogLs) +# return nlogL +# end -function neg_logden_indep_normal(obss::AbstractMatrix, preds::AbstractMatrix, logσ2s::AbstractMatrix; kwargs...) - nlogLs = map(eachcol(obss), eachcol(preds), eachcol(logσ2s)) do obs, μ, logσ2 - neg_logden_indep_normal(obs, μ, logσ2; kwargs...) - end - nlogL = sum(nlogLs) - return nlogL -end entropy_MvNormal(K, logdetΣ) = (K*(1+log(2π)) + logdetΣ)/2 entropy_MvNormal(Σ) = entropy_MvNormal(size(Σ,1), logdet(Σ)) diff --git a/test/runtests.jl b/test/runtests.jl index 78ec965..7599ade 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -13,14 +13,14 @@ const GROUP = get(ENV, "GROUP", "All") # defined in in CI.yml @time @safetestset "test_logden_normal" include("test_logden_normal.jl") #@safetestset "test" include("test/test_doubleMM.jl") @time @safetestset "test_doubleMM" include("test_doubleMM.jl") - #@safetestset "test" include("test/test_HybridProblem.jl") - @time @safetestset "test_HybridProblem" include("test_HybridProblem.jl") #@safetestset "test" include("test/test_cholesky_structure.jl") @time @safetestset "test_cholesky_structure" include("test_cholesky_structure.jl") #@safetestset "test" include("test/test_sample_zeta.jl") @time @safetestset "test_sample_zeta" include("test_sample_zeta.jl") #@safetestset "test" include("test/test_elbo.jl") @time @safetestset "test_elbo" include("test_elbo.jl") + #@safetestset "test" include("test/test_HybridProblem.jl") + @time @safetestset "test_HybridProblem" include("test_HybridProblem.jl") # #@safetestset "test" include("test/test_Flux.jl") @time @safetestset "test_Flux" include("test_Flux.jl") diff --git a/test/test_Flux.jl b/test/test_Flux.jl index 6aa62c3..a9378d5 100644 --- a/test/test_Flux.jl +++ b/test/test_Flux.jl @@ -35,9 +35,9 @@ end; Dense(n_covar * 4 => n_covar * 4, tanh), Dense(n_covar * 4 => n_out, identity, bias=false), ) - g, ϕg = construct_FluxApplicator(g_chain |> f64) + g, ϕg = construct_ChainsApplicator(g_chain |> f64, Float64) @test eltype(ϕg) == Float64 - g, ϕg = construct_FluxApplicator(g_chain) + g, ϕg = construct_ChainsApplicator(g_chain, Float32) @test eltype(ϕg) == Float32 n_site = 3 x = rand(Float32, n_covar, n_site) diff --git a/test/test_HybridProblem.jl b/test/test_HybridProblem.jl index c7757c1..1430cde 100644 --- a/test/test_HybridProblem.jl +++ b/test/test_HybridProblem.jl @@ -12,13 +12,13 @@ import Zygote using OptimizationOptimisers -const MLengine = Val(nameof(SimpleChains)) - construct_problem = () -> begin - θP = CA.ComponentVector{Float32}(r0 = 0.3, K2 = 2.0) - θM = CA.ComponentVector{Float32}(r1 = 0.5, K1 = 0.2) + FT = Float32 + θP = CA.ComponentVector{FT}(r0=0.3, K2=2.0) + θM = CA.ComponentVector{FT}(r1=0.5, K1=0.2) transP = elementwise(exp) transM = Stacked(elementwise(identity), elementwise(exp)) + cov_starts = (P=(1, 2), M=(1)) # assume r0 independent of K2 n_covar = 5 n_batch = 10 int_θdoubleMM = get_concrete(ComponentArrayInterpreter( @@ -47,35 +47,35 @@ construct_problem = () -> begin # g, ϕg = construct_SimpleChainsApplicator(g_chain) # rng = StableRNG(111) - (; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o -) = gen_hybridcase_synthetic(DoubleMM.DoubleMMCase(), rng;) - train_loader = MLUtils.DataLoader((xM, xP, y_o), batchsize = n_batch) - # HybridProblem(θP, θM, transM, transP, n_covar, n_batch, f_doubleMM_with_global, - # g, ϕg, train_loader) - HybridProblem(θP, θM, g_chain, f_doubleMM_with_global, - transM, transP, n_covar, n_batch, train_loader) + # dependency on DeoubleMMCase -> take care of changes in covariates + (; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, y_unc + ) = gen_hybridcase_synthetic(rng, DoubleMM.DoubleMMCase()) + py = neg_logden_indep_normal + train_loader = MLUtils.DataLoader((xM, xP, y_o, y_unc), batchsize=n_batch) + HybridProblem(θP, θM, g_chain, f_doubleMM_with_global, py, + transM, transP, train_loader, cov_starts) end prob = construct_problem(); scenario = (:default,) -#(; n_covar, n_batch, n_θM, n_θP) = get_hybridcase_sizes(prob; scenario) - @testset "loss_gf" begin #----------- fit g and θP to y_o - g, ϕg0 = get_hybridcase_MLapplicator(prob, MLengine; scenario) - train_loader = get_hybridcase_train_dataloader(prob; scenario) - (xM, xP, y_o) = first(train_loader) + rng = StableRNG(111) + g, ϕg0 = get_hybridcase_MLapplicator(prob; scenario) + train_loader = get_hybridcase_train_dataloader(rng, prob; scenario) + (xM, xP, y_o, y_unc) = first(train_loader) f = get_hybridcase_PBmodel(prob; scenario) par_templates = get_hybridcase_par_templates(prob; scenario) + (;transM, transP) = get_hybridcase_transforms(prob; scenario) int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector( - ϕg = 1:length(ϕg0), θP = par_templates.θP)) + ϕg=1:length(ϕg0), θP=par_templates.θP)) p = p0 = vcat(ϕg0, par_templates.θP .* 0.8) # slightly disturb θP_true # Pass the site-data for the batches as separate vectors wrapped in a tuple y_global_o = Float64[] - loss_gf = get_loss_gf(g, f, y_global_o, int_ϕθP) + loss_gf = get_loss_gf(g, transM, f, y_global_o, int_ϕθP) l1 = loss_gf(p0, first(train_loader)...) gr = Zygote.gradient(p -> loss_gf(p, train_loader.data...)[1], p0) @test gr[1] isa Vector @@ -87,9 +87,72 @@ scenario = (:default,) res = Optimization.solve( # optprob, Adam(0.02), callback = callback_loss(100), maxiters = 1000); - optprob, Adam(0.02), maxiters = 1000) + optprob, Adam(0.02), maxiters=1000) l1, y_pred_global, y_pred, θMs_pred = loss_gf(res.u, train_loader.data...) - @test isapprox(par_templates.θP, int_ϕθP(res.u).θP, rtol = 0.11) + @test isapprox(par_templates.θP, int_ϕθP(res.u).θP, rtol=0.11) + end +end + +using CUDA +import Flux + +@testset "neg_elbo_transnorm_gf cpu" begin + rng = StableRNG(111) + g, ϕg0 = get_hybridcase_MLapplicator(prob) + train_loader = get_hybridcase_train_dataloader(rng, prob) + (xM, xP, y_o, y_unc) = first(train_loader) + n_batch = size(y_o, 2) + f = get_hybridcase_PBmodel(prob) + (θP0, θM0) = get_hybridcase_par_templates(prob) + (; transP, transM) = get_hybridcase_transforms(prob) + py = get_hybridcase_neg_logden_obs(prob) + + (; ϕ, transPMs_batch, interpreters, get_transPMs, get_ca_int_PMs) = init_hybrid_params( + θP0, θM0, ϕg0, n_batch; transP, transM) + ϕ_ini = ϕ + + py = get_hybridcase_neg_logden_obs(prob) + + cost = neg_elbo_transnorm_gf(rng, ϕ_ini, g, transPMs_batch, f, py, + xM, xP, y_o, y_unc, map(get_concrete, interpreters); + n_MC=8) + @test cost isa Float64 + gr = Zygote.gradient( + ϕ -> neg_elbo_transnorm_gf(rng, ϕ, g, transPMs_batch, f, py, + xM, xP, y_o, y_unc, map(get_concrete, interpreters); + n_MC=8), + CA.getdata(ϕ_ini)) + @test gr[1] isa Vector + + if CUDA.functional() + @testset "neg_elbo_transnorm_gf gpu" begin + g, ϕg0 = begin + n_covar = size(xM, 1) + n_out = length(θM0) + g_chain = Flux.Chain( + # dense layer with bias that maps to 8 outputs and applies `tanh` activation + Flux.Dense(n_covar => n_covar * 4, tanh), + Flux.Dense(n_covar * 4 => n_covar * 4, tanh), + # dense layer without bias that maps to n outputs and `identity` activation + Flux.Dense(n_covar * 4 => n_out, identity, bias=false) + ) + construct_ChainsApplicator(g_chain, eltype(θM0)) + end + ϕ_ini.ϕg = ϕg0 + ϕ = CuArray(CA.getdata(ϕ_ini)) + xMg = CuArray(xM) + cost = neg_elbo_transnorm_gf(rng, ϕ, g, transPMs_batch, f, py, + xMg, xP, y_o, y_unc, map(get_concrete, interpreters); + n_MC=8) + @test cost isa Float64 + gr = Zygote.gradient( + ϕ -> neg_elbo_transnorm_gf(rng, ϕ, g, transPMs_batch, f, py, + xMg, xP, y_o, y_unc, map(get_concrete, interpreters); + n_MC=8), + ϕ) + @test gr[1] isa CuVector + @test eltype(gr[1]) == get_hybridcase_float_type(prob) + end end end diff --git a/test/test_Lux.jl b/test/test_Lux.jl index d80da03..81b9e15 100644 --- a/test/test_Lux.jl +++ b/test/test_Lux.jl @@ -13,9 +13,9 @@ using StatsFuns: logistic Dense(n_covar * 4 => n_covar * 4, tanh), Dense(n_covar * 4 => n_out, logistic, use_bias=false), ); - g, ϕ = construct_LuxApplicator(g_chain, Float64; device = cpu_device()); + g, ϕ = construct_ChainsApplicator(g_chain, Float64; device = cpu_device()); @test eltype(ϕ) == Float64 - g, ϕ = construct_LuxApplicator(g_chain; device = cpu_device()); + g, ϕ = construct_ChainsApplicator(g_chain, Float32; device = cpu_device()); @test eltype(ϕ) == Float32 n_site = 3 x = rand(Float32, n_covar, n_site) diff --git a/test/test_SimpleChains.jl b/test/test_SimpleChains.jl index 29adb37..076d84c 100644 --- a/test/test_SimpleChains.jl +++ b/test/test_SimpleChains.jl @@ -12,7 +12,7 @@ using StatsFuns: logistic TurboDense{true}(tanh, n_covar * 4), TurboDense{false}(logistic, n_out) ) - g, ϕg = construct_SimpleChainsApplicator(g_chain) + g, ϕg = construct_ChainsApplicator(g_chain, Float32) n_site = 3 x = rand(n_covar, n_site) #ϕg = SimpleChains.init_params(g_chain); diff --git a/test/test_cholesky_structure.jl b/test/test_cholesky_structure.jl index b02e07e..c6e6e51 100644 --- a/test/test_cholesky_structure.jl +++ b/test/test_cholesky_structure.jl @@ -75,9 +75,9 @@ end; U1v = CP.vec2uutri(v_orig) @test U1v isa UnitUpperTriangular @test size(U1v, 1) == 4 - gr = Zygote.gradient(v -> sum(abs2.(CP.vec2uutri(v))) , vcpu)[1] # works nice + gr = Zygote.gradient(v -> sum(abs2.(CP.vec2uutri(v))), vcpu)[1] # works nice # test providing keyword argument - gr = Zygote.gradient(v -> sum(abs2.(CP.vec2uutri(v; n=4))) , vcpu)[1] # works nice + gr = Zygote.gradient(v -> sum(abs2.(CP.vec2uutri(v; n = 4))), vcpu)[1] # works nice # v2 = CP.uutri2vec(U1v) @test v2 == v_orig @@ -85,15 +85,15 @@ end; end; @testset "utri2vec_pos" begin - @test CP.utri2vec_pos(1,1) == 1 - @test CP.utri2vec_pos(1,2) == 2 - @test CP.utri2vec_pos(2,2) == 3 - @test CP.utri2vec_pos(1,3) == 4 - @test CP.utri2vec_pos(1,4) == 7 - @test CP.utri2vec_pos(5,5) == 15 - typeof(CP.utri2vec_pos(5,5)) == Int - typeof(CP.utri2vec_pos(Int32(5),Int32(5))) == Int32 - @test_throws AssertionError CP.utri2vec_pos(2,1) + @test CP.utri2vec_pos(1, 1) == 1 + @test CP.utri2vec_pos(1, 2) == 2 + @test CP.utri2vec_pos(2, 2) == 3 + @test CP.utri2vec_pos(1, 3) == 4 + @test CP.utri2vec_pos(1, 4) == 7 + @test CP.utri2vec_pos(5, 5) == 15 + typeof(CP.utri2vec_pos(5, 5)) == Int + typeof(CP.utri2vec_pos(Int32(5), Int32(5))) == Int32 + @test_throws AssertionError CP.utri2vec_pos(2, 1) end @testset "vec2uutri gpu" begin @@ -137,6 +137,36 @@ end; end end; +@testset "transformU_block_cholesky1 gpu" begin + vc = CA.ComponentVector(b1 = [1.0f0], b2 = [2.0f0:5.0f0]) + vc = CA.ComponentVector(b1 = [1.0f0:3.0f0]) + vc = CA.ComponentVector(b1 = 1.0f0:3.0f0, b2 = [5.0f0]) + v = CA.getdata(vc) + cor_starts = get_ca_starts(vc) + #ns=(CP.invsumn(length(v[k])) + 1 for k in keys(v)) + #collect(ns) + U = CP.transformU_block_cholesky1(v, cor_starts) + @test diag(U' * U) ≈ ones(5) + @test U[1:3, 4:5] ≈ zeros(3, 2) + gr1 = Zygote.gradient(v -> sum(CP.transformU_block_cholesky1(v, cor_starts)), v)[1] # works nice + if CUDA.functional() # only run the test, if CUDA is working (not on Github ci) + vc = v_orig = CA.ComponentVector(b1 = CuArray(1.0f0:3.0f0), b2 = CuArray([5.0f0])) + v = CA.getdata(vc) + cor_starts = get_ca_starts(vc) + U = CP.transformU_block_cholesky1(v, cor_starts) + @test U isa CuArray + @test diag(Array(U' * U)) ≈ ones(5) + @test Array(U[1:3, 4:5]) ≈ zeros(3, 2) + gr1 = Zygote.gradient(v -> sum(CP.transformU_block_cholesky1(v, cor_starts)), v)[1] # works nice + end +end; + +() -> begin + cor_starts = (1,) + cor_starts_end = (cor_starts..., length(v) + 1) + [cor_starts_end[i]:(cor_starts_end[i + 1] - 1) for i in 1:length(cor_starts)] +end + () -> begin #setup for fitting of interactive blocks below _X = rand(3, 3) @@ -251,4 +281,3 @@ end S_pred = Dσ' * Upred' * Upred * Dσ @test S_pred≈S atol=6e-1 end - diff --git a/test/test_doubleMM.jl b/test/test_doubleMM.jl index 8e6c5a3..0c49d78 100644 --- a/test/test_doubleMM.jl +++ b/test/test_doubleMM.jl @@ -11,17 +11,14 @@ import Zygote using OptimizationOptimisers -const MLengine = Val(nameof(SimpleChains)) const case = DoubleMM.DoubleMMCase() scenario = (:default,) par_templates = get_hybridcase_par_templates(case; scenario) -(; n_covar, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) - rng = StableRNG(111) -(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o -) = gen_hybridcase_synthetic(case, rng; scenario); +(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, y_unc +) = gen_hybridcase_synthetic(rng, case; scenario); @testset "gen_hybridcase_synthetic" begin @test isapprox( @@ -31,65 +28,84 @@ rng = StableRNG(111) # test same results for same rng rng2 = StableRNG(111) - gen2 = gen_hybridcase_synthetic(case, rng2; scenario); + gen2 = gen_hybridcase_synthetic(rng2, case; scenario); @test gen2.y_o == y_o end @testset "loss_g" begin - g, ϕg0 = get_hybridcase_MLapplicator(case, MLengine; scenario); - - function loss_g(ϕg, x, g) - ζMs = g(x, ϕg) # predict the log of the parameters - θMs = exp.(ζMs) + g, ϕg0 = get_hybridcase_MLapplicator(rng, case; scenario); + (;transP, transM) = get_hybridcase_transforms(case; scenario) + + function loss_g(ϕg, x, g, transM) + # @show first(x,5) + # @show first(ϕg,5) + ζMs = g(x, ϕg) # predict the parameters on unconstrained space + # @show first(ζMs,5) + θMs = reduce(hcat, map(transM, eachcol(ζMs))) # transform each column loss = sum(abs2, θMs .- θMs_true) return loss, θMs end - loss_g(ϕg0, xM, g) - Zygote.gradient(x -> loss_g(x, xM, g)[1], ϕg0); - - optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g)[1], + loss_g(ϕg0, xM, g, transM) + Zygote.gradient(ϕg -> loss_g(ϕg, xM, g, transM)[1], ϕg0); + # + optf = Optimization.OptimizationFunction((ϕg, p) -> loss_g(ϕg, xM, g, transM)[1], Optimization.AutoZygote()) optprob = Optimization.OptimizationProblem(optf, ϕg0); #res = Optimization.solve(optprob, Adam(0.02), callback = callback_loss(100), maxiters = 600); res = Optimization.solve(optprob, Adam(0.02), maxiters = 600); - + # ϕg_opt1 = res.u; - pred = loss_g(ϕg_opt1, xM, g) - θMs_pred = pred[2] + #first(ϕg_opt1,5) + pred = loss_g(ϕg_opt1, xM, g, transM); + θMs_pred = θMs_pred_1 = pred[2] #scatterplot(vec(θMs_true), vec(θMs_pred)) - @test cor(vec(θMs_true), vec(θMs_pred)) > 0.9 + #@test cor(vec(θMs_true), vec(θMs_pred)) > 0.9 + @test cor(θMs_true[:,1], θMs_pred[:,1]) > 0.9 + @test cor(θMs_true[:,2], θMs_pred[:,2]) > 0.9 end @testset "loss_gf" begin - #----------- fit g and θP to y_o (without transformations) - g, ϕg0 = get_hybridcase_MLapplicator(case, MLengine; scenario); + #----------- fit g and θP to y_o (without uncertainty, without transforming θP) + g, ϕg0 = get_hybridcase_MLapplicator(case; scenario); + (;transP, transM) = get_hybridcase_transforms(case; scenario) f = get_hybridcase_PBmodel(case; scenario) int_ϕθP = ComponentArrayInterpreter(CA.ComponentVector( ϕg = 1:length(ϕg0), θP = par_templates.θP)) p = p0 = vcat(ϕg0, par_templates.θP .* 0.8); # slightly disturb θP_true + #p = p0 = vcat(ϕg_opt1, par_templates.θP); # almost true # Pass the site-data for the batches as separate vectors wrapped in a tuple - #train_loader = MLUtils.DataLoader((xM, xP, y_o), batchsize = n_batch) - train_loader = get_hybridcase_train_dataloader(case, rng; scenario) + n_batch = 10 + train_loader = MLUtils.DataLoader((xM, xP, y_o, y_unc), batchsize = n_batch) + # get_hybridcase_train_dataloader recreates synthetic data different θ_true + #train_loader = get_hybridcase_train_dataloader(case, rng; scenario) - loss_gf = get_loss_gf(g, f, y_global_o, int_ϕθP) - l1 = loss_gf(p0, train_loader.data...)[1] + loss_gf = get_loss_gf(g, transM, f, y_global_o, int_ϕθP) + l1 = loss_gf(p0, first(train_loader)...)[1] + (xM_batch, xP_batch, y_o_batch, y_unc_batch) = first(train_loader) + Zygote.gradient(p0 -> loss_gf(p0, xM_batch, xP_batch, y_o_batch, y_unc_batch)[1], p0) optf = Optimization.OptimizationFunction((ϕ, data) -> loss_gf(ϕ, data...)[1], Optimization.AutoZygote()) optprob = OptimizationProblem(optf, p0, train_loader) res = Optimization.solve( -# optprob, Adam(0.02), callback = callback_loss(100), maxiters = 1000); + #optprob, Adam(0.02), callback = callback_loss(100), maxiters = 5000); optprob, Adam(0.02), maxiters = 1000); l1, y_pred_global, y_pred, θMs_pred = loss_gf(res.u, train_loader.data...) - @test isapprox(par_templates.θP, int_ϕθP(res.u).θP, rtol = 0.11) + #l1, y_pred_global, y_pred, θMs_pred = loss_gf(p0, xM, xP, y_o, y_unc); + θMs_pred = CA.ComponentArray(θMs_pred, CA.getaxes(θMs_true)) + #TODO @test isapprox(par_templates.θP, int_ϕθP(res.u).θP, rtol = 0.15) @test cor(vec(θMs_true), vec(θMs_pred)) > 0.9 + @test cor(θMs_true[:,1], θMs_pred[:,1]) > 0.9 + @test cor(θMs_true[:,2], θMs_pred[:,2]) > 0.9 () -> begin scatterplot(vec(θMs_true), vec(θMs_pred)) + scatterplot(θMs_true[1,:], θMs_pred[1,:]) + scatterplot(θMs_true[2,:], θMs_pred[2,:]) scatterplot(log.(vec(θMs_true)), log.(vec(θMs_pred))) scatterplot(vec(y_pred), vec(y_o)) hcat(par_templates.θP, int_ϕθP(p0).θP, int_ϕθP(res.u).θP) diff --git a/test/test_elbo.jl b/test/test_elbo.jl index 37bccc2..dc26383 100644 --- a/test/test_elbo.jl +++ b/test/test_elbo.jl @@ -17,20 +17,22 @@ using GPUArraysCore: GPUArraysCore rng = StableRNG(111) const case = DoubleMM.DoubleMMCase() -const MLengine = Val(nameof(SimpleChains)) scenario = (:default,) -FT = get_hybridcase_FloatType(case; scenario) +FT = get_hybridcase_float_type(case; scenario) #θsite_true = get_hybridcase_par_templates(case; scenario) -g, ϕg0 = get_hybridcase_MLapplicator(case, MLengine; scenario); +g, ϕg0 = get_hybridcase_MLapplicator(case; scenario); f = get_hybridcase_PBmodel(case; scenario) -(; n_covar, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) +n_covar = 5 +n_batch = 10 +n_θM, n_θP = values(map(length, get_hybridcase_par_templates(case; scenario))) -(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, σ_o -) = gen_hybridcase_synthetic(case, rng; scenario); +(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o, y_unc +) = gen_hybridcase_synthetic(rng, case; scenario); + +py = neg_logden_indep_normal -logσ2y = FT(2) .* log.(σ_o) n_MC = 3 (; transP, transM) = get_hybridcase_transforms(case; scenario) # transP = elementwise(exp) @@ -40,77 +42,14 @@ n_MC = 3 θP_true, θMs_true[:, 1], ϕg0, n_batch; transP, transM); ϕ_ini = ϕ -() -> begin - # correlation matrices - ρsP = zeros(sum(1:(n_θP - 1))) - ρsM = zeros(sum(1:(n_θM - 1))) - - () -> begin - coef_logσ2_logMs = [-5.769 -3.501; -0.01791 0.007951] - logσ2_logP = CA.ComponentVector(r0 = -8.997, K2 = -5.893) - #mean_σ_o_MC = 0.006042 - - ϕunc = CA.ComponentVector(; - logσ2_logP = logσ2_logP, - coef_logσ2_logMs = coef_logσ2_logMs, - ρsP, - ρsM) - end - - # for a conservative uncertainty assume σ2=1e-10 and no relationship with magnitude - logσ2y = 2 .* log.(σ_o) - ϕunc0 = CA.ComponentVector(; - logσ2_logP = fill(-10.0, n_θP), - coef_logσ2_logMs = reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), - ρsP, - ρsM) - #int_unc = ComponentArrayInterpreter(ϕunc0) - - transPMs_batch = as( - (P = as(Array, asℝ₊, n_θP), - Ms = as(Array, asℝ₊, n_θM, n_batch))) - transPMs_allsites = as( - (P = as(Array, asℝ₊, n_θP), - Ms = as(Array, asℝ₊, n_θM, n_site))) - - ϕ_true = θ = CA.ComponentVector(; - μP = θP_true, - ϕg = ϕg0, #ϕg_opt, # here start from randomized - unc = ϕunc0) - trans_gu = as( - (μP = as(Array, asℝ₊, n_θP), - ϕg = as(Array, length(ϕg0)), - unc = as(Array, length(ϕunc0)))) - trans_g = as( - (μP = as(Array, asℝ₊, n_θP), - ϕg = as(Array, length(ϕg0)))) - - int_PMs_batch = ComponentArrayInterpreter(CA.ComponentVector(; θP = θP_true, - θMs = CA.ComponentMatrix( - zeros(n_θM, n_batch), first(CA.getaxes(θMs_true)), CA.Axis(i = 1:n_batch)))) - - interpreters = map(get_concrete, - (; - μP_ϕg_unc = ComponentArrayInterpreter(ϕ_true), - PMs = int_PMs_batch, - unc = ComponentArrayInterpreter(ϕunc0) - )) - - ϕg_true_vec = CA.ComponentVector( - TransformVariables.inverse(trans_gu, CP.cv2NamedTuple(ϕ_true))) - ϕcg_true = interpreters.μP_ϕg_unc(ϕg_true_vec) - ϕ_ini = ζ = vcat(ϕcg_true[[:μP, :ϕg]] .* 1.2, ϕcg_true[[:unc]]) - ϕ_ini0 = ζ = vcat(ϕcg_true[:μP] .* 0.0, ϕg0, ϕunc0) -end - @testset "generate_ζ" begin ζ, σ = CP.generate_ζ( - rng, g, f, ϕ_ini, xM[:, 1:n_batch], map(get_concrete, interpreters); + rng, g, ϕ_ini, xM[:, 1:n_batch], map(get_concrete, interpreters); n_MC = 8) @test ζ isa Matrix gr = Zygote.gradient( ϕ -> sum(CP.generate_ζ( - rng, g, f, ϕ, xM[:, 1:n_batch], map(get_concrete, interpreters); + rng, g, ϕ, xM[:, 1:n_batch], map(get_concrete, interpreters); n_MC = 8)[1]), CA.getdata(ϕ_ini)) @test gr[1] isa Vector @@ -118,21 +57,24 @@ end; # setup g as FluxNN on gpu using Flux -FluxMLengine = Val(nameof(Flux)) -g_flux, ϕg0_flux_cpu = get_hybridcase_MLapplicator(case, FluxMLengine; scenario) + +if CUDA.functional() + scenario_flux = (scenario..., :use_Flux) + g_flux, ϕg0_flux_cpu = get_hybridcase_MLapplicator(case; scenario = scenario_flux) +end if CUDA.functional() @testset "generate_ζ gpu" begin ϕ = CuArray(CA.getdata(ϕ_ini)) xMg_batch = CuArray(xM[:, 1:n_batch]) ζ, σ = CP.generate_ζ( - rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); + rng, g_flux, ϕ, xMg_batch, map(get_concrete, interpreters); n_MC = 8) @test ζ isa CuMatrix @test eltype(ζ) == FT gr = Zygote.gradient( ϕ -> sum(CP.generate_ζ( - rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters); + rng, g_flux, ϕ, xMg_batch, map(get_concrete, interpreters); n_MC = 8)[1]), ϕ) @test gr[1] isa CuVector @@ -140,15 +82,16 @@ if CUDA.functional() end @testset "neg_elbo_transnorm_gf cpu" begin - cost = neg_elbo_transnorm_gf(rng, g, f, ϕ_ini, y_o[:, 1:n_batch], - xM[:, 1:n_batch], xP[1:n_batch], transPMs_batch, map(get_concrete, interpreters); - n_MC = 8, logσ2y) + cost = neg_elbo_transnorm_gf(rng, ϕ_ini, g, transPMs_batch, f, py, + xM[:, 1:n_batch], xP[1:n_batch], y_o[:, 1:n_batch], y_unc[:, 1:n_batch], + map(get_concrete, interpreters); + n_MC = 8) @test cost isa Float64 gr = Zygote.gradient( - ϕ -> neg_elbo_transnorm_gf( - rng, g, f, ϕ, y_o[:, 1:n_batch], - xM[:, 1:n_batch], xP[1:n_batch], - transPMs_batch, interpreters; n_MC = 8, logσ2y), + ϕ -> neg_elbo_transnorm_gf(rng, ϕ, g, transPMs_batch, f, py, + xM[:, 1:n_batch], xP[1:n_batch], y_o[:, 1:n_batch], y_unc[:, 1:n_batch], + map(get_concrete, interpreters); + n_MC = 8), CA.getdata(ϕ_ini)) @test gr[1] isa Vector end; @@ -158,16 +101,16 @@ if CUDA.functional() ϕ = CuArray(CA.getdata(ϕ_ini)) xMg_batch = CuArray(xM[:, 1:n_batch]) xP_batch = xP[1:n_batch] # used in f which runs on CPU - cost = neg_elbo_transnorm_gf(rng, g_flux, f, ϕ, y_o[:, 1:n_batch], - xMg_batch, xP_batch, - transPMs_batch, map(get_concrete, interpreters); - n_MC = 8, logσ2y) + cost = neg_elbo_transnorm_gf(rng, ϕ, g_flux, transPMs_batch, f, py, + xMg_batch, xP_batch, y_o[:, 1:n_batch], y_unc[:, 1:n_batch], + map(get_concrete, interpreters); + n_MC = 8) @test cost isa Float64 gr = Zygote.gradient( - ϕ -> neg_elbo_transnorm_gf( - rng, g_flux, f, ϕ, y_o[:, 1:n_batch], - xMg_batch, xP_batch, - transPMs_batch, interpreters; n_MC = 8, logσ2y), + ϕ -> neg_elbo_transnorm_gf(rng, ϕ, g_flux, transPMs_batch, f, py, + xMg_batch, xP_batch, y_o[:, 1:n_batch], y_unc[:, 1:n_batch], + map(get_concrete, interpreters); + n_MC = 8), ϕ) @test gr[1] isa CuVector @test eltype(gr[1]) == FT diff --git a/test/test_sample_zeta.jl b/test/test_sample_zeta.jl index 392e76b..b776fe8 100644 --- a/test/test_sample_zeta.jl +++ b/test/test_sample_zeta.jl @@ -16,86 +16,82 @@ using Bijectors rng = StableRNG(111) const case = DoubleMM.DoubleMMCase() -#const MLengine = Val(nameof(SimpleChains)) scenario = (:default,) -(; n_covar, n_batch, n_θM, n_θP) = get_hybridcase_sizes(case; scenario) +n_θM, n_θP = length.(values(get_hybridcase_par_templates(case; scenario))) -@testset "test_sample_zeta" begin - (; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o - ) = gen_hybridcase_synthetic(case, rng; scenario) +(; xM, n_site, θP_true, θMs_true, xP, y_global_true, y_true, y_global_o, y_o +) = gen_hybridcase_synthetic(rng, case; scenario) - # n_site = 2 - # n_θP, n_θM = length(θ_true.θP), length(θ_true.θM) - # σ_θM = θ_true.θM .* 0.1 # 10% around expected - # θMs_true = θ_true.θM .+ randn(n_θM, n_site) .* σ_θM +# set to 0.02 rather than zero for debugging non-zero correlations +ρsP = zeros(sum(1:(n_θP-1))) .+ 0.02 +ρsM = zeros(sum(1:(n_θM-1))) .+ 0.02 - # set to 0.02 rather than zero for debugging non-zero correlations - ρsP = zeros(sum(1:(n_θP-1))) .+ 0.02 - ρsM = zeros(sum(1:(n_θM-1))) .+ 0.02 +ϕunc = CA.ComponentVector(; + logσ2_logP=fill(-10.0, n_θP), + coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), + ρsP, + ρsM) - ϕunc = CA.ComponentVector(; - logσ2_logP=fill(-10.0, n_θP), - coef_logσ2_logMs=reduce(hcat, ([-10.0, 0.0] for _ in 1:n_θM)), - ρsP, - ρsM) +θ_true = θ = CA.ComponentVector(; + P=θP_true, + Ms=θMs_true) +transPMs = elementwise(exp) # all parameters on LogNormal scale +ζ_true = inverse(transPMs)(θ_true) +ϕ_true = vcat(ζ_true, CA.ComponentVector(unc=ϕunc)) +ϕ_cpu = vcat(ζ_true .+ 0.01, CA.ComponentVector(unc=ϕunc)) - θ_true = θ = CA.ComponentVector(; - P=θP_true, - Ms=θMs_true) - transPMs = elementwise(exp) # all parameters on LogNormal scale - ζ_true = inverse(transPMs)(θ_true) - ϕ_true = vcat(ζ_true, CA.ComponentVector(unc=ϕunc)) - ϕ_cpu = vcat(ζ_true .+ 0.01, CA.ComponentVector(unc=ϕunc)) +interpreters = (; pmu=ComponentArrayInterpreter(ϕ_true)) #, M=int_θM, PMs=int_θPMs) - interpreters = (; pmu=ComponentArrayInterpreter(ϕ_true)) #, M=int_θM, PMs=int_θPMs) +n_MC = 3 +@testset "sample_ζ_norm0 cpu" begin + ϕ = CA.getdata(ϕ_cpu) + ϕc = interpreters.pmu(ϕ) + cor_starts=(P=(1,),M=(1,)) + ζ_resid, σ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC, cor_starts) + @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) + gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0( + rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC, cor_starts)[1]), ϕc)[1] + @test length(gr) == length(ϕ) +end +# - n_MC = 3 - @testset "sample_ζ_norm0 cpu" begin - ϕ = CA.getdata(ϕ_cpu) - ϕc = interpreters.pmu(ϕ) - ζ_resid, σ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) +if CUDA.functional() + @testset "sample_ζ_norm0 gpu" begin + ϕ = CuArray(CA.getdata(ϕ_cpu)) + cor_starts=(P=(1,),M=(1,)) + #tmp = ϕ[1:6] + #vec2uutri(tmp) + ϕc = interpreters.pmu(ϕ); + @test CA.getdata(ϕc) isa GPUArraysCore.AbstractGPUArray + #ζP, ζMs, ϕunc = ϕc.P, ϕc.Ms, ϕc.unc + #urand = CUDA.randn(length(ϕc.P) + length(ϕc.Ms), n_MC) |> gpu + #include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss + #ζ_resid, σ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) + #Zygote.gradient(ϕc -> sum(sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; + ζ_resid, σ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC, cor_starts) + @test ζ_resid isa GPUArraysCore.AbstractGPUArray @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) - gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc)[1]), ϕc)[1] + gr = Zygote.gradient( + ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC, cor_starts)[1]), ϕc)[1]; @test length(gr) == length(ϕ) + int_unc = ComponentArrayInterpreter(ϕc.unc) + gr2 = Zygote.gradient( + ϕc -> sum(CP.sample_ζ_norm0(rng, CA.getdata(ϕc.P), CA.getdata(ϕc.Ms), + CA.getdata(ϕc.unc), int_unc; n_MC, cor_starts)[1]), + ϕc)[1]; end - # +end - if CUDA.functional() - @testset "sample_ζ_norm0 gpu" begin - ϕ = CuArray(CA.getdata(ϕ_cpu)) - #tmp = ϕ[1:6] - #vec2uutri(tmp) - ϕc = interpreters.pmu(ϕ) - @test CA.getdata(ϕc) isa GPUArraysCore.AbstractGPUArray - #ζP, ζMs, ϕunc = ϕc.P, ϕc.Ms, ϕc.unc - #urand = CUDA.randn(length(ϕc.P) + length(ϕc.Ms), n_MC) |> gpu - #include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss - #ζ_resid, σ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) - #Zygote.gradient(ϕc -> sum(sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1]; - ζ_resid, σ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC) - @test ζ_resid isa GPUArraysCore.AbstractGPUArray - @test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC) - gr = Zygote.gradient( - ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1] - @test length(gr) == length(ϕ) - int_unc = ComponentArrayInterpreter(ϕc.unc) - gr2 = Zygote.gradient( - ϕc -> sum(CP.sample_ζ_norm0(rng, CA.getdata(ϕc.P), CA.getdata(ϕc.Ms), - CA.getdata(ϕc.unc), int_unc; n_MC)[1]), - ϕc)[1] - end - end +# @testset "generate_ζ" begin +# ϕ = CA.getdata(ϕ_cpu) +# n_sample_pred = 200 +# intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, +# θMs=CA.ComponentMatrix( +# zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_sample_pred)))) +# int_μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true) +# interpreters = (; PMs = intm_PMs_gen, μP_ϕg_unc = int_μP_ϕg_unc ) +# ζs, _ = CP.generate_ζ(rng, g, ϕ, xM, interpreters; n_MC=n_sample_pred) - # @testset "generate_ζ" begin - # ϕ = CA.getdata(ϕ_cpu) - # n_sample_pred = 200 - # intm_PMs_gen = ComponentArrayInterpreter(CA.ComponentVector(; θP_true, - # θMs=CA.ComponentMatrix( - # zeros(n_θM, n_site), first(CA.getaxes(θMs_true)), CA.Axis(i=1:n_sample_pred)))) - # int_μP_ϕg_unc=ComponentArrayInterpreter(ϕ_true) - # interpreters = (; PMs = intm_PMs_gen, μP_ϕg_unc = int_μP_ϕg_unc ) - # ζs, _ = CP.generate_ζ(rng, g, f, ϕ, xM, interpreters; n_MC=n_sample_pred) +# end; - # end; -end;