Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ CUDA = "5.5.2"
ChainRulesCore = "1.25"
Combinatorics = "1.0.2"
ComponentArrays = "0.15.19"
Flux = "v0.15.2"
Flux = "v0.15.2, 0.16"
GPUArraysCore = "0.1, 0.2"
LinearAlgebra = "1.10.0"
Lux = "1.4.2"
Expand Down
24 changes: 15 additions & 9 deletions dev/doubleMM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ end
#res = Optimization.solve(optprob, Adam(0.02), callback=callback_loss(50), maxiters=1_400);
end

ϕ = ϕ_true |> Flux.gpu;
ϕ = ϕ_ini |> Flux.gpu;
xM_gpu = xM |> Flux.gpu;
g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario);

Expand All @@ -213,22 +213,23 @@ g_flux, ϕg0_flux_cpu = gen_hybridcase_MLapplicator(case, FluxMLengine; scenario
g_flux = g_luxs
end

function fcost(ϕ)
neg_elbo_transnorm_gf(rng, g_flux, f, CA.getdata(ϕ), y_o[:, 1:n_batch],
xM_gpu[:, 1:n_batch], transPMs_batch, map(get_concrete, interpreters);
function fcost(ϕ, xM, y_o)
neg_elbo_transnorm_gf(rng, g_flux, f, CA.getdata(ϕ), y_o,
xM, transPMs_batch, map(get_concrete, interpreters);
n_MC = 8, logσ2y = logσ2y)
end
fcost(ϕ)
fcost(ϕ, xM_gpu[:, 1:n_batch], y_o[:, 1:n_batch])
#Zygote.gradient(fcost, ϕ) |> cpu;
gr = Zygote.gradient(fcost, CA.getdata(ϕ));
gr = Zygote.gradient(fcost,
CA.getdata(ϕ), CA.getdata(xM_gpu[:, 1:n_batch]), CA.getdata(y_o[:, 1:n_batch]));
gr_c = CA.ComponentArray(gr[1] |> Flux.cpu, CA.getaxes(ϕ)...)

train_loader = MLUtils.DataLoader((xM_gpu, y_o), batchsize = n_batch)

optf = Optimization.OptimizationFunction(
(ϕ, data) -> begin
xM, y_o = data
fcost(ϕ)
fcost(ϕ, xM, y_o)
# neg_elbo_transnorm_gf(
# rng, g_flux, f, ϕ, y_o, xM, transPMs_batch,
# map(get_concrete, interpreters); n_MC = 5, logσ2y)
Expand Down Expand Up @@ -259,22 +260,27 @@ hcat(ϕ_ini.unc, ϕunc_VI) # need to compare to MC sample
# hard to estimate for original very small theta's but otherwise good

# test predicting correct obs-uncertainty of predictive posterior
# TODO reuse g_flux rather than g
n_sample_pred = 200

y_pred = predict_gf(rng, g_flux, f, res.u, xM_gpu, interpreters;
get_transPMs, get_ca_int_PMs, n_sample_pred);
size(y_pred) # n_obs x n_site, n_sample_pred

σ_o_post = dropdims(std(y_pred; dims = 3), dims=3)
σ_o_post = dropdims(std(y_pred; dims = 3), dims = 3);

#describe(σ_o_post)
hcat(σ_o, fill(mean_σ_o_MC, length(σ_o)),
mean(σ_o_post, dims = 2), sqrt.(mean(abs2, σ_o_post, dims = 2)))
# VI predicted uncertainty is smaller than HMC predicted one
mean_y_pred = map(mean, eachslice(y_pred; dims = (1, 2)))
#describe(mean_y_pred - y_o)
histogram(vec(mean_y_pred - y_true)) # predictions centered around y_o (or y_true)

# look at θP, θM1 of first site
intm_PMs_gen = get_ca_int_PMs(n_site)
ζs, _σ = HVI.generate_ζ(rng, g_flux, f, res.u, xM_gpu,
(; interpreters..., PMs = intm_PMs_gen); n_MC = n_sample_pred);
ζs = ζs |> Flux.cpu;
θPM = vcat(θP_true, θMs_true[:, 1])
intm = ComponentArrayInterpreter(θPM, (n_sample_pred,))
ζs1c = intm(ζs[1:length(θPM), :])
Expand Down
33 changes: 19 additions & 14 deletions src/elbo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,22 @@
"""
function neg_elbo_transnorm_gf(rng, g, f, ϕ::AbstractVector, y_ob, x::AbstractMatrix,
transPMs, interpreters::NamedTuple;
n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler())
ζs, logdetΣ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC)
n_MC=3, logσ2y, gpu_data_handler = get_default_GPUHandler(),
entropyN = 0.0,
)
ζs, σ = generate_ζ(rng, g, f, ϕ, x, interpreters; n_MC)

Check warning on line 26 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L26

Added line #L26 was not covered by tests
ζs_cpu = gpu_data_handler(ζs) # differentiable fetch to CPU in Flux package extension
#ζi = first(eachcol(ζs_cpu))
nLy = reduce(+, map(eachcol(ζs_cpu)) do ζi
y_pred_i, logjac = predict_y(ζi, f, transPMs)
nLy1 = neg_logden_indep_normal(y_ob, y_pred_i, logσ2y)
nLy1 - logjac
end) / n_MC
ent = entropy_MvNormal(size(ζs, 1), logdetΣ) # defined in logden_normal
nLy - ent
logdet_jacT2 = sum_log_σ = sum(log.(σ))

Check warning on line 34 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L34

Added line #L34 was not covered by tests
# logdet_jacT2 = -sum_log_σ # log Prod(1/σ_i) = -sum log σ_i
# logdetΣ = 2 * sum_log_σ # log Prod(σ_i²) = 2* sum log σ_i
# ent = entropy_MvNormal(size(ζs, 1), logdetΣ) # defined in logden_normal
nLy - logdet_jacT2 - entropyN

Check warning on line 38 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L38

Added line #L38 was not covered by tests
end

"""
Expand All @@ -45,17 +50,17 @@
gpu_data_handler=get_default_GPUHandler())
n_site = size(xM, 2)
intm_PMs_gen = get_ca_int_PMs(n_site)
tans_PMs_gen = get_transPMs(n_site)
trans_PMs_gen = get_transPMs(n_site)

Check warning on line 53 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L53

Added line #L53 was not covered by tests
ζs, _ = generate_ζ(rng, g, f, CA.getdata(ϕ), CA.getdata(xM),
(; interpreters..., PMs = intm_PMs_gen); n_MC = n_sample_pred)
ζs_cpu = gpu_data_handler(ζs) #
y_pred = stack(map(ζ -> first(predict_y(ζ, f, tans_PMs_gen)), eachcol(ζs_cpu)));
y_pred = stack(map(ζ -> first(predict_y(ζ, f, trans_PMs_gen)), eachcol(ζs_cpu)));

Check warning on line 57 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L57

Added line #L57 was not covered by tests
y_pred
end

"""
Generate samples of (inv-transformed) model parameters, ζ, and Log-Determinant
of their distribution.
Generate samples of (inv-transformed) model parameters, ζ,
and the vector of standard deviations, σ, i.e. the diagonal of the cholesky-factor.

Adds the MV-normally distributed residuals, retrieved by `sample_ζ_norm0`
to the means extracted from parameters and predicted by the machine learning
Expand All @@ -68,21 +73,21 @@
μ_ζP = ϕc.μP
ϕg = ϕc.ϕg
μ_ζMs0 = g(x, ϕg) # TODO provide μ_ζP to g
ζ_resid, logdetΣ = sample_ζ_norm0(rng, μ_ζP, μ_ζMs0, ϕc.unc; n_MC)
#ζ_resid, logdetΣ = sample_ζ_norm0(rng, ϕ[1:2], reshape(ϕ[2 .+ (1:20)],2,:), ϕ[(end-length(interpreters.unc)+1):end], interpreters.unc; n_MC)
ζ_resid, σ = sample_ζ_norm0(rng, μ_ζP, μ_ζMs0, ϕc.unc; n_MC)

Check warning on line 76 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L76

Added line #L76 was not covered by tests
#ζ_resid, σ = sample_ζ_norm0(rng, ϕ[1:2], reshape(ϕ[2 .+ (1:20)],2,:), ϕ[(end-length(interpreters.unc)+1):end], interpreters.unc; n_MC)
ζ = stack(map(eachcol(ζ_resid)) do r
rc = interpreters.PMs(r)
ζP = μ_ζP .+ rc.θP
μ_ζMs = μ_ζMs0 # g(x, ϕc.ϕ) # TODO provide ζP to g
ζMs = μ_ζMs .+ rc.θMs
vcat(ζP, vec(ζMs))
end)
ζ, logdetΣ
ζ, σ

Check warning on line 85 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L85

Added line #L85 was not covered by tests
end

"""
Extract relevant parameters from θ and return n_MC generated draws
together with the logdet of the transformation.
together with the vector of standard deviations, σ.

Necessary typestable information on number of compponents are provided with
ComponentMarshellers
Expand Down Expand Up @@ -115,9 +120,9 @@
# need to construct full matrix for CUDA
Uσ = _create_blockdiag(UP, UM, σP, σMs, n_batch)
ζ_resid = Uσ' * urand
logdetΣ = 2 .* sum(log.(diag(Uσ)))
σ = diag(Uσ) # elements of the diagonal: standard deviations

Check warning on line 123 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L123

Added line #L123 was not covered by tests
# returns CuArrays to either continue on GPU or need to transfer to CPU
ζ_resid, logdetΣ
ζ_resid, σ

Check warning on line 125 in src/elbo.jl

View check run for this annotation

Codecov / codecov/patch

src/elbo.jl#L125

Added line #L125 was not covered by tests
end

function _create_blockdiag(UP::AbstractMatrix{T}, UM, σP, σMs, n_batch) where {T}
Expand Down
2 changes: 1 addition & 1 deletion test/test_cholesky_structure.jl
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ end
optprob = Optimization.OptimizationProblem(optf, Us1vec0)
res = Optimization.solve(optprob, OptimizationOptimisers.Adam(0.02),
#callback=callback_loss(50),
maxiters = 800)
maxiters = 1_000)

Upred = CP.transformU_cholesky1(res.u; n = n_U)
#@test Upred ≈ CU
Expand Down
4 changes: 2 additions & 2 deletions test/test_elbo.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ n_MC = 3
end

@testset "generate_ζ" begin
ζ, logdetΣ = CP.generate_ζ(
ζ, σ = CP.generate_ζ(
rng, g, f, ϕ_ini, xM[:, 1:n_batch], map(get_concrete, interpreters);
n_MC = 8)
@test ζ isa Matrix
Expand All @@ -119,7 +119,7 @@ if CUDA.functional()
@testset "generate_ζ gpu" begin
ϕ = CuArray(CA.getdata(ϕ_ini))
xMg_batch = CuArray(xM[:, 1:n_batch])
ζ, logdetΣ = CP.generate_ζ(
ζ, σ = CP.generate_ζ(
rng, g_flux, f, ϕ, xMg_batch, map(get_concrete, interpreters);
n_MC = 8)
@test ζ isa CuMatrix
Expand Down
2 changes: 1 addition & 1 deletion test/test_logden_normal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ end;
@testset "entropy_MvNormal" begin
S = Diagonal([4,5]) .+ rand(2,2)
S2 = Symmetric(S*S)
@test entropy_MvNormal(S2) == entropy(MvNormal(S2))
@test entropy_MvNormal(S2) entropy(MvNormal(S2))
end;


Expand Down
6 changes: 3 additions & 3 deletions test/test_sample_zeta.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ scenario = (:default,)
@testset "sample_ζ_norm0 cpu" begin
ϕ = CA.getdata(ϕ_cpu)
ϕc = interpreters.pmu(ϕ)
ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)
ζ_resid, σ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)
@test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC)
gr = Zygote.gradient(ϕc -> sum(CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc)[1]), ϕc)[1]
@test length(gr) == length(ϕ)
Expand All @@ -76,9 +76,9 @@ scenario = (:default,)
#ζP, ζMs, ϕunc = ϕc.P, ϕc.Ms, ϕc.unc
#urand = CUDA.randn(length(ϕc.P) + length(ϕc.Ms), n_MC) |> gpu
#include(joinpath(@__DIR__, "uncNN", "elbo.jl")) # callback_loss
#ζ_resid, logdetΣ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)
#ζ_resid, σ = sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)
#Zygote.gradient(ϕc -> sum(sample_ζ_norm0(urand, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)[1]), ϕc)[1];
ζ_resid, logdetΣ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)
ζ_resid, σ = CP.sample_ζ_norm0(rng, ϕc.P, ϕc.Ms, ϕc.unc; n_MC)
@test ζ_resid isa GPUArraysCore.AbstractGPUArray
@test size(ζ_resid) == (length(ϕc.P) + n_θM * n_site, n_MC)
gr = Zygote.gradient(
Expand Down
Loading