From 9cb30f6112bb8cbde7bcb1851b1459514833ca8a Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 2 May 2025 18:11:02 -0700 Subject: [PATCH 01/35] EKP-based approximate dim-reduction --- examples/DimensionReduction/Project.toml | 7 + .../common_inverse_problem.jl | 42 +++ examples/DimensionReduction/forward_maps.jl | 24 ++ .../generate_inverse_problem_data.jl | 47 +++ .../likelihood_dimension_reduction.jl | 289 ++++++++++++++++++ 5 files changed, 409 insertions(+) create mode 100644 examples/DimensionReduction/Project.toml create mode 100644 examples/DimensionReduction/common_inverse_problem.jl create mode 100644 examples/DimensionReduction/forward_maps.jl create mode 100644 examples/DimensionReduction/generate_inverse_problem_data.jl create mode 100644 examples/DimensionReduction/likelihood_dimension_reduction.jl diff --git a/examples/DimensionReduction/Project.toml b/examples/DimensionReduction/Project.toml new file mode 100644 index 000000000..bfe820e30 --- /dev/null +++ b/examples/DimensionReduction/Project.toml @@ -0,0 +1,7 @@ +[deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/examples/DimensionReduction/common_inverse_problem.jl b/examples/DimensionReduction/common_inverse_problem.jl new file mode 100644 index 000000000..1444ed56d --- /dev/null +++ b/examples/DimensionReduction/common_inverse_problem.jl @@ -0,0 +1,42 @@ +using LinearAlgebra +using EnsembleKalmanProcesses +using EnsembleKalmanProcesses.ParameterDistributions +using Statistics +using Distributions + +# Inverse problem will be taken from (Cui, Tong, 2021) https://arxiv.org/pdf/2101.02417, example 7.1 +include("forward_maps.jl") + + +function linear_exp_inverse_problem(input_dim, output_dim, rng) + # prior + γ0 = 4.0 + β_γ = -2 + Γ = Diagonal([γ0 * (1.0*j)^β_γ for j in 1:input_dim]) + prior_dist = MvNormal(zeros(input_dim),Γ) + prior = ParameterDistribution( + Dict( + "distribution" => Parameterized(prior_dist), + "constraint" => repeat([no_constraint()],input_dim), + "name" => "param_$(input_dim)", + ), + ) + + # forward map + # random linear-exp forward map from Stewart 1980: https://www.jstor.org/stable/2156882?seq=2 + U = qr(randn(rng, (output_dim,output_dim))).Q + V = qr(randn(rng, (input_dim,input_dim))).Q + λ0 = 100.0 + β_λ = -1 + Λ = Diagonal([λ0 * (1.0*j)^β_λ for j in 1:output_dim]) + A = U*Λ*V[1:output_dim,:] # output x input + model = LinearExp(input_dim, output_dim, A) + + # generate data sample + obs_noise_std = 1.0 + obs_noise_cov = (obs_noise_std^2)*I(output_dim) + noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) +true_parameter = reshape(ones(input_dim),:,1) + y = vec(forward_map(true_parameter, model) + noise) + return prior, y, obs_noise_cov, model +end diff --git a/examples/DimensionReduction/forward_maps.jl b/examples/DimensionReduction/forward_maps.jl new file mode 100644 index 000000000..7e904346f --- /dev/null +++ b/examples/DimensionReduction/forward_maps.jl @@ -0,0 +1,24 @@ +abstract type ForwardMapType end + +## G*exp(X) +struct LinearExp{AM <: AbstractMatrix} <: ForwardMapType + input_dim::Int + output_dim::Int + G::AM +end + +# columns of X are samples +function forward_map(X::AVorM, model::LE) where {LE <: LinearExp, AVorM <: AbstractVecOrMat} + return model.G * exp.(X) +end + +# columns of 
X are samples +function jac_forward_map(X::AM, model::LE) where {AM <: AbstractMatrix, LE <: LinearExp} + # dGi / dXj = G_ij exp(x_j) = G.*exp.(mat with repeated x_j rows) + # return [G * exp.(Diagonal(r)) for r in eachrow(X')] # correct but extra multiplies + return [model.G .* exp.(reshape(c,1,:)) for c in eachcol(X)] +end + +function jac_forward_map(X::AV, model::LE) where {AV <: AbstractVector, LE <: LinearExp} + return jac_forward_map(reshape(X,:,1), model) +end diff --git a/examples/DimensionReduction/generate_inverse_problem_data.jl b/examples/DimensionReduction/generate_inverse_problem_data.jl new file mode 100644 index 000000000..037c6aee7 --- /dev/null +++ b/examples/DimensionReduction/generate_inverse_problem_data.jl @@ -0,0 +1,47 @@ +using Plots +using EnsembleKalmanProcesses +using Random +using JLD2 + +rng_seed = 41 +rng = Random.MersenneTwister(rng_seed) + +input_dim = 500 +output_dim = 50 + +include("common_inverse_problem.jl") + +n_trials = 20 +@info "solving $(n_trials) inverse problems with different random forward maps" + +for trial = 1:n_trials + prior, y, obs_noise_cov, model = linear_exp_inverse_problem(input_dim, output_dim, rng) + + n_ensemble = 50 + n_iters_max = 20 + + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, TransformInversion(); rng = rng) + + n_iters = [0] + for i in 1:n_iters_max + params_i = get_ϕ_final(prior, ekp) + G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) + terminate = update_ensemble!(ekp, G_ens) + if !isnothing(terminate) + n_iters[1] = i-1 + break + end + end + + @info "Iteration of posterior convergence: $(n_iters[1])" + @info "Loss over iterations:" get_error(ekp) + save( + "ekp_$(trial).jld2", + "ekp", ekp, + "prior", prior, + "y", y, + "obs_noise_cov", obs_noise_cov, + "model", model, + ) +end diff --git a/examples/DimensionReduction/likelihood_dimension_reduction.jl b/examples/DimensionReduction/likelihood_dimension_reduction.jl new file mode 100644 index 000000000..cf35fc417 --- /dev/null +++ b/examples/DimensionReduction/likelihood_dimension_reduction.jl @@ -0,0 +1,289 @@ +using LinearAlgebra +using EnsembleKalmanProcesses +using EnsembleKalmanProcesses.ParameterDistributions +using Statistics +using Distributions +using Plots +using JLD2 +#Utilities +function cossim(x::VV1,y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} + return dot(x,y)/(norm(x)*norm(y)) +end +function cossim_pos(x::VV1,y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} + return abs(cossim(x,y)) +end +function cossim_cols(X::AM1, Y::AM2) where {AM1 <: AbstractMatrix, AM2 <: AbstractMatrix} + return [cossim_pos(c1,c2) for (c1,c2) in zip(eachcol(X), eachcol(Y))] +end + +n_samples = 2000 # paper uses 5e5 +n_trials = 20 # get from generate_inverse_problem_data + +if !isfile("ekp_1.jld2") + include("generate_inverse_problem_data.jl") # will run n trials +else + include("forward_maps.jl") + loaded1 = load("ekp_1.jld2") + ekp = loaded1["ekp"] + prior = loaded1["prior"] + obs_noise_cov = loaded1["obs_noise_cov"] + input_dim = size(get_u(ekp,1),1) + output_dim = size(get_g(ekp,1),1) +end + +# new terminology +prior_cov = cov(prior) +prior_invrt = sqrt(inv(prior_cov)) +prior_rt = sqrt(prior_cov) +obs_invrt = sqrt(inv(obs_noise_cov)) +obs_inv = inv(obs_noise_cov) + +Hu_evals = [] +Hg_evals = [] +Hu_mean_evals = [] +Hg_mean_evals = [] +Hu_ekp_prior_evals = [] +Hg_ekp_prior_evals = [] + +sim_Hu_means = [] +sim_Hg_means = [] +sim_G_samples = 
[] +sim_U_samples = [] +sim_Hu_ekp_prior = [] +sim_Hg_ekp_prior = [] + +for i = 1:n_trials + + # Load the EKP iterations + loaded = load("ekp_$(i).jld2") + ekp = loaded["ekp"] + prior = loaded["prior"] + obs_noise_cov = loaded["obs_noise_cov"] + y = loaded["y"] + model = loaded["model"] + + # random samples + prior_samples = sample(prior, n_samples) + + # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) + @info "Construct good matrix ($(n_samples) samples of prior, perfect grad)" + gradG_samples = jac_forward_map(prior_samples, model) + Hu = zeros(input_dim,input_dim) + Hg = zeros(output_dim,output_dim) + + for j in 1:n_samples + Hu .+= 1/n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt + Hg .+= 1/n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt + end + + # [1b] One-point approximation at mean value, with perfect grad + @info "Construct with mean value (1 sample), perfect grad" + prior_mean_appr = mean(prior) # approximate mean + gradG_at_mean = jac_forward_map(prior_mean_appr, model)[1] + # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf + Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt + Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt + + # [2] One-point approximation at mean value with SL grad + @info "Construct with mean value (1 sample), SL grad" + g = get_g(ekp,1) + u = get_u(ekp,1) + N_ens = get_N_ens(ekp) + C_at_prior = cov([u;g], dims=2) # basic cross-cov + Cuu = C_at_prior[1:input_dim, 1:input_dim] + svdCuu = svd(Cuu) + nz = min(N_ens-1, input_dim) # nonzero sv's + pinvCuu = svdCuu.U[:,1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz,:] # can replace with localized covariance + Cug = C_at_prior[input_dim+1:end,1:input_dim] + SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. 
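+    # NB (sketch of the identities assembled here, per the Cui & Tong 2021 / Baptista et al 2022
+    # diagnostics referenced above):
+    #   Hu = E_prior[ Γ_pr^{1/2} ∇G(u)' Γ_obs^{-1} ∇G(u) Γ_pr^{1/2} ],
+    #   Hg = E_prior[ Γ_obs^{-1/2} ∇G(u) Γ_pr ∇G(u)' Γ_obs^{-1/2} ].
+    # Below, the prior expectation is collapsed to a single point and ∇G is replaced by the
+    # statistical-linearization estimate SL_gradG ≈ Cug * pinv(Cuu) built from the ensemble,
+    # so Hu_ekp_prior / Hg_ekp_prior are the ensemble-gradient analogues of Hu and Hg.
+    # e.g. a cheap sanity check: @assert size(SL_gradG) == (output_dim, input_dim)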
+ Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt + Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt + + # cosine similarity of evector directions + svdHu = svd(Hu) + svdHg = svd(Hg) + svdHu_mean = svd(Hu_mean) + svdHg_mean = svd(Hg_mean) + svdHu_ekp_prior = svd(Hu_ekp_prior) + svdHg_ekp_prior = svd(Hg_ekp_prior) + @info """ + + samples -> mean value + $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) + + samples + deriv -> mean + (no deriv) + $(cossim_cols(svdHu_mean.V, svdHu_ekp_prior.V)[1:3]) + $(cossim_cols(svdHg_mean.V, svdHg_ekp_prior.V)[1:3]) + + """ + push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) + push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) + push!(Hu_evals, svdHu.S) + push!(Hg_evals, svdHg.S) + push!(Hu_mean_evals, svdHu_mean.S) + push!(Hg_mean_evals, svdHg_mean.S) + push!(Hu_ekp_prior_evals, svdHu_ekp_prior.S) + push!(Hg_ekp_prior_evals, svdHg_ekp_prior.S) + push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) + push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) + + # cosine similarity to output svd from samples + G_samples = forward_map(prior_samples, model)' + svdG = svd(G_samples) # nonsquare, so permuted so evectors are V + svdU = svd(prior_samples') + + push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) + push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) + +end + +using Plots.Measures +gr(size=(1.6*1200,600), legend=true, bottom_margin = 10mm, left_margin = 10mm) +default( + titlefont = 20, + legendfontsize = 12, + guidefont = 14, + tickfont = 14, +) + +normal_Hg_evals = [ev ./ ev[1] for ev in Hg_evals] +normal_Hg_mean_evals = [ev ./ ev[1] for ev in Hg_mean_evals] +normal_Hg_ekp_prior_evals = [ev ./ ev[1] for ev in Hg_ekp_prior_evals] + +truncation = 15 +truncation = Int(minimum([truncation,input_dim, output_dim])) + +#= plot( + 1:truncation, + (mean(sim_Hg_means) .* mean(normal_Hg_evals))[1:truncation], + ribbon = (std(sim_Hg_means) .* mean(normal_Hg_evals)/sqrt(n_trials))[1:truncation], + color = :red, + label = "sim (samples v mean)", + +) +=# +pg = plot( + 1:truncation, + mean(sim_Hg_means)[1:truncation], + ribbon = (std(sim_Hg_means)/sqrt(n_trials))[1:truncation], + color = :blue, + label = "sim (samples v mean)", + legend=false, +) + +plot!( + pg, + 1:truncation, + mean(sim_Hg_ekp_prior)[1:truncation], + ribbon = (std(sim_Hg_ekp_prior)/sqrt(n_trials))[1:truncation], + color = :red, + label = "sim (samples v mean-no-der)", +) + +plot!( + pg, + 1:truncation, + mean(normal_Hg_evals)[1:truncation], + color = :black, + label = "normalized eval (samples)", +) +plot!( + pg, + 1:truncation, + mean(normal_Hg_mean_evals)[1:truncation], + color = :black, + alpha = 0.7, + label = "normalized eval (mean)", +) + +plot!( + pg, + 1:truncation, + mean(normal_Hg_ekp_prior_evals)[1:truncation], + color = :black, + alpha = 0.3, + label = "normalized eval (mean-no-der)", +) + +plot!( + pg, + 1:truncation, + mean(sim_G_samples)[1:truncation], + ribbon = (std(sim_G_samples)/sqrt(n_trials))[1:truncation], + color = :green, + label = "similarity (PCA)", +) + +title!(pg, "Similarity of spectrum of output diagnostic") + + +normal_Hu_evals = [ev ./ ev[1] for ev in Hu_evals] +normal_Hu_mean_evals = [ev ./ ev[1] for ev in Hu_mean_evals] +normal_Hu_ekp_prior_evals = [ev ./ ev[1] for ev in Hu_ekp_prior_evals] + +#= plot( + 1:truncation, + (mean(sim_Hu_means) .* mean(normal_Hu_evals))[1:truncation], + ribbon = (std(sim_Hu_means) .* 
mean(normal_Hu_evals)/sqrt(n_trials))[1:truncation], + color = :red, + label = "similarity scaled by eval", +)=# + +pu = plot( + 1:truncation, + mean(sim_Hu_means)[1:truncation], + ribbon = (std(sim_Hu_means)/sqrt(n_trials))[1:truncation], + color = :blue, + label = "sim (samples v mean)", +) + +plot!( + pu, + 1:truncation, + mean(normal_Hu_evals)[1:truncation], + color = :black, + label = "normalized eval (samples)", +) +plot!( + pu, + 1:truncation, + mean(normal_Hu_mean_evals)[1:truncation], + color = :black, + alpha = 0.7, + label = "normalized eval (mean)", +) +plot!( + pu, + 1:truncation, + mean(normal_Hu_ekp_prior_evals)[1:truncation], + color = :black, + alpha = 0.3, + label = "normalized eval (mean-no-der)", +) +plot!( + pu, + 1:truncation, + mean(sim_U_samples)[1:truncation], + ribbon = (std(sim_U_samples)/sqrt(n_trials))[1:truncation], + color = :green, + label = "similarity (PCA)", +) + +plot!( + pu, + 1:truncation, + mean(sim_Hu_ekp_prior)[1:truncation], + ribbon = (std(sim_Hu_ekp_prior)/sqrt(n_trials))[1:truncation], + color = :red, + label = "sim (samples v mean-no-der)", +) + +title!(pu, "Similarity of spectrum of input diagnostic") + +layout = @layout [a b] +p = plot(pu, pg, layout = layout) + +savefig(p, "spectrum_comparison.png") + + From 0472a5fb02da0943cd9d5253e29219679e0f5c16 Mon Sep 17 00:00:00 2001 From: odunbar Date: Wed, 7 May 2025 11:05:22 -0700 Subject: [PATCH 02/35] save diagnostic matrices --- ... build_and_compare_diagnostic_matrices.jl} | 161 +++++++++++++----- .../common_inverse_problem.jl | 2 +- .../generate_inverse_problem_data.jl | 5 +- 3 files changed, 122 insertions(+), 46 deletions(-) rename examples/DimensionReduction/{likelihood_dimension_reduction.jl => build_and_compare_diagnostic_matrices.jl} (62%) diff --git a/examples/DimensionReduction/likelihood_dimension_reduction.jl b/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl similarity index 62% rename from examples/DimensionReduction/likelihood_dimension_reduction.jl rename to examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl index cf35fc417..3cfe7c042 100644 --- a/examples/DimensionReduction/likelihood_dimension_reduction.jl +++ b/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl @@ -23,20 +23,8 @@ if !isfile("ekp_1.jld2") include("generate_inverse_problem_data.jl") # will run n trials else include("forward_maps.jl") - loaded1 = load("ekp_1.jld2") - ekp = loaded1["ekp"] - prior = loaded1["prior"] - obs_noise_cov = loaded1["obs_noise_cov"] - input_dim = size(get_u(ekp,1),1) - output_dim = size(get_g(ekp,1),1) end -# new terminology -prior_cov = cov(prior) -prior_invrt = sqrt(inv(prior_cov)) -prior_rt = sqrt(prior_cov) -obs_invrt = sqrt(inv(obs_noise_cov)) -obs_inv = inv(obs_noise_cov) Hu_evals = [] Hg_evals = [] @@ -44,6 +32,8 @@ Hu_mean_evals = [] Hg_mean_evals = [] Hu_ekp_prior_evals = [] Hg_ekp_prior_evals = [] +Hu_ekp_final_evals = [] +Hg_ekp_final_evals = [] sim_Hu_means = [] sim_Hg_means = [] @@ -51,17 +41,27 @@ sim_G_samples = [] sim_U_samples = [] sim_Hu_ekp_prior = [] sim_Hg_ekp_prior = [] +sim_Hu_ekp_final = [] +sim_Hg_ekp_final = [] -for i = 1:n_trials +for trial = 1:n_trials # Load the EKP iterations - loaded = load("ekp_$(i).jld2") + loaded = load("ekp_$(trial).jld2") ekp = loaded["ekp"] prior = loaded["prior"] obs_noise_cov = loaded["obs_noise_cov"] y = loaded["y"] model = loaded["model"] + input_dim = size(get_u(ekp,1),1) + output_dim = size(get_g(ekp,1),1) + prior_cov = cov(prior) + prior_invrt = sqrt(inv(prior_cov)) + 
prior_rt = sqrt(prior_cov) + obs_invrt = sqrt(inv(obs_noise_cov)) + obs_inv = inv(obs_noise_cov) + # random samples prior_samples = sample(prior, n_samples) @@ -84,8 +84,8 @@ for i = 1:n_trials Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt - # [2] One-point approximation at mean value with SL grad - @info "Construct with mean value (1 sample), SL grad" + # [2a] One-point approximation at mean value with SL grad + @info "Construct with mean value prior (1 sample), SL grad" g = get_g(ekp,1) u = get_u(ekp,1) N_ens = get_N_ens(ekp) @@ -94,11 +94,32 @@ for i = 1:n_trials svdCuu = svd(Cuu) nz = min(N_ens-1, input_dim) # nonzero sv's pinvCuu = svdCuu.U[:,1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz,:] # can replace with localized covariance + Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt Cug = C_at_prior[input_dim+1:end,1:input_dim] - SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. - Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt - Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt - +# SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. +# Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt +# Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt + Hu_ekp_prior = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt + Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + + # [2b] One-point approximation at mean value with SL grad + @info "Construct with mean value final (1 sample), SL grad" + final_it = length(get_g(ekp)) + g = get_g(ekp, final_it) + u = get_u(ekp, final_it) + C_at_final = cov([u;g], dims=2) # basic cross-cov + Cuu = C_at_final[1:input_dim, 1:input_dim] + svdCuu = svd(Cuu) + nz = min(N_ens-1, input_dim) # nonzero sv's + pinvCuu = svdCuu.U[:,1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz,:] # can replace with localized covariance + Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt + Cug = C_at_final[input_dim+1:end,1:input_dim] +# SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. 
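+# NB: the commented-out lines here keep the earlier SL-gradient construction, which whitened with
+# the prior roots (prior_rt / prior_cov, cf. the inline note "here still using prior roots not Cuu");
+# the active Hu_ekp_final / Hg_ekp_final lines below whiten with the ensemble-covariance roots
+# (Cuu_invrt, pinvCuu) instead, presumably because the final-iteration ensemble no longer samples the prior.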
+# Hu_ekp_final = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt # here still using prior roots not Cuu +# Hg_ekp_final = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt + Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt + Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + # cosine similarity of evector directions svdHu = svd(Hu) svdHg = svd(Hg) @@ -106,15 +127,25 @@ for i = 1:n_trials svdHg_mean = svd(Hg_mean) svdHu_ekp_prior = svd(Hu_ekp_prior) svdHg_ekp_prior = svd(Hg_ekp_prior) + svdHu_ekp_final = svd(Hu_ekp_final) + svdHg_ekp_final = svd(Hg_ekp_final) @info """ - samples -> mean value + samples -> mean $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) - samples + deriv -> mean + (no deriv) - $(cossim_cols(svdHu_mean.V, svdHu_ekp_prior.V)[1:3]) - $(cossim_cols(svdHg_mean.V, svdHg_ekp_prior.V)[1:3]) + samples + deriv -> mean + (no deriv) prior + $(cossim_cols(svdHu.V, svdHu_ekp_prior.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_ekp_prior.V)[1:3]) + + samples + deriv -> mean + (no deriv) final + $(cossim_cols(svdHu.V, svdHu_ekp_final.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_ekp_final.V)[1:3]) + + mean+(no deriv): prior -> final + $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) + $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) """ push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) @@ -125,8 +156,12 @@ for i = 1:n_trials push!(Hg_mean_evals, svdHg_mean.S) push!(Hu_ekp_prior_evals, svdHu_ekp_prior.S) push!(Hg_ekp_prior_evals, svdHg_ekp_prior.S) + push!(Hu_ekp_final_evals, svdHu_ekp_final.S) + push!(Hg_ekp_final_evals, svdHg_ekp_final.S) push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) + push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) + push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) # cosine similarity to output svd from samples G_samples = forward_map(prior_samples, model)' @@ -135,7 +170,20 @@ for i = 1:n_trials push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) - + + save( + "diagnostic_matrices_$(trial).jld2", + "Hu", Hu, + "Hg", Hg, + "Hu_mean", Hu_mean, + "Hg_mean", Hg_mean, + "Hu_ekp_prior", Hu_ekp_prior, + "Hg_ekp_prior", Hg_ekp_prior, + "Hu_ekp_final", Hu_ekp_final, + "Hg_ekp_final", Hg_ekp_final, + "svdU", svdU, + "svdG", svdG, + ) end using Plots.Measures @@ -150,19 +198,17 @@ default( normal_Hg_evals = [ev ./ ev[1] for ev in Hg_evals] normal_Hg_mean_evals = [ev ./ ev[1] for ev in Hg_mean_evals] normal_Hg_ekp_prior_evals = [ev ./ ev[1] for ev in Hg_ekp_prior_evals] +normal_Hg_ekp_final_evals = [ev ./ ev[1] for ev in Hg_ekp_final_evals] + +loaded1 = load("ekp_1.jld2") +ekp_tmp = loaded1["ekp"] +input_dim = size(get_u(ekp_tmp,1),1) +output_dim = size(get_g(ekp_tmp,1),1) truncation = 15 truncation = Int(minimum([truncation,input_dim, output_dim])) +# color names in https://github.com/JuliaGraphics/Colors.jl/blob/master/src/names_data.jl -#= plot( - 1:truncation, - (mean(sim_Hg_means) .* mean(normal_Hg_evals))[1:truncation], - ribbon = (std(sim_Hg_means) .* mean(normal_Hg_evals)/sqrt(n_trials))[1:truncation], - color = :red, - label = "sim (samples v mean)", - -) -=# pg = plot( 1:truncation, mean(sim_Hg_means)[1:truncation], @@ -178,7 +224,16 @@ plot!( mean(sim_Hg_ekp_prior)[1:truncation], ribbon = (std(sim_Hg_ekp_prior)/sqrt(n_trials))[1:truncation], color = :red, - label = "sim (samples v mean-no-der)", + alpha = 0.3, + label = 
"sim (samples v mean-no-der) prior", +) +plot!( + pg, + 1:truncation, + mean(sim_Hg_ekp_final)[1:truncation], + ribbon = (std(sim_Hg_ekp_final)/sqrt(n_trials))[1:truncation], + color = :gold, + label = "sim (samples v mean-no-der) final", ) plot!( @@ -206,6 +261,15 @@ plot!( label = "normalized eval (mean-no-der)", ) +plot!( + pg, + 1:truncation, + mean(normal_Hg_ekp_final_evals)[1:truncation], + color = :black, + alpha = 0.3, +) + + plot!( pg, 1:truncation, @@ -221,14 +285,8 @@ title!(pg, "Similarity of spectrum of output diagnostic") normal_Hu_evals = [ev ./ ev[1] for ev in Hu_evals] normal_Hu_mean_evals = [ev ./ ev[1] for ev in Hu_mean_evals] normal_Hu_ekp_prior_evals = [ev ./ ev[1] for ev in Hu_ekp_prior_evals] +normal_Hu_ekp_final_evals = [ev ./ ev[1] for ev in Hu_ekp_final_evals] -#= plot( - 1:truncation, - (mean(sim_Hu_means) .* mean(normal_Hu_evals))[1:truncation], - ribbon = (std(sim_Hu_means) .* mean(normal_Hu_evals)/sqrt(n_trials))[1:truncation], - color = :red, - label = "similarity scaled by eval", -)=# pu = plot( 1:truncation, @@ -261,6 +319,14 @@ plot!( alpha = 0.3, label = "normalized eval (mean-no-der)", ) +plot!( + pu, + 1:truncation, + mean(normal_Hu_ekp_final_evals)[1:truncation], + color = :black, + alpha = 0.3, +) + plot!( pu, 1:truncation, @@ -276,7 +342,16 @@ plot!( mean(sim_Hu_ekp_prior)[1:truncation], ribbon = (std(sim_Hu_ekp_prior)/sqrt(n_trials))[1:truncation], color = :red, - label = "sim (samples v mean-no-der)", + alpha = 0.3, + label = "sim (samples v mean-no-der) prior", +) +plot!( + pu, + 1:truncation, + mean(sim_Hu_ekp_final)[1:truncation], + ribbon = (std(sim_Hu_ekp_final)/sqrt(n_trials))[1:truncation], + color = :gold, + label = "sim (samples v mean-no-der) final", ) title!(pu, "Similarity of spectrum of input diagnostic") diff --git a/examples/DimensionReduction/common_inverse_problem.jl b/examples/DimensionReduction/common_inverse_problem.jl index 1444ed56d..9ad5c24e5 100644 --- a/examples/DimensionReduction/common_inverse_problem.jl +++ b/examples/DimensionReduction/common_inverse_problem.jl @@ -38,5 +38,5 @@ function linear_exp_inverse_problem(input_dim, output_dim, rng) noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) true_parameter = reshape(ones(input_dim),:,1) y = vec(forward_map(true_parameter, model) + noise) - return prior, y, obs_noise_cov, model + return prior, y, obs_noise_cov, model, true_parameter end diff --git a/examples/DimensionReduction/generate_inverse_problem_data.jl b/examples/DimensionReduction/generate_inverse_problem_data.jl index 037c6aee7..b4e342308 100644 --- a/examples/DimensionReduction/generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/generate_inverse_problem_data.jl @@ -15,9 +15,9 @@ n_trials = 20 @info "solving $(n_trials) inverse problems with different random forward maps" for trial = 1:n_trials - prior, y, obs_noise_cov, model = linear_exp_inverse_problem(input_dim, output_dim, rng) + prior, y, obs_noise_cov, model, true_parameter = linear_exp_inverse_problem(input_dim, output_dim, rng) - n_ensemble = 50 + n_ensemble = 80 n_iters_max = 20 initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) @@ -43,5 +43,6 @@ for trial = 1:n_trials "y", y, "obs_noise_cov", obs_noise_cov, "model", model, + "true_parameter", true_parameter ) end From 308257bd318a2a5241827bb99461b474e9ca3cbb Mon Sep 17 00:00:00 2001 From: odunbar Date: Thu, 8 May 2025 13:09:33 -0700 Subject: [PATCH 03/35] estimate_posteriors.jl --- .../DimensionReduction/estimate_posteriors.jl | 182 ++++++++++++++++++ 1 
file changed, 182 insertions(+) create mode 100644 examples/DimensionReduction/estimate_posteriors.jl diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl new file mode 100644 index 000000000..bd2d38933 --- /dev/null +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -0,0 +1,182 @@ +# Solve the problem with EKS + + +using Plots +using EnsembleKalmanProcesses +using Random +using JLD2 + +rng_seed = 41 +rng = Random.MersenneTwister(rng_seed) + +input_dim = 500 +output_dim = 50 + +include("common_inverse_problem.jl") + +n_trials = 1 + +if !isfile("ekp_1.jld2") + include("generate_inverse_problem_data.jl") # will run n trials +end +if !isfile("diagnostic_matrices_1.jld2") + include("build_an_compare_diagnostic_matrices.jl") # will run n trials +end + +for trial = 1:n_trials + + # Load the EKP iterations + loaded = load("ekp_$(trial).jld2") + ekp = loaded["ekp"] + prior = loaded["prior"] + obs_noise_cov = loaded["obs_noise_cov"] + y = loaded["y"] + model = loaded["model"] + input_dim = size(get_u(ekp,1),1) + output_dim = size(get_g(ekp,1),1) + + prior_cov = cov(prior) + prior_invrt = sqrt(inv(prior_cov)) + prior_rt = sqrt(prior_cov) + obs_invrt = sqrt(inv(obs_noise_cov)) + obs_inv = inv(obs_noise_cov) + + # Load diagnostic container + diagnostic_mats = load("diagnostic_matrices_$(trial).jld2") + + + # [1] solve the problem with EKS - directly + prior, y, obs_noise_cov, model, true_parameter = linear_exp_inverse_problem(input_dim, output_dim, rng) + + n_ensemble = 100 + n_iters_max = 50 + + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng) + + n_iters = [0] + for i in 1:n_iters_max + params_i = get_ϕ_final(prior, ekp) + G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) 
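+        # NB: get_ϕ_final returns the constrained ensemble; with the no_constraint() prior used
+        # here it coincides with the unconstrained particles, and the line above pushes each
+        # ensemble member (one column per particle) through forward_map ahead of the update below.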
+ terminate = update_ensemble!(ekp, G_ens) + if !isnothing(terminate) + n_iters[1] = i-1 + break + end + end + @info get_error(ekp) + ekp_u = get_u(ekp) + ekp_g = get_g(ekp) + + # [2] Create emulator in truncated space, and run EKS on this + min_iter = 1 + max_iter = 8 + i_pairs = reduce(hcat, get_u(ekp)[min_iter:max_iter]) + o_pairs = reduce(hcat, get_g(ekp)[min_iter:max_iter]) + + # Reduce space diagnostic matrix + in_diag = "Hu" + out_diag = "Hg" + Hu = diagnostic_mats[in_diag] + Hg = diagnostic_mats[out_diag] + @info "Diagnostic matrices = ($(in_diag), $(out_diag))" + svdu = svd(Hu) + tol = 0.999 # <1 + r_in_vec = accumulate(+,svdu.S) ./sum(svdu.S) + r_in = sum(r_in_vec .< tol) + 1 # number evals needed for "tol" amount of information + U_r = svdu.V[:,1:r_in] + U_top = svdu.V[:,r_in+1:end] + + svdg = svd(Hg) + r_out_vec = accumulate(+,svdg.S) ./sum(svdg.S) + r_out = sum(r_out_vec .< tol) + 1 # number evals needed for "tol" amount of information + V_r = svdg.V[:,1:r_out] + V_top = svdg.V[:,r_out+1:end] + @info "dimensions of subspace retaining $(100*tol)% information: \n ($(r_in),$(r_out)) out of ($(input_dim),$(output_dim))" + + X_r = U_r' * prior_invrt * i_pairs + Y_r = V_r' * obs_invrt * o_pairs + y_r = V_r' * obs_invrt * y + + # [2a] exp-cubic model for regressor + Ylb = min(1, minimum(Y_r) - abs(mean(Y_r))) # some loose lower bound + logY_r = log.(Y_r .- Ylb) + β = logY_r / [ones(size(X_r)); X_r; X_r.^2; X_r.^3] # = ([1 X_r]' \ Y_r')' + # invert relationship by + # exp(β*X_r) + Ylb + if r_in ==1 & r_out == 1 + sc = scatter(X_r,logY_r) + xmin = minimum(X_r) + xmax = maximum(X_r) + xrange = range(xmin,xmax,100) + expcubic = (exp.(β[4]*reshape(xrange,1,:).^3 + β[3]*reshape(xrange,1,:).^2 .+ β[2]*reshape(xrange,1,:) .+ β[1]) .+ Ylb)' + plot!(sc, xrange, expcubic, legend=false) + hline!(sc, [y_r]) + savefig(sc, "linreg_learn_scatter.png") + end + + # [2b] gp model for regressor + + + + # now apply EKS to the new problem + + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + initial_r = U_r' * prior_invrt * initial_ensemble + prior_r = ParameterDistribution(Samples(U_r' * prior_invrt * sample(rng,prior,1000)), no_constraint(), "prior_r") + @info y_r + obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr + ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng = rng) + + n_iters = [0] + for i in 1:n_iters_max + params_i = get_ϕ_final(prior_r, ekp_r) + G_ens = exp.(β[4]*(params_i).^3 .+ β[3]*(params_i).^2 .+ β[2]*params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space + terminate = update_ensemble!(ekp_r, G_ens) + if !isnothing(terminate) + n_iters[1] = i-1 + break + end + end + @info get_error(ekp_r) + ekp_rlin_u = get_u(ekp_r) + ekp_rlin_g = get_g(ekp_r) + + # map to same space: [here in reduced space first] + spinup = 10*n_ensemble + ekp_rlin_u = reduce(hcat,ekp_rlin_u) + ekp_rlin_g = reduce(hcat,ekp_rlin_g) + ekp_u = reduce(hcat, ekp_u) + ekp_g = reduce(hcat, ekp_g) + projected_ekp_u = U_r' * prior_invrt * ekp_u + projected_ekp_g = V_r' * obs_invrt * ekp_g + + if r_in ==1 && r_out == 1 + pp1 = histogram(projected_ekp_u[:,spinup:end]', color= :gray, label="projected G", title="projected EKP samples (input)", legend=true) + # histogram!(pp1, ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear") + # pp1 = histogram(ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear", yscale=:log10) + + pp2 = histogram(projected_ekp_g[:,spinup:end]', color= :gray, title 
="projected EKP samples (output)") + # histogram!(pp2, ekp_rlin_g[:,spinup:end]', color = :blue, legend=false) + #pp2 = histogram!(ekp_rlin_g[:,spinup:end]', color = :blue, legend=false, yscale=:log10) + l = @layout [a b] + pp = plot(pp1, pp2, layout=l) + savefig(pp,"projected_histogram_linreg.png") + end + #= + # 500 dim marginal plot.. + pp = plot(prior) + + spinup_iters = n_iters_max - 20 + posterior_samples = reduce(hcat,get_ϕ(prior,ekp)[spinup_iters:end]) # flatten over iterations (n_dim x n_particles) + posterior_dist = ParameterDistribution( + Dict( + "distribution" => Samples(posterior_samples), + "name" => "posterior samples", + "constraint" => repeat([no_constraint()], input_dim), + ), + ) + plot!(pp, posterior_dist) + =# + +end From e542a07b8f59055bd03e8a44c484e93dcba4a2f9 Mon Sep 17 00:00:00 2001 From: odunbar Date: Thu, 8 May 2025 13:25:24 -0700 Subject: [PATCH 04/35] small changes --- examples/DimensionReduction/common_inverse_problem.jl | 2 +- examples/DimensionReduction/estimate_posteriors.jl | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/DimensionReduction/common_inverse_problem.jl b/examples/DimensionReduction/common_inverse_problem.jl index 9ad5c24e5..f048a4d56 100644 --- a/examples/DimensionReduction/common_inverse_problem.jl +++ b/examples/DimensionReduction/common_inverse_problem.jl @@ -36,7 +36,7 @@ function linear_exp_inverse_problem(input_dim, output_dim, rng) obs_noise_std = 1.0 obs_noise_cov = (obs_noise_std^2)*I(output_dim) noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) -true_parameter = reshape(ones(input_dim),:,1) + true_parameter = reshape(ones(input_dim),:,1) y = vec(forward_map(true_parameter, model) + noise) return prior, y, obs_noise_cov, model, true_parameter end diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl index bd2d38933..4d8d8afd6 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -65,6 +65,7 @@ for trial = 1:n_trials end end @info get_error(ekp) + ekp_u = get_u(ekp) ekp_g = get_g(ekp) @@ -96,7 +97,13 @@ for trial = 1:n_trials X_r = U_r' * prior_invrt * i_pairs Y_r = V_r' * obs_invrt * o_pairs + + # true + true_parameter = reshape(ones(input_dim),:,1) + true_r = U_r' * prior_invrt * true_parameter y_r = V_r' * obs_invrt * y + @info true_r + @info y_r # [2a] exp-cubic model for regressor Ylb = min(1, minimum(Y_r) - abs(mean(Y_r))) # some loose lower bound @@ -124,7 +131,6 @@ for trial = 1:n_trials initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) initial_r = U_r' * prior_invrt * initial_ensemble prior_r = ParameterDistribution(Samples(U_r' * prior_invrt * sample(rng,prior,1000)), no_constraint(), "prior_r") - @info y_r obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng = rng) @@ -139,6 +145,7 @@ for trial = 1:n_trials end end @info get_error(ekp_r) + @info get_u_mean_final(ekp_r) ekp_rlin_u = get_u(ekp_r) ekp_rlin_g = get_g(ekp_r) From 5924c7f90713972a8b7fbbedfd7b8e64bebf843f Mon Sep 17 00:00:00 2001 From: odunbar Date: Thu, 8 May 2025 16:23:41 -0700 Subject: [PATCH 05/35] working with full G --- .../DimensionReduction/estimate_posteriors.jl | 131 +++++++++++------- 1 file changed, 83 insertions(+), 48 deletions(-) diff --git a/examples/DimensionReduction/estimate_posteriors.jl 
b/examples/DimensionReduction/estimate_posteriors.jl index 4d8d8afd6..1011230c2 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -14,7 +14,16 @@ output_dim = 50 include("common_inverse_problem.jl") -n_trials = 1 +n_trials = 10 + +r_in = 6 +r_out = 1 + +in_diag = "Hu_ekp_prior" +out_diag = "Hg_ekp_prior" +Hu = diagnostic_mats[in_diag] +Hg = diagnostic_mats[out_diag] +@info "Diagnostic matrices = ($(in_diag), $(out_diag))" if !isfile("ekp_1.jld2") include("generate_inverse_problem_data.jl") # will run n trials @@ -64,112 +73,138 @@ for trial = 1:n_trials break end end - @info get_error(ekp) ekp_u = get_u(ekp) ekp_g = get_g(ekp) # [2] Create emulator in truncated space, and run EKS on this - min_iter = 1 + min_iter = max_iter = 8 i_pairs = reduce(hcat, get_u(ekp)[min_iter:max_iter]) o_pairs = reduce(hcat, get_g(ekp)[min_iter:max_iter]) # Reduce space diagnostic matrix - in_diag = "Hu" - out_diag = "Hg" - Hu = diagnostic_mats[in_diag] - Hg = diagnostic_mats[out_diag] - @info "Diagnostic matrices = ($(in_diag), $(out_diag))" svdu = svd(Hu) + + #= + # find by tolerance doesn't work well... tol = 0.999 # <1 r_in_vec = accumulate(+,svdu.S) ./sum(svdu.S) r_in = sum(r_in_vec .< tol) + 1 # number evals needed for "tol" amount of information + =# U_r = svdu.V[:,1:r_in] - U_top = svdu.V[:,r_in+1:end] + #= svdg = svd(Hg) r_out_vec = accumulate(+,svdg.S) ./sum(svdg.S) r_out = sum(r_out_vec .< tol) + 1 # number evals needed for "tol" amount of information + =# V_r = svdg.V[:,1:r_out] - V_top = svdg.V[:,r_out+1:end] - @info "dimensions of subspace retaining $(100*tol)% information: \n ($(r_in),$(r_out)) out of ($(input_dim),$(output_dim))" X_r = U_r' * prior_invrt * i_pairs Y_r = V_r' * obs_invrt * o_pairs - + # true true_parameter = reshape(ones(input_dim),:,1) true_r = U_r' * prior_invrt * true_parameter y_r = V_r' * obs_invrt * y - @info true_r - @info y_r # [2a] exp-cubic model for regressor - Ylb = min(1, minimum(Y_r) - abs(mean(Y_r))) # some loose lower bound - logY_r = log.(Y_r .- Ylb) - β = logY_r / [ones(size(X_r)); X_r; X_r.^2; X_r.^3] # = ([1 X_r]' \ Y_r')' - # invert relationship by - # exp(β*X_r) + Ylb - if r_in ==1 & r_out == 1 - sc = scatter(X_r,logY_r) - xmin = minimum(X_r) - xmax = maximum(X_r) - xrange = range(xmin,xmax,100) - expcubic = (exp.(β[4]*reshape(xrange,1,:).^3 + β[3]*reshape(xrange,1,:).^2 .+ β[2]*reshape(xrange,1,:) .+ β[1]) .+ Ylb)' - plot!(sc, xrange, expcubic, legend=false) - hline!(sc, [y_r]) - savefig(sc, "linreg_learn_scatter.png") - end - - # [2b] gp model for regressor - + red_model_ids = ["expcubic1d","G"] + red_model_id = red_model_ids[2] + if red_model_id == "expcubic1d" + Ylb = min(1, minimum(Y_r) - abs(mean(Y_r))) # some loose lower bound + logY_r = log.(Y_r .- Ylb) + β = logY_r / [ones(size(X_r)); X_r; X_r.^2; X_r.^3] # = ([1 X_r]' \ Y_r')' + # invert relationship by + # exp(β*X_r) + Ylb + if r_in ==1 & r_out == 1 + sc = scatter(X_r,logY_r) + xmin = minimum(X_r) + xmax = maximum(X_r) + xrange = reshape(range(xmin,xmax,100), 1, :) + expcubic = (β[4]*xrange.^3 + β[3]*xrange.^2 .+ β[2]*xrange .+ β[1])' + plot!(sc, xrange, expcubic, legend=false) + hline!(sc, [log.(y_r .- Ylb)]) + savefig(sc, "linreg_learn_scatter.png") + end + end - # now apply EKS to the new problem - initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) initial_r = U_r' * prior_invrt * initial_ensemble - prior_r = ParameterDistribution(Samples(U_r' * prior_invrt * sample(rng,prior,1000)), no_constraint(), 
"prior_r") + prior_r = ParameterDistribution(Samples(U_r' * prior_invrt * sample(rng,prior,1000)), repeat([no_constraint()], r_in), "prior_r") + obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng = rng) n_iters = [0] for i in 1:n_iters_max params_i = get_ϕ_final(prior_r, ekp_r) - G_ens = exp.(β[4]*(params_i).^3 .+ β[3]*(params_i).^2 .+ β[2]*params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space + if red_model_id == "expcubic1d" + G_ens = exp.(β[4]*(params_i).^3 .+ β[3]*(params_i).^2 .+ β[2]*params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space + elseif red_model_id == "G" + # uninformative eigenvectors + sv_in = reduce(hcat, repeat([svdu.S], n_ensemble)) # repeat SVs, then replace first by params + sv_in[1:size(params_i,1),:] = params_i + # evaluate true G + G_ens_full = reduce(hcat, [forward_map(prior_rt * svdu.V * sv, model) for sv in eachcol(sv_in)] ) + # project data back + G_ens = V_r' * obs_invrt * G_ens_full + end + terminate = update_ensemble!(ekp_r, G_ens) if !isnothing(terminate) n_iters[1] = i-1 break end end - @info get_error(ekp_r) - @info get_u_mean_final(ekp_r) - ekp_rlin_u = get_u(ekp_r) - ekp_rlin_g = get_g(ekp_r) + ekp_r_u = get_u(ekp_r) + ekp_r_g = get_g(ekp_r) # map to same space: [here in reduced space first] spinup = 10*n_ensemble - ekp_rlin_u = reduce(hcat,ekp_rlin_u) - ekp_rlin_g = reduce(hcat,ekp_rlin_g) + ekp_r_u = reduce(hcat,ekp_r_u) + ekp_r_g = reduce(hcat,ekp_r_g) ekp_u = reduce(hcat, ekp_u) ekp_g = reduce(hcat, ekp_g) projected_ekp_u = U_r' * prior_invrt * ekp_u projected_ekp_g = V_r' * obs_invrt * ekp_g - + if r_in ==1 && r_out == 1 pp1 = histogram(projected_ekp_u[:,spinup:end]', color= :gray, label="projected G", title="projected EKP samples (input)", legend=true) - # histogram!(pp1, ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear") - # pp1 = histogram(ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear", yscale=:log10) + histogram!(pp1, ekp_r_u[:,spinup:end]', color = :blue, label="reduced") + # pp1 = histogram(ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear", yscale=:log10) pp2 = histogram(projected_ekp_g[:,spinup:end]', color= :gray, title ="projected EKP samples (output)") - # histogram!(pp2, ekp_rlin_g[:,spinup:end]', color = :blue, legend=false) - #pp2 = histogram!(ekp_rlin_g[:,spinup:end]', color = :blue, legend=false, yscale=:log10) + histogram!(pp2, ekp_r_g[:,spinup:end]', color = :blue, legend=false) + #pp2 = histogram!(ekp_rlin_g[:,spinup:end]', color = :blue, legend=false, yscale=:log10) l = @layout [a b] pp = plot(pp1, pp2, layout=l) - savefig(pp,"projected_histogram_linreg.png") + savefig(pp,"projected_histograms.png") end + + # compare in original space + mean_final = get_u_mean_final(ekp) + mean_final_in_red = U_r' * prior_invrt * mean_final + mean_red_final = get_u_mean_final(ekp_r) + sv_in = svdu.S + sv_in[1:r_in] = mean_red_final + mean_red_final_full = prior_rt * svdu.V * sv_in + + @info """ + + Reduced space dimension(input, output): $((r_in, r_out)) + + Mean of final-mean to true in reduced space: + Using Full space optimization: $(norm(mean_final_in_red - true_r)) + Using Red. space optimization: $(norm(mean_red_final - true_r)) + + Mean of final-mean to true in full space: + Using Full space optimization: $(norm(mean_final - true_parameter)) + Using Red. 
space optimization: $(norm(mean_red_final_full - true_parameter)) + """ + #@info norm(cov(get_u_final(ekp),dims=2) - cov(get_u_final(ekp_r), dims=2)) #= # 500 dim marginal plot.. pp = plot(prior) From acd8269691b7313220d95d78d1dd433c00d719f4 Mon Sep 17 00:00:00 2001 From: odunbar Date: Thu, 8 May 2025 16:30:05 -0700 Subject: [PATCH 06/35] scale norms --- examples/DimensionReduction/estimate_posteriors.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl index 1011230c2..65b6c8715 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -196,13 +196,13 @@ for trial = 1:n_trials Reduced space dimension(input, output): $((r_in, r_out)) - Mean of final-mean to true in reduced space: - Using Full space optimization: $(norm(mean_final_in_red - true_r)) - Using Red. space optimization: $(norm(mean_red_final - true_r)) + Norm of final-mean to true in reduced space: + Using Full space optimization: $((1.0/r_in)*norm(mean_final_in_red - true_r)) + Using Red. space optimization: $((1.0/r_in)*norm(mean_red_final - true_r)) - Mean of final-mean to true in full space: - Using Full space optimization: $(norm(mean_final - true_parameter)) - Using Red. space optimization: $(norm(mean_red_final_full - true_parameter)) + Norm of final-mean to true in full space: + Using Full space optimization: $((1.0/input_dim)*norm(mean_final - true_parameter)) + Using Red. space optimization: $((1.0/input_dim)*norm(mean_red_final_full - true_parameter)) """ #@info norm(cov(get_u_final(ekp),dims=2) - cov(get_u_final(ekp_r), dims=2)) #= From 877c352a1a29ee0fd6d0ec3c749ec166b1803b77 Mon Sep 17 00:00:00 2001 From: odunbar Date: Thu, 8 May 2025 16:53:34 -0700 Subject: [PATCH 07/35] reorganize --- examples/DimensionReduction/estimate_posteriors.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl index 65b6c8715..0ec38bc5e 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -144,7 +144,6 @@ for trial = 1:n_trials if red_model_id == "expcubic1d" G_ens = exp.(β[4]*(params_i).^3 .+ β[3]*(params_i).^2 .+ β[2]*params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space elseif red_model_id == "G" - # uninformative eigenvectors sv_in = reduce(hcat, repeat([svdu.S], n_ensemble)) # repeat SVs, then replace first by params sv_in[1:size(params_i,1),:] = params_i # evaluate true G From 3a9435a076522cf883eb44af9c46097b2f455932 Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 9 May 2025 17:36:10 -0700 Subject: [PATCH 08/35] return diagnostic mats lines --- examples/DimensionReduction/estimate_posteriors.jl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl index 0ec38bc5e..66f3c3088 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -19,11 +19,6 @@ n_trials = 10 r_in = 6 r_out = 1 -in_diag = "Hu_ekp_prior" -out_diag = "Hg_ekp_prior" -Hu = diagnostic_mats[in_diag] -Hg = diagnostic_mats[out_diag] -@info "Diagnostic matrices = ($(in_diag), $(out_diag))" if !isfile("ekp_1.jld2") include("generate_inverse_problem_data.jl") # will run n trials @@ -32,6 +27,12 @@ if 
!isfile("diagnostic_matrices_1.jld2") include("build_an_compare_diagnostic_matrices.jl") # will run n trials end +in_diag = "Hu_ekp_prior" +out_diag = "Hg_ekp_prior" +@info "Diagnostic matrices = ($(in_diag), $(out_diag))" + + + for trial = 1:n_trials # Load the EKP iterations @@ -84,7 +85,10 @@ for trial = 1:n_trials o_pairs = reduce(hcat, get_g(ekp)[min_iter:max_iter]) # Reduce space diagnostic matrix + Hu = diagnostic_mats[in_diag] + Hg = diagnostic_mats[out_diag] svdu = svd(Hu) + svdg = svd(Hg) #= # find by tolerance doesn't work well... From aab7c22815bb1a02ee3f6f157da1e8e55bf5fb8c Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 9 May 2025 17:37:11 -0700 Subject: [PATCH 09/35] nice formatting --- .../build_and_compare_diagnostic_matrices.jl | 199 ++++++++---------- .../common_inverse_problem.jl | 22 +- .../DimensionReduction/estimate_posteriors.jl | 103 ++++----- examples/DimensionReduction/forward_maps.jl | 4 +- .../generate_inverse_problem_data.jl | 30 +-- 5 files changed, 176 insertions(+), 182 deletions(-) diff --git a/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl index 3cfe7c042..c46d7c104 100644 --- a/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl @@ -4,16 +4,16 @@ using EnsembleKalmanProcesses.ParameterDistributions using Statistics using Distributions using Plots -using JLD2 +using JLD2 #Utilities -function cossim(x::VV1,y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} - return dot(x,y)/(norm(x)*norm(y)) +function cossim(x::VV1, y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} + return dot(x, y) / (norm(x) * norm(y)) end -function cossim_pos(x::VV1,y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} - return abs(cossim(x,y)) +function cossim_pos(x::VV1, y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} + return abs(cossim(x, y)) end function cossim_cols(X::AM1, Y::AM2) where {AM1 <: AbstractMatrix, AM2 <: AbstractMatrix} - return [cossim_pos(c1,c2) for (c1,c2) in zip(eachcol(X), eachcol(Y))] + return [cossim_pos(c1, c2) for (c1, c2) in zip(eachcol(X), eachcol(Y))] end n_samples = 2000 # paper uses 5e5 @@ -44,7 +44,7 @@ sim_Hg_ekp_prior = [] sim_Hu_ekp_final = [] sim_Hg_ekp_final = [] -for trial = 1:n_trials +for trial in 1:n_trials # Load the EKP iterations loaded = load("ekp_$(trial).jld2") @@ -53,73 +53,73 @@ for trial = 1:n_trials obs_noise_cov = loaded["obs_noise_cov"] y = loaded["y"] model = loaded["model"] - input_dim = size(get_u(ekp,1),1) - output_dim = size(get_g(ekp,1),1) + input_dim = size(get_u(ekp, 1), 1) + output_dim = size(get_g(ekp, 1), 1) prior_cov = cov(prior) prior_invrt = sqrt(inv(prior_cov)) prior_rt = sqrt(prior_cov) obs_invrt = sqrt(inv(obs_noise_cov)) obs_inv = inv(obs_noise_cov) - + # random samples prior_samples = sample(prior, n_samples) - + # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) @info "Construct good matrix ($(n_samples) samples of prior, perfect grad)" gradG_samples = jac_forward_map(prior_samples, model) - Hu = zeros(input_dim,input_dim) - Hg = zeros(output_dim,output_dim) + Hu = zeros(input_dim, input_dim) + Hg = zeros(output_dim, output_dim) for j in 1:n_samples - Hu .+= 1/n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt - Hg .+= 1/n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt + Hu .+= 1 / n_samples * 
prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt + Hg .+= 1 / n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt end - + # [1b] One-point approximation at mean value, with perfect grad @info "Construct with mean value (1 sample), perfect grad" prior_mean_appr = mean(prior) # approximate mean gradG_at_mean = jac_forward_map(prior_mean_appr, model)[1] # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf - Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt - Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt + Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt + Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt # [2a] One-point approximation at mean value with SL grad @info "Construct with mean value prior (1 sample), SL grad" - g = get_g(ekp,1) - u = get_u(ekp,1) + g = get_g(ekp, 1) + u = get_u(ekp, 1) N_ens = get_N_ens(ekp) - C_at_prior = cov([u;g], dims=2) # basic cross-cov + C_at_prior = cov([u; g], dims = 2) # basic cross-cov Cuu = C_at_prior[1:input_dim, 1:input_dim] - svdCuu = svd(Cuu) - nz = min(N_ens-1, input_dim) # nonzero sv's - pinvCuu = svdCuu.U[:,1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz,:] # can replace with localized covariance + svdCuu = svd(Cuu) + nz = min(N_ens - 1, input_dim) # nonzero sv's + pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_prior[input_dim+1:end,1:input_dim] -# SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. -# Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt -# Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt - Hu_ekp_prior = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt - Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt - + Cug = C_at_prior[(input_dim + 1):end, 1:input_dim] + # SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. + # Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt + # Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt + Hu_ekp_prior = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt + Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + # [2b] One-point approximation at mean value with SL grad @info "Construct with mean value final (1 sample), SL grad" final_it = length(get_g(ekp)) g = get_g(ekp, final_it) u = get_u(ekp, final_it) - C_at_final = cov([u;g], dims=2) # basic cross-cov + C_at_final = cov([u; g], dims = 2) # basic cross-cov Cuu = C_at_final[1:input_dim, 1:input_dim] - svdCuu = svd(Cuu) - nz = min(N_ens-1, input_dim) # nonzero sv's - pinvCuu = svdCuu.U[:,1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz,:] # can replace with localized covariance + svdCuu = svd(Cuu) + nz = min(N_ens - 1, input_dim) # nonzero sv's + pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_final[input_dim+1:end,1:input_dim] -# SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. 
-# Hu_ekp_final = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt # here still using prior roots not Cuu -# Hg_ekp_final = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt - Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt - Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt - + Cug = C_at_final[(input_dim + 1):end, 1:input_dim] + # SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. + # Hu_ekp_final = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt # here still using prior roots not Cuu + # Hg_ekp_final = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt + Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt + Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + # cosine similarity of evector directions svdHu = svd(Hu) svdHg = svd(Hg) @@ -167,33 +167,38 @@ for trial = 1:n_trials G_samples = forward_map(prior_samples, model)' svdG = svd(G_samples) # nonsquare, so permuted so evectors are V svdU = svd(prior_samples') - + push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) save( "diagnostic_matrices_$(trial).jld2", - "Hu", Hu, - "Hg", Hg, - "Hu_mean", Hu_mean, - "Hg_mean", Hg_mean, - "Hu_ekp_prior", Hu_ekp_prior, - "Hg_ekp_prior", Hg_ekp_prior, - "Hu_ekp_final", Hu_ekp_final, - "Hg_ekp_final", Hg_ekp_final, - "svdU", svdU, - "svdG", svdG, + "Hu", + Hu, + "Hg", + Hg, + "Hu_mean", + Hu_mean, + "Hg_mean", + Hg_mean, + "Hu_ekp_prior", + Hu_ekp_prior, + "Hg_ekp_prior", + Hg_ekp_prior, + "Hu_ekp_final", + Hu_ekp_final, + "Hg_ekp_final", + Hg_ekp_final, + "svdU", + svdU, + "svdG", + svdG, ) end using Plots.Measures -gr(size=(1.6*1200,600), legend=true, bottom_margin = 10mm, left_margin = 10mm) -default( - titlefont = 20, - legendfontsize = 12, - guidefont = 14, - tickfont = 14, -) +gr(size = (1.6 * 1200, 600), legend = true, bottom_margin = 10mm, left_margin = 10mm) +default(titlefont = 20, legendfontsize = 12, guidefont = 14, tickfont = 14) normal_Hg_evals = [ev ./ ev[1] for ev in Hg_evals] normal_Hg_mean_evals = [ev ./ ev[1] for ev in Hg_mean_evals] @@ -202,27 +207,27 @@ normal_Hg_ekp_final_evals = [ev ./ ev[1] for ev in Hg_ekp_final_evals] loaded1 = load("ekp_1.jld2") ekp_tmp = loaded1["ekp"] -input_dim = size(get_u(ekp_tmp,1),1) -output_dim = size(get_g(ekp_tmp,1),1) +input_dim = size(get_u(ekp_tmp, 1), 1) +output_dim = size(get_g(ekp_tmp, 1), 1) truncation = 15 -truncation = Int(minimum([truncation,input_dim, output_dim])) +truncation = Int(minimum([truncation, input_dim, output_dim])) # color names in https://github.com/JuliaGraphics/Colors.jl/blob/master/src/names_data.jl pg = plot( 1:truncation, - mean(sim_Hg_means)[1:truncation], - ribbon = (std(sim_Hg_means)/sqrt(n_trials))[1:truncation], + mean(sim_Hg_means)[1:truncation], + ribbon = (std(sim_Hg_means) / sqrt(n_trials))[1:truncation], color = :blue, label = "sim (samples v mean)", - legend=false, + legend = false, ) plot!( pg, 1:truncation, - mean(sim_Hg_ekp_prior)[1:truncation], - ribbon = (std(sim_Hg_ekp_prior)/sqrt(n_trials))[1:truncation], + mean(sim_Hg_ekp_prior)[1:truncation], + ribbon = (std(sim_Hg_ekp_prior) / sqrt(n_trials))[1:truncation], color = :red, alpha = 0.3, label = "sim (samples v mean-no-der) prior", @@ -230,19 +235,13 @@ plot!( plot!( pg, 1:truncation, - mean(sim_Hg_ekp_final)[1:truncation], - ribbon = (std(sim_Hg_ekp_final)/sqrt(n_trials))[1:truncation], + mean(sim_Hg_ekp_final)[1:truncation], + ribbon = (std(sim_Hg_ekp_final) / sqrt(n_trials))[1:truncation], color = :gold, label = "sim (samples v 
mean-no-der) final", ) -plot!( - pg, - 1:truncation, - mean(normal_Hg_evals)[1:truncation], - color = :black, - label = "normalized eval (samples)", -) +plot!(pg, 1:truncation, mean(normal_Hg_evals)[1:truncation], color = :black, label = "normalized eval (samples)") plot!( pg, 1:truncation, @@ -261,20 +260,14 @@ plot!( label = "normalized eval (mean-no-der)", ) -plot!( - pg, - 1:truncation, - mean(normal_Hg_ekp_final_evals)[1:truncation], - color = :black, - alpha = 0.3, -) +plot!(pg, 1:truncation, mean(normal_Hg_ekp_final_evals)[1:truncation], color = :black, alpha = 0.3) plot!( pg, 1:truncation, - mean(sim_G_samples)[1:truncation], - ribbon = (std(sim_G_samples)/sqrt(n_trials))[1:truncation], + mean(sim_G_samples)[1:truncation], + ribbon = (std(sim_G_samples) / sqrt(n_trials))[1:truncation], color = :green, label = "similarity (PCA)", ) @@ -290,19 +283,13 @@ normal_Hu_ekp_final_evals = [ev ./ ev[1] for ev in Hu_ekp_final_evals] pu = plot( 1:truncation, - mean(sim_Hu_means)[1:truncation], - ribbon = (std(sim_Hu_means)/sqrt(n_trials))[1:truncation], + mean(sim_Hu_means)[1:truncation], + ribbon = (std(sim_Hu_means) / sqrt(n_trials))[1:truncation], color = :blue, label = "sim (samples v mean)", ) -plot!( - pu, - 1:truncation, - mean(normal_Hu_evals)[1:truncation], - color = :black, - label = "normalized eval (samples)", -) +plot!(pu, 1:truncation, mean(normal_Hu_evals)[1:truncation], color = :black, label = "normalized eval (samples)") plot!( pu, 1:truncation, @@ -319,19 +306,13 @@ plot!( alpha = 0.3, label = "normalized eval (mean-no-der)", ) -plot!( - pu, - 1:truncation, - mean(normal_Hu_ekp_final_evals)[1:truncation], - color = :black, - alpha = 0.3, -) +plot!(pu, 1:truncation, mean(normal_Hu_ekp_final_evals)[1:truncation], color = :black, alpha = 0.3) plot!( pu, 1:truncation, - mean(sim_U_samples)[1:truncation], - ribbon = (std(sim_U_samples)/sqrt(n_trials))[1:truncation], + mean(sim_U_samples)[1:truncation], + ribbon = (std(sim_U_samples) / sqrt(n_trials))[1:truncation], color = :green, label = "similarity (PCA)", ) @@ -339,8 +320,8 @@ plot!( plot!( pu, 1:truncation, - mean(sim_Hu_ekp_prior)[1:truncation], - ribbon = (std(sim_Hu_ekp_prior)/sqrt(n_trials))[1:truncation], + mean(sim_Hu_ekp_prior)[1:truncation], + ribbon = (std(sim_Hu_ekp_prior) / sqrt(n_trials))[1:truncation], color = :red, alpha = 0.3, label = "sim (samples v mean-no-der) prior", @@ -348,8 +329,8 @@ plot!( plot!( pu, 1:truncation, - mean(sim_Hu_ekp_final)[1:truncation], - ribbon = (std(sim_Hu_ekp_final)/sqrt(n_trials))[1:truncation], + mean(sim_Hu_ekp_final)[1:truncation], + ribbon = (std(sim_Hu_ekp_final) / sqrt(n_trials))[1:truncation], color = :gold, label = "sim (samples v mean-no-der) final", ) @@ -360,5 +341,3 @@ layout = @layout [a b] p = plot(pu, pg, layout = layout) savefig(p, "spectrum_comparison.png") - - diff --git a/examples/DimensionReduction/common_inverse_problem.jl b/examples/DimensionReduction/common_inverse_problem.jl index f048a4d56..7354a3b39 100644 --- a/examples/DimensionReduction/common_inverse_problem.jl +++ b/examples/DimensionReduction/common_inverse_problem.jl @@ -12,31 +12,31 @@ function linear_exp_inverse_problem(input_dim, output_dim, rng) # prior γ0 = 4.0 β_γ = -2 - Γ = Diagonal([γ0 * (1.0*j)^β_γ for j in 1:input_dim]) - prior_dist = MvNormal(zeros(input_dim),Γ) + Γ = Diagonal([γ0 * (1.0 * j)^β_γ for j in 1:input_dim]) + prior_dist = MvNormal(zeros(input_dim), Γ) prior = ParameterDistribution( Dict( "distribution" => Parameterized(prior_dist), - "constraint" => 
repeat([no_constraint()],input_dim), + "constraint" => repeat([no_constraint()], input_dim), "name" => "param_$(input_dim)", ), ) - + # forward map # random linear-exp forward map from Stewart 1980: https://www.jstor.org/stable/2156882?seq=2 - U = qr(randn(rng, (output_dim,output_dim))).Q - V = qr(randn(rng, (input_dim,input_dim))).Q + U = qr(randn(rng, (output_dim, output_dim))).Q + V = qr(randn(rng, (input_dim, input_dim))).Q λ0 = 100.0 β_λ = -1 - Λ = Diagonal([λ0 * (1.0*j)^β_λ for j in 1:output_dim]) - A = U*Λ*V[1:output_dim,:] # output x input + Λ = Diagonal([λ0 * (1.0 * j)^β_λ for j in 1:output_dim]) + A = U * Λ * V[1:output_dim, :] # output x input model = LinearExp(input_dim, output_dim, A) - + # generate data sample obs_noise_std = 1.0 - obs_noise_cov = (obs_noise_std^2)*I(output_dim) + obs_noise_cov = (obs_noise_std^2) * I(output_dim) noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) - true_parameter = reshape(ones(input_dim),:,1) + true_parameter = reshape(ones(input_dim), :, 1) y = vec(forward_map(true_parameter, model) + noise) return prior, y, obs_noise_cov, model, true_parameter end diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl index 66f3c3088..6f42908f5 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -33,7 +33,7 @@ out_diag = "Hg_ekp_prior" -for trial = 1:n_trials +for trial in 1:n_trials # Load the EKP iterations loaded = load("ekp_$(trial).jld2") @@ -42,45 +42,44 @@ for trial = 1:n_trials obs_noise_cov = loaded["obs_noise_cov"] y = loaded["y"] model = loaded["model"] - input_dim = size(get_u(ekp,1),1) - output_dim = size(get_g(ekp,1),1) + input_dim = size(get_u(ekp, 1), 1) + output_dim = size(get_g(ekp, 1), 1) prior_cov = cov(prior) prior_invrt = sqrt(inv(prior_cov)) prior_rt = sqrt(prior_cov) obs_invrt = sqrt(inv(obs_noise_cov)) obs_inv = inv(obs_noise_cov) - + # Load diagnostic container diagnostic_mats = load("diagnostic_matrices_$(trial).jld2") # [1] solve the problem with EKS - directly prior, y, obs_noise_cov, model, true_parameter = linear_exp_inverse_problem(input_dim, output_dim, rng) - + n_ensemble = 100 n_iters_max = 50 initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng) - + n_iters = [0] for i in 1:n_iters_max params_i = get_ϕ_final(prior, ekp) G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) 
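        # update_ensemble! returns `nothing` until the scheduler signals termination,
        # so the check below records the last completed iteration and exits the loop early.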
terminate = update_ensemble!(ekp, G_ens) if !isnothing(terminate) - n_iters[1] = i-1 + n_iters[1] = i - 1 break end end ekp_u = get_u(ekp) ekp_g = get_g(ekp) - + # [2] Create emulator in truncated space, and run EKS on this - min_iter = - max_iter = 8 + min_iter = max_iter = 8 i_pairs = reduce(hcat, get_u(ekp)[min_iter:max_iter]) o_pairs = reduce(hcat, get_g(ekp)[min_iter:max_iter]) @@ -96,105 +95,115 @@ for trial = 1:n_trials r_in_vec = accumulate(+,svdu.S) ./sum(svdu.S) r_in = sum(r_in_vec .< tol) + 1 # number evals needed for "tol" amount of information =# - U_r = svdu.V[:,1:r_in] - + U_r = svdu.V[:, 1:r_in] + #= svdg = svd(Hg) r_out_vec = accumulate(+,svdg.S) ./sum(svdg.S) r_out = sum(r_out_vec .< tol) + 1 # number evals needed for "tol" amount of information =# - V_r = svdg.V[:,1:r_out] - + V_r = svdg.V[:, 1:r_out] + X_r = U_r' * prior_invrt * i_pairs Y_r = V_r' * obs_invrt * o_pairs - + # true - true_parameter = reshape(ones(input_dim),:,1) + true_parameter = reshape(ones(input_dim), :, 1) true_r = U_r' * prior_invrt * true_parameter y_r = V_r' * obs_invrt * y # [2a] exp-cubic model for regressor - red_model_ids = ["expcubic1d","G"] + red_model_ids = ["expcubic1d", "G"] red_model_id = red_model_ids[2] if red_model_id == "expcubic1d" Ylb = min(1, minimum(Y_r) - abs(mean(Y_r))) # some loose lower bound logY_r = log.(Y_r .- Ylb) - β = logY_r / [ones(size(X_r)); X_r; X_r.^2; X_r.^3] # = ([1 X_r]' \ Y_r')' + β = logY_r / [ones(size(X_r)); X_r; X_r .^ 2; X_r .^ 3] # = ([1 X_r]' \ Y_r')' # invert relationship by # exp(β*X_r) + Ylb - if r_in ==1 & r_out == 1 - sc = scatter(X_r,logY_r) + if r_in == 1 & r_out == 1 + sc = scatter(X_r, logY_r) xmin = minimum(X_r) xmax = maximum(X_r) - xrange = reshape(range(xmin,xmax,100), 1, :) - expcubic = (β[4]*xrange.^3 + β[3]*xrange.^2 .+ β[2]*xrange .+ β[1])' - plot!(sc, xrange, expcubic, legend=false) + xrange = reshape(range(xmin, xmax, 100), 1, :) + expcubic = (β[4] * xrange .^ 3 + β[3] * xrange .^ 2 .+ β[2] * xrange .+ β[1])' + plot!(sc, xrange, expcubic, legend = false) hline!(sc, [log.(y_r .- Ylb)]) savefig(sc, "linreg_learn_scatter.png") end - end + end # now apply EKS to the new problem initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) initial_r = U_r' * prior_invrt * initial_ensemble - prior_r = ParameterDistribution(Samples(U_r' * prior_invrt * sample(rng,prior,1000)), repeat([no_constraint()], r_in), "prior_r") - + prior_r = ParameterDistribution( + Samples(U_r' * prior_invrt * sample(rng, prior, 1000)), + repeat([no_constraint()], r_in), + "prior_r", + ) + obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng = rng) - + n_iters = [0] for i in 1:n_iters_max params_i = get_ϕ_final(prior_r, ekp_r) if red_model_id == "expcubic1d" - G_ens = exp.(β[4]*(params_i).^3 .+ β[3]*(params_i).^2 .+ β[2]*params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space + G_ens = exp.(β[4] * (params_i) .^ 3 .+ β[3] * (params_i) .^ 2 .+ β[2] * params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space elseif red_model_id == "G" sv_in = reduce(hcat, repeat([svdu.S], n_ensemble)) # repeat SVs, then replace first by params - sv_in[1:size(params_i,1),:] = params_i + sv_in[1:size(params_i, 1), :] = params_i # evaluate true G - G_ens_full = reduce(hcat, [forward_map(prior_rt * svdu.V * sv, model) for sv in eachcol(sv_in)] ) + G_ens_full = reduce(hcat, [forward_map(prior_rt * svdu.V * sv, model) for sv in 
eachcol(sv_in)]) # project data back G_ens = V_r' * obs_invrt * G_ens_full end - + terminate = update_ensemble!(ekp_r, G_ens) if !isnothing(terminate) - n_iters[1] = i-1 + n_iters[1] = i - 1 break end end ekp_r_u = get_u(ekp_r) ekp_r_g = get_g(ekp_r) - + # map to same space: [here in reduced space first] - spinup = 10*n_ensemble - ekp_r_u = reduce(hcat,ekp_r_u) - ekp_r_g = reduce(hcat,ekp_r_g) + spinup = 10 * n_ensemble + ekp_r_u = reduce(hcat, ekp_r_u) + ekp_r_g = reduce(hcat, ekp_r_g) ekp_u = reduce(hcat, ekp_u) ekp_g = reduce(hcat, ekp_g) projected_ekp_u = U_r' * prior_invrt * ekp_u projected_ekp_g = V_r' * obs_invrt * ekp_g - - if r_in ==1 && r_out == 1 - pp1 = histogram(projected_ekp_u[:,spinup:end]', color= :gray, label="projected G", title="projected EKP samples (input)", legend=true) - histogram!(pp1, ekp_r_u[:,spinup:end]', color = :blue, label="reduced") + + if r_in == 1 && r_out == 1 + pp1 = histogram( + projected_ekp_u[:, spinup:end]', + color = :gray, + label = "projected G", + title = "projected EKP samples (input)", + legend = true, + ) + histogram!(pp1, ekp_r_u[:, spinup:end]', color = :blue, label = "reduced") # pp1 = histogram(ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear", yscale=:log10) - pp2 = histogram(projected_ekp_g[:,spinup:end]', color= :gray, title ="projected EKP samples (output)") - histogram!(pp2, ekp_r_g[:,spinup:end]', color = :blue, legend=false) + pp2 = histogram(projected_ekp_g[:, spinup:end]', color = :gray, title = "projected EKP samples (output)") + histogram!(pp2, ekp_r_g[:, spinup:end]', color = :blue, legend = false) #pp2 = histogram!(ekp_rlin_g[:,spinup:end]', color = :blue, legend=false, yscale=:log10) l = @layout [a b] - pp = plot(pp1, pp2, layout=l) - savefig(pp,"projected_histograms.png") + pp = plot(pp1, pp2, layout = l) + savefig(pp, "projected_histograms.png") end # compare in original space mean_final = get_u_mean_final(ekp) - mean_final_in_red = U_r' * prior_invrt * mean_final + mean_final_in_red = U_r' * prior_invrt * mean_final mean_red_final = get_u_mean_final(ekp_r) sv_in = svdu.S sv_in[1:r_in] = mean_red_final mean_red_final_full = prior_rt * svdu.V * sv_in - + @info """ Reduced space dimension(input, output): $((r_in, r_out)) @@ -223,5 +232,5 @@ for trial = 1:n_trials ) plot!(pp, posterior_dist) =# - + end diff --git a/examples/DimensionReduction/forward_maps.jl b/examples/DimensionReduction/forward_maps.jl index 7e904346f..ec21013b0 100644 --- a/examples/DimensionReduction/forward_maps.jl +++ b/examples/DimensionReduction/forward_maps.jl @@ -16,9 +16,9 @@ end function jac_forward_map(X::AM, model::LE) where {AM <: AbstractMatrix, LE <: LinearExp} # dGi / dXj = G_ij exp(x_j) = G.*exp.(mat with repeated x_j rows) # return [G * exp.(Diagonal(r)) for r in eachrow(X')] # correct but extra multiplies - return [model.G .* exp.(reshape(c,1,:)) for c in eachcol(X)] + return [model.G .* exp.(reshape(c, 1, :)) for c in eachcol(X)] end function jac_forward_map(X::AV, model::LE) where {AV <: AbstractVector, LE <: LinearExp} - return jac_forward_map(reshape(X,:,1), model) + return jac_forward_map(reshape(X, :, 1), model) end diff --git a/examples/DimensionReduction/generate_inverse_problem_data.jl b/examples/DimensionReduction/generate_inverse_problem_data.jl index b4e342308..5c226dab1 100644 --- a/examples/DimensionReduction/generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/generate_inverse_problem_data.jl @@ -14,35 +14,41 @@ include("common_inverse_problem.jl") n_trials = 20 @info "solving $(n_trials) inverse 
problems with different random forward maps" -for trial = 1:n_trials +for trial in 1:n_trials prior, y, obs_noise_cov, model, true_parameter = linear_exp_inverse_problem(input_dim, output_dim, rng) - + n_ensemble = 80 n_iters_max = 20 - + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, TransformInversion(); rng = rng) - + n_iters = [0] for i in 1:n_iters_max params_i = get_ϕ_final(prior, ekp) G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) terminate = update_ensemble!(ekp, G_ens) if !isnothing(terminate) - n_iters[1] = i-1 + n_iters[1] = i - 1 break end end - + @info "Iteration of posterior convergence: $(n_iters[1])" @info "Loss over iterations:" get_error(ekp) save( "ekp_$(trial).jld2", - "ekp", ekp, - "prior", prior, - "y", y, - "obs_noise_cov", obs_noise_cov, - "model", model, - "true_parameter", true_parameter + "ekp", + ekp, + "prior", + prior, + "y", + y, + "obs_noise_cov", + obs_noise_cov, + "model", + model, + "true_parameter", + true_parameter, ) end From a44d5bd4a513f0f34f48bddffd5b3682b93830af Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Mon, 12 May 2025 10:44:20 -0700 Subject: [PATCH 10/35] Add H_u^y --- .../build_and_compare_diagnostic_matrices.jl | 22 ++++++++++++++++++- .../DimensionReduction/estimate_posteriors.jl | 3 +-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl index c46d7c104..2d9f5257b 100644 --- a/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl @@ -43,6 +43,7 @@ sim_Hu_ekp_prior = [] sim_Hg_ekp_prior = [] sim_Hu_ekp_final = [] sim_Hg_ekp_final = [] +sim_Huy_ekp_final = [] for trial in 1:n_trials @@ -113,13 +114,18 @@ for trial in 1:n_trials nz = min(N_ens - 1, input_dim) # nonzero sv's pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_final[(input_dim + 1):end, 1:input_dim] + Cug = C_at_final[(input_dim + 1):end, 1:input_dim] # TODO: Isn't this Cgu? # SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. 
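    # Interpretation (a sketch, using the names defined above): Cug holds the ensemble
    # cross-covariance Cov(g, u), so Cug * pinvCuu is the statistical-linearisation estimate
    # of ∇G at the final ensemble; the matrices built below are therefore gradient-free
    # analogues of Hu and Hg, whitened with the ensemble covariance Cuu instead of the prior.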
# Hu_ekp_final = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt # here still using prior roots not Cuu # Hg_ekp_final = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + myCug = Cug' + Huy_ekp_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( + (y - gg) * (y - gg)' for gg in eachcol(g) + )*obs_inv*myCug' * Cuu_invrt + # cosine similarity of evector directions svdHu = svd(Hu) svdHg = svd(Hg) @@ -129,6 +135,7 @@ for trial in 1:n_trials svdHg_ekp_prior = svd(Hg_ekp_prior) svdHu_ekp_final = svd(Hu_ekp_final) svdHg_ekp_final = svd(Hg_ekp_final) + svdHuy_ekp_final = svd(Huy_ekp_final) @info """ samples -> mean @@ -147,6 +154,8 @@ for trial in 1:n_trials $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) + y-aware -> samples + $(cossim_cols(svdHu.V, svdHuy_ekp_final.V)[1:3]) """ push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) @@ -162,6 +171,7 @@ for trial in 1:n_trials push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) + push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) # cosine similarity to output svd from samples G_samples = forward_map(prior_samples, model)' @@ -189,6 +199,8 @@ for trial in 1:n_trials Hu_ekp_final, "Hg_ekp_final", Hg_ekp_final, + "Huy_ekp_final", + Huy_ekp_final, "svdU", svdU, "svdG", @@ -334,6 +346,14 @@ plot!( color = :gold, label = "sim (samples v mean-no-der) final", ) +plot!( + pu, + 1:truncation, + mean(sim_Huy_ekp_final)[1:truncation], + ribbon = (std(sim_Huy_ekp_final) / sqrt(n_trials))[1:truncation], + color = :purple, + label = "sim (samples v y-aware) final", +) title!(pu, "Similarity of spectrum of input diagnostic") diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl index 6f42908f5..6c218db47 100644 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ b/examples/DimensionReduction/estimate_posteriors.jl @@ -24,7 +24,7 @@ if !isfile("ekp_1.jld2") include("generate_inverse_problem_data.jl") # will run n trials end if !isfile("diagnostic_matrices_1.jld2") - include("build_an_compare_diagnostic_matrices.jl") # will run n trials + include("build_and_compare_diagnostic_matrices.jl") # will run n trials end in_diag = "Hu_ekp_prior" @@ -108,7 +108,6 @@ for trial in 1:n_trials Y_r = V_r' * obs_invrt * o_pairs # true - true_parameter = reshape(ones(input_dim), :, 1) true_r = U_r' * prior_invrt * true_parameter y_r = V_r' * obs_invrt * y From 144553e989f85d559034d4c8e2037e5cd8fc22a6 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 5 Jun 2025 14:46:51 -0700 Subject: [PATCH 11/35] Add MCMC sampling; add first version of Hgy; add Lorenz; refactor code --- examples/DimensionReduction/Project.toml | 6 + .../DimensionReduction/estimate_posteriors.jl | 235 ---------------- examples/DimensionReduction/figures/.gitkeep | 0 examples/DimensionReduction/forward_maps.jl | 24 -- .../generate_inverse_problem_data.jl | 54 ---- .../problems/forward_maps.jl | 1 + .../problem_linear_exp.jl} | 32 ++- .../problems/problem_lorenz.jl | 258 ++++++++++++++++++ examples/DimensionReduction/settings.jl | 28 ++ .../step1_generate_inverse_problem_data.jl | 51 ++++ ..._build_and_compare_diagnostic_matrices.jl} 
| 233 +++++++++------- .../step3_estimate_posteriors.jl | 194 +++++++++++++ 12 files changed, 706 insertions(+), 410 deletions(-) delete mode 100644 examples/DimensionReduction/estimate_posteriors.jl create mode 100644 examples/DimensionReduction/figures/.gitkeep delete mode 100644 examples/DimensionReduction/forward_maps.jl delete mode 100644 examples/DimensionReduction/generate_inverse_problem_data.jl create mode 100644 examples/DimensionReduction/problems/forward_maps.jl rename examples/DimensionReduction/{common_inverse_problem.jl => problems/problem_linear_exp.jl} (59%) create mode 100644 examples/DimensionReduction/problems/problem_lorenz.jl create mode 100644 examples/DimensionReduction/settings.jl create mode 100644 examples/DimensionReduction/step1_generate_inverse_problem_data.jl rename examples/DimensionReduction/{build_and_compare_diagnostic_matrices.jl => step2_build_and_compare_diagnostic_matrices.jl} (57%) create mode 100644 examples/DimensionReduction/step3_estimate_posteriors.jl diff --git a/examples/DimensionReduction/Project.toml b/examples/DimensionReduction/Project.toml index bfe820e30..be01cd3d9 100644 --- a/examples/DimensionReduction/Project.toml +++ b/examples/DimensionReduction/Project.toml @@ -1,7 +1,13 @@ [deps] +AdvancedMH = "5b7e9947-ddc0-4b3f-9b55-0d8042f74170" +ChunkSplitters = "ae650224-84b6-46f8-82ea-d812ca08434e" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" EnsembleKalmanProcesses = "aa8a2aa5-91d8-4396-bcef-d4f2ec43552d" +FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/examples/DimensionReduction/estimate_posteriors.jl b/examples/DimensionReduction/estimate_posteriors.jl deleted file mode 100644 index 6c218db47..000000000 --- a/examples/DimensionReduction/estimate_posteriors.jl +++ /dev/null @@ -1,235 +0,0 @@ -# Solve the problem with EKS - - -using Plots -using EnsembleKalmanProcesses -using Random -using JLD2 - -rng_seed = 41 -rng = Random.MersenneTwister(rng_seed) - -input_dim = 500 -output_dim = 50 - -include("common_inverse_problem.jl") - -n_trials = 10 - -r_in = 6 -r_out = 1 - - -if !isfile("ekp_1.jld2") - include("generate_inverse_problem_data.jl") # will run n trials -end -if !isfile("diagnostic_matrices_1.jld2") - include("build_and_compare_diagnostic_matrices.jl") # will run n trials -end - -in_diag = "Hu_ekp_prior" -out_diag = "Hg_ekp_prior" -@info "Diagnostic matrices = ($(in_diag), $(out_diag))" - - - -for trial in 1:n_trials - - # Load the EKP iterations - loaded = load("ekp_$(trial).jld2") - ekp = loaded["ekp"] - prior = loaded["prior"] - obs_noise_cov = loaded["obs_noise_cov"] - y = loaded["y"] - model = loaded["model"] - input_dim = size(get_u(ekp, 1), 1) - output_dim = size(get_g(ekp, 1), 1) - - prior_cov = cov(prior) - prior_invrt = sqrt(inv(prior_cov)) - prior_rt = sqrt(prior_cov) - obs_invrt = sqrt(inv(obs_noise_cov)) - obs_inv = inv(obs_noise_cov) - - # Load diagnostic container - diagnostic_mats = load("diagnostic_matrices_$(trial).jld2") - - - # [1] solve the problem with EKS - directly - prior, y, obs_noise_cov, model, true_parameter = linear_exp_inverse_problem(input_dim, output_dim, rng) - - n_ensemble = 100 - n_iters_max = 50 - - 
initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng) - - n_iters = [0] - for i in 1:n_iters_max - params_i = get_ϕ_final(prior, ekp) - G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) - terminate = update_ensemble!(ekp, G_ens) - if !isnothing(terminate) - n_iters[1] = i - 1 - break - end - end - - ekp_u = get_u(ekp) - ekp_g = get_g(ekp) - - # [2] Create emulator in truncated space, and run EKS on this - min_iter = max_iter = 8 - i_pairs = reduce(hcat, get_u(ekp)[min_iter:max_iter]) - o_pairs = reduce(hcat, get_g(ekp)[min_iter:max_iter]) - - # Reduce space diagnostic matrix - Hu = diagnostic_mats[in_diag] - Hg = diagnostic_mats[out_diag] - svdu = svd(Hu) - svdg = svd(Hg) - - #= - # find by tolerance doesn't work well... - tol = 0.999 # <1 - r_in_vec = accumulate(+,svdu.S) ./sum(svdu.S) - r_in = sum(r_in_vec .< tol) + 1 # number evals needed for "tol" amount of information - =# - U_r = svdu.V[:, 1:r_in] - - #= - svdg = svd(Hg) - r_out_vec = accumulate(+,svdg.S) ./sum(svdg.S) - r_out = sum(r_out_vec .< tol) + 1 # number evals needed for "tol" amount of information - =# - V_r = svdg.V[:, 1:r_out] - - X_r = U_r' * prior_invrt * i_pairs - Y_r = V_r' * obs_invrt * o_pairs - - # true - true_r = U_r' * prior_invrt * true_parameter - y_r = V_r' * obs_invrt * y - - # [2a] exp-cubic model for regressor - red_model_ids = ["expcubic1d", "G"] - red_model_id = red_model_ids[2] - if red_model_id == "expcubic1d" - Ylb = min(1, minimum(Y_r) - abs(mean(Y_r))) # some loose lower bound - logY_r = log.(Y_r .- Ylb) - β = logY_r / [ones(size(X_r)); X_r; X_r .^ 2; X_r .^ 3] # = ([1 X_r]' \ Y_r')' - # invert relationship by - # exp(β*X_r) + Ylb - if r_in == 1 & r_out == 1 - sc = scatter(X_r, logY_r) - xmin = minimum(X_r) - xmax = maximum(X_r) - xrange = reshape(range(xmin, xmax, 100), 1, :) - expcubic = (β[4] * xrange .^ 3 + β[3] * xrange .^ 2 .+ β[2] * xrange .+ β[1])' - plot!(sc, xrange, expcubic, legend = false) - hline!(sc, [log.(y_r .- Ylb)]) - savefig(sc, "linreg_learn_scatter.png") - end - end - - # now apply EKS to the new problem - initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - initial_r = U_r' * prior_invrt * initial_ensemble - prior_r = ParameterDistribution( - Samples(U_r' * prior_invrt * sample(rng, prior, 1000)), - repeat([no_constraint()], r_in), - "prior_r", - ) - - obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr - ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng = rng) - - n_iters = [0] - for i in 1:n_iters_max - params_i = get_ϕ_final(prior_r, ekp_r) - if red_model_id == "expcubic1d" - G_ens = exp.(β[4] * (params_i) .^ 3 .+ β[3] * (params_i) .^ 2 .+ β[2] * params_i .+ β[1]) .+ Ylb # use linear forward map in reduced space - elseif red_model_id == "G" - sv_in = reduce(hcat, repeat([svdu.S], n_ensemble)) # repeat SVs, then replace first by params - sv_in[1:size(params_i, 1), :] = params_i - # evaluate true G - G_ens_full = reduce(hcat, [forward_map(prior_rt * svdu.V * sv, model) for sv in eachcol(sv_in)]) - # project data back - G_ens = V_r' * obs_invrt * G_ens_full - end - - terminate = update_ensemble!(ekp_r, G_ens) - if !isnothing(terminate) - n_iters[1] = i - 1 - break - end - end - ekp_r_u = get_u(ekp_r) - ekp_r_g = get_g(ekp_r) - - # map to same space: [here in reduced space first] - spinup = 10 * n_ensemble - ekp_r_u = reduce(hcat, ekp_r_u) - 
ekp_r_g = reduce(hcat, ekp_r_g) - ekp_u = reduce(hcat, ekp_u) - ekp_g = reduce(hcat, ekp_g) - projected_ekp_u = U_r' * prior_invrt * ekp_u - projected_ekp_g = V_r' * obs_invrt * ekp_g - - if r_in == 1 && r_out == 1 - pp1 = histogram( - projected_ekp_u[:, spinup:end]', - color = :gray, - label = "projected G", - title = "projected EKP samples (input)", - legend = true, - ) - histogram!(pp1, ekp_r_u[:, spinup:end]', color = :blue, label = "reduced") - # pp1 = histogram(ekp_rlin_u[:,spinup:end]', color = :blue, label="reduced linear", yscale=:log10) - - pp2 = histogram(projected_ekp_g[:, spinup:end]', color = :gray, title = "projected EKP samples (output)") - histogram!(pp2, ekp_r_g[:, spinup:end]', color = :blue, legend = false) - #pp2 = histogram!(ekp_rlin_g[:,spinup:end]', color = :blue, legend=false, yscale=:log10) - l = @layout [a b] - pp = plot(pp1, pp2, layout = l) - savefig(pp, "projected_histograms.png") - end - - # compare in original space - mean_final = get_u_mean_final(ekp) - mean_final_in_red = U_r' * prior_invrt * mean_final - mean_red_final = get_u_mean_final(ekp_r) - sv_in = svdu.S - sv_in[1:r_in] = mean_red_final - mean_red_final_full = prior_rt * svdu.V * sv_in - - @info """ - - Reduced space dimension(input, output): $((r_in, r_out)) - - Norm of final-mean to true in reduced space: - Using Full space optimization: $((1.0/r_in)*norm(mean_final_in_red - true_r)) - Using Red. space optimization: $((1.0/r_in)*norm(mean_red_final - true_r)) - - Norm of final-mean to true in full space: - Using Full space optimization: $((1.0/input_dim)*norm(mean_final - true_parameter)) - Using Red. space optimization: $((1.0/input_dim)*norm(mean_red_final_full - true_parameter)) - """ - #@info norm(cov(get_u_final(ekp),dims=2) - cov(get_u_final(ekp_r), dims=2)) - #= - # 500 dim marginal plot.. 
- pp = plot(prior) - - spinup_iters = n_iters_max - 20 - posterior_samples = reduce(hcat,get_ϕ(prior,ekp)[spinup_iters:end]) # flatten over iterations (n_dim x n_particles) - posterior_dist = ParameterDistribution( - Dict( - "distribution" => Samples(posterior_samples), - "name" => "posterior samples", - "constraint" => repeat([no_constraint()], input_dim), - ), - ) - plot!(pp, posterior_dist) - =# - -end diff --git a/examples/DimensionReduction/figures/.gitkeep b/examples/DimensionReduction/figures/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/examples/DimensionReduction/forward_maps.jl b/examples/DimensionReduction/forward_maps.jl deleted file mode 100644 index ec21013b0..000000000 --- a/examples/DimensionReduction/forward_maps.jl +++ /dev/null @@ -1,24 +0,0 @@ -abstract type ForwardMapType end - -## G*exp(X) -struct LinearExp{AM <: AbstractMatrix} <: ForwardMapType - input_dim::Int - output_dim::Int - G::AM -end - -# columns of X are samples -function forward_map(X::AVorM, model::LE) where {LE <: LinearExp, AVorM <: AbstractVecOrMat} - return model.G * exp.(X) -end - -# columns of X are samples -function jac_forward_map(X::AM, model::LE) where {AM <: AbstractMatrix, LE <: LinearExp} - # dGi / dXj = G_ij exp(x_j) = G.*exp.(mat with repeated x_j rows) - # return [G * exp.(Diagonal(r)) for r in eachrow(X')] # correct but extra multiplies - return [model.G .* exp.(reshape(c, 1, :)) for c in eachcol(X)] -end - -function jac_forward_map(X::AV, model::LE) where {AV <: AbstractVector, LE <: LinearExp} - return jac_forward_map(reshape(X, :, 1), model) -end diff --git a/examples/DimensionReduction/generate_inverse_problem_data.jl b/examples/DimensionReduction/generate_inverse_problem_data.jl deleted file mode 100644 index 5c226dab1..000000000 --- a/examples/DimensionReduction/generate_inverse_problem_data.jl +++ /dev/null @@ -1,54 +0,0 @@ -using Plots -using EnsembleKalmanProcesses -using Random -using JLD2 - -rng_seed = 41 -rng = Random.MersenneTwister(rng_seed) - -input_dim = 500 -output_dim = 50 - -include("common_inverse_problem.jl") - -n_trials = 20 -@info "solving $(n_trials) inverse problems with different random forward maps" - -for trial in 1:n_trials - prior, y, obs_noise_cov, model, true_parameter = linear_exp_inverse_problem(input_dim, output_dim, rng) - - n_ensemble = 80 - n_iters_max = 20 - - initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, TransformInversion(); rng = rng) - - n_iters = [0] - for i in 1:n_iters_max - params_i = get_ϕ_final(prior, ekp) - G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) 
- terminate = update_ensemble!(ekp, G_ens) - if !isnothing(terminate) - n_iters[1] = i - 1 - break - end - end - - @info "Iteration of posterior convergence: $(n_iters[1])" - @info "Loss over iterations:" get_error(ekp) - save( - "ekp_$(trial).jld2", - "ekp", - ekp, - "prior", - prior, - "y", - y, - "obs_noise_cov", - obs_noise_cov, - "model", - model, - "true_parameter", - true_parameter, - ) -end diff --git a/examples/DimensionReduction/problems/forward_maps.jl b/examples/DimensionReduction/problems/forward_maps.jl new file mode 100644 index 000000000..79c93504c --- /dev/null +++ b/examples/DimensionReduction/problems/forward_maps.jl @@ -0,0 +1 @@ +abstract type ForwardMapType end diff --git a/examples/DimensionReduction/common_inverse_problem.jl b/examples/DimensionReduction/problems/problem_linear_exp.jl similarity index 59% rename from examples/DimensionReduction/common_inverse_problem.jl rename to examples/DimensionReduction/problems/problem_linear_exp.jl index 7354a3b39..cbba538a6 100644 --- a/examples/DimensionReduction/common_inverse_problem.jl +++ b/examples/DimensionReduction/problems/problem_linear_exp.jl @@ -7,8 +7,7 @@ using Distributions # Inverse problem will be taken from (Cui, Tong, 2021) https://arxiv.org/pdf/2101.02417, example 7.1 include("forward_maps.jl") - -function linear_exp_inverse_problem(input_dim, output_dim, rng) +function linear_exp(input_dim, output_dim, rng) # prior γ0 = 4.0 β_γ = -2 @@ -36,7 +35,34 @@ function linear_exp_inverse_problem(input_dim, output_dim, rng) obs_noise_std = 1.0 obs_noise_cov = (obs_noise_std^2) * I(output_dim) noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) - true_parameter = reshape(ones(input_dim), :, 1) + # true_parameter = reshape(ones(input_dim), :, 1) + true_parameter = rand(prior_dist) y = vec(forward_map(true_parameter, model) + noise) return prior, y, obs_noise_cov, model, true_parameter end + + +## G*exp(X) +struct LinearExp{AM <: AbstractMatrix} <: ForwardMapType + input_dim::Int + output_dim::Int + G::AM +end + +# columns of X are samples +function forward_map(X::AVorM, model::LE) where {LE <: LinearExp, AVorM <: AbstractVecOrMat} + return model.G * exp.(X) +end + +has_jac(::LinearExp) = true + +# columns of X are samples +function jac_forward_map(X::AM, model::LE) where {AM <: AbstractMatrix, LE <: LinearExp} + # dGi / dXj = G_ij exp(x_j) = G.*exp.(mat with repeated x_j rows) + # return [G * exp.(Diagonal(r)) for r in eachrow(X')] # correct but extra multiplies + return [model.G .* exp.(reshape(c, 1, :)) for c in eachcol(X)] +end + +function jac_forward_map(X::AV, model::LE) where {AV <: AbstractVector, LE <: LinearExp} + return jac_forward_map(reshape(X, :, 1), model) +end diff --git a/examples/DimensionReduction/problems/problem_lorenz.jl b/examples/DimensionReduction/problems/problem_lorenz.jl new file mode 100644 index 000000000..83eee4fa6 --- /dev/null +++ b/examples/DimensionReduction/problems/problem_lorenz.jl @@ -0,0 +1,258 @@ +include("../../Lorenz/GModel.jl") # Contains Lorenz 96 source code + +include("./forward_maps.jl") + +# Import modules +using Distributions # probability distributions and associated functions +using LinearAlgebra +using StatsPlots +using Plots +using Random +using JLD2 +using Statistics + +# CES +using EnsembleKalmanProcesses +using EnsembleKalmanProcesses.ParameterDistributions +using EnsembleKalmanProcesses.Localizers + +const EKP = EnsembleKalmanProcesses + +# G(θ) = H(Ψ(θ,x₀,t₀,t₁)) +# y = G(θ) + η + +# This will change for different Lorenz simulators +struct 
LorenzConfig{FT1 <: Real, FT2 <: Real} + "Length of a fixed integration timestep" + dt::FT1 + "Total duration of integration (T = N*dt)" + T::FT2 +end + +# This will change for each ensemble member +struct EnsembleMemberConfig{VV <: AbstractVector} + "state-dependent-forcing" + F::VV +end + +# This will change for different "Observations" of Lorenz +struct ObservationConfig{FT1 <: Real, FT2 <: Real} + "initial time to gather statistics (T_start = N_start*dt)" + T_start::FT1 + "end time to gather statistics (T_end = N_end*dt)" + T_end::FT2 +end +######################################################################### +############################ Model Functions ############################ +######################################################################### + +# Forward pass of forward model +# Inputs: +# - params: structure with F (state-dependent-forcing vector) +# - x0: initial condition vector +# - config: structure including dt (timestep Float64(1)) and T (total time Float64(1)) +function lorenz_forward( + params::EnsembleMemberConfig, + x0::VorM, + config::LorenzConfig, + observation_config::ObservationConfig, +) where {VorM <: AbstractVecOrMat} + # run the Lorenz simulation + xn = lorenz_solve(params, x0, config) + # Get statistics + gt = stats(xn, config, observation_config) + return gt +end + +#Calculates statistics for forward model output +# Inputs: +# - xn: timeseries of states for length of simulation through Lorenz96 +function stats(xn::VorM, config::LorenzConfig, observation_config::ObservationConfig) where {VorM <: AbstractVecOrMat} + T_start = observation_config.T_start + T_end = observation_config.T_end + dt = config.dt + N_start = Int(ceil(T_start / dt)) + N_end = Int(ceil(T_end / dt)) + xn_stat = xn[:, N_start:N_end] + N_state = size(xn_stat, 1) + gt = zeros(2 * N_state) + gt[1:N_state] = mean(xn_stat, dims = 2) + gt[(N_state + 1):(2 * N_state)] = std(xn_stat, dims = 2) + return gt +end + +# Forward pass of the Lorenz 96 model +# Inputs: +# - params: structure with F (state-dependent-forcing vector) +# - x0: initial condition vector +# - config: structure including dt (timestep Float64(1)) and T (total time Float64(1)) +function lorenz_solve(params::EnsembleMemberConfig, x0::VorM, config::LorenzConfig) where {VorM <: AbstractVecOrMat} + # Initialize + nstep = Int(ceil(config.T / config.dt)) + state_dim = isa(x0, AbstractVector) ? 
length(x0) : size(x0, 1) + xn = zeros(size(x0, 1), nstep + 1) + xn[:, 1] = x0 + + # March forward in time + for j in 1:nstep + xn[:, j + 1] = RK4(params, xn[:, j], config) + end + # Output + return xn +end + +# Lorenz 96 system +# f = dx/dt +# Inputs: +# - params: structure with F (state-dependent-forcing vector) +# - x: current state +function f(params::EnsembleMemberConfig, x::VorM) where {VorM <: AbstractVecOrMat} + F = params.F + N = length(x) + f = zeros(N) + # Loop over N positions + for i in 3:(N - 1) + f[i] = -x[i - 2] * x[i - 1] + x[i - 1] * x[i + 1] - x[i] + F[i] + end + # Periodic boundary conditions + f[1] = -x[N - 1] * x[N] + x[N] * x[2] - x[1] + F[1] + f[2] = -x[N] * x[1] + x[1] * x[3] - x[2] + F[2] + f[N] = -x[N - 2] * x[N - 1] + x[N - 1] * x[1] - x[N] + F[N] + # Output + return f +end + +# RK4 solve +# Inputs: +# - params: structure with F (state-dependent-forcing vector) +# - xold: current state +# - config: structure including dt (timestep Float64(1)) and T (total time Float64(1)) +function RK4(params::EnsembleMemberConfig, xold::VorM, config::LorenzConfig) where {VorM <: AbstractVecOrMat} + N = length(xold) + dt = config.dt + + # Predictor steps (note no time-dependence is needed here) + k1 = f(params, xold) + k2 = f(params, xold + k1 * dt / 2.0) + k3 = f(params, xold + k2 * dt / 2.0) + k4 = f(params, xold + k3 * dt) + # Step + xnew = xold + (dt / 6.0) * (k1 + 2.0 * k2 + 2.0 * k3 + k4) + # Output + return xnew +end + + +######################################################################## +############################ Problem setup ############################# +######################################################################## + +struct Lorenz <: ForwardMapType + rng + config_settings + observation_config + x0 + ic_cov_sqrt + nx +end + +has_jac(::Lorenz) = false + +# columns of X are samples +function forward_map(X::AbstractVector, model::Lorenz) + lorenz_forward( + EnsembleMemberConfig(X), + (model.x0 .+ model.ic_cov_sqrt * randn(model.rng, model.nx)), + model.config_settings, + model.observation_config, + ) +end + +function forward_map(X::AbstractMatrix, model::Lorenz) + hcat([forward_map(x, model) for x in eachcol(X)]...) 
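    # columns of X are parameter samples; the corresponding Lorenz statistics are stacked column-wise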
+end + +function lorenz(input_dim, output_dim, rng) + #Creating my sythetic data + #initalize model variables + nx = 40 #dimensions of parameter vector + ny = nx * 2 #number of data points + @assert input_dim == nx + @assert output_dim == ny + + gamma = 8 .+ 6 * sin.((4 * pi * range(0, stop = nx - 1, step = 1)) / nx) #forcing (Needs to be of type EnsembleMemberConfig) + true_parameters = EnsembleMemberConfig(gamma) + + t = 0.01 #time step + T_long = 1000.0 #total time + picking_initial_condition = LorenzConfig(t, T_long) + + #beginning state + x_initial = rand(rng, Normal(0.0, 1.0), nx) + + #Find the initial condition for my data + x_spun_up = lorenz_solve(true_parameters, x_initial, picking_initial_condition) #Need to make LorenzConfig object with t, T_long + + #intital condition used for the data + x0 = x_spun_up[:, end] #last element of the run is the initial condition for creating the data + + #Creating my sythetic data + T = 14.0 + lorenz_config_settings = LorenzConfig(t, T) + + # construct how we compute Observations + T_start = 4.0 #2*max + T_end = T + observation_config = ObservationConfig(T_start, T_end) + + model_out_y = lorenz_forward(true_parameters, x0, lorenz_config_settings, observation_config) + + #Observation covariance + # [Don't need to do this bit really] - initial condition perturbations + covT = 1000.0 #time to simulate to calculate a covariance matrix of the system + cov_solve = lorenz_solve(true_parameters, x0, LorenzConfig(t, covT)) + ic_cov = 0.1 * cov(cov_solve, dims = 2) + ic_cov_sqrt = sqrt(ic_cov) + + n_samples = 200 + y_ens = hcat( + [ + lorenz_forward( + true_parameters, + (x0 .+ ic_cov_sqrt * rand(rng, Normal(0.0, 1.0), nx, n_samples))[:, j], + lorenz_config_settings, + observation_config, + ) for j in 1:n_samples + ]..., + ) + + # estimate noise from IC-effect + R + obs_noise_cov = cov(y_ens, dims = 2) + y_mean = mean(y_ens, dims = 2) + y = y_ens[:, 1] + + pl = 2.0 + psig = 3.0 + #Prior covariance + B = zeros(nx, nx) + for ii in 1:nx + for jj in 1:nx + B[ii, jj] = psig^2 * exp(-abs(ii - jj) / pl) + end + end + B_sqrt = sqrt(B) + + #Prior mean + mu = 8.0 * ones(nx) + + #Creating prior distribution + distribution = Parameterized(MvNormal(mu, B)) + constraint = repeat([no_constraint()], 40) + name = "ml96_prior" + + prior = ParameterDistribution(distribution, constraint, name) + + model = Lorenz(rng, lorenz_config_settings, observation_config, x0, ic_cov_sqrt, nx) + + return prior, y, obs_noise_cov, model, gamma +end diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl new file mode 100644 index 000000000..fd48d51c3 --- /dev/null +++ b/examples/DimensionReduction/settings.jl @@ -0,0 +1,28 @@ +# CONFIGURE THE THREE STEPS + +## -- Configure the inverse problem -- +problem = "linear_exp" # "lorenz" or "linear_exp" +input_dim = 50 +output_dim = 50 + +## -- Configure parameters of the experiment itself -- +rng_seed = 41 +num_trials = 2 + +# Specific to step 1 +step1_eki_ensemble_size = 800 +step1_eki_max_iters = 20 + +# Specific to step 2 +step2_num_prior_samples = 2000 # paper uses 5e5 + +# Specific to step 3 +step3_diagnostics_to_use = [ + ("Hu", 50, "Hg", 50), +] +step3_run_reduced_in_full_space = true +step3_posterior_sampler = :eks # :eks or :mcmc +step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` +step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` +step3_mcmc_sampler = :rw # :rw or :mala; only used if `step3_posterior_sampler == :mcmc` +step3_mcmc_samples_per_chain 
= 50_000 # only used if `step3_posterior_sampler == :mcmc` diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl new file mode 100644 index 000000000..f37f86aa4 --- /dev/null +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -0,0 +1,51 @@ +using EnsembleKalmanProcesses +using Random +using JLD2 + +include("./problems/problem_linear_exp.jl") +include("./problems/problem_lorenz.jl") + +include("./settings.jl") +rng = Random.MersenneTwister(rng_seed) +problem_fun = if problem == "lorenz" + lorenz +elseif problem == "linear_exp" + linear_exp +else + throw("Unknown problem=$problem") +end + + +for trial in 1:num_trials + prior, y, obs_noise_cov, model, true_parameter = problem_fun(input_dim, output_dim, rng) + + n_ensemble = step1_eki_ensemble_size + n_iters_max = step1_eki_max_iters + + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, TransformInversion(); rng, scheduler = EKSStableScheduler(2.0, 0.01)) + + n_iters = n_iters_max + for i in 1:n_iters_max + params_i = get_ϕ_final(prior, ekp) + G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) + terminate = update_ensemble!(ekp, G_ens) + if !isnothing(terminate) + n_iters = i - 1 + break + end + end + + @info "Iteration of posterior convergence: $n_iters" + @info "Loss over iterations: $(get_error(ekp))" + save( + "data/ekp_$(problem)_$(trial).jld2", + "ekp", ekp, + "prior", prior, + "y", y, + "obs_noise_cov", obs_noise_cov, + "model", model, + "true_parameter", true_parameter, + ) +end + diff --git a/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl similarity index 57% rename from examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl rename to examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 2d9f5257b..959d6d4be 100644 --- a/examples/DimensionReduction/build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -5,6 +5,14 @@ using Statistics using Distributions using Plots using JLD2 +using Manopt, Manifolds + +include("./settings.jl") + +if !isfile("data/ekp_$(problem)_1.jld2") + include("step1_generate_inverse_problem_data.jl") +end + #Utilities function cossim(x::VV1, y::VV2) where {VV1 <: AbstractVector, VV2 <: AbstractVector} return dot(x, y) / (norm(x) * norm(y)) @@ -16,15 +24,7 @@ function cossim_cols(X::AM1, Y::AM2) where {AM1 <: AbstractMatrix, AM2 <: Abstra return [cossim_pos(c1, c2) for (c1, c2) in zip(eachcol(X), eachcol(Y))] end -n_samples = 2000 # paper uses 5e5 -n_trials = 20 # get from generate_inverse_problem_data - -if !isfile("ekp_1.jld2") - include("generate_inverse_problem_data.jl") # will run n trials -else - include("forward_maps.jl") -end - +n_samples = step2_num_prior_samples Hu_evals = [] Hg_evals = [] @@ -44,11 +44,11 @@ sim_Hg_ekp_prior = [] sim_Hu_ekp_final = [] sim_Hg_ekp_final = [] sim_Huy_ekp_final = [] +sim_Hgy_ekp_final = [] -for trial in 1:n_trials - +for trial in 1:num_trials # Load the EKP iterations - loaded = load("ekp_$(trial).jld2") + loaded = load("data/ekp_$(problem)_$(trial).jld2") ekp = loaded["ekp"] prior = loaded["prior"] obs_noise_cov = loaded["obs_noise_cov"] @@ -66,25 +66,30 @@ for trial in 1:n_trials # random samples prior_samples = sample(prior, n_samples) - # 
[1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) - @info "Construct good matrix ($(n_samples) samples of prior, perfect grad)" - gradG_samples = jac_forward_map(prior_samples, model) - Hu = zeros(input_dim, input_dim) - Hg = zeros(output_dim, output_dim) - - for j in 1:n_samples - Hu .+= 1 / n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt - Hg .+= 1 / n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt + if has_jac(model) + # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) + @info "Construct good matrix ($(n_samples) samples of prior, perfect grad)" + gradG_samples = jac_forward_map(prior_samples, model) + Hu = zeros(input_dim, input_dim) + Hg = zeros(output_dim, output_dim) + + for j in 1:n_samples + Hu .+= 1 / n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt + Hg .+= 1 / n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt + end + + # [1b] One-point approximation at mean value, with perfect grad + @info "Construct with mean value (1 sample), perfect grad" + prior_mean_appr = mean(prior) # approximate mean + gradG_at_mean = jac_forward_map(prior_mean_appr, model)[1] + # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf + Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt + Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt + else + Hu = Hu_mean = NaN * zeros(input_dim) + Hg = Hg_mean = NaN * zeros(output_dim) end - # [1b] One-point approximation at mean value, with perfect grad - @info "Construct with mean value (1 sample), perfect grad" - prior_mean_appr = mean(prior) # approximate mean - gradG_at_mean = jac_forward_map(prior_mean_appr, model)[1] - # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf - Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt - Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt - # [2a] One-point approximation at mean value with SL grad @info "Construct with mean value prior (1 sample), SL grad" g = get_g(ekp, 1) @@ -122,10 +127,43 @@ for trial in 1:n_trials Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt myCug = Cug' - Huy_ekp_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( + Huy_ekp_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct (y - gg) * (y - gg)' for gg in eachcol(g) )*obs_inv*myCug' * Cuu_invrt + dim_g = size(g, 1) + Vgy_ekp_final = zeros(dim_g, 0) + num_vecs = 10 + @assert num_vecs ≤ dim_g + for k in 1:num_vecs + println("vector $k") + counter = 0 + M = Grassmann(dim_g, 1) + f = (_, v) -> begin + counter += 1 + Vs = hcat(Vgy_ekp_final, vec(v)) + Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' + res = sum( # TODO: Check if whitening is correct + norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2# * det(Vs'*obs_noise_cov*Vs)^(-1/2) * exp(0.5(y-gg)'*Γtildeinv*(y-gg)) + for gg in eachcol(g) + ) + mod(counter, 100) == 1 && println(" iter $counter: $res") + res + end + v00 = eigvecs(Hg_ekp_final)[:,k:k] + v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] + v0 = [v0i / norm(v0i) for v0i in v0] + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(5.0, 5.0)) + # Orthogonalize + proj = bestvec - Vgy_ekp_final * (Vgy_ekp_final' * bestvec) + bestvec = proj / norm(proj) + + Vgy_ekp_final = hcat(Vgy_ekp_final, bestvec) + end + 
Vgy_ekp_final = hcat(Vgy_ekp_final, randn(dim_g, dim_g - num_vecs)) + Hgy_ekp_final = Vgy_ekp_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_ekp_final' + Hgy_ekp_final = Hg_ekp_final + # cosine similarity of evector directions svdHu = svd(Hu) svdHg = svd(Hg) @@ -136,29 +174,31 @@ for trial in 1:n_trials svdHu_ekp_final = svd(Hu_ekp_final) svdHg_ekp_final = svd(Hg_ekp_final) svdHuy_ekp_final = svd(Huy_ekp_final) - @info """ - - samples -> mean - $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) - - samples + deriv -> mean + (no deriv) prior - $(cossim_cols(svdHu.V, svdHu_ekp_prior.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_ekp_prior.V)[1:3]) - - samples + deriv -> mean + (no deriv) final - $(cossim_cols(svdHu.V, svdHu_ekp_final.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_ekp_final.V)[1:3]) - - mean+(no deriv): prior -> final - $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) - $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) - - y-aware -> samples - $(cossim_cols(svdHu.V, svdHuy_ekp_final.V)[1:3]) - """ - push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) - push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) + svdHgy_ekp_final = svd(Hgy_ekp_final) + if has_jac(model) + @info """ + + samples -> mean + $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) + + samples + deriv -> mean + (no deriv) prior + $(cossim_cols(svdHu.V, svdHu_ekp_prior.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_ekp_prior.V)[1:3]) + + samples + deriv -> mean + (no deriv) final + $(cossim_cols(svdHu.V, svdHu_ekp_final.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_ekp_final.V)[1:3]) + + mean+(no deriv): prior -> final + $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) + $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) + + y-aware -> samples + $(cossim_cols(svdHu.V, svdHuy_ekp_final.V)[1:3]) + $(cossim_cols(svdHg.V, svdHgy_ekp_final.V)[1:3]) + """ + end push!(Hu_evals, svdHu.S) push!(Hg_evals, svdHg.S) push!(Hu_mean_evals, svdHu_mean.S) @@ -167,44 +207,41 @@ for trial in 1:n_trials push!(Hg_ekp_prior_evals, svdHg_ekp_prior.S) push!(Hu_ekp_final_evals, svdHu_ekp_final.S) push!(Hg_ekp_final_evals, svdHg_ekp_final.S) - push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) - push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) - push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) - push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) - push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) + if has_jac(model) + push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) + push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) + push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) + push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) + push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) + push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) + push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) + push!(sim_Hgy_ekp_final, cossim_cols(svdHg.V, svdHgy_ekp_final.V)) + end # cosine similarity to output svd from samples G_samples = forward_map(prior_samples, model)' svdG = svd(G_samples) # nonsquare, so permuted so evectors are V svdU = svd(prior_samples') - push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) - push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) + if has_jac(model) + push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) + push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) + end save( - 
"diagnostic_matrices_$(trial).jld2", - "Hu", - Hu, - "Hg", - Hg, - "Hu_mean", - Hu_mean, - "Hg_mean", - Hg_mean, - "Hu_ekp_prior", - Hu_ekp_prior, - "Hg_ekp_prior", - Hg_ekp_prior, - "Hu_ekp_final", - Hu_ekp_final, - "Hg_ekp_final", - Hg_ekp_final, - "Huy_ekp_final", - Huy_ekp_final, - "svdU", - svdU, - "svdG", - svdG, + "data/diagnostic_matrices_$(problem)_$(trial).jld2", + "Hu", Hu, + "Hg", Hg, + "Hu_mean", Hu_mean, + "Hg_mean", Hg_mean, + "Hu_ekp_prior", Hu_ekp_prior, + "Hg_ekp_prior", Hg_ekp_prior, + "Hu_ekp_final", Hu_ekp_final, + "Hg_ekp_final", Hg_ekp_final, + "Huy_ekp_final", Huy_ekp_final, + "Hgy_ekp_final", Hgy_ekp_final, + "svdU", svdU, + "svdG", svdG, ) end @@ -217,7 +254,7 @@ normal_Hg_mean_evals = [ev ./ ev[1] for ev in Hg_mean_evals] normal_Hg_ekp_prior_evals = [ev ./ ev[1] for ev in Hg_ekp_prior_evals] normal_Hg_ekp_final_evals = [ev ./ ev[1] for ev in Hg_ekp_final_evals] -loaded1 = load("ekp_1.jld2") +loaded1 = load("data/ekp_$(problem)_1.jld2") ekp_tmp = loaded1["ekp"] input_dim = size(get_u(ekp_tmp, 1), 1) output_dim = size(get_g(ekp_tmp, 1), 1) @@ -229,7 +266,7 @@ truncation = Int(minimum([truncation, input_dim, output_dim])) pg = plot( 1:truncation, mean(sim_Hg_means)[1:truncation], - ribbon = (std(sim_Hg_means) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Hg_means) / sqrt(num_trials))[1:truncation], color = :blue, label = "sim (samples v mean)", legend = false, @@ -239,7 +276,7 @@ plot!( pg, 1:truncation, mean(sim_Hg_ekp_prior)[1:truncation], - ribbon = (std(sim_Hg_ekp_prior) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Hg_ekp_prior) / sqrt(num_trials))[1:truncation], color = :red, alpha = 0.3, label = "sim (samples v mean-no-der) prior", @@ -248,10 +285,18 @@ plot!( pg, 1:truncation, mean(sim_Hg_ekp_final)[1:truncation], - ribbon = (std(sim_Hg_ekp_final) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Hg_ekp_final) / sqrt(num_trials))[1:truncation], color = :gold, label = "sim (samples v mean-no-der) final", ) +plot!( + pg, + 1:truncation, + mean(sim_Hgy_ekp_final)[1:truncation], + ribbon = (std(sim_Hgy_ekp_final) / sqrt(num_trials))[1:truncation], + color = :purple, + label = "sim (samples v y-aware) final", +) plot!(pg, 1:truncation, mean(normal_Hg_evals)[1:truncation], color = :black, label = "normalized eval (samples)") plot!( @@ -279,7 +324,7 @@ plot!( pg, 1:truncation, mean(sim_G_samples)[1:truncation], - ribbon = (std(sim_G_samples) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_G_samples) / sqrt(num_trials))[1:truncation], color = :green, label = "similarity (PCA)", ) @@ -296,7 +341,7 @@ normal_Hu_ekp_final_evals = [ev ./ ev[1] for ev in Hu_ekp_final_evals] pu = plot( 1:truncation, mean(sim_Hu_means)[1:truncation], - ribbon = (std(sim_Hu_means) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Hu_means) / sqrt(num_trials))[1:truncation], color = :blue, label = "sim (samples v mean)", ) @@ -324,7 +369,7 @@ plot!( pu, 1:truncation, mean(sim_U_samples)[1:truncation], - ribbon = (std(sim_U_samples) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_U_samples) / sqrt(num_trials))[1:truncation], color = :green, label = "similarity (PCA)", ) @@ -333,7 +378,7 @@ plot!( pu, 1:truncation, mean(sim_Hu_ekp_prior)[1:truncation], - ribbon = (std(sim_Hu_ekp_prior) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Hu_ekp_prior) / sqrt(num_trials))[1:truncation], color = :red, alpha = 0.3, label = "sim (samples v mean-no-der) prior", @@ -342,7 +387,7 @@ plot!( pu, 1:truncation, mean(sim_Hu_ekp_final)[1:truncation], - ribbon = 
(std(sim_Hu_ekp_final) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Hu_ekp_final) / sqrt(num_trials))[1:truncation], color = :gold, label = "sim (samples v mean-no-der) final", ) @@ -350,7 +395,7 @@ plot!( pu, 1:truncation, mean(sim_Huy_ekp_final)[1:truncation], - ribbon = (std(sim_Huy_ekp_final) / sqrt(n_trials))[1:truncation], + ribbon = (std(sim_Huy_ekp_final) / sqrt(num_trials))[1:truncation], color = :purple, label = "sim (samples v y-aware) final", ) @@ -360,4 +405,4 @@ title!(pu, "Similarity of spectrum of input diagnostic") layout = @layout [a b] p = plot(pu, pg, layout = layout) -savefig(p, "spectrum_comparison.png") +savefig(p, "figures/spectrum_comparison_$problem.png") diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl new file mode 100644 index 000000000..4eb95f110 --- /dev/null +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -0,0 +1,194 @@ +using AdvancedMH +using Distributions +using ForwardDiff +using JLD2 +using MCMCChains +using Plots +using Random +using Statistics + +include("./settings.jl") +rng = Random.MersenneTwister(rng_seed) + +if !isfile("data/ekp_$(problem)_1.jld2") + include("step1_generate_inverse_problem_data.jl") +end +if !isfile("data/diagnostic_matrices_$(problem)_1.jld2") + include("step2_build_and_compare_diagnostic_matrices.jl") +end + +for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use + @info "Diagnostic matrices = ($in_diag [1-$in_r], $out_diag [1-$out_r])" + average_error = 0 + + for trial in 1:num_trials + # Load the EKP iterations + loaded = load("data/ekp_$(problem)_$(trial).jld2") + ekp = loaded["ekp"] + prior = loaded["prior"] + obs_noise_cov = loaded["obs_noise_cov"] + y = loaded["y"] + model = loaded["model"] + true_parameter = loaded["true_parameter"] + + prior_cov = cov(prior) + prior_inv = inv(prior_cov) + prior_invrt = sqrt(inv(prior_cov)) + prior_rt = sqrt(prior_cov) + obs_rt = sqrt(obs_noise_cov) + obs_invrt = sqrt(inv(obs_noise_cov)) + obs_inv = inv(obs_noise_cov) + + # Load diagnostic container + diagnostic_mats = load("data/diagnostic_matrices_$(problem)_$(trial).jld2") + + Hu = diagnostic_mats[in_diag] + Hg = diagnostic_mats[out_diag] + svdu = svd(Hu) + svdg = svd(Hg) + U_r = svdu.V[:, 1:in_r] + V_r = svdg.V[:, 1:out_r] + + M = prior_rt * U_r * U_r' * prior_invrt + N = obs_rt * V_r * V_r' * obs_invrt + + obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr + obs_noise_cov_r_inv = inv(obs_noise_cov_r) + prior_cov_r = U_r' * U_r + prior_cov_r_inv = inv(prior_cov_r) + y_r = V_r' * obs_invrt * y + + if step3_posterior_sampler == :mcmc + logpostfull = x -> begin + g = forward_map(x, model) + -2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g) + end + densitymodelfull = DensityModel(logpostfull) + mean_full = zeros(input_dim) + num_iters = 1 + for _ in 1:num_iters + sampler = if step3_mcmc_sampler == :mala + MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) + elseif step3_mcmc_sampler == :rw + RWMH(MvNormal(zeros(input_dim), .01prior_cov)) + else + throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") + end + chainfull = sample(densitymodelfull, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params = [zeros(input_dim) for _ in 1:8]) + sampfull = vcat([vec(MCMCChains.get(chainfull, Symbol("param_$i"))[1])' for i in 1:input_dim]...) 
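                # sampfull stacks the 8 chains into an (input_dim x total_draws) matrix; only the
                # second half of the stacked draws is kept (effectively discarding the first half
                # as burn-in) when accumulating the posterior mean.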
+ mean_full += mean(sampfull[:,end÷2:end]; dims = 2) / num_iters + end + mean_full_red = U_r' * (prior_invrt * mean_full) + + if step3_run_reduced_in_full_space + logpostred = xfull -> begin + xredfull = M * xfull + gredfull = N * forward_map(xredfull, model) + + -2\xfull'*prior_inv*xfull - 2\(y - gredfull)'*obs_inv*(y - gredfull) + end + densitymodelred = DensityModel(logpostred) + + mean_red_full = zeros(input_dim) + num_iters = 1 + for _ in 1:num_iters + sampler = if step3_mcmc_sampler == :mala + MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) + elseif step3_mcmc_sampler == :rw + RWMH(MvNormal(zeros(input_dim), .01prior_cov)) + else + throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") + end + chainred = sample(densitymodelred, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params = [zeros(input_dim) for _ in 1:8]) + sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1])' for i in 1:input_dim]...) + mean_red_full += mean(sampred[:,end÷2:end]; dims = 2) / num_iters + end + mean_red = U_r' * (prior_invrt * mean_red_full) + else + logpostred = xred -> begin + xredfull = prior_rt * U_r * xred + gred = V_r' * obs_invrt * forward_map(xredfull, model) + + -2\xred'*prior_cov_r_inv*xred - 2\(y_r - gred)'*obs_noise_cov_r_inv*(y_r - gred) + end + densitymodelred = DensityModel(logpostred) + + mean_red = zeros(in_r) + num_iters = 1 + for _ in 1:num_iters + sampler, num_samples = if step3_mcmc_sampler == :mala + MALA(x -> MvNormal(.0001 * prior_cov_r * x, .0001 * 2 * prior_cov_r)), 50 # MALA is very slow, likely due to ForwardDiff + elseif step3_mcmc_sampler == :rw + RWMH(MvNormal(zeros(in_r), .01prior_cov_r)), 5_000 + else + throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") + end + chainred = sample(densitymodelred, sampler, MCMCThreads(), num_samples, 8; chain_type=Chains, initial_params = [zeros(in_r) for _ in 1:8]) + sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1])' for i in 1:in_r]...) + mean_red += mean(sampred[:,end÷2:end]; dims = 2) / num_iters + end + mean_red_full = prior_rt * U_r * mean_red + end + elseif step3_posterior_sampler == :eks + n_ensemble = step3_eks_ensemble_size + n_iters_max = step3_eks_max_iters + + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng, scheduler = EKSStableScheduler(2.0, 0.01)) + for i in 1:n_iters_max + G_ens = hcat([forward_map(params, model) for params in eachcol(get_ϕ_final(prior, ekp))]...) + isnothing(update_ensemble!(ekp, G_ens)) || break + end + ekp_u, ekp_g = reduce(hcat, get_u(ekp)), reduce(hcat, get_g(ekp)) + mean_full = get_u_mean_final(ekp) + mean_full_red = U_r' * prior_invrt * mean_full + + if step3_run_reduced_in_full_space + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + ekp_r = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng, scheduler = EKSStableScheduler(2.0, 0.01)) + + for i in 1:n_iters_max + G_ens = hcat([N*forward_map(M*params, model) for params in eachcol(get_ϕ_final(prior, ekp_r))]...) 
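                    # M = prior_rt * U_r * U_r' * prior_invrt and N = obs_rt * V_r * V_r' * obs_invrt
                    # project parameters and model outputs onto the retained subspaces (in prior- and
                    # observation-whitened coordinates), so the reduced problem is solved while staying
                    # in full-dimensional coordinates.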
+ isnothing(update_ensemble!(ekp_r, G_ens)) || break + end + ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) + mean_red_full = get_u_mean_final(ekp_r) + mean_red = U_r' * prior_invrt * mean_red_full + else + initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + initial_r = U_r' * prior_invrt * initial_ensemble + prior_r = ParameterDistribution( + Samples(U_r' * prior_invrt * sample(rng, prior, 1000)), + repeat([no_constraint()], in_r), + "prior_r", + ) + + ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng) + + for i in 1:n_iters_max + # evaluate true G + G_ens_full = reduce(hcat, [forward_map(prior_rt * U_r * params, model) for params in eachcol(get_ϕ_final(prior_r, ekp_r))]) + # project data back + G_ens = V_r' * obs_invrt * G_ens_full + + isnothing(update_ensemble!(ekp_r, G_ens)) || break + end + ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) + mean_red = get_u_mean_final(ekp_r) + mean_red_full = prior_rt * U_r * mean_red + # TODO: Check if we're OK with this way of projecting + end + else + throw("Unknown step3_posterior_sampler=$step3_posterior_sampler") + end + + @info """ + True: $(true_parameter[1:5]) + Mean (in full space): $(mean_full[1:5]) + Red. mean (in full space): $(mean_red_full[1:5]) + + Relative error in full space: $(norm(mean_full - mean_red_full) / norm(mean_full)) + Relative error in reduced space: $(norm(mean_full_red - mean_red) / norm(mean_full_red)) + """ + end +end From c526551b90f7777a0d4b8d88e2cef282aaced91c Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 5 Jun 2025 15:24:48 -0700 Subject: [PATCH 12/35] Rename data folder to escape .gitignore --- examples/DimensionReduction/datafiles/.gitkeep | 0 .../step1_generate_inverse_problem_data.jl | 3 +-- .../step2_build_and_compare_diagnostic_matrices.jl | 8 ++++---- examples/DimensionReduction/step3_estimate_posteriors.jl | 8 ++++---- 4 files changed, 9 insertions(+), 10 deletions(-) create mode 100644 examples/DimensionReduction/datafiles/.gitkeep diff --git a/examples/DimensionReduction/datafiles/.gitkeep b/examples/DimensionReduction/datafiles/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index f37f86aa4..3926f9fcb 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -39,7 +39,7 @@ for trial in 1:num_trials @info "Iteration of posterior convergence: $n_iters" @info "Loss over iterations: $(get_error(ekp))" save( - "data/ekp_$(problem)_$(trial).jld2", + "datafiles/ekp_$(problem)_$(trial).jld2", "ekp", ekp, "prior", prior, "y", y, @@ -48,4 +48,3 @@ for trial in 1:num_trials "true_parameter", true_parameter, ) end - diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 959d6d4be..2192fd7e8 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -9,7 +9,7 @@ using Manopt, Manifolds include("./settings.jl") -if !isfile("data/ekp_$(problem)_1.jld2") +if !isfile("datafiles/ekp_$(problem)_1.jld2") include("step1_generate_inverse_problem_data.jl") end @@ -48,7 +48,7 @@ sim_Hgy_ekp_final = [] for trial in 
1:num_trials # Load the EKP iterations - loaded = load("data/ekp_$(problem)_$(trial).jld2") + loaded = load("datafiles/ekp_$(problem)_$(trial).jld2") ekp = loaded["ekp"] prior = loaded["prior"] obs_noise_cov = loaded["obs_noise_cov"] @@ -229,7 +229,7 @@ for trial in 1:num_trials end save( - "data/diagnostic_matrices_$(problem)_$(trial).jld2", + "datafiles/diagnostic_matrices_$(problem)_$(trial).jld2", "Hu", Hu, "Hg", Hg, "Hu_mean", Hu_mean, @@ -254,7 +254,7 @@ normal_Hg_mean_evals = [ev ./ ev[1] for ev in Hg_mean_evals] normal_Hg_ekp_prior_evals = [ev ./ ev[1] for ev in Hg_ekp_prior_evals] normal_Hg_ekp_final_evals = [ev ./ ev[1] for ev in Hg_ekp_final_evals] -loaded1 = load("data/ekp_$(problem)_1.jld2") +loaded1 = load("datafiles/ekp_$(problem)_1.jld2") ekp_tmp = loaded1["ekp"] input_dim = size(get_u(ekp_tmp, 1), 1) output_dim = size(get_g(ekp_tmp, 1), 1) diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index 4eb95f110..9b892e081 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -10,10 +10,10 @@ using Statistics include("./settings.jl") rng = Random.MersenneTwister(rng_seed) -if !isfile("data/ekp_$(problem)_1.jld2") +if !isfile("datafiles/ekp_$(problem)_1.jld2") include("step1_generate_inverse_problem_data.jl") end -if !isfile("data/diagnostic_matrices_$(problem)_1.jld2") +if !isfile("datafiles/diagnostic_matrices_$(problem)_1.jld2") include("step2_build_and_compare_diagnostic_matrices.jl") end @@ -23,7 +23,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use for trial in 1:num_trials # Load the EKP iterations - loaded = load("data/ekp_$(problem)_$(trial).jld2") + loaded = load("datafiles/ekp_$(problem)_$(trial).jld2") ekp = loaded["ekp"] prior = loaded["prior"] obs_noise_cov = loaded["obs_noise_cov"] @@ -40,7 +40,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use obs_inv = inv(obs_noise_cov) # Load diagnostic container - diagnostic_mats = load("data/diagnostic_matrices_$(problem)_$(trial).jld2") + diagnostic_mats = load("datafiles/diagnostic_matrices_$(problem)_$(trial).jld2") Hu = diagnostic_mats[in_diag] Hg = diagnostic_mats[out_diag] From cba2e0b0755de1380b57fd223ba5154b94233e4b Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 5 Jun 2025 16:13:34 -0700 Subject: [PATCH 13/35] Remove debugging line --- .../step2_build_and_compare_diagnostic_matrices.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 2192fd7e8..ab1d24a3c 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -162,7 +162,7 @@ for trial in 1:num_trials end Vgy_ekp_final = hcat(Vgy_ekp_final, randn(dim_g, dim_g - num_vecs)) Hgy_ekp_final = Vgy_ekp_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_ekp_final' - Hgy_ekp_final = Hg_ekp_final + # cosine similarity of evector directions svdHu = svd(Hu) From f1e21f1db6301defb61cdc1b967988377e93253b Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 5 Jun 2025 16:42:40 -0700 Subject: [PATCH 14/35] Also build diagnostic matrices with MCMC final samples --- examples/DimensionReduction/settings.jl | 7 +- .../step1_generate_inverse_problem_data.jl | 42 ++++++++-- 
...2_build_and_compare_diagnostic_matrices.jl | 76 ++++++++++++++++++- .../step3_estimate_posteriors.jl | 18 ++--- 4 files changed, 125 insertions(+), 18 deletions(-) diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index fd48d51c3..6fae0bb55 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -12,16 +12,19 @@ num_trials = 2 # Specific to step 1 step1_eki_ensemble_size = 800 step1_eki_max_iters = 20 +step1_mcmc_sampler = :rw # :rw or :mala +step1_mcmc_samples_per_chain = 50_000 +step1_mcmc_subsample_rate = 100 # Specific to step 2 step2_num_prior_samples = 2000 # paper uses 5e5 # Specific to step 3 step3_diagnostics_to_use = [ - ("Hu", 50, "Hg", 50), + ("Hu", 5, "Hg", 5), ] step3_run_reduced_in_full_space = true -step3_posterior_sampler = :eks # :eks or :mcmc +step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` step3_mcmc_sampler = :rw # :rw or :mala; only used if `step3_posterior_sampler == :mcmc` diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 3926f9fcb..d16ed489f 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -1,6 +1,11 @@ +using AdvancedMH +using Distributions using EnsembleKalmanProcesses -using Random +using ForwardDiff using JLD2 +using LinearAlgebra +using MCMCChains +using Random include("./problems/problem_linear_exp.jl") include("./problems/problem_lorenz.jl") @@ -19,6 +24,7 @@ end for trial in 1:num_trials prior, y, obs_noise_cov, model, true_parameter = problem_fun(input_dim, output_dim, rng) + # [1] EKP run n_ensemble = step1_eki_ensemble_size n_iters_max = step1_eki_max_iters @@ -27,24 +33,50 @@ for trial in 1:num_trials n_iters = n_iters_max for i in 1:n_iters_max - params_i = get_ϕ_final(prior, ekp) - G_ens = hcat([forward_map(params_i[:, i], model) for i in 1:n_ensemble]...) + G_ens = hcat([forward_map(param, model) for param in eachcol(get_ϕ_final(prior, ekp))]...) terminate = update_ensemble!(ekp, G_ens) if !isnothing(terminate) n_iters = i - 1 break end end - - @info "Iteration of posterior convergence: $n_iters" + @info "EKP iterations: $n_iters" @info "Loss over iterations: $(get_error(ekp))" + + # [2] MCMC run + prior_cov, prior_inv, obs_inv = cov(prior), inv(cov(prior)), inv(obs_noise_cov) + logpost = x -> begin + g = forward_map(x, model) + -2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g) + end + density_model = DensityModel(logpost) + num_iters = 1 + mcmc_samples = zeros(input_dim, 0) + for _ in 1:num_iters + sampler = if step1_mcmc_sampler == :mala + MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) + elseif step1_mcmc_sampler == :rw + RWMH(MvNormal(zeros(input_dim), .01prior_cov)) + else + throw("Unknown step1_mcmc_sampler=$step1_mcmc_sampler") + end + chain = sample(density_model, sampler, MCMCThreads(), step1_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params=[zeros(input_dim) for _ in 1:8]) + samp = vcat([vec(MCMCChains.get(chain, Symbol("param_$i"))[1]'[:, end÷2:step1_mcmc_subsample_rate:end])' for i in 1:input_dim]...) + mcmc_samples = hcat(mcmc_samples, samp) + end + @info "MCMC finished" + + # [3] Save everything to a file + #! 
format: off save( "datafiles/ekp_$(problem)_$(trial).jld2", "ekp", ekp, + "mcmc_samples", mcmc_samples, "prior", prior, "y", y, "obs_noise_cov", obs_noise_cov, "model", model, "true_parameter", true_parameter, ) + #! format: on end diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index ab1d24a3c..5a770ccfc 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -43,13 +43,18 @@ sim_Hu_ekp_prior = [] sim_Hg_ekp_prior = [] sim_Hu_ekp_final = [] sim_Hg_ekp_final = [] +sim_Hu_mcmc_final = [] +sim_Hg_mcmc_final = [] sim_Huy_ekp_final = [] sim_Hgy_ekp_final = [] +sim_Huy_mcmc_final = [] +sim_Hgy_mcmc_final = [] for trial in 1:num_trials # Load the EKP iterations loaded = load("datafiles/ekp_$(problem)_$(trial).jld2") ekp = loaded["ekp"] + mcmc_samples = loaded["mcmc_samples"] prior = loaded["prior"] obs_noise_cov = loaded["obs_noise_cov"] y = loaded["y"] @@ -109,7 +114,7 @@ for trial in 1:num_trials Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt # [2b] One-point approximation at mean value with SL grad - @info "Construct with mean value final (1 sample), SL grad" + @info "Construct with mean value EKP final (1 sample), SL grad" final_it = length(get_g(ekp)) g = get_g(ekp, final_it) u = get_u(ekp, final_it) @@ -126,6 +131,7 @@ for trial in 1:num_trials Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + @info "Construct y-informed at EKP final (SL grad)" myCug = Cug' Huy_ekp_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct (y - gg) * (y - gg)' for gg in eachcol(g) @@ -153,7 +159,7 @@ for trial in 1:num_trials v00 = eigvecs(Hg_ekp_final)[:,k:k] v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(5.0, 5.0)) + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(20.0, 20.0)) # Orthogonalize proj = bestvec - Vgy_ekp_final * (Vgy_ekp_final' * bestvec) bestvec = proj / norm(proj) @@ -164,6 +170,58 @@ for trial in 1:num_trials Hgy_ekp_final = Vgy_ekp_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_ekp_final' + @info "Construct with mean value MCMC final (1 sample), SL grad" + u = mcmc_samples + g = hcat([forward_map(uu, model) for uu in eachcol(u)]...) + C_at_final = cov([u; g], dims = 2) # basic cross-cov + Cuu = C_at_final[1:input_dim, 1:input_dim] + svdCuu = svd(Cuu) + nz = min(N_ens - 1, input_dim) # nonzero sv's + pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance + Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt + Cug = C_at_final[(input_dim + 1):end, 1:input_dim] # TODO: Isn't this Cgu? 
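+ # As with the EKP-based constructions above: with the statistical-linearization gradient
+ # ∇G ≈ Cug * pinv(Cuu), the next two diagnostics whiten with the sample covariance Cuu in place of
+ # the prior covariance, i.e. Hu_mcmc_final = Cuu^(-1/2) Cug' Γ⁻¹ Cug Cuu^(-1/2) and
+ # Hg_mcmc_final = Γ^(-1/2) Cug pinv(Cuu) Cug' Γ^(-1/2), with Γ the observation noise covariance.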
+ Hu_mcmc_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt + Hg_mcmc_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + + @info "Construct y-informed at MCMC final (SL grad)" + myCug = Cug' + Huy_mcmc_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct + (y - gg) * (y - gg)' for gg in eachcol(g) + )*obs_inv*myCug' * Cuu_invrt + + dim_g = size(g, 1) + Vgy_mcmc_final = zeros(dim_g, 0) + num_vecs = 10 + @assert num_vecs ≤ dim_g + for k in 1:num_vecs + println("vector $k") + counter = 0 + M = Grassmann(dim_g, 1) + f = (_, v) -> begin + counter += 1 + Vs = hcat(Vgy_mcmc_final, vec(v)) + Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' + res = sum( # TODO: Check if whitening is correct + norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2# * det(Vs'*obs_noise_cov*Vs)^(-1/2) * exp(0.5(y-gg)'*Γtildeinv*(y-gg)) + for gg in eachcol(g) + ) + mod(counter, 100) == 1 && println(" iter $counter: $res") + res + end + v00 = eigvecs(Hg_mcmc_final)[:,k:k] + v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] + v0 = [v0i / norm(v0i) for v0i in v0] + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(20.0, 20.0)) + # Orthogonalize + proj = bestvec - Vgy_mcmc_final * (Vgy_mcmc_final' * bestvec) + bestvec = proj / norm(proj) + + Vgy_mcmc_final = hcat(Vgy_mcmc_final, bestvec) + end + Vgy_mcmc_final = hcat(Vgy_mcmc_final, randn(dim_g, dim_g - num_vecs)) + Hgy_mcmc_final = Vgy_mcmc_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_mcmc_final' + + # cosine similarity of evector directions svdHu = svd(Hu) svdHg = svd(Hg) @@ -173,8 +231,12 @@ for trial in 1:num_trials svdHg_ekp_prior = svd(Hg_ekp_prior) svdHu_ekp_final = svd(Hu_ekp_final) svdHg_ekp_final = svd(Hg_ekp_final) + svdHu_mcmc_final = svd(Hu_mcmc_final) + svdHg_mcmc_final = svd(Hg_mcmc_final) svdHuy_ekp_final = svd(Huy_ekp_final) svdHgy_ekp_final = svd(Hgy_ekp_final) + svdHuy_mcmc_final = svd(Huy_mcmc_final) + svdHgy_mcmc_final = svd(Hgy_mcmc_final) if has_jac(model) @info """ @@ -214,8 +276,12 @@ for trial in 1:num_trials push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) + push!(sim_Hu_mcmc_final, cossim_cols(svdHu.V, svdHu_mcmc_final.V)) + push!(sim_Hg_mcmc_final, cossim_cols(svdHg.V, svdHg_mcmc_final.V)) push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) push!(sim_Hgy_ekp_final, cossim_cols(svdHg.V, svdHgy_ekp_final.V)) + push!(sim_Huy_mcmc_final, cossim_cols(svdHu.V, svdHuy_mcmc_final.V)) + push!(sim_Hgy_mcmc_final, cossim_cols(svdHg.V, svdHgy_mcmc_final.V)) end # cosine similarity to output svd from samples @@ -228,6 +294,7 @@ for trial in 1:num_trials push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) end + #! format: off save( "datafiles/diagnostic_matrices_$(problem)_$(trial).jld2", "Hu", Hu, @@ -238,11 +305,16 @@ for trial in 1:num_trials "Hg_ekp_prior", Hg_ekp_prior, "Hu_ekp_final", Hu_ekp_final, "Hg_ekp_final", Hg_ekp_final, + "Hu_mcmc_final", Hu_mcmc_final, + "Hg_mcmc_final", Hg_mcmc_final, "Huy_ekp_final", Huy_ekp_final, "Hgy_ekp_final", Hgy_ekp_final, + "Huy_mcmc_final", Huy_mcmc_final, + "Hgy_mcmc_final", Hgy_mcmc_final, "svdU", svdU, "svdG", svdG, ) + #! 
format: on end using Plots.Measures diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index 9b892e081..e6afd11c6 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -74,9 +74,9 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use else throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") end - chainfull = sample(densitymodelfull, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params = [zeros(input_dim) for _ in 1:8]) - sampfull = vcat([vec(MCMCChains.get(chainfull, Symbol("param_$i"))[1])' for i in 1:input_dim]...) - mean_full += mean(sampfull[:,end÷2:end]; dims = 2) / num_iters + chainfull = sample(densitymodelfull, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params=[zeros(input_dim) for _ in 1:8]) + sampfull = vcat([vec(MCMCChains.get(chainfull, Symbol("param_$i"))[1]'[:, end÷2:end])' for i in 1:input_dim]...) + mean_full += mean(sampfull; dims = 2) / num_iters end mean_full_red = U_r' * (prior_invrt * mean_full) @@ -99,9 +99,9 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use else throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") end - chainred = sample(densitymodelred, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params = [zeros(input_dim) for _ in 1:8]) - sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1])' for i in 1:input_dim]...) - mean_red_full += mean(sampred[:,end÷2:end]; dims = 2) / num_iters + chainred = sample(densitymodelred, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params=[zeros(input_dim) for _ in 1:8]) + sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1]'[:, end÷2:end])' for i in 1:input_dim]...) + mean_red_full += mean(sampred; dims = 2) / num_iters end mean_red = U_r' * (prior_invrt * mean_red_full) else @@ -123,9 +123,9 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use else throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") end - chainred = sample(densitymodelred, sampler, MCMCThreads(), num_samples, 8; chain_type=Chains, initial_params = [zeros(in_r) for _ in 1:8]) - sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1])' for i in 1:in_r]...) - mean_red += mean(sampred[:,end÷2:end]; dims = 2) / num_iters + chainred = sample(densitymodelred, sampler, MCMCThreads(), num_samples, 8; chain_type=Chains, initial_params=[zeros(in_r) for _ in 1:8]) + sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1]'[:, end÷2:end])' for i in 1:in_r]...) 
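+ # Burn-in (the first half of each chain) is already dropped in the extraction above via the
+ # end÷2:end slice, so the running mean below uses all retained columns.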
+ mean_red += mean(sampred; dims = 2) / num_iters end mean_red_full = prior_rt * U_r * mean_red end From 557bafaf4f714e59bb7ba2b6ae53db96b8df2bdf Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 6 Jun 2025 07:08:46 -0700 Subject: [PATCH 15/35] add StatsPlots --- examples/DimensionReduction/Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/DimensionReduction/Project.toml b/examples/DimensionReduction/Project.toml index be01cd3d9..58870b89e 100644 --- a/examples/DimensionReduction/Project.toml +++ b/examples/DimensionReduction/Project.toml @@ -11,3 +11,4 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" From 61f2f266ef57db90eb2a7179737f6791270b6726 Mon Sep 17 00:00:00 2001 From: odunbar Date: Fri, 6 Jun 2025 07:24:38 -0700 Subject: [PATCH 16/35] more pkg additions --- examples/DimensionReduction/Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/DimensionReduction/Project.toml b/examples/DimensionReduction/Project.toml index 58870b89e..0d266b679 100644 --- a/examples/DimensionReduction/Project.toml +++ b/examples/DimensionReduction/Project.toml @@ -9,6 +9,8 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" +Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e" +Manopt = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" From 715a7da8337a0b5ba27c7aa7ed9908185cfbb7b3 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Fri, 6 Jun 2025 09:08:27 -0700 Subject: [PATCH 17/35] Add temperature parameter to step1 MCMC --- examples/DimensionReduction/settings.jl | 3 ++- .../step1_generate_inverse_problem_data.jl | 2 +- .../step2_build_and_compare_diagnostic_matrices.jl | 9 +++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index 6fae0bb55..f475cab38 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -12,6 +12,7 @@ num_trials = 2 # Specific to step 1 step1_eki_ensemble_size = 800 step1_eki_max_iters = 20 +step1_mcmc_temperature = 2.0 # 1.0 is the "true" posterior; higher oversamples the tails step1_mcmc_sampler = :rw # :rw or :mala step1_mcmc_samples_per_chain = 50_000 step1_mcmc_subsample_rate = 100 @@ -21,7 +22,7 @@ step2_num_prior_samples = 2000 # paper uses 5e5 # Specific to step 3 step3_diagnostics_to_use = [ - ("Hu", 5, "Hg", 5), + ("Hu", 10, "Hg", 10), ] step3_run_reduced_in_full_space = true step3_posterior_sampler = :mcmc # :eks or :mcmc diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index d16ed489f..f41be7b17 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -47,7 +47,7 @@ for trial in 1:num_trials prior_cov, prior_inv, obs_inv = cov(prior), inv(cov(prior)), inv(obs_noise_cov) logpost = x -> begin g = forward_map(x, model) - -2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g) + (-2\x'*prior_inv*x - 
2\(y - g)'*obs_inv*(y - g)) / step1_mcmc_temperature end density_model = DensityModel(logpost) num_iters = 1 diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 5a770ccfc..f62552157 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -149,7 +149,7 @@ for trial in 1:num_trials counter += 1 Vs = hcat(Vgy_ekp_final, vec(v)) Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' - res = sum( # TODO: Check if whitening is correct + res = N_ens \ sum( # TODO: Check if whitening is correct norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2# * det(Vs'*obs_noise_cov*Vs)^(-1/2) * exp(0.5(y-gg)'*Γtildeinv*(y-gg)) for gg in eachcol(g) ) @@ -159,7 +159,7 @@ for trial in 1:num_trials v00 = eigvecs(Hg_ekp_final)[:,k:k] v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(20.0, 20.0)) + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(5000.0, 5000.0)) # TODO: Set very high to effectively turn off this diagnostic for speed # Orthogonalize proj = bestvec - Vgy_ekp_final * (Vgy_ekp_final' * bestvec) bestvec = proj / norm(proj) @@ -173,6 +173,7 @@ for trial in 1:num_trials @info "Construct with mean value MCMC final (1 sample), SL grad" u = mcmc_samples g = hcat([forward_map(uu, model) for uu in eachcol(u)]...) + N_ens = size(u, 2) C_at_final = cov([u; g], dims = 2) # basic cross-cov Cuu = C_at_final[1:input_dim, 1:input_dim] svdCuu = svd(Cuu) @@ -201,7 +202,7 @@ for trial in 1:num_trials counter += 1 Vs = hcat(Vgy_mcmc_final, vec(v)) Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' - res = sum( # TODO: Check if whitening is correct + res = N_ens \ sum( # TODO: Check if whitening is correct norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2# * det(Vs'*obs_noise_cov*Vs)^(-1/2) * exp(0.5(y-gg)'*Γtildeinv*(y-gg)) for gg in eachcol(g) ) @@ -211,7 +212,7 @@ for trial in 1:num_trials v00 = eigvecs(Hg_mcmc_final)[:,k:k] v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(20.0, 20.0)) + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(5000.0, 5000.0)) # TODO: Set very high to effectively turn off this diagnostic for speed # Orthogonalize proj = bestvec - Vgy_mcmc_final * (Vgy_mcmc_final' * bestvec) bestvec = proj / norm(proj) From a9df3b0edd83f9e64b6c00550a53e0ed7f304e26 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Fri, 6 Jun 2025 15:50:23 -0700 Subject: [PATCH 18/35] Add finite-difference Jacobian for Lorenz --- .../problems/problem_linear_exp.jl | 6 - .../problems/problem_lorenz.jl | 21 ++- ...2_build_and_compare_diagnostic_matrices.jl | 149 ++++++++---------- 3 files changed, 89 insertions(+), 87 deletions(-) diff --git a/examples/DimensionReduction/problems/problem_linear_exp.jl b/examples/DimensionReduction/problems/problem_linear_exp.jl index cbba538a6..f8d34dab3 100644 --- a/examples/DimensionReduction/problems/problem_linear_exp.jl +++ b/examples/DimensionReduction/problems/problem_linear_exp.jl @@ -54,15 +54,9 @@ function forward_map(X::AVorM, model::LE) where {LE <: 
LinearExp, AVorM <: Abstr return model.G * exp.(X) end -has_jac(::LinearExp) = true - # columns of X are samples function jac_forward_map(X::AM, model::LE) where {AM <: AbstractMatrix, LE <: LinearExp} # dGi / dXj = G_ij exp(x_j) = G.*exp.(mat with repeated x_j rows) # return [G * exp.(Diagonal(r)) for r in eachrow(X')] # correct but extra multiplies return [model.G .* exp.(reshape(c, 1, :)) for c in eachcol(X)] end - -function jac_forward_map(X::AV, model::LE) where {AV <: AbstractVector, LE <: LinearExp} - return jac_forward_map(reshape(X, :, 1), model) -end diff --git a/examples/DimensionReduction/problems/problem_lorenz.jl b/examples/DimensionReduction/problems/problem_lorenz.jl index 83eee4fa6..ac44aa2d2 100644 --- a/examples/DimensionReduction/problems/problem_lorenz.jl +++ b/examples/DimensionReduction/problems/problem_lorenz.jl @@ -156,8 +156,6 @@ struct Lorenz <: ForwardMapType nx end -has_jac(::Lorenz) = false - # columns of X are samples function forward_map(X::AbstractVector, model::Lorenz) lorenz_forward( @@ -172,6 +170,25 @@ function forward_map(X::AbstractMatrix, model::Lorenz) hcat([forward_map(x, model) for x in eachcol(X)]...) end +function jac_forward_map(X::AbstractVector, model::Lorenz) + # Finite-difference Jacobian + nx = model.nx + h = 1e-6 + J = zeros(nx * 2, nx) + for i in 1:nx + x_plus_h = copy(X) + x_plus_h[i] += h + x_minus_h = copy(X) + x_minus_h[i] -= h + J[:, i] = (forward_map(x_plus_h, model) - forward_map(x_minus_h, model)) / (2 * h) + end + return J +end + +function jac_forward_map(X::AbstractMatrix, model::Lorenz) + return [jac_forward_map(x, model) for x in eachcol(X)] +end + function lorenz(input_dim, output_dim, rng) #Creating my sythetic data #initalize model variables diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index f62552157..cdcc60226 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -9,6 +9,9 @@ using Manopt, Manifolds include("./settings.jl") +include("./problems/problem_linear_exp.jl") +include("./problems/problem_lorenz.jl") + if !isfile("datafiles/ekp_$(problem)_1.jld2") include("step1_generate_inverse_problem_data.jl") end @@ -59,8 +62,6 @@ for trial in 1:num_trials obs_noise_cov = loaded["obs_noise_cov"] y = loaded["y"] model = loaded["model"] - input_dim = size(get_u(ekp, 1), 1) - output_dim = size(get_g(ekp, 1), 1) prior_cov = cov(prior) prior_invrt = sqrt(inv(prior_cov)) @@ -71,30 +72,25 @@ for trial in 1:num_trials # random samples prior_samples = sample(prior, n_samples) - if has_jac(model) - # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) - @info "Construct good matrix ($(n_samples) samples of prior, perfect grad)" - gradG_samples = jac_forward_map(prior_samples, model) - Hu = zeros(input_dim, input_dim) - Hg = zeros(output_dim, output_dim) - - for j in 1:n_samples - Hu .+= 1 / n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt - Hg .+= 1 / n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt - end + # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) + @info "Construct good matrix ($(n_samples) samples of prior, perfect grad)" + gradG_samples = jac_forward_map(prior_samples, model) + Hu = zeros(input_dim, input_dim) + Hg = zeros(output_dim, output_dim) - # [1b] One-point 
approximation at mean value, with perfect grad - @info "Construct with mean value (1 sample), perfect grad" - prior_mean_appr = mean(prior) # approximate mean - gradG_at_mean = jac_forward_map(prior_mean_appr, model)[1] - # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf - Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt - Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt - else - Hu = Hu_mean = NaN * zeros(input_dim) - Hg = Hg_mean = NaN * zeros(output_dim) + for j in 1:n_samples + Hu .+= 1 / n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt + Hg .+= 1 / n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt end + # [1b] One-point approximation at mean value, with perfect grad + @info "Construct with mean value (1 sample), perfect grad" + prior_mean_appr = mean(prior) # approximate mean + gradG_at_mean = jac_forward_map(reshape(prior_mean_appr, input_dim, 1), model)[1] + # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf + Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt + Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt + # [2a] One-point approximation at mean value with SL grad @info "Construct with mean value prior (1 sample), SL grad" g = get_g(ekp, 1) @@ -224,44 +220,43 @@ for trial in 1:num_trials # cosine similarity of evector directions - svdHu = svd(Hu) - svdHg = svd(Hg) - svdHu_mean = svd(Hu_mean) - svdHg_mean = svd(Hg_mean) - svdHu_ekp_prior = svd(Hu_ekp_prior) - svdHg_ekp_prior = svd(Hg_ekp_prior) - svdHu_ekp_final = svd(Hu_ekp_final) - svdHg_ekp_final = svd(Hg_ekp_final) - svdHu_mcmc_final = svd(Hu_mcmc_final) - svdHg_mcmc_final = svd(Hg_mcmc_final) - svdHuy_ekp_final = svd(Huy_ekp_final) - svdHgy_ekp_final = svd(Hgy_ekp_final) - svdHuy_mcmc_final = svd(Huy_mcmc_final) - svdHgy_mcmc_final = svd(Hgy_mcmc_final) - if has_jac(model) - @info """ - - samples -> mean - $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) - - samples + deriv -> mean + (no deriv) prior - $(cossim_cols(svdHu.V, svdHu_ekp_prior.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_ekp_prior.V)[1:3]) - - samples + deriv -> mean + (no deriv) final - $(cossim_cols(svdHu.V, svdHu_ekp_final.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_ekp_final.V)[1:3]) - - mean+(no deriv): prior -> final - $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) - $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) - - y-aware -> samples - $(cossim_cols(svdHu.V, svdHuy_ekp_final.V)[1:3]) - $(cossim_cols(svdHg.V, svdHgy_ekp_final.V)[1:3]) - """ - end + alg = LinearAlgebra.QRIteration() + svdHu = svd(Hu; alg) + svdHg = svd(Hg; alg) + svdHu_mean = svd(Hu_mean; alg) + svdHg_mean = svd(Hg_mean; alg) + svdHu_ekp_prior = svd(Hu_ekp_prior; alg) + svdHg_ekp_prior = svd(Hg_ekp_prior; alg) + svdHu_ekp_final = svd(Hu_ekp_final; alg) + svdHg_ekp_final = svd(Hg_ekp_final; alg) + svdHu_mcmc_final = svd(Hu_mcmc_final; alg) + svdHg_mcmc_final = svd(Hg_mcmc_final; alg) + svdHuy_ekp_final = svd(Huy_ekp_final; alg) + svdHgy_ekp_final = svd(Hgy_ekp_final; alg) + svdHuy_mcmc_final = svd(Huy_mcmc_final; alg) + svdHgy_mcmc_final = svd(Hgy_mcmc_final; alg) + @info """ + + samples -> mean + $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) + + samples + deriv -> mean + (no deriv) prior + $(cossim_cols(svdHu.V, svdHu_ekp_prior.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_ekp_prior.V)[1:3]) + + 
samples + deriv -> mean + (no deriv) final + $(cossim_cols(svdHu.V, svdHu_ekp_final.V)[1:3]) + $(cossim_cols(svdHg.V, svdHg_ekp_final.V)[1:3]) + + mean+(no deriv): prior -> final + $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) + $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) + + y-aware -> samples + $(cossim_cols(svdHu.V, svdHuy_ekp_final.V)[1:3]) + $(cossim_cols(svdHg.V, svdHgy_ekp_final.V)[1:3]) + """ push!(Hu_evals, svdHu.S) push!(Hg_evals, svdHg.S) push!(Hu_mean_evals, svdHu_mean.S) @@ -270,30 +265,26 @@ for trial in 1:num_trials push!(Hg_ekp_prior_evals, svdHg_ekp_prior.S) push!(Hu_ekp_final_evals, svdHu_ekp_final.S) push!(Hg_ekp_final_evals, svdHg_ekp_final.S) - if has_jac(model) - push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) - push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) - push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) - push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) - push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) - push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) - push!(sim_Hu_mcmc_final, cossim_cols(svdHu.V, svdHu_mcmc_final.V)) - push!(sim_Hg_mcmc_final, cossim_cols(svdHg.V, svdHg_mcmc_final.V)) - push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) - push!(sim_Hgy_ekp_final, cossim_cols(svdHg.V, svdHgy_ekp_final.V)) - push!(sim_Huy_mcmc_final, cossim_cols(svdHu.V, svdHuy_mcmc_final.V)) - push!(sim_Hgy_mcmc_final, cossim_cols(svdHg.V, svdHgy_mcmc_final.V)) - end + push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) + push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) + push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) + push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) + push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) + push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) + push!(sim_Hu_mcmc_final, cossim_cols(svdHu.V, svdHu_mcmc_final.V)) + push!(sim_Hg_mcmc_final, cossim_cols(svdHg.V, svdHg_mcmc_final.V)) + push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) + push!(sim_Hgy_ekp_final, cossim_cols(svdHg.V, svdHgy_ekp_final.V)) + push!(sim_Huy_mcmc_final, cossim_cols(svdHu.V, svdHuy_mcmc_final.V)) + push!(sim_Hgy_mcmc_final, cossim_cols(svdHg.V, svdHgy_mcmc_final.V)) # cosine similarity to output svd from samples G_samples = forward_map(prior_samples, model)' svdG = svd(G_samples) # nonsquare, so permuted so evectors are V svdU = svd(prior_samples') - if has_jac(model) - push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) - push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) - end + push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) + push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) #! 
format: off save( From 1e6de70abee64071012ba1e14c9d9d4ef9923ee6 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Fri, 6 Jun 2025 18:10:10 -0700 Subject: [PATCH 19/35] Use prior for entire space (as I think we should) --- examples/DimensionReduction/settings.jl | 14 +- .../step1_generate_inverse_problem_data.jl | 19 +- .../step3_estimate_posteriors.jl | 217 +++++++++--------- examples/DimensionReduction/util.jl | 23 ++ 4 files changed, 141 insertions(+), 132 deletions(-) create mode 100644 examples/DimensionReduction/util.jl diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index f475cab38..3dbc9b6f6 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -2,7 +2,7 @@ ## -- Configure the inverse problem -- problem = "linear_exp" # "lorenz" or "linear_exp" -input_dim = 50 +input_dim = 200 output_dim = 50 ## -- Configure parameters of the experiment itself -- @@ -11,22 +11,24 @@ num_trials = 2 # Specific to step 1 step1_eki_ensemble_size = 800 -step1_eki_max_iters = 20 +step1_eki_max_iters = 200 step1_mcmc_temperature = 2.0 # 1.0 is the "true" posterior; higher oversamples the tails step1_mcmc_sampler = :rw # :rw or :mala step1_mcmc_samples_per_chain = 50_000 +step1_mcmc_num_chains = 8 step1_mcmc_subsample_rate = 100 # Specific to step 2 -step2_num_prior_samples = 2000 # paper uses 5e5 +step2_num_prior_samples = 5_000 # paper uses 5e5 # Specific to step 3 step3_diagnostics_to_use = [ - ("Hu", 10, "Hg", 10), + ("Hu", 16, "Hg", 16), ] -step3_run_reduced_in_full_space = true +step3_run_reduced_in_full_space = false step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` step3_mcmc_sampler = :rw # :rw or :mala; only used if `step3_posterior_sampler == :mcmc` -step3_mcmc_samples_per_chain = 50_000 # only used if `step3_posterior_sampler == :mcmc` +step3_mcmc_samples_per_chain = 20_000 # only used if `step3_posterior_sampler == :mcmc` +step3_mcmc_num_chains = 8 # only used if `step3_posterior_sampler == :mcmc` diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index f41be7b17..3ecc3ed9e 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -11,6 +11,7 @@ include("./problems/problem_linear_exp.jl") include("./problems/problem_lorenz.jl") include("./settings.jl") +include("./util.jl") rng = Random.MersenneTwister(rng_seed) problem_fun = if problem == "lorenz" lorenz @@ -45,23 +46,11 @@ for trial in 1:num_trials # [2] MCMC run prior_cov, prior_inv, obs_inv = cov(prior), inv(cov(prior)), inv(obs_noise_cov) - logpost = x -> begin + mcmc_samples = zeros(input_dim, 0) + do_mcmc(input_dim, x -> begin g = forward_map(x, model) (-2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g)) / step1_mcmc_temperature - end - density_model = DensityModel(logpost) - num_iters = 1 - mcmc_samples = zeros(input_dim, 0) - for _ in 1:num_iters - sampler = if step1_mcmc_sampler == :mala - MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) - elseif step1_mcmc_sampler == :rw - RWMH(MvNormal(zeros(input_dim), .01prior_cov)) - else - throw("Unknown step1_mcmc_sampler=$step1_mcmc_sampler") - end - chain = sample(density_model, sampler, MCMCThreads(), step1_mcmc_samples_per_chain, 8; 
chain_type=Chains, initial_params=[zeros(input_dim) for _ in 1:8]) - samp = vcat([vec(MCMCChains.get(chain, Symbol("param_$i"))[1]'[:, end÷2:step1_mcmc_subsample_rate:end])' for i in 1:input_dim]...) + end, step1_mcmc_num_chains, step1_mcmc_samples_per_chain, step1_mcmc_sampler, prior_cov, subsample_rate=step1_mcmc_subsample_rate) do samp mcmc_samples = hcat(mcmc_samples, samp) end @info "MCMC finished" diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index e6afd11c6..b9d971f03 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -2,12 +2,14 @@ using AdvancedMH using Distributions using ForwardDiff using JLD2 +using LinearAlgebra using MCMCChains using Plots using Random using Statistics include("./settings.jl") +include("./util.jl") rng = Random.MersenneTwister(rng_seed) if !isfile("datafiles/ekp_$(problem)_1.jld2") @@ -35,7 +37,6 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use prior_inv = inv(prior_cov) prior_invrt = sqrt(inv(prior_cov)) prior_rt = sqrt(prior_cov) - obs_rt = sqrt(obs_noise_cov) obs_invrt = sqrt(inv(obs_noise_cov)) obs_inv = inv(obs_noise_cov) @@ -44,13 +45,15 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use Hu = diagnostic_mats[in_diag] Hg = diagnostic_mats[out_diag] - svdu = svd(Hu) - svdg = svd(Hg) + svdu = svd(Hu; alg=LinearAlgebra.QRIteration()) + svdg = svd(Hg; alg=LinearAlgebra.QRIteration()) U_r = svdu.V[:, 1:in_r] V_r = svdg.V[:, 1:out_r] - M = prior_rt * U_r * U_r' * prior_invrt - N = obs_rt * V_r * V_r' * obs_invrt + # Projection matrices + P = U_r' * prior_invrt + Pinv = prior_rt * U_r + Q = V_r' * obs_invrt obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr obs_noise_cov_r_inv = inv(obs_noise_cov_r) @@ -58,126 +61,111 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use prior_cov_r_inv = inv(prior_cov_r) y_r = V_r' * obs_invrt * y + # TODO: Fix assert below for the actual type of `prior` + # @assert prior isa MvNormal && mean(prior) == zeros(input_dim) + + # Let prior = N(0, C) and let x ~ prior + # Then the distribution of x | P*x=x_r is N(Mmean * x_r, Mcov) + C = prior_cov + Mmean = C*P'*inv(P*C*P') + @assert Pinv ≈ Mmean + Mcov = C - Mmean*P*C + 1e-13 * I + Mcov = (Mcov + Mcov') / 2 # Otherwise, it's not numerically Hermitian + covsamps = rand(MvNormal(zeros(input_dim), Mcov), 8) + if step3_posterior_sampler == :mcmc - logpostfull = x -> begin + mean_full = zeros(input_dim) + do_mcmc(input_dim, x -> begin g = forward_map(x, model) -2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g) + end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov) do samp, num_batches + mean_full += mean(samp; dims = 2) / num_batches end - densitymodelfull = DensityModel(logpostfull) - mean_full = zeros(input_dim) - num_iters = 1 - for _ in 1:num_iters - sampler = if step3_mcmc_sampler == :mala - MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) - elseif step3_mcmc_sampler == :rw - RWMH(MvNormal(zeros(input_dim), .01prior_cov)) - else - throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") - end - chainfull = sample(densitymodelfull, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params=[zeros(input_dim) for _ in 1:8]) - sampfull = vcat([vec(MCMCChains.get(chainfull, Symbol("param_$i"))[1]'[:, end÷2:end])' for i in 1:input_dim]...) 
- mean_full += mean(sampfull; dims = 2) / num_iters - end - mean_full_red = U_r' * (prior_invrt * mean_full) + mean_full_red = P * mean_full if step3_run_reduced_in_full_space - logpostred = xfull -> begin - xredfull = M * xfull - gredfull = N * forward_map(xredfull, model) - - -2\xfull'*prior_inv*xfull - 2\(y - gredfull)'*obs_inv*(y - gredfull) - end - densitymodelred = DensityModel(logpostred) - mean_red_full = zeros(input_dim) - num_iters = 1 - for _ in 1:num_iters - sampler = if step3_mcmc_sampler == :mala - MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) - elseif step3_mcmc_sampler == :rw - RWMH(MvNormal(zeros(input_dim), .01prior_cov)) - else - throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") - end - chainred = sample(densitymodelred, sampler, MCMCThreads(), step3_mcmc_samples_per_chain, 8; chain_type=Chains, initial_params=[zeros(input_dim) for _ in 1:8]) - sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1]'[:, end÷2:end])' for i in 1:input_dim]...) - mean_red_full += mean(sampred; dims = 2) / num_iters + do_mcmc(input_dim, xfull -> begin + xred = P*xfull + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + + return -2\xfull'*prior_inv*xfull + mean( + -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + for (x, g) in zip(eachcol(samp), gsamp) + ) + end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov) do samp, num_batches + mean_red_full += mean(samp; dims = 2) / num_batches end - mean_red = U_r' * (prior_invrt * mean_red_full) + mean_red = P * mean_red_full else - logpostred = xred -> begin - xredfull = prior_rt * U_r * xred - gred = V_r' * obs_invrt * forward_map(xredfull, model) - - -2\xred'*prior_cov_r_inv*xred - 2\(y_r - gred)'*obs_noise_cov_r_inv*(y_r - gred) - end - densitymodelred = DensityModel(logpostred) - mean_red = zeros(in_r) - num_iters = 1 - for _ in 1:num_iters - sampler, num_samples = if step3_mcmc_sampler == :mala - MALA(x -> MvNormal(.0001 * prior_cov_r * x, .0001 * 2 * prior_cov_r)), 50 # MALA is very slow, likely due to ForwardDiff - elseif step3_mcmc_sampler == :rw - RWMH(MvNormal(zeros(in_r), .01prior_cov_r)), 5_000 - else - throw("Unknown step3_mcmc_sampler=$step3_mcmc_sampler") - end - chainred = sample(densitymodelred, sampler, MCMCThreads(), num_samples, 8; chain_type=Chains, initial_params=[zeros(in_r) for _ in 1:8]) - sampred = vcat([vec(MCMCChains.get(chainred, Symbol("param_$i"))[1]'[:, end÷2:end])' for i in 1:in_r]...) 
- mean_red += mean(sampred; dims = 2) / num_iters + do_mcmc(in_r, xred -> begin + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + + return -2\xred'*prior_cov_r_inv*xred + mean( + -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + for (x, g) in zip(eachcol(samp), gsamp) + ) + end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov_r) do samp, num_batches + mean_red += mean(samp; dims = 2) / num_batches end - mean_red_full = prior_rt * U_r * mean_red + mean_red_full = Pinv*mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here end elseif step3_posterior_sampler == :eks - n_ensemble = step3_eks_ensemble_size - n_iters_max = step3_eks_max_iters - - initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng, scheduler = EKSStableScheduler(2.0, 0.01)) - for i in 1:n_iters_max - G_ens = hcat([forward_map(params, model) for params in eachcol(get_ϕ_final(prior, ekp))]...) - isnothing(update_ensemble!(ekp, G_ens)) || break - end - ekp_u, ekp_g = reduce(hcat, get_u(ekp)), reduce(hcat, get_g(ekp)) - mean_full = get_u_mean_final(ekp) - mean_full_red = U_r' * prior_invrt * mean_full - - if step3_run_reduced_in_full_space - initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - ekp_r = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng, scheduler = EKSStableScheduler(2.0, 0.01)) + throw(""" + EKS sampling from the reduced posterior is currently not supported: + The reduced posterior density is not straightforwardly defined in terms of a forward model. + We need to look into this. + """) + # n_ensemble = step3_eks_ensemble_size + # n_iters_max = step3_eks_max_iters + + # initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + # ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng, scheduler = EKSStableScheduler(2.0, 0.01)) + # for i in 1:n_iters_max + # G_ens = hcat([forward_map(params, model) for params in eachcol(get_ϕ_final(prior, ekp))]...) + # isnothing(update_ensemble!(ekp, G_ens)) || break + # end + # ekp_u, ekp_g = reduce(hcat, get_u(ekp)), reduce(hcat, get_g(ekp)) + # mean_full = get_u_mean_final(ekp) + # mean_full_red = U_r' * prior_invrt * mean_full + + # if step3_run_reduced_in_full_space + # initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + # ekp_r = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng, scheduler = EKSStableScheduler(2.0, 0.01)) - for i in 1:n_iters_max - G_ens = hcat([N*forward_map(M*params, model) for params in eachcol(get_ϕ_final(prior, ekp_r))]...) - isnothing(update_ensemble!(ekp_r, G_ens)) || break - end - ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) - mean_red_full = get_u_mean_final(ekp_r) - mean_red = U_r' * prior_invrt * mean_red_full - else - initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - initial_r = U_r' * prior_invrt * initial_ensemble - prior_r = ParameterDistribution( - Samples(U_r' * prior_invrt * sample(rng, prior, 1000)), - repeat([no_constraint()], in_r), - "prior_r", - ) - - ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng) + # for i in 1:n_iters_max + # G_ens = hcat([N*forward_map(M*params, model) for params in eachcol(get_ϕ_final(prior, ekp_r))]...) 
+ # isnothing(update_ensemble!(ekp_r, G_ens)) || break + # end + # ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) + # mean_red_full = get_u_mean_final(ekp_r) + # mean_red = U_r' * prior_invrt * mean_red_full + # else + # initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) + # initial_r = U_r' * prior_invrt * initial_ensemble + # prior_r = ParameterDistribution( + # Samples(U_r' * prior_invrt * sample(rng, prior, 1000)), + # repeat([no_constraint()], in_r), + # "prior_r", + # ) + + # ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng) - for i in 1:n_iters_max - # evaluate true G - G_ens_full = reduce(hcat, [forward_map(prior_rt * U_r * params, model) for params in eachcol(get_ϕ_final(prior_r, ekp_r))]) - # project data back - G_ens = V_r' * obs_invrt * G_ens_full + # for i in 1:n_iters_max + # # evaluate true G + # G_ens_full = reduce(hcat, [forward_map(prior_rt * U_r * params, model) for params in eachcol(get_ϕ_final(prior_r, ekp_r))]) + # # project data back + # G_ens = V_r' * obs_invrt * G_ens_full - isnothing(update_ensemble!(ekp_r, G_ens)) || break - end - ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) - mean_red = get_u_mean_final(ekp_r) - mean_red_full = prior_rt * U_r * mean_red - # TODO: Check if we're OK with this way of projecting - end + # isnothing(update_ensemble!(ekp_r, G_ens)) || break + # end + # ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) + # mean_red = get_u_mean_final(ekp_r) + # mean_red_full = prior_rt * U_r * mean_red + # end else throw("Unknown step3_posterior_sampler=$step3_posterior_sampler") end @@ -186,9 +174,16 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use True: $(true_parameter[1:5]) Mean (in full space): $(mean_full[1:5]) Red. mean (in full space): $(mean_red_full[1:5]) + + Mean (in red. space): $(mean_full_red[1:16]) + Red. mean (in red. space): $(mean_red[1:16]) - Relative error in full space: $(norm(mean_full - mean_red_full) / norm(mean_full)) - Relative error in reduced space: $(norm(mean_full_red - mean_red) / norm(mean_full_red)) + Relative error on mean in full space: $(norm(mean_full - mean_red_full) / norm(mean_full)) + Relative error on mean in reduced space: $(norm(mean_full_red - mean_red) / norm(mean_full_red)) """ + # [A] The relative error seems larger in the reduced space + # The reason is likely the whitening that happens. Small absolute errors in the full space + # can be amplified in the reduced space due to the different scales in the prior. I think + # the full space is probably the one we should be concerned about. 
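+ # (Illustration of that amplification, assuming the prior used by the linear problems, with
+ # variances 4 * j^(-2): whitening multiplies component j by 1 / sqrt(4 j^(-2)) = j / 2, so an
+ # absolute error of 1e-3 in component j = 100 becomes 5e-2 in the reduced coordinates.)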
end end diff --git a/examples/DimensionReduction/util.jl b/examples/DimensionReduction/util.jl new file mode 100644 index 000000000..a4fbb10fd --- /dev/null +++ b/examples/DimensionReduction/util.jl @@ -0,0 +1,23 @@ +using AdvancedMH +using Distributions +using ForwardDiff +using MCMCChains + +function do_mcmc(callback, dim, logpost, num_chains, num_samples_per_chain, mcmc_sampler, prior_cov; subsample_rate=1) + density_model = DensityModel(logpost) + sampler = if mcmc_sampler == :mala + MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) + elseif mcmc_sampler == :rw + RWMH(MvNormal(zeros(dim), .01prior_cov)) + else + throw("Unknown mcmc_sampler=$mcmc_sampler") + end + + num_batches = (num_chains + 7) ÷ 8 + for batch in 1:num_batches + num_chains_in_batch = min(8, num_chains - (batch - 1)*8) + chain = sample(density_model, sampler, MCMCThreads(), num_samples_per_chain, num_chains_in_batch; chain_type=Chains, initial_params=[zeros(dim) for _ in 1:num_chains_in_batch]) + samp = vcat([vec(MCMCChains.get(chain, Symbol("param_$i"))[1]'[:, end÷2:subsample_rate:end])' for i in 1:dim]...) + callback(samp, num_batches) + end +end From 112678412d83108a0ae3fba44bfb62730a1aa9c5 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Fri, 6 Jun 2025 18:14:45 -0700 Subject: [PATCH 20/35] Fix bug --- .../DimensionReduction/step1_generate_inverse_problem_data.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 3ecc3ed9e..80ddf8a57 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -50,7 +50,7 @@ for trial in 1:num_trials do_mcmc(input_dim, x -> begin g = forward_map(x, model) (-2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g)) / step1_mcmc_temperature - end, step1_mcmc_num_chains, step1_mcmc_samples_per_chain, step1_mcmc_sampler, prior_cov, subsample_rate=step1_mcmc_subsample_rate) do samp + end, step1_mcmc_num_chains, step1_mcmc_samples_per_chain, step1_mcmc_sampler, prior_cov, subsample_rate=step1_mcmc_subsample_rate) do samp, _ mcmc_samples = hcat(mcmc_samples, samp) end @info "MCMC finished" From 674ea13f403e11a458eff4168a65f296c402858a Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Mon, 9 Jun 2025 16:12:38 -0700 Subject: [PATCH 21/35] Add linear model, forward-model marginalization, and Huy diagnostic matrix --- .../problems/problem_linear.jl | 53 +++++++++++++++++++ examples/DimensionReduction/settings.jl | 15 +++--- .../step1_generate_inverse_problem_data.jl | 7 ++- ...2_build_and_compare_diagnostic_matrices.jl | 17 ++++-- .../step3_estimate_posteriors.jl | 30 ++++++++--- 5 files changed, 103 insertions(+), 19 deletions(-) create mode 100644 examples/DimensionReduction/problems/problem_linear.jl diff --git a/examples/DimensionReduction/problems/problem_linear.jl b/examples/DimensionReduction/problems/problem_linear.jl new file mode 100644 index 000000000..1bab4cb88 --- /dev/null +++ b/examples/DimensionReduction/problems/problem_linear.jl @@ -0,0 +1,53 @@ +using LinearAlgebra +using EnsembleKalmanProcesses +using EnsembleKalmanProcesses.ParameterDistributions +using Statistics +using Distributions + +include("forward_maps.jl") + +function linear(input_dim, output_dim, rng) + # prior + γ0 = 4.0 + β_γ = -2 + Γ = Diagonal([γ0 * (1.0 * j)^β_γ for j in 1:input_dim]) + prior_dist = MvNormal(zeros(input_dim), Γ) + prior = 
ParameterDistribution( + Dict( + "distribution" => Parameterized(prior_dist), + "constraint" => repeat([no_constraint()], input_dim), + "name" => "param_$(input_dim)", + ), + ) + + U = qr(randn(rng, (output_dim, output_dim))).Q + V = qr(randn(rng, (input_dim, input_dim))).Q + λ0 = 100.0 + β_λ = -1 + Λ = Diagonal([λ0 * (1.0 * j)^β_λ for j in 1:output_dim]) + A = U * Λ * V[1:output_dim, :] # output x input + model = Linear(input_dim, output_dim, A) + + # generate data sample + obs_noise_cov = Diagonal([Float64(j)^(-1/2) for j in 1:output_dim]) + noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) + # true_parameter = reshape(ones(input_dim), :, 1) + true_parameter = rand(prior_dist) + y = vec(forward_map(true_parameter, model) + noise) + return prior, y, obs_noise_cov, model, true_parameter +end + +struct Linear{AM <: AbstractMatrix} <: ForwardMapType + input_dim::Int + output_dim::Int + G::AM +end + +function forward_map(X::AVorM, model::Linear) where {AVorM <: AbstractVecOrMat} + return model.G * X +end + +function jac_forward_map(X::AbstractMatrix, model::Linear) + return [model.G for _ in eachcol(X)] +end + diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index 3dbc9b6f6..ec69b45ea 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -1,31 +1,34 @@ # CONFIGURE THE THREE STEPS ## -- Configure the inverse problem -- -problem = "linear_exp" # "lorenz" or "linear_exp" +problem = "linear" # "lorenz" or "linear" or "linear_exp" input_dim = 200 output_dim = 50 ## -- Configure parameters of the experiment itself -- rng_seed = 41 -num_trials = 2 +num_trials = 1 # Specific to step 1 step1_eki_ensemble_size = 800 -step1_eki_max_iters = 200 -step1_mcmc_temperature = 2.0 # 1.0 is the "true" posterior; higher oversamples the tails +step1_eki_max_iters = 20 +step1_mcmc_temperature = 1.0 # 1.0 is the "true" posterior; higher oversamples the tails step1_mcmc_sampler = :rw # :rw or :mala step1_mcmc_samples_per_chain = 50_000 step1_mcmc_num_chains = 8 -step1_mcmc_subsample_rate = 100 +step1_mcmc_subsample_rate = 1000 # Specific to step 2 step2_num_prior_samples = 5_000 # paper uses 5e5 # Specific to step 3 step3_diagnostics_to_use = [ - ("Hu", 16, "Hg", 16), + ("Huy", 4, "Hg", 16), + ("Huy", 8, "Hg", 16), + ("Huy", 16, "Hg", 16), ] step3_run_reduced_in_full_space = false +step3_marginalization = :forward_model # :loglikelihood or :forward_model step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 80ddf8a57..65cedea7c 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -7,16 +7,19 @@ using LinearAlgebra using MCMCChains using Random +include("./problems/problem_linear.jl") include("./problems/problem_linear_exp.jl") include("./problems/problem_lorenz.jl") include("./settings.jl") include("./util.jl") rng = Random.MersenneTwister(rng_seed) -problem_fun = if problem == "lorenz" - lorenz +problem_fun = if problem == "linear" + linear elseif problem == "linear_exp" linear_exp +elseif problem == "lorenz" + lorenz else throw("Unknown problem=$problem") end diff --git 
a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index cdcc60226..8140c8e35 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -8,7 +8,7 @@ using JLD2 using Manopt, Manifolds include("./settings.jl") - +include("./problems/problem_linear.jl") include("./problems/problem_linear_exp.jl") include("./problems/problem_lorenz.jl") @@ -102,13 +102,15 @@ for trial in 1:num_trials nz = min(N_ens - 1, input_dim) # nonzero sv's pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_prior[(input_dim + 1):end, 1:input_dim] + Cug = C_at_prior[(input_dim + 1):end, 1:input_dim] # TODO: Isn't this Cgu? # SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. # Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt # Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt Hu_ekp_prior = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + println("Relative gradient error: ", norm(gradG_at_mean - (Cug * pinvCuu)) / norm(gradG_at_mean)) + # [2b] One-point approximation at mean value with SL grad @info "Construct with mean value EKP final (1 sample), SL grad" final_it = length(get_g(ekp)) @@ -135,7 +137,7 @@ for trial in 1:num_trials dim_g = size(g, 1) Vgy_ekp_final = zeros(dim_g, 0) - num_vecs = 10 + num_vecs = 1 @assert num_vecs ≤ dim_g for k in 1:num_vecs println("vector $k") @@ -180,6 +182,14 @@ for trial in 1:num_trials Hu_mcmc_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_mcmc_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + @info "Construct y-informed at MCMC final (perfect grad)" + gradG_samples = jac_forward_map(u, model) + Huy = zeros(input_dim, input_dim) + + for j in 1:N_ens + Huy .+= 1 / N_ens * prior_rt * gradG_samples[j]' * obs_inv^2 * (y - g[:, j]) * (y - g[:, j])' * obs_inv^2 * gradG_samples[j] * prior_rt # TODO: Is the obs_inv^2 correct? 
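# One way to read the TODO above: if Huy is intended as a sample estimate of E[w w'] with
# w = prior_rt * gradG(u)' * obs_inv * (y - G(u)) (the prior-whitened log-likelihood gradient),
# then a single obs_inv on each side of the (y - g)(y - g)' factor is the natural weighting, and
# the obs_inv^2 used here adds one extra factor of obs_inv per side, further up-weighting the
# low-noise output components.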
+ end + @info "Construct y-informed at MCMC final (SL grad)" myCug = Cug' Huy_mcmc_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct @@ -291,6 +301,7 @@ for trial in 1:num_trials "datafiles/diagnostic_matrices_$(problem)_$(trial).jld2", "Hu", Hu, "Hg", Hg, + "Huy", Huy, "Hu_mean", Hu_mean, "Hg_mean", Hg_mean, "Hu_ekp_prior", Hu_ekp_prior, diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index b9d971f03..a47ae3ce6 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -90,10 +90,17 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use samp = covsamps .+ Mmean * xred gsamp = map(x -> forward_map(x, model), eachcol(samp)) - return -2\xfull'*prior_inv*xfull + mean( + return -2\xfull'*prior_inv*xfull + if step3_marginalization == :loglikelihood + mean( + -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + for (x, g) in zip(eachcol(samp), gsamp) + ) + elseif step3_marginalization == :forward_model + g = mean(gsamp) -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) - for (x, g) in zip(eachcol(samp), gsamp) - ) + else + throw("Unknown step3_marginalization=$step3_marginalization") + end end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov) do samp, num_batches mean_red_full += mean(samp; dims = 2) / num_batches end @@ -104,10 +111,17 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use samp = covsamps .+ Mmean * xred gsamp = map(x -> forward_map(x, model), eachcol(samp)) - return -2\xred'*prior_cov_r_inv*xred + mean( + return -2\xred'*prior_cov_r_inv*xred + if step3_marginalization == :loglikelihood + mean( + -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + for (x, g) in zip(eachcol(samp), gsamp) + ) + elseif step3_marginalization == :forward_model + g = mean(gsamp) -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) - for (x, g) in zip(eachcol(samp), gsamp) - ) + else + throw("Unknown step3_marginalization=$step3_marginalization") + end end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov_r) do samp, num_batches mean_red += mean(samp; dims = 2) / num_batches end @@ -175,8 +189,8 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use Mean (in full space): $(mean_full[1:5]) Red. mean (in full space): $(mean_red_full[1:5]) - Mean (in red. space): $(mean_full_red[1:16]) - Red. mean (in red. space): $(mean_red[1:16]) + Mean (in red. space): $(mean_full_red) + Red. mean (in red. 
space): $(mean_red) Relative error on mean in full space: $(norm(mean_full - mean_red_full) / norm(mean_full)) Relative error on mean in reduced space: $(norm(mean_full_red - mean_red) / norm(mean_full_red)) From 2c59ae52ca21a4ea03f3a336bffeb464d6666b43 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Mon, 9 Jun 2025 17:04:52 -0700 Subject: [PATCH 22/35] Fix silly performance bug and make step3_num_marginalization_samples a setting --- examples/DimensionReduction/settings.jl | 1 + .../step3_estimate_posteriors.jl | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index ec69b45ea..cad9c3f6a 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -29,6 +29,7 @@ step3_diagnostics_to_use = [ ] step3_run_reduced_in_full_space = false step3_marginalization = :forward_model # :loglikelihood or :forward_model +step3_num_marginalization_samples = 8 step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index a47ae3ce6..cb25f02ca 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -59,7 +59,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use obs_noise_cov_r_inv = inv(obs_noise_cov_r) prior_cov_r = U_r' * U_r prior_cov_r_inv = inv(prior_cov_r) - y_r = V_r' * obs_invrt * y + y_r = Q * y # TODO: Fix assert below for the actual type of `prior` # @assert prior isa MvNormal && mean(prior) == zeros(input_dim) @@ -71,7 +71,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use @assert Pinv ≈ Mmean Mcov = C - Mmean*P*C + 1e-13 * I Mcov = (Mcov + Mcov') / 2 # Otherwise, it's not numerically Hermitian - covsamps = rand(MvNormal(zeros(input_dim), Mcov), 8) + covsamps = rand(MvNormal(zeros(input_dim), Mcov), step3_num_marginalization_samples) if step3_posterior_sampler == :mcmc mean_full = zeros(input_dim) @@ -92,12 +92,12 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use return -2\xfull'*prior_inv*xfull + if step3_marginalization == :loglikelihood mean( - -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + -2\(Q*(y - g))'*obs_noise_cov_r_inv*(Q*(y - g)) for (x, g) in zip(eachcol(samp), gsamp) ) elseif step3_marginalization == :forward_model g = mean(gsamp) - -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + -2\(y_r - Q*g)'*obs_noise_cov_r_inv*(y_r - Q*g) else throw("Unknown step3_marginalization=$step3_marginalization") end @@ -113,19 +113,19 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use return -2\xred'*prior_cov_r_inv*xred + if step3_marginalization == :loglikelihood mean( - -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + -2\(y_r - Q*g)'*obs_noise_cov_r_inv*(y_r - Q*g) for (x, g) in zip(eachcol(samp), gsamp) ) elseif step3_marginalization == :forward_model g = mean(gsamp) - -2\(Q*(y - g))'*inv(Q*obs_noise_cov*Q')*(Q*(y - g)) + -2\(y_r - Q*g)'*obs_noise_cov_r_inv*(y_r - Q*g) else throw("Unknown step3_marginalization=$step3_marginalization") end end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov_r) do samp, num_batches mean_red += mean(samp; dims = 2) / num_batches end - 
mean_red_full = Pinv*mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here + mean_red_full = Pinv*mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here (same in a few other places) end elseif step3_posterior_sampler == :eks throw(""" From acb3f3199324b99246f4b4a1730e99d2c06c6404 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Mon, 9 Jun 2025 17:52:53 -0700 Subject: [PATCH 23/35] Re-add EKS sampling in step 3 (only with :forward_model marginalization) --- .../step3_estimate_posteriors.jl | 93 +++++++------------ examples/DimensionReduction/util.jl | 12 +++ 2 files changed, 48 insertions(+), 57 deletions(-) diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index cb25f02ca..4567e03d8 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -56,10 +56,17 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use Q = V_r' * obs_invrt obs_noise_cov_r = V_r' * V_r # Vr' * invrt(noise) * noise * invrt(noise) * Vr - obs_noise_cov_r_inv = inv(obs_noise_cov_r) + @assert obs_noise_cov_r ≈ I prior_cov_r = U_r' * U_r prior_cov_r_inv = inv(prior_cov_r) y_r = Q * y + prior_r = ParameterDistribution( + Dict( + "distribution" => Parameterized(MvNormal(zeros(in_r), prior_cov_r)), + "constraint" => repeat([no_constraint()], in_r), + "name" => "param_$(in_r)", + ), + ) # TODO: Fix assert below for the actual type of `prior` # @assert prior isa MvNormal && mean(prior) == zeros(input_dim) @@ -92,12 +99,12 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use return -2\xfull'*prior_inv*xfull + if step3_marginalization == :loglikelihood mean( - -2\(Q*(y - g))'*obs_noise_cov_r_inv*(Q*(y - g)) + -2\(y_r - Q*g)'*(y_r - Q*g) for (x, g) in zip(eachcol(samp), gsamp) ) elseif step3_marginalization == :forward_model g = mean(gsamp) - -2\(y_r - Q*g)'*obs_noise_cov_r_inv*(y_r - Q*g) + -2\(y_r - Q*g)'*(y_r - Q*g) else throw("Unknown step3_marginalization=$step3_marginalization") end @@ -113,12 +120,12 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use return -2\xred'*prior_cov_r_inv*xred + if step3_marginalization == :loglikelihood mean( - -2\(y_r - Q*g)'*obs_noise_cov_r_inv*(y_r - Q*g) + -2\(y_r - Q*g)'*(y_r - Q*g) for (x, g) in zip(eachcol(samp), gsamp) ) elseif step3_marginalization == :forward_model g = mean(gsamp) - -2\(y_r - Q*g)'*obs_noise_cov_r_inv*(y_r - Q*g) + -2\(y_r - Q*g)'*(y_r - Q*g) else throw("Unknown step3_marginalization=$step3_marginalization") end @@ -128,58 +135,30 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use mean_red_full = Pinv*mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here (same in a few other places) end elseif step3_posterior_sampler == :eks - throw(""" - EKS sampling from the reduced posterior is currently not supported: - The reduced posterior density is not straightforwardly defined in terms of a forward model. - We need to look into this. 
- """) - # n_ensemble = step3_eks_ensemble_size - # n_iters_max = step3_eks_max_iters - - # initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - # ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng = rng, scheduler = EKSStableScheduler(2.0, 0.01)) - # for i in 1:n_iters_max - # G_ens = hcat([forward_map(params, model) for params in eachcol(get_ϕ_final(prior, ekp))]...) - # isnothing(update_ensemble!(ekp, G_ens)) || break - # end - # ekp_u, ekp_g = reduce(hcat, get_u(ekp)), reduce(hcat, get_g(ekp)) - # mean_full = get_u_mean_final(ekp) - # mean_full_red = U_r' * prior_invrt * mean_full - - # if step3_run_reduced_in_full_space - # initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - # ekp_r = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng, scheduler = EKSStableScheduler(2.0, 0.01)) - - # for i in 1:n_iters_max - # G_ens = hcat([N*forward_map(M*params, model) for params in eachcol(get_ϕ_final(prior, ekp_r))]...) - # isnothing(update_ensemble!(ekp_r, G_ens)) || break - # end - # ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) - # mean_red_full = get_u_mean_final(ekp_r) - # mean_red = U_r' * prior_invrt * mean_red_full - # else - # initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - # initial_r = U_r' * prior_invrt * initial_ensemble - # prior_r = ParameterDistribution( - # Samples(U_r' * prior_invrt * sample(rng, prior, 1000)), - # repeat([no_constraint()], in_r), - # "prior_r", - # ) - - # ekp_r = EnsembleKalmanProcess(initial_r, y_r, obs_noise_cov_r, Sampler(mean(prior_r)[:], cov(prior_r)); rng) - - # for i in 1:n_iters_max - # # evaluate true G - # G_ens_full = reduce(hcat, [forward_map(prior_rt * U_r * params, model) for params in eachcol(get_ϕ_final(prior_r, ekp_r))]) - # # project data back - # G_ens = V_r' * obs_invrt * G_ens_full - - # isnothing(update_ensemble!(ekp_r, G_ens)) || break - # end - # ekp_r_u, ekp_r_g = reduce(hcat, get_u(ekp_r)), reduce(hcat, get_g(ekp_r)) - # mean_red = get_u_mean_final(ekp_r) - # mean_red_full = prior_rt * U_r * mean_red - # end + step3_marginalization == :forward_model || throw("EKS sampling from the reduced posterior is only supported when marginalizing over the forward model.") + + u, _ = do_eks(input_dim, x -> forward_map(x, model), y, obs_noise_cov, prior, rng, step3_eks_ensemble_size, step3_eks_max_iters) + mean_full = mean(u; dims = 2) + mean_full_red = P * mean_full + + if step3_run_reduced_in_full_space + u, _ = do_eks(input_dim, xfull -> begin + xred = P*xfull + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + return Q*mean(gsamp) + end, y_r, 1.0*I(out_r), prior, rng, step3_eks_ensemble_size, step3_eks_max_iters) + mean_red_full = mean(u; dims = 2) + mean_red = P * mean_red_full + else + u, _ = do_eks(in_r, xred -> begin + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + return Q*mean(gsamp) + end, y_r, 1.0*I(out_r), prior_r, rng, step3_eks_ensemble_size, step3_eks_max_iters) + mean_red = mean(u; dims = 2) + mean_red_full = Pinv*mean_red + end else throw("Unknown step3_posterior_sampler=$step3_posterior_sampler") end diff --git a/examples/DimensionReduction/util.jl b/examples/DimensionReduction/util.jl index a4fbb10fd..24568591d 100644 --- a/examples/DimensionReduction/util.jl +++ b/examples/DimensionReduction/util.jl @@ -21,3 +21,15 @@ function do_mcmc(callback, dim, logpost, num_chains, num_samples_per_chain, mcmc 
callback(samp, num_batches) end end + +function do_eks(dim, G, y, obs_noise_cov, prior, rng, num_ensemble, num_iters_max) + initial_ensemble = construct_initial_ensemble(rng, prior, num_ensemble) + ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng, scheduler=EKSStableScheduler(2.0, 0.01)) + + for i in 1:num_iters_max + g = hcat([G(params) for params in eachcol(get_ϕ_final(prior, ekp))]...) + isnothing(update_ensemble!(ekp, g)) || break + end + + return get_u_final(ekp), get_g_final(ekp) +end From 964171fad823fbb0531b3df87ba27875bb7429ae Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Mon, 9 Jun 2025 17:56:40 -0700 Subject: [PATCH 24/35] Add true parameter as initial guess to MCMC --- .../step1_generate_inverse_problem_data.jl | 2 +- examples/DimensionReduction/step3_estimate_posteriors.jl | 6 +++--- examples/DimensionReduction/util.jl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 65cedea7c..56472c60f 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -53,7 +53,7 @@ for trial in 1:num_trials do_mcmc(input_dim, x -> begin g = forward_map(x, model) (-2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g)) / step1_mcmc_temperature - end, step1_mcmc_num_chains, step1_mcmc_samples_per_chain, step1_mcmc_sampler, prior_cov, subsample_rate=step1_mcmc_subsample_rate) do samp, _ + end, step1_mcmc_num_chains, step1_mcmc_samples_per_chain, step1_mcmc_sampler, prior_cov, true_parameter; subsample_rate=step1_mcmc_subsample_rate) do samp, _ mcmc_samples = hcat(mcmc_samples, samp) end @info "MCMC finished" diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index 4567e03d8..699099c0c 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -85,7 +85,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use do_mcmc(input_dim, x -> begin g = forward_map(x, model) -2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g) - end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov) do samp, num_batches + end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov, true_parameter) do samp, num_batches mean_full += mean(samp; dims = 2) / num_batches end mean_full_red = P * mean_full @@ -108,7 +108,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use else throw("Unknown step3_marginalization=$step3_marginalization") end - end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov) do samp, num_batches + end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov, true_parameter) do samp, num_batches mean_red_full += mean(samp; dims = 2) / num_batches end mean_red = P * mean_red_full @@ -129,7 +129,7 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use else throw("Unknown step3_marginalization=$step3_marginalization") end - end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov_r) do samp, num_batches + end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov_r, P*true_parameter) do samp, num_batches mean_red += mean(samp; dims = 2) / num_batches end 
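# Note on initializing the chains at true_parameter (and at P*true_parameter for the reduced chain):
# this shortens burn-in, but identical starting points can make between-chain convergence checks
# overly optimistic; overdispersed starts drawn from the prior are a common alternative when mixing
# is in doubt.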
mean_red_full = Pinv*mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here (same in a few other places) diff --git a/examples/DimensionReduction/util.jl b/examples/DimensionReduction/util.jl index 24568591d..2a29a567e 100644 --- a/examples/DimensionReduction/util.jl +++ b/examples/DimensionReduction/util.jl @@ -3,7 +3,7 @@ using Distributions using ForwardDiff using MCMCChains -function do_mcmc(callback, dim, logpost, num_chains, num_samples_per_chain, mcmc_sampler, prior_cov; subsample_rate=1) +function do_mcmc(callback, dim, logpost, num_chains, num_samples_per_chain, mcmc_sampler, prior_cov, initial_guess; subsample_rate=1) density_model = DensityModel(logpost) sampler = if mcmc_sampler == :mala MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) @@ -16,7 +16,7 @@ function do_mcmc(callback, dim, logpost, num_chains, num_samples_per_chain, mcmc num_batches = (num_chains + 7) ÷ 8 for batch in 1:num_batches num_chains_in_batch = min(8, num_chains - (batch - 1)*8) - chain = sample(density_model, sampler, MCMCThreads(), num_samples_per_chain, num_chains_in_batch; chain_type=Chains, initial_params=[zeros(dim) for _ in 1:num_chains_in_batch]) + chain = sample(density_model, sampler, MCMCThreads(), num_samples_per_chain, num_chains_in_batch; chain_type=Chains, initial_params=[initial_guess for _ in 1:num_chains_in_batch]) samp = vcat([vec(MCMCChains.get(chain, Symbol("param_$i"))[1]'[:, end÷2:subsample_rate:end])' for i in 1:dim]...) callback(samp, num_batches) end From 70a9283dbb28fc8f2c0be2fd5889c28c087c81d4 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Tue, 10 Jun 2025 16:22:25 -0700 Subject: [PATCH 25/35] Refactor step2 to reduce code duplication --- ...2_build_and_compare_diagnostic_matrices.jl | 343 ++++-------------- 1 file changed, 69 insertions(+), 274 deletions(-) diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 8140c8e35..bbf9a3361 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -27,33 +27,14 @@ function cossim_cols(X::AM1, Y::AM2) where {AM1 <: AbstractMatrix, AM2 <: Abstra return [cossim_pos(c1, c2) for (c1, c2) in zip(eachcol(X), eachcol(Y))] end -n_samples = step2_num_prior_samples - -Hu_evals = [] -Hg_evals = [] -Hu_mean_evals = [] -Hg_mean_evals = [] -Hu_ekp_prior_evals = [] -Hg_ekp_prior_evals = [] -Hu_ekp_final_evals = [] -Hg_ekp_final_evals = [] - -sim_Hu_means = [] -sim_Hg_means = [] -sim_G_samples = [] -sim_U_samples = [] -sim_Hu_ekp_prior = [] -sim_Hg_ekp_prior = [] -sim_Hu_ekp_final = [] -sim_Hg_ekp_final = [] -sim_Hu_mcmc_final = [] -sim_Hg_mcmc_final = [] -sim_Huy_ekp_final = [] -sim_Hgy_ekp_final = [] -sim_Huy_mcmc_final = [] -sim_Hgy_mcmc_final = [] +all_diagnostic_matrices_u = Dict() +all_diagnostic_matrices_g = Dict() for trial in 1:num_trials + @info "Trial $trial" + diagnostic_matrices_u = Dict() + diagnostic_matrices_g = Dict() + # Load the EKP iterations loaded = load("datafiles/ekp_$(problem)_$(trial).jld2") ekp = loaded["ekp"] @@ -70,19 +51,22 @@ for trial in 1:num_trials obs_inv = inv(obs_noise_cov) # random samples - prior_samples = sample(prior, n_samples) + prior_samples = sample(prior, step2_num_prior_samples) # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) - @info "Construct good matrix ($(n_samples) samples of 
prior, perfect grad)" + @info "Construct good matrix ($(step2_num_prior_samples) samples of prior, perfect grad)" gradG_samples = jac_forward_map(prior_samples, model) Hu = zeros(input_dim, input_dim) Hg = zeros(output_dim, output_dim) - for j in 1:n_samples - Hu .+= 1 / n_samples * prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt - Hg .+= 1 / n_samples * obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt + for j in 1:step2_num_prior_samples + Hu .+= step2_num_prior_samples \ prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt + Hg .+= step2_num_prior_samples \ obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt end + diagnostic_matrices_u["Hu"] = Hu, :black + diagnostic_matrices_g["Hg"] = Hg, :black + # [1b] One-point approximation at mean value, with perfect grad @info "Construct with mean value (1 sample), perfect grad" prior_mean_appr = mean(prior) # approximate mean @@ -91,6 +75,9 @@ for trial in 1:num_trials Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt + diagnostic_matrices_u["Hu_mean"] = Hu_mean, :blue + diagnostic_matrices_g["Hg_mean"] = Hg_mean, :blue + # [2a] One-point approximation at mean value with SL grad @info "Construct with mean value prior (1 sample), SL grad" g = get_g(ekp, 1) @@ -109,6 +96,9 @@ for trial in 1:num_trials Hu_ekp_prior = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + diagnostic_matrices_u["Hu_ekp_prior"] = Hu_ekp_prior, :red + diagnostic_matrices_g["Hg_ekp_prior"] = Hg_ekp_prior, :red + println("Relative gradient error: ", norm(gradG_at_mean - (Cug * pinvCuu)) / norm(gradG_at_mean)) # [2b] One-point approximation at mean value with SL grad @@ -129,6 +119,9 @@ for trial in 1:num_trials Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + diagnostic_matrices_u["Hu_ekp_final"] = Hu_ekp_final, :gold + diagnostic_matrices_g["Hg_ekp_final"] = Hg_ekp_final, :gold + @info "Construct y-informed at EKP final (SL grad)" myCug = Cug' Huy_ekp_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct @@ -167,6 +160,9 @@ for trial in 1:num_trials Vgy_ekp_final = hcat(Vgy_ekp_final, randn(dim_g, dim_g - num_vecs)) Hgy_ekp_final = Vgy_ekp_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_ekp_final' + diagnostic_matrices_u["Huy_ekp_final"] = Huy_ekp_final, :purple + diagnostic_matrices_g["Hgy_ekp_final"] = Hgy_ekp_final, :purple + @info "Construct with mean value MCMC final (1 sample), SL grad" u = mcmc_samples @@ -182,6 +178,9 @@ for trial in 1:num_trials Hu_mcmc_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt Hg_mcmc_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt + diagnostic_matrices_u["Hu_mcmc_final"] = Hu_mcmc_final, :green + diagnostic_matrices_g["Hg_mcmc_final"] = Hg_mcmc_final, :green + @info "Construct y-informed at MCMC final (perfect grad)" gradG_samples = jac_forward_map(u, model) Huy = zeros(input_dim, input_dim) @@ -198,7 +197,7 @@ for trial in 1:num_trials dim_g = size(g, 1) Vgy_mcmc_final = zeros(dim_g, 0) - num_vecs = 10 + num_vecs = 1 @assert num_vecs ≤ dim_g for k in 1:num_vecs println("vector $k") @@ -228,256 +227,52 @@ for trial in 1:num_trials Vgy_mcmc_final = hcat(Vgy_mcmc_final, randn(dim_g, dim_g - num_vecs)) Hgy_mcmc_final = Vgy_mcmc_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - 
num_vecs))) * Vgy_mcmc_final' + diagnostic_matrices_u["Huy_mcmc_final"] = Huy_mcmc_final, :orange + diagnostic_matrices_g["Hgy_mcmc_final"] = Hgy_mcmc_final, :orange + + for (name, (value, color)) in diagnostic_matrices_u + if !haskey(all_diagnostic_matrices_u, name) + all_diagnostic_matrices_u[name] = ([], color) + end + push!(all_diagnostic_matrices_u[name][1], value) + end + for (name, (value, color)) in diagnostic_matrices_g + if !haskey(all_diagnostic_matrices_g, name) + all_diagnostic_matrices_g[name] = ([], color) + end + push!(all_diagnostic_matrices_g[name][1], value) + end - # cosine similarity of evector directions - alg = LinearAlgebra.QRIteration() - svdHu = svd(Hu; alg) - svdHg = svd(Hg; alg) - svdHu_mean = svd(Hu_mean; alg) - svdHg_mean = svd(Hg_mean; alg) - svdHu_ekp_prior = svd(Hu_ekp_prior; alg) - svdHg_ekp_prior = svd(Hg_ekp_prior; alg) - svdHu_ekp_final = svd(Hu_ekp_final; alg) - svdHg_ekp_final = svd(Hg_ekp_final; alg) - svdHu_mcmc_final = svd(Hu_mcmc_final; alg) - svdHg_mcmc_final = svd(Hg_mcmc_final; alg) - svdHuy_ekp_final = svd(Huy_ekp_final; alg) - svdHgy_ekp_final = svd(Hgy_ekp_final; alg) - svdHuy_mcmc_final = svd(Huy_mcmc_final; alg) - svdHgy_mcmc_final = svd(Hgy_mcmc_final; alg) - @info """ - - samples -> mean - $(cossim_cols(svdHu.V, svdHu_mean.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_mean.V)[1:3]) - - samples + deriv -> mean + (no deriv) prior - $(cossim_cols(svdHu.V, svdHu_ekp_prior.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_ekp_prior.V)[1:3]) - - samples + deriv -> mean + (no deriv) final - $(cossim_cols(svdHu.V, svdHu_ekp_final.V)[1:3]) - $(cossim_cols(svdHg.V, svdHg_ekp_final.V)[1:3]) - - mean+(no deriv): prior -> final - $(cossim_cols(svdHu_ekp_prior.V, svdHu_ekp_final.V)[1:3]) - $(cossim_cols(svdHg_ekp_prior.V, svdHg_ekp_final.V)[1:3]) - - y-aware -> samples - $(cossim_cols(svdHu.V, svdHuy_ekp_final.V)[1:3]) - $(cossim_cols(svdHg.V, svdHgy_ekp_final.V)[1:3]) - """ - push!(Hu_evals, svdHu.S) - push!(Hg_evals, svdHg.S) - push!(Hu_mean_evals, svdHu_mean.S) - push!(Hg_mean_evals, svdHg_mean.S) - push!(Hu_ekp_prior_evals, svdHu_ekp_prior.S) - push!(Hg_ekp_prior_evals, svdHg_ekp_prior.S) - push!(Hu_ekp_final_evals, svdHu_ekp_final.S) - push!(Hg_ekp_final_evals, svdHg_ekp_final.S) - push!(sim_Hu_means, cossim_cols(svdHu.V, svdHu_mean.V)) - push!(sim_Hg_means, cossim_cols(svdHg.V, svdHg_mean.V)) - push!(sim_Hu_ekp_prior, cossim_cols(svdHu.V, svdHu_ekp_prior.V)) - push!(sim_Hg_ekp_prior, cossim_cols(svdHg.V, svdHg_ekp_prior.V)) - push!(sim_Hu_ekp_final, cossim_cols(svdHu.V, svdHu_ekp_final.V)) - push!(sim_Hg_ekp_final, cossim_cols(svdHg.V, svdHg_ekp_final.V)) - push!(sim_Hu_mcmc_final, cossim_cols(svdHu.V, svdHu_mcmc_final.V)) - push!(sim_Hg_mcmc_final, cossim_cols(svdHg.V, svdHg_mcmc_final.V)) - push!(sim_Huy_ekp_final, cossim_cols(svdHu.V, svdHuy_ekp_final.V)) - push!(sim_Hgy_ekp_final, cossim_cols(svdHg.V, svdHgy_ekp_final.V)) - push!(sim_Huy_mcmc_final, cossim_cols(svdHu.V, svdHuy_mcmc_final.V)) - push!(sim_Hgy_mcmc_final, cossim_cols(svdHg.V, svdHgy_mcmc_final.V)) - - # cosine similarity to output svd from samples - G_samples = forward_map(prior_samples, model)' - svdG = svd(G_samples) # nonsquare, so permuted so evectors are V - svdU = svd(prior_samples') - - push!(sim_G_samples, cossim_cols(svdHg.V, svdG.V)) - push!(sim_U_samples, cossim_cols(svdHu.V, svdU.V)) - - #! 
format: off save( "datafiles/diagnostic_matrices_$(problem)_$(trial).jld2", - "Hu", Hu, - "Hg", Hg, - "Huy", Huy, - "Hu_mean", Hu_mean, - "Hg_mean", Hg_mean, - "Hu_ekp_prior", Hu_ekp_prior, - "Hg_ekp_prior", Hg_ekp_prior, - "Hu_ekp_final", Hu_ekp_final, - "Hg_ekp_final", Hg_ekp_final, - "Hu_mcmc_final", Hu_mcmc_final, - "Hg_mcmc_final", Hg_mcmc_final, - "Huy_ekp_final", Huy_ekp_final, - "Hgy_ekp_final", Hgy_ekp_final, - "Huy_mcmc_final", Huy_mcmc_final, - "Hgy_mcmc_final", Hgy_mcmc_final, - "svdU", svdU, - "svdG", svdG, + vcat([[name, value] for (name, (value, _)) in diagnostic_matrices_u]...)..., + vcat([[name, value] for (name, (value, _)) in diagnostic_matrices_g]...)..., ) - #! format: on end using Plots.Measures -gr(size = (1.6 * 1200, 600), legend = true, bottom_margin = 10mm, left_margin = 10mm) -default(titlefont = 20, legendfontsize = 12, guidefont = 14, tickfont = 14) - -normal_Hg_evals = [ev ./ ev[1] for ev in Hg_evals] -normal_Hg_mean_evals = [ev ./ ev[1] for ev in Hg_mean_evals] -normal_Hg_ekp_prior_evals = [ev ./ ev[1] for ev in Hg_ekp_prior_evals] -normal_Hg_ekp_final_evals = [ev ./ ev[1] for ev in Hg_ekp_final_evals] +gr(; size=(1.6 * 1200, 600), legend=true, bottom_margin=10mm, left_margin=10mm) +default(; titlefont=20, legendfontsize=12, guidefont=14, tickfont=14) -loaded1 = load("datafiles/ekp_$(problem)_1.jld2") -ekp_tmp = loaded1["ekp"] -input_dim = size(get_u(ekp_tmp, 1), 1) -output_dim = size(get_g(ekp_tmp, 1), 1) - -truncation = 15 -truncation = Int(minimum([truncation, input_dim, output_dim])) +trunc = 15 +trunc = min(trunc, input_dim, output_dim) # color names in https://github.com/JuliaGraphics/Colors.jl/blob/master/src/names_data.jl -pg = plot( - 1:truncation, - mean(sim_Hg_means)[1:truncation], - ribbon = (std(sim_Hg_means) / sqrt(num_trials))[1:truncation], - color = :blue, - label = "sim (samples v mean)", - legend = false, -) - -plot!( - pg, - 1:truncation, - mean(sim_Hg_ekp_prior)[1:truncation], - ribbon = (std(sim_Hg_ekp_prior) / sqrt(num_trials))[1:truncation], - color = :red, - alpha = 0.3, - label = "sim (samples v mean-no-der) prior", -) -plot!( - pg, - 1:truncation, - mean(sim_Hg_ekp_final)[1:truncation], - ribbon = (std(sim_Hg_ekp_final) / sqrt(num_trials))[1:truncation], - color = :gold, - label = "sim (samples v mean-no-der) final", -) -plot!( - pg, - 1:truncation, - mean(sim_Hgy_ekp_final)[1:truncation], - ribbon = (std(sim_Hgy_ekp_final) / sqrt(num_trials))[1:truncation], - color = :purple, - label = "sim (samples v y-aware) final", -) - -plot!(pg, 1:truncation, mean(normal_Hg_evals)[1:truncation], color = :black, label = "normalized eval (samples)") -plot!( - pg, - 1:truncation, - mean(normal_Hg_mean_evals)[1:truncation], - color = :black, - alpha = 0.7, - label = "normalized eval (mean)", -) - -plot!( - pg, - 1:truncation, - mean(normal_Hg_ekp_prior_evals)[1:truncation], - color = :black, - alpha = 0.3, - label = "normalized eval (mean-no-der)", -) - -plot!(pg, 1:truncation, mean(normal_Hg_ekp_final_evals)[1:truncation], color = :black, alpha = 0.3) - - -plot!( - pg, - 1:truncation, - mean(sim_G_samples)[1:truncation], - ribbon = (std(sim_G_samples) / sqrt(num_trials))[1:truncation], - color = :green, - label = "similarity (PCA)", -) - -title!(pg, "Similarity of spectrum of output diagnostic") - - -normal_Hu_evals = [ev ./ ev[1] for ev in Hu_evals] -normal_Hu_mean_evals = [ev ./ ev[1] for ev in Hu_mean_evals] -normal_Hu_ekp_prior_evals = [ev ./ ev[1] for ev in Hu_ekp_prior_evals] -normal_Hu_ekp_final_evals = [ev ./ ev[1] for ev in 
Hu_ekp_final_evals] - - -pu = plot( - 1:truncation, - mean(sim_Hu_means)[1:truncation], - ribbon = (std(sim_Hu_means) / sqrt(num_trials))[1:truncation], - color = :blue, - label = "sim (samples v mean)", -) - -plot!(pu, 1:truncation, mean(normal_Hu_evals)[1:truncation], color = :black, label = "normalized eval (samples)") -plot!( - pu, - 1:truncation, - mean(normal_Hu_mean_evals)[1:truncation], - color = :black, - alpha = 0.7, - label = "normalized eval (mean)", -) -plot!( - pu, - 1:truncation, - mean(normal_Hu_ekp_prior_evals)[1:truncation], - color = :black, - alpha = 0.3, - label = "normalized eval (mean-no-der)", -) -plot!(pu, 1:truncation, mean(normal_Hu_ekp_final_evals)[1:truncation], color = :black, alpha = 0.3) - -plot!( - pu, - 1:truncation, - mean(sim_U_samples)[1:truncation], - ribbon = (std(sim_U_samples) / sqrt(num_trials))[1:truncation], - color = :green, - label = "similarity (PCA)", -) - -plot!( - pu, - 1:truncation, - mean(sim_Hu_ekp_prior)[1:truncation], - ribbon = (std(sim_Hu_ekp_prior) / sqrt(num_trials))[1:truncation], - color = :red, - alpha = 0.3, - label = "sim (samples v mean-no-der) prior", -) -plot!( - pu, - 1:truncation, - mean(sim_Hu_ekp_final)[1:truncation], - ribbon = (std(sim_Hu_ekp_final) / sqrt(num_trials))[1:truncation], - color = :gold, - label = "sim (samples v mean-no-der) final", -) -plot!( - pu, - 1:truncation, - mean(sim_Huy_ekp_final)[1:truncation], - ribbon = (std(sim_Huy_ekp_final) / sqrt(num_trials))[1:truncation], - color = :purple, - label = "sim (samples v y-aware) final", -) - -title!(pu, "Similarity of spectrum of input diagnostic") - -layout = @layout [a b] -p = plot(pu, pg, layout = layout) - -savefig(p, "figures/spectrum_comparison_$problem.png") +alg = LinearAlgebra.QRIteration() +plots = map([:in, :out]) do in_or_out + diagnostics = in_or_out == :in ? all_diagnostic_matrices_u : all_diagnostic_matrices_g + ref = in_or_out == :in ? "Hu" : "Hg" + + p = plot(; title="Similarity of spectrum of $(in_or_out)put diagnostic", xlabel="SV index") + for (name, (mats, color)) in diagnostics + svds = [svd(mat; alg) for mat in mats] + sims = [cossim_cols(s.V, svd(ref_diag; alg).V) for (s, ref_diag) in zip(svds, diagnostics[ref][1])] + name == ref || plot!(p, mean(sims)[1:trunc]; ribbon=std(sims)[1:trunc], label="sim ($ref vs. 
$name)", color) + mean_S = mean([s.S[1:trunc] for s in svds]) + plot!(p, mean_S ./ mean_S[1]; label="SVs ($name)", linestyle=:dash, linewidth=3, color) + end + + p +end +plot(plots...; layout=@layout([a b])) +savefig("figures/spectrum_comparison_$problem.png") From 2b910e1f1ae7ea9fdbd815d56f93c9417064066f Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Tue, 10 Jun 2025 17:48:03 -0700 Subject: [PATCH 26/35] Set up larger experiment --- .../problems/problem_linear.jl | 1 - examples/DimensionReduction/settings.jl | 11 +++--- ...2_build_and_compare_diagnostic_matrices.jl | 15 ++++++-- .../step3_estimate_posteriors.jl | 36 +++++++++++++------ 4 files changed, 44 insertions(+), 19 deletions(-) diff --git a/examples/DimensionReduction/problems/problem_linear.jl b/examples/DimensionReduction/problems/problem_linear.jl index 1bab4cb88..1f2ad9177 100644 --- a/examples/DimensionReduction/problems/problem_linear.jl +++ b/examples/DimensionReduction/problems/problem_linear.jl @@ -50,4 +50,3 @@ end function jac_forward_map(X::AbstractMatrix, model::Linear) return [model.G for _ in eachcol(X)] end - diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index cad9c3f6a..6f855ee56 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -7,7 +7,7 @@ output_dim = 50 ## -- Configure parameters of the experiment itself -- rng_seed = 41 -num_trials = 1 +num_trials = 5 # Specific to step 1 step1_eki_ensemble_size = 800 @@ -23,9 +23,10 @@ step2_num_prior_samples = 5_000 # paper uses 5e5 # Specific to step 3 step3_diagnostics_to_use = [ - ("Huy", 4, "Hg", 16), - ("Huy", 8, "Hg", 16), - ("Huy", 16, "Hg", 16), + (diag, num, "Hg", 16) + for diag in ( + "Hu", "Huy", "Huy_mcmc_final", "pca_u", "Hu_ekp_prior", "Hu_ekp_final", + ) for num in (4, 6, 8, 10, 12, 14, 16) ] step3_run_reduced_in_full_space = false step3_marginalization = :forward_model # :loglikelihood or :forward_model @@ -35,4 +36,4 @@ step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` step3_mcmc_sampler = :rw # :rw or :mala; only used if `step3_posterior_sampler == :mcmc` step3_mcmc_samples_per_chain = 20_000 # only used if `step3_posterior_sampler == :mcmc` -step3_mcmc_num_chains = 8 # only used if `step3_posterior_sampler == :mcmc` +step3_mcmc_num_chains = 24 # only used if `step3_posterior_sampler == :mcmc` diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index bbf9a3361..6b0e504d3 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -53,6 +53,13 @@ for trial in 1:num_trials # random samples prior_samples = sample(prior, step2_num_prior_samples) + @info "Construct PCA matrices" + pca_u = prior_samples' + pca_g = forward_map(prior_samples, model)' + + diagnostic_matrices_u["pca_u"] = pca_u, :gray + diagnostic_matrices_g["pca_g"] = pca_g, :gray + # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) @info "Construct good matrix ($(step2_num_prior_samples) samples of prior, perfect grad)" gradG_samples = jac_forward_map(prior_samples, model) @@ -186,14 +193,16 @@ for trial in 1:num_trials Huy = zeros(input_dim, input_dim) for j in 1:N_ens - Huy .+= 1 / N_ens * prior_rt * gradG_samples[j]' * obs_inv^2 * (y - g[:, 
j]) * (y - g[:, j])' * obs_inv^2 * gradG_samples[j] * prior_rt # TODO: Is the obs_inv^2 correct? + Huy .+= N_ens \ prior_rt * gradG_samples[j]' * obs_inv^2 * (y - g[:, j]) * (y - g[:, j])' * obs_inv^2 * gradG_samples[j] * prior_rt end + diagnostic_matrices_u["Huy"] = Huy, :pink + @info "Construct y-informed at MCMC final (SL grad)" myCug = Cug' - Huy_mcmc_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct + Huy_mcmc_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( # TODO: Check if whitening is correct (y - gg) * (y - gg)' for gg in eachcol(g) - )*obs_inv*myCug' * Cuu_invrt + )*obs_inv^2*myCug' * pinvCuu * prior_rt dim_g = size(g, 1) Vgy_mcmc_final = zeros(dim_g, 0) diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index 699099c0c..e4346c16c 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -21,7 +21,9 @@ end for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use @info "Diagnostic matrices = ($in_diag [1-$in_r], $out_diag [1-$out_r])" - average_error = 0 + + rel_error_full_rmse = 0 + rel_error_red_rmse = 0 for trial in 1:num_trials # Load the EKP iterations @@ -163,20 +165,34 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use throw("Unknown step3_posterior_sampler=$step3_posterior_sampler") end + rel_error_full = norm(mean_full - mean_red_full) / norm(mean_full) + rel_error_red = norm(mean_full_red - mean_red) / norm(mean_full_red) + @info """ True: $(true_parameter[1:5]) Mean (in full space): $(mean_full[1:5]) Red. mean (in full space): $(mean_red_full[1:5]) - Mean (in red. space): $(mean_full_red) - Red. mean (in red. space): $(mean_red) - - Relative error on mean in full space: $(norm(mean_full - mean_red_full) / norm(mean_full)) - Relative error on mean in reduced space: $(norm(mean_full_red - mean_red) / norm(mean_full_red)) + Mean (in red. space): $mean_full_red + Red. mean (in red. space): $mean_red + + Relative error on mean in full space: $rel_error_full + Relative error on mean in reduced space: $rel_error_red """ - # [A] The relative error seems larger in the reduced space - # The reason is likely the whitening that happens. Small absolute errors in the full space - # can be amplified in the reduced space due to the different scales in the prior. I think - # the full space is probably the one we should be concerned about. + + rel_error_full_rmse += rel_error_full^2 + rel_error_red_rmse += rel_error_red^2 end + + rel_error_full_rmse = sqrt(rel_error_full_rmse / num_trials) + rel_error_red_rmse = sqrt(rel_error_red_rmse / num_trials) + + open("datafiles/output_error_$(problem).log", "a") do f + println(f, "$in_diag, $in_r, $out_diag, $out_r, $rel_error_full_rmse, $rel_error_red_rmse") + end + + # [A] The relative error seems larger in the reduced space + # The reason is likely the whitening that happens. Small absolute errors in the full space + # can be amplified in the reduced space due to the different scales in the prior. I think + # the full space is probably the one we should be concerned about. 
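# A minimal standalone sketch of the whitening effect described in note [A], assuming the prior
# scales γ0 = 4.0 and β_γ = -2 from the problem files (illustration only, not part of the patched scripts):
using LinearAlgebra, Random
Γ = Diagonal([4.0 / j^2 for j in 1:200])       # prior covariance with the γ0 = 4, β_γ = -2 scales
m = sqrt(Γ) * randn(MersenneTwister(1), 200)   # a vector with prior-sized coordinates, standing in for the mean
e = 1e-3 * ones(200)                           # a small error, uniform in absolute terms
W = inv(sqrt(Γ))                               # prior whitening
@show norm(e) / norm(m)                        # small relative error in the full space
@show norm(W * e) / norm(W * m)                # typically about an order of magnitude larger after whitening
# Since the log file above records both relative errors, the scale sensitivity of the whitened
# number stays visible alongside the full-space one.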
end From 1079f16973ad4193a26caee6316cf764d597cd04 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Tue, 10 Jun 2025 18:44:00 -0700 Subject: [PATCH 27/35] Add LinLinExp problem and fix bug in Huy_ekp_final --- .../problems/problem_linlinexp.jl | 56 +++++++++++++++++++ examples/DimensionReduction/settings.jl | 4 +- .../step1_generate_inverse_problem_data.jl | 3 + ...2_build_and_compare_diagnostic_matrices.jl | 5 +- 4 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 examples/DimensionReduction/problems/problem_linlinexp.jl diff --git a/examples/DimensionReduction/problems/problem_linlinexp.jl b/examples/DimensionReduction/problems/problem_linlinexp.jl new file mode 100644 index 000000000..1963fa03e --- /dev/null +++ b/examples/DimensionReduction/problems/problem_linlinexp.jl @@ -0,0 +1,56 @@ +using LinearAlgebra +using EnsembleKalmanProcesses +using EnsembleKalmanProcesses.ParameterDistributions +using Statistics +using Distributions + +include("forward_maps.jl") + +function linlinexp(input_dim, output_dim, rng) + # prior + γ0 = 4.0 + β_γ = -2 + Γ = Diagonal([γ0 * (1.0 * j)^β_γ for j in 1:input_dim]) + prior_dist = MvNormal(zeros(input_dim), Γ) + prior = ParameterDistribution( + Dict( + "distribution" => Parameterized(prior_dist), + "constraint" => repeat([no_constraint()], input_dim), + "name" => "param_$(input_dim)", + ), + ) + + U = qr(randn(rng, (output_dim, output_dim))).Q + V = qr(randn(rng, (input_dim, input_dim))).Q + λ0 = 100.0 + β_λ = -1 + Λ = Diagonal([λ0 * (1.0 * j)^β_λ for j in 1:output_dim]) + A = U * Λ * V[1:output_dim, :] # output x input + model = LinLinExp(input_dim, output_dim, A) + + # generate data sample + obs_noise_cov = Diagonal([Float64(j)^(-1/2) for j in 1:output_dim]) + noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) + # true_parameter = reshape(ones(input_dim), :, 1) + true_parameter = rand(prior_dist) + y = vec(forward_map(true_parameter, model) + noise) + return prior, y, obs_noise_cov, model, true_parameter +end + +struct LinLinExp{AM <: AbstractMatrix} <: ForwardMapType + input_dim::Int + output_dim::Int + G::AM +end + +function forward_map(X::AVorM, model::LinLinExp) where {AVorM <: AbstractVecOrMat} + return model.G * (X .* exp.(0.01X)) +end + +function jac_forward_map(X::AbstractVector, model::LinLinExp) + return model.G * Diagonal(exp.(0.01X) .* (1 .+ 0.01X)) +end + +function jac_forward_map(X::AbstractMatrix, model::LinLinExp) + return [jac_forward_map(x, model) for x in eachcol(X)] +end diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index 6f855ee56..72802b65d 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -1,7 +1,7 @@ # CONFIGURE THE THREE STEPS ## -- Configure the inverse problem -- -problem = "linear" # "lorenz" or "linear" or "linear_exp" +problem = "linlinexp" # "lorenz" or "linear" or "linear_exp" or "linlinexp" input_dim = 200 output_dim = 50 @@ -25,7 +25,7 @@ step2_num_prior_samples = 5_000 # paper uses 5e5 step3_diagnostics_to_use = [ (diag, num, "Hg", 16) for diag in ( - "Hu", "Huy", "Huy_mcmc_final", "pca_u", "Hu_ekp_prior", "Hu_ekp_final", + "Hu", "Huy", "Huy_ekp_final", "Huy_mcmc_final", "pca_u", "Hu_ekp_prior", "Hu_ekp_final", ) for num in (4, 6, 8, 10, 12, 14, 16) ] step3_run_reduced_in_full_space = false diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 56472c60f..da719a557 100644 --- 
a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -10,6 +10,7 @@ using Random include("./problems/problem_linear.jl") include("./problems/problem_linear_exp.jl") include("./problems/problem_lorenz.jl") +include("./problems/problem_linlinexp.jl") include("./settings.jl") include("./util.jl") @@ -20,6 +21,8 @@ elseif problem == "linear_exp" linear_exp elseif problem == "lorenz" lorenz +elseif problem == "linlinexp" + linlinexp else throw("Unknown problem=$problem") end diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 6b0e504d3..834cbf4c1 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -11,6 +11,7 @@ include("./settings.jl") include("./problems/problem_linear.jl") include("./problems/problem_linear_exp.jl") include("./problems/problem_lorenz.jl") +include("./problems/problem_linlinexp.jl") if !isfile("datafiles/ekp_$(problem)_1.jld2") include("step1_generate_inverse_problem_data.jl") @@ -131,9 +132,9 @@ for trial in 1:num_trials @info "Construct y-informed at EKP final (SL grad)" myCug = Cug' - Huy_ekp_final = N_ens \ Cuu_invrt * myCug*obs_inv'*sum( # TODO: Check if whitening is correct + Huy_ekp_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( # TODO: Check if whitening is correct (y - gg) * (y - gg)' for gg in eachcol(g) - )*obs_inv*myCug' * Cuu_invrt + )*obs_inv^2*myCug' * pinvCuu * prior_rt dim_g = size(g, 1) Vgy_ekp_final = zeros(dim_g, 0) From 51e05c5e43b505e7ee84d2e2593ca33e877cf02b Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Wed, 11 Jun 2025 12:38:08 -0700 Subject: [PATCH 28/35] Fix observation-informed output diagnostics --- ...2_build_and_compare_diagnostic_matrices.jl | 78 +++++++++++-------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 834cbf4c1..58034140a 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -132,41 +132,48 @@ for trial in 1:num_trials @info "Construct y-informed at EKP final (SL grad)" myCug = Cug' - Huy_ekp_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( # TODO: Check if whitening is correct + Huy_ekp_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( (y - gg) * (y - gg)' for gg in eachcol(g) )*obs_inv^2*myCug' * pinvCuu * prior_rt dim_g = size(g, 1) - Vgy_ekp_final = zeros(dim_g, 0) - num_vecs = 1 + vecs = zeros(dim_g, 0) + num_vecs = step2_manopt_num_dims @assert num_vecs ≤ dim_g for k in 1:num_vecs println("vector $k") counter = 0 - M = Grassmann(dim_g, 1) + + vecs_compl = qr(vecs).Q[:, k:end] + M = Grassmann(dim_g + 1 - k, 1) + f = (_, v) -> begin - counter += 1 - Vs = hcat(Vgy_ekp_final, vec(v)) + Vs = hcat(vecs, vecs_compl * vec(v)) Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' res = N_ens \ sum( # TODO: Check if whitening is correct - norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2# * det(Vs'*obs_noise_cov*Vs)^(-1/2) * exp(0.5(y-gg)'*Γtildeinv*(y-gg)) + norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2 for gg in eachcol(g) ) + + counter += 1 mod(counter, 100) 
== 1 && println(" iter $counter: $res") + res end - v00 = eigvecs(Hg_ekp_final)[:,k:k] - v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] + + # svd_Hg_ekp_final = svd(Hg_ekp_final; alg=LinearAlgebra.QRIteration()) + # v00 = (vecs_compl' * svd_Hg_ekp_final.V * Diagonal(svd_Hg_ekp_final.S))[:, 1:1] + # ^ This should be a good initial guess, but it seems like a local minimum that the optimizer can't get out of + v00 = ones(dim_g + 1 - k, 1) + v00 ./= norm(v00) + v0 = [v00 + randn(dim_g + 1 - k, 1) / 2 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(5000.0, 5000.0)) # TODO: Set very high to effectively turn off this diagnostic for speed - # Orthogonalize - proj = bestvec - Vgy_ekp_final * (Vgy_ekp_final' * bestvec) - bestvec = proj / norm(proj) + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(0.1, 0.1)) - Vgy_ekp_final = hcat(Vgy_ekp_final, bestvec) + vecs = hcat(vecs, vecs_compl * bestvec) end - Vgy_ekp_final = hcat(Vgy_ekp_final, randn(dim_g, dim_g - num_vecs)) - Hgy_ekp_final = Vgy_ekp_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_ekp_final' + vecs = hcat(vecs, randn(dim_g, dim_g - num_vecs)) + Hgy_ekp_final = vecs * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * vecs' diagnostic_matrices_u["Huy_ekp_final"] = Huy_ekp_final, :purple diagnostic_matrices_g["Hgy_ekp_final"] = Hgy_ekp_final, :purple @@ -201,41 +208,48 @@ for trial in 1:num_trials @info "Construct y-informed at MCMC final (SL grad)" myCug = Cug' - Huy_mcmc_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( # TODO: Check if whitening is correct + Huy_mcmc_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( (y - gg) * (y - gg)' for gg in eachcol(g) )*obs_inv^2*myCug' * pinvCuu * prior_rt dim_g = size(g, 1) - Vgy_mcmc_final = zeros(dim_g, 0) - num_vecs = 1 + vecs = zeros(dim_g, 0) + num_vecs = step2_manopt_num_dims @assert num_vecs ≤ dim_g for k in 1:num_vecs println("vector $k") counter = 0 - M = Grassmann(dim_g, 1) + + vecs_compl = qr(vecs).Q[:, k:end] + M = Grassmann(dim_g + 1 - k, 1) + f = (_, v) -> begin - counter += 1 - Vs = hcat(Vgy_mcmc_final, vec(v)) + Vs = hcat(vecs, vecs_compl * vec(v)) Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' res = N_ens \ sum( # TODO: Check if whitening is correct - norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2# * det(Vs'*obs_noise_cov*Vs)^(-1/2) * exp(0.5(y-gg)'*Γtildeinv*(y-gg)) + norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2 for gg in eachcol(g) ) + + counter += 1 mod(counter, 100) == 1 && println(" iter $counter: $res") + res end - v00 = eigvecs(Hg_mcmc_final)[:,k:k] - v0 = [v00 + randn(dim_g, 1) / 10 for _ in 1:dim_g] + + # svd_Hg_ekp_final = svd(Hg_ekp_final; alg=LinearAlgebra.QRIteration()) + # v00 = (vecs_compl' * svd_Hg_ekp_final.V * Diagonal(svd_Hg_ekp_final.S))[:, 1:1] + # ^ This should be a good initial guess, but it seems like a local minimum that the optimizer can't get out of + v00 = ones(dim_g + 1 - k, 1) + v00 ./= norm(v00) + v0 = [v00 + randn(dim_g + 1 - k, 1) / 2 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(5000.0, 5000.0)) # TODO: Set very high to effectively turn off this diagnostic for speed - # Orthogonalize - proj = bestvec - Vgy_mcmc_final * (Vgy_mcmc_final' * bestvec) - bestvec = proj / norm(proj) + bestvec = 
NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(0.1, 0.1)) - Vgy_mcmc_final = hcat(Vgy_mcmc_final, bestvec) + vecs = hcat(vecs, vecs_compl * bestvec) end - Vgy_mcmc_final = hcat(Vgy_mcmc_final, randn(dim_g, dim_g - num_vecs)) - Hgy_mcmc_final = Vgy_mcmc_final * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * Vgy_mcmc_final' + vecs = hcat(vecs, randn(dim_g, dim_g - num_vecs)) + Hgy_mcmc_final = vecs * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * vecs' diagnostic_matrices_u["Huy_mcmc_final"] = Huy_mcmc_final, :orange diagnostic_matrices_g["Hgy_mcmc_final"] = Hgy_mcmc_final, :orange From ab58616cbc14f1261ddc2448b80c507f7ee747b8 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Wed, 11 Jun 2025 12:47:15 -0700 Subject: [PATCH 29/35] Format --- .../problems/problem_linear.jl | 2 +- .../problems/problem_linlinexp.jl | 2 +- .../problems/problem_lorenz.jl | 12 +- examples/DimensionReduction/settings.jl | 11 +- .../step1_generate_inverse_problem_data.jl | 26 ++- ...2_build_and_compare_diagnostic_matrices.jl | 99 +++++++---- .../step3_estimate_posteriors.jl | 161 +++++++++++------- examples/DimensionReduction/util.jl | 41 ++++- 8 files changed, 233 insertions(+), 121 deletions(-) diff --git a/examples/DimensionReduction/problems/problem_linear.jl b/examples/DimensionReduction/problems/problem_linear.jl index 1f2ad9177..717e1ec3f 100644 --- a/examples/DimensionReduction/problems/problem_linear.jl +++ b/examples/DimensionReduction/problems/problem_linear.jl @@ -29,7 +29,7 @@ function linear(input_dim, output_dim, rng) model = Linear(input_dim, output_dim, A) # generate data sample - obs_noise_cov = Diagonal([Float64(j)^(-1/2) for j in 1:output_dim]) + obs_noise_cov = Diagonal([Float64(j)^(-1 / 2) for j in 1:output_dim]) noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) # true_parameter = reshape(ones(input_dim), :, 1) true_parameter = rand(prior_dist) diff --git a/examples/DimensionReduction/problems/problem_linlinexp.jl b/examples/DimensionReduction/problems/problem_linlinexp.jl index 1963fa03e..580bf6d67 100644 --- a/examples/DimensionReduction/problems/problem_linlinexp.jl +++ b/examples/DimensionReduction/problems/problem_linlinexp.jl @@ -29,7 +29,7 @@ function linlinexp(input_dim, output_dim, rng) model = LinLinExp(input_dim, output_dim, A) # generate data sample - obs_noise_cov = Diagonal([Float64(j)^(-1/2) for j in 1:output_dim]) + obs_noise_cov = Diagonal([Float64(j)^(-1 / 2) for j in 1:output_dim]) noise = rand(rng, MvNormal(zeros(output_dim), obs_noise_cov)) # true_parameter = reshape(ones(input_dim), :, 1) true_parameter = rand(prior_dist) diff --git a/examples/DimensionReduction/problems/problem_lorenz.jl b/examples/DimensionReduction/problems/problem_lorenz.jl index ac44aa2d2..ef3a1a955 100644 --- a/examples/DimensionReduction/problems/problem_lorenz.jl +++ b/examples/DimensionReduction/problems/problem_lorenz.jl @@ -148,12 +148,12 @@ end ######################################################################## struct Lorenz <: ForwardMapType - rng - config_settings - observation_config - x0 - ic_cov_sqrt - nx + rng::Any + config_settings::Any + observation_config::Any + x0::Any + ic_cov_sqrt::Any + nx::Any end # columns of X are samples diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index 72802b65d..68c54b849 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -7,7 +7,7 @@ output_dim = 50 ## -- Configure parameters of the 
experiment itself -- rng_seed = 41 -num_trials = 5 +num_trials = 1 # Specific to step 1 step1_eki_ensemble_size = 800 @@ -20,14 +20,11 @@ step1_mcmc_subsample_rate = 1000 # Specific to step 2 step2_num_prior_samples = 5_000 # paper uses 5e5 +step2_manopt_num_dims = 16 # Specific to step 3 -step3_diagnostics_to_use = [ - (diag, num, "Hg", 16) - for diag in ( - "Hu", "Huy", "Huy_ekp_final", "Huy_mcmc_final", "pca_u", "Hu_ekp_prior", "Hu_ekp_final", - ) for num in (4, 6, 8, 10, 12, 14, 16) -] +step3_diagnostics_to_use = + [("Hu", 50, diag, num) for diag in ("Hg", "Hgy_ekp_final", "Hgy_mcmc_final") for num in (4, 8, 16)] step3_run_reduced_in_full_space = false step3_marginalization = :forward_model # :loglikelihood or :forward_model step3_num_marginalization_samples = 8 diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index da719a557..655edee77 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -36,7 +36,14 @@ for trial in 1:num_trials n_iters_max = step1_eki_max_iters initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) - ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, TransformInversion(); rng, scheduler = EKSStableScheduler(2.0, 0.01)) + ekp = EnsembleKalmanProcess( + initial_ensemble, + y, + obs_noise_cov, + TransformInversion(); + rng, + scheduler = EKSStableScheduler(2.0, 0.01), + ) n_iters = n_iters_max for i in 1:n_iters_max @@ -53,10 +60,19 @@ for trial in 1:num_trials # [2] MCMC run prior_cov, prior_inv, obs_inv = cov(prior), inv(cov(prior)), inv(obs_noise_cov) mcmc_samples = zeros(input_dim, 0) - do_mcmc(input_dim, x -> begin - g = forward_map(x, model) - (-2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g)) / step1_mcmc_temperature - end, step1_mcmc_num_chains, step1_mcmc_samples_per_chain, step1_mcmc_sampler, prior_cov, true_parameter; subsample_rate=step1_mcmc_subsample_rate) do samp, _ + do_mcmc( + input_dim, + x -> begin + g = forward_map(x, model) + (-2 \ x' * prior_inv * x - 2 \ (y - g)' * obs_inv * (y - g)) / step1_mcmc_temperature + end, + step1_mcmc_num_chains, + step1_mcmc_samples_per_chain, + step1_mcmc_sampler, + prior_cov, + true_parameter; + subsample_rate = step1_mcmc_subsample_rate, + ) do samp, _ mcmc_samples = hcat(mcmc_samples, samp) end @info "MCMC finished" diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 58034140a..c407611e9 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -132,9 +132,16 @@ for trial in 1:num_trials @info "Construct y-informed at EKP final (SL grad)" myCug = Cug' - Huy_ekp_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( - (y - gg) * (y - gg)' for gg in eachcol(g) - )*obs_inv^2*myCug' * pinvCuu * prior_rt + Huy_ekp_final = + N_ens \ prior_rt * + pinvCuu * + myCug * + obs_inv^2 * + sum((y - gg) * (y - gg)' for gg in eachcol(g)) * + obs_inv^2 * + myCug' * + pinvCuu * + prior_rt dim_g = size(g, 1) vecs = zeros(dim_g, 0) @@ -147,19 +154,20 @@ for trial in 1:num_trials vecs_compl = qr(vecs).Q[:, k:end] M = Grassmann(dim_g + 1 - k, 1) - f = (_, v) -> begin - Vs = hcat(vecs, vecs_compl * vec(v)) - Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' - res = N_ens \ sum( 
# TODO: Check if whitening is correct - norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2 - for gg in eachcol(g) - ) + f = + (_, v) -> begin + Vs = hcat(vecs, vecs_compl * vec(v)) + Γtildeinv = obs_inv - Vs * inv(Vs' * obs_noise_cov * Vs) * Vs' + res = + N_ens \ sum( # TODO: Check if whitening is correct + norm((y - gg)' * obs_invrt * (I - Vs * Vs') * myCug' * Cuu_invrt)^2 for gg in eachcol(g) + ) - counter += 1 - mod(counter, 100) == 1 && println(" iter $counter: $res") + counter += 1 + mod(counter, 100) == 1 && println(" iter $counter: $res") - res - end + res + end # svd_Hg_ekp_final = svd(Hg_ekp_final; alg=LinearAlgebra.QRIteration()) # v00 = (vecs_compl' * svd_Hg_ekp_final.V * Diagonal(svd_Hg_ekp_final.S))[:, 1:1] @@ -168,7 +176,7 @@ for trial in 1:num_trials v00 ./= norm(v00) v0 = [v00 + randn(dim_g + 1 - k, 1) / 2 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(0.1, 0.1)) + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion = StopWhenPopulationConcentrated(0.1, 0.1)) vecs = hcat(vecs, vecs_compl * bestvec) end @@ -201,16 +209,31 @@ for trial in 1:num_trials Huy = zeros(input_dim, input_dim) for j in 1:N_ens - Huy .+= N_ens \ prior_rt * gradG_samples[j]' * obs_inv^2 * (y - g[:, j]) * (y - g[:, j])' * obs_inv^2 * gradG_samples[j] * prior_rt + Huy .+= + N_ens \ prior_rt * + gradG_samples[j]' * + obs_inv^2 * + (y - g[:, j]) * + (y - g[:, j])' * + obs_inv^2 * + gradG_samples[j] * + prior_rt end diagnostic_matrices_u["Huy"] = Huy, :pink @info "Construct y-informed at MCMC final (SL grad)" myCug = Cug' - Huy_mcmc_final = N_ens \ prior_rt * pinvCuu * myCug*obs_inv^2*sum( - (y - gg) * (y - gg)' for gg in eachcol(g) - )*obs_inv^2*myCug' * pinvCuu * prior_rt + Huy_mcmc_final = + N_ens \ prior_rt * + pinvCuu * + myCug * + obs_inv^2 * + sum((y - gg) * (y - gg)' for gg in eachcol(g)) * + obs_inv^2 * + myCug' * + pinvCuu * + prior_rt dim_g = size(g, 1) vecs = zeros(dim_g, 0) @@ -223,19 +246,20 @@ for trial in 1:num_trials vecs_compl = qr(vecs).Q[:, k:end] M = Grassmann(dim_g + 1 - k, 1) - f = (_, v) -> begin - Vs = hcat(vecs, vecs_compl * vec(v)) - Γtildeinv = obs_inv - Vs*inv(Vs'*obs_noise_cov*Vs)*Vs' - res = N_ens \ sum( # TODO: Check if whitening is correct - norm((y-gg)' * obs_invrt * (I - Vs*Vs') * myCug' * Cuu_invrt)^2 - for gg in eachcol(g) - ) + f = + (_, v) -> begin + Vs = hcat(vecs, vecs_compl * vec(v)) + Γtildeinv = obs_inv - Vs * inv(Vs' * obs_noise_cov * Vs) * Vs' + res = + N_ens \ sum( # TODO: Check if whitening is correct + norm((y - gg)' * obs_invrt * (I - Vs * Vs') * myCug' * Cuu_invrt)^2 for gg in eachcol(g) + ) - counter += 1 - mod(counter, 100) == 1 && println(" iter $counter: $res") + counter += 1 + mod(counter, 100) == 1 && println(" iter $counter: $res") - res - end + res + end # svd_Hg_ekp_final = svd(Hg_ekp_final; alg=LinearAlgebra.QRIteration()) # v00 = (vecs_compl' * svd_Hg_ekp_final.V * Diagonal(svd_Hg_ekp_final.S))[:, 1:1] @@ -244,7 +268,7 @@ for trial in 1:num_trials v00 ./= norm(v00) v0 = [v00 + randn(dim_g + 1 - k, 1) / 2 for _ in 1:dim_g] v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion=StopWhenPopulationConcentrated(0.1, 0.1)) + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion = StopWhenPopulationConcentrated(0.1, 0.1)) vecs = hcat(vecs, vecs_compl * bestvec) end @@ -275,8 +299,8 @@ for trial in 1:num_trials end using Plots.Measures -gr(; size=(1.6 
* 1200, 600), legend=true, bottom_margin=10mm, left_margin=10mm) -default(; titlefont=20, legendfontsize=12, guidefont=14, tickfont=14) +gr(; size = (1.6 * 1200, 600), legend = true, bottom_margin = 10mm, left_margin = 10mm) +default(; titlefont = 20, legendfontsize = 12, guidefont = 14, tickfont = 14) trunc = 15 trunc = min(trunc, input_dim, output_dim) @@ -287,16 +311,17 @@ plots = map([:in, :out]) do in_or_out diagnostics = in_or_out == :in ? all_diagnostic_matrices_u : all_diagnostic_matrices_g ref = in_or_out == :in ? "Hu" : "Hg" - p = plot(; title="Similarity of spectrum of $(in_or_out)put diagnostic", xlabel="SV index") + p = plot(; title = "Similarity of spectrum of $(in_or_out)put diagnostic", xlabel = "SV index") for (name, (mats, color)) in diagnostics svds = [svd(mat; alg) for mat in mats] sims = [cossim_cols(s.V, svd(ref_diag; alg).V) for (s, ref_diag) in zip(svds, diagnostics[ref][1])] - name == ref || plot!(p, mean(sims)[1:trunc]; ribbon=std(sims)[1:trunc], label="sim ($ref vs. $name)", color) + name == ref || + plot!(p, mean(sims)[1:trunc]; ribbon = std(sims)[1:trunc], label = "sim ($ref vs. $name)", color) mean_S = mean([s.S[1:trunc] for s in svds]) - plot!(p, mean_S ./ mean_S[1]; label="SVs ($name)", linestyle=:dash, linewidth=3, color) + plot!(p, mean_S ./ mean_S[1]; label = "SVs ($name)", linestyle = :dash, linewidth = 3, color) end p end -plot(plots...; layout=@layout([a b])) +plot(plots...; layout = @layout([a b])) savefig("figures/spectrum_comparison_$problem.png") diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index e4346c16c..aa0049fb2 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -47,8 +47,8 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use Hu = diagnostic_mats[in_diag] Hg = diagnostic_mats[out_diag] - svdu = svd(Hu; alg=LinearAlgebra.QRIteration()) - svdg = svd(Hg; alg=LinearAlgebra.QRIteration()) + svdu = svd(Hu; alg = LinearAlgebra.QRIteration()) + svdg = svd(Hg; alg = LinearAlgebra.QRIteration()) U_r = svdu.V[:, 1:in_r] V_r = svdg.V[:, 1:out_r] @@ -76,90 +76,137 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use # Let prior = N(0, C) and let x ~ prior # Then the distribution of x | P*x=x_r is N(Mmean * x_r, Mcov) C = prior_cov - Mmean = C*P'*inv(P*C*P') + Mmean = C * P' * inv(P * C * P') @assert Pinv ≈ Mmean - Mcov = C - Mmean*P*C + 1e-13 * I + Mcov = C - Mmean * P * C + 1e-13 * I Mcov = (Mcov + Mcov') / 2 # Otherwise, it's not numerically Hermitian covsamps = rand(MvNormal(zeros(input_dim), Mcov), step3_num_marginalization_samples) if step3_posterior_sampler == :mcmc mean_full = zeros(input_dim) - do_mcmc(input_dim, x -> begin - g = forward_map(x, model) - -2\x'*prior_inv*x - 2\(y - g)'*obs_inv*(y - g) - end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov, true_parameter) do samp, num_batches + do_mcmc( + input_dim, + x -> begin + g = forward_map(x, model) + -2 \ x' * prior_inv * x - 2 \ (y - g)' * obs_inv * (y - g) + end, + step3_mcmc_num_chains, + step3_mcmc_samples_per_chain, + step3_mcmc_sampler, + prior_cov, + true_parameter, + ) do samp, num_batches mean_full += mean(samp; dims = 2) / num_batches end mean_full_red = P * mean_full if step3_run_reduced_in_full_space mean_red_full = zeros(input_dim) - do_mcmc(input_dim, xfull -> begin - xred = P*xfull - samp = covsamps .+ Mmean * xred - gsamp = map(x -> forward_map(x, 
model), eachcol(samp)) - - return -2\xfull'*prior_inv*xfull + if step3_marginalization == :loglikelihood - mean( - -2\(y_r - Q*g)'*(y_r - Q*g) - for (x, g) in zip(eachcol(samp), gsamp) - ) - elseif step3_marginalization == :forward_model - g = mean(gsamp) - -2\(y_r - Q*g)'*(y_r - Q*g) - else - throw("Unknown step3_marginalization=$step3_marginalization") - end - end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov, true_parameter) do samp, num_batches + do_mcmc( + input_dim, + xfull -> begin + xred = P * xfull + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + + return -2 \ xfull' * prior_inv * xfull + if step3_marginalization == :loglikelihood + mean(-2 \ (y_r - Q * g)' * (y_r - Q * g) for (x, g) in zip(eachcol(samp), gsamp)) + elseif step3_marginalization == :forward_model + g = mean(gsamp) + -2 \ (y_r - Q * g)' * (y_r - Q * g) + else + throw("Unknown step3_marginalization=$step3_marginalization") + end + end, + step3_mcmc_num_chains, + step3_mcmc_samples_per_chain, + step3_mcmc_sampler, + prior_cov, + true_parameter, + ) do samp, num_batches mean_red_full += mean(samp; dims = 2) / num_batches end mean_red = P * mean_red_full else mean_red = zeros(in_r) - do_mcmc(in_r, xred -> begin - samp = covsamps .+ Mmean * xred - gsamp = map(x -> forward_map(x, model), eachcol(samp)) - - return -2\xred'*prior_cov_r_inv*xred + if step3_marginalization == :loglikelihood - mean( - -2\(y_r - Q*g)'*(y_r - Q*g) - for (x, g) in zip(eachcol(samp), gsamp) - ) - elseif step3_marginalization == :forward_model - g = mean(gsamp) - -2\(y_r - Q*g)'*(y_r - Q*g) - else - throw("Unknown step3_marginalization=$step3_marginalization") - end - end, step3_mcmc_num_chains, step3_mcmc_samples_per_chain, step3_mcmc_sampler, prior_cov_r, P*true_parameter) do samp, num_batches + do_mcmc( + in_r, + xred -> begin + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + + return -2 \ xred' * prior_cov_r_inv * xred + if step3_marginalization == :loglikelihood + mean(-2 \ (y_r - Q * g)' * (y_r - Q * g) for (x, g) in zip(eachcol(samp), gsamp)) + elseif step3_marginalization == :forward_model + g = mean(gsamp) + -2 \ (y_r - Q * g)' * (y_r - Q * g) + else + throw("Unknown step3_marginalization=$step3_marginalization") + end + end, + step3_mcmc_num_chains, + step3_mcmc_samples_per_chain, + step3_mcmc_sampler, + prior_cov_r, + P * true_parameter, + ) do samp, num_batches mean_red += mean(samp; dims = 2) / num_batches end - mean_red_full = Pinv*mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here (same in a few other places) + mean_red_full = Pinv * mean_red # This only works since it's the mean (linear) — if not, we'd have to use the covsamps here (same in a few other places) end elseif step3_posterior_sampler == :eks - step3_marginalization == :forward_model || throw("EKS sampling from the reduced posterior is only supported when marginalizing over the forward model.") - - u, _ = do_eks(input_dim, x -> forward_map(x, model), y, obs_noise_cov, prior, rng, step3_eks_ensemble_size, step3_eks_max_iters) + step3_marginalization == :forward_model || throw( + "EKS sampling from the reduced posterior is only supported when marginalizing over the forward model.", + ) + + u, _ = do_eks( + input_dim, + x -> forward_map(x, model), + y, + obs_noise_cov, + prior, + rng, + step3_eks_ensemble_size, + step3_eks_max_iters, + ) mean_full = mean(u; dims = 2) mean_full_red = P * mean_full if 
step3_run_reduced_in_full_space - u, _ = do_eks(input_dim, xfull -> begin - xred = P*xfull - samp = covsamps .+ Mmean * xred - gsamp = map(x -> forward_map(x, model), eachcol(samp)) - return Q*mean(gsamp) - end, y_r, 1.0*I(out_r), prior, rng, step3_eks_ensemble_size, step3_eks_max_iters) + u, _ = do_eks( + input_dim, + xfull -> begin + xred = P * xfull + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + return Q * mean(gsamp) + end, + y_r, + 1.0 * I(out_r), + prior, + rng, + step3_eks_ensemble_size, + step3_eks_max_iters, + ) mean_red_full = mean(u; dims = 2) mean_red = P * mean_red_full else - u, _ = do_eks(in_r, xred -> begin - samp = covsamps .+ Mmean * xred - gsamp = map(x -> forward_map(x, model), eachcol(samp)) - return Q*mean(gsamp) - end, y_r, 1.0*I(out_r), prior_r, rng, step3_eks_ensemble_size, step3_eks_max_iters) + u, _ = do_eks( + in_r, + xred -> begin + samp = covsamps .+ Mmean * xred + gsamp = map(x -> forward_map(x, model), eachcol(samp)) + return Q * mean(gsamp) + end, + y_r, + 1.0 * I(out_r), + prior_r, + rng, + step3_eks_ensemble_size, + step3_eks_max_iters, + ) mean_red = mean(u; dims = 2) - mean_red_full = Pinv*mean_red + mean_red_full = Pinv * mean_red end else throw("Unknown step3_posterior_sampler=$step3_posterior_sampler") diff --git a/examples/DimensionReduction/util.jl b/examples/DimensionReduction/util.jl index 2a29a567e..912d9faee 100644 --- a/examples/DimensionReduction/util.jl +++ b/examples/DimensionReduction/util.jl @@ -3,28 +3,55 @@ using Distributions using ForwardDiff using MCMCChains -function do_mcmc(callback, dim, logpost, num_chains, num_samples_per_chain, mcmc_sampler, prior_cov, initial_guess; subsample_rate=1) +function do_mcmc( + callback, + dim, + logpost, + num_chains, + num_samples_per_chain, + mcmc_sampler, + prior_cov, + initial_guess; + subsample_rate = 1, +) density_model = DensityModel(logpost) sampler = if mcmc_sampler == :mala - MALA(x -> MvNormal(.0001 * prior_cov * x, .0001 * 2 * prior_cov)) + MALA(x -> MvNormal(0.0001 * prior_cov * x, 0.0001 * 2 * prior_cov)) elseif mcmc_sampler == :rw - RWMH(MvNormal(zeros(dim), .01prior_cov)) + RWMH(MvNormal(zeros(dim), 0.01prior_cov)) else throw("Unknown mcmc_sampler=$mcmc_sampler") end num_batches = (num_chains + 7) ÷ 8 for batch in 1:num_batches - num_chains_in_batch = min(8, num_chains - (batch - 1)*8) - chain = sample(density_model, sampler, MCMCThreads(), num_samples_per_chain, num_chains_in_batch; chain_type=Chains, initial_params=[initial_guess for _ in 1:num_chains_in_batch]) - samp = vcat([vec(MCMCChains.get(chain, Symbol("param_$i"))[1]'[:, end÷2:subsample_rate:end])' for i in 1:dim]...) 
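# --- editor's note (illustrative sketch, not part of the patch) ---------------
# `do_mcmc` above takes the consumer callback as its first argument, so the call
# sites in steps 1 and 3 use Julia's do-block syntax. Samples arrive in batches
# of at most 8 chains, thinned by `subsample_rate` and with the first half of
# each chain discarded as burn-in. A minimal, hypothetical call following the
# signature shown in this hunk:
#
#     samples = zeros(dim, 0)
#     do_mcmc(dim, logpost, 8, 50_000, :rw, prior_cov, initial_guess;
#             subsample_rate = 1000) do samp, num_batches
#         samples = hcat(samples, samp)   # accumulate the thinned draws
#     end
# ------------------------------------------------------------------------------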
+        num_chains_in_batch = min(8, num_chains - (batch - 1) * 8)
+        chain = sample(
+            density_model,
+            sampler,
+            MCMCThreads(),
+            num_samples_per_chain,
+            num_chains_in_batch;
+            chain_type = Chains,
+            initial_params = [initial_guess for _ in 1:num_chains_in_batch],
+        )
+        samp = vcat(
+            [vec(MCMCChains.get(chain, Symbol("param_$i"))[1]'[:, (end ÷ 2):subsample_rate:end])' for i in 1:dim]...,
+        )
         callback(samp, num_batches)
     end
 end
 
 function do_eks(dim, G, y, obs_noise_cov, prior, rng, num_ensemble, num_iters_max)
     initial_ensemble = construct_initial_ensemble(rng, prior, num_ensemble)
-    ekp = EnsembleKalmanProcess(initial_ensemble, y, obs_noise_cov, Sampler(prior); rng, scheduler=EKSStableScheduler(2.0, 0.01))
+    ekp = EnsembleKalmanProcess(
+        initial_ensemble,
+        y,
+        obs_noise_cov,
+        Sampler(prior);
+        rng,
+        scheduler = EKSStableScheduler(2.0, 0.01),
+    )
 
     for i in 1:num_iters_max
         g = hcat([G(params) for params in eachcol(get_ϕ_final(prior, ekp))]...)

From 2c657b9a2470567795a6bf69aa926febfb1b6f96 Mon Sep 17 00:00:00 2001
From: Arne Bouillon
Date: Thu, 12 Jun 2025 14:30:24 -0700
Subject: [PATCH 30/35] Fix initial noise to fix Lorenz Jacobian bug

---
 .../DimensionReduction/problems/problem_lorenz.jl | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/DimensionReduction/problems/problem_lorenz.jl b/examples/DimensionReduction/problems/problem_lorenz.jl
index ef3a1a955..36bf21923 100644
--- a/examples/DimensionReduction/problems/problem_lorenz.jl
+++ b/examples/DimensionReduction/problems/problem_lorenz.jl
@@ -157,22 +157,24 @@ struct Lorenz <: ForwardMapType
 end
 
 # columns of X are samples
-function forward_map(X::AbstractVector, model::Lorenz)
+function forward_map(X::AbstractVector, model::Lorenz; noise = nothing)
+    noise = isnothing(noise) ? model.ic_cov_sqrt * randn(model.rng, model.nx) : noise
     lorenz_forward(
         EnsembleMemberConfig(X),
-        (model.x0 .+ model.ic_cov_sqrt * randn(model.rng, model.nx)),
+        (model.x0 .+ noise),
         model.config_settings,
         model.observation_config,
     )
 end
 
-function forward_map(X::AbstractMatrix, model::Lorenz)
-    hcat([forward_map(x, model) for x in eachcol(X)]...)
+function forward_map(X::AbstractMatrix, model::Lorenz; noise = nothing)
+    hcat([forward_map(x, model; noise) for x in eachcol(X)]...)
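# --- editor's note (explanatory comment, not part of the patch) ---------------
# The `noise` keyword added above is what fixes the Lorenz Jacobian bug: the
# finite-difference Jacobian below draws a single initial-condition perturbation
# and passes it to both the +h and -h evaluations, so the difference quotient
# sees the same noise realisation (common random numbers) and only the parameter
# perturbation changes. Previously each `forward_map` call drew fresh noise,
# which swamped the O(h) signal in the difference.
# ------------------------------------------------------------------------------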
end function jac_forward_map(X::AbstractVector, model::Lorenz) # Finite-difference Jacobian nx = model.nx + noise = model.ic_cov_sqrt * randn(model.rng, model.nx) h = 1e-6 J = zeros(nx * 2, nx) for i in 1:nx @@ -180,7 +182,7 @@ function jac_forward_map(X::AbstractVector, model::Lorenz) x_plus_h[i] += h x_minus_h = copy(X) x_minus_h[i] -= h - J[:, i] = (forward_map(x_plus_h, model) - forward_map(x_minus_h, model)) / (2 * h) + J[:, i] = (forward_map(x_plus_h, model; noise) - forward_map(x_minus_h, model; noise)) / (2 * h) end return J end From 0adcd6dd9cc634865bcad9dd0d527ebdb91ff77e Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 12 Jun 2025 14:30:33 -0700 Subject: [PATCH 31/35] Fix Huy bug --- .../step2_build_and_compare_diagnostic_matrices.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index c407611e9..69438fd3c 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -136,9 +136,9 @@ for trial in 1:num_trials N_ens \ prior_rt * pinvCuu * myCug * - obs_inv^2 * + obs_inv * sum((y - gg) * (y - gg)' for gg in eachcol(g)) * - obs_inv^2 * + obs_inv * myCug' * pinvCuu * prior_rt @@ -228,9 +228,9 @@ for trial in 1:num_trials N_ens \ prior_rt * pinvCuu * myCug * - obs_inv^2 * + obs_inv * sum((y - gg) * (y - gg)' for gg in eachcol(g)) * - obs_inv^2 * + obs_inv * myCug' * pinvCuu * prior_rt From 48a532176d4043206b0a8d4d44021f0882609ec7 Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 12 Jun 2025 15:23:52 -0700 Subject: [PATCH 32/35] Add linear-regression diagnostics --- .../step2_build_and_compare_diagnostic_matrices.jl | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 69438fd3c..77d533871 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -86,6 +86,15 @@ for trial in 1:num_trials diagnostic_matrices_u["Hu_mean"] = Hu_mean, :blue diagnostic_matrices_g["Hg_mean"] = Hg_mean, :blue + @info "Construct with prior (1 sample), linear-regression grad" + u = get_u(ekp, 1) + g = get_g(ekp, 1) + grad_linear_regression = (g / vcat(u, ones(1, size(u, 2))))[:, 1:end-1] + Hu_linear_regression = prior_rt * grad_linear_regression' * obs_inv * grad_linear_regression * prior_rt + Hg_linear_regression = obs_invrt * grad_linear_regression * prior_cov * grad_linear_regression' * obs_invrt + diagnostic_matrices_u["Hu_linear_regression"] = Hu_linear_regression, :teal + diagnostic_matrices_g["Hg_linear_regression"] = Hg_linear_regression, :teal + # [2a] One-point approximation at mean value with SL grad @info "Construct with mean value prior (1 sample), SL grad" g = get_g(ekp, 1) From 97264859b808ad584c2e7f83e61f4417bf49dbda Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Thu, 3 Jul 2025 10:35:35 -0700 Subject: [PATCH 33/35] =?UTF-8?q?Rewrite=20step=202;=20add=20diagnostics?= =?UTF-8?q?=20at=20different=20=CE=B1s;=20add=20localsl=20and=20egi?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/DimensionReduction/settings.jl | 20 +- .../step1_generate_inverse_problem_data.jl | 78 
++-- ...2_build_and_compare_diagnostic_matrices.jl | 367 +++++++----------- .../step3_estimate_posteriors.jl | 64 +-- 4 files changed, 255 insertions(+), 274 deletions(-) diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index 68c54b849..f313175a0 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -8,26 +8,34 @@ output_dim = 50 ## -- Configure parameters of the experiment itself -- rng_seed = 41 num_trials = 1 +αs = [0.0, 1.0] +grad_types = (:perfect, :mean, :linreg, :localsl) # Out of :perfect, :mean, :linreg, and :localsl +Vgrad_types = (:egi,) # Out of :egi # Specific to step 1 step1_eki_ensemble_size = 800 -step1_eki_max_iters = 20 -step1_mcmc_temperature = 1.0 # 1.0 is the "true" posterior; higher oversamples the tails step1_mcmc_sampler = :rw # :rw or :mala step1_mcmc_samples_per_chain = 50_000 step1_mcmc_num_chains = 8 step1_mcmc_subsample_rate = 1000 # Specific to step 2 -step2_num_prior_samples = 5_000 # paper uses 5e5 -step2_manopt_num_dims = 16 +step2_manopt_num_dims = 1 +step2_Vgrad_num_samples = 8 +step2_egi_ξ = 0.0 +step2_egi_γ = 1.5 # Specific to step 3 step3_diagnostics_to_use = - [("Hu", 50, diag, num) for diag in ("Hg", "Hgy_ekp_final", "Hgy_mcmc_final") for num in (4, 8, 16)] + [ + (input_diag, i, "Hg_0.0_ekp_perfect", 32) for input_diag in ( + "Hu_0.0_ekp_egi", + "Hu_1.0_ekp_egi", + ) for i in 4:2:16 + ] step3_run_reduced_in_full_space = false step3_marginalization = :forward_model # :loglikelihood or :forward_model -step3_num_marginalization_samples = 8 +step3_num_marginalization_samples = 1 step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 655edee77..140686e3c 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -27,13 +27,32 @@ else throw("Unknown problem=$problem") end +mutable struct CheckpointScheduler <: EnsembleKalmanProcesses.LearningRateScheduler + αs::Vector{Float64} + scheduler + + current_index + + CheckpointScheduler(αs, scheduler) = new(αs, scheduler, 2) +end + +function EnsembleKalmanProcesses.calculate_timestep!(ekp, g, Δt_new, scheduler::CheckpointScheduler) + EnsembleKalmanProcesses.calculate_timestep!(ekp, g, Δt_new, scheduler.scheduler) + if scheduler.current_index <= length(scheduler.αs) && get_algorithm_time(ekp)[end] > scheduler.αs[scheduler.current_index] + get_algorithm_time(ekp)[end] = scheduler.αs[scheduler.current_index] + scheduler.current_index += 1 + end + + nothing +end + + for trial in 1:num_trials prior, y, obs_noise_cov, model, true_parameter = problem_fun(input_dim, output_dim, rng) # [1] EKP run n_ensemble = step1_eki_ensemble_size - n_iters_max = step1_eki_max_iters initial_ensemble = construct_initial_ensemble(rng, prior, n_ensemble) ekp = EnsembleKalmanProcess( @@ -42,38 +61,52 @@ for trial in 1:num_trials obs_noise_cov, TransformInversion(); rng, - scheduler = EKSStableScheduler(2.0, 0.01), + scheduler = CheckpointScheduler(αs, EKSStableScheduler(2.0, 0.01)), ) - n_iters = n_iters_max - for i in 1:n_iters_max + n_iters = 0 + while vcat([0.0], get_algorithm_time(ekp))[end] < maximum(αs) + n_iters += 1 G_ens = hcat([forward_map(param, 
model) for param in eachcol(get_ϕ_final(prior, ekp))]...) terminate = update_ensemble!(ekp, G_ens) if !isnothing(terminate) - n_iters = i - 1 - break + throw("EKI terminated prematurely: $(terminate)! Shouldn't happen...") end end @info "EKP iterations: $n_iters" @info "Loss over iterations: $(get_error(ekp))" + ekp_samples = Dict() + for α in αs + closest_iter = argmin(0:n_iters) do i + abs(α - (i == 0 ? 0.0 : get_algorithm_time(ekp)[i])) + end + 1 + ekp_samples[α] = (get_u(ekp, closest_iter), closest_iter == n_iters + 1 ? hcat([forward_map(u, model) for u in eachcol(get_u(ekp, closest_iter))]...) : get_g(ekp, closest_iter)) + end + # [2] MCMC run - prior_cov, prior_inv, obs_inv = cov(prior), inv(cov(prior)), inv(obs_noise_cov) - mcmc_samples = zeros(input_dim, 0) - do_mcmc( - input_dim, - x -> begin - g = forward_map(x, model) - (-2 \ x' * prior_inv * x - 2 \ (y - g)' * obs_inv * (y - g)) / step1_mcmc_temperature - end, - step1_mcmc_num_chains, - step1_mcmc_samples_per_chain, - step1_mcmc_sampler, - prior_cov, - true_parameter; - subsample_rate = step1_mcmc_subsample_rate, - ) do samp, _ - mcmc_samples = hcat(mcmc_samples, samp) + mcmc_samples = Dict() + for α in αs + @info "Running MCMC for α = $α" + + prior_cov, prior_inv, obs_inv = cov(prior), inv(cov(prior)), inv(obs_noise_cov) + mcmc_samples[α] = (zeros(input_dim, 0), zeros(output_dim, 0)) + do_mcmc( + input_dim, + x -> begin + g = α == 0 ? 0y : forward_map(x, model) + -2 \ x' * prior_inv * x - 2 \ (y - g)' * obs_inv * (y - g) * α + end, + step1_mcmc_num_chains, + step1_mcmc_samples_per_chain, + step1_mcmc_sampler, + prior_cov, + true_parameter; + subsample_rate = step1_mcmc_subsample_rate, + ) do samp, _ + gsamp = hcat([forward_map(s, model) for s in eachcol(samp)]...) # TODO: This is wasteful, as they have actually already been computed + mcmc_samples[α] = (hcat(mcmc_samples[α][1], samp), hcat(mcmc_samples[α][2], gsamp)) + end end @info "MCMC finished" @@ -82,6 +115,7 @@ for trial in 1:num_trials save( "datafiles/ekp_$(problem)_$(trial).jld2", "ekp", ekp, + "ekp_samples", ekp_samples, "mcmc_samples", mcmc_samples, "prior", prior, "y", y, diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index 77d533871..a9b956a96 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -39,7 +39,8 @@ for trial in 1:num_trials # Load the EKP iterations loaded = load("datafiles/ekp_$(problem)_$(trial).jld2") ekp = loaded["ekp"] - mcmc_samples = loaded["mcmc_samples"] + ekp_samp = loaded["ekp_samples"] + mcmc_samp = loaded["mcmc_samples"] prior = loaded["prior"] obs_noise_cov = loaded["obs_noise_cov"] y = loaded["y"] @@ -51,241 +52,165 @@ for trial in 1:num_trials obs_invrt = sqrt(inv(obs_noise_cov)) obs_inv = inv(obs_noise_cov) - # random samples - prior_samples = sample(prior, step2_num_prior_samples) + ekp_samp_grad = Dict() + mcmc_samp_grad = Dict() + ekp_samp_Vgrad = Dict() + mcmc_samp_Vgrad = Dict() + for (dict_samp, dict_samp_grad, dict_samp_Vgrad) in ( + (ekp_samp, ekp_samp_grad, ekp_samp_Vgrad), + (mcmc_samp, mcmc_samp_grad, mcmc_samp_Vgrad), + ) + for (α, (samps, gsamps)) in dict_samp + dict_samp_grad[α] = [] + for grad_type in grad_types + @info "Computing gradients: α=$α, grad_type=$grad_type" + grads = if grad_type == :perfect + jac_forward_map(samps, model) + elseif grad_type == :mean + grad = 
jac_forward_map(reshape(mean(samps; dims = 2), :, 1), model)[1] + fill(grad, size(samps, 2)) + elseif grad_type == :linreg + grad = (gsamps .- mean(gsamps; dims = 2)) / (samps .- mean(samps; dims = 2)) + fill(grad, size(samps, 2)) + elseif grad_type == :localsl + map(zip(eachcol(samps), eachcol(gsamps))) do (u, g) + weights = exp.(-1/2 * norm.(eachcol(u .- samps)).^2) # TODO: Matrix weighting + D = Diagonal(sqrt.(weights)) + uw = (samps .- mean(samps * Diagonal(weights); dims = 2)) * D + gw = (gsamps .- mean(gsamps * Diagonal(weights); dims = 2)) * D + gw / uw + end + else + throw("Unknown grad_type=$grad_type") + end + + push!(dict_samp_grad[α], (grad_type, grads)) + end - @info "Construct PCA matrices" - pca_u = prior_samples' - pca_g = forward_map(prior_samples, model)' + dict_samp_Vgrad[α] = [] + for Vgrad_type in Vgrad_types + @info "Computing Vgradients: α=$α, Vgrad_type=$Vgrad_type" + grads = if Vgrad_type == :egi + ∇Vs = map(enumerate(zip(eachcol(samps), eachcol(gsamps)))) do (i, (u, g)) + @info "In full EGI procedure; particle $i/$(size(samps, 2))" - diagnostic_matrices_u["pca_u"] = pca_u, :gray - diagnostic_matrices_g["pca_g"] = pca_g, :gray + yys = eachcol(rand(MvNormal((1-α)g + α*y, (1-α)obs_noise_cov), α == 1.0 ? 1 : step2_Vgrad_num_samples)) # If α == 1.0, all samples will be the same anyway + map(yys) do yy + Vs = [1/2 * (yy - g)' * obs_inv * (yy - g) for g in eachcol(gsamps)] - # [1a] Large-sample diagnostic matrices with perfect grad(Baptista et al 2022) - @info "Construct good matrix ($(step2_num_prior_samples) samples of prior, perfect grad)" - gradG_samples = jac_forward_map(prior_samples, model) - Hu = zeros(input_dim, input_dim) - Hg = zeros(output_dim, output_dim) + X = samps[:, [1:i-1; i+1:end]] .- u + Z = X ./ norm.(eachcol(X))' + A = hcat(X'Z, (X'Z).^2 / 2) - for j in 1:step2_num_prior_samples - Hu .+= step2_num_prior_samples \ prior_rt * gradG_samples[j]' * obs_inv * gradG_samples[j] * prior_rt - Hg .+= step2_num_prior_samples \ obs_invrt * gradG_samples[j] * prior_cov * gradG_samples[j]' * obs_invrt - end + ξ, γ = step2_egi_ξ, step2_egi_γ + Γ = γ * (factorial(3) \ Diagonal(norm.(eachcol(X)).^3) + ξ * I) # The paper has γ², but that's wrong - diagnostic_matrices_u["Hu"] = Hu, :black - diagnostic_matrices_g["Hg"] = Hg, :black - - # [1b] One-point approximation at mean value, with perfect grad - @info "Construct with mean value (1 sample), perfect grad" - prior_mean_appr = mean(prior) # approximate mean - gradG_at_mean = jac_forward_map(reshape(prior_mean_appr, input_dim, 1), model)[1] - # NB the logpdf of the prior at the ~mean is 1805 so pdf here is ~Inf - Hu_mean = prior_rt * gradG_at_mean' * obs_inv * gradG_at_mean * prior_rt - Hg_mean = obs_invrt * gradG_at_mean * prior_cov * gradG_at_mean' * obs_invrt - - diagnostic_matrices_u["Hu_mean"] = Hu_mean, :blue - diagnostic_matrices_g["Hg_mean"] = Hg_mean, :blue - - @info "Construct with prior (1 sample), linear-regression grad" - u = get_u(ekp, 1) - g = get_g(ekp, 1) - grad_linear_regression = (g / vcat(u, ones(1, size(u, 2))))[:, 1:end-1] - Hu_linear_regression = prior_rt * grad_linear_regression' * obs_inv * grad_linear_regression * prior_rt - Hg_linear_regression = obs_invrt * grad_linear_regression * prior_cov * grad_linear_regression' * obs_invrt - diagnostic_matrices_u["Hu_linear_regression"] = Hu_linear_regression, :teal - diagnostic_matrices_g["Hg_linear_regression"] = Hg_linear_regression, :teal - - # [2a] One-point approximation at mean value with SL grad - @info "Construct with mean value prior (1 
sample), SL grad" - g = get_g(ekp, 1) - u = get_u(ekp, 1) - N_ens = get_N_ens(ekp) - C_at_prior = cov([u; g], dims = 2) # basic cross-cov - Cuu = C_at_prior[1:input_dim, 1:input_dim] - svdCuu = svd(Cuu) - nz = min(N_ens - 1, input_dim) # nonzero sv's - pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance - Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_prior[(input_dim + 1):end, 1:input_dim] # TODO: Isn't this Cgu? - # SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. - # Hu_ekp_prior = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt - # Hg_ekp_prior = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt - Hu_ekp_prior = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt - Hg_ekp_prior = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt - - diagnostic_matrices_u["Hu_ekp_prior"] = Hu_ekp_prior, :red - diagnostic_matrices_g["Hg_ekp_prior"] = Hg_ekp_prior, :red - - println("Relative gradient error: ", norm(gradG_at_mean - (Cug * pinvCuu)) / norm(gradG_at_mean)) - - # [2b] One-point approximation at mean value with SL grad - @info "Construct with mean value EKP final (1 sample), SL grad" - final_it = length(get_g(ekp)) - g = get_g(ekp, final_it) - u = get_u(ekp, final_it) - C_at_final = cov([u; g], dims = 2) # basic cross-cov - Cuu = C_at_final[1:input_dim, 1:input_dim] - svdCuu = svd(Cuu) - nz = min(N_ens - 1, input_dim) # nonzero sv's - pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance - Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_final[(input_dim + 1):end, 1:input_dim] # TODO: Isn't this Cgu? - # SL_gradG = (pinvCuu * Cug')' # approximates ∇G with ensemble. 
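# --- editor's note (illustrative sketch, not part of the patch) ---------------
# The lines around this point build the statistical-linearization (SL) gradient:
# from a joint ensemble covariance of inputs u and outputs g = G(u), the
# ensemble estimate of the forward-map Jacobian is Cov(g, u) * pinv(Cov(u, u)).
# A minimal standalone version (assumed shapes: u is input_dim x N, g is
# output_dim x N; plain pseudo-inverse instead of the truncated SVD or
# localization used in the script):
#
#     C   = cov(vcat(u, g), dims = 2)
#     Cuu = C[1:input_dim, 1:input_dim]
#     Cgu = C[(input_dim + 1):end, 1:input_dim]
#     SL_gradG = Cgu * pinv(Cuu)          # ≈ ∇G, size output_dim x input_dim
#
# On the "TODO: Isn't this Cgu?" comments: the slice
# C[(input_dim + 1):end, 1:input_dim] is indeed Cov(g, u) (rows indexed by g,
# columns by u), so the variable named `Cug` holds C_gu; the surrounding
# formulas apply the transpose where C_ug is needed, so only the name is
# misleading.
# ------------------------------------------------------------------------------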
- # Hu_ekp_final = prior_rt * SL_gradG' * obs_inv * SL_gradG * prior_rt # here still using prior roots not Cuu - # Hg_ekp_final = obs_invrt * SL_gradG * prior_cov * SL_gradG' * obs_invrt - Hu_ekp_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt - Hg_ekp_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt - - diagnostic_matrices_u["Hu_ekp_final"] = Hu_ekp_final, :gold - diagnostic_matrices_g["Hg_ekp_final"] = Hg_ekp_final, :gold - - @info "Construct y-informed at EKP final (SL grad)" - myCug = Cug' - Huy_ekp_final = - N_ens \ prior_rt * - pinvCuu * - myCug * - obs_inv * - sum((y - gg) * (y - gg)' for gg in eachcol(g)) * - obs_inv * - myCug' * - pinvCuu * - prior_rt - - dim_g = size(g, 1) - vecs = zeros(dim_g, 0) - num_vecs = step2_manopt_num_dims - @assert num_vecs ≤ dim_g - for k in 1:num_vecs - println("vector $k") - counter = 0 - - vecs_compl = qr(vecs).Q[:, k:end] - M = Grassmann(dim_g + 1 - k, 1) - - f = - (_, v) -> begin - Vs = hcat(vecs, vecs_compl * vec(v)) - Γtildeinv = obs_inv - Vs * inv(Vs' * obs_noise_cov * Vs) * Vs' - res = - N_ens \ sum( # TODO: Check if whitening is correct - norm((y - gg)' * obs_invrt * (I - Vs * Vs') * myCug' * Cuu_invrt)^2 for gg in eachcol(g) - ) + Y = Vs[[1:i-1; i+1:end]] .- Vs[i] + + ū = pinv(Γ \ A) * (Γ \ Y) + Z * ū[1:end÷2] + end + end - counter += 1 - mod(counter, 100) == 1 && println(" iter $counter: $res") + mean( + mean( + ∇V * ∇V' + for ∇V in ∇Vs_at_x + ) for ∇Vs_at_x in ∇Vs + ) + else + throw("Unknown Vgrad_type=$Vgrad_type") + end - res + push!(dict_samp_Vgrad[α], (Vgrad_type, grads)) end + end + end - # svd_Hg_ekp_final = svd(Hg_ekp_final; alg=LinearAlgebra.QRIteration()) - # v00 = (vecs_compl' * svd_Hg_ekp_final.V * Diagonal(svd_Hg_ekp_final.S))[:, 1:1] - # ^ This should be a good initial guess, but it seems like a local minimum that the optimizer can't get out of - v00 = ones(dim_g + 1 - k, 1) - v00 ./= norm(v00) - v0 = [v00 + randn(dim_g + 1 - k, 1) / 2 for _ in 1:dim_g] - v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion = StopWhenPopulationConcentrated(0.1, 0.1)) + # random samples + @assert 0.0 in αs + prior_samp, prior_gsamp = ekp_samp[0.0] + num_prior_samps = size(prior_samp, 2) - vecs = hcat(vecs, vecs_compl * bestvec) - end - vecs = hcat(vecs, randn(dim_g, dim_g - num_vecs)) - Hgy_ekp_final = vecs * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * vecs' - - diagnostic_matrices_u["Huy_ekp_final"] = Huy_ekp_final, :purple - diagnostic_matrices_g["Hgy_ekp_final"] = Hgy_ekp_final, :purple - - - @info "Construct with mean value MCMC final (1 sample), SL grad" - u = mcmc_samples - g = hcat([forward_map(uu, model) for uu in eachcol(u)]...) - N_ens = size(u, 2) - C_at_final = cov([u; g], dims = 2) # basic cross-cov - Cuu = C_at_final[1:input_dim, 1:input_dim] - svdCuu = svd(Cuu) - nz = min(N_ens - 1, input_dim) # nonzero sv's - pinvCuu = svdCuu.U[:, 1:nz] * Diagonal(1 ./ svdCuu.S[1:nz]) * svdCuu.Vt[1:nz, :] # can replace with localized covariance - Cuu_invrt = svdCuu.U * Diagonal(1 ./ sqrt.(svdCuu.S)) * svdCuu.Vt - Cug = C_at_final[(input_dim + 1):end, 1:input_dim] # TODO: Isn't this Cgu? 
- Hu_mcmc_final = Cuu_invrt * Cug' * obs_inv * Cug * Cuu_invrt - Hg_mcmc_final = obs_invrt * Cug * pinvCuu * Cug' * obs_invrt - - diagnostic_matrices_u["Hu_mcmc_final"] = Hu_mcmc_final, :green - diagnostic_matrices_g["Hg_mcmc_final"] = Hg_mcmc_final, :green - - @info "Construct y-informed at MCMC final (perfect grad)" - gradG_samples = jac_forward_map(u, model) - Huy = zeros(input_dim, input_dim) - - for j in 1:N_ens - Huy .+= - N_ens \ prior_rt * - gradG_samples[j]' * - obs_inv^2 * - (y - g[:, j]) * - (y - g[:, j])' * - obs_inv^2 * - gradG_samples[j] * - prior_rt - end + @info "Construct PCA matrices" + pca_u = prior_samp' + pca_g = prior_gsamp' - diagnostic_matrices_u["Huy"] = Huy, :pink - - @info "Construct y-informed at MCMC final (SL grad)" - myCug = Cug' - Huy_mcmc_final = - N_ens \ prior_rt * - pinvCuu * - myCug * - obs_inv * - sum((y - gg) * (y - gg)' for gg in eachcol(g)) * - obs_inv * - myCug' * - pinvCuu * - prior_rt - - dim_g = size(g, 1) - vecs = zeros(dim_g, 0) - num_vecs = step2_manopt_num_dims - @assert num_vecs ≤ dim_g - for k in 1:num_vecs - println("vector $k") - counter = 0 - - vecs_compl = qr(vecs).Q[:, k:end] - M = Grassmann(dim_g + 1 - k, 1) - - f = - (_, v) -> begin - Vs = hcat(vecs, vecs_compl * vec(v)) - Γtildeinv = obs_inv - Vs * inv(Vs' * obs_noise_cov * Vs) * Vs' - res = - N_ens \ sum( # TODO: Check if whitening is correct - norm((y - gg)' * obs_invrt * (I - Vs * Vs') * myCug' * Cuu_invrt)^2 for gg in eachcol(g) - ) + diagnostic_matrices_u["pca_u"] = pca_u, :gray + diagnostic_matrices_g["pca_g"] = pca_g, :gray - counter += 1 - mod(counter, 100) == 1 && println(" iter $counter: $res") + for α in αs + for (sampler, dict_samp, dict_samp_grad, dict_samp_Vgrad) in ( + ("ekp", ekp_samp, ekp_samp_grad, ekp_samp_Vgrad), + ("mcmc", mcmc_samp, mcmc_samp_grad, mcmc_samp_Vgrad), + ) + samp, gsamp = dict_samp[α] + for (Vgrad_type, grads) in dict_samp_Vgrad[α] + name_suffix = "$(α)_$(sampler)_$(Vgrad_type)" + @info "Construct $name_suffix matrices" - res - end + Hu = prior_rt * mean(grads) * prior_rt + # TODO: Hg - # svd_Hg_ekp_final = svd(Hg_ekp_final; alg=LinearAlgebra.QRIteration()) - # v00 = (vecs_compl' * svd_Hg_ekp_final.V * Diagonal(svd_Hg_ekp_final.S))[:, 1:1] - # ^ This should be a good initial guess, but it seems like a local minimum that the optimizer can't get out of - v00 = ones(dim_g + 1 - k, 1) - v00 ./= norm(v00) - v0 = [v00 + randn(dim_g + 1 - k, 1) / 2 for _ in 1:dim_g] - v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion = StopWhenPopulationConcentrated(0.1, 0.1)) + diagnostic_matrices_u["Hu_$name_suffix"] = Hu, :black + end - vecs = hcat(vecs, vecs_compl * bestvec) + for (grad_type, grads) in dict_samp_grad[α] + name_suffix = "$(α)_$(sampler)_$(grad_type)" + @info "Construct $name_suffix matrices" + + Hu = prior_rt * mean( + grad' * obs_inv * ( + (1-α)obs_noise_cov + α^2 * (y - g) * (y - g)' + ) * obs_inv * grad + for (g, grad) in zip(eachcol(gsamp), grads) + ) * prior_rt + + Hg = if α == 0 + obs_invrt * mean(grad * prior_cov * grad' for grad in grads) * obs_invrt + else + vecs = zeros(output_dim, 0) + num_vecs = step2_manopt_num_dims + @assert num_vecs ≤ output_dim + for k in 1:num_vecs + println("vector $k") + counter = 0 + + vecs_compl = qr(vecs).Q[:, k:end] + M = Grassmann(output_dim + 1 - k, 1) + + f = + (_, v) -> begin + Vs = hcat(vecs, vecs_compl * vec(v)) + Γtildeinv = obs_inv - Vs * inv(Vs' * obs_noise_cov * Vs) * Vs' + res = + mean( # TODO: This isn't yet the right form for α≠0! 
+ norm((y - g)' * obs_invrt * (I - Vs * Vs') * grad)^2 for (g, grad) in zip(eachcol(gsamp), grads) + ) + + counter += 1 + mod(counter, 100) == 1 && println(" iter $counter: $res") + + res + end + + v00 = ones(output_dim + 1 - k, 1) + v00 ./= norm(v00) + v0 = [v00 + randn(output_dim + 1 - k, 1) / 2 for _ in 1:output_dim] + v0 = [v0i / norm(v0i) for v0i in v0] + bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion = StopWhenPopulationConcentrated(0.1*100000, 0.1*100000)) + + vecs = hcat(vecs, vecs_compl * bestvec) + end + vecs = hcat(vecs, randn(output_dim, output_dim - num_vecs)) + vecs * diagm(vcat(num_vecs:-1:1, zeros(output_dim - num_vecs))) * vecs' + end + + diagnostic_matrices_u["Hu_$name_suffix"] = Hu, :black + diagnostic_matrices_g["Hg_$name_suffix"] = Hg, :black + end + end end - vecs = hcat(vecs, randn(dim_g, dim_g - num_vecs)) - Hgy_mcmc_final = vecs * diagm(vcat(num_vecs:-1:1, zeros(dim_g - num_vecs))) * vecs' - - diagnostic_matrices_u["Huy_mcmc_final"] = Huy_mcmc_final, :orange - diagnostic_matrices_g["Hgy_mcmc_final"] = Hgy_mcmc_final, :orange for (name, (value, color)) in diagnostic_matrices_u if !haskey(all_diagnostic_matrices_u, name) @@ -318,7 +243,7 @@ trunc = min(trunc, input_dim, output_dim) alg = LinearAlgebra.QRIteration() plots = map([:in, :out]) do in_or_out diagnostics = in_or_out == :in ? all_diagnostic_matrices_u : all_diagnostic_matrices_g - ref = in_or_out == :in ? "Hu" : "Hg" + ref = in_or_out == :in ? "Hu_0.0_ekp_perfect" : "Hg_0.0_ekp_perfect" p = plot(; title = "Similarity of spectrum of $(in_or_out)put diagnostic", xlabel = "SV index") for (name, (mats, color)) in diagnostics diff --git a/examples/DimensionReduction/step3_estimate_posteriors.jl b/examples/DimensionReduction/step3_estimate_posteriors.jl index aa0049fb2..b4bc128b0 100644 --- a/examples/DimensionReduction/step3_estimate_posteriors.jl +++ b/examples/DimensionReduction/step3_estimate_posteriors.jl @@ -19,6 +19,8 @@ if !isfile("datafiles/diagnostic_matrices_$(problem)_1.jld2") include("step2_build_and_compare_diagnostic_matrices.jl") end +means_full = Dict() + for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use @info "Diagnostic matrices = ($in_diag [1-$in_r], $out_diag [1-$out_r])" @@ -83,20 +85,26 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use covsamps = rand(MvNormal(zeros(input_dim), Mcov), step3_num_marginalization_samples) if step3_posterior_sampler == :mcmc - mean_full = zeros(input_dim) - do_mcmc( - input_dim, - x -> begin - g = forward_map(x, model) - -2 \ x' * prior_inv * x - 2 \ (y - g)' * obs_inv * (y - g) - end, - step3_mcmc_num_chains, - step3_mcmc_samples_per_chain, - step3_mcmc_sampler, - prior_cov, - true_parameter, - ) do samp, num_batches - mean_full += mean(samp; dims = 2) / num_batches + mean_full = if trial in keys(means_full) + means_full[trial] + else + mean_full = zeros(input_dim) + do_mcmc( + input_dim, + x -> begin + g = forward_map(x, model) + -2 \ x' * prior_inv * x - 2 \ (y - g)' * obs_inv * (y - g) + end, + step3_mcmc_num_chains, + step3_mcmc_samples_per_chain, + step3_mcmc_sampler, + prior_cov, + true_parameter, + ) do samp, num_batches + mean_full += mean(samp; dims = 2) / num_batches + end + means_full[trial] = mean_full + mean_full end mean_full_red = P * mean_full @@ -159,17 +167,23 @@ for (in_diag, in_r, out_diag, out_r) in step3_diagnostics_to_use "EKS sampling from the reduced posterior is only supported when marginalizing over the forward model.", ) - u, _ = do_eks( - input_dim, - x -> forward_map(x, model), 
- y, - obs_noise_cov, - prior, - rng, - step3_eks_ensemble_size, - step3_eks_max_iters, - ) - mean_full = mean(u; dims = 2) + mean_full = if trial in keys(means_full) + means_full[trial] + else + u, _ = do_eks( + input_dim, + x -> forward_map(x, model), + y, + obs_noise_cov, + prior, + rng, + step3_eks_ensemble_size, + step3_eks_max_iters, + ) + mean_full = mean(u; dims = 2) + means_full[trial] = mean_full + mean_full + end mean_full_red = P * mean_full if step3_run_reduced_in_full_space From 9c6fa4fc0e243fddf12702f7ecc88133174693af Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Tue, 8 Jul 2025 19:07:38 -0700 Subject: [PATCH 34/35] Center Lorenz prior --- .../problems/problem_linlinexp.jl | 4 +-- .../problems/problem_lorenz.jl | 6 ++-- examples/DimensionReduction/settings.jl | 33 ++++++++++--------- .../step1_generate_inverse_problem_data.jl | 2 ++ 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/examples/DimensionReduction/problems/problem_linlinexp.jl b/examples/DimensionReduction/problems/problem_linlinexp.jl index 580bf6d67..4aaf366b5 100644 --- a/examples/DimensionReduction/problems/problem_linlinexp.jl +++ b/examples/DimensionReduction/problems/problem_linlinexp.jl @@ -44,11 +44,11 @@ struct LinLinExp{AM <: AbstractMatrix} <: ForwardMapType end function forward_map(X::AVorM, model::LinLinExp) where {AVorM <: AbstractVecOrMat} - return model.G * (X .* exp.(0.01X)) + return model.G * (X .* exp.(0.05X)) end function jac_forward_map(X::AbstractVector, model::LinLinExp) - return model.G * Diagonal(exp.(0.01X) .* (1 .+ 0.01X)) + return model.G * Diagonal(exp.(0.05X) .* (1 .+ 0.05X)) end function jac_forward_map(X::AbstractMatrix, model::LinLinExp) diff --git a/examples/DimensionReduction/problems/problem_lorenz.jl b/examples/DimensionReduction/problems/problem_lorenz.jl index 36bf21923..0a5972585 100644 --- a/examples/DimensionReduction/problems/problem_lorenz.jl +++ b/examples/DimensionReduction/problems/problem_lorenz.jl @@ -160,7 +160,7 @@ end function forward_map(X::AbstractVector, model::Lorenz; noise = nothing) noise = isnothing(noise) ? 
model.ic_cov_sqrt * randn(model.rng, model.nx) : noise lorenz_forward( - EnsembleMemberConfig(X), + EnsembleMemberConfig(X .+ 8.0), (model.x0 .+ noise), model.config_settings, model.observation_config, @@ -262,11 +262,11 @@ function lorenz(input_dim, output_dim, rng) B_sqrt = sqrt(B) #Prior mean - mu = 8.0 * ones(nx) + mu = zeros(nx) #Creating prior distribution distribution = Parameterized(MvNormal(mu, B)) - constraint = repeat([no_constraint()], 40) + constraint = repeat([no_constraint()], nx) name = "ml96_prior" prior = ParameterDistribution(distribution, constraint, name) diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index f313175a0..d11124456 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -1,38 +1,39 @@ # CONFIGURE THE THREE STEPS ## -- Configure the inverse problem -- -problem = "linlinexp" # "lorenz" or "linear" or "linear_exp" or "linlinexp" -input_dim = 200 -output_dim = 50 +problem = "lorenz" # "lorenz" or "linear" or "linear_exp" or "linlinexp" +input_dim = 40 +output_dim = 80 ## -- Configure parameters of the experiment itself -- rng_seed = 41 num_trials = 1 -αs = [0.0, 1.0] +αs = 0.0:0.25:1.0 grad_types = (:perfect, :mean, :linreg, :localsl) # Out of :perfect, :mean, :linreg, and :localsl -Vgrad_types = (:egi,) # Out of :egi +Vgrad_types = () # Out of :egi # Specific to step 1 -step1_eki_ensemble_size = 800 +step1_eki_ensemble_size = 200 step1_mcmc_sampler = :rw # :rw or :mala -step1_mcmc_samples_per_chain = 50_000 +step1_mcmc_samples_per_chain = 5_000 step1_mcmc_num_chains = 8 -step1_mcmc_subsample_rate = 1000 +step1_mcmc_subsample_rate = 100 # Specific to step 2 -step2_manopt_num_dims = 1 +step2_manopt_num_dims = 0 step2_Vgrad_num_samples = 8 step2_egi_ξ = 0.0 step2_egi_γ = 1.5 # Specific to step 3 step3_diagnostics_to_use = - [ - (input_diag, i, "Hg_0.0_ekp_perfect", 32) for input_diag in ( - "Hu_0.0_ekp_egi", - "Hu_1.0_ekp_egi", - ) for i in 4:2:16 - ] + vcat([ + ("Hu_1.0_mcmc_$grad_type", i, "Hg_1.0_ekp_perfect", 80) for grad_type in grad_types for i in 4:2:16 + ], [ + ("Hu_$(α)_mcmc_perfect", i, "Hg_1.0_ekp_perfect", 80) for α in αs[1:end-1] for i in 4:2:16 + ], [ + ("pca_u", i, "Hg_1.0_ekp_perfect", 80) for i in 4:2:16 + ]) step3_run_reduced_in_full_space = false step3_marginalization = :forward_model # :loglikelihood or :forward_model step3_num_marginalization_samples = 1 @@ -40,5 +41,5 @@ step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` step3_mcmc_sampler = :rw # :rw or :mala; only used if `step3_posterior_sampler == :mcmc` -step3_mcmc_samples_per_chain = 20_000 # only used if `step3_posterior_sampler == :mcmc` +step3_mcmc_samples_per_chain = 2_000 # only used if `step3_posterior_sampler == :mcmc` step3_mcmc_num_chains = 24 # only used if `step3_posterior_sampler == :mcmc` diff --git a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl index 140686e3c..39d8ad5af 100644 --- a/examples/DimensionReduction/step1_generate_inverse_problem_data.jl +++ b/examples/DimensionReduction/step1_generate_inverse_problem_data.jl @@ -49,6 +49,8 @@ end for trial in 1:num_trials + @info "Trial $trial" + prior, y, obs_noise_cov, model, true_parameter = problem_fun(input_dim, output_dim, rng) # [1] EKP run From fa9c51538077b3edd7de62fc4ab3e91ad8c35f85 
Mon Sep 17 00:00:00 2001 From: Arne Bouillon Date: Wed, 13 Aug 2025 16:22:14 +0200 Subject: [PATCH 35/35] Use gradients for manifold optimization --- examples/DimensionReduction/settings.jl | 31 +++++----- ...2_build_and_compare_diagnostic_matrices.jl | 62 +++++++++---------- 2 files changed, 46 insertions(+), 47 deletions(-) diff --git a/examples/DimensionReduction/settings.jl b/examples/DimensionReduction/settings.jl index d11124456..cc6704e8e 100644 --- a/examples/DimensionReduction/settings.jl +++ b/examples/DimensionReduction/settings.jl @@ -1,39 +1,38 @@ # CONFIGURE THE THREE STEPS ## -- Configure the inverse problem -- -problem = "lorenz" # "lorenz" or "linear" or "linear_exp" or "linlinexp" -input_dim = 40 -output_dim = 80 +problem = "linlinexp" # "lorenz" or "linear" or "linear_exp" or "linlinexp" +input_dim = 200 +output_dim = 50 ## -- Configure parameters of the experiment itself -- rng_seed = 41 num_trials = 1 αs = 0.0:0.25:1.0 -grad_types = (:perfect, :mean, :linreg, :localsl) # Out of :perfect, :mean, :linreg, and :localsl +grad_types = (:perfect,) # Out of :perfect, :mean, :linreg, and :localsl Vgrad_types = () # Out of :egi # Specific to step 1 step1_eki_ensemble_size = 200 step1_mcmc_sampler = :rw # :rw or :mala -step1_mcmc_samples_per_chain = 5_000 +step1_mcmc_samples_per_chain = 50_000 step1_mcmc_num_chains = 8 -step1_mcmc_subsample_rate = 100 +step1_mcmc_subsample_rate = 1000 # Specific to step 2 -step2_manopt_num_dims = 0 +step2_manopt_num_dims = 8 step2_Vgrad_num_samples = 8 step2_egi_ξ = 0.0 step2_egi_γ = 1.5 # Specific to step 3 -step3_diagnostics_to_use = - vcat([ - ("Hu_1.0_mcmc_$grad_type", i, "Hg_1.0_ekp_perfect", 80) for grad_type in grad_types for i in 4:2:16 - ], [ - ("Hu_$(α)_mcmc_perfect", i, "Hg_1.0_ekp_perfect", 80) for α in αs[1:end-1] for i in 4:2:16 - ], [ - ("pca_u", i, "Hg_1.0_ekp_perfect", 80) for i in 4:2:16 - ]) +step3_diagnostics_to_use = [ + ("Hu_1.0_mcmc_perfect", input_dim, "Hg_0.0_mcmc_perfect", step2_manopt_num_dims), + ("Hu_1.0_mcmc_perfect", input_dim, "Hg_0.25_mcmc_perfect", step2_manopt_num_dims), + ("Hu_1.0_mcmc_perfect", input_dim, "Hg_0.5_mcmc_perfect", step2_manopt_num_dims), + ("Hu_1.0_mcmc_perfect", input_dim, "Hg_0.75_mcmc_perfect", step2_manopt_num_dims), + ("Hu_1.0_mcmc_perfect", input_dim, "Hg_1.0_mcmc_perfect", step2_manopt_num_dims), +] step3_run_reduced_in_full_space = false step3_marginalization = :forward_model # :loglikelihood or :forward_model step3_num_marginalization_samples = 1 @@ -41,5 +40,5 @@ step3_posterior_sampler = :mcmc # :eks or :mcmc step3_eks_ensemble_size = 800 # only used if `step3_posterior_sampler == :eks` step3_eks_max_iters = 200 # only used if `step3_posterior_sampler == :eks` step3_mcmc_sampler = :rw # :rw or :mala; only used if `step3_posterior_sampler == :mcmc` -step3_mcmc_samples_per_chain = 2_000 # only used if `step3_posterior_sampler == :mcmc` +step3_mcmc_samples_per_chain = 20_000 # only used if `step3_posterior_sampler == :mcmc` step3_mcmc_num_chains = 24 # only used if `step3_posterior_sampler == :mcmc` diff --git a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl index a9b956a96..1e5afda71 100644 --- a/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl +++ b/examples/DimensionReduction/step2_build_and_compare_diagnostic_matrices.jl @@ -169,41 +169,41 @@ for trial in 1:num_trials Hg = if α == 0 obs_invrt * mean(grad * prior_cov * grad' for grad in grads) * obs_invrt 
else - vecs = zeros(output_dim, 0) - num_vecs = step2_manopt_num_dims - @assert num_vecs ≤ output_dim - for k in 1:num_vecs - println("vector $k") - counter = 0 - - vecs_compl = qr(vecs).Q[:, k:end] - M = Grassmann(output_dim + 1 - k, 1) - - f = - (_, v) -> begin - Vs = hcat(vecs, vecs_compl * vec(v)) - Γtildeinv = obs_inv - Vs * inv(Vs' * obs_noise_cov * Vs) * Vs' - res = - mean( # TODO: This isn't yet the right form for α≠0! - norm((y - g)' * obs_invrt * (I - Vs * Vs') * grad)^2 for (g, grad) in zip(eachcol(gsamp), grads) - ) - - counter += 1 - mod(counter, 100) == 1 && println(" iter $counter: $res") - - res + Vs0 = qr(randn(output_dim, output_dim)).Q[:, 1:step2_manopt_num_dims] + + f = (_, Vs) -> begin + res = mean( + begin + mat = obs_invrt * grad * prior_rt - Vs*(Vs'*obs_invrt * grad * prior_rt) + a = obs_invrt * (y - g) + + (1-α)norm(mat) + α^2 * norm(a' * mat) end + for (g, grad) in zip(eachcol(gsamp), grads) + ) + println(res) + res + end - v00 = ones(output_dim + 1 - k, 1) - v00 ./= norm(v00) - v0 = [v00 + randn(output_dim + 1 - k, 1) / 2 for _ in 1:output_dim] - v0 = [v0i / norm(v0i) for v0i in v0] - bestvec = NelderMead(M, f, NelderMeadSimplex(v0); stopping_criterion = StopWhenPopulationConcentrated(0.1*100000, 0.1*100000)) + egrad = Vs -> begin + -2mean( + begin + a = obs_invrt * (y - g) + mat = obs_invrt * grad * prior_cov * grad' * obs_invrt * (I - Vs * Vs') * ((1-α)I + α^2 * a * a') - vecs = hcat(vecs, vecs_compl * bestvec) + mat + mat' + end + for (g, grad) in zip(eachcol(gsamp), grads) + ) * Vs + end + rgrad = (_, Vs) -> begin + egrd = egrad(Vs) + res = egrd - Vs * (Vs' * egrd) + res end - vecs = hcat(vecs, randn(output_dim, output_dim - num_vecs)) - vecs * diagm(vcat(num_vecs:-1:1, zeros(output_dim - num_vecs))) * vecs' + Vs = quasi_Newton(Grassmann(output_dim, step2_manopt_num_dims), f, rgrad, Vs0; stopping_criterion = StopWhenGradientNormLess(3.0)) + Vs = hcat(Vs, randn(output_dim, output_dim - step2_manopt_num_dims)) + Vs * diagm(vcat(step2_manopt_num_dims:-1:1, zeros(output_dim - step2_manopt_num_dims))) * Vs' end diagnostic_matrices_u["Hu_$name_suffix"] = Hu, :black
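# --- editor's note (illustrative sketch, not part of the patch) ---------------
# Pattern used in the block above: the cost is defined on the Grassmann manifold
# of k-dimensional subspaces, the Euclidean gradient is computed in the ambient
# space, and the Riemannian gradient is its projection onto the tangent space at
# the current orthonormal representative Vs, i.e. egrad - Vs * (Vs' * egrad),
# before handing it to a Riemannian quasi-Newton solver. A minimal,
# self-contained example of the same pattern with a toy cost (assumes
# Manifolds.jl and Manopt.jl, as used elsewhere in this example):
#
#     using Manifolds, Manopt, LinearAlgebra
#
#     n, k = 50, 8
#     A = Symmetric(randn(n, n))
#     M = Grassmann(n, k)
#     f(M, V) = -tr(V' * A * V)                       # minimise => top-k eigenspace of A
#     egrad(V) = -2 * A * V                           # Euclidean gradient
#     rgrad(M, V) = egrad(V) - V * (V' * egrad(V))    # project onto the tangent space
#     V0 = qr(randn(n, k)).Q[:, 1:k]
#     Vopt = quasi_Newton(M, f, rgrad, V0; stopping_criterion = StopWhenGradientNormLess(1e-6))
# ------------------------------------------------------------------------------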