Skip to content

Commit 831cd69

Browse files
committed
update spread calc of joint KDE
1 parent 4984d40 commit 831cd69

File tree

4 files changed

+600
-15
lines changed

4 files changed

+600
-15
lines changed
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
setwd("./Paper/MeasurementVariance/")
2+
library(ggplot2)
3+
4+
# Load the results
5+
# ========================================
6+
extract_best_runs <- function(res) {
7+
states <- res$states
8+
params <- res$params
9+
errors <- res$metrices
10+
errors <- Reduce(rbind, errors)
11+
states <- Reduce(rbind, states)
12+
params <- Reduce(rbind, params)
13+
params <- lapply(unique(errors$dataset), function(x) {
14+
params_subset <- params[params$dataset == x, ]
15+
errors_subset <- errors[errors$dataset == x, ]
16+
errors_subset <- errors_subset[order(errors_subset$MeanSquareError), ][1:50, ]
17+
res <- params_subset[params_subset$repetition %in% errors_subset$repetition, ]
18+
res <- res[, 1:4]
19+
res$error <- errors_subset$MeanSquareError
20+
return(res)
21+
})
22+
params <- Reduce(rbind, params)
23+
return(params)
24+
}
25+
26+
load_params <- function(path) {
27+
load(path)
28+
extract_best_runs(res[[1]])
29+
}
30+
p_ida <- load_params("ida_100.RData")
31+
32+
# Kernel density estimation
33+
# ========================================
34+
hdi_interval <- function(x, prob = 0.95) {
35+
x <- sort(x[is.finite(x)])
36+
n <- length(x)
37+
if (n < 2) return(c(lower = NA_real_, upper = NA_real_))
38+
m <- max(1L, floor(prob * n))
39+
widths <- x[(m+1):n] - x[1:(n-m)]
40+
j <- which.min(widths)
41+
c(lower = x[j], upper = x[j + m])
42+
}
43+
44+
kde_fit_and_draws <- function(X, n_samp = 2000) {
45+
H <- ks::Hpi(as.matrix(X)) # determines the bandwidth later used in kde
46+
fit <- ks::kde(as.matrix(X), H = H) # calculates: f_hat(X*) where f_hat is the joint kernel density
47+
# Result: The estimated joint kernel density (fit$estimate) has the same dimensions as X
48+
draws <- as.data.frame(ks::rkde(n = n_samp, fhat = fit)) # Derived quantities from kernel density estimates
49+
# draws N_samp time from the previously calculated joint kernel density
50+
# Result: data.frame with n_samp rows and the ncol(X)
51+
marginals <- lapply(seq_len(ncol(draws)), function(j) {
52+
d <- density(draws[[j]], n = 512) # calculates a smoothed marginal PDF (kernel density estimate)
53+
data.frame(x = d$x, y = d$y)
54+
})
55+
names(marginals) <- colnames(draws)
56+
list(fit = fit, draws = draws, marginals = marginals)
57+
}
58+
59+
print_progress_bootstrap <- function(c, ncol, b, boot_iter) {
60+
already_done <- (c - 1) / ncol
61+
from_current_done <- (b / boot_iter) / ncol
62+
print(paste0(100 * (already_done + from_current_done), "%"))
63+
}
64+
65+
bootstrap_mode_ci <- function(df, Xs, density_n, boot_iter, prob) {
66+
qs <- matrix(NA_real_, nrow = ncol(df), ncol = 2,
67+
dimnames = list(colnames(df), c("lower","upper")))
68+
n <- nrow(df)
69+
for (j in seq_len(ncol(df))) {
70+
d0 <- density(Xs[[j]], n = density_n)
71+
mb <- numeric(boot_iter)
72+
for (b in seq_len(boot_iter)) {
73+
bdf <- df[sample.int(n, n, replace = TRUE), , drop = FALSE]
74+
kb <- kde_fit_and_draws(bdf, n_samp = nrow(Xs))
75+
db <- density(kb$draws[[j]], n = density_n)
76+
mb[b] <- db$x[ which.max(db$y) ]
77+
print_progress_bootstrap(j, ncol(df), b, boot_iter)
78+
}
79+
qs[j, ] <- stats::quantile(mb, c((1-prob)/2, 1-(1-prob)/2), names = FALSE)
80+
}
81+
qs
82+
}
83+
84+
joint_jde_mode_and_intervals <- function(df, prob = 0.95, n_samp = 20000,
85+
spread_estimation = "mode_boot",
86+
boot_iter = 1000, density_n = 512) {
87+
spread_estimators <- c("hdi", "quantile", "mode_boot")
88+
stopifnot("Unknown spread estimator" = any(spread_estimation == spread_estimators))
89+
90+
ki <- kde_fit_and_draws(df, n_samp = n_samp)
91+
fit <- ki$fit
92+
Xs <- ki$draws
93+
grid <- expand.grid(fit$eval.points)
94+
dens <- as.vector(fit$estimate)
95+
mode <- as.numeric(grid[which.max(dens), ])
96+
names(mode) <- colnames(df)
97+
if (spread_estimation == "quantile") {
98+
alpha <- (1 - prob) / 2
99+
qs <- t(apply(Xs, 2, quantile, probs = c(alpha, 1 - alpha), na.rm = TRUE))
100+
colnames(qs) <- c("lower", "upper")
101+
} else if (spread_estimation == "hdi") {
102+
qs <- t(apply(Xs, 2, function(col) hdi_interval(col, prob)))
103+
colnames(qs) <- c("lower", "upper")
104+
} else {
105+
qs <- bootstrap_mode_ci(df, Xs, density_n, boot_iter, prob)
106+
}
107+
list(
108+
mode = mode,
109+
lower_ci = qs[, "lower"],
110+
upper_ci = qs[, "upper"],
111+
df = ki$marginals
112+
)
113+
}
114+
115+
res_bootstrapped <- joint_jde_mode_and_intervals(p_ida[, 1:4], boot_iter = 100)
116+
117+
res <- res_bootstraped
118+
df_lists <- res$df
119+
idx <- 1L
120+
df <- df_lists[[idx]]
121+
bins <- hist(df[, 2L], plot = FALSE)$breaks
122+
location_error <- data.frame(
123+
y = 2.5e-9, x = res$mode[[1L]],
124+
xmin = res$lower_ci[[1L]], xmax = res$upper_ci[[1L]]
125+
)
126+
127+
p <- ggplot() +
128+
geom_point(data = df, aes(x = x, y = y)) +
129+
geom_errorbarh(
130+
data = location_error,
131+
aes(xmin = xmin, xmax = xmax, y = y),
132+
height = 1e-9, size = 0.8
133+
) +
134+
labs(y = "Joint kernel density", x = "Ka(HG) [1/M]")
135+
p

0 commit comments

Comments
 (0)