|
| 1 | +setwd("./Paper/MeasurementVariance/") |
| 2 | +library(ggplot2) |
| 3 | + |
| 4 | +# Load the results |
| 5 | +# ======================================== |
| 6 | +extract_best_runs <- function(res) { |
| 7 | + states <- res$states |
| 8 | + params <- res$params |
| 9 | + errors <- res$metrices |
| 10 | + errors <- Reduce(rbind, errors) |
| 11 | + states <- Reduce(rbind, states) |
| 12 | + params <- Reduce(rbind, params) |
| 13 | + params <- lapply(unique(errors$dataset), function(x) { |
| 14 | + params_subset <- params[params$dataset == x, ] |
| 15 | + errors_subset <- errors[errors$dataset == x, ] |
| 16 | + errors_subset <- errors_subset[order(errors_subset$MeanSquareError), ][1:50, ] |
| 17 | + res <- params_subset[params_subset$repetition %in% errors_subset$repetition, ] |
| 18 | + res <- res[, 1:4] |
| 19 | + res$error <- errors_subset$MeanSquareError |
| 20 | + return(res) |
| 21 | + }) |
| 22 | + params <- Reduce(rbind, params) |
| 23 | + return(params) |
| 24 | +} |
| 25 | + |
| 26 | +load_params <- function(path) { |
| 27 | + load(path) |
| 28 | + extract_best_runs(res[[1]]) |
| 29 | +} |
| 30 | +p_ida <- load_params("ida_100.RData") |
| 31 | + |
| 32 | +# Kernel density estimation |
| 33 | +# ======================================== |
| 34 | +hdi_interval <- function(x, prob = 0.95) { |
| 35 | + x <- sort(x[is.finite(x)]) |
| 36 | + n <- length(x) |
| 37 | + if (n < 2) return(c(lower = NA_real_, upper = NA_real_)) |
| 38 | + m <- max(1L, floor(prob * n)) |
| 39 | + widths <- x[(m+1):n] - x[1:(n-m)] |
| 40 | + j <- which.min(widths) |
| 41 | + c(lower = x[j], upper = x[j + m]) |
| 42 | +} |
| 43 | + |
| 44 | +kde_fit_and_draws <- function(X, n_samp = 2000) { |
| 45 | + H <- ks::Hpi(as.matrix(X)) # determines the bandwidth later used in kde |
| 46 | + fit <- ks::kde(as.matrix(X), H = H) # calculates: f_hat(X*) where f_hat is the joint kernel density |
| 47 | + # Result: The estimated joint kernel density (fit$estimate) has the same dimensions as X |
| 48 | + draws <- as.data.frame(ks::rkde(n = n_samp, fhat = fit)) # Derived quantities from kernel density estimates |
| 49 | + # draws N_samp time from the previously calculated joint kernel density |
| 50 | + # Result: data.frame with n_samp rows and the ncol(X) |
| 51 | + marginals <- lapply(seq_len(ncol(draws)), function(j) { |
| 52 | + d <- density(draws[[j]], n = 512) # calculates a smoothed marginal PDF (kernel density estimate) |
| 53 | + data.frame(x = d$x, y = d$y) |
| 54 | + }) |
| 55 | + names(marginals) <- colnames(draws) |
| 56 | + list(fit = fit, draws = draws, marginals = marginals) |
| 57 | +} |
| 58 | + |
| 59 | +print_progress_bootstrap <- function(c, ncol, b, boot_iter) { |
| 60 | + already_done <- (c - 1) / ncol |
| 61 | + from_current_done <- (b / boot_iter) / ncol |
| 62 | + print(paste0(100 * (already_done + from_current_done), "%")) |
| 63 | +} |
| 64 | + |
| 65 | +bootstrap_mode_ci <- function(df, Xs, density_n, boot_iter, prob) { |
| 66 | + qs <- matrix(NA_real_, nrow = ncol(df), ncol = 2, |
| 67 | + dimnames = list(colnames(df), c("lower","upper"))) |
| 68 | + n <- nrow(df) |
| 69 | + for (j in seq_len(ncol(df))) { |
| 70 | + d0 <- density(Xs[[j]], n = density_n) |
| 71 | + mb <- numeric(boot_iter) |
| 72 | + for (b in seq_len(boot_iter)) { |
| 73 | + bdf <- df[sample.int(n, n, replace = TRUE), , drop = FALSE] |
| 74 | + kb <- kde_fit_and_draws(bdf, n_samp = nrow(Xs)) |
| 75 | + db <- density(kb$draws[[j]], n = density_n) |
| 76 | + mb[b] <- db$x[ which.max(db$y) ] |
| 77 | + print_progress_bootstrap(j, ncol(df), b, boot_iter) |
| 78 | + } |
| 79 | + qs[j, ] <- stats::quantile(mb, c((1-prob)/2, 1-(1-prob)/2), names = FALSE) |
| 80 | + } |
| 81 | + qs |
| 82 | +} |
| 83 | + |
| 84 | +joint_jde_mode_and_intervals <- function(df, prob = 0.95, n_samp = 20000, |
| 85 | + spread_estimation = "mode_boot", |
| 86 | + boot_iter = 1000, density_n = 512) { |
| 87 | + spread_estimators <- c("hdi", "quantile", "mode_boot") |
| 88 | + stopifnot("Unknown spread estimator" = any(spread_estimation == spread_estimators)) |
| 89 | + |
| 90 | + ki <- kde_fit_and_draws(df, n_samp = n_samp) |
| 91 | + fit <- ki$fit |
| 92 | + Xs <- ki$draws |
| 93 | + grid <- expand.grid(fit$eval.points) |
| 94 | + dens <- as.vector(fit$estimate) |
| 95 | + mode <- as.numeric(grid[which.max(dens), ]) |
| 96 | + names(mode) <- colnames(df) |
| 97 | + if (spread_estimation == "quantile") { |
| 98 | + alpha <- (1 - prob) / 2 |
| 99 | + qs <- t(apply(Xs, 2, quantile, probs = c(alpha, 1 - alpha), na.rm = TRUE)) |
| 100 | + colnames(qs) <- c("lower", "upper") |
| 101 | + } else if (spread_estimation == "hdi") { |
| 102 | + qs <- t(apply(Xs, 2, function(col) hdi_interval(col, prob))) |
| 103 | + colnames(qs) <- c("lower", "upper") |
| 104 | + } else { |
| 105 | + qs <- bootstrap_mode_ci(df, Xs, density_n, boot_iter, prob) |
| 106 | + } |
| 107 | + list( |
| 108 | + mode = mode, |
| 109 | + lower_ci = qs[, "lower"], |
| 110 | + upper_ci = qs[, "upper"], |
| 111 | + df = ki$marginals |
| 112 | + ) |
| 113 | +} |
| 114 | + |
| 115 | +res_bootstrapped <- joint_jde_mode_and_intervals(p_ida[, 1:4], boot_iter = 100) |
| 116 | + |
| 117 | +res <- res_bootstraped |
| 118 | +df_lists <- res$df |
| 119 | +idx <- 1L |
| 120 | +df <- df_lists[[idx]] |
| 121 | +bins <- hist(df[, 2L], plot = FALSE)$breaks |
| 122 | +location_error <- data.frame( |
| 123 | + y = 2.5e-9, x = res$mode[[1L]], |
| 124 | + xmin = res$lower_ci[[1L]], xmax = res$upper_ci[[1L]] |
| 125 | +) |
| 126 | + |
| 127 | +p <- ggplot() + |
| 128 | + geom_point(data = df, aes(x = x, y = y)) + |
| 129 | + geom_errorbarh( |
| 130 | + data = location_error, |
| 131 | + aes(xmin = xmin, xmax = xmax, y = y), |
| 132 | + height = 1e-9, size = 0.8 |
| 133 | + ) + |
| 134 | + labs(y = "Joint kernel density", x = "Ka(HG) [1/M]") |
| 135 | +p |
0 commit comments