Skip to content

Commit d6070d7

Browse files
Added documentation to internal functions
1 parent a1028b0 commit d6070d7

File tree

1 file changed

+99
-39
lines changed

1 file changed

+99
-39
lines changed

R/helpers-ppc.R

Lines changed: 99 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,9 @@ all_counts <- function(x, ...) {
283283
#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
284284
#' @param prob Desired simultaneous coverage (0,1).
285285
#' @param M number of simulations to run, if simulation method is used.
286-
#' @param adj_method String defining the desired adjustment method. By default
287-
#' "interpolate" is used. Other available options "optimize" and "simulate".
286+
#' @param interpolate_adj Boolean defining whether to interpolate the confidence
287+
#' bands from precomputed values. Interpolation provides a faster plot with the
288+
#' trade-off of possible loss of accuracy.
288289
#' @return The adjusted coverage parameter yielding the desired simultaneous
289290
#' coverage of the ECDF traces.
290291
#' @noRd
@@ -301,20 +302,22 @@ adjust_gamma <- function(N,
301302
abort("Value of 'prob' must be in (0,1).")
302303
}
303304
if (interpolate_adj == TRUE) {
304-
gamma <- interpolate_gamma(N, K, prob, L)
305+
gamma <- interpolate_gamma(N = N, K = K, prob = prob, L = L)
305306
} else if (L == 1) {
306-
gamma <- adjust_gamma_optimize(N, K, prob)
307+
gamma <- adjust_gamma_optimize(N = N, K = K, prob = prob)
307308
} else {
308-
gamma <- adjust_gamma_simulate(N,
309-
L,
310-
K,
311-
prob,
312-
M)
309+
gamma <- adjust_gamma_simulate(N = N, L = L, K = K, prob = prob, M = M)
313310
}
314311
gamma
315312
}
316313

317-
# Adjust coverage parameter for single sample using the optimization method.
314+
#' Adjust coverage parameter for single sample using the optimization method.
315+
#' @param N Length of sample.
316+
#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
317+
#' @param prob Desired simultaneous coverage (0,1).
318+
#' @return The adjusted coverage parameter yielding the desired simultaneous
319+
#' coverage of the ECDF traces.
320+
#' @noRd
318321
adjust_gamma_optimize <- function(N, K, prob) {
319322
target <- function(gamma, prob, N, K) {
320323
z <- 1:(K - 1) / K
@@ -331,27 +334,41 @@ adjust_gamma_optimize <- function(N, K, prob) {
331334
x1 <- 0
332335
p_int <- 1
333336
for (i in seq_along(z1)) {
334-
tmp <- p_interior(
335-
p_int, x1 = x1, x2 = x2_lower[i]: x2_upper[i],
336-
z1 = z1[i], z2 = z2[i], N = N
337+
p_int <- p_interior(
338+
p_int = p_int,
339+
x1 = x1,
340+
x2 = x2_lower[i]: x2_upper[i],
341+
z1 = z1[i],
342+
z2 = z2[i],
343+
N = N
337344
)
338-
x1 <- tmp$x1
339-
p_int <- tmp$p_int
345+
x1 <- x2_lower[i]:x2_upper[i]
340346
}
341-
abs(prob - sum(p_int))
347+
return(abs(prob - sum(p_int)))
342348
}
343-
optimize(target, c(0, 1 - prob), prob, N = N, K = K)$minimum
349+
optimize(target, c(0, 1 - prob), prob = prob, N = N, K = K)$minimum
344350
}
345351

346-
# Adjust coverage parameter for multiple chains using simulation method.
352+
#' Adjust coverage parameter for multiple chains using the simulation method.
353+
#' In short, 'M' simulations of 'L' standard uniform chains are run and the
354+
#' confidence bands are set to cover 100 * 'prob' % of these simulations.
355+
#' @param N Length of sample.
356+
#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
357+
#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
358+
#' @param prob Desired simultaneous coverage (0,1).
359+
#' @param M number of simulations to run.
360+
#' @return The adjusted coverage parameter yielding the desired simultaneous
361+
#' coverage of the ECDF traces.
362+
#' @noRd
347363
adjust_gamma_simulate <- function(N, L, K, prob, M) {
348364
gamma <- numeric(M)
349-
z <- (1:(K - 1)) / K
365+
z <- (1:(K - 1)) / K # Rank ECDF evaluation points.
350366
n <- N * (L - 1)
351367
k <- floor(z * N * L)
352368
for (m in seq_len(M)) {
353-
u <- u_scale(replicate(L, runif(N)))
369+
u <- u_scale(replicate(L, runif(N))) # Fractional ranks of sample chains
354370
scaled_ecdfs <- apply(outer(u, z, "<="), c(2, 3), sum)
371+
# Find the smalles marginal probability of the simulation run
355372
gamma[m] <- 2 * min(
356373
apply(
357374
scaled_ecdfs, 1, phyper, m = N, n = n, k = k
@@ -364,34 +381,59 @@ adjust_gamma_simulate <- function(N, L, K, prob, M) {
364381
alpha_quantile(gamma, 1 - prob)
365382
}
366383

367-
interpolate_gamma <- function(N, K, p, L) {
368-
vals <- get_interpolation_values(N, K, L, p)
384+
#' Approximate the required adjustement to obtain simultaneous confidence bands
385+
#' of an ECDF plot with interpolation with regards to N and K from precomputed
386+
#' values for a fixed set of prob and L values.
387+
#' @param N Length of sample.
388+
#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
389+
#' @param prob Desired simultaneous coverage (0,1).
390+
#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
391+
#' @return The approximated adjusted coverage parameter yielding the desired
392+
#' simultaneous coverage of the ECDF traces.
393+
#' @noRd
394+
interpolate_gamma <- function(N, K, prob, L) {
395+
# Find the precomputed values ueful for the interpolation task.
396+
vals <- get_interpolation_values(N, K, L, prob)
397+
# Largest lower bound and smalles upper bound for N among precomputed values.
369398
N_lb <- max(vals[vals$N <= N, ]$N)
370399
N_ub <- min(vals[vals$N >= N, ]$N)
371-
g_lb <- approx(
400+
# Approximate largest lower bound and smalles upper bound for gamma.
401+
log_gamma_lb <- approx(
372402
x = log(vals[vals$N == N_lb, ]$K),
373403
y = log(vals[vals$N == N_lb, ]$val),
374404
xout = log(K)
375405
)$y
376-
g_ub <- approx(
406+
log_gamma_ub <- approx(
377407
x = log(vals[vals$N == N_ub, ]$K),
378408
y = log(vals[vals$N == N_ub, ]$val),
379409
xout = log(K)
380410
)$y
381411
if (N_ub == N_lb) {
382-
g <- exp(g_lb)
412+
log_gamma_approx <- log_gamma_lb
383413
} else {
384-
g <- exp(approx(x = log(c(N_lb, N_ub)), y = c(g_lb, g_ub), xout = log(N))$y)
414+
# Approximate log_gamma for the desired value of N.
415+
log_gamma_approx <- approx(
416+
x = log(c(N_lb, N_ub)),
417+
y = c(log_gamma_lb, log_gamma_ub),
418+
xout = log(N)
419+
)$y
385420
}
386-
g
421+
exp(log_gamma_approx)
387422
}
388423

389-
get_interpolation_values <- function(N, K, L, p) {
424+
#' Filter the precomputed values useful for the interpolation task given to
425+
#' interpolate_gamma. Check, if the task is possible with the availabel data.
426+
#' @param N Length of sample.
427+
#' @param K Number of equally spaced evaluation points (1:K / K). Defaults to N.
428+
#' @param L Number of chains. Used for MCMC, defaults to 1 for ppc.
429+
#' @param prob Desired simultaneous coverage (0,1).
430+
#' @return A data.frame containing the relevant precomputed values.
431+
get_interpolation_values <- function(N, K, L, prob) {
390432
for (dim in c("L", "prob")) {
391-
if (all(get(if (dim == "L") dim else "p") != bayesplot:::gamma_adj[, dim])) {
433+
if (all(get(dim) != bayesplot:::gamma_adj[, dim])) {
392434
stop(paste(
393435
"No precomputed values to interpolate from for '", dim, "' = ",
394-
get(if (dim == "L") dim else "p"),
436+
get(dim),
395437
".\n",
396438
"Values of '", dim, "' available for interpolation: ",
397439
unique(bayesplot:::gamma_adj[, dim]),
@@ -400,7 +442,9 @@ get_interpolation_values <- function(N, K, L, p) {
400442
))
401443
}
402444
}
403-
vals <- bayesplot:::gamma_adj[bayesplot:::gamma_adj$L == L & bayesplot:::gamma_adj$prob == p, ]
445+
vals <- bayesplot:::gamma_adj[
446+
bayesplot:::gamma_adj$L == L & bayesplot:::gamma_adj$prob == prob,
447+
]
404448
if (K > max(vals[vals$N <= N, ]$K)) {
405449
stop(paste(
406450
"No precomputed values available for interpolation for 'K' = ",
@@ -413,21 +457,35 @@ get_interpolation_values <- function(N, K, L, p) {
413457
}
414458
vals
415459
}
460+
416461
#' A helper function for 'adjust_gamma_optimize' defining the probability that
417-
#' an ECDF stays within the supplied bounds between z1 and z2.
462+
#' a scaled ECDF stays within the supplied bounds between two evaluation points.
463+
#' @param p_int For each value in x1, the probability that the ECDF has stayed
464+
#' within the bounds until z1 and takes the value in x1 at z1.
465+
#' @param x1 Vector of scaled ECDF values at the left end of the interval, z1.
466+
#' @param x2 Vector of scaled ECDF values at the right end of the interval, z2.
467+
#' @param z1 Left evaluation point in [0,1]
468+
#' @param z2 Right evaluation point in [0,1] with z2 > z1.
469+
#' @param N Total number of values in the sample.
470+
#' @return A vector containing the probability to transitioning from the values
471+
#' in x1 to each of the values in x2 weighted by the probabilities in p_int.
418472
#' @noRd
419473
p_interior <- function(p_int, x1, x2, z1, z2, N) {
474+
# Ratio between the length of the evaluation interval and the total length of
475+
# the interval left to cover by ECDF.
420476
z_tilde <- (z2 - z1) / (1 - z1)
477+
# Number of samples left to cover by ECDF.
421478
N_tilde <- rep(N - x1, each = length(x2))
479+
422480
p_int <- rep(p_int, each = length(x2))
423481
x_diff <- outer(x2, x1, "-")
482+
# Pobability of each transition from a value in x1 to a value in x2.
424483
p_x2_int <- p_int * dbinom(x_diff, N_tilde, z_tilde)
425-
426-
list(p_int = rowSums(p_x2_int), x1 = x2)
484+
rowSums(p_x2_int)
427485
}
428486

429487
#' A helper function for 'adjust_alpha_simulate'
430-
#' 100 * `alpha` percent of the trials are allowed to be rejected.
488+
#' 100 * `alpha` percent of the trials in 'gamma' are allowed to be rejected.
431489
#' In case of ties, return the largest value dominating at most
432490
#' 100 * (alpha + tol) percent of the values.
433491
#' @noRd
@@ -445,13 +503,15 @@ alpha_quantile <- function(gamma, alpha, tol = 0.001) {
445503

446504
#' Compute simultaneous confidence intervals with the given adjusted coverage
447505
#' parameter gamma.
448-
#' @param N Sample length.
449-
#' @param L Number of MCMC-chains. (1 for ppc)
450-
#' @param K Number of uniformly spaced evaluation points.
451506
#' @param gamma Adjusted coverage parameter for the marginal distribution
452507
#' (binomial for PIT values and hypergeometric for rank transformed chains).
508+
#' @param N Sample length.
509+
#' @param K Number of uniformly spaced evaluation points.
510+
#' @param L Number of MCMC-chains. (1 for ppc)
511+
#' @return A list with upper and lower confidence interval values at the
512+
#' evaluation points.
453513
#' @noRd
454-
ecdf_intervals <- function(N, L = 1, K, gamma) {
514+
ecdf_intervals <- function(gamma, N, K, L = 1) {
455515
lims <- list()
456516
z <- seq(0, 1, length.out = K + 1)
457517
if (L == 1) {

0 commit comments

Comments
 (0)