Skip to content

Commit 7e492aa

Browse files
Functions for simultaneous confidence bands of ECDF
1 parent 42f7b51 commit 7e492aa

File tree

1 file changed

+163
-0
lines changed

1 file changed

+163
-0
lines changed

R/helpers-ppc.R

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,32 @@ validate_yrep <- function(yrep, y) {
6565
}
6666

6767

68+
#' Validate PIT
69+
#'
70+
#' Checks that `pit` is numeric, doesn't have any NAs, and is either a vector,
71+
#' or a 1-D array with values in [0,1].
72+
#'
73+
#' @param pit The 'pit' object from the user.
74+
#' @return Either throws an error or returns a numeric vector.
75+
#' @noRd
76+
validate_pit <- function(pit) {
77+
stopifnot(is.numeric(pit))
78+
79+
if (!is_vector_or_1Darray(pit)) {
80+
abort("'pit' must be a vector or 1D array.")
81+
}
82+
83+
if (any(pit > 1) || any(pit < 0)) {
84+
abort("'pit' must only contain values between 0 and 1.")
85+
}
86+
87+
if (anyNA(pit)) {
88+
abort("NAs not allowed in 'pit'.")
89+
}
90+
91+
unname(pit)
92+
}
93+
6894
#' Validate group
6995
#'
7096
#' Checks that grouping variable has same length as `y` and is either a vector or
@@ -250,6 +276,143 @@ all_counts <- function(x, ...) {
250276
all_whole_number(x, ...) && min(x) >= 0
251277
}
252278

279+
280+
adjust_gamma <- function(N,
281+
L = 1,
282+
K = N,
283+
prob = 0.99,
284+
M = 1000,
285+
adj_method = "interpolate") {
286+
if (any(c(K, N, L) < 1)) {
287+
abort("Parameters 'N', 'L' and 'K' must be positive integers.")
288+
}
289+
if (prob >= 1 || prob <= 0) {
290+
abort("Value of 'prob' must be in (0,1).")
291+
}
292+
if (L == 1) {
293+
gamma <- adjust_gamma_optimize(N, K, prob)
294+
}
295+
else {
296+
gamma <- adjust_gamma_simulate(N, L, K, prob, M)
297+
}
298+
gamma
299+
}
300+
301+
# Adjust coverage parameter to find simultaneous confidence intervals for the
302+
# ECDF of a sample from the uniform distribution.
303+
# N - length of samples
304+
# K - number of equally spaced evaluation points, i.e. the right ends of the
305+
# partition intervals.
306+
adjust_gamma_optimize <- function(N, K, prob=0.99) {
307+
target <- function(gamma, prob, N, K) {
308+
z <- 1:(K - 1) / K
309+
z1 <- c(0, z)
310+
z2 <- c(z, 1)
311+
312+
# pre-compute quantiles and use symmetry for increased efficiency.
313+
x2_lower <- qbinom(gamma / 2, N, z2)
314+
x2_upper <- c(N - rev(x2_lower)[2:K], 1)
315+
316+
# Compute the total probability of trajectories inside the confidence
317+
# intervals. Initialize the set and corresponding probabilities known
318+
# to be 0 and 1 for the starting value z1 = 0.
319+
x1 <- 0
320+
p_int <- 1
321+
for (i in seq_along(z1)) {
322+
tmp <- p_interior(
323+
p_int, x1 = x1, x2 = x2_lower[i]: x2_upper[i],
324+
z1 = z1[i], z2 = z2[i], gamma = gamma, N = N
325+
)
326+
x1 <- tmp$x1
327+
p_int <- tmp$p_int
328+
}
329+
abs(prob - sum(p_int))
330+
}
331+
optimize(target, c(0, 1 - prob), prob, N = N, K = K)$minimum
332+
}
333+
334+
# Adjust coverage parameter to find silmultaneous confidence intervals for the
335+
# ECDFs of multiple samples (chains) from the uniform distribution.
336+
# N - length of samples (chains).
337+
# L - number of samples (chains).
338+
# K - number of equally spaced evaluation points, i.e. the right ends of the
339+
# partition intervals.
340+
# M - number of simulations used to determine the 'prob' middle quantile.
341+
#'
342+
adjust_gamma_simulate <- function(N, L, K, prob = 0.99, M = 1000) {
343+
gamma <- numeric(M)
344+
z <- (1:(K - 1)) / K
345+
n <- N * (L - 1)
346+
k <- floor(z * N * L)
347+
for (m in seq_len(M)) {
348+
u <- replicate(L, runif(N)) %>% u_scale
349+
scaled_ecdfs <- apply(outer(u, z, "<="), c(2, 3), sum)
350+
gamma[m] <- 2 * min(
351+
apply(
352+
scaled_ecdfs, 1, phyper, m = N, n = n, k = k
353+
),
354+
apply(
355+
scaled_ecdfs - 1, 1, phyper, m = N, n = n, k = k, lower.tail = FALSE
356+
)
357+
)
358+
}
359+
alpha_quantile(gamma, 1 - prob)
360+
}
361+
362+
p_interior <- function(p_int, x1, x2, z1, z2, gamma, N) {
363+
z_tilde <- (z2 - z1) / (1 - z1)
364+
N_tilde <- rep(N - x1, each = length(x2))
365+
p_int <- rep(p_int, each = length(x2))
366+
x_diff <- outer(x2, x1, "-")
367+
p_x2_int <- p_int * dbinom(x_diff, N_tilde, z_tilde)
368+
369+
list(p_int = rowSums(p_x2_int), x1 = x2)
370+
}
371+
372+
# 100 * `alpha` percent of the trials are allowed to be rejected.
373+
# In case of ties, return the largest value dominating at most
374+
# 100 * (alpha + tol) percent of the values.
375+
alpha_quantile <- function(gamma, alpha, tol = 0.001) {
376+
a <- unname(quantile(gamma, probs = alpha))
377+
a_tol <- unname(quantile(gamma, probs = alpha + tol))
378+
if (a == a_tol) {
379+
if (min(gamma) < a) {
380+
# take the largest value that doesn't exceed the tolerance.
381+
a <- max(gamma[gamma < a])
382+
}
383+
}
384+
a
385+
}
386+
387+
# Compute simultaneous confidence intervals for one or more samples from the
388+
# standard uniform distribution.
389+
# N - sample length
390+
# L - number of samples
391+
# K - size of uniform partition defining the ECDF evaluation points.
392+
# gamma - coverage parameter for the marginal distribution (binomial for
393+
# one sample and hypergeometric for multiple rank transformed chains).
394+
#' @noRd
395+
ecdf_intervals <- function(N, L=1, K, gamma) {
396+
lims <- list()
397+
z <- seq(0,1, length.out = K + 1)
398+
if (L == 1) {
399+
lims$lower <- qbinom(gamma / 2, N, z)
400+
lims$upper <- qbinom(1 - gamma / 2, N, z)
401+
} else {
402+
n <- N * (L - 1)
403+
k <- floor(z * L * N)
404+
lims$lower <- qhyper(gamma / 2, N, n, k)
405+
lims$upper <- qhyper(1 - gamma / 2, N, n, k)
406+
}
407+
lims
408+
}
409+
410+
#' Transform observations in 'x' into their corresponding fractional ranks.
411+
#' @noRd
412+
u_scale <- function(x) {
413+
array(rank(x) / length(x), dim = dim(x), dimnames = dimnames(x))
414+
}
415+
253416
# labels ----------------------------------------------------------------
254417
create_yrep_ids <- function(ids) paste('italic(y)[rep] (', ids, ")")
255418
yrep_label <- function() expression(italic(y)[rep])

0 commit comments

Comments
 (0)