@@ -118,14 +118,15 @@ NULL
118
118
# ' standard normal distribution.
119
119
# ' @param trim Passed to [ggplot2::stat_density()].
120
120
# ' @template args-density-controls
121
- # ' @param boundary_correction For `ppc_loo_pit_overlay()`, when set to `TRUE` the function will
122
- # ' compute boundary corrected density values via convolution and a Gaussian filter.
123
- # ' As a result, parameters controlling the standard kernel density estimation
124
- # ' such as `adjust`, `kernel` and `n_dens` are ignored. NOTE: Current implementation only
125
- # ' works for continuous observations. This is set to `TRUE` by default.
126
- # '@param grid_len For `ppc_loo_pit_overlay()`, when `boundary_correction` is set to `TRUE`
127
- # ' this parameter specifies the number of points used to generate the estimations. This is
128
- # ' set to 512 by default.
121
+ # ' @param boundary_correction For `ppc_loo_pit_overlay()`, when set to `TRUE`
122
+ # ' (the default) the function will compute boundary corrected density values
123
+ # ' via convolution and a Gaussian filter. As a result, parameters controlling
124
+ # ' the standard kernel density estimation such as `adjust`, `kernel` and
125
+ # ' `n_dens` are ignored. NOTE: The current implementation only works well for
126
+ # ' continuous observations.
127
+ # '@param grid_len For `ppc_loo_pit_overlay()`, when `boundary_correction` is set
128
+ # ' to `TRUE` this parameter specifies the number of points used to generate the
129
+ # ' estimations. This is set to 512 by default.
129
130
130
131
ppc_loo_pit_overlay <- function (y ,
131
132
yrep ,
@@ -135,13 +136,13 @@ ppc_loo_pit_overlay <- function(y,
135
136
samples = 100 ,
136
137
size = 0.25 ,
137
138
alpha = 0.7 ,
138
- trim = FALSE ,
139
+ boundary_correction = TRUE ,
140
+ grid_len = 512 ,
139
141
bw = " nrd0" ,
142
+ trim = FALSE ,
140
143
adjust = 1 ,
141
144
kernel = " gaussian" ,
142
- n_dens = 1024 ,
143
- boundary_correction = TRUE ,
144
- grid_len = 512 ) {
145
+ n_dens = 1024 ) {
145
146
check_ignored_arguments(... )
146
147
147
148
data <-
@@ -252,15 +253,15 @@ ppc_loo_pit_data <-
252
253
stopifnot(identical(dim(yrep ), dim(lw )))
253
254
pit <- rstantools :: loo_pit(object = yrep , y = y , lw = lw )
254
255
}
255
-
256
+
256
257
if (! boundary_correction ) {
257
258
unifs <- matrix (runif(length(pit ) * samples ), nrow = samples )
258
259
data <- ppc_data(pit , unifs )
259
260
} else {
260
261
unifs <- matrix (runif(grid_len * samples ), nrow = samples )
261
262
ref_list <- .ref_kde_correction(unifs , bw = bw , grid_len = grid_len )
262
263
pit_list <- .kde_correction(pit , bw = bw , grid_len = grid_len )
263
-
264
+
264
265
pit <- pit_list $ bc_pvals
265
266
unifs <- ref_list $ unifs
266
267
xs <- c(pit_list $ xs , ref_list $ xs )
@@ -540,57 +541,57 @@ ppc_loo_ribbon <-
540
541
}
541
542
542
543
# # Boundary correction based on code by ArViz development team
543
- # The main method is a 1-D density estimation for linear data with
544
- # convolution with a Gaussian filter.
544
+ # The main method is a 1-D density estimation for linear data with
545
+ # convolution with a Gaussian filter.
545
546
546
547
# Based on scipy.signal.gaussian formula
547
548
.gaussian <- function (N , bw ){
548
549
n <- seq(0 , N - 1 ) - (N - 1 )/ 2
549
550
sigma = 2 * bw * bw
550
551
w = exp(- n ^ 2 / sigma )
551
552
return (w )
552
-
553
+
553
554
}
554
555
555
- .linear_convolution <- function (x ,
556
- bw ,
557
- grid_counts ,
556
+ .linear_convolution <- function (x ,
557
+ bw ,
558
+ grid_counts ,
558
559
grid_breaks ,
559
560
grid_len ){
560
561
# 1-D Gaussian estimation via
561
562
# convolution of a Gaussian filter and the binned relative freqs
562
563
bin_width <- grid_breaks [2 ] - grid_breaks [1 ]
563
564
f <- grid_counts / bin_width / length(x )
564
565
bw <- bw / bin_width
565
-
566
+
566
567
# number of data points to generate for gaussian filter
567
568
gauss_n <- as.integer(bw * 2 * pi )
568
569
if (gauss_n == 0 ){
569
570
gauss_n = 1
570
571
}
571
-
572
+
572
573
# Generate Gaussian filter vector
573
574
kernel <- .gaussian(gauss_n , bw )
574
575
npad <- as.integer(grid_len / 5 )
575
-
576
+
576
577
# Reflection trick (i.e. get first N and last N points to pad vector)
577
- f <- c(rev(f [1 : (npad )]),
578
- f ,
579
- rev(f )[(grid_len - npad ): (grid_len - 1 )])
580
-
578
+ f <- c(rev(f [1 : (npad )]),
579
+ f ,
580
+ rev(f )[(grid_len - npad ): (grid_len - 1 )])
581
+
581
582
# Convolution: Gaussian filter + reflection trick (pading) works as an
582
583
# averaging moving window based on a Gaussian density which takes care
583
584
# of the density boundary values near 0 and 1.
584
- bc_pvals <- stats :: filter(f ,
585
- kernel ,
585
+ bc_pvals <- stats :: filter(f ,
586
+ kernel ,
586
587
method = ' convolution' ,
587
588
sides = 2 )[(npad + 1 ): (npad + grid_len )]
588
-
589
+
589
590
bc_pvals <- bc_pvals / (bw * (2 * pi )^ 0.5 )
590
591
return (bc_pvals )
591
592
}
592
593
593
- .kde_correction <- function (x ,
594
+ .kde_correction <- function (x ,
594
595
bw ,
595
596
grid_len ){
596
597
# Generate boundary corrected values via a linear convolution using a
@@ -601,55 +602,56 @@ ppc_loo_ribbon <-
601
602
" Non-finite PIT values are invalid for KDE boundary correction method" ))
602
603
x <- x [is.finite(x )]
603
604
}
604
-
605
+
605
606
if (grid_len < 100 ){
606
607
grid_len = 100
607
608
}
608
-
609
+
609
610
# Get relative frequency boundaries and counts for input vector
610
611
bins <- seq(from = min(x ), to = max(x ), length.out = grid_len + 1 )
611
612
hist_obj <- hist(x , breaks = bins , plot = FALSE )
612
613
grid_breaks <- hist_obj $ breaks
613
614
grid_counts <- hist_obj $ counts
614
-
615
+
615
616
# Compute bandwidth based on use specification
616
617
bw <- density(x , bw = bw )$ bw
617
-
618
+
618
619
# 1-D Convolution
619
620
bc_pvals <- .linear_convolution(x , bw , grid_counts , grid_breaks , grid_len )
620
-
621
+
621
622
# Generate vector of x-axis values for plotting based on binned relative freqs
622
623
n_breaks <- length(grid_breaks )
624
+
623
625
xs <- (grid_breaks [2 : n_breaks ] + grid_breaks [1 : (n_breaks - 1 )]) / 2
624
626
625
627
first_nonNA <- head(which(! is.na(bc_pvals )),1 )
626
628
last_nonNA <- tail(which(! is.na(bc_pvals )),1 )
627
629
bc_pvals [1 : first_nonNA ] <- bc_pvals [first_nonNA ]
628
630
bc_pvals [last_nonNA : length(bc_pvals )] <- bc_pvals [last_nonNA ]
629
-
631
+
630
632
return (list (xs = xs , bc_pvals = bc_pvals ))
631
633
}
632
634
633
635
# Wrapper function to generate runif reference lines based on
634
636
# .kde_correction()
635
637
.ref_kde_correction <- function (unifs , bw , grid_len ){
636
-
638
+
637
639
# Allocate memory
638
- idx <- seq(from = 1 ,
639
- to = ncol(unifs )* nrow(unifs ) + ncol(unifs ),
640
+ idx <- seq(from = 1 ,
641
+ to = ncol(unifs )* nrow(unifs ) + ncol(unifs ),
640
642
by = ncol(unifs ))
641
643
idx <- c(idx , ncol(unifs )* nrow(unifs ))
642
644
xs <- rep(0 , ncol(unifs )* nrow(unifs ))
643
645
bc_mat <- matrix (0 , nrow(unifs ), ncol(unifs ))
644
-
646
+
645
647
# Generate boundary corrected reference values
646
648
for (i in 1 : nrow(unifs )){
647
- bc_list <- .kde_correction(unifs [i ,],
648
- bw = bw ,
649
+ bc_list <- .kde_correction(unifs [i ,],
650
+ bw = bw ,
649
651
grid_len = grid_len )
650
652
bc_mat [i ,] <- bc_list $ bc_pvals
651
653
xs [idx [i ]: (idx [i + 1 ]- 1 )] <- bc_list $ xs
652
654
}
653
-
655
+
654
656
return (list (xs = xs , unifs = bc_mat ))
655
657
}
0 commit comments