2929# ' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
3030# '
3131gpdfit <- function (x , wip = TRUE , min_grid_pts = 30 , sort_x = TRUE ) {
32- # See section 4 of Zhang and Stephens (2009)
32+ # see section 4 of Zhang and Stephens (2009)
3333 if (sort_x ) {
3434 x <- sort.int(x )
3535 }
@@ -38,49 +38,37 @@ gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
3838 M <- min_grid_pts + floor(sqrt(N ))
3939 jj <- seq_len(M )
4040 xstar <- x [floor(N / 4 + 0.5 )] # first quartile of sample
41- theta <- 1 / x [N ] + (1 - sqrt(M / (jj - 0.5 ))) / prior / xstar
42- l_theta <- N * lx(theta , x ) # profile log-lik
43- w_theta <- exp(l_theta - matrixStats :: logSumExp(l_theta )) # normalize
44- theta_hat <- sum(theta * w_theta )
45- k <- mean.default(log1p(- theta_hat * x ))
46- sigma <- - k / theta_hat
41+ if (xstar > x [1 ]) {
42+ # first quantile is bigger than the minimum
43+ theta <- 1 / x [N ] + (1 - sqrt(M / (jj - 0.5 ))) / prior / xstar
44+ k <- matrixStats :: rowMeans2(log1p(- theta %o% x ))
45+ l_theta <- N * (log(- theta / k ) - k - 1 ) # profile log-lik
46+ w_theta <- exp(l_theta - matrixStats :: logSumExp(l_theta )) # normalize
47+ theta_hat <- sum(theta * w_theta )
48+ k_hat <- mean.default(log1p(- theta_hat * x ))
49+ sigma_hat <- - k_hat / theta_hat
4750
48- if (wip ) {
49- k <- adjust_k_wip(k , n = N )
51+ # adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
52+ # this stabilizes estimates for very small Monte Carlo sample sizes and low ess
53+ # (see Vehtari et al., 2024 for details)
54+ if (wip ) {
55+ k_hat <- (k_hat * N + 0.5 * 10 ) / (N + 10 )
56+ }
57+ if (is.na(k_hat )) {
58+ k_hat <- Inf
59+ sigma_hat <- NaN
60+ }
61+ } else {
62+ # first quantile is not bigger than the minimum, which indicates
63+ # that the distribution is far from a generalized Pareto
64+ # distribution
65+ k_hat <- NA
66+ sigma_hat <- NA
5067 }
5168
52- if (is.na(k )) {
53- k <- Inf
54- }
55-
56- nlist(k , sigma )
57- }
58-
59-
60- # internal ----------------------------------------------------------------
61-
62- lx <- function (a ,x ) {
63- a <- - a
64- k <- vapply(a , FUN = function (a_i ) mean(log1p(a_i * x )), FUN.VALUE = numeric (1 ))
65- log(a / k ) - k - 1
69+ list (k = k_hat , sigma = sigma_hat )
6670}
6771
68- # ' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
69- # ' will stabilize estimates for very small Monte Carlo sample sizes and low neff
70- # ' cases.
71- # '
72- # ' @noRd
73- # ' @param k Scalar khat estimate.
74- # ' @param n Integer number of tail samples used to fit GPD.
75- # ' @return Scalar adjusted khat estimate.
76- # '
77- adjust_k_wip <- function (k , n ) {
78- a <- 10
79- n_plus_a <- n + a
80- k * n / n_plus_a + a * 0.5 / n_plus_a
81- }
82-
83-
8472# ' Inverse CDF of generalized Pareto distribution
8573# ' (assuming location parameter is 0)
8674# '
0 commit comments