Skip to content

Commit 253746b

Browse files
committed
copy gpdfit from posterior
1 parent cd6715d commit 253746b

File tree

1 file changed

+27
-39
lines changed

1 file changed

+27
-39
lines changed

R/gpdfit.R

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
3030
#'
3131
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
32-
# See section 4 of Zhang and Stephens (2009)
32+
# see section 4 of Zhang and Stephens (2009)
3333
if (sort_x) {
3434
x <- sort.int(x)
3535
}
@@ -38,49 +38,37 @@ gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
3838
M <- min_grid_pts + floor(sqrt(N))
3939
jj <- seq_len(M)
4040
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
41-
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
42-
l_theta <- N * lx(theta, x) # profile log-lik
43-
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
44-
theta_hat <- sum(theta * w_theta)
45-
k <- mean.default(log1p(-theta_hat * x))
46-
sigma <- -k / theta_hat
41+
if (xstar > x[1]) {
42+
# first quantile is bigger than the minimum
43+
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
44+
k <- matrixStats::rowMeans2(log1p(-theta %o% x))
45+
l_theta <- N * (log(-theta / k) - k - 1) # profile log-lik
46+
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
47+
theta_hat <- sum(theta * w_theta)
48+
k_hat <- mean.default(log1p(-theta_hat * x))
49+
sigma_hat <- -k_hat / theta_hat
4750

48-
if (wip) {
49-
k <- adjust_k_wip(k, n = N)
51+
# adjust k_hat based on weakly informative prior, Gaussian centered on 0.5.
52+
# this stabilizes estimates for very small Monte Carlo sample sizes and low ess
53+
# (see Vehtari et al., 2024 for details)
54+
if (wip) {
55+
k_hat <- (k_hat * N + 0.5 * 10) / (N + 10)
56+
}
57+
if (is.na(k_hat)) {
58+
k_hat <- Inf
59+
sigma_hat <- NaN
60+
}
61+
} else {
62+
# first quantile is not bigger than the minimum, which indicates
63+
# that the distribution is far from a generalized Pareto
64+
# distribution
65+
k_hat <- NA
66+
sigma_hat <- NA
5067
}
5168

52-
if (is.na(k)) {
53-
k <- Inf
54-
}
55-
56-
nlist(k, sigma)
57-
}
58-
59-
60-
# internal ----------------------------------------------------------------
61-
62-
lx <- function(a,x) {
63-
a <- -a
64-
k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1))
65-
log(a / k) - k - 1
69+
list(k = k_hat, sigma = sigma_hat)
6670
}
6771

68-
#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
69-
#' will stabilize estimates for very small Monte Carlo sample sizes and low neff
70-
#' cases.
71-
#'
72-
#' @noRd
73-
#' @param k Scalar khat estimate.
74-
#' @param n Integer number of tail samples used to fit GPD.
75-
#' @return Scalar adjusted khat estimate.
76-
#'
77-
adjust_k_wip <- function(k, n) {
78-
a <- 10
79-
n_plus_a <- n + a
80-
k * n / n_plus_a + a * 0.5 / n_plus_a
81-
}
82-
83-
8472
#' Inverse CDF of generalized Pareto distribution
8573
#' (assuming location parameter is 0)
8674
#'

0 commit comments

Comments
 (0)