Skip to content

Commit cf20250

Browse files
committed
Merge branch 'master' into diff-diagnostics
2 parents d579c06 + b4c12c3 commit cf20250

File tree

12 files changed

+210
-78
lines changed

12 files changed

+210
-78
lines changed

R/psis.R

Lines changed: 59 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,14 @@ psis <- function(log_ratios, ...) UseMethod("psis")
9898
#' @template array
9999
#'
100100
psis.array <-
101-
function(log_ratios, ...,
102-
r_eff = 1,
103-
cores = getOption("mc.cores", 1)) {
104-
importance_sampling.array(log_ratios = log_ratios, ...,
105-
r_eff = r_eff,
106-
cores = cores,
107-
method = "psis")
101+
function(log_ratios, ..., r_eff = 1, cores = getOption("mc.cores", 1)) {
102+
importance_sampling.array(
103+
log_ratios = log_ratios,
104+
...,
105+
r_eff = r_eff,
106+
cores = cores,
107+
method = "psis"
108+
)
108109
}
109110

110111

@@ -113,15 +114,14 @@ psis.array <-
113114
#' @template matrix
114115
#'
115116
psis.matrix <-
116-
function(log_ratios,
117-
...,
118-
r_eff = 1,
119-
cores = getOption("mc.cores", 1)) {
120-
importance_sampling.matrix(log_ratios,
121-
...,
122-
r_eff = r_eff,
123-
cores = cores,
124-
method = "psis")
117+
function(log_ratios, ..., r_eff = 1, cores = getOption("mc.cores", 1)) {
118+
importance_sampling.matrix(
119+
log_ratios,
120+
...,
121+
r_eff = r_eff,
122+
cores = cores,
123+
method = "psis"
124+
)
125125
}
126126

127127
#' @export
@@ -130,9 +130,12 @@ psis.matrix <-
130130
#'
131131
psis.default <-
132132
function(log_ratios, ..., r_eff = 1) {
133-
importance_sampling.default(log_ratios = log_ratios, ...,
134-
r_eff = r_eff,
135-
method = "psis")
133+
importance_sampling.default(
134+
log_ratios = log_ratios,
135+
...,
136+
r_eff = r_eff,
137+
method = "psis"
138+
)
136139
}
137140

138141

@@ -149,25 +152,26 @@ is.psis <- function(x) {
149152
#' @noRd
150153
#' @seealso importance_sampling_object
151154
psis_object <-
152-
function(unnormalized_log_weights,
153-
pareto_k,
154-
tail_len,
155-
r_eff) {
156-
importance_sampling_object(unnormalized_log_weights = unnormalized_log_weights,
157-
pareto_k = pareto_k,
158-
tail_len = tail_len,
159-
r_eff = r_eff,
160-
method = "psis")
155+
function(unnormalized_log_weights, pareto_k, tail_len, r_eff) {
156+
importance_sampling_object(
157+
unnormalized_log_weights = unnormalized_log_weights,
158+
pareto_k = pareto_k,
159+
tail_len = tail_len,
160+
r_eff = r_eff,
161+
method = "psis"
162+
)
161163
}
162164

163165

164166
#' @noRd
165167
#' @seealso do_importance_sampling
166-
do_psis <- function(log_ratios, r_eff, cores, method){
167-
do_importance_sampling(log_ratios = log_ratios,
168-
r_eff = r_eff,
169-
cores = cores,
170-
method = "psis")
168+
do_psis <- function(log_ratios, r_eff, cores, method) {
169+
do_importance_sampling(
170+
log_ratios = log_ratios,
171+
r_eff = r_eff,
172+
cores = cores,
173+
method = "psis"
174+
)
171175
}
172176

173177
#' Extract named components from each list in the list of lists obtained by
@@ -181,7 +185,9 @@ do_psis <- function(log_ratios, r_eff, cores, method){
181185
#' @return Numeric vector or matrix.
182186
#'
183187
psis_apply <- function(x, item, fun = c("[[", "attr"), fun_val = numeric(1)) {
184-
if (!is.list(x)) stop("Internal error ('x' must be a list for psis_apply)")
188+
if (!is.list(x)) {
189+
stop("Internal error ('x' must be a list for psis_apply)")
190+
}
185191
vapply(x, FUN = match.arg(fun), FUN.VALUE = fun_val, item)
186192
}
187193

@@ -212,7 +218,7 @@ do_psis_i <- function(log_ratios_i, tail_len_i, ...) {
212218
ord <- sort.int(lw_i, index.return = TRUE)
213219
tail_ids <- seq(S - tail_len_i + 1, S)
214220
lw_tail <- ord$x[tail_ids]
215-
if (abs(max(lw_tail) - min(lw_tail)) < .Machine$double.eps/100) {
221+
if (abs(max(lw_tail) - min(lw_tail)) < .Machine$double.eps / 100) {
216222
warning(
217223
"Can't fit generalized Pareto distribution ",
218224
"because all tail values are the same.",
@@ -252,11 +258,11 @@ psis_smooth_tail <- function(x, cutoff) {
252258
k <- fit$k
253259
sigma <- fit$sigma
254260
if (is.finite(k)) {
255-
p <- (seq_len(len) - 0.5) / len
256-
qq <- qgpd(p, k, sigma) + exp_cutoff
257-
tail <- log(qq)
261+
p <- (seq_len(len) - 0.5) / len
262+
qq <- qgpd(p, k, sigma) + exp_cutoff
263+
tail <- log(qq)
258264
} else {
259-
tail <- x
265+
tail <- x
260266
}
261267
list(tail = tail, k = k)
262268
}
@@ -322,7 +328,8 @@ throw_tail_length_warnings <- function(tail_lengths) {
322328
if (length(tail_lengths) == 1) {
323329
warning(
324330
"Not enough tail samples to fit the generalized Pareto distribution.",
325-
call. = FALSE, immediate. = TRUE
331+
call. = FALSE,
332+
immediate. = TRUE
326333
)
327334
} else {
328335
bad <- which(tail_len_bad)
@@ -332,7 +339,11 @@ throw_tail_length_warnings <- function(tail_lengths) {
332339
"in some or all columns of matrix of log importance ratios. ",
333340
"Skipping the following columns: ",
334341
paste(if (Nbad <= 10) bad else bad[1:10], collapse = ", "),
335-
if (Nbad > 10) paste0(", ... [", Nbad - 10, " more not printed].\n") else "\n",
342+
if (Nbad > 10) {
343+
paste0(", ... [", Nbad - 10, " more not printed].\n")
344+
} else {
345+
"\n"
346+
},
336347
call. = FALSE,
337348
immediate. = TRUE
338349
)
@@ -352,17 +363,21 @@ throw_tail_length_warnings <- function(tail_lengths) {
352363
#' * If `r_eff` is `NA` then `rep(1, len)` is returned.
353364
#' * If `r_eff` is a scalar then `rep(r_eff, len)` is returned.
354365
#' * If `r_eff` is not a scalar but the length is not `len` then an error is thrown.
355-
#' * If `r_eff` has length `len` but has `NA`s then an error is thrown.
366+
#' * If `r_eff` has length `len` but has `NA`s then `NA`s are filled in with `1`s.
356367
#'
357368
prepare_psis_r_eff <- function(r_eff, len) {
358369
if (isTRUE(is.null(r_eff) || all(is.na(r_eff)))) {
359370
r_eff <- rep(1, len)
360371
} else if (length(r_eff) == 1) {
361372
r_eff <- rep(r_eff, len)
362373
} else if (length(r_eff) != len) {
363-
stop("'r_eff' must have one value or one value per observation.", call. = FALSE)
374+
stop(
375+
"'r_eff' must have one value or one value per observation.",
376+
call. = FALSE
377+
)
364378
} else if (anyNA(r_eff)) {
365-
stop("Can't mix NA and not NA values in 'r_eff'.", call. = FALSE)
379+
message("Replacing NAs in `r_eff` with 1s")
380+
r_eff[is.na(r_eff)] <- 1
366381
}
367382
r_eff
368383
}
@@ -390,4 +405,3 @@ throw_psis_r_eff_warning <- function() {
390405
call. = FALSE
391406
)
392407
}
393-

README.md

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,27 @@ __loo__ is an R package that allows users to compute efficient approximate
1313
leave-one-out cross-validation for fitted Bayesian models, as well as model
1414
weights that can be used to average predictive distributions.
1515
The __loo__ package package implements the fast and stable computations for
16-
approximate LOO-CV and WAIC from
16+
approximate LOO-CV
1717

1818
* Vehtari, A., Gelman, A., and Gabry, J. (2017). Practical Bayesian model
1919
evaluation using leave-one-out cross-validation and WAIC.
20-
_Statistics and Computing_. 27(5), 1413--1432.
21-
doi:10.1007/s11222-016-9696-4. [Online](https://link.springer.com/article/10.1007/s11222-016-9696-4),
22-
[arXiv preprint arXiv:1507.04544](https://arxiv.org/abs/1507.04544).
20+
_Statistics and Computing_. 27(5): 1413-1432.
21+
[Journal](https://dx.doi.org/10.1007/s11222-016-9696-4),
22+
[arXiv preprint](https://arxiv.org/abs/1507.04544)
23+
24+
* Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. (2024).
25+
Pareto smoothed importance sampling. *Journal of Machine Learning Research*,
26+
25(72): 1-58.
27+
[Journal](https://jmlr.org/papers/v25/19-556.html),
28+
[arXiv preprint](https://arxiv.org/abs/1507.02646)
2329

2430
and computes model weights as described in
2531

2632
* Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018). Using
27-
stacking to average Bayesian predictive distributions. In Bayesian
28-
Analysis, doi:10.1214/17-BA1091.
29-
[Online](https://projecteuclid.org/euclid.ba/1516093227),
30-
[arXiv preprint arXiv:1704.02030](https://arxiv.org/abs/1704.02030).
33+
stacking to average Bayesian predictive distributions. *Bayesian Analysis*
34+
13(3): 917-1007.
35+
[Journal](https://dx.doi.org/10.1214/17-BA1091),
36+
[arXiv preprint](https://arxiv.org/abs/1704.02030)
3137

3238
From existing posterior simulation draws, we compute approximate LOO-CV using
3339
Pareto smoothed importance sampling (PSIS), a new procedure for regularizing

inst/CITATION

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,18 @@ bibentry(bibtype = "Article",
2626
header = "To cite the loo paper:"
2727
)
2828

29+
bibentry(bibtype = "Article",
30+
title = "Uncertainty in Bayesian leave-one-out cross-validation based model comparison",
31+
author = c(person("Tuomas", "Sivula"),
32+
person("Måns", "Magnusson"),
33+
person("Asael Alonzo", "Matamoros"),
34+
person("Aki", "Vehtari")),
35+
journal = "Bayesian Analysis",
36+
year = "2025",
37+
note = "accepted for publication",
38+
header = "To cite when using loo_compare():"
39+
)
40+
2941
bibentry(bibtype = "Article",
3042
title = "Using stacking to average Bayesian predictive distributions",
3143
author = c(person("Yuling", "Yao"),
@@ -40,3 +52,71 @@ bibentry(bibtype = "Article",
4052
doi = "10.1214/17-BA1091",
4153
header = "To cite the stacking paper:"
4254
)
55+
56+
bibentry(
57+
title = "Pareto smoothed importance sampling",
58+
bibtype = "Article",
59+
author = c(
60+
person("Aki", "Vehtari"),
61+
person("Daniel", "Simpson"),
62+
person("Andrew", "Gelman"),
63+
person("Yuling", "Yao"),
64+
person("Jonah", "Gabry")
65+
),
66+
journal = "Journal of Machine Learning Research",
67+
year = 2024,
68+
volume = 25,
69+
number = 72,
70+
pages = "1-58",
71+
header = "To cite Pareto-k diagnostics:"
72+
)
73+
74+
bibentry(
75+
bibtype = "Article",
76+
author = c(
77+
person(given = "Topi", family = "Paananen"),
78+
person(given = "Juho", family = "Piironen"),
79+
person(given = "Paul-Christian", family = "Buerkner"),
80+
person(given = "Aki", family = "Vehtari")
81+
),
82+
title = "Implicitly adaptive importance sampling",
83+
journal = "Statistics and Computing",
84+
volume = 31,
85+
pages = "16",
86+
year = 2021,
87+
header = "To cite moment matching:"
88+
)
89+
90+
bibentry(
91+
bibtype = "InProceedings",
92+
author = c(
93+
person(given = "Måns", family = "Magnusson"),
94+
person(given = "Michael Riis", family = "Andersen"),
95+
person(given = "Johan", family = "Jonasson"),
96+
person(given = "Aki", family = "Vehtari")
97+
),
98+
title = "Leave-One-Out Cross-Validation for Large Data",
99+
booktitle = "Thirty-sixth International Conference on Machine Learning",
100+
publisher = "PMLR",
101+
volume = "97",
102+
pages = "4244-4253",
103+
year = 2019,
104+
header = "To cite subsampling loo:"
105+
)
106+
107+
bibentry(
108+
bibtype = "InProceedings",
109+
author = c(
110+
person(given = "Måns", family = "Magnusson"),
111+
person(given = "Michael Riis", family = "Andersen"),
112+
person(given = "Johan", family = "Jonasson"),
113+
person(given = "Aki", family = "Vehtari")
114+
),
115+
title = "Leave-One-Out Cross-Validation for Model Comparison in Large Data",
116+
booktitle = "Proceedings of the 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)",
117+
publisher = "PMLR",
118+
volume = "108",
119+
pages = "341-351",
120+
year = 2019,
121+
header = "To cite subsampling loo:"
122+
)

man-roxygen/loo-and-compare-references.R

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@
1010
#' 25(72):1-58.
1111
#' [PDF](https://jmlr.org/papers/v25/19-556.html)
1212
#'
13-
#' Sivula, T, Magnusson, M., Matamoros A. A., and Vehtari, A. (2022).
13+
#' Sivula, T, Magnusson, M., Matamoros A. A., and Vehtari, A. (2025).
1414
#' Uncertainty in Bayesian leave-one-out cross-validation based model
15-
#' comparison. [preprint arXiv:2008.10296v3.](https://arxiv.org/abs/2008.10296v3).
15+
#' comparison. *Bayesian Analysis*, accepted for publication.
16+
#' [preprint arXiv:2008.10296v5.](https://arxiv.org/abs/2008.10296v5).
1617
#'
17-
#' McLatchie, Y., and Vehtari, A. (2023). Efficient estimation and
18+
#' McLatchie, Y., and Vehtari, A. (2024). Efficient estimation and
1819
#' correction of selection-induced bias with order statistics.
19-
#' [preprint arXiv:2309.03742](https://arxiv.org/abs/2309.03742)
20+
#' *Statistics and Computing*. 34(132).
21+
#' [doi:10.1007/s11222-024-10442-4](https://doi.org/10.1007/s11222-024-10442-4)
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
#' @references Sivula, T, Magnusson, M., Matamoros A. A., and Vehtari,
2-
#' A. (2022). Uncertainty in Bayesian leave-one-out
3-
#' cross-validation based model comparison. [preprint
4-
#' arXiv:2008.10296v3.](https://arxiv.org/abs/2008.10296v3).
1+
#' @references
2+
#' Sivula, T, Magnusson, M., Matamoros A. A., and Vehtari, A. (2025).
3+
#' Uncertainty in Bayesian leave-one-out cross-validation based model
4+
#' comparison. *Bayesian Analysis*, accepted for publication.
5+
#' [preprint arXiv:2008.10296v5.](https://arxiv.org/abs/2008.10296v5).

man/crps.Rd

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/loo-glossary.Rd

Lines changed: 4 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)