diff --git a/NAMESPACE b/NAMESPACE index fb1156e5..a09fffcb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,8 @@ S3method(run_mold,default) S3method(run_mold,default_formula_blueprint) S3method(run_mold,default_recipe_blueprint) S3method(run_mold,default_xy_blueprint) +S3method(snap,numeric) +S3method(snap,quantile_pred) S3method(standardize,array) S3method(standardize,data.frame) S3method(standardize,default) @@ -40,13 +42,19 @@ S3method(standardize,double) S3method(standardize,factor) S3method(standardize,integer) S3method(standardize,matrix) +S3method(vec_arith,quantile_pred) +S3method(vec_arith.numeric,quantile_pred) +S3method(vec_arith.quantile_pred,numeric) S3method(vec_cast,double.hardhat_frequency_weights) S3method(vec_cast,double.hardhat_importance_weights) S3method(vec_cast,hardhat_frequency_weights.hardhat_frequency_weights) S3method(vec_cast,hardhat_importance_weights.hardhat_importance_weights) S3method(vec_cast,integer.hardhat_frequency_weights) +S3method(vec_cast,quantile_pred.quantile_pred) +S3method(vec_math,quantile_pred) S3method(vec_ptype2,hardhat_frequency_weights.hardhat_frequency_weights) S3method(vec_ptype2,hardhat_importance_weights.hardhat_importance_weights) +S3method(vec_ptype2,quantile_pred.quantile_pred) S3method(vec_ptype_abbr,hardhat_frequency_weights) S3method(vec_ptype_abbr,hardhat_importance_weights) S3method(vec_ptype_abbr,quantile_pred) @@ -88,10 +96,12 @@ export(get_data_classes) export(get_levels) export(get_outcome_levels) export(importance_weights) +export(impute_quantiles) export(is_blueprint) export(is_case_weights) export(is_frequency_weights) export(is_importance_weights) +export(is_quantile_pred) export(model_frame) export(model_matrix) export(model_offset) @@ -114,6 +124,7 @@ export(run_forge) export(run_mold) export(scream) export(shrink) +export(snap) export(spruce_class) export(spruce_class_multiple) export(spruce_numeric) diff --git a/R/impute-quantile_pred.R b/R/impute-quantile_pred.R new file mode 100644 index 00000000..da03fbda --- /dev/null +++ b/R/impute-quantile_pred.R @@ -0,0 +1,213 @@ +#' Restrict numeric data to the interval \[lower, upper\] +#' +#' @param x a numeric vector +#' @param lower number, the lower bound +#' @param upper number, the upper bound +#' @param ... unused +#' @export +#' +#' @return An object of the same type as `x` +#' +#' @keywords internal +snap <- function(x, lower, upper, ...) { + UseMethod("snap") +} + +#' @export +snap.numeric <- function(x, lower, upper, ...) { + rlang::check_dots_empty() + check_number_decimal(lower) + check_number_decimal(upper) + + pmin(pmax(x, lower), upper) +} + +#' @export +snap.quantile_pred <- function(x, lower, upper, ...) { + if (vec_size(x) == 0) return(x) + values <- as.matrix(x) + quantile_levels <- extract_quantile_levels(x) + values <- map(vctrs::vec_chop(values), ~ snap(.x, lower, upper)) + quantile_pred(do.call(rbind, values), quantile_levels = quantile_levels) +} + + + + +#' Impute additional quantiles from a `quantile_pred` +#' +#' While a [quantile_pred] describes evaluations for the inverse +#' cummulative distribution function (CDF, sometimes called the "quantile +#' function") at particular quantile levels, this is not enough +#' to fully describe the distribution. For example, +#' ```r +#' p <- c(.1, .5, .9) +#' quantile_pred(matrix(qnorm(p), nrow = 1), p) +#' ``` +#' encapsulates the 10%, 50%, and 90% quantile levels of the standard normal distribution. +#' But, what if we need, say, the 25% and 75% levels? This function imputes +#' them if possible. +#' +#' @details +#' If `probs` is simply a subset of `quantile_levels` that already exist in `x`, +#' then these will be returned (up to numeric error). Small errors are possible +#' due to difficulties matching double vectors. +#' +#' For `probs` that do not exist in `x`, these will be interpolated or +#' extrapolated as needed. The process has 3 steps. +#' +#' First, by default (`middle = "cubic"`), missing _internal_ quantile levels are +#' interpolated using a cubic spline fit to the observed values + quantile levels with +#' [stats::splinefun]. Second, if cubic interpolation fails (or if +#' `middle = "linear"`), linear interpolation is used via [stats::approx]. +#' Finally, missing _external_ quantile levels (those outside the range of +#' `quantile_levels`) are extrapolated. This is done using a linear fit on the +#' logistic scale to the two closest tail values. +#' +#' This procedure results in sorted quantiles that interpolate/extrapolate +#' smoothly, while also enforcing heavy tails beyond the range. +#' +#' Optionally, the resulting quantiles can be constrained to a compact interval +#' using `lower` and/or `upper`. This is done after extrapolation, so it may +#' result in multiple quantile levels having the same value (a CDF with a spike). +#' +#' +#' @param x an object of class `quantile_pred` +#' @param probs vector. probabilities at which to evaluate the inverse CDF +#' @param lower number. lower bound for the resulting values +#' @param upper number. upper bound for the resulting values +#' @param middle character. +#' +#' @returns A matrix with `length(probs)` columns and `length(x)` rows. Each +#' row contains the inverse CDF (quantile function) given by `x`, +#' extrapolated/interpolated to `probs`. +#' @export +#' +#' @examples +#' p <- c(.1, .5, .9) +#' qp <- quantile_pred(matrix(c(qnorm(p), qexp(p)), nrow = 2, byrow = TRUE), p) +#' impute_quantiles(qp, p) +#' as.matrix(qp) # same as the imputation +#' +#' p1 <- c(.05, .25, .75, .95) +#' impute_quantiles(qp, p1) +#' rbind(qnorm(p1), qexp(p1)) # exact values, for comparison +impute_quantiles <- function( + x, + probs = seq(0, 1, 0.25), + lower = -Inf, + upper = Inf, + middle = c("cubic", "linear") +) { + if (!is_quantile_pred(x)) { + cli::cli_abort( + "{.arg x} must be a {.cls quantile_pred} object, not + {.obj_type_friendly {x}}." + ) + } + if (length(extract_quantile_levels(x)) < 2) { + cli::cli_abort( + "Quantile interpolation is not possible when fewer than 2 quantiles + are avaliable." + ) + } + if (is.unsorted(probs)) probs <- sort(probs) + check_quantile_level_values(probs, "probs", call = caller_env()) + check_number_decimal(lower) + check_number_decimal(upper) + if (lower > upper) { + cli::cli_abort("`lower` must be less than `upper`.") + } + middle <- rlang::arg_match(middle) + snap(impute_quantile_internal(x, probs, middle), lower, upper) +} + +impute_quantile_internal <- function(x, tau_out, middle) { + tau <- extract_quantile_levels(x) + qvals <- as.matrix(x) + if (all(tau_out %in% tau) && !anyNA(qvals)) { + return(qvals[, match(tau_out, tau), drop = FALSE]) + } + qvals_out <- map( + vctrs::vec_chop(qvals), + ~ impute_quantiles_single(.x, tau, tau_out, middle) + ) + qvals_out <- do.call(rbind, qvals_out) + qvals_out +} + +impute_quantiles_single <- function(qvals, tau, tau_out, middle) { + qvals_out <- rep(NA, length(tau_out)) + good <- !is.na(qvals) + if (!any(good)) { + return(qvals_out) + } + qvals <- qvals[good] + tau <- tau[good] + + # in case we only have one point, and it matches something we wanted + if (length(good) < 2) { + matched_one <- tau_out %in% tau + qvals_out[matched_one] <- qvals[matched_one] + return(qvals_out) + } + + indl <- tau_out < min(tau) + indr <- tau_out > max(tau) + indm <- !indl & !indr + + if (middle == "cubic") { + method <- "cubic" + result <- tryCatch( + { + Q <- stats::splinefun(tau, qvals, method = "hyman") + quartiles <- Q(c(.25, .5, .75)) + }, + error = function(e) { + return(NA) + } + ) + } + if (middle == "linear" || any(is.na(result))) { + method <- "linear" + quartiles <- stats::approx(tau, qvals, c(.25, .5, .75))$y + } + if (any(indm)) { + qvals_out[indm] <- switch( + method, + linear = stats::approx(tau, qvals, tau_out[indm])$y, + cubic = Q(tau_out[indm]) + ) + } + if (any(indl) || any(indr)) { + qv <- data.frame( + q = c(tau, tau_out[indm]), + v = c(qvals, qvals_out[indm]) + ) + qv <- qv[vctrs::vec_unique_loc(qv$q), ] + qv <- qv[vctrs::vec_order(qv$q), ] + } + if (any(indl)) { + qvals_out[indl] <- tail_extrapolate(tau_out[indl], utils::head(qv, 2)) + } + if (any(indr)) { + qvals_out[indr] <- tail_extrapolate(tau_out[indr], utils::tail(qv, 2)) + } + qvals_out +} + +logit <- function(p) { + p <- pmax(pmin(p, 1), 0) + log(p) - log(1 - p) +} + +# extrapolates linearly on the logistic scale using +# the two points nearest the tail +tail_extrapolate <- function(tau_out, qv) { + if (nrow(qv) == 1L) return(rep(qv$v[1], length(tau_out))) + x <- logit(qv$q) + x0 <- logit(tau_out) + y <- qv$v + m <- diff(y) / diff(x) + m * (x0 - x[1]) + y[1] +} diff --git a/R/quantile-pred.R b/R/quantile-pred.R index 4fc4589b..e6c0c6a8 100644 --- a/R/quantile-pred.R +++ b/R/quantile-pred.R @@ -21,6 +21,7 @@ #' `".quantile_levels"`, and `".row"`. #' * `as.matrix()` returns an unnamed matrix with rows as samples, columns as #' quantile levels, and entries are predictions. +#' * `is_quantile_pred()` tests for the "quantile_pred" class #' @examples #' .pred_quantile <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) #' @@ -42,7 +43,7 @@ quantile_pred <- function(values, quantile_levels = double()) { rownames(values) <- NULL colnames(values) <- NULL - values <- lapply(vctrs::vec_chop(values), drop) + values <- map(vctrs::vec_chop(values), drop) new_quantile_pred(values, quantile_levels) } @@ -55,10 +56,18 @@ new_quantile_pred <- function(values = list(), quantile_levels = double()) { ) } + + +#' @export +#' @rdname quantile_pred +is_quantile_pred <- function(x) { + inherits(x, "quantile_pred") +} + #' @export #' @rdname quantile_pred extract_quantile_levels <- function(x) { - if (!inherits(x, "quantile_pred")) { + if (!is_quantile_pred(x)) { cli::cli_abort( "{.arg x} must be a {.cls quantile_pred} object, not {.obj_type_friendly {x}}." @@ -208,3 +217,74 @@ check_quantile_level_values <- function(levels, arg, call) { } invisible(TRUE) } + + +# vctrs behaviours -------------------------------------------------------- + +#' @export +#' @keywords internal +vec_ptype2.quantile_pred.quantile_pred <- function( + x, y, ..., x_arg = "", y_arg = "", call = caller_env() +) { + if (all(extract_quantile_levels(y) %in% extract_quantile_levels(x))) { + return(x) + } + if (all(extract_quantile_levels(x) %in% extract_quantile_levels(y))) { + return(y) + } + stop_incompatible_type( + x, y, x_arg = x_arg, y_arg = y_arg, + details = "`quantile_levels` must be compatible (a superset/subset relation)." + ) +} + +#' @export +vec_cast.quantile_pred.quantile_pred <- function(x, to, ..., x_arg = "", to_arg = "") { + x_lvls <- extract_quantile_levels(x) + to_lvls <- extract_quantile_levels(to) + x_in_to <- x_lvls %in% to_lvls + to_in_x <- to_lvls %in% x_lvls + + old_qdata <- as.matrix(x)[, x_in_to] + new_qdata <- matrix(NA, nrow = vec_size(x), ncol = length(to_lvls)) + new_qdata[, to_in_x] <- old_qdata + quantile_pred(new_qdata, quantile_levels = to_lvls) +} + + +#' @export +#' @method vec_math quantile_pred +vec_math.quantile_pred <- function(.fn, .x, ...) { + fn <- .fn + .fn <- getExportedValue("base", .fn) + if (fn %in% c("any", "all", "prod", "sum", "cumsum", "cummax", "cummin", "cumprod")) { + cli::cli_abort("{.fn {fn}} is not a supported operation for {.cls quantile_pred}.") + } + quantile_levels <- .x %@% "quantile_levels" + .x <- as.matrix(.x) + quantile_pred(.fn(.x), quantile_levels) +} + +#' @export +#' @method vec_arith quantile_pred +vec_arith.quantile_pred <- function(op, x, y, ...) { + UseMethod("vec_arith.quantile_pred", y) +} + +#' @export +#' @method vec_arith.quantile_pred numeric +vec_arith.quantile_pred.numeric <- function(op, x, y, ...) { + op_fn <- getExportedValue("base", op) + l <- vctrs::vec_recycle_common(x = x, y = y) + out <- op_fn(as.matrix(l$x), l$y) + quantile_pred(out, x %@% "quantile_levels") +} + +#' @export +#' @method vec_arith.numeric quantile_pred +vec_arith.numeric.quantile_pred <- function(op, x, y, ...) { + op_fn <- getExportedValue("base", op) + l <- vctrs::vec_recycle_common(x = x, y = y) + out <- op_fn(l$x, as.matrix(l$y)) + quantile_pred(out, y %@% "quantile_levels") +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 6894b4c5..289a0d9e 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -37,6 +37,7 @@ reference: - add_intercept_column - weighted_table - fct_encode_one_hot + - impute_quantiles - title: Validation contents: diff --git a/man/impute_quantiles.Rd b/man/impute_quantiles.Rd new file mode 100644 index 00000000..35096b67 --- /dev/null +++ b/man/impute_quantiles.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/impute-quantile_pred.R +\name{impute_quantiles} +\alias{impute_quantiles} +\title{Impute additional quantiles from a \code{quantile_pred}} +\usage{ +impute_quantiles( + x, + probs = seq(0, 1, 0.25), + lower = -Inf, + upper = Inf, + middle = c("cubic", "linear") +) +} +\arguments{ +\item{x}{an object of class \code{quantile_pred}} + +\item{probs}{vector. probabilities at which to evaluate the inverse CDF} + +\item{lower}{number. lower bound for the resulting values} + +\item{upper}{number. upper bound for the resulting values} + +\item{middle}{character.} +} +\value{ +A matrix with \code{length(probs)} columns and \code{length(x)} rows. Each +row contains the inverse CDF (quantile function) given by \code{x}, +extrapolated/interpolated to \code{probs}. +} +\description{ +While a \link{quantile_pred} describes evaluations for the inverse +cummulative distribution function (CDF, sometimes called the "quantile +function") at particular quantile levels, this is not enough +to fully describe the distribution. For example, + +\if{html}{\out{
}}\preformatted{p <- c(.1, .5, .9) +quantile_pred(matrix(qnorm(p), nrow = 1), p) +}\if{html}{\out{
}} + +encapsulates the 10\%, 50\%, and 90\% quantile levels of the standard normal distribution. +But, what if we need, say, the 25\% and 75\% levels? This function imputes +them if possible. +} +\details{ +If \code{probs} is simply a subset of \code{quantile_levels} that already exist in \code{x}, +then these will be returned (up to numeric error). Small errors are possible +due to difficulties matching double vectors. + +For \code{probs} that do not exist in \code{x}, these will be interpolated or +extrapolated as needed. The process has 3 steps. + +First, by default (\code{middle = "cubic"}), missing \emph{internal} quantile levels are +interpolated using a cubic spline fit to the observed values + quantile levels with +\link[stats:splinefun]{stats::splinefun}. Second, if cubic interpolation fails (or if +\code{middle = "linear"}), linear interpolation is used via \link[stats:approxfun]{stats::approx}. +Finally, missing \emph{external} quantile levels (those outside the range of +\code{quantile_levels}) are extrapolated. This is done using a linear fit on the +logistic scale to the two closest tail values. + +This procedure results in sorted quantiles that interpolate/extrapolate +smoothly, while also enforcing heavy tails beyond the range. + +Optionally, the resulting quantiles can be constrained to a compact interval +using \code{lower} and/or \code{upper}. This is done after extrapolation, so it may +result in multiple quantile levels having the same value (a CDF with a spike). +} +\examples{ +p <- c(.1, .5, .9) +qp <- quantile_pred(matrix(c(qnorm(p), qexp(p)), nrow = 2, byrow = TRUE), p) +impute_quantiles(qp, p) +as.matrix(qp) # same as the imputation + +p1 <- c(.05, .25, .75, .95) +impute_quantiles(qp, p1) +rbind(qnorm(p1), qexp(p1)) # exact values, for comparison +} diff --git a/man/quantile_pred.Rd b/man/quantile_pred.Rd index f9ac7cf3..28fa8139 100644 --- a/man/quantile_pred.Rd +++ b/man/quantile_pred.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/quantile-pred.R \name{quantile_pred} \alias{quantile_pred} +\alias{is_quantile_pred} \alias{extract_quantile_levels} \alias{as_tibble.quantile_pred} \alias{as.matrix.quantile_pred} @@ -9,6 +10,8 @@ \usage{ quantile_pred(values, quantile_levels = double()) +is_quantile_pred(x) + extract_quantile_levels(x) \method{as_tibble}{quantile_pred}(x, ..., .rows = NULL, .name_repair = "minimal", rownames = NULL) @@ -37,6 +40,7 @@ quantile levels. \code{".quantile_levels"}, and \code{".row"}. \item \code{as.matrix()} returns an unnamed matrix with rows as samples, columns as quantile levels, and entries are predictions. +\item \code{is_quantile_pred()} tests for the "quantile_pred" class } } \description{ diff --git a/man/snap.Rd b/man/snap.Rd new file mode 100644 index 00000000..f884a2ea --- /dev/null +++ b/man/snap.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/impute-quantile_pred.R +\name{snap} +\alias{snap} +\title{Restrict numeric data to the interval [lower, upper]} +\usage{ +snap(x, lower, upper, ...) +} +\arguments{ +\item{x}{a numeric vector} + +\item{lower}{number, the lower bound} + +\item{upper}{number, the upper bound} + +\item{...}{unused} +} +\value{ +An object of the same type as \code{x} +} +\description{ +Restrict numeric data to the interval [lower, upper] +} +\keyword{internal} diff --git a/tests/testthat/_snaps/impute_quantile_pred.md b/tests/testthat/_snaps/impute_quantile_pred.md new file mode 100644 index 00000000..8471b52e --- /dev/null +++ b/tests/testthat/_snaps/impute_quantile_pred.md @@ -0,0 +1,64 @@ +# impute_quantiles failure modes + + Code + impute_quantiles(x) + Condition + Error in `impute_quantiles()`: + ! `x` must be a object, not a double vector. + +--- + + Code + impute_quantiles(x, c(0.1, 0.5, 0.9)) + Condition + Error in `impute_quantiles()`: + ! Quantile interpolation is not possible when fewer than 2 quantiles are avaliable. + +--- + + Code + impute_quantiles(x, probs = c(-1, 0.2, 2)) + Condition + Error: + ! `probs` must be a number between 0 and 1, not the number -1. + +--- + + Code + impute_quantiles(x, lower = "a") + Condition + Error in `impute_quantiles()`: + ! `lower` must be a number, not the string "a". + +--- + + Code + impute_quantiles(x, upper = "b") + Condition + Error in `impute_quantiles()`: + ! `upper` must be a number, not the string "b". + +--- + + Code + impute_quantiles(x, lower = NULL) + Condition + Error in `impute_quantiles()`: + ! `lower` must be a number, not `NULL`. + +--- + + Code + impute_quantiles(x, lower = 2, upper = -1) + Condition + Error in `impute_quantiles()`: + ! `lower` must be less than `upper`. + +--- + + Code + impute_quantiles(x, middle = "middle") + Condition + Error in `impute_quantiles()`: + ! `middle` must be one of "cubic" or "linear", not "middle". + diff --git a/tests/testthat/_snaps/quantile-pred.md b/tests/testthat/_snaps/quantile-pred.md index cb2b45e7..a02d6957 100644 --- a/tests/testthat/_snaps/quantile-pred.md +++ b/tests/testthat/_snaps/quantile-pred.md @@ -168,3 +168,20 @@ Output [1] "[1.7154]" "[0.56784]" "[1.2393]" "[2.2062]" "[0.76714]" +# arithmetic works on quantiles + + Code + sum(dstn) + Condition + Error in `vec_math()`: + ! `sum()` is not a supported operation for . + +# vec_ptype works + + Code + vec_ptype2(v1, ugly_levels) + Condition + Error in `vec_ptype2.quantile_pred.quantile_pred()`: + ! Can't combine `v1` and `ugly_levels` . + `quantile_levels` must be compatible (a superset/subset relation). + diff --git a/tests/testthat/test-impute_quantile_pred.R b/tests/testthat/test-impute_quantile_pred.R new file mode 100644 index 00000000..9e932fc0 --- /dev/null +++ b/tests/testthat/test-impute_quantile_pred.R @@ -0,0 +1,86 @@ +test_that("snap.numeric clamps values correctly", { + x <- c(-5, 0, 5, 10, 15) + expect_equal(snap(x, lower = 0, upper = 10), c(0, 0, 5, 10, 10)) + + x <- c(2, 4, 6) + expect_equal(snap(x, lower = 0, upper = 10), x) + + x <- numeric(0) + expect_equal(snap(x, lower = 0, upper = 10), numeric(0)) +}) + +test_that("snap.quantile_pred clamps values correctly", { + x <- quantile_pred(matrix(c(-5, 0, 5, 10, 15), nrow = 1), 1:5 / 6) + s <- snap(x, lower = 0, upper = 10) + expect_s3_class(s, "quantile_pred") + expect_equal(extract_quantile_levels(s), 1:5 / 6) + expect_equal( + drop(as.matrix(snap(x, lower = 0, upper = 10))), + c(0, 0, 5, 10, 10) + ) + + x <- quantile_pred(matrix(c(2, 4, 6), nrow = 1), 1:3 / 4) + s <- snap(x, lower = 0, upper = 10) + expect_s3_class(s, "quantile_pred") + expect_equal(extract_quantile_levels(s), 1:3 / 4) + expect_equal( + drop(as.matrix(snap(x, lower = 0, upper = 10))), + drop(as.matrix(x)) + ) + + x <- new_quantile_pred() + expect_equal(snap(x, lower = 0, upper = 10), x) +}) + +test_that("impute_quantiles failure modes", { + x <- double(7) + expect_snapshot(error = TRUE, impute_quantiles(x)) + + probs <- c(0.5) + x <- quantile_pred(matrix(0, nrow = 1), probs) + expect_snapshot(error = TRUE, impute_quantiles(x, c(0.1, 0.5, 0.9))) + + probs <- c(0.1, 0.5, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + expect_snapshot(error = TRUE, impute_quantiles(x, probs = c(-1, .2, 2))) + expect_snapshot(error = TRUE, impute_quantiles(x, lower = "a")) + expect_snapshot(error = TRUE, impute_quantiles(x, upper = "b")) + expect_snapshot(error = TRUE, impute_quantiles(x, lower = NULL)) + expect_snapshot(error = TRUE, impute_quantiles(x, lower = 2, upper = -1)) + expect_snapshot(error = TRUE, impute_quantiles(x, middle = "middle")) +}) + +test_that("impute_quantiles returns existing quantiles when matched", { + probs <- c(0.1, 0.5, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, probs) + expect_equal(out, as.matrix(x)) +}) + +test_that("impute_quantiles interpolates correctly", { + probs <- c(0.1, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, c(0.1, 0.5, 0.9), middle = "linear") + expect_equal(out[1], qnorm(0.1)) + expect_equal(out[3], qnorm(0.9)) + expect_equal(out[2], 0) +}) + +test_that("impute_quantiles extrapolates correctly", { + probs <- c(0.2, 0.8) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, c(0.01, 0.2, 0.8, 0.99)) + expect_equal(out[2], qnorm(0.2)) + expect_equal(out[3], qnorm(0.8)) + tail1 <- tail_extrapolate(.01, tibble(q = c(.2, .8), v = qnorm(q))) + tail2 <- tail_extrapolate(.99, tibble(q = c(.8, .2), v = qnorm(q))) + expect_equal(out[1], tail1) + expect_equal(out[4], tail2) +}) + +test_that("impute_quantiles applies lower/upper bounds", { + probs <- c(0.1, 0.5, 0.9) + x <- quantile_pred(matrix(qnorm(probs), nrow = 1), probs) + out <- impute_quantiles(x, c(0.01, 0.5, 0.99), lower = -1, upper = 1) + expect_true(all(out >= -1 & out <= 1)) +}) diff --git a/tests/testthat/test-quantile-pred.R b/tests/testthat/test-quantile-pred.R index b649fca1..2f94d123 100644 --- a/tests/testthat/test-quantile-pred.R +++ b/tests/testthat/test-quantile-pred.R @@ -55,6 +55,23 @@ test_that("extract_quantile_levels", { ) }) +test_that("median for quantile_pred", { + v <- quantile_pred(matrix(1:25, 5), 3:7 / 10) + expect_identical(median(v), as.double(11:15)) # has explicit median, but dbl + + v_above_med <- quantile_pred(matrix(1:10, 2), 11:15 / 20) + expect_equal(median(v_above_med), rep(NA, 2)) + + v_below_med <- quantile_pred(matrix(1:10, 2), 5:9 / 20) + expect_equal(median(v_above_med), rep(NA, 2)) + + v4 <- quantile_pred(matrix(1:10, ncol = 1), 0.4) + expect_equal(median(v4), rep(NA, 10)) + + v5 <- quantile_pred(matrix(1:10, ncol = 1), 0.5) + expect_equal(median(v5), as.double(1:10)) +}) + test_that("quantile_pred formatting", { # multiple quantiles v <- quantile_pred(matrix(1:20, 5), 1:4 / 5) @@ -98,3 +115,62 @@ test_that("as.matrix() for quantile_pred", { expect_true(is.matrix(m)) expect_identical(m, x) }) + + + +test_that("unary math works on quantiles", { + dstn <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), + 1:4 / 5 + ) + dstn2 <- quantile_pred( + log(matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE)), + 1:4 / 5 + ) + expect_identical(log(dstn), dstn2) +}) + +test_that("arithmetic works on quantiles", { + dstn <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE), + 1:4 / 5 + ) + dstn2 <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE) + 1, + 1:4 / 5 + ) + expect_identical(dstn + 1, dstn2) + expect_identical(1 + dstn, dstn2) + + dstn2 <- quantile_pred( + matrix(c(1:4, 8:11), nrow = 2, byrow = TRUE) / 4, + 1:4 / 5 + ) + expect_identical(dstn / 4, dstn2) + expect_identical((1 / 4) * dstn, dstn2) + + expect_snapshot(error = TRUE, sum(dstn)) +}) + +test_that("vec_ptype works", { + v1 <- quantile_pred(matrix(1:20, 5), 1:4 / 5) + v2 <- quantile_pred(matrix(1:15, 5), 2:4 / 5) + expect_identical(vec_ptype2(v1, v1), vec_ptype(v1)) + expect_identical(vec_ptype2(v1, v2), vec_ptype(v1)) + expect_identical(vec_ptype2(v2, v1), vec_ptype(v1)) + + ugly_levels <- quantile_pred(matrix(1:20, 5), 1:4 / 5 + .1) + expect_snapshot(error = TRUE, vec_ptype2(v1, ugly_levels)) +}) + +test_that("vec_cast self-self works", { + qp <- quantile_pred(matrix(rnorm(20), 5), c(.2, .4, .6, .8)) + qp2 <- quantile_pred(matrix(rnorm(7), nrow = 1), 2:8 / 10) + expect_identical(vec_cast(qp, qp), qp) + expect_identical(vec_cast(qp2, qp2), qp2) + + qp_dat <- as.matrix(qp) + qp_big <- matrix(NA, nrow(qp_dat), length(2:8)) + qp_big[, c(1, 3, 5, 7)] <- qp_dat + expect_identical(vec_cast(qp, qp2), quantile_pred(qp_big, 2:8 / 10)) +})