From 6390aea8bf6ace7f277b047a3e98c6a8118d72ed Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Mon, 1 Mar 2021 08:35:18 -0700 Subject: [PATCH 01/14] Implement pivot_longer() --- R/step-call-pivot_longer.R | 389 ++++++++++++++++++++++++++++++++ R/tidyeval.R | 10 +- R/zzz.R | 2 + man/pivot_longer.dtplyr_step.Rd | 163 +++++++++++++ 4 files changed, 561 insertions(+), 3 deletions(-) create mode 100644 R/step-call-pivot_longer.R create mode 100644 man/pivot_longer.dtplyr_step.Rd diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R new file mode 100644 index 000000000..1fbfb8a1e --- /dev/null +++ b/R/step-call-pivot_longer.R @@ -0,0 +1,389 @@ +#' Pivot data from wide to long +#' +#' @description +#' This is a method for the tidyr `pivot_longer()` generic. It is translated to +#' [data.table::melt()] +#' +#' @param data A [lazy_dt()]. +#' @inheritParams tidyr::pivot_longer +#' @examples +#' library(tidyr) +#' +#' # Simplest case where column names are character data +#' relig_income_dt <- lazy_dt(relig_income) +#' relig_income_dt %>% +#' pivot_longer(!religion, names_to = "income", values_to = "count") +#' +#' # Slightly more complex case where columns have common prefix, +#' # and missing missings are structural so should be dropped. +#' billboard_dt <- lazy_dt(billboard) +#' billboard %>% +#' pivot_longer( +#' cols = starts_with("wk"), +#' names_to = "week", +#' names_prefix = "wk", +#' values_to = "rank", +#' values_drop_na = TRUE +#' ) +#' +#' # Multiple variables stored in column names +#' lazy_dt(who) %>% +#' pivot_longer( +#' cols = new_sp_m014:newrel_f65, +#' names_to = c("diagnosis", "gender", "age"), +#' names_pattern = "new_?(.*)_(.)(.*)", +#' values_to = "count" +#' ) +#' +#' # Multiple observations per row +#' anscombe_dt <- lazy_dt(anscombe) +#' anscombe_dt %>% +#' pivot_longer( +#' everything(), +#' names_to = c(".value", "set"), +#' names_pattern = "(.)(.)" +#' ) +# exported onLoad +pivot_longer.dtplyr_step <- function(data, + cols, + names_to = "name", + names_prefix = NULL, + names_sep = NULL, + names_pattern = NULL, + names_ptypes = list(), + names_transform = list(), + names_repair = "check_unique", + values_to = "value", + values_drop_na = FALSE, + values_ptypes = list(), + values_transform = list(), + ...) { + + sim_data <- simulate_vars(data) + sim_vars <- names(sim_data) + measure_vars <- names(tidyselect::eval_select(enquo(cols), sim_data)) + if (length(measure_vars) == 0) { + abort("`cols` must select at least one column.") + } + + multiple_names_to <- length(names_to) > 1 + uses_dot_value <- ".value" %in% names_to + + variable_name <- "variable" + + if (uses_dot_value) { + if (!is.null(names_sep)) { + .value <- str_separate(measure_vars, into = names_to, sep = names_sep)$.value + } else if (!is.null(names_pattern)) { + .value <- str_extract(measure_vars, into = names_to, names_pattern)$.value + } else { + abort("If you use '.value' in `names_to` you must also supply + `names_sep' or `names_pattern") + } + + v_fct <- factor(.value, levels = unique(.value)) + measure_vars <- split(measure_vars, v_fct) + values_to <- names(measure_vars) + names(measure_vars) <- NULL + + if (multiple_names_to) { + variable_name <- names_to[!names_to == ".value"] + } + } else if (multiple_names_to) { + if (is.null(names_sep) && is.null(names_pattern)) { + abort("If you supply multiple names in `names_to` you must also + supply `names_sep` or `names_pattern`") + } else if (!is.null(names_sep) && !is.null(names_pattern)) { + abort("only one of names_sep or names_pattern should be provided") + } + } else { + variable_name <- names_to + } + + args <- list( + measure.vars = measure_vars, + variable.name = variable_name, + value.name = values_to, + na.rm = values_drop_na, + variable.factor = FALSE + ) + + # Clean up call args if defaults are used + if (variable_name == "variable") { + args$variable.name <- NULL + } + + if (length(values_to) == 1) { + if (values_to == "value") { + args$value.name <- NULL + } + } + + if (isFALSE(values_drop_na)) { + args$na.rm <- NULL + } + + id_vars <- sim_vars[!sim_vars %in% unlist(measure_vars)] + + out <- step_call( + data, + "melt", + args = args, + vars = c(id_vars, variable_name, values_to) + ) + + if (!is.null(names_prefix)) { + out <- mutate(out, !!variable_name := gsub(paste0("^", names_prefix), "", !!sym(variable_name))) + } + + if (multiple_names_to && !uses_dot_value) { + if (!is.null(names_sep)) { + into_cols <- str_separate(pull(out, !!sym(variable_name)), names_to, sep = names_sep) + } else { + into_cols <- str_extract(pull(out, !!sym(variable_name)), into = names_to, regex = names_pattern) + } + out <- mutate(out, !!!into_cols) + + # Need to drop variable_name and move names_to vars to correct position + # Recreates relocate logic so only select is necessary, not relocate + select + sim_vars <- names(simulate_vars(out)) + var_idx <- which(sim_vars == variable_name) + before_vars <- sim_vars[seq_along(sim_vars) < var_idx] + after_vars <- sim_vars[seq_along(sim_vars) > var_idx] + + out <- select(out, !!!syms(before_vars), !!!syms(names_to), !!!syms(after_vars)) + } else if (!multiple_names_to && uses_dot_value) { + out <- mutate(out, variable = NULL) + } + + out <- step_repair(out, repair = names_repair) + + ## names_ptype & names_transform + cast_vars <- intersect(names_to, names(names_ptypes)) + if (length(cast_vars) > 0) { + cast_calls <- vector("list", length(cast_vars)) + names(cast_calls) <- cast_vars + for (i in seq_along(cast_calls)) { + cast_calls[[i]] <- call2("vec_cast", sym(cast_vars[[i]]), names_ptypes[[i]]) + } + out <- mutate(out, !!!cast_calls) + } + + coerce_vars <- intersect(names_to, names(names_transform)) + if (length(coerce_vars) > 0) { + coerce_calls <- vector("list", length(coerce_vars)) + names(coerce_calls) <- coerce_vars + for (i in seq_along(coerce_calls)) { + .fn <- as_function(names_transform[[i]]) + coerce_calls[[i]] <- call2(.fn, sym(coerce_vars[[i]])) + } + out <- mutate(out, !!!coerce_calls) + } + + ## values_ptype & values_transform + cast_vars <- intersect(values_to, names(values_ptypes)) + if (length(cast_vars) > 0) { + cast_calls <- vector("list", length(cast_vars)) + names(cast_calls) <- cast_vars + for (i in seq_along(cast_calls)) { + cast_calls[[i]] <- call2(expr(vec_cast), sym(cast_vars[[i]]), values_ptypes[[i]]) + } + out <- mutate(out, !!!cast_calls) + } + + # transform vars + coerce_vars <- intersect(values_to, names(values_transform)) + if (length(coerce_vars) > 0) { + coerce_calls <- vector("list", length(coerce_vars)) + names(coerce_calls) <- coerce_vars + for (i in seq_along(coerce_calls)) { + .fn <- as_function(values_transform[[i]]) + coerce_calls[[i]] <- call2(.fn, sym(coerce_vars[[i]])) + } + out <- mutate(out, !!!coerce_calls) + } + + out +} + +# exported onLoad +pivot_longer.data.table <- function(data, + cols, + names_to = "name", + names_prefix = NULL, + names_sep = NULL, + names_pattern = NULL, + names_ptypes = list(), + names_transform = list(), + names_repair = "check_unique", + values_to = "value", + values_drop_na = FALSE, + values_ptypes = list(), + values_transform = list(), + ...) { + data <- lazy_dt(data) + tidyr::pivot_longer( + data = data, + cols = {{ cols }}, + names_to = names_to, + names_prefix = names_prefix, + names_sep = names_sep, + names_pattern = names_pattern, + names_ptypes = names_ptypes, + names_transform = names_transform, + names_repair = names_repair, + values_to = values_to, + values_drop_na = values_drop_na, + values_ptypes = values_ptypes, + values_transform = values_transform, + ... + ) +} + +# str_extract() ----------------------------------------------------------------- +str_extract <- function(x, into, regex, convert = FALSE) { + stopifnot( + is_string(regex), + is_character(into) + ) + + out <- str_match_first(x, regex) + if (length(out) != length(into)) { + stop( + "`regex` should define ", length(into), " groups; ", ncol(out), " found.", + call. = FALSE + ) + } + + # Handle duplicated names + if (anyDuplicated(into)) { + pieces <- split(out, into) + into <- names(pieces) + out <- lapply(pieces, pmap_chr, paste0, sep = "") + } + + into <- as_utf8_character(into) + + non_na_into <- !is.na(into) + out <- out[non_na_into] + names(out) <- into[non_na_into] + + if (convert) { + out[] <- lapply(out, utils::type.convert, as.is = TRUE) + } + + out +} + +str_match_first <- function(string, regex) { + loc <- regexpr(regex, string, perl = TRUE) + loc <- group_loc(loc) + + out <- lapply( + seq_len(loc$matches), + function(i) substr(string, loc$start[, i], loc$end[, i]) + ) + out[-1] +} + +group_loc <- function(x) { + start <- cbind(as.vector(x), attr(x, "capture.start")) + end <- start + cbind(attr(x, "match.length"), attr(x, "capture.length")) - 1L + + no_match <- start == -1L + start[no_match] <- NA + end[no_match] <- NA + + list(matches = ncol(start), start = start, end = end) +} + +# str_separate() ----------------------------------------------------------------- + +str_separate <- function(x, into, sep, convert = FALSE, extra = "warn", fill = "warn") { + if (!is.character(into)) { + abort("`into` must be a character vector") + } + + if (is.numeric(sep)) { + out <- strsep(x, sep) + } else if (is_character(sep)) { + out <- data.table::tstrsplit(x, sep, fixed = TRUE, names = TRUE) + out <- as_tibble(out) + } else { + abort("`sep` must be either numeric or character") + } + + names(out) <- as_utf8_character(into) + out <- out[!is.na(names(out))] + if (convert) { + out[] <- lapply(out, utils::type.convert, as.is = TRUE) + } + out +} + +strsep <- function(x, sep) { + nchar <- nchar(x) + pos <- lapply(sep, function(i) { + if (i >= 0) return(i) + pmax(0, nchar + i) + }) + pos <- c(list(0), pos, list(nchar)) + + lapply(1:(length(pos) - 1), function(i) { + substr(x, pos[[i]] + 1, pos[[i + 1]]) + }) +} + +str_split_n <- function(x, pattern, n_max = -1) { + if (is.factor(x)) { + x <- as.character(x) + } + m <- gregexpr(pattern, x, perl = TRUE) + if (n_max > 0) { + m <- lapply(m, function(x) slice_match(x, seq_along(x) < n_max)) + } + regmatches(x, m, invert = TRUE) +} + +slice_match <- function(x, i) { + structure( + x[i], + match.length = attr(x, "match.length")[i], + index.type = attr(x, "index.type"), + useBytes = attr(x, "useBytes") + ) +} + +list_indices <- function(x, max = 20) { + if (length(x) > max) { + x <- c(x[seq_len(max)], "...") + } + + paste(x, collapse = ", ") +} + +# str_separate() ----------------------------------------------------------------- + +args_recycle <- function(args) { + lengths <- vapply(args, length, integer(1)) + n <- max(lengths) + + stopifnot(all(lengths == 1L | lengths == n)) + to_recycle <- lengths == 1L + args[to_recycle] <- lapply(args[to_recycle], function(x) rep.int(x, n)) + + args +} + +pmap <- function(.l, .f, ...) { + args <- args_recycle(.l) + do.call("mapply", c( + FUN = list(quote(.f)), + args, MoreArgs = quote(list(...)), + SIMPLIFY = FALSE, USE.NAMES = FALSE + )) +} + +pmap_chr <- function(.l, .f, ...) { + as.character(pmap(.l, .f, ...)) +} diff --git a/R/tidyeval.R b/R/tidyeval.R index a796765ae..2781448ab 100644 --- a/R/tidyeval.R +++ b/R/tidyeval.R @@ -14,14 +14,18 @@ dt_eval <- function(x) { # Make sure data.table functions are available so dtplyr still works # even when data.table isn't attached dt_funs <- c( - "copy", "dcast", "nafill", + "copy", "dcast", "melt", "nafill", "fcase", "fcoalesce", "fintersect", "frank", "frankv", "fsetdiff", "funion", "setcolorder", "setnames" ) add_dt_wrappers <- function(env) { - env_bind(env, !!!env_get_list(ns_env("data.table"), dt_funs)) + env_bind( + env, + !!!env_get_list(ns_env("data.table"), dt_funs), + vec_cast = vctrs::vec_cast + ) } -globalVariables(dt_funs) +globalVariables(c(dt_funs, "vec_cast")) # These functions attempt to simulate tidy eval as much as possible within # data.table. The goal is to get the majority of real-world code to work, diff --git a/R/zzz.R b/R/zzz.R index c43e2172f..0537891b0 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -6,6 +6,7 @@ register_s3_method("dplyr", "union", "data.table") register_s3_method("tidyr", "drop_na", "data.table") register_s3_method("tidyr", "fill", "data.table") + register_s3_method("tidyr", "pivot_longer", "data.table") register_s3_method("tidyr", "pivot_wider", "data.table") register_s3_method("tidyr", "replace_na", "data.table") @@ -15,6 +16,7 @@ register_s3_method("dplyr", "union", "dtplyr_step") register_s3_method("tidyr", "drop_na", "dtplyr_step") register_s3_method("tidyr", "fill", "dtplyr_step") + register_s3_method("tidyr", "pivot_longer", "dtplyr_step") register_s3_method("tidyr", "pivot_wider", "dtplyr_step") register_s3_method("tidyr", "replace_na", "dtplyr_step") } diff --git a/man/pivot_longer.dtplyr_step.Rd b/man/pivot_longer.dtplyr_step.Rd new file mode 100644 index 000000000..f95e1d92e --- /dev/null +++ b/man/pivot_longer.dtplyr_step.Rd @@ -0,0 +1,163 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/step-call-pivot_longer.R +\name{pivot_longer.dtplyr_step} +\alias{pivot_longer.dtplyr_step} +\title{Pivot data from wide to long} +\usage{ +\method{pivot_longer}{dtplyr_step}( + data, + cols, + names_to = "name", + names_prefix = NULL, + names_sep = NULL, + names_pattern = NULL, + names_ptypes = list(), + names_transform = list(), + names_repair = "check_unique", + values_to = "value", + values_drop_na = FALSE, + values_ptypes = list(), + values_transform = list(), + ... +) +} +\arguments{ +\item{data}{A \code{\link[=lazy_dt]{lazy_dt()}}.} + +\item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into +longer format.} + +\item{names_to}{A string specifying the name of the column to create +from the data stored in the column names of \code{data}. + +Can be a character vector, creating multiple columns, if \code{names_sep} +or \code{names_pattern} is provided. In this case, there are two special +values you can take advantage of: +\itemize{ +\item \code{NA} will discard that component of the name. +\item \code{.value} indicates that component of the name defines the name of the +column containing the cell values, overriding \code{values_to}. +}} + +\item{names_prefix}{A regular expression used to remove matching text +from the start of each variable name.} + +\item{names_sep}{If \code{names_to} contains multiple values, +these arguments control how the column name is broken up. + +\code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either +be a numeric vector (specifying positions to break on), or a single string +(specifying a regular expression to split on). + +\code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular +expression containing matching groups (\verb{()}). + +If these arguments do not give you enough control, use +\code{pivot_longer_spec()} to create a spec object and process manually as +needed.} + +\item{names_pattern}{If \code{names_to} contains multiple values, +these arguments control how the column name is broken up. + +\code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either +be a numeric vector (specifying positions to break on), or a single string +(specifying a regular expression to split on). + +\code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular +expression containing matching groups (\verb{()}). + +If these arguments do not give you enough control, use +\code{pivot_longer_spec()} to create a spec object and process manually as +needed.} + +\item{names_ptypes}{A list of column name-prototype pairs. +A prototype (or ptype for short) is a zero-length vector (like \code{integer()} +or \code{numeric()}) that defines the type, class, and attributes of a vector. +Use these arguments to confirm that the created columns are the types that +you expect. + +If not specified, the type of the columns generated from \code{names_to} will +be character, and the type of the variables generated from \code{values_to} +will be the common type of the input columns used to generate them.} + +\item{names_transform}{A list of column name-function pairs. +Use these arguments if you need to change the type of specific columns. +For example, \code{names_transform = list(week = as.integer)} would convert +a character week variable to an integer.} + +\item{names_repair}{What happens if the output has invalid column names? +The default, \code{"check_unique"} is to error if the columns are duplicated. +Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to +de-duplicated by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} +for more options.} + +\item{values_to}{A string specifying the name of the column to create +from the data stored in cell values. If \code{names_to} is a character +containing the special \code{.value} sentinel, this value will be ignored, +and the name of the value column will be derived from part of the +existing column names.} + +\item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA}s +in the \code{value_to} column. This effectively converts explicit missing values +to implicit missing values, and should generally be used only when missing +values in \code{data} were created by its structure.} + +\item{values_ptypes}{A list of column name-prototype pairs. +A prototype (or ptype for short) is a zero-length vector (like \code{integer()} +or \code{numeric()}) that defines the type, class, and attributes of a vector. +Use these arguments to confirm that the created columns are the types that +you expect. + +If not specified, the type of the columns generated from \code{names_to} will +be character, and the type of the variables generated from \code{values_to} +will be the common type of the input columns used to generate them.} + +\item{values_transform}{A list of column name-function pairs. +Use these arguments if you need to change the type of specific columns. +For example, \code{names_transform = list(week = as.integer)} would convert +a character week variable to an integer.} + +\item{...}{Additional arguments passed on to methods.} +} +\description{ +This is a method for the tidyr \code{pivot_longer()} generic. It is translated to +\code{\link[data.table:melt.data.table]{data.table::melt()}} +} +\examples{ +library(tidyr) + +# Simplest case where column names are character data +relig_income_dt <- lazy_dt(relig_income) +relig_income_dt \%>\% + pivot_longer(!religion, names_to = "income", values_to = "count") + +# Slightly more complex case where columns have common prefix, +# and missing missings are structural so should be dropped. +billboard_dt <- lazy_dt(billboard) +billboard \%>\% + pivot_longer( + cols = starts_with("wk"), + names_to = "week", + names_prefix = "wk", + values_to = "rank", + values_drop_na = TRUE + ) + +# Multiple variables stored in column names +lazy_dt(who) \%>\% + pivot_longer( + cols = new_sp_m014:newrel_f65, + names_to = c("diagnosis", "gender", "age"), + names_pattern = "new_?(.*)_(.)(.*)", + values_to = "count" + ) + +# Multiple observations per row +anscombe_dt <- lazy_dt(anscombe) +anscombe_dt \%>\% + pivot_longer( + everything(), + names_to = c(".value", "set"), + names_pattern = "(.)(.)" + ) +} From 7e00be115bc37eb2c3e4caa22d55a488ccdd000b Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Mon, 1 Mar 2021 09:00:06 -0700 Subject: [PATCH 02/14] Add tests --- .../testthat/_snaps/step-call-pivot_longer.md | 40 ++++ .../testthat/_snaps/step-call-pivot_wider.md | 2 +- tests/testthat/test-step-call-pivot_longer.R | 171 ++++++++++++++++++ 3 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/_snaps/step-call-pivot_longer.md create mode 100644 tests/testthat/test-step-call-pivot_longer.R diff --git a/tests/testthat/_snaps/step-call-pivot_longer.md b/tests/testthat/_snaps/step-call-pivot_longer.md new file mode 100644 index 000000000..c1eeb7221 --- /dev/null +++ b/tests/testthat/_snaps/step-call-pivot_longer.md @@ -0,0 +1,40 @@ +# can pivot all cols to long + + Code + show_query(step) + Output + melt(DT, measure.vars = c("x", "y"), variable.name = "name", + variable.factor = FALSE) + +# preserves original keys + + Code + show_query(step) + Output + melt(DT, measure.vars = c("y", "z"), variable.name = "name", + variable.factor = FALSE) + +# can handle missing combinations + + Code + show_query(step) + Output + melt(DT, measure.vars = list(c("x_1", "x_2"), "y_2"), variable.name = "n", + value.name = c("x", "y"), variable.factor = FALSE) + +# can cast values cols + + Code + show_query(step) + Output + melt(DT, measure.vars = c("x", "y"), variable.name = "name", + variable.factor = FALSE)[, `:=`(value = vec_cast(value, numeric(0)))] + +# can coerce values cols + + Code + show_query(step) + Output + melt(DT, measure.vars = c("x", "y"), variable.name = "name", + variable.factor = FALSE)[, `:=`(value = .Primitive("as.character")(value))] + diff --git a/tests/testthat/_snaps/step-call-pivot_wider.md b/tests/testthat/_snaps/step-call-pivot_wider.md index 6b1710f6a..9aafab79e 100644 --- a/tests/testthat/_snaps/step-call-pivot_wider.md +++ b/tests/testthat/_snaps/step-call-pivot_wider.md @@ -3,7 +3,7 @@ Code show_query(step) Output - setnames(dcast(`_DT6`, formula = "..." ~ x + y, value.var = c("a", + setnames(dcast(`_DT8`, formula = "..." ~ x + y, value.var = c("a", "b"))[, .(a_X_1, a_Y_2, b_X_1, b_Y_2)], old = c("a_X_1", "a_Y_2", "b_X_1", "b_Y_2"), new = c("X1_a", "Y2_a", "X1_b", "Y2_b")) diff --git a/tests/testthat/test-step-call-pivot_longer.R b/tests/testthat/test-step-call-pivot_longer.R new file mode 100644 index 000000000..b0b05927c --- /dev/null +++ b/tests/testthat/test-step-call-pivot_longer.R @@ -0,0 +1,171 @@ +test_that("can pivot all cols to long", { + tbl <- tibble(x = 1:2, y = 3:4) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, x:y) + out <- collect(step) + + expect_snapshot(show_query(step)) + expect_equal(step$vars, c("name", "value")) + expect_equal(out$name, c("x", "x", "y", "y")) + expect_equal(out$value, c(1, 2, 3, 4)) +}) + +test_that("preserves original keys", { + tbl <- tibble(x = 1:2, y = 2L, z = 1:2) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, y:z) + out <- collect(step) + + expect_snapshot(show_query(step)) + expect_equal(step$vars, c("x", "name", "value")) + expect_equal(out$x, rep(tbl$x, 2)) +}) + +test_that("can drop missing values", { + tbl <- tibble(x = c(1, NA), y = c(NA, 2)) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, x:y, values_drop_na = TRUE) + out <- collect(step) + + expect_equal(out$name, c("x", "y")) + expect_equal(out$value, c(1, 2)) +}) + +test_that("can handle missing combinations", { + tbl <- tribble( + ~id, ~x_1, ~x_2, ~y_2, + "A", 1, 2, "a", + "B", 3, 4, "b", + ) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, -id, names_to = c(".value", "n"), names_sep = "_") + out <- collect(step) + + expect_snapshot(show_query(step)) + expect_equal(step$vars, c("id", "n", "x", "y")) + expect_equal(out$x, c(1, 3, 2, 4)) + expect_equal(out$y, c("a", "b", NA, NA)) +}) + +test_that("can cast values cols", { + tbl <- tibble(x = 1L, y = 2L) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, x:y, values_ptypes = list(value = double())) + out <- collect(step) + + expect_snapshot(show_query(step)) + expect_equal(out$value, c(1, 2)) +}) + +test_that("can coerce values cols", { + tbl <- tibble(x = 1, y = 2) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, x:y, values_transform = list(value = as.character)) + out <- collect(step) + + expect_snapshot(show_query(step)) + expect_equal(out$value, c("1", "2")) +}) + +test_that("can cast names cols", { + tbl <- tibble(x = 1L, y = 2L) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, x:y, names_ptypes = list(name = factor())) + out <- collect(step) + + expect_equal(out$name, as.factor(c("x", "y"))) +}) + +test_that("can coerce names cols", { + tbl <- tibble(x = 1, y = 2) + dt <- lazy_dt(tbl, "DT") + step <- pivot_longer(dt, x:y, names_transform = list(name = as.factor)) + out <- collect(step) + + expect_equal(out$name, as.factor(c("x", "y"))) +}) + +test_that("can pivot to multiple measure cols", { + dt <- lazy_dt(head(anscombe, 2), "DT") + step <- pivot_longer( + dt, + everything(), + names_to = c(".value", "set"), + names_pattern = "(.)(.)" + ) + out <- collect(step) + + expect_equal(step$vars, c("set", "x", "y")) +}) + +test_that(".value can be at any position in `names_to`", { + samp1 <- tibble( + i = 1:4, + y_t1 = rnorm(4), + y_t2 = rnorm(4), + z_t1 = rep(3, 4), + z_t2 = rep(-2, 4), + ) + dt1 <- lazy_dt(samp1, "DT1") + + value_first <- dt1 %>% + pivot_longer(-i, names_to = c(".value", "time"), names_sep = "_") %>% + collect() + + samp2 <- dplyr::rename(samp1, t1_y = y_t1, + t2_y = y_t2, + t1_z = z_t1, + t2_z = z_t2) + dt2 <- lazy_dt(samp2, "DT2") + + value_second <- dt2 %>% + pivot_longer(-i, names_to = c("time", ".value"), names_sep = "_") %>% + collect() + + expect_identical(value_first, value_second) +}) + +test_that("can use names_prefix", { + tbl <- tibble(x_x = 1:2, x_y = 3:4) + dt <- lazy_dt(tbl, "DT") + out <- dt %>% + pivot_longer(everything(), names_prefix = "x_") %>% + arrange(name, value) %>% + collect() + expect_equal(out$name, c("x","x","y","y")) + expect_equal(out$value, c(1,2,3,4)) +}) + +test_that("can use names_pattern w/out .value in names_to", { + dt <- data.table(a1_1 = 1, b2_2 = 2) + + out <- dt %>% + pivot_longer( + cols = everything(), + names_to = c("a", "b"), + names_pattern = "([[:alnum:]]+)_([[:alnum:]]+)" + ) %>% + collect() + + expect_named(out, c("a", "b", "value")) + expect_equal(out$a, c("a1", "b2")) + expect_equal(out$b, c("1", "2")) + expect_equal(out$value, c(1, 2)) +}) + +test_that("can use names_sep w/out .value in names_to", { + dt <- data.table(a1_1 = 1, b2_2 = 2) + + out <- dt %>% + pivot_longer( + cols = everything(), + names_to = c("a", "b"), + names_sep = "_" + ) %>% + collect() + + expect_named(out, c("a", "b", "value")) + expect_equal(out$a, c("a1", "b2")) + expect_equal(out$b, c("1", "2")) + expect_equal(out$value, c(1, 2)) +}) From 086fb530c9c86a7db2cce9bfa9f66b1dcbd0f9f6 Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Mon, 1 Mar 2021 09:01:15 -0700 Subject: [PATCH 03/14] Add news bullet --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 528acc7b8..159d8eb57 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * `fill()` (@markfairbanks, #197) + * `pivot_longer()` (@markfairbanks, #204) + * `replace_na()` (@markfairbanks, #202) # dtplyr 1.1.0 From 83517a6212347be753417290653d38dbd1f488d7 Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Mon, 1 Mar 2021 09:07:53 -0700 Subject: [PATCH 04/14] Correctly label pmap() helpers --- R/step-call-pivot_longer.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R index 1fbfb8a1e..bfed4e936 100644 --- a/R/step-call-pivot_longer.R +++ b/R/step-call-pivot_longer.R @@ -362,7 +362,7 @@ list_indices <- function(x, max = 20) { paste(x, collapse = ", ") } -# str_separate() ----------------------------------------------------------------- +# pmap()/pmap_chr() ----------------------------------------------------------------- args_recycle <- function(args) { lengths <- vapply(args, length, integer(1)) From 7d309db1bfb65fadd17affa7dca92091d644283b Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Mon, 1 Mar 2021 09:48:44 -0700 Subject: [PATCH 05/14] Use rlang::is_false() --- R/step-call-pivot_longer.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R index bfed4e936..e5d69debe 100644 --- a/R/step-call-pivot_longer.R +++ b/R/step-call-pivot_longer.R @@ -119,7 +119,7 @@ pivot_longer.dtplyr_step <- function(data, } } - if (isFALSE(values_drop_na)) { + if (is_false(values_drop_na)) { args$na.rm <- NULL } From bc9b182cee03349731473e6c92c9bc5bec3b030c Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Wed, 3 Mar 2021 13:46:44 -0700 Subject: [PATCH 06/14] Use `!!` to allow translation checks --- .../testthat/_snaps/step-call-pivot_longer.md | 35 ++++--------------- tests/testthat/test-step-call-pivot_longer.R | 30 +++++++++++++--- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/tests/testthat/_snaps/step-call-pivot_longer.md b/tests/testthat/_snaps/step-call-pivot_longer.md index c1eeb7221..aa5ab1739 100644 --- a/tests/testthat/_snaps/step-call-pivot_longer.md +++ b/tests/testthat/_snaps/step-call-pivot_longer.md @@ -1,40 +1,17 @@ -# can pivot all cols to long - - Code - show_query(step) - Output - melt(DT, measure.vars = c("x", "y"), variable.name = "name", - variable.factor = FALSE) - -# preserves original keys - - Code - show_query(step) - Output - melt(DT, measure.vars = c("y", "z"), variable.name = "name", - variable.factor = FALSE) - -# can handle missing combinations - - Code - show_query(step) - Output - melt(DT, measure.vars = list(c("x_1", "x_2"), "y_2"), variable.name = "n", - value.name = c("x", "y"), variable.factor = FALSE) - -# can cast values cols +# can coerce values cols Code show_query(step) Output melt(DT, measure.vars = c("x", "y"), variable.name = "name", - variable.factor = FALSE)[, `:=`(value = vec_cast(value, numeric(0)))] + variable.factor = FALSE)[, `:=`(value = .Primitive("as.character")(value))] -# can coerce values cols +# can pivot to multiple measure cols Code show_query(step) Output - melt(DT, measure.vars = c("x", "y"), variable.name = "name", - variable.factor = FALSE)[, `:=`(value = .Primitive("as.character")(value))] + melt(DT, measure.vars = list(c("x1", "x2", "x3", "x4"), c("y1", + "y2", "y3", "y4")), variable.name = "set", value.name = c("x", + "y"), variable.factor = FALSE) diff --git a/tests/testthat/test-step-call-pivot_longer.R b/tests/testthat/test-step-call-pivot_longer.R index b0b05927c..2f746b4da 100644 --- a/tests/testthat/test-step-call-pivot_longer.R +++ b/tests/testthat/test-step-call-pivot_longer.R @@ -4,7 +4,11 @@ test_that("can pivot all cols to long", { step <- pivot_longer(dt, x:y) out <- collect(step) - expect_snapshot(show_query(step)) + expect_equal( + show_query(step), + expr(melt(DT, measure.vars = !!c("x", "y"), variable.name = "name", + variable.factor = FALSE)) + ) expect_equal(step$vars, c("name", "value")) expect_equal(out$name, c("x", "x", "y", "y")) expect_equal(out$value, c(1, 2, 3, 4)) @@ -16,7 +20,11 @@ test_that("preserves original keys", { step <- pivot_longer(dt, y:z) out <- collect(step) - expect_snapshot(show_query(step)) + expect_equal( + show_query(step), + expr(melt(DT, measure.vars = !!c("y", "z"), variable.name = "name", + variable.factor = FALSE)) + ) expect_equal(step$vars, c("x", "name", "value")) expect_equal(out$x, rep(tbl$x, 2)) }) @@ -27,6 +35,11 @@ test_that("can drop missing values", { step <- pivot_longer(dt, x:y, values_drop_na = TRUE) out <- collect(step) + expect_equal( + show_query(step), + expr(melt(DT, measure.vars = !!c("x", "y"), variable.name = "name", + na.rm = TRUE, variable.factor = FALSE)) + ) expect_equal(out$name, c("x", "y")) expect_equal(out$value, c(1, 2)) }) @@ -41,7 +54,11 @@ test_that("can handle missing combinations", { step <- pivot_longer(dt, -id, names_to = c(".value", "n"), names_sep = "_") out <- collect(step) - expect_snapshot(show_query(step)) + expect_equal( + show_query(step), + expr(melt(DT, measure.vars = !!list(c("x_1", "x_2"), "y_2"), variable.name = "n", + value.name = !!c("x", "y"), variable.factor = FALSE)) + ) expect_equal(step$vars, c("id", "n", "x", "y")) expect_equal(out$x, c(1, 3, 2, 4)) expect_equal(out$y, c("a", "b", NA, NA)) @@ -53,7 +70,11 @@ test_that("can cast values cols", { step <- pivot_longer(dt, x:y, values_ptypes = list(value = double())) out <- collect(step) - expect_snapshot(show_query(step)) + expect_equal( + show_query(step), + expr(melt(DT, measure.vars = !!c("x", "y"), variable.name = "name", + variable.factor = FALSE)[, `:=`(value = vec_cast(value, !!numeric(0)))]) + ) expect_equal(out$value, c(1, 2)) }) @@ -95,6 +116,7 @@ test_that("can pivot to multiple measure cols", { ) out <- collect(step) + expect_snapshot(show_query(step)) expect_equal(step$vars, c("set", "x", "y")) }) From c3e094c5ac8dae8035eab14e24631bfbd4cc1e7b Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Thu, 4 Mar 2021 07:31:26 -0700 Subject: [PATCH 07/14] Directly access out$vars --- R/step-call-pivot_longer.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R index e5d69debe..362a239b0 100644 --- a/R/step-call-pivot_longer.R +++ b/R/step-call-pivot_longer.R @@ -146,10 +146,10 @@ pivot_longer.dtplyr_step <- function(data, # Need to drop variable_name and move names_to vars to correct position # Recreates relocate logic so only select is necessary, not relocate + select - sim_vars <- names(simulate_vars(out)) - var_idx <- which(sim_vars == variable_name) - before_vars <- sim_vars[seq_along(sim_vars) < var_idx] - after_vars <- sim_vars[seq_along(sim_vars) > var_idx] + out_vars <- out$vars + var_idx <- which(out_vars == variable_name) + before_vars <- out_vars[seq_along(out_vars) < var_idx] + after_vars <- out_vars[seq_along(out_vars) > var_idx] out <- select(out, !!!syms(before_vars), !!!syms(names_to), !!!syms(after_vars)) } else if (!multiple_names_to && uses_dot_value) { @@ -191,7 +191,6 @@ pivot_longer.dtplyr_step <- function(data, out <- mutate(out, !!!cast_calls) } - # transform vars coerce_vars <- intersect(values_to, names(values_transform)) if (length(coerce_vars) > 0) { coerce_calls <- vector("list", length(coerce_vars)) From 944891c4b207f8f92a2c6f91c5b9fd2432578c46 Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 5 Mar 2021 11:53:05 -0700 Subject: [PATCH 08/14] Error on usage of _ptypes or _transform args --- R/step-call-pivot_longer.R | 88 ++++++++++++-------------------------- 1 file changed, 28 insertions(+), 60 deletions(-) diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R index 362a239b0..be34fc4e9 100644 --- a/R/step-call-pivot_longer.R +++ b/R/step-call-pivot_longer.R @@ -50,17 +50,32 @@ pivot_longer.dtplyr_step <- function(data, names_prefix = NULL, names_sep = NULL, names_pattern = NULL, - names_ptypes = list(), - names_transform = list(), + names_ptypes = NULL, + names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, - values_ptypes = list(), - values_transform = list(), + values_ptypes = NULL, + values_transform = NULL, ...) { + if (!is.null(names_ptypes)) { + abort("`names_ptype` is not supported by dtplyr") + } + + if (!is.null(names_transform)) { + abort("`names_transform` is not supported by dtplyr") + } + + if (!is.null(values_ptypes)) { + abort("`values_ptypes` is not supported by dtplyr") + } + + if (!is.null(values_transform)) { + abort("`values_transform` is not supported by dtplyr") + } + sim_data <- simulate_vars(data) - sim_vars <- names(sim_data) measure_vars <- names(tidyselect::eval_select(enquo(cols), sim_data)) if (length(measure_vars) == 0) { abort("`cols` must select at least one column.") @@ -113,16 +128,15 @@ pivot_longer.dtplyr_step <- function(data, args$variable.name <- NULL } - if (length(values_to) == 1) { - if (values_to == "value") { - args$value.name <- NULL - } + if (identical(values_to, "value")) { + args$value.name <- NULL } if (is_false(values_drop_na)) { args$na.rm <- NULL } + sim_vars <- names(sim_data) id_vars <- sim_vars[!sim_vars %in% unlist(measure_vars)] out <- step_call( @@ -156,53 +170,7 @@ pivot_longer.dtplyr_step <- function(data, out <- mutate(out, variable = NULL) } - out <- step_repair(out, repair = names_repair) - - ## names_ptype & names_transform - cast_vars <- intersect(names_to, names(names_ptypes)) - if (length(cast_vars) > 0) { - cast_calls <- vector("list", length(cast_vars)) - names(cast_calls) <- cast_vars - for (i in seq_along(cast_calls)) { - cast_calls[[i]] <- call2("vec_cast", sym(cast_vars[[i]]), names_ptypes[[i]]) - } - out <- mutate(out, !!!cast_calls) - } - - coerce_vars <- intersect(names_to, names(names_transform)) - if (length(coerce_vars) > 0) { - coerce_calls <- vector("list", length(coerce_vars)) - names(coerce_calls) <- coerce_vars - for (i in seq_along(coerce_calls)) { - .fn <- as_function(names_transform[[i]]) - coerce_calls[[i]] <- call2(.fn, sym(coerce_vars[[i]])) - } - out <- mutate(out, !!!coerce_calls) - } - - ## values_ptype & values_transform - cast_vars <- intersect(values_to, names(values_ptypes)) - if (length(cast_vars) > 0) { - cast_calls <- vector("list", length(cast_vars)) - names(cast_calls) <- cast_vars - for (i in seq_along(cast_calls)) { - cast_calls[[i]] <- call2(expr(vec_cast), sym(cast_vars[[i]]), values_ptypes[[i]]) - } - out <- mutate(out, !!!cast_calls) - } - - coerce_vars <- intersect(values_to, names(values_transform)) - if (length(coerce_vars) > 0) { - coerce_calls <- vector("list", length(coerce_vars)) - names(coerce_calls) <- coerce_vars - for (i in seq_along(coerce_calls)) { - .fn <- as_function(values_transform[[i]]) - coerce_calls[[i]] <- call2(.fn, sym(coerce_vars[[i]])) - } - out <- mutate(out, !!!coerce_calls) - } - - out + step_repair(out, repair = names_repair) } # exported onLoad @@ -212,13 +180,13 @@ pivot_longer.data.table <- function(data, names_prefix = NULL, names_sep = NULL, names_pattern = NULL, - names_ptypes = list(), - names_transform = list(), + names_ptypes = NULL, + names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, - values_ptypes = list(), - values_transform = list(), + values_ptypes = NULL, + values_transform = NULL, ...) { data <- lazy_dt(data) tidyr::pivot_longer( From 93d5d556e024f5e136acbe52476ea56aeb91a09c Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 5 Mar 2021 11:53:18 -0700 Subject: [PATCH 09/14] Update tests --- .../testthat/_snaps/step-call-pivot_longer.md | 8 ---- tests/testthat/test-step-call-pivot_longer.R | 42 ------------------- 2 files changed, 50 deletions(-) diff --git a/tests/testthat/_snaps/step-call-pivot_longer.md b/tests/testthat/_snaps/step-call-pivot_longer.md index aa5ab1739..1fc14e214 100644 --- a/tests/testthat/_snaps/step-call-pivot_longer.md +++ b/tests/testthat/_snaps/step-call-pivot_longer.md @@ -1,11 +1,3 @@ -# can coerce values cols - - Code - show_query(step) - Output - melt(DT, measure.vars = c("x", "y"), variable.name = "name", - variable.factor = FALSE)[, `:=`(value = .Primitive("as.character")(value))] - # can pivot to multiple measure cols Code diff --git a/tests/testthat/test-step-call-pivot_longer.R b/tests/testthat/test-step-call-pivot_longer.R index 2f746b4da..eaba57651 100644 --- a/tests/testthat/test-step-call-pivot_longer.R +++ b/tests/testthat/test-step-call-pivot_longer.R @@ -64,48 +64,6 @@ test_that("can handle missing combinations", { expect_equal(out$y, c("a", "b", NA, NA)) }) -test_that("can cast values cols", { - tbl <- tibble(x = 1L, y = 2L) - dt <- lazy_dt(tbl, "DT") - step <- pivot_longer(dt, x:y, values_ptypes = list(value = double())) - out <- collect(step) - - expect_equal( - show_query(step), - expr(melt(DT, measure.vars = !!c("x", "y"), variable.name = "name", - variable.factor = FALSE)[, `:=`(value = vec_cast(value, !!numeric(0)))]) - ) - expect_equal(out$value, c(1, 2)) -}) - -test_that("can coerce values cols", { - tbl <- tibble(x = 1, y = 2) - dt <- lazy_dt(tbl, "DT") - step <- pivot_longer(dt, x:y, values_transform = list(value = as.character)) - out <- collect(step) - - expect_snapshot(show_query(step)) - expect_equal(out$value, c("1", "2")) -}) - -test_that("can cast names cols", { - tbl <- tibble(x = 1L, y = 2L) - dt <- lazy_dt(tbl, "DT") - step <- pivot_longer(dt, x:y, names_ptypes = list(name = factor())) - out <- collect(step) - - expect_equal(out$name, as.factor(c("x", "y"))) -}) - -test_that("can coerce names cols", { - tbl <- tibble(x = 1, y = 2) - dt <- lazy_dt(tbl, "DT") - step <- pivot_longer(dt, x:y, names_transform = list(name = as.factor)) - out <- collect(step) - - expect_equal(out$name, as.factor(c("x", "y"))) -}) - test_that("can pivot to multiple measure cols", { dt <- lazy_dt(head(anscombe, 2), "DT") step <- pivot_longer( From 462133ab924205ded1fc23071fdc2a699a19c80e Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Fri, 5 Mar 2021 11:58:27 -0700 Subject: [PATCH 10/14] Update documentation --- man/pivot_longer.dtplyr_step.Rd | 40 ++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/man/pivot_longer.dtplyr_step.Rd b/man/pivot_longer.dtplyr_step.Rd index f95e1d92e..280cd3dac 100644 --- a/man/pivot_longer.dtplyr_step.Rd +++ b/man/pivot_longer.dtplyr_step.Rd @@ -11,13 +11,13 @@ names_prefix = NULL, names_sep = NULL, names_pattern = NULL, - names_ptypes = list(), - names_transform = list(), + names_ptypes = NULL, + names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, - values_ptypes = list(), - values_transform = list(), + values_ptypes = NULL, + values_transform = NULL, ... ) } @@ -73,18 +73,20 @@ needed.} \item{names_ptypes}{A list of column name-prototype pairs. A prototype (or ptype for short) is a zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type, class, and attributes of a vector. -Use these arguments to confirm that the created columns are the types that -you expect. +Use these arguments if you want to confirm that the created columns are +the types that you expect. Note that if you want to change (instead of confirm) +the types of specific columns, you should use \code{names_transform} or +\code{values_transform} instead.} + +\item{names_transform}{A list of column name-function pairs. +Use these arguments if you need to change the types of specific columns. +For example, \code{names_transform = list(week = as.integer)} would convert +a character variable called \code{week} to an integer. If not specified, the type of the columns generated from \code{names_to} will be character, and the type of the variables generated from \code{values_to} will be the common type of the input columns used to generate them.} -\item{names_transform}{A list of column name-function pairs. -Use these arguments if you need to change the type of specific columns. -For example, \code{names_transform = list(week = as.integer)} would convert -a character week variable to an integer.} - \item{names_repair}{What happens if the output has invalid column names? The default, \code{"check_unique"} is to error if the columns are duplicated. Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to @@ -105,18 +107,20 @@ values in \code{data} were created by its structure.} \item{values_ptypes}{A list of column name-prototype pairs. A prototype (or ptype for short) is a zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type, class, and attributes of a vector. -Use these arguments to confirm that the created columns are the types that -you expect. +Use these arguments if you want to confirm that the created columns are +the types that you expect. Note that if you want to change (instead of confirm) +the types of specific columns, you should use \code{names_transform} or +\code{values_transform} instead.} + +\item{values_transform}{A list of column name-function pairs. +Use these arguments if you need to change the types of specific columns. +For example, \code{names_transform = list(week = as.integer)} would convert +a character variable called \code{week} to an integer. If not specified, the type of the columns generated from \code{names_to} will be character, and the type of the variables generated from \code{values_to} will be the common type of the input columns used to generate them.} -\item{values_transform}{A list of column name-function pairs. -Use these arguments if you need to change the type of specific columns. -For example, \code{names_transform = list(week = as.integer)} would convert -a character week variable to an integer.} - \item{...}{Additional arguments passed on to methods.} } \description{ From 35a4de2c185d1a02d86493af6913657e629b1bd4 Mon Sep 17 00:00:00 2001 From: Mark Fairbanks Date: Sat, 6 Mar 2021 04:13:37 -0700 Subject: [PATCH 11/14] Remove vec_cast from add_dt_wrappers --- R/tidyeval.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/tidyeval.R b/R/tidyeval.R index 2781448ab..d4cdf6d6f 100644 --- a/R/tidyeval.R +++ b/R/tidyeval.R @@ -19,13 +19,9 @@ dt_funs <- c( "setcolorder", "setnames" ) add_dt_wrappers <- function(env) { - env_bind( - env, - !!!env_get_list(ns_env("data.table"), dt_funs), - vec_cast = vctrs::vec_cast - ) + env_bind(env, !!!env_get_list(ns_env("data.table"), dt_funs)) } -globalVariables(c(dt_funs, "vec_cast")) +globalVariables(dt_funs) # These functions attempt to simulate tidy eval as much as possible within # data.table. The goal is to get the majority of real-world code to work, From 29c67aee708d4a9c9dc58bcae2a18addac4d4a24 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 7 Mar 2021 08:18:50 -0600 Subject: [PATCH 12/14] Document & test unsupported params --- R/step-call-pivot_longer.R | 4 ++- man/pivot_longer.dtplyr_step.Rd | 34 +------------------ .../testthat/_snaps/step-call-pivot_longer.md | 19 +++++++++++ .../testthat/_snaps/step-call-pivot_wider.md | 2 +- tests/testthat/test-step-call-pivot_longer.R | 12 +++++++ 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R index be34fc4e9..5fe93ba5c 100644 --- a/R/step-call-pivot_longer.R +++ b/R/step-call-pivot_longer.R @@ -6,6 +6,8 @@ #' #' @param data A [lazy_dt()]. #' @inheritParams tidyr::pivot_longer +#' @param names_ptypes,names_transform,values_ptypes,values_transform +#' Not currently supported by dtplyr. #' @examples #' library(tidyr) #' @@ -60,7 +62,7 @@ pivot_longer.dtplyr_step <- function(data, ...) { if (!is.null(names_ptypes)) { - abort("`names_ptype` is not supported by dtplyr") + abort("`names_ptypes` is not supported by dtplyr") } if (!is.null(names_transform)) { diff --git a/man/pivot_longer.dtplyr_step.Rd b/man/pivot_longer.dtplyr_step.Rd index 280cd3dac..cfe00a138 100644 --- a/man/pivot_longer.dtplyr_step.Rd +++ b/man/pivot_longer.dtplyr_step.Rd @@ -70,22 +70,7 @@ If these arguments do not give you enough control, use \code{pivot_longer_spec()} to create a spec object and process manually as needed.} -\item{names_ptypes}{A list of column name-prototype pairs. -A prototype (or ptype for short) is a zero-length vector (like \code{integer()} -or \code{numeric()}) that defines the type, class, and attributes of a vector. -Use these arguments if you want to confirm that the created columns are -the types that you expect. Note that if you want to change (instead of confirm) -the types of specific columns, you should use \code{names_transform} or -\code{values_transform} instead.} - -\item{names_transform}{A list of column name-function pairs. -Use these arguments if you need to change the types of specific columns. -For example, \code{names_transform = list(week = as.integer)} would convert -a character variable called \code{week} to an integer. - -If not specified, the type of the columns generated from \code{names_to} will -be character, and the type of the variables generated from \code{values_to} -will be the common type of the input columns used to generate them.} +\item{names_ptypes, names_transform, values_ptypes, values_transform}{Not currently supported by dtplyr.} \item{names_repair}{What happens if the output has invalid column names? The default, \code{"check_unique"} is to error if the columns are duplicated. @@ -104,23 +89,6 @@ in the \code{value_to} column. This effectively converts explicit missing values to implicit missing values, and should generally be used only when missing values in \code{data} were created by its structure.} -\item{values_ptypes}{A list of column name-prototype pairs. -A prototype (or ptype for short) is a zero-length vector (like \code{integer()} -or \code{numeric()}) that defines the type, class, and attributes of a vector. -Use these arguments if you want to confirm that the created columns are -the types that you expect. Note that if you want to change (instead of confirm) -the types of specific columns, you should use \code{names_transform} or -\code{values_transform} instead.} - -\item{values_transform}{A list of column name-function pairs. -Use these arguments if you need to change the types of specific columns. -For example, \code{names_transform = list(week = as.integer)} would convert -a character variable called \code{week} to an integer. - -If not specified, the type of the columns generated from \code{names_to} will -be character, and the type of the variables generated from \code{values_to} -will be the common type of the input columns used to generate them.} - \item{...}{Additional arguments passed on to methods.} } \description{ diff --git a/tests/testthat/_snaps/step-call-pivot_longer.md b/tests/testthat/_snaps/step-call-pivot_longer.md index 1fc14e214..1ee3533f9 100644 --- a/tests/testthat/_snaps/step-call-pivot_longer.md +++ b/tests/testthat/_snaps/step-call-pivot_longer.md @@ -7,3 +7,22 @@ "y2", "y3", "y4")), variable.name = "set", value.name = c("x", "y"), variable.factor = FALSE) +# informative errors on unsupported features + + Code + dt %>% pivot_longer(names_ptypes = list()) + Error + `names_ptypes` is not supported by dtplyr + Code + dt %>% pivot_longer(names_transform = list()) + Error + `names_transform` is not supported by dtplyr + Code + dt %>% pivot_longer(values_ptypes = list()) + Error + `values_ptypes` is not supported by dtplyr + Code + dt %>% pivot_longer(values_transform = list()) + Error + `values_transform` is not supported by dtplyr + diff --git a/tests/testthat/_snaps/step-call-pivot_wider.md b/tests/testthat/_snaps/step-call-pivot_wider.md index 9aafab79e..1a18cf29a 100644 --- a/tests/testthat/_snaps/step-call-pivot_wider.md +++ b/tests/testthat/_snaps/step-call-pivot_wider.md @@ -3,7 +3,7 @@ Code show_query(step) Output - setnames(dcast(`_DT8`, formula = "..." ~ x + y, value.var = c("a", + setnames(dcast(`_DT12`, formula = "..." ~ x + y, value.var = c("a", "b"))[, .(a_X_1, a_Y_2, b_X_1, b_Y_2)], old = c("a_X_1", "a_Y_2", "b_X_1", "b_Y_2"), new = c("X1_a", "Y2_a", "X1_b", "Y2_b")) diff --git a/tests/testthat/test-step-call-pivot_longer.R b/tests/testthat/test-step-call-pivot_longer.R index eaba57651..25dc4812a 100644 --- a/tests/testthat/test-step-call-pivot_longer.R +++ b/tests/testthat/test-step-call-pivot_longer.R @@ -149,3 +149,15 @@ test_that("can use names_sep w/out .value in names_to", { expect_equal(out$b, c("1", "2")) expect_equal(out$value, c(1, 2)) }) + +test_that("informative errors on unsupported features", { + dt <- data.table(a1_1 = 1, b2_2 = 2) + + expect_snapshot(error = TRUE, { + dt %>% pivot_longer(names_ptypes = list()) + dt %>% pivot_longer(names_transform = list()) + dt %>% pivot_longer(values_ptypes = list()) + dt %>% pivot_longer(values_transform = list()) + }) + +}) From 6aef3009136dc3aac091e3756e9d02cf3172d6c1 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Sun, 7 Mar 2021 08:23:20 -0600 Subject: [PATCH 13/14] Clarify inlined code --- R/step-call-pivot_longer.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/step-call-pivot_longer.R b/R/step-call-pivot_longer.R index 5fe93ba5c..06cd8c651 100644 --- a/R/step-call-pivot_longer.R +++ b/R/step-call-pivot_longer.R @@ -209,6 +209,12 @@ pivot_longer.data.table <- function(data, ) } +# ============================================================================== +# inlined from tidyr +# https://github.com/tidyverse/tidyr/issues/1103 +# ============================================================================== +# nocov start + # str_extract() ----------------------------------------------------------------- str_extract <- function(x, into, regex, convert = FALSE) { stopifnot( @@ -356,3 +362,5 @@ pmap <- function(.l, .f, ...) { pmap_chr <- function(.l, .f, ...) { as.character(pmap(.l, .f, ...)) } + +# nocov end From b160753175adc18d7715b0018b65f87ab9181456 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Mon, 8 Mar 2021 07:43:44 -0600 Subject: [PATCH 14/14] Use hack to attach tidyr when documenting --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d28e6e7dc..738aa6076 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,6 +39,6 @@ VignetteBuilder: knitr Encoding: UTF-8 LazyData: true -Roxygen: list(markdown = TRUE) +Roxygen: {library(tidyr); list(markdown = TRUE)} RoxygenNote: 7.1.1 Config/testthat/edition: 3