From f04a6fe5ec0361a4b479ba1aff168f61fbc79c2a Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Wed, 8 Oct 2025 12:56:26 -0400 Subject: [PATCH] Add `.size` and `.error_call` to `vec_interleave()` --- NEWS.md | 4 + R/slice-interleave.R | 82 +++++++---- man/vec_interleave.Rd | 31 ++-- src/decl/slice-interleave-decl.h | 11 +- src/globals.c | 1 + src/globals.h | 1 + src/init.c | 4 +- src/slice-interleave.c | 170 ++++++++++++++++++---- src/slice-interleave.h | 17 +++ tests/testthat/_snaps/slice-interleave.md | 130 ++++++++++++++++- tests/testthat/test-slice-interleave.R | 102 +++++++++++-- 11 files changed, 469 insertions(+), 84 deletions(-) create mode 100644 src/slice-interleave.h diff --git a/NEWS.md b/NEWS.md index 7007c569d..abc09c463 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # vctrs (development version) +* `vec_interleave()` gains new `.size` and `.error_call` arguments. + +* `vec_interleave()` now reports the correct index in errors when `NULL`s are present. + * New `list_combine()` for combining a list of vectors together according to a set of `indices`. We now recommend using: * `list_combine(x, indices = indices, size = size)` over `list_unchop(x, indices = indices)` diff --git a/R/slice-interleave.R b/R/slice-interleave.R index 53c19755c..63a82c42a 100644 --- a/R/slice-interleave.R +++ b/R/slice-interleave.R @@ -15,12 +15,19 @@ #' #' ## vctrs dependencies #' -#' - [list_unchop()] +#' - [list_combine()] #' #' @inheritParams vec_c #' -#' @param ... Vectors to interleave. These will be -#' [recycled][theory-faq-recycling] to a common size. +#' @param ... Vectors to interleave. +#' +#' @param .size The expected size of each vector. If not provided, computed +#' automatically by [vec_size_common()]. Each vector will be +#' [recycled][theory-faq-recycling] to this size. +#' +#' @param .ptype The expected type of each vector. If not provided, computed +#' automatically by [vec_ptype_common()]. Each vector will be +#' [cast][theory-faq-coercion] to this type. #' #' @export #' @examples @@ -35,8 +42,15 @@ #' y <- data_frame(x = 3:4, y = c("c", "d")) #' #' vec_interleave(x, y) +#' +#' # `.size` can be used to recycle size 1 elements before interleaving +#' vec_interleave(1, 2, .size = 3) +#' +#' # `.ptype` can be used to enforce a particular type +#' typeof(vec_interleave(1, 2, .ptype = integer())) vec_interleave <- function( ..., + .size = NULL, .ptype = NULL, .name_spec = NULL, .name_repair = c( @@ -46,34 +60,48 @@ vec_interleave <- function( "universal", "unique_quiet", "universal_quiet" - ) + ), + .error_call = current_env() ) { - args <- list2(...) - - # `NULL`s must be dropped up front to generate appropriate indices - if (vec_any_missing(args)) { - missing <- vec_detect_missing(args) - args <- vec_slice(args, !missing) - } - - n <- length(args) - size <- vec_size_common(!!!args) - - indices <- vec_interleave_indices(n, size) - - # TODO: Consider switching to `list_combine()`, add tests for empty `...` - # case, which would start returning `unspecified()` rather than `NULL`. - # This would be more correct and good for composibility, see - # https://github.com/r-lib/vctrs/issues/2055 - list_unchop( - x = args, - indices = indices, + list_interleave( + x = list2(...), + size = .size, ptype = .ptype, name_spec = .name_spec, - name_repair = .name_repair + name_repair = .name_repair, + x_arg = "", + error_call = .error_call ) } -vec_interleave_indices <- function(n, size) { - .Call(ffi_interleave_indices, n, size) +# It's sometimes more convenient to supply a list, plus you get access to +# `x_arg` for better error messages than you get from `vec_interleave(!!!x)`. +# We could consider exporting this alongside `vec_interleave()`. +list_interleave <- function( + x, + ..., + size = NULL, + ptype = NULL, + name_spec = NULL, + name_repair = c( + "minimal", + "unique", + "check_unique", + "universal", + "unique_quiet", + "universal_quiet" + ), + x_arg = caller_arg(x), + error_call = current_env() +) { + check_dots_empty0(...) + .Call( + ffi_list_interleave, + x, + size, + ptype, + name_spec, + name_repair, + environment() + ) } diff --git a/man/vec_interleave.Rd b/man/vec_interleave.Rd index 26148005c..9e212bc54 100644 --- a/man/vec_interleave.Rd +++ b/man/vec_interleave.Rd @@ -6,22 +6,24 @@ \usage{ vec_interleave( ..., + .size = NULL, .ptype = NULL, .name_spec = NULL, .name_repair = c("minimal", "unique", "check_unique", "universal", "unique_quiet", - "universal_quiet") + "universal_quiet"), + .error_call = current_env() ) } \arguments{ -\item{...}{Vectors to interleave. These will be -\link[=theory-faq-recycling]{recycled} to a common size.} +\item{...}{Vectors to interleave.} -\item{.ptype}{If \code{NULL}, the default, the output type is determined by -computing the common type across all elements of \code{...}. +\item{.size}{The expected size of each vector. If not provided, computed +automatically by \code{\link[=vec_size_common]{vec_size_common()}}. Each vector will be +\link[=theory-faq-recycling]{recycled} to this size.} -Alternatively, you can supply \code{.ptype} to give the output known type. -If \code{getOption("vctrs.no_guessing")} is \code{TRUE} you must supply this value: -this is a convenient way to make production code demand fixed types.} +\item{.ptype}{The expected type of each vector. If not provided, computed +automatically by \code{\link[=vec_ptype_common]{vec_ptype_common()}}. Each vector will be +\link[=theory-faq-coercion]{cast} to this type.} \item{.name_spec}{A name specification for combining inner and outer names. This is relevant for inputs passed with a @@ -46,6 +48,11 @@ See the \link[=name_spec]{name specification topic}.} \item{.name_repair}{How to repair names, see \code{repair} options in \code{\link[=vec_as_names]{vec_as_names()}}.} + +\item{.error_call}{The execution environment of a currently +running function, e.g. \code{caller_env()}. The function will be +mentioned in error messages as the source of the error. See the +\code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.} } \description{ \code{vec_interleave()} combines multiple vectors together, much like \code{\link[=vec_c]{vec_c()}}, @@ -61,7 +68,7 @@ It is a more efficient equivalent to the following usage of \code{vec_c()}: \subsection{vctrs dependencies}{ \itemize{ -\item \code{\link[=list_unchop]{list_unchop()}} +\item \code{\link[=list_combine]{list_combine()}} } } } @@ -78,4 +85,10 @@ x <- data_frame(x = 1:2, y = c("a", "b")) y <- data_frame(x = 3:4, y = c("c", "d")) vec_interleave(x, y) + +# `.size` can be used to recycle size 1 elements before interleaving +vec_interleave(1, 2, .size = 3) + +# `.ptype` can be used to enforce a particular type +typeof(vec_interleave(1, 2, .ptype = integer())) } diff --git a/src/decl/slice-interleave-decl.h b/src/decl/slice-interleave-decl.h index 8061c7676..3b0daf622 100644 --- a/src/decl/slice-interleave-decl.h +++ b/src/decl/slice-interleave-decl.h @@ -1 +1,10 @@ -static r_obj* vec_interleave_indices(r_ssize n, r_ssize size); +static +r_ssize list_interleave_x_size_used(r_obj* const* v_x, r_ssize x_size); + +static +r_obj* list_interleave_indices( + r_obj* const* v_x, + r_ssize x_size, + r_ssize x_size_used, + r_ssize elt_size +); diff --git a/src/globals.c b/src/globals.c index bd38e27c2..e3e51314e 100644 --- a/src/globals.c +++ b/src/globals.c @@ -92,6 +92,7 @@ void vctrs_init_globals(r_obj* ns) { INIT_ARG(indices); INIT_ARG(sizes); INIT_ARG(ptype); + INIT_ARG(size); // Lazy args --------------------------------------------------------- INIT_LAZY_ARG_2(dot_name_repair, ".name_repair"); diff --git a/src/globals.h b/src/globals.h index 2001a73b7..bcd2d34cb 100644 --- a/src/globals.h +++ b/src/globals.h @@ -69,6 +69,7 @@ struct vec_args { struct vctrs_arg* indices; struct vctrs_arg* sizes; struct vctrs_arg* ptype; + struct vctrs_arg* size; }; struct lazy_args { diff --git a/src/init.c b/src/init.c index e3af327a1..92a5356a0 100644 --- a/src/init.c +++ b/src/init.c @@ -142,7 +142,7 @@ extern r_obj* vctrs_integer64_proxy(r_obj*); extern r_obj* vctrs_integer64_restore(r_obj*); extern r_obj* vctrs_list_drop_empty(r_obj*); extern r_obj* vctrs_is_altrep(r_obj* x); -extern r_obj* ffi_interleave_indices(r_obj*, r_obj*); +extern r_obj* ffi_list_interleave(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_compute_nesting_container_info(r_obj*, r_obj*); extern r_obj* ffi_locate_matches(r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*, r_obj*); extern r_obj* ffi_interval_groups(r_obj*, r_obj*, r_obj*, r_obj*); @@ -343,7 +343,7 @@ static const R_CallMethodDef CallEntries[] = { {"vctrs_integer64_restore", (DL_FUNC) &vctrs_integer64_restore, 1}, {"vctrs_list_drop_empty", (DL_FUNC) &vctrs_list_drop_empty, 1}, {"vctrs_is_altrep", (DL_FUNC) &vctrs_is_altrep, 1}, - {"ffi_interleave_indices", (DL_FUNC) &ffi_interleave_indices, 2}, + {"ffi_list_interleave", (DL_FUNC) &ffi_list_interleave, 6}, {"ffi_compute_nesting_container_info", (DL_FUNC) &ffi_compute_nesting_container_info, 2}, {"ffi_locate_matches", (DL_FUNC) &ffi_locate_matches, 14}, {"ffi_interval_groups", (DL_FUNC) &ffi_interval_groups, 4}, diff --git a/src/slice-interleave.c b/src/slice-interleave.c index ecaf49a9f..fbbf512c4 100644 --- a/src/slice-interleave.c +++ b/src/slice-interleave.c @@ -1,52 +1,162 @@ +#include "slice-interleave.h" #include "vctrs.h" + #include "decl/slice-interleave-decl.h" -// [[ register() ]] -r_obj* ffi_interleave_indices(r_obj* n, r_obj* size) { - r_ssize c_n = r_arg_as_ssize(n, "n"); - r_ssize c_size = r_arg_as_ssize(size, "size"); - return vec_interleave_indices(c_n, c_size); +r_obj* ffi_list_interleave( + r_obj* ffi_x, + r_obj* ffi_size, + r_obj* ffi_ptype, + r_obj* ffi_name_spec, + r_obj* ffi_name_repair, + r_obj* ffi_frame +) { + struct r_lazy x_arg_lazy = { .x = syms.x_arg, .env = ffi_frame }; + struct vctrs_arg x_arg = new_lazy_arg(&x_arg_lazy); + + struct r_lazy error_call = { .x = r_syms.error_call, .env = ffi_frame }; + + const r_ssize size = (ffi_size == r_null) ? + -1 : + vec_as_short_length(ffi_size, vec_args.size, error_call); + + struct name_repair_opts name_repair_opts = new_name_repair_opts( + ffi_name_repair, + r_lazy_null, + false, + error_call + ); + KEEP(name_repair_opts.shelter); + + r_obj* out = list_interleave( + ffi_x, + size, + ffi_ptype, + ffi_name_spec, + &name_repair_opts, + &x_arg, + error_call + ); + + FREE(1); + return out; } -static -r_obj* vec_interleave_indices(r_ssize n, r_ssize size) { - if (n < 0) { - r_stop_internal( - "`n` must be greater than or equal to 0." - ); - } +r_obj* list_interleave( + r_obj* x, + r_ssize size, + r_obj* ptype, + r_obj* name_spec, + const struct name_repair_opts* p_name_repair_opts, + struct vctrs_arg* p_x_arg, + struct r_lazy error_call +) { + obj_check_list(x, p_x_arg, error_call); - if (size < 0) { - r_stop_internal( - "`size` must be greater than or equal to 0." - ); - } + const r_ssize elt_size = (size == -1) ? + vec_check_size_common(x, 0, p_x_arg, error_call) : + size; + + r_obj* const* v_x = r_list_cbegin(x); + const r_ssize x_size = r_length(x); + + // `x_size` excluding `NULL`s + const r_ssize x_size_used = list_interleave_x_size_used( + v_x, + x_size + ); - const r_ssize total_size = r_ssize_mult(n, size); + const r_ssize out_size = r_ssize_mult(x_size_used, elt_size); - if (total_size > R_LEN_T_MAX) { + if (out_size > R_LEN_T_MAX) { r_abort( - "Long vectors are not yet supported in `vec_interleave()`. " + "Long vectors are not yet supported in `list_interleave()`. " "Result from interleaving would have size %td, which is larger " "than the maximum supported size of 2^31 - 1.", - total_size + out_size ); } - r_obj* out = KEEP(r_alloc_list(n)); + r_obj* indices = KEEP(list_interleave_indices( + v_x, + x_size, + x_size_used, + elt_size + )); - for (r_ssize i = 0; i < n; ++i) { - const r_ssize start = i + 1; + r_obj* default_ = r_null; + struct vctrs_arg* p_indices_arg = vec_args.empty; + struct vctrs_arg* p_default_arg = vec_args.empty; - r_obj* elt = r_alloc_integer(size); - r_list_poke(out, i, elt); - int* v_elt = r_int_begin(elt); + r_obj* out = list_combine( + x, + indices, + out_size, + default_, + LIST_COMBINE_UNMATCHED_default, + LIST_COMBINE_MULTIPLE_last, + ASSIGNMENT_SLICE_VALUE_no, + ptype, + name_spec, + p_name_repair_opts, + p_x_arg, + p_indices_arg, + p_default_arg, + error_call + ); + + FREE(1); + return out; +} - for (r_ssize j = 0; j < size; ++j) { - v_elt[j] = start + n * j; +static +r_ssize list_interleave_x_size_used(r_obj* const* v_x, r_ssize x_size) { + r_ssize x_size_used = 0; + + for (r_ssize i = 0; i < x_size; ++i) { + r_obj* elt = v_x[i]; + + if (elt == r_null) { + continue; + } + + ++x_size_used; + } + + return x_size_used; +} + +static +r_obj* list_interleave_indices( + r_obj* const* v_x, + r_ssize x_size, + r_ssize x_size_used, + r_ssize elt_size +) { + r_obj* indices = KEEP(r_alloc_list(x_size)); + + r_ssize start = 0; + + for (r_ssize i = 0; i < x_size; ++i) { + r_obj* elt = v_x[i]; + + if (elt == r_null) { + // Insert `integer()` index for `NULL`, don't advance `start` + r_list_poke(indices, i, r_globals.empty_int); + continue; + } + + ++start; + + r_obj* index = r_alloc_integer(elt_size); + r_list_poke(indices, i, index); + int* v_index = r_int_begin(index); + + for (r_ssize j = 0; j < elt_size; ++j) { + v_index[j] = start + x_size_used * j; } } FREE(1); - return out; + return indices; } diff --git a/src/slice-interleave.h b/src/slice-interleave.h new file mode 100644 index 000000000..ebe81e079 --- /dev/null +++ b/src/slice-interleave.h @@ -0,0 +1,17 @@ +#ifndef VCTRS_SLICE_INTERLEAVE_H +#define VCTRS_SLICE_INTERLEAVE_H + +#include "vctrs-core.h" +#include "names.h" + +r_obj* list_interleave( + r_obj* x, + r_ssize size, + r_obj* ptype, + r_obj* name_spec, + const struct name_repair_opts* p_name_repair_opts, + struct vctrs_arg* p_x_arg, + struct r_lazy error_call +); + +#endif diff --git a/tests/testthat/_snaps/slice-interleave.md b/tests/testthat/_snaps/slice-interleave.md index fae12eaee..5cc810ba1 100644 --- a/tests/testthat/_snaps/slice-interleave.md +++ b/tests/testthat/_snaps/slice-interleave.md @@ -16,7 +16,55 @@ res_unique <- vec_interleave(c(x = 1), c(x = 2), .name_repair = "unique_quiet") res_universal <- vec_interleave(c(`if` = 1), c(`in` = 2), .name_repair = "universal_quiet") -# uses recycling errors +# reports type errors + + Code + vec_interleave(1, "x") + Condition + Error in `vec_interleave()`: + ! Can't combine `..1` and `..2` . + +--- + + Code + vec_interleave(1, "x", .error_call = quote(foo())) + Condition + Error in `foo()`: + ! Can't combine `..1` and `..2` . + +--- + + Code + vec_interleave(1, "x", .ptype = double()) + Condition + Error in `vec_interleave()`: + ! Can't convert `..2` to . + +--- + + Code + vec_interleave(1, "x", .ptype = double(), .error_call = quote(foo())) + Condition + Error in `foo()`: + ! Can't convert `..2` to . + +--- + + Code + vec_interleave(1, NULL, "x") + Condition + Error in `vec_interleave()`: + ! Can't combine `..1` and `..3` . + +--- + + Code + vec_interleave(1, NULL, "x", .ptype = double()) + Condition + Error in `vec_interleave()`: + ! Can't convert `..3` to . + +# reports recycling errors Code vec_interleave(1:2, 1:3) @@ -24,11 +72,83 @@ Error in `vec_interleave()`: ! Can't recycle `..1` (size 2) to match `..2` (size 3). -# errors if the result would be a long vector +--- + + Code + vec_interleave(1:2, 1:3, .error_call = quote(foo())) + Condition + Error in `foo()`: + ! Can't recycle `..1` (size 2) to match `..2` (size 3). + +--- + + Code + vec_interleave(1:2, 3:4, .size = 3) + Condition + Error in `vec_interleave()`: + ! Can't recycle `..1` (size 2) to size 3. + +--- + + Code + vec_interleave(1:2, 3:4, .size = 3, .error_call = quote(foo())) + Condition + Error in `foo()`: + ! Can't recycle `..1` (size 2) to size 3. + +--- + + Code + vec_interleave(1:2, NULL, 1:3) + Condition + Error in `vec_interleave()`: + ! Can't recycle `..1` (size 2) to match `..3` (size 3). + +--- + + Code + vec_interleave(1:2, NULL, 1:3, .size = 2) + Condition + Error in `vec_interleave()`: + ! Can't recycle `..3` (size 3) to size 2. + +# reports scalar errors + + Code + vec_interleave(lm(1 ~ 1)) + Condition + Error in `vec_interleave()`: + ! `..1` must be a vector, not a object. + +--- + + Code + vec_interleave(lm(1 ~ 1), .error_call = quote(foo())) + Condition + Error in `foo()`: + ! `..1` must be a vector, not a object. + +--- + + Code + vec_interleave(1, NULL, lm(1 ~ 1)) + Condition + Error in `vec_interleave()`: + ! `..3` must be a vector, not a object. + +--- + + Code + vec_interleave(1, NULL, lm(1 ~ 1), .error_call = quote(foo())) + Condition + Error in `foo()`: + ! `..3` must be a vector, not a object. + +# `list_interleave()` checks for a list Code - vec_interleave_indices(3L, 1000000000L) + list_interleave(1) Condition - Error in `vec_interleave_indices()`: - ! Long vectors are not yet supported in `vec_interleave()`. Result from interleaving would have size 3000000000, which is larger than the maximum supported size of 2^31 - 1. + Error in `list_interleave()`: + ! `1` must be a list, not the number 1. diff --git a/tests/testthat/test-slice-interleave.R b/tests/testthat/test-slice-interleave.R index be9b1abf0..7cbd7e3af 100644 --- a/tests/testthat/test-slice-interleave.R +++ b/tests/testthat/test-slice-interleave.R @@ -78,28 +78,110 @@ test_that("recycles inputs", { }) test_that("works with no inputs", { - expect_identical(vec_interleave(), NULL) + # Purposefully returns `unspecified()`, which is the more useful result + # for generic programming against this, like in `list_transpose()` + expect_identical(vec_interleave(), unspecified()) + expect_identical(vec_interleave(NULL), unspecified()) + + # `.size` affects the size of each element, thus it doesn't affect the output + # when there are 0 elements + expect_identical(vec_interleave(.size = 2), unspecified()) + expect_identical(vec_interleave(NULL, .size = 2), unspecified()) }) test_that("works with length zero input", { expect_identical(vec_interleave(integer(), integer()), integer()) }) -test_that("respects ptype", { +test_that("respects `.ptype`", { expect_identical(vec_interleave(.ptype = character()), character()) + expect_identical(vec_interleave(NULL, .ptype = character()), character()) + expect_identical(vec_interleave(1L, 2L, .ptype = numeric()), c(1, 2)) }) -test_that("uses recycling errors", { - expect_snapshot(error = TRUE, vec_interleave(1:2, 1:3)) +test_that("reports type errors", { + expect_snapshot(error = TRUE, { + vec_interleave(1, "x") + }) + expect_snapshot(error = TRUE, { + vec_interleave(1, "x", .error_call = quote(foo())) + }) + + expect_snapshot(error = TRUE, { + vec_interleave(1, "x", .ptype = double()) + }) + expect_snapshot(error = TRUE, { + vec_interleave(1, "x", .ptype = double(), .error_call = quote(foo())) + }) + + # Index is right even with `NULL`! + expect_snapshot(error = TRUE, { + vec_interleave(1, NULL, "x") + }) + expect_snapshot(error = TRUE, { + vec_interleave(1, NULL, "x", .ptype = double()) + }) }) -test_that("errors if the result would be a long vector", { - # Internal multiplication overflows `r_ssize` resulting in a different error - skip_on_os("windows") +test_that("respects `.size`", { + # Correctly does not report an error here + expect_identical( + vec_interleave(1:2, 3:4, .size = 2), + c(1L, 3L, 2L, 4L) + ) - expect_snapshot( - error = TRUE, - vec_interleave_indices(3L, 1e9L) + # Useful for recycling to a known element size + # in the case of all size 1 elements + expect_identical( + vec_interleave(1, 2, .size = 2), + c(1, 2, 1, 2) ) }) + +test_that("reports recycling errors", { + expect_snapshot(error = TRUE, { + vec_interleave(1:2, 1:3) + }) + expect_snapshot(error = TRUE, { + vec_interleave(1:2, 1:3, .error_call = quote(foo())) + }) + + expect_snapshot(error = TRUE, { + vec_interleave(1:2, 3:4, .size = 3) + }) + expect_snapshot(error = TRUE, { + vec_interleave(1:2, 3:4, .size = 3, .error_call = quote(foo())) + }) + + # Index is right even with `NULL`! + expect_snapshot(error = TRUE, { + vec_interleave(1:2, NULL, 1:3) + }) + expect_snapshot(error = TRUE, { + vec_interleave(1:2, NULL, 1:3, .size = 2) + }) +}) + +test_that("reports scalar errors", { + expect_snapshot(error = TRUE, { + vec_interleave(lm(1 ~ 1)) + }) + expect_snapshot(error = TRUE, { + vec_interleave(lm(1 ~ 1), .error_call = quote(foo())) + }) + + # Index is right even with `NULL`! + expect_snapshot(error = TRUE, { + vec_interleave(1, NULL, lm(1 ~ 1)) + }) + expect_snapshot(error = TRUE, { + vec_interleave(1, NULL, lm(1 ~ 1), .error_call = quote(foo())) + }) +}) + +test_that("`list_interleave()` checks for a list", { + expect_snapshot(error = TRUE, { + list_interleave(1) + }) +})