From 01314dfe460dd3cf9d5dfc16b16f98e814715b8f Mon Sep 17 00:00:00 2001 From: Davis Vaughan Date: Fri, 24 Oct 2025 15:14:23 -0400 Subject: [PATCH] Use `.subset()` to avoid hackily calling into `Extract_subset()` --- R/altrep-rle.R | 12 +++++++ R/slice.R | 20 +++++++++++ src/altrep-lazy-character.c | 2 +- src/altrep-rle.c | 2 -- src/altrep-rle.h | 3 +- src/altrep.c | 2 +- src/altrep.h | 47 -------------------------- src/decl/slice-decl.h | 1 + src/globals.h | 2 ++ src/slice.c | 40 ++++++++++++++-------- tests/testthat/_snaps/slice.md | 10 ++++++ tests/testthat/test-slice-chop.R | 6 ++-- tests/testthat/test-slice.R | 57 +++++++++++++++++++++++++++++--- 13 files changed, 132 insertions(+), 72 deletions(-) create mode 100644 R/altrep-rle.R delete mode 100644 src/altrep.h create mode 100644 src/decl/slice-decl.h diff --git a/R/altrep-rle.R b/R/altrep-rle.R new file mode 100644 index 000000000..4affd66c3 --- /dev/null +++ b/R/altrep-rle.R @@ -0,0 +1,12 @@ +chr_rle <- function(...) { + new_chr_rle(c(...)) +} + +new_chr_rle <- function(x) { + stopifnot(is.integer(x), is_named(x)) + .Call(vctrs_altrep_rle_Make, x) +} + +chr_rle_is_materialized <- function(x) { + .Call(vctrs_altrep_rle_is_materialized, x) +} diff --git a/R/slice.R b/R/slice.R index be2d5526b..17017880f 100644 --- a/R/slice.R +++ b/R/slice.R @@ -219,6 +219,26 @@ vec_slice_dispatch_integer64 <- function(x, i) { out } +vec_slice_altrep <- function(x, i) { + # We have already validated `i`, it is one of: + # - Integer vector from `vec_as_location()` + # - Integer vector from materializing a `compact_rep()` + # - Integer vector from materializing a `compact_seq()` + # - Logical vector from materializing a `compact_condition()` + # (which `VectorSubset()` will convert to an integer vector) + + # For the main case we care about (an ALTREP vector with an Extract_Subset + # method, like vroom), `.subset()` will: + # - Call `do_subset_dflt()` (bypassing S3 dispatch!) + # - Call `VectorSubset()` + # - Call `ExtractSubset()` + # - Call `ALTVEC_EXTRACT_SUBSET()` + # - If that returns `NULL`, i.e. if this ALTREP class has not implemented an + # ALTREP `Extract_Subset` method, then it will use the `Elt` method to + # subset + .subset(x, i) +} + #' @rdname vec_slice #' @export diff --git a/src/altrep-lazy-character.c b/src/altrep-lazy-character.c index ffe013c0d..3e4e16979 100644 --- a/src/altrep-lazy-character.c +++ b/src/altrep-lazy-character.c @@ -1,5 +1,5 @@ #include "vctrs.h" -#include "altrep.h" +#include "R_ext/Altrep.h" // Initialised at load time R_altrep_class_t altrep_lazy_character_class; diff --git a/src/altrep-rle.c b/src/altrep-rle.c index 4b4b09735..854bee3ce 100644 --- a/src/altrep-rle.c +++ b/src/altrep-rle.c @@ -1,6 +1,4 @@ -#include "vctrs.h" #include "altrep-rle.h" -#include "altrep.h" // Initialised at load time R_altrep_class_t altrep_rle_class; diff --git a/src/altrep-rle.h b/src/altrep-rle.h index 04e3f95f9..9099dd9a2 100644 --- a/src/altrep-rle.h +++ b/src/altrep-rle.h @@ -1,7 +1,8 @@ #ifndef ALTREP_RLE_H #define ALTREP_RLE_H -#include "altrep.h" +#include "vctrs-core.h" +#include "R_ext/Altrep.h" SEXP altrep_rle_Make(SEXP input); R_xlen_t altrep_rle_Length(SEXP vec); diff --git a/src/altrep.c b/src/altrep.c index da163b526..c4b6b0cee 100644 --- a/src/altrep.c +++ b/src/altrep.c @@ -1,5 +1,5 @@ #include -#include "altrep.h" +#include "R_ext/Altrep.h" // [[ register() ]] r_obj* vctrs_is_altrep(r_obj* x) { diff --git a/src/altrep.h b/src/altrep.h deleted file mode 100644 index 904be3180..000000000 --- a/src/altrep.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef VCTRS_ALTREP_H -#define VCTRS_ALTREP_H - -#include "R_ext/Altrep.h" - -#define ALTREP_METHODS \ - R_altrep_UnserializeEX_method_t UnserializeEX; \ - R_altrep_Unserialize_method_t Unserialize; \ - R_altrep_Serialized_state_method_t Serialized_state; \ - R_altrep_DuplicateEX_method_t DuplicateEX; \ - R_altrep_Duplicate_method_t Duplicate; \ - R_altrep_Coerce_method_t Coerce; \ - R_altrep_Inspect_method_t Inspect; \ - R_altrep_Length_method_t Length - -#define ALTVEC_METHODS \ - ALTREP_METHODS; \ - R_altvec_Dataptr_method_t Dataptr; \ - R_altvec_Dataptr_or_null_method_t Dataptr_or_null; \ - R_altvec_Extract_subset_method_t Extract_subset - -typedef struct { ALTREP_METHODS; } altrep_methods_t; -typedef struct { ALTVEC_METHODS; } altvec_methods_t; - -#define CLASS_METHODS_TABLE(type_class) STDVEC_DATAPTR(type_class) - -#define GENERIC_METHODS_TABLE(x, type_class) \ - ((type_class##_methods_t *) CLASS_METHODS_TABLE(ALTREP_CLASS(x))) - - -#define ALTREP_METHODS_TABLE(x) GENERIC_METHODS_TABLE(x, altrep) -#define ALTVEC_METHODS_TABLE(x) GENERIC_METHODS_TABLE(x, altvec) - -#define DISPATCH_TARGET_HELPER(x, ...) x -#define DISPATCH_TARGET(...) DISPATCH_TARGET_HELPER(__VA_ARGS__, dummy) - -#define DO_DISPATCH(type, fun, ...) \ - type##_METHODS_TABLE(DISPATCH_TARGET(__VA_ARGS__))->fun(__VA_ARGS__) - -#define ALTREP_DISPATCH(fun, ...) DO_DISPATCH(ALTREP, fun, __VA_ARGS__) -#define ALTVEC_DISPATCH(fun, ...) DO_DISPATCH(ALTVEC, fun, __VA_ARGS__) - -static inline SEXP ALTVEC_EXTRACT_SUBSET_PROXY(SEXP x, SEXP indx, SEXP call) { - return ALTVEC_DISPATCH(Extract_subset, x, indx, call); -} - -#endif diff --git a/src/decl/slice-decl.h b/src/decl/slice-decl.h new file mode 100644 index 000000000..67a82b104 --- /dev/null +++ b/src/decl/slice-decl.h @@ -0,0 +1 @@ +r_obj* vec_slice_altrep(r_obj* x, r_obj* subscript); diff --git a/src/globals.h b/src/globals.h index 4a284e7f4..603254364 100644 --- a/src/globals.h +++ b/src/globals.h @@ -27,6 +27,7 @@ struct syms { r_obj* value_arg; r_obj* values_arg; r_obj* vec_default_cast; + r_obj* vec_slice_altrep; r_obj* vec_slice_dispatch_integer64; r_obj* vec_slice_fallback; r_obj* vec_slice_fallback_integer64; @@ -50,6 +51,7 @@ struct chrs { }; struct fns { + r_obj* vec_slice_altrep; r_obj* vec_slice_dispatch_integer64; r_obj* vec_slice_fallback; r_obj* vec_slice_fallback_integer64; diff --git a/src/slice.c b/src/slice.c index 98d967cc9..c0b68e02f 100644 --- a/src/slice.c +++ b/src/slice.c @@ -1,6 +1,7 @@ #include "vctrs.h" #include "type-data-frame.h" -#include "altrep.h" + +#include "decl/slice-decl.h" #define SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ const CTYPE* data = CONST_DEREF(x); \ @@ -57,14 +58,8 @@ #define SLICE(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \ if (!materialize && ALTREP(x)) { \ - r_obj* alt_subscript = KEEP(vec_subscript_materialize(subscript)); \ - r_obj* out = ALTVEC_EXTRACT_SUBSET_PROXY(x, alt_subscript, r_null); \ - FREE(1); \ - if (out != NULL) { \ - return out; \ - } \ - } \ - if (is_compact_rep(subscript)) { \ + return vec_slice_altrep(x, subscript); \ + } else if (is_compact_rep(subscript)) { \ SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \ @@ -148,7 +143,9 @@ r_obj* raw_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) return out #define SLICE_BARRIER(RTYPE, CONST_DEREF, SET, NA_VALUE) \ - if (is_compact_rep(subscript)) { \ + if (!materialize && ALTREP(x)) { \ + return vec_slice_altrep(x, subscript); \ + } else if (is_compact_rep(subscript)) { \ SLICE_BARRIER_COMPACT_REP(RTYPE, CONST_DEREF, SET, NA_VALUE); \ } else if (is_compact_seq(subscript)) { \ SLICE_BARRIER_COMPACT_SEQ(RTYPE, CONST_DEREF, SET); \ @@ -165,7 +162,7 @@ r_obj* chr_names_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materi SLICE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke, r_strs.empty); } static -r_obj* list_slice(r_obj* x, r_obj* subscript) { +r_obj* list_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) { SLICE_BARRIER(R_TYPE_list, r_list_cbegin, r_list_poke, r_null); } @@ -207,7 +204,6 @@ r_obj* df_slice(r_obj* x, r_obj* subscript) { return out; } - r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript) { // TODO - Remove once bit64 is updated on CRAN. Special casing integer64 // objects to ensure correct slicing with `NA_integer_`. @@ -237,6 +233,22 @@ r_obj* vec_slice_dispatch(r_obj* x, r_obj* subscript) { syms_i, subscript); } +r_obj* vec_slice_altrep(r_obj* x, r_obj* subscript) { + subscript = KEEP(vec_subscript_materialize(subscript)); + + r_obj* out = vctrs_dispatch2( + syms.vec_slice_altrep, + fns.vec_slice_altrep, + syms_x, + x, + syms_i, + subscript + ); + + FREE(1); + return out; +} + bool vec_requires_fallback(r_obj* x, struct vctrs_proxy_info info) { return r_is_object(x) && !info.had_proxy_method && @@ -254,7 +266,7 @@ r_obj* vec_slice_base(enum vctrs_type type, case VCTRS_TYPE_complex: return cpl_slice(x, subscript, materialize); case VCTRS_TYPE_character: return chr_slice(x, subscript, materialize); case VCTRS_TYPE_raw: return raw_slice(x, subscript, materialize); - case VCTRS_TYPE_list: return list_slice(x, subscript); + case VCTRS_TYPE_list: return list_slice(x, subscript, materialize); default: stop_unimplemented_vctrs_type("vec_slice_base", type); } } @@ -520,10 +532,12 @@ r_obj* ffi_slice_rep(r_obj* x, r_obj* ffi_i, r_obj* ffi_n) { void vctrs_init_slice(r_obj* ns) { + syms.vec_slice_altrep = r_sym("vec_slice_altrep"); syms.vec_slice_dispatch_integer64 = r_sym("vec_slice_dispatch_integer64"); syms.vec_slice_fallback = r_sym("vec_slice_fallback"); syms.vec_slice_fallback_integer64 = r_sym("vec_slice_fallback_integer64"); + fns.vec_slice_altrep = r_eval(syms.vec_slice_altrep, ns); fns.vec_slice_dispatch_integer64 = r_eval(syms.vec_slice_dispatch_integer64, ns); fns.vec_slice_fallback = r_eval(syms.vec_slice_fallback, ns); fns.vec_slice_fallback_integer64 = r_eval(syms.vec_slice_fallback_integer64, ns); diff --git a/tests/testthat/_snaps/slice.md b/tests/testthat/_snaps/slice.md index 4f65a7146..54f59a3fc 100644 --- a/tests/testthat/_snaps/slice.md +++ b/tests/testthat/_snaps/slice.md @@ -45,6 +45,16 @@ ! Can't subset elements with `2^31`. x Can't convert from `2^31` to due to loss of precision. +# vec_slice() works with Altrep classes with custom extract methods + + Code + vec_slice(x, idx) + Condition + Error in `vec_slice()`: + ! Can't subset elements past the end. + i Location 16 doesn't exist. + i There are only 15 elements. + # Unnamed vector with character subscript is caught Code diff --git a/tests/testthat/test-slice-chop.R b/tests/testthat/test-slice-chop.R index 8e54f325f..0e7d9ffc7 100644 --- a/tests/testthat/test-slice-chop.R +++ b/tests/testthat/test-slice-chop.R @@ -341,15 +341,15 @@ test_that("vec_chop() with data frame proxies always uses the proxy's length inf }) test_that("ALTREP objects always generate materialized chops (#1450)", { - x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) + x <- chr_rle(foo = 10L, bar = 5L) # `x` starts in compact form - expect_false(.Call(vctrs_altrep_rle_is_materialized, x)) + expect_false(chr_rle_is_materialized(x)) result <- vec_chop(x) # Chopping materializes `x` - expect_true(.Call(vctrs_altrep_rle_is_materialized, x)) + expect_true(chr_rle_is_materialized(x)) # And chopped elements are not ALTREP vectors expect_false(any(map_lgl(result, is_altrep))) diff --git a/tests/testthat/test-slice.R b/tests/testthat/test-slice.R index d8e9b4355..57023dfb8 100644 --- a/tests/testthat/test-slice.R +++ b/tests/testthat/test-slice.R @@ -446,10 +446,53 @@ test_that("slicing an unspecified() object returns an unspecified()", { test_that("vec_slice() works with Altrep classes with custom extract methods", { - x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) + x <- chr_rle(foo = 10L, bar = 5L) + expect_false(chr_rle_is_materialized(x)) idx <- c(9, 10, 11) expect_equal(vec_slice(x, idx), c("foo", "foo", "bar")) + expect_false(chr_rle_is_materialized(x)) + + # With zero + idx <- c(0, 1) + expect_equal(vec_slice(x, idx), "foo") + expect_false(chr_rle_is_materialized(x)) + + # With integer missing values + idx <- c(0, NA, 2, NA) + expect_equal(vec_slice(x, idx), c(NA, "foo", NA)) + expect_false(chr_rle_is_materialized(x)) + + # With logical condition index + idx <- c(TRUE, rep(FALSE, 8), TRUE, rep(FALSE, 2), TRUE, TRUE, TRUE) + expect_equal(vec_slice(x, idx), c("foo", "foo", "bar", "bar", "bar")) + expect_false(chr_rle_is_materialized(x)) + + # Everything + idx <- TRUE + expect_equal(vec_slice(x, idx), c(rep("foo", 10), rep("bar", 5))) + expect_false(chr_rle_is_materialized(x)) + + # Nothing + idx <- FALSE + expect_equal(vec_slice(x, idx), character()) + expect_false(chr_rle_is_materialized(x)) + + # Whole vector of missing values + idx <- NA + expect_equal(vec_slice(x, idx), rep(NA_character_, 15)) + expect_false(chr_rle_is_materialized(x)) + + # Just 1 missing value + idx <- NA_integer_ + expect_equal(vec_slice(x, idx), rep(NA_character_, 1)) + expect_false(chr_rle_is_materialized(x)) + + # OOB + idx <- 16 + expect_snapshot(error = TRUE, { + vec_slice(x, idx) + }) }) test_that("Unnamed vector with character subscript is caught", { @@ -517,8 +560,10 @@ test_that("vec_init() asserts vectorness (#301)", { }) test_that("vec_init() works with Altrep classes", { - x <- .Call(vctrs_altrep_rle_Make, c(foo = 1L, bar = 2L)) + x <- chr_rle(foo = 1L, bar = 2L) + expect_false(chr_rle_is_materialized(x)) expect_equal(vec_init(x, 2), rep(NA_character_, 2)) + expect_false(chr_rle_is_materialized(x)) }) test_that("vec_init() validates `n`", { @@ -548,8 +593,10 @@ test_that("names are recycled correctly with compact reps", { }) test_that("vec_slice() with compact_reps work with Altrep classes", { - x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L)) + x <- chr_rle(foo = 10L, bar = 5L) + expect_false(chr_rle_is_materialized(x)) expect_equal(vec_slice_rep(x, 10L, 3L), rep("foo", 3)) + expect_false(chr_rle_is_materialized(x)) }) # vec_slice + compact_seq ------------------------------------------------- @@ -788,8 +835,10 @@ test_that("can subset S3 objects using the fallback method with compact seqs", { }) test_that("vec_slice() with compact_seqs work with Altrep classes", { - x <- .Call(vctrs_altrep_rle_Make, c(foo = 2L, bar = 3L)) + x <- chr_rle(foo = 2L, bar = 3L) + expect_false(chr_rle_is_materialized(x)) expect_equal(vec_slice_seq(x, 1L, 3L), c("foo", "bar", "bar")) + expect_false(chr_rle_is_materialized(x)) }) test_that("vec_slice() handles symbols and OO objects", {