Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions R/altrep-rle.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
chr_rle <- function(...) {
new_chr_rle(c(...))
}

new_chr_rle <- function(x) {
stopifnot(is.integer(x), is_named(x))
.Call(vctrs_altrep_rle_Make, x)
}

chr_rle_is_materialized <- function(x) {
.Call(vctrs_altrep_rle_is_materialized, x)
}
20 changes: 20 additions & 0 deletions R/slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,26 @@ vec_slice_dispatch_integer64 <- function(x, i) {
out
}

vec_slice_altrep <- function(x, i) {
# We have already validated `i`, it is one of:
# - Integer vector from `vec_as_location()`
# - Integer vector from materializing a `compact_rep()`
# - Integer vector from materializing a `compact_seq()`
# - Logical vector from materializing a `compact_condition()`
# (which `VectorSubset()` will convert to an integer vector)

# For the main case we care about (an ALTREP vector with an Extract_Subset
# method, like vroom), `.subset()` will:
# - Call `do_subset_dflt()` (bypassing S3 dispatch!)
# - Call `VectorSubset()`
# - Call `ExtractSubset()`
# - Call `ALTVEC_EXTRACT_SUBSET()`
# - If that returns `NULL`, i.e. if this ALTREP class has not implemented an
# ALTREP `Extract_Subset` method, then it will use the `Elt` method to
# subset
.subset(x, i)
}


#' @rdname vec_slice
#' @export
Expand Down
2 changes: 1 addition & 1 deletion src/altrep-lazy-character.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "vctrs.h"
#include "altrep.h"
#include "R_ext/Altrep.h"

// Initialised at load time
R_altrep_class_t altrep_lazy_character_class;
Expand Down
2 changes: 0 additions & 2 deletions src/altrep-rle.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
#include "vctrs.h"
#include "altrep-rle.h"
#include "altrep.h"

// Initialised at load time
R_altrep_class_t altrep_rle_class;
Expand Down
3 changes: 2 additions & 1 deletion src/altrep-rle.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#ifndef ALTREP_RLE_H
#define ALTREP_RLE_H

#include "altrep.h"
#include "vctrs-core.h"
#include "R_ext/Altrep.h"

SEXP altrep_rle_Make(SEXP input);
R_xlen_t altrep_rle_Length(SEXP vec);
Expand Down
2 changes: 1 addition & 1 deletion src/altrep.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include <rlang.h>
#include "altrep.h"
#include "R_ext/Altrep.h"

// [[ register() ]]
r_obj* vctrs_is_altrep(r_obj* x) {
Expand Down
47 changes: 0 additions & 47 deletions src/altrep.h
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can delete all of this hackery now!

This file was deleted.

1 change: 1 addition & 0 deletions src/decl/slice-decl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
r_obj* vec_slice_altrep(r_obj* x, r_obj* subscript);
2 changes: 2 additions & 0 deletions src/globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct syms {
r_obj* value_arg;
r_obj* values_arg;
r_obj* vec_default_cast;
r_obj* vec_slice_altrep;
r_obj* vec_slice_dispatch_integer64;
r_obj* vec_slice_fallback;
r_obj* vec_slice_fallback_integer64;
Expand All @@ -50,6 +51,7 @@ struct chrs {
};

struct fns {
r_obj* vec_slice_altrep;
r_obj* vec_slice_dispatch_integer64;
r_obj* vec_slice_fallback;
r_obj* vec_slice_fallback_integer64;
Expand Down
40 changes: 27 additions & 13 deletions src/slice.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "vctrs.h"
#include "type-data-frame.h"
#include "altrep.h"

#include "decl/slice-decl.h"

#define SLICE_SUBSCRIPT(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \
const CTYPE* data = CONST_DEREF(x); \
Expand Down Expand Up @@ -57,14 +58,8 @@

#define SLICE(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE) \
if (!materialize && ALTREP(x)) { \
r_obj* alt_subscript = KEEP(vec_subscript_materialize(subscript)); \
r_obj* out = ALTVEC_EXTRACT_SUBSET_PROXY(x, alt_subscript, r_null); \
FREE(1); \
if (out != NULL) { \
return out; \
} \
} \
if (is_compact_rep(subscript)) { \
return vec_slice_altrep(x, subscript); \
} else if (is_compact_rep(subscript)) { \
Comment on lines -60 to +62
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note can no longer fall through to vctrs slicing code on ALTREP objects

SLICE_COMPACT_REP(RTYPE, CTYPE, DEREF, CONST_DEREF, NA_VALUE); \
} else if (is_compact_seq(subscript)) { \
SLICE_COMPACT_SEQ(RTYPE, CTYPE, DEREF, CONST_DEREF); \
Expand Down Expand Up @@ -148,7 +143,9 @@ r_obj* raw_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize)
return out

#define SLICE_BARRIER(RTYPE, CONST_DEREF, SET, NA_VALUE) \
if (is_compact_rep(subscript)) { \
if (!materialize && ALTREP(x)) { \
return vec_slice_altrep(x, subscript); \
} else if (is_compact_rep(subscript)) { \
SLICE_BARRIER_COMPACT_REP(RTYPE, CONST_DEREF, SET, NA_VALUE); \
Comment on lines 145 to 149
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Notably we weren't actually doing anything with ALTREP in the barrier path for characters and lists

But all our tests are for a character ALTREP vector, so they weren't actually hitting this at all and it was materializing the test vector.

I've added extra checks to our tests that confirm that after the vec_slice(x) call that x is still not materialized, which previously would have failed.

Base R also now has support for ALTREP lists thanks to Gabor, so those would now be supported by this as well.

} else if (is_compact_seq(subscript)) { \
SLICE_BARRIER_COMPACT_SEQ(RTYPE, CONST_DEREF, SET); \
Expand All @@ -165,7 +162,7 @@ r_obj* chr_names_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materi
SLICE_BARRIER(R_TYPE_character, r_chr_cbegin, r_chr_poke, r_strs.empty);
}
static
r_obj* list_slice(r_obj* x, r_obj* subscript) {
r_obj* list_slice(r_obj* x, r_obj* subscript, enum vctrs_materialize materialize) {
SLICE_BARRIER(R_TYPE_list, r_list_cbegin, r_list_poke, r_null);
}

Expand Down Expand Up @@ -207,7 +204,6 @@ r_obj* df_slice(r_obj* x, r_obj* subscript) {
return out;
}


r_obj* vec_slice_fallback(r_obj* x, r_obj* subscript) {
// TODO - Remove once bit64 is updated on CRAN. Special casing integer64
// objects to ensure correct slicing with `NA_integer_`.
Expand Down Expand Up @@ -237,6 +233,22 @@ r_obj* vec_slice_dispatch(r_obj* x, r_obj* subscript) {
syms_i, subscript);
}

r_obj* vec_slice_altrep(r_obj* x, r_obj* subscript) {
subscript = KEEP(vec_subscript_materialize(subscript));

r_obj* out = vctrs_dispatch2(
syms.vec_slice_altrep,
fns.vec_slice_altrep,
syms_x,
x,
syms_i,
subscript
);

FREE(1);
return out;
}

bool vec_requires_fallback(r_obj* x, struct vctrs_proxy_info info) {
return r_is_object(x) &&
!info.had_proxy_method &&
Expand All @@ -254,7 +266,7 @@ r_obj* vec_slice_base(enum vctrs_type type,
case VCTRS_TYPE_complex: return cpl_slice(x, subscript, materialize);
case VCTRS_TYPE_character: return chr_slice(x, subscript, materialize);
case VCTRS_TYPE_raw: return raw_slice(x, subscript, materialize);
case VCTRS_TYPE_list: return list_slice(x, subscript);
case VCTRS_TYPE_list: return list_slice(x, subscript, materialize);
default: stop_unimplemented_vctrs_type("vec_slice_base", type);
}
}
Expand Down Expand Up @@ -520,10 +532,12 @@ r_obj* ffi_slice_rep(r_obj* x, r_obj* ffi_i, r_obj* ffi_n) {


void vctrs_init_slice(r_obj* ns) {
syms.vec_slice_altrep = r_sym("vec_slice_altrep");
syms.vec_slice_dispatch_integer64 = r_sym("vec_slice_dispatch_integer64");
syms.vec_slice_fallback = r_sym("vec_slice_fallback");
syms.vec_slice_fallback_integer64 = r_sym("vec_slice_fallback_integer64");

fns.vec_slice_altrep = r_eval(syms.vec_slice_altrep, ns);
fns.vec_slice_dispatch_integer64 = r_eval(syms.vec_slice_dispatch_integer64, ns);
fns.vec_slice_fallback = r_eval(syms.vec_slice_fallback, ns);
fns.vec_slice_fallback_integer64 = r_eval(syms.vec_slice_fallback_integer64, ns);
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/_snaps/slice.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@
! Can't subset elements with `2^31`.
x Can't convert from `2^31` <double> to <integer> due to loss of precision.

# vec_slice() works with Altrep classes with custom extract methods

Code
vec_slice(x, idx)
Condition
Error in `vec_slice()`:
! Can't subset elements past the end.
i Location 16 doesn't exist.
i There are only 15 elements.

# Unnamed vector with character subscript is caught

Code
Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-slice-chop.R
Original file line number Diff line number Diff line change
Expand Up @@ -341,15 +341,15 @@ test_that("vec_chop() with data frame proxies always uses the proxy's length inf
})

test_that("ALTREP objects always generate materialized chops (#1450)", {
x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L))
x <- chr_rle(foo = 10L, bar = 5L)

# `x` starts in compact form
expect_false(.Call(vctrs_altrep_rle_is_materialized, x))
expect_false(chr_rle_is_materialized(x))

result <- vec_chop(x)

# Chopping materializes `x`
expect_true(.Call(vctrs_altrep_rle_is_materialized, x))
expect_true(chr_rle_is_materialized(x))

# And chopped elements are not ALTREP vectors
expect_false(any(map_lgl(result, is_altrep)))
Expand Down
57 changes: 53 additions & 4 deletions tests/testthat/test-slice.R
Original file line number Diff line number Diff line change
Expand Up @@ -446,10 +446,53 @@ test_that("slicing an unspecified() object returns an unspecified()", {


test_that("vec_slice() works with Altrep classes with custom extract methods", {
x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L))
x <- chr_rle(foo = 10L, bar = 5L)
Comment on lines 448 to +449
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note this only tests the barrier altrep path (character / list). We don't have an altrep test class for the non-barrier path (like an altrep integer rep or something).

We could make one if we feel like we need the coverage

expect_false(chr_rle_is_materialized(x))

idx <- c(9, 10, 11)
expect_equal(vec_slice(x, idx), c("foo", "foo", "bar"))
expect_false(chr_rle_is_materialized(x))

# With zero
idx <- c(0, 1)
expect_equal(vec_slice(x, idx), "foo")
expect_false(chr_rle_is_materialized(x))

# With integer missing values
idx <- c(0, NA, 2, NA)
expect_equal(vec_slice(x, idx), c(NA, "foo", NA))
expect_false(chr_rle_is_materialized(x))

# With logical condition index
idx <- c(TRUE, rep(FALSE, 8), TRUE, rep(FALSE, 2), TRUE, TRUE, TRUE)
expect_equal(vec_slice(x, idx), c("foo", "foo", "bar", "bar", "bar"))
expect_false(chr_rle_is_materialized(x))

# Everything
idx <- TRUE
expect_equal(vec_slice(x, idx), c(rep("foo", 10), rep("bar", 5)))
expect_false(chr_rle_is_materialized(x))

# Nothing
idx <- FALSE
expect_equal(vec_slice(x, idx), character())
expect_false(chr_rle_is_materialized(x))

# Whole vector of missing values
idx <- NA
expect_equal(vec_slice(x, idx), rep(NA_character_, 15))
expect_false(chr_rle_is_materialized(x))

# Just 1 missing value
idx <- NA_integer_
expect_equal(vec_slice(x, idx), rep(NA_character_, 1))
expect_false(chr_rle_is_materialized(x))

# OOB
idx <- 16
expect_snapshot(error = TRUE, {
vec_slice(x, idx)
})
})

test_that("Unnamed vector with character subscript is caught", {
Expand Down Expand Up @@ -517,8 +560,10 @@ test_that("vec_init() asserts vectorness (#301)", {
})

test_that("vec_init() works with Altrep classes", {
x <- .Call(vctrs_altrep_rle_Make, c(foo = 1L, bar = 2L))
x <- chr_rle(foo = 1L, bar = 2L)
expect_false(chr_rle_is_materialized(x))
expect_equal(vec_init(x, 2), rep(NA_character_, 2))
expect_false(chr_rle_is_materialized(x))
})

test_that("vec_init() validates `n`", {
Expand Down Expand Up @@ -548,8 +593,10 @@ test_that("names are recycled correctly with compact reps", {
})

test_that("vec_slice() with compact_reps work with Altrep classes", {
x <- .Call(vctrs_altrep_rle_Make, c(foo = 10L, bar = 5L))
x <- chr_rle(foo = 10L, bar = 5L)
expect_false(chr_rle_is_materialized(x))
expect_equal(vec_slice_rep(x, 10L, 3L), rep("foo", 3))
expect_false(chr_rle_is_materialized(x))
})

# vec_slice + compact_seq -------------------------------------------------
Expand Down Expand Up @@ -788,8 +835,10 @@ test_that("can subset S3 objects using the fallback method with compact seqs", {
})

test_that("vec_slice() with compact_seqs work with Altrep classes", {
x <- .Call(vctrs_altrep_rle_Make, c(foo = 2L, bar = 3L))
x <- chr_rle(foo = 2L, bar = 3L)
expect_false(chr_rle_is_materialized(x))
expect_equal(vec_slice_seq(x, 1L, 3L), c("foo", "bar", "bar"))
expect_false(chr_rle_is_materialized(x))
})

test_that("vec_slice() handles symbols and OO objects", {
Expand Down