diff --git a/src/internal/attr.c b/src/internal/attr.c index 074c0c203..676af7804 100644 --- a/src/internal/attr.c +++ b/src/internal/attr.c @@ -61,21 +61,27 @@ r_obj* node_names(r_obj* x) { } r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) { - int n_kept = 0; - - r_obj* dots = KEEP_N(rlang_dots(env), &n_kept); - if (!r_is_vector(x, -1)) { r_abort("`x` must be a vector"); } if (nm == r_null) { - x = set_names_dispatch(x, r_null, env); - - FREE(n_kept); - return x; + // Fast case for dropping names + if (r_is_object(x)) { + return set_names_dispatch(x, r_null, env); + } else if (r_names(x) != r_null) { + x = r_wrap_or_clone(x); + r_attrib_poke_names(x, r_null); + return x; + } else { + return x; + } } + int n_kept = 0; + + r_obj* dots = KEEP_N(rlang_dots(env), &n_kept); + if (r_is_function(nm) || r_is_formula(nm, -1, -1)) { if (r_names(mold) == r_null) { mold = KEEP_N(eval_as_character(mold, env), &n_kept); @@ -85,12 +91,16 @@ r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) { nm = KEEP_N(rlang_as_function(nm, env), &n_kept); nm = KEEP_N(eval_fn_dots(nm, mold, dots, env), &n_kept); - } else { - if (r_length(dots) > 0) { - nm = KEEP_N(eval_fn_dots(c_fn, nm, dots, env), &n_kept); - } + } else if (r_length(dots) > 0) { + nm = KEEP_N(eval_fn_dots(c_fn, nm, dots, env), &n_kept); + } + if (r_typeof(nm) != R_TYPE_character || r_is_object(nm)) { nm = KEEP_N(eval_as_character(nm, env), &n_kept); + + if (r_typeof(nm) != R_TYPE_character) { + r_abort("`nm` must be `NULL` or a character vector."); + } } r_ssize n; @@ -100,10 +110,6 @@ r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) { n = r_length(x); } - if (r_typeof(nm) != R_TYPE_character) { - r_abort("`nm` must be `NULL` or a character vector."); - } - r_ssize nm_n = r_length(nm); if (nm_n != n) { if (nm_n != 1) { @@ -118,12 +124,13 @@ r_obj* ffi_set_names(r_obj* x, r_obj* mold, r_obj* nm, r_obj* env) { r_chr_fill(nm, val, n); } - if (!is_character(nm, n, OPTION_BOOL_null, OPTION_BOOL_null)) { - r_abort("`nm` must be `NULL` or a character vector the same length as `x`"); + if (r_is_object(x)) { + x = set_names_dispatch(x, nm, env); + } else { + x = r_wrap_or_clone(x); + r_attrib_poke_names(x, nm); } - x = set_names_dispatch(x, nm, env); - FREE(n_kept); return x; } @@ -158,9 +165,6 @@ r_obj* names_dispatch(r_obj* x, r_obj* env) { return r_eval(names_call, env); } -// Use `names<-()` rather than setting names directly with `r_attrib_poke_names()` -// for genericity and for speed. `names<-()` can shallow duplicate `x`'s -// attributes using ALTREP wrappers, which is not in R's public API. static inline r_obj* set_names_dispatch(r_obj* x, r_obj* nm, r_obj* env) { r_env_poke(env, r_syms.dot_x, x); diff --git a/src/internal/encoding.c b/src/internal/encoding.c index f6a109939..12da48d4b 100644 --- a/src/internal/encoding.c +++ b/src/internal/encoding.c @@ -131,7 +131,7 @@ r_obj* obj_attrib_encode_utf8(r_obj* x, r_obj* attrib) { } KEEP(attrib_new); - x = KEEP(r_clone_shared(x)); + x = KEEP(r_wrap_or_clone_shared(x)); r_poke_attrib(x, attrib_new); FREE(2); diff --git a/src/rlang/obj.h b/src/rlang/obj.h index e836d3a89..5979a8528 100644 --- a/src/rlang/obj.h +++ b/src/rlang/obj.h @@ -81,6 +81,30 @@ r_obj* r_clone_shared(r_obj* x) { return r_is_shared(x) ? r_clone(x) : x; } +// Copy/Clone equivalents that attempt to generate a thin ALTREP wrapper +// instead of copying/cloning if possible. Typically useful before modifying +// attributes, rather than before modifying the underlying data. +static inline +r_obj* r_wrap_or_copy(r_obj* x) { +#if R_VERSION >= R_Version(3, 6, 0) + return R_duplicate_attr(x); +#else + return r_copy(x); +#endif +} +static inline +r_obj* r_wrap_or_clone(r_obj* x) { +#if R_VERSION >= R_Version(3, 6, 0) + return R_shallow_duplicate_attr(x); +#else + return r_clone(x); +#endif +} +static inline +r_obj* r_wrap_or_clone_shared(r_obj* x) { + return r_is_shared(x) ? r_wrap_or_clone(x) : x; +} + // These also clone names r_obj* r_vec_clone(r_obj* x); r_obj* r_vec_clone_shared(r_obj* x); diff --git a/tests/testthat/test-attr.R b/tests/testthat/test-attr.R index 79e00b755..7786770ec 100644 --- a/tests/testthat/test-attr.R +++ b/tests/testthat/test-attr.R @@ -31,6 +31,7 @@ test_that("can supply function/formula to rename", { expect_named(set_names(x, toupper), c("A", "B")) expect_named(set_names(x, ~ toupper(.)), c("A", "B")) expect_named(set_names(x, paste, "foo"), c("a foo", "b foo")) + expect_named(set_names(x, ~ c(3, 4)), c("3", "4")) }) test_that("set_names() zaps names", { diff --git a/tests/testthat/test-c-api.R b/tests/testthat/test-c-api.R index aee52499f..88579bb99 100644 --- a/tests/testthat/test-c-api.R +++ b/tests/testthat/test-c-api.R @@ -1284,6 +1284,24 @@ test_that("attributes are re-encoded recursively", { expect_utf8_encoded(attrib_nested$latin1) }) +test_that("re-encoding attributes doesn't modify the original attributes", { + latin1 <- test_encodings()$latin1 + + # Large object so duplication in `r_obj_encode_utf8()` generates an ALTREP + # wrapper + x <- 1:1e6 + 0L + attr(x, "foo") <- latin1 + original <- Encoding(latin1) + + result <- r_obj_encode_utf8(x) + attrib <- attributes(result) + expect_utf8_encoded(attrib$foo) + + # Still the same as before + attrib <- attributes(x) + expect_identical(Encoding(attrib$foo), original) +}) + test_that("NAs aren't re-encoded to 'NA' (r-lib/vctrs#1291)", { utf8 <- c(NA, test_encodings()$utf8) latin1 <- c(NA, test_encodings()$latin1)