diff --git a/NEWS.md b/NEWS.md index 89b4f5f21..b4cf03d27 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # vctrs (development version) +* `vec_ptype()` is now an optional _performance_ generic. It is not necessary + to implement, but if your class has a static prototype, you might consider + implementing a custom `vec_ptype()` method that returns a constant to + improve performance in some cases (such as common type imputation). + * New `vec_detect_complete()`, inspired by `stats::complete.cases()`. For most vectors, this is identical to `!vec_equal_na()`. For data frames and matrices, this detects rows that only contain non-missing values. diff --git a/R/type.R b/R/type.R index 01edb2f09..14c90835a 100644 --- a/R/type.R +++ b/R/type.R @@ -39,6 +39,14 @@ #' See [internal-faq-ptype2-identity] for more information about #' identity values. #' +#' `vec_ptype()` is a _performance_ generic. It is not necessary to implement it +#' because the default method will work for any vctrs type. However the default +#' method builds around other vctrs primitives like `vec_slice()` which incurs +#' performance costs. If your class has a static prototype, you might consider +#' implementing a custom `vec_ptype()` method that returns a constant. This will +#' improve the performance of your class in many cases ([common +#' type][vec_ptype2] imputation in particular). +#' #' Because it may contain unspecified vectors, the prototype returned #' by `vec_ptype()` is said to be __unfinalised__. Call #' [vec_ptype_finalise()] to finalise it. Commonly you will need the @@ -94,7 +102,8 @@ vec_ptype <- function(x, ..., x_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } - .Call(vctrs_ptype, x, x_arg) + return(.Call(vctrs_ptype, x, x_arg)) + UseMethod("vec_ptype") } #' @export diff --git a/man/vec_ptype.Rd b/man/vec_ptype.Rd index 389488755..4ba92e4a5 100644 --- a/man/vec_ptype.Rd +++ b/man/vec_ptype.Rd @@ -59,6 +59,13 @@ for any 1d vector type. See \link{internal-faq-ptype2-identity} for more information about identity values. +\code{vec_ptype()} is a \emph{performance} generic. It is not necessary to implement it +because the default method will work for any vctrs type. However the default +method builds around other vctrs primitives like \code{vec_slice()} which incurs +performance costs. If your class has a static prototype, you might consider +implementing a custom \code{vec_ptype()} method that returns a constant. This will +improve the performance of your class in many cases (\link[=vec_ptype2]{common type} imputation in particular). + Because it may contain unspecified vectors, the prototype returned by \code{vec_ptype()} is said to be \strong{unfinalised}. Call \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} to finalise it. Commonly you will need the diff --git a/src/decl/ptype-decl.h b/src/decl/ptype-decl.h new file mode 100644 index 000000000..efab4b976 --- /dev/null +++ b/src/decl/ptype-decl.h @@ -0,0 +1,7 @@ +#ifndef VCTRS_PTYPE_DECL_H +#define VCTRS_PTYPE_DECL_H + +static inline SEXP vec_ptype_method(SEXP x); +static inline SEXP vec_ptype_invoke(SEXP x, SEXP method); + +#endif diff --git a/src/type.c b/src/type.c index e708a19ea..572424097 100644 --- a/src/type.c +++ b/src/type.c @@ -4,8 +4,11 @@ #include "ptype2.h" #include "type-data-frame.h" #include "utils.h" +#include "decl/ptype-decl.h" // Initialised at load time +static SEXP syms_vec_ptype = NULL; + static SEXP syms_vec_ptype_finalise_dispatch = NULL; static SEXP fns_vec_ptype_finalise_dispatch = NULL; @@ -53,6 +56,7 @@ static inline SEXP vec_ptype_slice(SEXP x, SEXP empty) { return vec_slice(x, R_NilValue); } } + static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { switch (class_type(x)) { case vctrs_class_bare_tibble: @@ -75,8 +79,32 @@ static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { return x; } - vec_assert(x, x_arg); - return vec_slice(x, R_NilValue); + SEXP method = PROTECT(vec_ptype_method(x)); + + SEXP out; + + if (method == r_null) { + vec_assert(x, x_arg); + out = vec_slice(x, r_null); + } else { + out = vec_ptype_invoke(x, method); + } + + UNPROTECT(1); + return out; +} + +static inline +SEXP vec_ptype_method(SEXP x) { + SEXP cls = PROTECT(s3_get_class(x)); + SEXP method = s3_class_find_method("vec_ptype", cls, vctrs_method_table); + UNPROTECT(1); + return method; +} + +static inline +SEXP vec_ptype_invoke(SEXP x, SEXP method) { + return vctrs_dispatch1(syms_vec_ptype, method, syms_x, x); } SEXP df_ptype(SEXP x, bool bare) { @@ -270,6 +298,8 @@ static SEXP vctrs_type2_common(SEXP current, void vctrs_init_type(SEXP ns) { + syms_vec_ptype = Rf_install("vec_ptype"); + syms_vec_ptype_finalise_dispatch = Rf_install("vec_ptype_finalise_dispatch"); fns_vec_ptype_finalise_dispatch = Rf_findVar(syms_vec_ptype_finalise_dispatch, ns); } diff --git a/tests/testthat/test-type.R b/tests/testthat/test-type.R index 6b5ad5f5f..23a026724 100644 --- a/tests/testthat/test-type.R +++ b/tests/testthat/test-type.R @@ -190,6 +190,13 @@ test_that("the type of a classed data frame with an unspecified column retains u expect_identical(vec_ptype(df2), expect) }) +test_that("vec_ptype() methods can be written", { + local_methods( + vec_ptype.vctrs_foobar = function(x, ...) "dispatch" + ) + expect_identical(vec_ptype(foobar()), "dispatch") +}) + test_that("vec_ptype_finalise() works with NULL", { expect_identical(vec_ptype_finalise(NULL), NULL) })