From 62b974801685199cc15d0e9c254d622fdfa1e55b Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Sat, 6 Feb 2021 07:15:34 -0500 Subject: [PATCH 1/7] Rework `vec_ptype()` into a generic for S3 types --- R/type.R | 3 ++- src/type.c | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/R/type.R b/R/type.R index 01edb2f09..df5cd9f16 100644 --- a/R/type.R +++ b/R/type.R @@ -94,7 +94,8 @@ vec_ptype <- function(x, ..., x_arg = "") { if (!missing(...)) { ellipsis::check_dots_empty() } - .Call(vctrs_ptype, x, x_arg) + return(.Call(vctrs_ptype, x, x_arg)) + UseMethod("vec_ptype") } #' @export diff --git a/src/type.c b/src/type.c index e708a19ea..454edf913 100644 --- a/src/type.c +++ b/src/type.c @@ -6,6 +6,8 @@ #include "utils.h" // Initialised at load time +static SEXP syms_vec_ptype = NULL; + static SEXP syms_vec_ptype_finalise_dispatch = NULL; static SEXP fns_vec_ptype_finalise_dispatch = NULL; @@ -53,6 +55,10 @@ static inline SEXP vec_ptype_slice(SEXP x, SEXP empty) { return vec_slice(x, R_NilValue); } } + +static inline SEXP vec_ptype_method(SEXP x); +static inline SEXP vec_ptype_invoke(SEXP x, SEXP method); + static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { switch (class_type(x)) { case vctrs_class_bare_tibble: @@ -75,8 +81,32 @@ static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { return x; } - vec_assert(x, x_arg); - return vec_slice(x, R_NilValue); + SEXP method = PROTECT(vec_ptype_method(x)); + + SEXP out; + + if (method == r_null) { + vec_assert(x, x_arg); + out = vec_slice(x, r_null); + } else { + out = vec_ptype_invoke(x, method); + } + + UNPROTECT(1); + return out; +} + +static inline +SEXP vec_ptype_method(SEXP x) { + SEXP cls = PROTECT(s3_get_class(x)); + SEXP method = s3_class_find_method("vec_ptype", cls, vctrs_method_table); + UNPROTECT(1); + return method; +} + +static inline +SEXP vec_ptype_invoke(SEXP x, SEXP method) { + return vctrs_dispatch1(syms_vec_ptype, method, syms_x, x); } SEXP df_ptype(SEXP x, bool bare) { @@ -270,6 +300,8 @@ static SEXP vctrs_type2_common(SEXP current, void vctrs_init_type(SEXP ns) { + syms_vec_ptype = Rf_install("vec_ptype"); + syms_vec_ptype_finalise_dispatch = Rf_install("vec_ptype_finalise_dispatch"); fns_vec_ptype_finalise_dispatch = Rf_findVar(syms_vec_ptype_finalise_dispatch, ns); } From 75583e249c1677c7d953e2ae2f2cfe82163b4712 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 25 Feb 2021 08:16:51 -0500 Subject: [PATCH 2/7] Document that methods can be written for `vec_ptype()` --- R/type.R | 6 ++++++ man/vec_ptype.Rd | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/R/type.R b/R/type.R index df5cd9f16..366b61d2e 100644 --- a/R/type.R +++ b/R/type.R @@ -39,6 +39,12 @@ #' See [internal-faq-ptype2-identity] for more information about #' identity values. #' +#' For performance, when developing a new S3 class you might want to override +#' the default behavior of `vec_ptype()`, which is to call `vec_slice(x, 0L)`. +#' To do this, write a `vec_ptype()` S3 method for your class. The method should +#' return a result equivalent to `vec_slice(x, 0L)`, but for many classes this +#' is a static object. +#' #' Because it may contain unspecified vectors, the prototype returned #' by `vec_ptype()` is said to be __unfinalised__. Call #' [vec_ptype_finalise()] to finalise it. Commonly you will need the diff --git a/man/vec_ptype.Rd b/man/vec_ptype.Rd index 389488755..21b324aea 100644 --- a/man/vec_ptype.Rd +++ b/man/vec_ptype.Rd @@ -59,6 +59,12 @@ for any 1d vector type. See \link{internal-faq-ptype2-identity} for more information about identity values. +For performance, when developing a new S3 class you might want to override +the default behavior of \code{vec_ptype()}, which is to call \code{vec_slice(x, 0L)}. +To do this, write a \code{vec_ptype()} S3 method for your class. The method should +return a result equivalent to \code{vec_slice(x, 0L)}, but for many classes this +is a static object. + Because it may contain unspecified vectors, the prototype returned by \code{vec_ptype()} is said to be \strong{unfinalised}. Call \code{\link[=vec_ptype_finalise]{vec_ptype_finalise()}} to finalise it. Commonly you will need the From 2acbfa2fbe3e4900c9f38af515b934d38494f3ca Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 25 Feb 2021 08:23:03 -0500 Subject: [PATCH 3/7] Test that vec-ptype methods are found --- tests/testthat/test-type.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/testthat/test-type.R b/tests/testthat/test-type.R index 6b5ad5f5f..23a026724 100644 --- a/tests/testthat/test-type.R +++ b/tests/testthat/test-type.R @@ -190,6 +190,13 @@ test_that("the type of a classed data frame with an unspecified column retains u expect_identical(vec_ptype(df2), expect) }) +test_that("vec_ptype() methods can be written", { + local_methods( + vec_ptype.vctrs_foobar = function(x, ...) "dispatch" + ) + expect_identical(vec_ptype(foobar()), "dispatch") +}) + test_that("vec_ptype_finalise() works with NULL", { expect_identical(vec_ptype_finalise(NULL), NULL) }) From 73b31dfd251068f06f8c5c7ade6577311d7b0fca Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 25 Feb 2021 08:27:23 -0500 Subject: [PATCH 4/7] Add NEWS bullet --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 89b4f5f21..bcb2f9809 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # vctrs (development version) +* `vec_ptype()` is now generic. The default behavior for S3 classes continues + to use `vec_slice(x, 0L)`, but S3 methods can now be written to improve + performance as required for custom classes. + * New `vec_detect_complete()`, inspired by `stats::complete.cases()`. For most vectors, this is identical to `!vec_equal_na()`. For data frames and matrices, this detects rows that only contain non-missing values. From 749373dd46c60a4e7cdb125ba23d4625c41bd8b0 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Wed, 10 Mar 2021 09:39:19 -0500 Subject: [PATCH 5/7] Use `decl/` folder to hold forward declarations --- src/decl/ptype-decl.h | 7 +++++++ src/type.c | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 src/decl/ptype-decl.h diff --git a/src/decl/ptype-decl.h b/src/decl/ptype-decl.h new file mode 100644 index 000000000..efab4b976 --- /dev/null +++ b/src/decl/ptype-decl.h @@ -0,0 +1,7 @@ +#ifndef VCTRS_PTYPE_DECL_H +#define VCTRS_PTYPE_DECL_H + +static inline SEXP vec_ptype_method(SEXP x); +static inline SEXP vec_ptype_invoke(SEXP x, SEXP method); + +#endif diff --git a/src/type.c b/src/type.c index 454edf913..572424097 100644 --- a/src/type.c +++ b/src/type.c @@ -4,6 +4,7 @@ #include "ptype2.h" #include "type-data-frame.h" #include "utils.h" +#include "decl/ptype-decl.h" // Initialised at load time static SEXP syms_vec_ptype = NULL; @@ -56,9 +57,6 @@ static inline SEXP vec_ptype_slice(SEXP x, SEXP empty) { } } -static inline SEXP vec_ptype_method(SEXP x); -static inline SEXP vec_ptype_invoke(SEXP x, SEXP method); - static SEXP s3_type(SEXP x, struct vctrs_arg* x_arg) { switch (class_type(x)) { case vctrs_class_bare_tibble: From b999f2560a2de421341cdc42dc8f65d98b6ecbd3 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Wed, 10 Mar 2021 09:46:03 -0500 Subject: [PATCH 6/7] Use @lionel-'s suggestion in `vec_ptype()` documentation --- R/type.R | 12 +++++++----- man/vec_ptype.Rd | 11 ++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/R/type.R b/R/type.R index 366b61d2e..14c90835a 100644 --- a/R/type.R +++ b/R/type.R @@ -39,11 +39,13 @@ #' See [internal-faq-ptype2-identity] for more information about #' identity values. #' -#' For performance, when developing a new S3 class you might want to override -#' the default behavior of `vec_ptype()`, which is to call `vec_slice(x, 0L)`. -#' To do this, write a `vec_ptype()` S3 method for your class. The method should -#' return a result equivalent to `vec_slice(x, 0L)`, but for many classes this -#' is a static object. +#' `vec_ptype()` is a _performance_ generic. It is not necessary to implement it +#' because the default method will work for any vctrs type. However the default +#' method builds around other vctrs primitives like `vec_slice()` which incurs +#' performance costs. If your class has a static prototype, you might consider +#' implementing a custom `vec_ptype()` method that returns a constant. This will +#' improve the performance of your class in many cases ([common +#' type][vec_ptype2] imputation in particular). #' #' Because it may contain unspecified vectors, the prototype returned #' by `vec_ptype()` is said to be __unfinalised__. Call diff --git a/man/vec_ptype.Rd b/man/vec_ptype.Rd index 21b324aea..4ba92e4a5 100644 --- a/man/vec_ptype.Rd +++ b/man/vec_ptype.Rd @@ -59,11 +59,12 @@ for any 1d vector type. See \link{internal-faq-ptype2-identity} for more information about identity values. -For performance, when developing a new S3 class you might want to override -the default behavior of \code{vec_ptype()}, which is to call \code{vec_slice(x, 0L)}. -To do this, write a \code{vec_ptype()} S3 method for your class. The method should -return a result equivalent to \code{vec_slice(x, 0L)}, but for many classes this -is a static object. +\code{vec_ptype()} is a \emph{performance} generic. It is not necessary to implement it +because the default method will work for any vctrs type. However the default +method builds around other vctrs primitives like \code{vec_slice()} which incurs +performance costs. If your class has a static prototype, you might consider +implementing a custom \code{vec_ptype()} method that returns a constant. This will +improve the performance of your class in many cases (\link[=vec_ptype2]{common type} imputation in particular). Because it may contain unspecified vectors, the prototype returned by \code{vec_ptype()} is said to be \strong{unfinalised}. Call From 3cd8668fa7b178472095321a5b891744bd710112 Mon Sep 17 00:00:00 2001 From: DavisVaughan Date: Thu, 11 Mar 2021 09:59:12 -0500 Subject: [PATCH 7/7] Tweak NEWS to align better with new docs --- NEWS.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index bcb2f9809..b4cf03d27 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,9 @@ # vctrs (development version) -* `vec_ptype()` is now generic. The default behavior for S3 classes continues - to use `vec_slice(x, 0L)`, but S3 methods can now be written to improve - performance as required for custom classes. +* `vec_ptype()` is now an optional _performance_ generic. It is not necessary + to implement, but if your class has a static prototype, you might consider + implementing a custom `vec_ptype()` method that returns a constant to + improve performance in some cases (such as common type imputation). * New `vec_detect_complete()`, inspired by `stats::complete.cases()`. For most vectors, this is identical to `!vec_equal_na()`. For data frames and