Skip to content

Commit 98a5a78

Browse files
Performance issues with data_filter() when tidyverse is loaded? (#651)
* Performance issues with `data_filter()` when tidyverse is loaded? Fixes #650 * add test * news * same for data_arrange * news * also data_duplicated * also data_unique * version * Update NEWS.md Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> --------- Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
1 parent d85088c commit 98a5a78

File tree

7 files changed

+88
-2
lines changed

7 files changed

+88
-2
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: datawizard
33
Title: Easy Data Wrangling and Statistical Transformations
4-
Version: 1.2.0.6
4+
Version: 1.2.0.7
55
Authors@R: c(
66
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
77
comment = c(ORCID = "0000-0003-1995-6531")),

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ CHANGES
2121
* `display()` methods now support the `{tinytable}` package. Use `format = "tt"`
2222
to export tables as `tinytable` objects (#646).
2323

24+
* Improved performance for several functions that process grouped data frames
25+
when the input is a grouped `tibble` (#651).
26+
2427
BUG FIXES
2528

2629
* Fixed an issue when `demean()`ing nested structures with more than 2 grouping

R/data_arrange.R

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,19 @@ data_arrange.default <- function(data, select = NULL, safe = TRUE) {
3333
return(data)
3434
}
3535

36+
original_x <- data
37+
3638
# Input validation check
3739
data <- .coerce_to_dataframe(data)
3840

41+
# Remove tidyverse attributes, will add them back at the end
42+
if (inherits(original_x, "tbl_df")) {
43+
tbl_input <- TRUE
44+
data <- as.data.frame(data, stringsAsFactors = FALSE)
45+
} else {
46+
tbl_input <- FALSE
47+
}
48+
3949
# find which vars should be decreasing
4050
desc <- select[startsWith(select, "-")]
4151
desc <- gsub("^-", "", desc)
@@ -95,15 +105,27 @@ data_arrange.default <- function(data, select = NULL, safe = TRUE) {
95105
rownames(out) <- NULL
96106
}
97107

108+
# add back custom attributes
109+
out <- .replace_attrs(out, attributes(original_x))
110+
98111
out
99112
}
100113

101114

102115
#' @export
103116
data_arrange.grouped_df <- function(data, select = NULL, safe = TRUE) {
117+
original_x <- data
104118
grps <- attr(data, "groups", exact = TRUE)
105119
grps <- grps[[".rows"]]
106120

121+
# Remove tidyverse attributes, will add them back at the end
122+
if (inherits(data, "tbl_df")) {
123+
tbl_input <- TRUE
124+
data <- as.data.frame(data, stringsAsFactors = FALSE)
125+
} else {
126+
tbl_input <- FALSE
127+
}
128+
107129
out <- lapply(grps, function(x) {
108130
data_arrange.default(data[x, ], select = select, safe = safe)
109131
})
@@ -114,5 +136,13 @@ data_arrange.grouped_df <- function(data, select = NULL, safe = TRUE) {
114136
rownames(out) <- NULL
115137
}
116138

139+
# add back tidyverse attributes
140+
if (isTRUE(tbl_input)) {
141+
class(out) <- c("tbl_df", "tbl", "data.frame")
142+
}
143+
144+
# add back custom attributes
145+
out <- .replace_attrs(out, attributes(original_x))
146+
117147
out
118148
}

R/data_duplicated.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ data_duplicated.grouped_df <- function(data,
8686
grps <- attr(data, "groups", exact = TRUE)
8787
grps <- grps[[".rows"]]
8888

89+
data <- as.data.frame(data)
90+
8991
out <- lapply(grps, function(x) {
9092
data_duplicated.data.frame(data[x, ], select = select)
9193
})

R/data_match.R

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,15 @@ data_filter <- function(x, ...) {
183183
#' @export
184184
data_filter.data.frame <- function(x, ...) {
185185
out <- x
186+
187+
# convert tibble to data.frame
188+
if (inherits(x, "tbl_df")) {
189+
out <- as.data.frame(out, stringsAsFactors = FALSE)
190+
tbl_input <- TRUE
191+
} else {
192+
tbl_input <- FALSE
193+
}
194+
186195
dots <- match.call(expand.dots = FALSE)[["..."]]
187196

188197
if (any(nzchar(names(dots), keepNA = TRUE))) {
@@ -275,15 +284,30 @@ data_filter.data.frame <- function(x, ...) {
275284

276285
# add back custom attributes
277286
out <- .replace_attrs(out, attributes(x))
287+
288+
# add back tidyverse attributes
289+
if (isTRUE(tbl_input)) {
290+
class(out) <- c("tbl_df", "tbl", "data.frame")
291+
}
292+
278293
out
279294
}
280295

281296

282297
#' @export
283298
data_filter.grouped_df <- function(x, ...) {
299+
original_x <- x
284300
grps <- attr(x, "groups", exact = TRUE)
285301
grps <- grps[[".rows"]]
286302

303+
# Remove tidyverse attributes, will add them back at the end
304+
if (inherits(x, "tbl_df")) {
305+
tbl_input <- TRUE
306+
x <- as.data.frame(x, stringsAsFactors = FALSE)
307+
} else {
308+
tbl_input <- FALSE
309+
}
310+
287311
dots <- match.call(expand.dots = FALSE)[["..."]]
288312
out <- lapply(grps, function(grp) {
289313
arguments <- list(x[grp, ])
@@ -297,6 +321,14 @@ data_filter.grouped_df <- function(x, ...) {
297321
rownames(out) <- NULL
298322
}
299323

324+
# add back tidyverse attributes
325+
if (isTRUE(tbl_input)) {
326+
class(out) <- c("tbl_df", "tbl", "data.frame")
327+
}
328+
329+
# add back custom attributes
330+
out <- .replace_attrs(out, attributes(original_x))
331+
300332
out
301333
}
302334

R/data_unique.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ data_unique.grouped_df <- function(data,
122122
grps <- attr(data, "groups", exact = TRUE)
123123
grps <- grps[[".rows"]]
124124

125-
data2 <- data_ungroup(data)
125+
data2 <- as.data.frame(data_ungroup(data))
126126

127127
out <- lapply(grps, function(x) {
128128
data_unique.data.frame(data2[x, ], select = select, keep = keep, verbose = verbose)

tests/testthat/test-data_match.R

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,3 +345,22 @@ test_that("data_filter, slicing works with functions", {
345345
)
346346
# styler: on
347347
})
348+
349+
350+
test_that("data_filter works with tibbles", {
351+
skip_if_not_installed("tibble")
352+
skip_if_not_installed("dplyr")
353+
data(mtcars)
354+
355+
# preserve class
356+
d <- tibble::as_tibble(mtcars)
357+
out <- data_filter(d, mpg > 15)
358+
expect_s3_class(out, "tbl_df")
359+
360+
# preserve attributes
361+
d <- tibble::as_tibble(mtcars)
362+
d <- dplyr::group_by(d, cyl)
363+
out <- data_filter(d, mpg > 15)
364+
expect_s3_class(out, "tbl_df")
365+
expect_named(attr(out, "groups"), c("cyl", ".rows"))
366+
})

0 commit comments

Comments
 (0)