Skip to content

Commit 2b191ae

Browse files
split() output retains a custom class (#7195)
* split output retains a custom class * fix test; add todo
1 parent 81646e7 commit 2b191ae

File tree

3 files changed

+17
-5
lines changed

3 files changed

+17
-5
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@
8686
8787
14. Filling columns of class Date with POSIXct (and vice versa) using `shift()` now yields a clear, informative error message specifying the class mismatch, [#5218](https://github.com/Rdatatable/data.table/issues/5218). Thanks @ashbaldry for the report and @ben-schwen for the fix.
8888
89+
15. The list elements returned by `split.data.table()` now retain the S3 class of the input data.table, e.g. for `l = split(x, ...)`, if `x` has class `my_class`, then so do `l[[1]]` and every other element of `l`, [#7105](https://github.com/Rdatatable/data.table/issues/7105). Thanks @m-muecke for the bug report and @MichaelChirico for the fix.
90+
8991
### NOTES
9092
9193
1. The following in-progress deprecations have proceeded:

R/data.table.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,7 +2491,7 @@ Ops.data.table = function(e1, e2 = NULL)
24912491
}
24922492

24932493
split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TRUE, flatten = TRUE, ..., verbose = getOption("datatable.verbose")) {
2494-
if (!is.data.table(x)) stopf("x argument must be a data.table")
2494+
if (!is.data.table(x)) internal_error("x argument to split.data.table must be a data.table") # nocov
24952495
stopifnot(is.logical(drop), is.logical(sorted), is.logical(keep.by), is.logical(flatten))
24962496
# split data.frame way, using `f` and not `by` argument
24972497
if (!missing(f)) {
@@ -2566,8 +2566,11 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
25662566
setattr(ll, "names", nm)
25672567
# handle nested split
25682568
if (flatten || length(by) == 1L) {
2569-
for (x in ll) .Call(C_unlock, x)
2570-
lapply(ll, setDT)
2569+
for (xi in ll) .Call(C_unlock, xi)
2570+
out = lapply(ll, setDT)
2571+
# TODO(#2000): just let setDT handle this
2572+
if (!identical(old_class <- class(x), c("data.table", "data.frame"))) for (xi in out) setattr(xi, "class", old_class)
2573+
out
25712574
# alloc.col could handle DT in list as done in: c9c4ff80bdd4c600b0c4eff23b207d53677176bd
25722575
} else if (length(by) > 1L) {
25732576
lapply(ll, split.data.table, drop=drop, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)

inst/tests/tests.Rraw

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
7474
setfrev = data.table:::setfrev
7575
shallow = data.table:::shallow # until exported
7676
.shallow = data.table:::.shallow
77-
split.data.table = data.table:::split.data.table
7877
stopf = data.table:::stopf
7978
test = data.table:::test
8079
uniqlengths = data.table:::uniqlengths
@@ -9764,6 +9763,14 @@ test(1639.141, all(sapply(dtL, truelength) > 1000))
97649763
dt <- data.table(x = factor("a"), y = 1)
97659764
test(1639.142, x = split(dt, by = "x"), y = list(a = dt))
97669765
test(1639.143, x = split(dt, by = "y"), y = list(`1` = dt))
9766+
9767+
# retain a custom class after splitting, #7105
9768+
DT = data.table(x=letters[1:10], y=1:10, z=rnorm(10))
9769+
setattr(DT, "class", c("my_class", class(DT)))
9770+
test(1639.144, "my_class" %in% unlist(lapply(split(DT, by="x"), class)))
9771+
test(1639.145, "my_class" %in% unlist(lapply(split(DT, ~x), class)))
9772+
test(1639.146, "my_class" %in% unlist(lapply(split(DT, by=c("x", "y")), class)))
9773+
test(1639.147, "my_class" %in% unlist(lapply(split(DT, ~x+y), class)))
97679774
rm_all()
97689775

97699776
# allow x's cols (specifically x's join cols) to be referred to using 'x.' syntax
@@ -14277,7 +14284,7 @@ test(1984.25, rbindlist(list(DT[1L], DT[2L]), idcol = TRUE), data.table(.id=1:2,
1427714284
test(1984.26, setalloccol(`*tmp*`), error='setalloccol attempting to modify `*tmp*`')
1427814285
DF = as.data.frame(DT)
1427914286
test(1984.27, identical(shallow(DF), DF)) # shallow (which is not exported) works on DF from v1.14.2. identical() to force checking the selfref attribute for #5286.
14280-
test(1984.28, split.data.table(DF), error='argument must be a data.table')
14287+
# 1984.28 was a coverage-only test; the check it exercised is now an internal_error marked 'nocov', so the test was removed
1428114288
test(1984.29, split(DT, by='a', f='a'), error="passing 'f' argument together with 'by' is not allowed")
1428214289
test(1984.30, split(DT), error="Either 'by' or 'f' argument must be supplied")
1428314290
setnames(DT, '.ll.tech.split')

0 commit comments

Comments
 (0)