Skip to content

Commit ca6637c

Browse files
committed
Merge branch 'master' of https://github.com/Rdatatable/data.table into issue_2606
2 parents a4c7e7a + 2b191ae commit ca6637c

File tree

7 files changed

+25
-14
lines changed

7 files changed

+25
-14
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@
8888
8989
14. Filling columns of class Date with POSIXct (and vice versa) using `shift()` now yields a clear, informative error message specifying the class mismatch, [#5218](https://github.com/Rdatatable/data.table/issues/5218). Thanks @ashbaldry for the report and @ben-schwen for the fix.
9090
91+
15. `split.data.table()` output list elements retain the S3 class of the generating data.table, e.g. in `l=split(x, ...)` if `x` has class `my_class`, so will `l[[1]]` and so on, [#7105](https://github.com/Rdatatable/data.table/issues/7105). Thanks @m-muecke for the bug report and @MichaelChirico for the fix.
92+
9193
### NOTES
9294
9395
1. The following in-progress deprecations have proceeded:

R/data.table.R

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,7 +2491,7 @@ Ops.data.table = function(e1, e2 = NULL)
24912491
}
24922492

24932493
split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TRUE, flatten = TRUE, ..., verbose = getOption("datatable.verbose")) {
2494-
if (!is.data.table(x)) stopf("x argument must be a data.table")
2494+
if (!is.data.table(x)) internal_error("x argument to split.data.table must be a data.table") # nocov
24952495
stopifnot(is.logical(drop), is.logical(sorted), is.logical(keep.by), is.logical(flatten))
24962496
# split data.frame way, using `f` and not `by` argument
24972497
if (!missing(f)) {
@@ -2566,8 +2566,11 @@ split.data.table = function(x, f, drop = FALSE, by, sorted = FALSE, keep.by = TR
25662566
setattr(ll, "names", nm)
25672567
# handle nested split
25682568
if (flatten || length(by) == 1L) {
2569-
for (x in ll) .Call(C_unlock, x)
2570-
lapply(ll, setDT)
2569+
for (xi in ll) .Call(C_unlock, xi)
2570+
out = lapply(ll, setDT)
2571+
# TODO(#2000): just let setDT handle this
2572+
if (!identical(old_class <- class(x), c("data.table", "data.frame"))) for (xi in out) setattr(xi, "class", old_class)
2573+
out
25712574
# alloc.col could handle DT in list as done in: c9c4ff80bdd4c600b0c4eff23b207d53677176bd
25722575
} else if (length(by) > 1L) {
25732576
lapply(ll, split.data.table, drop=drop, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)

inst/tests/tests.Rraw

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
7474
setfrev = data.table:::setfrev
7575
shallow = data.table:::shallow # until exported
7676
.shallow = data.table:::.shallow
77-
split.data.table = data.table:::split.data.table
7877
stopf = data.table:::stopf
7978
test = data.table:::test
8079
uniqlengths = data.table:::uniqlengths
@@ -6852,11 +6851,12 @@ test(1463.79, shift(x,-1L, type="cyclic"), as.raw(c(2:5, 1)))
68526851
test(1463.80, shift(x,-(1:2),type="cyclic"), list(as.raw(c(2:5, 1)), as.raw(c(3:5,1:2))))
68536852

68546853
# shift incompatible types (e.g. Date and POSIXct)
6855-
d = .Date(0:4)
6854+
# TODO(R>=3.5): use .Date() instead of setting class by hand
6855+
d = structure(0:4, class="Date")
68566856
p = .POSIXct(1:5)
68576857
test(1463.81, shift(d, fill=p[1L]), error="Filling Date with POSIXct .* unsupported.*")
68586858
test(1463.82, shift(p, fill=d[1L]), error="Filling POSIXct with Date .* unsupported.*")
6859-
test(1463.83, shift(d, fill=as.IDate(2000L)), .Date(c(2000L, 0:3)))
6859+
test(1463.83, shift(d, fill=as.IDate(2000L)), structure(c(2000L, 0:3), class="Date"))
68606860

68616861
# FR #686
68626862
DT = data.table(a=rep(c("A", "B", "C", "A", "B"), c(2,2,3,1,2)), foo=1:10)
@@ -9763,6 +9763,14 @@ test(1639.141, all(sapply(dtL, truelength) > 1000))
97639763
dt <- data.table(x = factor("a"), y = 1)
97649764
test(1639.142, x = split(dt, by = "x"), y = list(a = dt))
97659765
test(1639.143, x = split(dt, by = "y"), y = list(`1` = dt))
9766+
9767+
# retain a custom class after splitting, #7105
9768+
DT = data.table(x=letters[1:10], y=1:10, z=rnorm(10))
9769+
setattr(DT, "class", c("my_class", class(DT)))
9770+
test(1639.144, "my_class" %in% unlist(lapply(split(DT, by="x"), class)))
9771+
test(1639.145, "my_class" %in% unlist(lapply(split(DT, ~x), class)))
9772+
test(1639.146, "my_class" %in% unlist(lapply(split(DT, by=c("x", "y")), class)))
9773+
test(1639.147, "my_class" %in% unlist(lapply(split(DT, ~x+y), class)))
97669774
rm_all()
97679775

97689776
# allow x's cols (specifically x's join cols) to be referred to using 'x.' syntax
@@ -14276,7 +14284,7 @@ test(1984.25, rbindlist(list(DT[1L], DT[2L]), idcol = TRUE), data.table(.id=1:2,
1427614284
test(1984.26, setalloccol(`*tmp*`), error='setalloccol attempting to modify `*tmp*`')
1427714285
DF = as.data.frame(DT)
1427814286
test(1984.27, identical(shallow(DF), DF)) # shallow (which is not exported) works on DF from v1.14.2. identical() to force checking the selfref attribute for #5286.
14279-
test(1984.28, split.data.table(DF), error='argument must be a data.table')
14287+
# 1984.28 was a coverage test converted to 'nocov' of an internal_error instead
1428014288
test(1984.29, split(DT, by='a', f='a'), error="passing 'f' argument together with 'by' is not allowed")
1428114289
test(1984.30, split(DT), error="Either 'by' or 'f' argument must be supplied")
1428214290
setnames(DT, '.ll.tech.split')

src/fread.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ static inline bool end_of_field(const char *ch)
293293
// default, and therefore characters in the range 0x80-0xFF are negative.
294294
// We use eol() because that looks at eol_one_r inside it w.r.t. \r
295295
// \0 (maybe more than one) before eof are part of field and do not end it; eol() returns false for \0 but the ch==eof will return true for the \0 at eof.
296-
return *ch == sep || (*ch <= 13 && (ch == eof || eol(&ch)));
296+
return *ch == sep || ((uint8_t)*ch <= 13 && (ch == eof || eol(&ch)));
297297
}
298298

299299
static inline const char *end_NA_string(const char *start)

src/freadR.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,8 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
457457
if (colClassesAs) setAttrib(DT, sym_colClassesAs, colClassesAs);
458458
} else {
459459
int nprotect = 0;
460-
SEXP tt = PROTECT(allocVector(STRSXP, ncol - ndrop));
461-
setAttrib(DT, R_NamesSymbol, tt); nprotect++;
460+
SEXP tt = PROTECT(allocVector(STRSXP, ncol - ndrop)); nprotect++;
461+
setAttrib(DT, R_NamesSymbol, tt);
462462

463463
SEXP ss;
464464
if (colClassesAs) {

src/mergelist.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ void mergeIndexAttrib(SEXP to, SEXP from) {
3232
}
3333

3434
SEXP cbindlist(SEXP x, SEXP copyArg) {
35-
if (!isNewList(x) || isFrame(x))
35+
if (!isNewList(x) || isDataFrame(x))
3636
error(_("'%s' must be a list"), "x");
3737
bool copy = (bool)LOGICAL(copyArg)[0];
3838
const bool verbose = GetVerbose();

src/utils.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -533,10 +533,8 @@ bool isRectangularList(SEXP x) {
533533
return isRectangular(x);
534534
}
535535

536-
// TODO: use isDataFrame (when included in any R release).
537-
// isDataTable(x) || isFrame(x) || isRectangularList(x)
538536
bool perhapsDataTable(SEXP x) {
539-
return isDataTable(x) || isFrame(x) || isRectangularList(x);
537+
return isDataTable(x) || isDataFrame(x) || isRectangularList(x);
540538
}
541539
SEXP perhapsDataTableR(SEXP x) {
542540
return ScalarLogical(perhapsDataTable(x));

0 commit comments

Comments
 (0)