Skip to content

Commit 966f00b

Browse files
committed
wip shift multiple n return data.table rather than list
1 parent beded95 commit 966f00b

File tree

6 files changed

+49
-33
lines changed

6 files changed

+49
-33
lines changed

R/shift.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ shift = function(x, n=1L, fill, type=c("lag", "lead", "shift", "cyclic"), give.n
2323
}
2424
setattr(ans, "names", paste(rep(nx,each=length(n)), type, n, sep="_"))
2525
}
26+
if (length(n)>1L) setDT(ans)
2627
ans
2728
}
2829

inst/tests/tests.Rraw

Lines changed: 30 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6707,68 +6707,68 @@ test(1462.3, DT[, sum(unlist(mget(cols, as.environment(-1)))), by=x], DT[, sum(u
67076707
x=1:5
67086708
y=factor(x)
67096709
test(1463.01, shift(x,1L), as.integer(c(NA, 1:4)))
6710-
test(1463.02, shift(x,1:2), list(as.integer(c(NA, 1:4)), as.integer(c(NA, NA, 1:3))))
6710+
test(1463.02, shift(x,1:2), data.table(as.integer(c(NA, 1:4)), as.integer(c(NA, NA, 1:3))))
67116711
test(1463.03, shift(x,1L, 0L), as.integer(c(0L, 1:4)))
67126712
test(1463.04, shift(x,1L, type="lead"), as.integer(c(2:5, NA)))
6713-
test(1463.05, shift(x,1:2, type="lead"), list(as.integer(c(2:5, NA)), as.integer(c(3:5, NA, NA))))
6713+
test(1463.05, shift(x,1:2, type="lead"), data.table(as.integer(c(2:5, NA)), as.integer(c(3:5, NA, NA))))
67146714
test(1463.06, shift(x,1L, 0L,type="lead"), as.integer(c(2:5, 0L)))
67156715
test(1463.07, shift(y,1L), factor(c(NA,1:4), levels=1:5))
67166716
test(1463.08, shift(y,1L, type="lead"), factor(c(2:5, NA), levels=1:5))
67176717
test(1463.09, shift(x,1L, type="cyclic"), as.integer(c(5, 1:4)))
6718-
test(1463.10, shift(x,1:2, type="cyclic"), list(as.integer(c(5, 1:4)), as.integer(c(4:5, 1:3))))
6718+
test(1463.10, shift(x,1:2, type="cyclic"), data.table(as.integer(c(5, 1:4)), as.integer(c(4:5, 1:3))))
67196719
test(1463.11, shift(x,-1L, type="cyclic"), as.integer(c(2:5, 1)))
6720-
test(1463.12, shift(x,-(1:2),type="cyclic"), list(as.integer(c(2:5, 1)), as.integer(c(3:5,1:2))))
6720+
test(1463.12, shift(x,-(1:2),type="cyclic"), data.table(as.integer(c(2:5, 1)), as.integer(c(3:5,1:2))))
67216721

67226722
x=as.numeric(x)
67236723
test(1463.13, shift(x,1L), as.numeric(c(NA, 1:4)))
6724-
test(1463.14, shift(x,1:2), list(as.numeric(c(NA, 1:4)), as.numeric(c(NA, NA, 1:3))))
6724+
test(1463.14, shift(x,1:2), data.table(as.numeric(c(NA, 1:4)), as.numeric(c(NA, NA, 1:3))))
67256725
test(1463.15, shift(x,1L, 0L), as.numeric(c(0L, 1:4)))
67266726
test(1463.16, shift(x,1L, type="lead"), as.numeric(c(2:5, NA)))
6727-
test(1463.17, shift(x,1:2, type="lead"), list(as.numeric(c(2:5, NA)), as.numeric(c(3:5, NA, NA))))
6727+
test(1463.17, shift(x,1:2, type="lead"), data.table(as.numeric(c(2:5, NA)), as.numeric(c(3:5, NA, NA))))
67286728
test(1463.18, shift(x,1L, 0L,type="lead"), as.numeric(c(2:5, 0L)))
67296729
test(1463.19, shift(x,1L, type="cyclic"), as.numeric(c(5, 1:4)))
6730-
test(1463.20, shift(x,1:2, type="cyclic"), list(as.numeric(c(5, 1:4)), as.numeric(c(4:5, 1:3))))
6730+
test(1463.20, shift(x,1:2, type="cyclic"), data.table(as.numeric(c(5, 1:4)), as.numeric(c(4:5, 1:3))))
67316731
test(1463.21, shift(x,-1L, type="cyclic"), as.numeric(c(2:5, 1)))
6732-
test(1463.22, shift(x,-(1:2),type="cyclic"), list(as.numeric(c(2:5, 1)), as.numeric(c(3:5,1:2))))
6732+
test(1463.22, shift(x,-(1:2),type="cyclic"), data.table(as.numeric(c(2:5, 1)), as.numeric(c(3:5,1:2))))
67336733

67346734

67356735
if (test_bit64) {
67366736
x=as.integer64(x)
67376737
test(1463.23, shift(x,1L), as.integer64(c(NA, 1:4)))
6738-
test(1463.24, shift(x,1:2), list(as.integer64(c(NA, 1:4)), as.integer64(c(NA, NA, 1:3))))
6738+
test(1463.24, shift(x,1:2), data.table(as.integer64(c(NA, 1:4)), as.integer64(c(NA, NA, 1:3))))
67396739
test(1463.25, shift(x,1L, 0L), as.integer64(c(0L, 1:4)))
67406740
test(1463.26, shift(x,1L, type="lead"), as.integer64(c(2:5, NA)))
6741-
test(1463.27, shift(x,1:2, type="lead"), list(as.integer64(c(2:5, NA)), as.integer64(c(3:5, NA, NA))))
6741+
test(1463.27, shift(x,1:2, type="lead"), data.table(as.integer64(c(2:5, NA)), as.integer64(c(3:5, NA, NA))))
67426742
test(1463.28, shift(x,1L, 0L, type="lead"), as.integer64(c(2:5, 0L)))
67436743
test(1463.29, shift(x,1L, type="cyclic"), as.integer64(c(5, 1:4)))
6744-
test(1463.30, shift(x,1:2, type="cyclic"), list(as.integer64(c(5, 1:4)), as.integer64(c(4:5, 1:3))))
6744+
test(1463.30, shift(x,1:2, type="cyclic"), data.table(as.integer64(c(5, 1:4)), as.integer64(c(4:5, 1:3))))
67456745
test(1463.31, shift(x,-1L, type="cyclic"), as.integer64(c(2:5, 1)))
6746-
test(1463.32, shift(x,-(1:2), type="cyclic"), list(as.integer64(c(2:5, 1)), as.integer64(c(3:5,1:2))))
6746+
test(1463.32, shift(x,-(1:2), type="cyclic"), data.table(as.integer64(c(2:5, 1)), as.integer64(c(3:5,1:2))))
67476747
}
67486748

67496749
x=as.character(x)
67506750
test(1463.33, shift(x,1L), as.character(c(NA, 1:4)))
6751-
test(1463.34, shift(x,1:2), list(as.character(c(NA, 1:4)), as.character(c(NA, NA, 1:3))))
6751+
test(1463.34, shift(x,1:2), data.table(as.character(c(NA, 1:4)), as.character(c(NA, NA, 1:3))))
67526752
test(1463.35, shift(x,1L, 0L), as.character(c(0L, 1:4)))
67536753
test(1463.36, shift(x,1L, type="lead"), as.character(c(2:5, NA)))
6754-
test(1463.37, shift(x,1:2, type="lead"), list(as.character(c(2:5, NA)), as.character(c(3:5, NA, NA))))
6754+
test(1463.37, shift(x,1:2, type="lead"), data.table(as.character(c(2:5, NA)), as.character(c(3:5, NA, NA))))
67556755
test(1463.38, shift(x,1L, 0L, type="lead"), as.character(c(2:5, 0L)))
67566756
test(1463.39, shift(x,1L, type="cyclic"), as.character(c(5, 1:4)))
6757-
test(1463.40, shift(x,1:2, type="cyclic"), list(as.character(c(5, 1:4)), as.character(c(4:5, 1:3))))
6757+
test(1463.40, shift(x,1:2, type="cyclic"), data.table(as.character(c(5, 1:4)), as.character(c(4:5, 1:3))))
67586758
test(1463.41, shift(x,-1L, type="cyclic"), as.character(c(2:5, 1)))
6759-
test(1463.42, shift(x,-(1:2), type="cyclic"), list(as.character(c(2:5, 1)), as.character(c(3:5,1:2))))
6759+
test(1463.42, shift(x,-(1:2), type="cyclic"), data.table(as.character(c(2:5, 1)), as.character(c(3:5,1:2))))
67606760

67616761
x=c(TRUE,FALSE,TRUE,FALSE,TRUE)
67626762
test(1463.43, shift(x,1L), c(NA, x[-5L]))
6763-
test(1463.44, shift(x,1:2), list(c(NA, x[-5L]), c(NA, NA, x[-(4:5)])))
6763+
test(1463.44, shift(x,1:2), data.table(c(NA, x[-5L]), c(NA, NA, x[-(4:5)])))
67646764
test(1463.45, shift(x,1L, 0L), c(FALSE, x[-5L]))
67656765
test(1463.46, shift(x,1L, type="lead"), c(x[-1L], NA))
6766-
test(1463.47, shift(x,1:2, type="lead"), list(c(x[-1L],NA), c(x[-(1:2)],NA,NA)))
6766+
test(1463.47, shift(x,1:2, type="lead"), data.table(c(x[-1L],NA), c(x[-(1:2)],NA,NA)))
67676767
test(1463.48, shift(x,1L, 0L, type="lead"), c(x[-(1)], FALSE))
67686768
test(1463.49, shift(x,1L, type="cyclic"), c(x[5L], x[-5L]))
6769-
test(1463.50, shift(x,1:2, type="cyclic"), list(c(x[5L], x[-5L]), c(x[4L:5L], x[-4L:-5L])))
6769+
test(1463.50, shift(x,1:2, type="cyclic"), data.table(c(x[5L], x[-5L]), c(x[4L:5L], x[-4L:-5L])))
67706770
test(1463.51, shift(x,-1L, type="cyclic"), c(x[-1L], x[1L]))
6771-
test(1463.52, shift(x,-(1:2), type="cyclic"), list(c(x[-1L], x[1L]), c(x[-1L:-2L], x[1L:2L])))
6771+
test(1463.52, shift(x,-(1:2), type="cyclic"), data.table(c(x[-1L], x[1L]), c(x[-1L:-2L], x[1L:2L])))
67726772

67736773
# for list of list, #1595
67746774
x = data.table(foo = c(list(c("a","b","c")), list(c("b","c")), list(c("a","b")), list(c("a"))), id = c(1,1,2,2))
@@ -6794,7 +6794,7 @@ test(1463.60, shift(mean), error="type 'closure' passed to shift(). Must be a ve
67946794
# test for 'give.names=TRUE' on vectors
67956795
x = 1:10
67966796
nm = c("x_lag_1", "x_lag_2")
6797-
ans = list(as.integer(c(NA, 1:9)), as.integer(c(NA, NA, 1:8)))
6797+
ans = data.table(as.integer(c(NA, 1:9)), as.integer(c(NA, NA, 1:8)))
67986798
setattr(ans, 'names', nm)
67996799
test(1463.61, shift(x, 1:2, give.names=TRUE), ans)
68006800

@@ -7001,7 +7001,7 @@ dt.query <- data.table(q1=c(-0.2, -0.05, 0.05, 0.15), q2=c(-0.2, -0.05, 0.05, 0.
70017001
test(1471, foverlaps(dt.query, dt.ref), data.table(dt.ref, dt.query, key=c("q1", "q2")))
70027002

70037003
# #1014 (segfault) fix
7004-
test(1472, shift(1, 1:2, NA, 'lag'), list(NA_real_, NA_real_))
7004+
test(1472, shift(1, 1:2, NA, 'lag'), data.table(NA_real_, NA_real_))
70057005

70067006
# #528, type=equal simple test
70077007
dt1 = data.table(x=1:5, y=6:10)
@@ -13945,21 +13945,21 @@ test(1963.03, shift(DT$x, -1, fill = 0L),
1394513945
test(1963.04, shift(DT$x, -1, give.names = TRUE), # give.names is ignored because we do not return list
1394613946
c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA))
1394713947
test(1963.05, shift(DT$x, -1:1),
13948-
list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
13949-
c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)))
13948+
data.table(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
13949+
c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L)))
1395013950
test(1963.06, shift(DT, -1),
13951-
list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
13952-
c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA)))
13951+
data.table(x=c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
13952+
y=c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA)))
1395313953
test(1963.07, shift(DT, -1:1),
13954-
list(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
13954+
data.table(c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA), 1:10,
1395513955
c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L),
1395613956
c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA), 10:1,
1395713957
c(NA, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L)))
1395813958
## some coverage tests for good measure
1395913959
test(1963.08, shift(DT$x, type = 'some_other_type'), error='should be one of.*lag.*lead')
1396013960
test(1963.09, shift(as.raw(0:1)), error = "Type 'raw' is not supported")
1396113961
test(1963.10, shift(DT, -1:1, type="shift", give.names = TRUE), # new type="shift" #3223
13962-
ans <- list(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
13962+
ans <- data.table(`x_shift_-1` = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, NA),
1396313963
x_shift_0 = 1:10,
1396413964
x_shift_1 = c(NA, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L),
1396513965
`y_shift_-1` = c(9L, 8L, 7L, 6L, 5L, 4L, 3L, 2L, 1L, NA),
@@ -16375,8 +16375,8 @@ test(2074.33, merge(DT, DT, by.x = 1i, by.y=1i), error="A non-empty vector of co
1637516375
# shift naming
1637616376
test(2074.34, shift(list(a=1:5, b=6:10), give.names=TRUE), list(a_lag_1=c(NA, 1:4), b_lag_1=c(NA, 6:9)))
1637716377
test(2074.345, shift(list(a=1:5, b=6:10), type="cyclic", give.names=TRUE), list(a_cyclic_1=c(5L, 1:4), b_cyclic_1=c(10L, 6:9)))
16378-
test(2074.35, shift(1:5, 1:2, give.names=TRUE), list(V1_lag_1=c(NA, 1:4), V1_lag_2=c(NA, NA, 1:3)))
16379-
test(2074.355, shift(1:5, 1:2, type="cyclic", give.names=TRUE), list(V1_cyclic_1=c(5L, 1:4), V1_cyclic_2=c(4L:5L, 1:3)))
16378+
test(2074.35, shift(1:5, 1:2, give.names=TRUE), data.table(V1_lag_1=c(NA, 1:4), V1_lag_2=c(NA, NA, 1:3)))
16379+
test(2074.355, shift(1:5, 1:2, type="cyclic", give.names=TRUE), data.table(V1_cyclic_1=c(5L, 1:4), V1_cyclic_2=c(4L:5L, 1:3)))
1638016380

1638116381
# bmerge.c
1638216382
x = data.table(a='a')

src/assign.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
212212
names = getAttrib(dt,R_NamesSymbol);
213213
// names may be NULL when null.data.table() passes list() to alloccol for example.
214214
// So, careful to use length() on names, not LENGTH().
215-
if (length(names)!=l) error(_("Internal error: length of names (%d) is not length of dt (%d)"),length(names),l); // # nocov
215+
if (length(names)!=l && length(names)>0) error(_("Internal error: length of names (%d) is not length of dt (%d)"),length(names),l); // # nocov
216216
if (!selfrefok(dt,verbose))
217217
return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2
218218
// added (n>l) ? ... for #970, see test 1481.

src/data.table.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ SEXP islockedR(SEXP x);
245245
bool need2utf8(SEXP x);
246246
SEXP coerceUtf8IfNeeded(SEXP x);
247247
SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg);
248+
SEXP setDT(SEXP list);
248249

249250
// types.c
250251
char *end(char *start);

src/gsumm.c

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,10 +1462,16 @@ SEXP gshift(SEXP x, SEXP nArg, SEXP fillArg, SEXP typeArg) {
14621462
for (int i=0; i<nk; i++) if (kd[i]==NA_INTEGER) error(_("Item %d of n is NA"), i+1);
14631463

14641464
SEXP ans = PROTECT(allocVector(VECSXP, nk)); nprotect++;
1465+
setDT(ans); // to tell gforce() clearly that these are columns without ambiguity over being a list column when DT or the group has 1 row
1466+
1467+
// TODO: do we still need to set gforce dynamic here???
14651468
SEXP att = PROTECT(allocVector(VECSXP, 3)); nprotect++;
14661469
SET_VECTOR_ELT(att, 0, R_NilValue);
1467-
SET_VECTOR_ELT(att, 1, ScalarLogical(true)); // first/last doesn't matter for gshift
1470+
SET_VECTOR_ELT(att, 1, ScalarLogical(true)); // first/last doesn't matter for gshift which returns the same length as its input
14681471
SET_VECTOR_ELT(att, 2, ScalarInteger(INT_MAX)); // i.e. grpsize; TODO: perhaps point lens directly to grpsize instead
1472+
setAttrib(ans, sym_gforce_dynamic, att);
1473+
//
1474+
14691475
SEXP thisfill = PROTECT(coerceAs(fillArg, x, ScalarLogical(0))); nprotect++;
14701476
for (int g=0; g<nk; g++) {
14711477
lag = stype == LAG || stype == CYCLIC;
@@ -1478,7 +1484,6 @@ SEXP gshift(SEXP x, SEXP nArg, SEXP fillArg, SEXP typeArg) {
14781484
R_xlen_t ansi = 0;
14791485
SEXP tmp;
14801486
SET_VECTOR_ELT(ans, g, tmp=allocVector(TYPEOF(x), nx));
1481-
setAttrib(tmp, sym_gforce_dynamic, att);
14821487
#define SHIFT(CTYPE, RTYPE, ASSIGN) { \
14831488
const CTYPE *xd = (const CTYPE *)RTYPE(x); \
14841489
const CTYPE fill = RTYPE(thisfill)[0]; \

src/utils.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,3 +406,12 @@ SEXP startsWithAny(const SEXP x, const SEXP y, SEXP start) {
406406
return ScalarLogical(false);
407407
}
408408

409+
SEXP setDT(SEXP x) {
410+
// used by gsumm.c to return a barebones DT (e.g. no names, not over-allocated) to gforce when n is a vector of shifts
411+
SEXP class;
412+
setAttrib(x, R_ClassSymbol, class=allocVector(STRSXP, 2));
413+
SET_STRING_ELT(class,0,char_datatable);
414+
SET_STRING_ELT(class,1,char_dataframe);
415+
return x;
416+
}
417+

0 commit comments

Comments
 (0)