Skip to content

Commit beded95

Browse files
committed
catch first/last n>1 := by group with helpful error
1 parent 9256ad7 commit beded95

File tree

3 files changed

+44
-37
lines changed

3 files changed

+44
-37
lines changed

R/data.table.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,7 +1864,8 @@ replace_dot_alias = function(e) {
18641864
#fix for #1683
18651865
if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
18661866
ans = gforce(thisEnv, jsub, o__, f__, len__, irows, # irows needed for #971
1867-
.Call(CsubsetVector, groups, grpcols)) # just a list() subset to make C level neater; doesn't copy column contents
1867+
.Call(CsubsetVector, groups, grpcols), # just a list() subset to make C level neater; doesn't copy column contents
1868+
lhs) # for now this just prevents := with new feature first/last n>1; in future see TODO below
18681869
} else {
18691870
ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose)
18701871
}
@@ -1886,7 +1887,7 @@ replace_dot_alias = function(e) {
18861887
# Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__)
18871888
# TO DO: setkey could mark the key whether it is unique or not.
18881889
if (!is.null(lhs)) {
1889-
if (GForce) { # GForce should work with := #1414
1890+
if (GForce) { # GForce should work with := #1414. TODO: move down into gforce at C level to save creating/rep'ing ans and grpcols wastefully
18901891
vlen = length(ans[[1L]]) # TODO: this might be ngrp when na.rm=TRUE and one group has 2 and another 0, so needs enhancing here (by passing all-1 back from gans?)
18911892
# replicate vals if GForce returns 1 value per group
18921893
jvals = if (vlen==length(len__)) lapply(tail(ans, -length(grpcols)), rep, times=len__) else tail(ans, -length(grpcols)) # see comment in #4245 for why rep instead of rep.int
@@ -3004,7 +3005,7 @@ gshift = function(x, n=1L, fill=NA, type=c("lag", "lead", "shift", "cyclic")) {
30043005
stopifnot(is.numeric(n))
30053006
.Call(Cgshift, x, as.integer(n), fill, type)
30063007
}
3007-
gforce = function(env, jsub, o, f, l, rows, grpcols) .Call(Cgforce, env, jsub, o, f, l, rows, grpcols)
3008+
gforce = function(env, jsub, o, f, l, rows, grpcols, lhs) .Call(Cgforce, env, jsub, o, f, l, rows, grpcols, lhs)
30083009

30093010
.prepareFastSubset = function(isub, x, enclos, notjoin, verbose = FALSE){
30103011
## helper that decides, whether a fast binary search can be performed, if i is a call

inst/tests/tests.Rraw

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -18623,60 +18623,60 @@ test(2232.4, unique(DT, by='g', cols='v3'), error="non-existing column(s)")
1862318623
# support := with GForce #1414
1862418624
options(datatable.optimize = 2L)
1862518625
DT = data.table(a=1:3,b=(1:9)/10)
18626-
test(2233.01, DT[, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:3)/10), output="GForce optimized j to")
18626+
test(2233.01, DT[, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:3)/10), output="GForce optimized j to")
1862718627
# GForce returning full length
18628-
test(2233.02, DT[, v := head(b, 3L), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:9)/10), output="GForce optimized j to")
18628+
test(2233.02, DT[, v := shift(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=c(NA,NA,NA,(1:6)/10)), output="GForce optimized j to")
1862918629
# GForce neither returning 1 per group nor full length
18630-
test(2233.03, DT[, v := head(b, 2L), a], error="Supplied 6 items to be assigned to 9 items of column 'v'.")
18631-
# compare to non GForce version
18630+
test(2233.03, DT[, v := head(b, 2L), a], error="could be implemented")
18631+
# ensure base:: and utils:: return the same result
1863218632
DT = data.table(a=1:3,b=(1:9)/10)
18633-
test(2233.04, copy(DT)[, v := min(b), a, verbose=TRUE], copy(DT)[, v := base::min(b), a, ], output="GForce optimized j to")
18634-
test(2233.05, copy(DT)[, v := head(b, 3L), a, verbose=TRUE], copy(DT)[, v := utils::head(b, 3L), a], output="GForce optimized j to")
18633+
test(2233.04, copy(DT)[, v := min(b), a, verbose=TRUE], copy(DT)[, v := base::min(b), a, ], output="GForce optimized j to")
18634+
test(2233.05, copy(DT)[, v := b[1:3], a], copy(DT)[, v := utils::head(b, 3L), a])
1863518635

1863618636
# with key and grouping by key
1863718637
DT = data.table(a=1:3,b=(1:9)/10, key="a")
18638-
test(2233.06, DT[, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:3)/10, key="a"), output="GForce optimized j to")
18639-
test(2233.07, DT[, v := head(b, 3L), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:9)/10, key="a"), output="GForce optimized j to")
18640-
test(2233.08, DT[, v := head(b, 2L), a], error="Supplied 6 items to be assigned to 9 items of column 'v'.")
18638+
test(2233.06, DT[, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:3)/10, key="a"), output="GForce optimized j to")
18639+
test(2233.07, DT[, v := shift(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=c(NA,NA,NA,(1:6)/10), key="a"), output="GForce optimized j to")
18640+
test(2233.08, DT[, v := head(b, 2L), a], error="could be implemented")
1864118641
DT = data.table(a=1:3,b=(1:9)/10, key="a")
18642-
test(2233.09, copy(DT)[, v := min(b), a, verbose=TRUE], copy(DT)[, v := base::min(b), a, ], output="GForce optimized j to")
18643-
test(2233.10, copy(DT)[, v := head(b, 3L), a, verbose=TRUE], copy(DT)[, v := utils::head(b, 3L), a], output="GForce optimized j to")
18642+
test(2233.09, copy(DT)[, v := min(b), a, verbose=TRUE], copy(DT)[, v := base::min(b), a, ], output="GForce optimized j to")
18643+
test(2233.10, copy(DT)[, v := b[1:3], a], copy(DT)[, v := utils::head(b, 3L), a])
1864418644

1864518645
# with key and grouping by nonkey
1864618646
DT = data.table(a=1:3,b=(1:9)/10,c=(3:1),key="c")
18647-
test(2233.11, DT[, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=(1:3)/10, key="c"), output="GForce optimized j to")
18648-
test(2233.12, DT[, v := head(b, 3L), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=(1:9)/10, key="c"), output="GForce optimized j to")
18649-
test(2233.13, DT[, v := head(b, 2L), a], error="Supplied 6 items to be assigned to 9 items of column 'v'.")
18647+
test(2233.11, DT[, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=(1:3)/10, key="c"), output="GForce optimized j to")
18648+
test(2233.12, DT[, v := shift(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=c(NA,NA,NA,1:6)/10, key="c"), output="GForce optimized j to")
18649+
test(2233.13, DT[, v := head(b, 2L), a], error="could be implemented")
1865018650
DT = data.table(a=1:3,b=(1:9)/10,c=(3:1),key="c")
18651-
test(2233.14, copy(DT)[, v := min(b), a, verbose=TRUE], copy(DT)[, v := base::min(b), a, ], output="GForce optimized j to")
18652-
test(2233.15, copy(DT)[, v := head(b, 3L), a, verbose=TRUE], copy(DT)[, v := utils::head(b, 3L), a], output="GForce optimized j to")
18651+
test(2233.14, copy(DT)[, v := min(b), a, verbose=TRUE], copy(DT)[, v := base::min(b), a, ], output="GForce optimized j to")
18652+
test(2233.15, copy(DT)[, v := b[1:3], a], copy(DT)[, v := utils::head(b, 3L), a])
1865318653

1865418654
# with key and keyby by nonkey
1865518655
DT = data.table(a=1:3,b=(1:9)/10,c=(3:1),key="c")
18656-
test(2233.16, copy(DT)[, v := min(b), keyby=a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=(1:3)/10, key="a"), output="GForce optimized j to")
18657-
test(2233.17, copy(DT)[, v := head(b, 3L), keyby=a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=(1:9)/10, key="a"), output="GForce optimized j to")
18658-
test(2233.18, copy(DT)[, v := head(b, 2L), keyby=a], error="Supplied 6 items to be assigned to 9 items of column 'v'.")
18656+
test(2233.16, copy(DT)[, v := min(b), keyby=a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=(1:3)/10, key="a"), output="GForce optimized j to")
18657+
test(2233.17, copy(DT)[, v := shift(b), keyby=a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, c=(3:1), v=c(NA,NA,NA,1:6)/10, key="a"), output="GForce optimized j to")
18658+
test(2233.18, copy(DT)[, v := head(b, 2L), keyby=a], error="could be implemented")
1865918659
DT = data.table(a=1:3,b=(1:9)/10,c=(3:1),key="c")
18660-
test(2233.19, copy(DT)[, v := min(b), keyby=a, verbose=TRUE], copy(DT)[, v := base::min(b), keyby=a], output="GForce optimized j to")
18661-
test(2233.20, copy(DT)[, v := head(b, 3L), keyby=a, verbose=TRUE], copy(DT)[, v := utils::head(b, 3L), keyby=a], output="GForce optimized j to")
18660+
test(2233.19, copy(DT)[, v := min(b), keyby=a, verbose=TRUE], copy(DT)[, v := base::min(b), keyby=a], output="GForce optimized j to")
18661+
test(2233.20, copy(DT)[, v := b[1:3], keyby=a], copy(DT)[, v := utils::head(b, 3L), keyby=a])
1866218662
# with irows
1866318663
DT = data.table(a=1:3,b=(1:9)/10)
18664-
test(2233.21, DT[a==2, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=c(NA,0.2,NA)), output="GForce optimized j to")
18665-
test(2233.22, DT[a!=4, v := head(b, 3L), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=(1:9)/10), output="GForce optimized j to")
18666-
test(2233.23, DT[a!=4, v := head(b, 2L), a], error="Supplied 6 items to be assigned to 9 items of column 'v'.")
18664+
test(2233.21, DT[a==2, v := min(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=c(NA,0.2,NA)), output="GForce optimized j to")
18665+
test(2233.22, DT[a!=4, v := shift(b), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v=c(NA,NA,NA,1:6)/10), output="GForce optimized j to")
18666+
test(2233.23, DT[a!=4, v := head(b, 2L), a], error="could be implemented")
1866718667
DT = data.table(a=1:3,b=(1:9)/10)
18668-
test(2233.24, copy(DT)[a==2, v := min(b), a, verbose=TRUE], copy(DT)[a==2, v := base::min(b), a, ], output="GForce optimized j to")
18669-
test(2233.25, copy(DT)[a!=4, v := head(b, 3L), a, verbose=TRUE], copy(DT)[a!=4, v := utils::head(b, 3L), a], output="GForce optimized j to")
18668+
test(2233.24, copy(DT)[a==2, v := min(b), a, verbose=TRUE], copy(DT)[a==2, v := base::min(b), a, ], output="GForce optimized j to")
18669+
test(2233.25, copy(DT)[a!=4, v := b[1:3], a], copy(DT)[a!=4, v := utils::head(b, 3L), a])
1867018670

1867118671
# multiple assignments
1867218672
DT = data.table(a=1:3,b=(1:9)/10)
18673-
test(2233.26, DT[, c("v1","v2") := .(min(b), max(b)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=(1:3)/10, v2=(7:9)/10), output="GForce optimized j to")
18674-
test(2233.27, DT[, c("v1","v2") := .(head(b,3L), tail(b,3L)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=(1:9)/10, v2=(1:9)/10), output="GForce optimized j to")
18675-
test(2233.28, DT[, c("v1","v2") := .(head(b,3L), tail(b,2L)), a], error="Supplied 6 items to be assigned to 9 items of column 'v2'.")
18676-
test(2233.29, DT[, c("v1","v2") := .(head(b,2L), tail(b,3L)), a], error="Supplied 6 items to be assigned to 9 items of column 'v1'.")
18677-
test(2233.30, DT[, c("v1","v2") := .(head(b,2L), tail(b,2L)), a], error="Supplied 6 items to be assigned to 9 items of column 'v1'.")
18678-
test(2233.31, DT[, c("v1","v2") := .(min(b), max(b)), a, verbose=TRUE], DT[, c("v1","v2") := .(base::min(b), base::max(b)), a ], output="GForce optimized j to")
18679-
test(2233.32, DT[, c("v1","v2") := .(head(b,3L), tail(b,3L)), a, verbose=TRUE], DT[, c("v1","v2") := .(utils::head(b,3L), utils::tail(b,3L)), a], output="GForce optimized j to")
18673+
test(2233.26, DT[, c("v1","v2") := .(min(b), max(b)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=(1:3)/10, v2=(7:9)/10), output="GForce optimized j to")
18674+
test(2233.27, DT[, c("v1","v2") := .(shift(b), shift(b,2L)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=c(NA,NA,NA,1:6)/10, v2=c(NA,NA,NA,NA,NA,NA,1:3)/10), output="GForce optimized j to")
18675+
test(2233.28, DT[, c("v1","v2") := .(shift(b), head(b,3L)), a], error="head.*:=.*could be implemented")
18676+
test(2233.29, DT[, c("v1","v2") := .(min(b), shift(b)), a, verbose=TRUE], data.table(a=1:3, b=(1:9)/10, v1=(1:3)/10, v2=c(NA,NA,NA,1:6)/10), output="GForce optimized j to")
18677+
test(2233.30, DT[, c("v1","v2") := .(head(b,2L), tail(b,2L)), a], error="head.*:=.*could be implemented")
18678+
test(2233.31, DT[, c("v1","v2") := .(min(b), max(b)), a, verbose=TRUE], DT[, c("v1","v2") := .(base::min(b), base::max(b)), a], output="GForce optimized j to")
18679+
test(2233.32, DT[, c("v1","v2") := .(head(b,3L), tail(b,3L)), a], error="head.*:=.*could be implemented")
1868018680

1868118681
# gforce needs to evaluate variable arguments before calling C part (part of test 101.17 in programming.Rraw)
1868218682
set.seed(108)

src/gsumm.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ static int *oo = NULL;
2121
static int *ff = NULL;
2222
static int isunsorted = 0;
2323

24+
// for first/last with n>1 to error when used with :=, until implemented
25+
static bool assignByRef = false;
26+
2427
// from R's src/cov.c (for variance / sd)
2528
#ifdef HAVE_LONG_DOUBLE
2629
# define SQRTL sqrtl
@@ -37,10 +40,11 @@ static int nbit(int n)
3740
return nb;
3841
}
3942

40-
SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg, SEXP grpcols) {
43+
SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg, SEXP grpcols, SEXP lhs) {
4144
int nprotect=0;
4245
double started = wallclock();
4346
const bool verbose = GetVerbose();
47+
assignByRef = !isNull(lhs);
4448
if (TYPEOF(env) != ENVSXP) error(_("env is not an environment"));
4549
// The type of jsub is pretty flexible in R, so leave checking to eval() below.
4650
if (!isInteger(o)) error(_("%s is not an integer vector"), "o");
@@ -1113,6 +1117,8 @@ static SEXP gfirstlast(const SEXP x, const bool first, const SEXP nArg, const bo
11131117
if (!isInteger(nArg) || LENGTH(nArg)!=1 || INTEGER(nArg)[0]<0)
11141118
error(_("Internal error, gfirstlast is not implemented for n<0. This should have been caught before. Please report to data.table issue tracker.")); // # nocov
11151119
const int w = INTEGER(nArg)[0];
1120+
if (w>1 && assignByRef)
1121+
error(_("Is first/last/head/tail with n>1 and := by group intentional? Please provide a use case to the GitHub issue tracker. It could be implemented."));
11161122
// select 1:w when first=TRUE, and (n-w+1):n when first=FALSE
11171123
// or select w'th item when nthvalue=TRUE; e.g. the n=0 case in test 280
11181124
const bool nosubset = irowslen == -1;

0 commit comments

Comments
 (0)