Skip to content

Commit eb13c5e

Browse files
committed
add gforce dynamic support
1 parent 541f27e commit eb13c5e

File tree

8 files changed

+280
-46
lines changed

8 files changed

+280
-46
lines changed

R/data.table.R

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1890,10 +1890,11 @@ replace_dot_alias = function(e) {
18901890
assign(".N", len__, thisEnv) # For #334
18911891
#fix for #1683
18921892
if (use.I) assign(".I", seq_len(nrow(x)), thisEnv)
1893-
ans = gforce(thisEnv, jsub, o__, f__, len__, irows) # irows needed for #971.
1893+
ans = gforce(thisEnv, jsub, o__, f__, len__, irows, # irows needed for #971
1894+
.Call(CsubsetVector, groups, grpcols), # just a list() subset to make C level neater; doesn't copy column contents
1895+
lhs) # for now this just prevents := with new feature first/last n>1; in future see TODO below
18941896
gi = if (length(o__)) o__[f__] else f__
18951897
g = lapply(grpcols, function(i) .Call(CsubsetVector, groups[[i]], gi)) # use CsubsetVector instead of [ to preserve attributes #5567
1896-
18971898
# returns all rows instead of one per group
18981899
nrow_funs = c("gshift")
18991900
.is_nrows = function(q) {
@@ -1904,33 +1905,15 @@ replace_dot_alias = function(e) {
19041905
q[[1L]] %chin% nrow_funs
19051906
}
19061907
}
1907-
19081908
# adding ghead/gtail(n) support for n > 1 #5060 #523
1909-
q3 = 0
1910-
if (!is.symbol(jsub)) {
1911-
headTail_arg = function(q) {
1912-
if (length(q)==3L && length(q3 <- q[[3L]])==1L && is.numeric(q3) &&
1913-
(q[[1L]]) %chin% c("ghead", "gtail") && q3!=1) q3
1914-
else 0
1915-
}
1916-
if (jsub %iscall% "list"){
1917-
q3 = max(sapply(jsub, headTail_arg))
1918-
} else if (length(jsub)==3L) {
1919-
q3 = headTail_arg(jsub)
1920-
}
1921-
}
1922-
if (q3 > 0) {
1923-
grplens = pmin.int(q3, len__)
1924-
g = lapply(g, rep.int, times=grplens)
1925-
} else if (.is_nrows(jsub)) {
1909+
if (.is_nrows(jsub)) {
19261910
g = lapply(g, rep.int, times=len__)
19271911
# unpack list of lists for nrows functions
19281912
zip_items = function(ll) do.call(mapply, c(list(FUN = c), ll, SIMPLIFY=FALSE, USE.NAMES=FALSE))
19291913
if (all(vapply_1b(ans, is.list))) {
19301914
ans = lapply(ans, zip_items)
19311915
}
19321916
}
1933-
ans = c(g, ans)
19341917
} else {
19351918
ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose, showProgress)
19361919
}
@@ -3087,7 +3070,7 @@ gshift = function(x, n=1L, fill=NA, type=c("lag", "lead", "shift", "cyclic")) {
30873070
stopifnot(is.numeric(n))
30883071
.Call(Cgshift, x, as.integer(n), fill, type)
30893072
}
3090-
gforce = function(env, jsub, o, f, l, rows) .Call(Cgforce, env, jsub, o, f, l, rows)
3073+
gforce = function(env, jsub, o, f, l, rows, grpcols, lhs) .Call(Cgforce, env, jsub, o, f, l, rows, grpcols, lhs)
30913074

30923075
# GForce needs to evaluate all arguments not present in the data.table before calling C part #5547
30933076
# Safe cases: variables [i], calls without variables [c(0,1), list(1)] # TODO extend this list

inst/tests/tests.Rraw

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20620,8 +20620,8 @@ y = data.table(c=1, d=1L)
2062020620
test(2297.03, y[x, on=.(c == a, d == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")
2062120621
test(2297.04, y[x, on=.(d == a, c == a), verbose=TRUE], data.table(c=1L, d=1L), output="Coercing .*a .*no fractions.* to type integer.*Coercing .*c .*no fractions.* to type integer")
2062220622
# dates
20623-
d_int = .Date(1L)
20624-
d_dbl = .Date(1)
20623+
d_int = `class<-`(1L, "Date")
20624+
d_dbl = `class<-`(1, "Date")
2062520625
x = data.table(a=d_int)
2062620626
y = data.table(c=d_int, d=d_dbl)
2062720627
test(2297.11, y[x, on=.(c == a, d == a)], data.table(c=d_int, d=d_int))

src/assign.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
255255
names = getAttrib(dt,R_NamesSymbol);
256256
// names may be NULL when null.data.table() passes list() to alloccol for example.
257257
// So, careful to use length() on names, not LENGTH().
258-
if (length(names)!=l) internal_error(__func__, "length of names (%d) is not length of dt (%d)", length(names),l); // # nocov
258+
if (length(names)!=l && length(names)>0) internal_error(__func__, "length of names (%d) is not length of dt (%d)", length(names),l); // # nocov
259259
if (!selfrefok(dt,verbose))
260260
return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2
261261
// added (n>l) ? ... for #970, see test 1481.
@@ -320,6 +320,12 @@ SEXP truelength(SEXP x) {
320320
return ScalarInteger(isNull(x) ? 0 : TRUELENGTH(x));
321321
}
322322

323+
SEXP settruelength(SEXP x, SEXP n) {
324+
// currently just for first/last and dogroups.c; see comments at the end of last.R
325+
SET_TRUELENGTH(x, INTEGER(n)[0]);
326+
return R_NilValue;
327+
}
328+
323329
SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
324330
return ScalarInteger(_selfrefok(x,FALSE,LOGICAL(verbose)[0]));
325331
}

src/data.table.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ extern SEXP sym_index;
9999
extern SEXP sym_BY;
100100
extern SEXP sym_starts, char_starts;
101101
extern SEXP sym_maxgrpn;
102+
extern SEXP sym_gforce_dynamic;
102103
extern SEXP sym_anyna;
103104
extern SEXP sym_anyinfnan;
104105
extern SEXP sym_anynotascii;
@@ -259,6 +260,7 @@ bool need2utf8(SEXP x);
259260
SEXP coerceUtf8IfNeeded(SEXP x);
260261
SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg);
261262
void internal_error(const char *call_name, const char *format, ...);
263+
SEXP setDT(SEXP list);
262264

263265
// types.c
264266
char *end(char *start);
@@ -289,6 +291,7 @@ SEXP setdt_nrows(SEXP);
289291
SEXP alloccolwrapper(SEXP, SEXP, SEXP);
290292
SEXP selfrefokwrapper(SEXP, SEXP);
291293
SEXP truelength(SEXP);
294+
SEXP settruelength(SEXP, SEXP);
292295
SEXP setcharvec(SEXP, SEXP, SEXP);
293296
SEXP chmatch_R(SEXP, SEXP, SEXP);
294297
SEXP chmatchdup_R(SEXP, SEXP, SEXP);
@@ -302,7 +305,7 @@ SEXP expandAltRep(SEXP);
302305
SEXP fmelt(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
303306
SEXP fcast(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
304307
SEXP issorted(SEXP, SEXP);
305-
SEXP gforce(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
308+
SEXP gforce(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP);
306309
SEXP gsum(SEXP, SEXP);
307310
SEXP gmean(SEXP, SEXP);
308311
SEXP gmin(SEXP, SEXP);

src/dogroups.c

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
425425
warning(_("Item %d of j's result for group %d is zero length. This will be filled with %d NAs to match the longest column in this result. Later groups may have a similar problem but only the first is reported to save filling the warning buffer."), j+1, i+1, maxn);
426426
NullWarnDone = TRUE;
427427
}
428-
writeNA(target, thisansloc, maxn, false);
428+
writeNA(target, thisansloc, maxn, true);
429429
} else {
430430
// thislen>0
431431
if (TYPEOF(source) != TYPEOF(target))
@@ -438,7 +438,16 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
438438
source = PROTECT(copyAsPlain(source));
439439
copied = true;
440440
}
441-
memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, "");
441+
if (TRUELENGTH(source)==LENGTH(source)) {
442+
// first() and last() set truelength to mark that it is a true vector; see comments at the end of last.R and test 2240.81
443+
// a true vector is not recycled when length-1 and is padded with NA to match the length of the longest result
444+
memrecycle(target, R_NilValue, thisansloc, thislen, source, 0, -1, 0, ""); // just using memrecycle to copy contents
445+
writeNA(target, thisansloc+thislen, maxn-thislen, true); // pad with NA
446+
} else {
447+
if (thislen>1 && thislen!=maxn && grpn>0) // grpn>0 for grouping empty tables; test 1986
448+
error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn);
449+
memrecycle(target, R_NilValue, thisansloc, maxn, source, 0, -1, 0, "");
450+
}
442451
if (copied) UNPROTECT(1);
443452
}
444453
}

0 commit comments

Comments
 (0)