Skip to content

Commit a09f39b

Browse files
committed
anySpecialStatic(): hash instead of TRUELENGTH
1 parent cae6b02 commit a09f39b

File tree

1 file changed

+15
-19
lines changed

1 file changed

+15
-19
lines changed

src/dogroups.c

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include <fcntl.h>
44
#include <time.h>
55

6-
static bool anySpecialStatic(SEXP x) {
6+
static bool anySpecialStatic(SEXP x, hashtab * specials) {
77
// Special refers to special symbols .BY, .I, .N, and .GRP; see special-symbols.Rd
88
// Static because these are like C static arrays which are the same memory for each group; e.g., dogroups
99
// creates .SD for the largest group once up front, overwriting the contents for each group. Their
@@ -46,16 +46,16 @@ static bool anySpecialStatic(SEXP x) {
4646
if (n==0)
4747
return false;
4848
if (isVectorAtomic(x))
49-
return ALTREP(x) || TRUELENGTH(x)<0;
49+
return ALTREP(x) || hash_lookup(specials, x, 0)<0;
5050
if (isNewList(x)) {
51-
if (TRUELENGTH(x)<0)
51+
if (hash_lookup(specials, x, 0)<0)
5252
return true; // test 2158
5353
for (int i=0; i<n; ++i) {
5454
list_el = VECTOR_ELT(x,i);
55-
if (anySpecialStatic(list_el))
55+
if (anySpecialStatic(list_el, specials))
5656
return true;
5757
for(attribs = ATTRIB(list_el); attribs != R_NilValue; attribs = CDR(attribs)) {
58-
if (anySpecialStatic(CAR(attribs)))
58+
if (anySpecialStatic(CAR(attribs), specials))
5959
return true; // #4936
6060
}
6161
}
@@ -86,11 +86,13 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
8686
// fix for longstanding FR/bug, #495. E.g., DT[, c(sum(v1), lapply(.SD, mean)), by=grp, .SDcols=v2:v3] resulted in error.. the idea is, 1) we create .SDall, which is normally == .SD. But if extra vars are detected in jexp other than .SD, then .SD becomes a shallow copy of .SDall with only .SDcols in .SD. Since internally, we don't make a copy, changing .SDall will reflect in .SD. Hopefully this'll workout :-).
8787
SEXP SDall = PROTECT(findVar(install(".SDall"), env)); nprotect++; // PROTECT for rchk
8888
SEXP SD = PROTECT(findVar(install(".SD"), env)); nprotect++;
89-
89+
9090
const bool showProgress = LOGICAL(showProgressArg)[0]==1 && ngrp > 1; // showProgress only if more than 1 group
9191
double startTime = (showProgress) ? wallclock() : 0; // For progress printing, startTime is set at the beginning
9292
double nextTime = (showProgress) ? startTime+3 : 0; // wait 3 seconds before printing progress
9393

94+
hashtab * specials = hash_create(3 + ngrpcols + xlength(SDall)); // .I, .N, .GRP plus columns of .BY plus SDall
95+
9496
defineVar(sym_BY, BY = PROTECT(allocVector(VECSXP, ngrpcols)), env); nprotect++; // PROTECT for rchk
9597
SEXP bynames = PROTECT(allocVector(STRSXP, ngrpcols)); nprotect++; // TO DO: do we really need bynames, can we assign names afterwards in one step?
9698
for (int i=0; i<ngrpcols; ++i) {
@@ -103,7 +105,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
103105
defineVar(install(CHAR(STRING_ELT(bynames,i))), VECTOR_ELT(BY,i), env); // by vars can be used by name in j as well as via .BY
104106
if (SIZEOF(VECTOR_ELT(BY,i))==0)
105107
internal_error(__func__, "unsupported size-0 type '%s' in column %d of 'by' should have been caught earlier", type2char(TYPEOF(VECTOR_ELT(BY, i))), i+1); // # nocov
106-
SET_TRUELENGTH(VECTOR_ELT(BY,i), -1); // marker for anySpecialStatic(); see its comments
108+
hash_set(specials, VECTOR_ELT(BY,i), -1); // marker for anySpecialStatic(); see its comments
107109
}
108110
setAttrib(BY, R_NamesSymbol, bynames); // Fix for #42 - BY doesn't retain names anymore
109111
R_LockBinding(sym_BY, env);
@@ -112,9 +114,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
112114
// TO DO: check this check above.
113115

114116
N = PROTECT(findVar(install(".N"), env)); nprotect++; // PROTECT for rchk
115-
SET_TRUELENGTH(N, -1); // marker for anySpecialStatic(); see its comments
117+
hash_set(specials, N, -1); // marker for anySpecialStatic(); see its comments
116118
GRP = PROTECT(findVar(install(".GRP"), env)); nprotect++;
117-
SET_TRUELENGTH(GRP, -1); // marker for anySpecialStatic(); see its comments
119+
hash_set(specials, GRP, -1); // marker for anySpecialStatic(); see its comments
118120
iSD = PROTECT(findVar(install(".iSD"), env)); nprotect++; // 1-row and possibly no cols (if no i variables are used via JIS)
119121
xSD = PROTECT(findVar(install(".xSD"), env)); nprotect++;
120122
R_len_t maxGrpSize = 0;
@@ -123,7 +125,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
123125
if (ilens[i] > maxGrpSize) maxGrpSize = ilens[i];
124126
}
125127
defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); nprotect++;
126-
SET_TRUELENGTH(I, -maxGrpSize); // marker for anySpecialStatic(); see its comments
128+
hash_set(specials, I, -maxGrpSize); // marker for anySpecialStatic(); see its comments
127129
R_LockBinding(install(".I"), env);
128130

129131
SEXP dtnames = PROTECT(getAttrib(dt, R_NamesSymbol)); nprotect++; // added here to fix #91 - `:=` did not issue recycling warning during "by"
@@ -150,7 +152,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
150152
nameSyms[i] = install(CHAR(STRING_ELT(names, i)));
151153
// fixes http://stackoverflow.com/questions/14753411/why-does-data-table-lose-class-definition-in-sd-after-group-by
152154
copyMostAttrib(VECTOR_ELT(dt,INTEGER(dtcols)[i]-1), this); // not names, otherwise test 778 would fail
153-
SET_TRUELENGTH(this, -maxGrpSize); // marker for anySpecialStatic(); see its comments
155+
hash_set(specials, this, -maxGrpSize); // marker for anySpecialStatic(); see its comments
154156
}
155157

156158
SEXP xknames = PROTECT(getAttrib(xSD, R_NamesSymbol)); nprotect++;
@@ -329,7 +331,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
329331
copyMostAttrib(RHS, target); // attributes of first group dominate; e.g. initial factor levels come from first group
330332
}
331333
bool copied = false;
332-
if (isNewList(target) && anySpecialStatic(RHS)) { // see comments in anySpecialStatic()
334+
if (isNewList(target) && anySpecialStatic(RHS, specials)) { // see comments in anySpecialStatic()
333335
RHS = PROTECT(copyAsPlain(RHS));
334336
copied = true;
335337
}
@@ -435,7 +437,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
435437
error(_("Supplied %d items for column %d of group %d which has %d rows. The RHS length must either be 1 (single values are ok) or match the LHS length exactly. If you wish to 'recycle' the RHS please use rep() explicitly to make this intent clear to readers of your code."), thislen, j+1, i+1, maxn);
436438
}
437439
bool copied = false;
438-
if (isNewList(target) && anySpecialStatic(source)) { // see comments in anySpecialStatic()
440+
if (isNewList(target) && anySpecialStatic(source, specials)) { // see comments in anySpecialStatic()
439441
source = PROTECT(copyAsPlain(source));
440442
copied = true;
441443
}
@@ -485,12 +487,6 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
485487
}
486488
SETLENGTH(I, maxGrpSize);
487489
SET_TRUELENGTH(I, maxGrpSize);
488-
for (int i=0; i<length(BY); ++i) {
489-
SEXP this = VECTOR_ELT(BY, i);
490-
SET_TRUELENGTH(this, length(this)); // might be 0 or 1; see its allocVector above
491-
}
492-
SET_TRUELENGTH(N, 1);
493-
SET_TRUELENGTH(GRP, 1);
494490
if (verbose) {
495491
if (nblock[0] && nblock[1]) internal_error(__func__, "block 0 [%d] and block 1 [%d] have both run", nblock[0], nblock[1]); // # nocov
496492
int w = nblock[1]>0;

0 commit comments

Comments
 (0)