Skip to content

Commit 06e7db7

Browse files
committed
Use growable_* instead of SETLENGTH in dogroups()
Since dogroups() relies on being able to shrink vectors it hasn't allocated, introduce make_growable() and is_growable() to adapt. Since dogroups() relies on SD and SDall having shared columns, use the setgrowable() wrapper on the R side at the time when SD and SDall are being created. (In the ALTREP case, setgrowable() will re-create the columns.)
1 parent e243ece commit 06e7db7

File tree

7 files changed

+46
-11
lines changed

7 files changed

+46
-11
lines changed

R/data.table.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,6 +1353,7 @@ replace_dot_alias = function(e) {
13531353
}
13541354
if (!with || missing(j)) return(ans)
13551355
if (!is.data.table(ans)) setattr(ans, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...]) .SD should be data.table, test 2212.013
1356+
setgrowable(ans)
13561357
SDenv$.SDall = ans
13571358
SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall
13581359
SDenv$.N = nrow(ans)
@@ -1591,6 +1592,7 @@ replace_dot_alias = function(e) {
15911592
SDenv$.SDall = .Call(CsubsetDT, x, if (length(len__)) seq_len(max(len__)) else 0L, xcols) # must be deep copy when largest group is a subset
15921593
if (!is.data.table(SDenv$.SDall)) setattr(SDenv$.SDall, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...],by=grp) needs .SD to be data.table, test 2022.012
15931594
if (xdotcols) setattr(SDenv$.SDall, 'names', ansvars[xcolsAns]) # now that we allow 'x.' prefix in 'j', #2313 bug fix - [xcolsAns]
1595+
setgrowable(SDenv$.SDall)
15941596
SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall
15951597
}
15961598
if (nrow(SDenv$.SDall)==0L) {

R/wrappers.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@ fitsInInt32 = function(x) .Call(CfitsInInt32R, x)
2121
fitsInInt64 = function(x) .Call(CfitsInInt64R, x)
2222

2323
coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy)
24+
25+
setgrowable = function(x) .Call(Csetgrowable, x)

src/data.table.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,10 @@ SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size);
305305
R_xlen_t growable_max_size(SEXP x);
306306
// Resize a growable vector to newsize. Will signal an error if newsize exceeds max_size.
307307
void growable_resize(SEXP x, R_xlen_t newsize);
308+
// Return TRUE if growable_resize(x) and growable_max_size(x) are valid operations.
309+
Rboolean is_growable(SEXP x);
310+
// Transform x into a growable vector. The return value must be reprotected in place of x. What happens to x is deliberately not specified, but no copying occurs.
311+
SEXP make_growable(SEXP x);
308312

309313
// functions called from R level .Call/.External and registered in init.c
310314
// these now live here to pass -Wstrict-prototypes, #5477
@@ -379,4 +383,5 @@ SEXP dt_has_zlib(void);
379383
SEXP startsWithAny(SEXP, SEXP, SEXP);
380384
SEXP convertDate(SEXP, SEXP);
381385
SEXP fastmean(SEXP);
386+
SEXP setgrowable(SEXP x);
382387

src/dogroups.c

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
124124
for (R_len_t i=0; i<n; ++i) {
125125
if (ilens[i] > maxGrpSize) maxGrpSize = ilens[i];
126126
}
127-
defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); nprotect++;
127+
defineVar(install(".I"), I = PROTECT(growable_allocate(INTSXP, maxGrpSize, maxGrpSize)), env); nprotect++;
128128
hash_set(specials, I, -maxGrpSize); // marker for anySpecialStatic(); see its comments
129129
R_LockBinding(install(".I"), env);
130130

@@ -197,7 +197,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
197197
INTEGER(GRP)[0] = i+1; // group counter exposed as .GRP
198198
INTEGER(rownames)[1] = -grpn; // the .set_row_names() of .SD. Not .N when nomatch=NA and this is a nomatch
199199
for (int j=0; j<length(SDall); ++j) {
200-
SETLENGTH(VECTOR_ELT(SDall,j), grpn); // before copying data in otherwise assigning after the end could error R API checks
200+
growable_resize(VECTOR_ELT(SDall,j), grpn); // before copying data in otherwise assigning after the end could error R API checks
201201
defineVar(nameSyms[j], VECTOR_ELT(SDall, j), env);
202202
// Redo this defineVar for each group in case user's j assigned to the column names (env is static) (tests 387 and 388)
203203
// nameSyms pre-stored to save repeated install() for efficiency, though.
@@ -230,14 +230,14 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
230230
// and we hope that reference counting on by default from R 4.0 will avoid costly gc()s.
231231
}
232232
grpn = 1; // it may not be 1 e.g. test 722. TODO: revisit.
233-
SETLENGTH(I, grpn);
233+
growable_resize(I, grpn);
234234
INTEGER(I)[0] = 0;
235235
for (int j=0; j<length(xSD); ++j) {
236236
writeNA(VECTOR_ELT(xSD, j), 0, 1, false);
237237
}
238238
} else {
239239
if (verbose) tstart = wallclock();
240-
SETLENGTH(I, grpn);
240+
growable_resize(I, grpn);
241241
int *iI = INTEGER(I);
242242
if (LENGTH(order)==0) {
243243
const int rownum = grpn ? istarts[i]-1 : -1;
@@ -318,13 +318,13 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
318318
RHS = VECTOR_ELT(jval,j%LENGTH(jval));
319319
if (isNull(target)) {
320320
// first time adding to new column
321-
if (TRUELENGTH(dt) < colj+1) internal_error(__func__, "Trying to add new column by reference but tl is full; setalloccol should have run first at R level before getting to this point"); // # nocov
321+
if (growable_max_size(dt) < colj+1) internal_error(__func__, "Trying to add new column by reference but table is full; setalloccol should have run first at R level before getting to this point"); // # nocov
322322
target = PROTECT(allocNAVectorLike(RHS, n));
323323
// Even if we could know reliably to switch from allocNAVectorLike to allocVector for slight speedup, user code could still
324324
// contain a switched halt, and in that case we'd want the groups not yet done to have NA rather than 0 or uninitialized.
325325
// Increment length only if the allocation passes, #1676. But before SET_VECTOR_ELT otherwise attempt-to-set-index-n/n R error
326-
SETLENGTH(dtnames, LENGTH(dtnames)+1);
327-
SETLENGTH(dt, LENGTH(dt)+1);
326+
growable_resize(dtnames, LENGTH(dtnames)+1);
327+
growable_resize(dt, LENGTH(dt)+1);
328328
SET_VECTOR_ELT(dt, colj, target);
329329
UNPROTECT(1);
330330
SET_STRING_ELT(dtnames, colj, STRING_ELT(newnames, colj-origncol));
@@ -482,11 +482,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
482482
// Also reset truelength on specials; see comments in anySpecialStatic().
483483
for (int j=0; j<length(SDall); ++j) {
484484
SEXP this = VECTOR_ELT(SDall,j);
485-
SETLENGTH(this, maxGrpSize);
486-
SET_TRUELENGTH(this, maxGrpSize);
485+
growable_resize(this, maxGrpSize);
487486
}
488-
SETLENGTH(I, maxGrpSize);
489-
SET_TRUELENGTH(I, maxGrpSize);
487+
growable_resize(I, maxGrpSize);
490488
if (verbose) {
491489
if (nblock[0] && nblock[1]) internal_error(__func__, "block 0 [%d] and block 1 [%d] have both run", nblock[0], nblock[1]); // # nocov
492490
int w = nblock[1]>0;

src/growable.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) {
44
SEXP ret = PROTECT(allocVector(type, max_size));
55
SET_TRUELENGTH(ret, max_size);
6+
#if R_VERSION >= R_Version(3, 4, 0)
67
SET_GROWABLE_BIT(ret);
8+
#endif // otherwise perceived memory use will end up higher
79
SETLENGTH(ret, size);
810
UNPROTECT(1);
911
return ret;
@@ -21,3 +23,18 @@ void growable_resize(SEXP x, R_xlen_t newsize) {
2123
);
2224
SETLENGTH(x, newsize);
2325
}
26+
27+
Rboolean is_growable(SEXP x) {
28+
return isVector(x) && TRUELENGTH(x) >= XLENGTH(x)
29+
#if R_VERSION >= R_Version(3, 4, 0)
30+
&& IS_GROWABLE(x)
31+
#endif
32+
;
33+
}
34+
35+
// Assuming no ALTREP for now
36+
SEXP make_growable(SEXP x) {
37+
if (TRUELENGTH(x) < XLENGTH(x)) SET_TRUELENGTH(x, XLENGTH(x));
38+
SET_GROWABLE_BIT(x);
39+
return x;
40+
}

src/init.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ R_CallMethodDef callMethods[] = {
150150
{"CconvertDate", (DL_FUNC)&convertDate, -1},
151151
{"Cnotchin", (DL_FUNC)&notchin, -1},
152152
{"Cwarn_matrix_column_r", (DL_FUNC)&warn_matrix_column_r, -1},
153+
{"Csetgrowable", (DL_FUNC)&setgrowable, -1},
153154
{NULL, NULL, 0}
154155
};
155156

src/wrappers.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,13 @@ SEXP warn_matrix_column_r(SEXP i) {
124124
warn_matrix_column(INTEGER(i)[0]);
125125
return R_NilValue;
126126
}
127+
128+
SEXP setgrowable(SEXP x) {
129+
for (R_xlen_t i = 0; i < xlength(x); ++i) {
130+
SEXP this = VECTOR_ELT(x, i);
131+
// relying on the rest of data.table machinery to avoid the need for resizing
132+
if (!is_growable(this) && !ALTREP(this))
133+
SET_VECTOR_ELT(x, i, make_growable(this));
134+
}
135+
return R_NilValue;
136+
}

0 commit comments

Comments
 (0)