Skip to content

Commit 23c7b3b

Browse files
committed
Use the experimental resizable vectors API
Thanks to Luke Tierney for introducing the API and helping with the migration.
1 parent a014e38 commit 23c7b3b

File tree

6 files changed

+37
-51
lines changed

6 files changed

+37
-51
lines changed

inst/tests/tests.Rraw

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19453,7 +19453,8 @@ test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in
1945319453
df = data.frame(a=1:3)
1945419454
setDT(df)
1945519455
attr(df, "att") = 1
19456-
test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned")
19456+
##test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned")
19457+
test(2291.1, set(df, NULL, "new", "new"), error="either been loaded from disk.*or constructed manually.*Please run setDT.*setalloccol.*on it first")
1945719458

1945819459
# ns-qualified bysub error, #6493
1945919460
DT = data.table(a = 1)

src/assign.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
static void finalizer(SEXP p)
44
{
5-
SEXP x;
65
R_len_t n, l, tl;
76
if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov
87
p = R_ExternalPtrTag(p);
@@ -16,9 +15,12 @@ static void finalizer(SEXP p)
1615
// already, so nothing to do (but almost never the case).
1716
return;
1817
}
18+
#ifdef DODO
19+
SEXP x;
1920
x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
20-
// INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015)
21+
// INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Millar, Jul 2015)
2122
SETLENGTH(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
23+
#endif
2224
UNPROTECT(1);
2325
return;
2426
}
@@ -69,7 +71,7 @@ happens too late after GC has already released the memory, and ii) copies by bas
6971
[<- in write.table just before test 894) allocate at length LENGTH but copy the TRUELENGTH over.
7072
If the finalizer sets LENGTH to TRUELENGTH, that's a fail as it wasn't really allocated at
7173
TRUELENGTH when R did the copy.
72-
Karl Miller suggested an ENVSXP so that restoring LENGTH in finalizer should work. This is the
74+
Karl Millar suggested an ENVSXP so that restoring LENGTH in finalizer should work. This is the
7375
closest I got to getting it to pass all tests :
7476
7577
SEXP env = PROTECT(allocSExp(ENVSXP));
@@ -151,7 +153,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
151153
// called from alloccol where n is checked carefully, or from shallow() at R level
152154
// where n is set to truelength (i.e. a shallow copy only with no size change)
153155
int protecti=0;
154-
SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here?
156+
SEXP newdt = PROTECT(R_allocResizableVector(VECSXP, n)); protecti++; // to do, use growVector here?
155157
SHALLOW_DUPLICATE_ATTRIB(newdt, dt);
156158

157159
// TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It
@@ -169,7 +171,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
169171
setAttrib(newdt, sym_sorted, duplicate(sorted));
170172

171173
SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
172-
SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++;
174+
SEXP newnames = PROTECT(R_allocResizableVector(STRSXP, n)); protecti++;
173175
const int l = isNull(cols) ? LENGTH(dt) : length(cols);
174176
if (isNull(cols)) {
175177
for (int i=0; i<l; ++i) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,i));
@@ -186,13 +188,9 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
186188
for (int i=0; i<l; ++i) SET_STRING_ELT( newnames, i, STRING_ELT(names,INTEGER(cols)[i]-1) );
187189
}
188190
}
191+
R_resizeVector(newdt,l);
192+
R_resizeVector(newnames,l);
189193
setAttrib(newdt, R_NamesSymbol, newnames);
190-
// setAttrib appears to change length and truelength, so need to do that first _then_ SET next,
191-
// otherwise (if the SET were were first) the 100 tl is assigned to length.
192-
SETLENGTH(newnames,l);
193-
SET_TRUELENGTH(newnames,n);
194-
SETLENGTH(newdt,l);
195-
SET_TRUELENGTH(newdt,n);
196194
setselfref(newdt);
197195
UNPROTECT(protecti);
198196
return(newdt);
@@ -527,8 +525,9 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
527525
internal_error(__func__, "selfrefnames is ok but tl names [%lld] != tl [%d]", (long long)TRUELENGTH(names), oldtncol); // # nocov
528526
if (!selfrefok(dt, verbose)) // #6410 setDT(dt) and subsequent attr<- can lead to invalid selfref
529527
error(_("It appears that at some earlier point, attributes of this data.table have been reassigned. Please use setattr(DT, name, value) rather than attr(DT, name) <- value. If that doesn't apply to you, please report your case to the data.table issue tracker."));
530-
SETLENGTH(dt, oldncol+LENGTH(newcolnames));
531-
SETLENGTH(names, oldncol+LENGTH(newcolnames));
528+
R_resizeVector(dt, oldncol+LENGTH(newcolnames));
529+
R_resizeVector(names, oldncol+LENGTH(newcolnames));
530+
setAttrib(dt, R_NamesSymbol, names);
532531
for (int i=0; i<LENGTH(newcolnames); ++i)
533532
SET_STRING_ELT(names,oldncol+i,STRING_ELT(newcolnames,i));
534533
// truelengths of both already set by alloccol
@@ -726,8 +725,9 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
726725
SET_VECTOR_ELT(dt, i, R_NilValue);
727726
SET_STRING_ELT(names, i, NA_STRING); // release reference to the CHARSXP
728727
}
729-
SETLENGTH(dt, ndt-ndelete);
730-
SETLENGTH(names, ndt-ndelete);
728+
R_resizeVector(dt, ndt-ndelete);
729+
R_resizeVector(names, ndt-ndelete);
730+
setAttrib(dt, R_NamesSymbol, names);
731731
if (LENGTH(names)==0) {
732732
// That was last column deleted, leaving NULL data.table, so we need to reset .row_names, so that it really is the NULL data.table.
733733
PROTECT(nullint=allocVector(INTSXP, 0)); protecti++;

src/dogroups.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
322322
// Even if we could know reliably to switch from allocNAVectorLike to allocVector for slight speedup, user code could still
323323
// contain a switched halt, and in that case we'd want the groups not yet done to have NA rather than 0 or uninitialized.
324324
// Increment length only if the allocation passes, #1676. But before SET_VECTOR_ELT otherwise attempt-to-set-index-n/n R error
325-
SETLENGTH(dtnames, LENGTH(dtnames)+1);
326-
SETLENGTH(dt, LENGTH(dt)+1);
325+
R_resizeVector(dtnames, LENGTH(dtnames)+1);
326+
R_resizeVector(dt, LENGTH(dt)+1);
327+
setAttrib(dt, R_NamesSymbol, dtnames);
327328
SET_VECTOR_ELT(dt, colj, target);
328329
UNPROTECT(1);
329330
SET_STRING_ELT(dtnames, colj, STRING_ELT(newnames, colj-origncol));
@@ -534,7 +535,7 @@ SEXP growVector(SEXP x, const R_len_t newlen)
534535
SEXP newx;
535536
R_len_t len = length(x);
536537
if (isNull(x)) error(_("growVector passed NULL"));
537-
PROTECT(newx = allocVector(TYPEOF(x), newlen)); // TO DO: R_realloc(?) here?
538+
PROTECT(newx = R_allocResizableVector(TYPEOF(x), newlen)); // TO DO: R_realloc(?) here?
538539
if (newlen < len) len=newlen; // i.e. shrink
539540
if (!len) { // cannot memcpy invalid pointer, #6819
540541
keepattr(newx, x);

src/freadR.c

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,8 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int
265265
// use typeSize superfluously to avoid not-used warning; otherwise could move typeSize from fread.h into fread.c
266266
if (typeSize[CT_BOOL8_N] != 1) internal_error(__func__, "typeSize[CT_BOOL8_N] != 1"); // # nocov
267267
if (typeSize[CT_STRING] != 8) internal_error(__func__, "typeSize[CT_STRING] != 8"); // # nocov
268-
colNamesSxp = R_NilValue;
269-
SET_VECTOR_ELT(RCHK, 1, colNamesSxp = allocVector(STRSXP, ncol));
268+
colNamesSxp = R_allocResizableVector(STRSXP, ncol);
269+
SET_VECTOR_ELT(RCHK, 1, colNamesSxp);
270270
for (int i = 0; i < ncol; i++) {
271271
if (colNames == NULL || colNames[i].len <= 0) {
272272
char buff[12];
@@ -452,7 +452,8 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
452452
if (newDT) {
453453
ncol = ncolArg;
454454
dtnrows = allocNrow;
455-
SET_VECTOR_ELT(RCHK, 0, DT = allocVector(VECSXP, ncol - ndrop));
455+
DT = R_allocResizableVector(VECSXP, ncol - ndrop);
456+
SET_VECTOR_ELT(RCHK, 0, DT);
456457
if (ndrop == 0) {
457458
setAttrib(DT, R_NamesSymbol, colNamesSxp); // colNames mkChar'd in userOverride step
458459
if (colClassesAs) setAttrib(DT, sym_colClassesAs, colClassesAs);
@@ -512,8 +513,8 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
512513
const bool typeChanged = (type[i] > 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64);
513514
const bool nrowChanged = (allocNrow != dtnrows);
514515
if (typeChanged || nrowChanged) {
515-
SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) : growVector(col, allocNrow); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
516-
516+
SEXP thiscol = typeChanged ? R_allocResizableVector(typeSxp[type[i]], allocNrow) : growVector(col, allocNrow); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1
517+
517518
SET_VECTOR_ELT(DT, resi, thiscol);
518519
if (type[i] == CT_INT64) {
519520
SEXP tt = PROTECT(ScalarString(char_integer64));
@@ -534,7 +535,6 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size
534535

535536
setAttrib(thiscol, sym_tzone, ScalarString(char_UTC)); // see news for v1.13.0
536537
}
537-
SET_TRUELENGTH(thiscol, allocNrow);
538538
DTbytes += RTYPE_SIZEOF(thiscol) * allocNrow;
539539
}
540540
resi++;
@@ -551,9 +551,7 @@ void setFinalNrow(size_t nrow)
551551
return;
552552
const int ncol = LENGTH(DT);
553553
for (int i = 0; i < ncol; i++) {
554-
SETLENGTH(VECTOR_ELT(DT, i), nrow);
555-
SET_TRUELENGTH(VECTOR_ELT(DT, i), dtnrows);
556-
SET_GROWABLE_BIT(VECTOR_ELT(DT, i)); // #3292
554+
R_resizeVector(VECTOR_ELT(DT, i), nrow);
557555
}
558556
}
559557
R_FlushConsole(); // # 2481. Just a convenient place; nothing per se to do with setFinalNrow()
@@ -567,8 +565,9 @@ void dropFilledCols(int* dropArg, int ndelete)
567565
SET_VECTOR_ELT(DT, dropFill[i], R_NilValue);
568566
SET_STRING_ELT(colNamesSxp, dropFill[i], NA_STRING);
569567
}
570-
SETLENGTH(DT, ndt - ndelete);
571-
SETLENGTH(colNamesSxp, ndt - ndelete);
568+
R_resizeVector(DT, ndt - ndelete);
569+
R_resizeVector(colNamesSxp, ndt - ndelete);
570+
setAttrib(DT, R_NamesSymbol, colNamesSxp); // reinstall after resize
572571
}
573572

574573
void pushBuffer(ThreadLocalFreadParsingContext *ctx)

src/frollapply.c

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ SEXP memcpyVectoradaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) {
5050
const size_t oi = INTEGER_RO(offset)[0];
5151
const int nrow = INTEGER_RO(size)[oi - 1];
5252
const size_t o = oi - nrow; // oi should always be bigger than nrow because we filter out incomplete window using ansMask
53-
SETLENGTH(dest, nrow); // must be before memcpy_sexp because attempt to set index 1/1 in SET_STRING_ELT test 6010.150
53+
R_resizeVector(dest, nrow); // must be before memcpy_sexp because attempt to set index 1/1 in SET_STRING_ELT test 6010.150
5454
if (nrow) { // support k[i]==0
5555
memcpy_sexp(dest, o, src, nrow);
5656
}
@@ -65,7 +65,7 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) {
6565
const int ncol = LENGTH(dest);
6666
for (int i = 0; i < ncol; i++) {
6767
SEXP d = VECTOR_ELT(dest, i);
68-
SETLENGTH(d, nrow);
68+
R_resizeVector(d, nrow);
6969
if (nrow) { // support k[i]==0
7070
memcpy_sexp(d, o, VECTOR_ELT(src, i), nrow);
7171
}
@@ -76,18 +76,5 @@ SEXP memcpyDTadaptive(SEXP dest, SEXP src, SEXP offset, SEXP size) {
7676

7777
// needed in adaptive=TRUE
7878
SEXP setgrowable(SEXP x) {
79-
if (!isNewList(x)) {
80-
SET_GROWABLE_BIT(x);
81-
SET_TRUELENGTH(x, LENGTH(x)); // important because gc() uses TRUELENGTH to keep counts
82-
} else {
83-
// # nocov start ## does not seem to be reported to codecov most likely due to running in a fork, I manually debugged that it is being called when running froll.Rraw
84-
for (int i = 0; i < LENGTH(x); i++) {
85-
//Rprintf("%d",3); // manual code coverage to confirm it is reached when marking nocov
86-
SEXP col = VECTOR_ELT(x, i);
87-
SET_GROWABLE_BIT(col);
88-
SET_TRUELENGTH(col, LENGTH(col));
89-
}
90-
// # nocov end
91-
}
9279
return x;
9380
}

src/subset.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -297,16 +297,15 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md
297297
}
298298

299299
int overAlloc = checkOverAlloc(GetOption1(install("datatable.alloccol")));
300-
SEXP ans = PROTECT(allocVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated.
300+
SEXP ans = PROTECT(R_allocResizableVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated.
301301

302302
// user-defined and superclass attributes get copied as from v1.12.0
303303
copyMostAttrib(x, ans);
304304
// most means all except R_NamesSymbol, R_DimSymbol and R_DimNamesSymbol
305305
// includes row.names (oddly, given other dims aren't) and "sorted" dealt with below
306306
// class is also copied here which retains superclass name in class vector as has been the case for many years; e.g. tests 1228.* for #64
307307

308-
SET_TRUELENGTH(ans, LENGTH(ans));
309-
SETLENGTH(ans, LENGTH(cols));
308+
R_resizeVector(ans, LENGTH(cols));
310309
int ansn;
311310
if (isNull(rows)) {
312311
ansn = nrow;
@@ -329,9 +328,8 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md
329328
subsetVectorRaw(target, source, rows, anyNA); // parallel within column
330329
}
331330
}
332-
SEXP tmp = PROTECT(allocVector(STRSXP, LENGTH(cols)+overAlloc)); nprotect++;
333-
SET_TRUELENGTH(tmp, LENGTH(tmp));
334-
SETLENGTH(tmp, LENGTH(cols));
331+
SEXP tmp = PROTECT(R_allocResizableVector(STRSXP, LENGTH(cols)+overAlloc)); nprotect++;
332+
R_resizeVector(tmp, LENGTH(cols));
335333
setAttrib(ans, R_NamesSymbol, tmp);
336334
subsetVectorRaw(tmp, getAttrib(x, R_NamesSymbol), cols, /*anyNA=*/false);
337335

0 commit comments

Comments
 (0)