diff --git a/.devcontainer/r-devel-growable/Dockerfile b/.devcontainer/r-devel-growable/Dockerfile new file mode 100644 index 0000000000..75d4ee64a6 --- /dev/null +++ b/.devcontainer/r-devel-growable/Dockerfile @@ -0,0 +1,13 @@ +FROM registry.gitlab.com/rdatatable/dockerfiles/r-devel-growable + +COPY DESCRIPTION . +RUN /usr/local/bin/Rscript -e ' \ +read.dcf("DESCRIPTION", c("Imports", "Suggests")) |> \ + tools:::.split_dependencies() |> \ + names() |> \ + setdiff(tools:::.get_standard_package_names()$base) |> \ + install.packages() \ +' + +WORKDIR /root +COPY .devcontainer/.Rprofile . \ No newline at end of file diff --git a/.devcontainer/r-devel-growable/devcontainer.json b/.devcontainer/r-devel-growable/devcontainer.json new file mode 100644 index 0000000000..c2d45094a2 --- /dev/null +++ b/.devcontainer/r-devel-growable/devcontainer.json @@ -0,0 +1,15 @@ +{ + "name": "R-devel with Growable Vectors", + "build": { + "dockerfile": "Dockerfile", + "context": "../.." + }, + "customizations": { + "vscode": { + "extensions": [ + "REditorSupport.r", + "ms-vscode.cpptools-extension-pack" + ] + } + } +} \ No newline at end of file diff --git a/R/data.table.R b/R/data.table.R index f8f39c8f7f..616b993c15 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1356,6 +1356,7 @@ replace_dot_alias = function(e) { } if (!with || missing(j)) return(ans) if (!is.data.table(ans)) setattr(ans, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...]) .SD should be data.table, test 2212.013 + setgrowable(ans) SDenv$.SDall = ans SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall SDenv$.N = nrow(ans) @@ -1594,6 +1595,7 @@ replace_dot_alias = function(e) { SDenv$.SDall = .Call(CsubsetDT, x, if (length(len__)) seq_len(max(len__)) else 0L, xcols) # must be deep copy when largest group is a subset if (!is.data.table(SDenv$.SDall)) setattr(SDenv$.SDall, "class", c("data.table","data.frame")) # DF |> DT(,.SD[...],by=grp) needs .SD to be data.table, test 2022.012 if (xdotcols) setattr(SDenv$.SDall, 'names', ansvars[xcolsAns]) # now that we allow 'x.' prefix in 'j', #2313 bug fix - [xcolsAns] + SDenv$.SDall = setgrowable(SDenv$.SDall) SDenv$.SD = if (length(non_sdvars)) shallow(SDenv$.SDall, sdvars) else SDenv$.SDall } if (nrow(SDenv$.SDall)==0L) { diff --git a/R/wrappers.R b/R/wrappers.R index 7683b434fc..3a26c880ec 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -22,5 +22,6 @@ fitsInInt64 = function(x) .Call(CfitsInInt64R, x) coerceAs = function(x, as, copy=TRUE) .Call(CcoerceAs, x, as, copy) +setgrowable = function(x) .Call(Csetgrowable, x) frev = function(x) .Call(Cfrev, x, TRUE) setfrev = function(x) invisible(.Call(Cfrev, x, FALSE)) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b2142520d8..1a544267d1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -19444,12 +19444,6 @@ test(2290.3, DT[, `:=`(a, c := 3)], error="It looks like you re-used `:=` in arg # partially-named `:=`(...) --> different branch, same error test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in argument 2") -# segfault when selfref is not ok before set #6410 -df = data.frame(a=1:3) -setDT(df) -attr(df, "att") = 1 -test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned") - # ns-qualified bysub error, #6493 DT = data.table(a = 1) test(2292.1, DT[, .N, by=base::mget("a")], data.table(a = 1, N = 1L)) diff --git a/src/assign.c b/src/assign.c index 3d1ca18afb..f8fd952e3f 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1,28 +1,5 @@ #include "data.table.h" -static void finalizer(SEXP p) -{ - SEXP x; - R_len_t n, l, tl; - if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov - p = R_ExternalPtrTag(p); - if (!isString(p)) internal_error(__func__, "ExternalPtr doesn't see names in tag"); // # nocov - l = LENGTH(p); - tl = TRUELENGTH(p); - if (l<0 || tl= 3.4, either + // (1) we allocate the data.table and/or its names, so it has the GROWABLE_BIT set, so copies will have zero TRUELENGTH, or + // (2) someone else creates them from scratch, so (only using the API) will have zero TRUELENGTH. + // We then return false and either re-create the data.table from scratch or signal an error, so the current object having a zero TRUELENGTH is fine. prot = R_ExternalPtrProtected(v); if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")). return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r - if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x)) - SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated return checkNames ? names==tag : x==R_ExternalPtrAddr(prot); } @@ -151,7 +125,8 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) // called from alloccol where n is checked carefully, or from shallow() at R level // where n is set to truelength (i.e. a shallow copy only with no size change) int protecti=0; - SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here? + const int l = isNull(cols) ? length(dt) : length(cols); + SEXP newdt = PROTECT(growable_allocate(VECSXP, l, n)); protecti++; // to do, use growVector here? SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It @@ -169,8 +144,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n) setAttrib(newdt, sym_sorted, duplicate(sorted)); SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++; - SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++; - const int l = isNull(cols) ? LENGTH(dt) : length(cols); + SEXP newnames = PROTECT(growable_allocate(STRSXP, l, n)); protecti++; if (isNull(cols)) { for (int i=0; il) ? n : l); // e.g. test 848 and 851 in R > 3.0.2 // added (n>l) ? ... for #970, see test 1481. // TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example. - // if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl) - // internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov - tl = TRUELENGTH(dt); + tl = growable_capacity(dt); // R <= 2.13.2 and we didn't catch uninitialized tl somehow if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov if (tl>0 && tl +#include #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT #include // for uint64_t rather than unsigned long long #include // for va_list, va_start @@ -425,3 +426,5 @@ SEXP dt_has_zlib(void); SEXP startsWithAny(SEXP, SEXP, SEXP); SEXP convertDate(SEXP, SEXP); SEXP fastmean(SEXP); +SEXP setgrowable(SEXP x); + diff --git a/src/dogroups.c b/src/dogroups.c index 0914e2d882..a4b3bb828f 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -45,11 +45,10 @@ static bool anySpecialStatic(SEXP x, hashtab * specials) { // (see data.table.h), and isNewList() is true for NULL if (n==0) return false; + if (hash_lookup(specials, x, 0)<0) return true; // test 2158 if (isVectorAtomic(x)) - return ALTREP(x) || hash_lookup(specials, x, 0)<0; + return ALTREP(x); // see test 2156: ALTREP is a source of sharing we can't trace reliably if (isNewList(x)) { - if (hash_lookup(specials, x, 0)<0) - return true; // test 2158 for (int i=0; i maxGrpSize) maxGrpSize = ilens[i]; } - defineVar(install(".I"), I = PROTECT(allocVector(INTSXP, maxGrpSize)), env); nprotect++; + defineVar(install(".I"), I = PROTECT(growable_allocate(INTSXP, maxGrpSize, maxGrpSize)), env); nprotect++; hash_set(specials, I, -maxGrpSize); // marker for anySpecialStatic(); see its comments R_LockBinding(install(".I"), env); @@ -197,7 +196,7 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX INTEGER(GRP)[0] = i+1; // group counter exposed as .GRP INTEGER(rownames)[1] = -grpn; // the .set_row_names() of .SD. Not .N when nomatch=NA and this is a nomatch for (int j=0; j0; @@ -529,7 +526,7 @@ SEXP growVector(SEXP x, const R_len_t newlen) SEXP newx; R_len_t len = length(x); if (isNull(x)) error(_("growVector passed NULL")); - PROTECT(newx = allocVector(TYPEOF(x), newlen)); // TO DO: R_realloc(?) here? + PROTECT(newx = growable_allocate(TYPEOF(x), newlen, newlen)); // may be shrunk later by fread if (newlen < len) len=newlen; // i.e. shrink if (!len) { // cannot memcpy invalid pointer, #6819 keepattr(newx, x); diff --git a/src/freadR.c b/src/freadR.c index 11bf027598..507c8a492f 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -263,9 +263,9 @@ bool userOverride(int8_t *type, lenOff *colNames, const char *anchor, const int if (typeSize[CT_BOOL8_N] != 1) internal_error(__func__, "typeSize[CT_BOOL8_N] != 1"); // # nocov if (typeSize[CT_STRING] != 8) internal_error(__func__, "typeSize[CT_STRING] != 8"); // # nocov colNamesSxp = R_NilValue; - SET_VECTOR_ELT(RCHK, 1, colNamesSxp = allocVector(STRSXP, ncol)); - for (int i = 0; i < ncol; i++) { - if (colNames == NULL || colNames[i].len <= 0) { + SET_VECTOR_ELT(RCHK, 1, colNamesSxp=growable_allocate(STRSXP, ncol, ncol)); + for (int i=0; i 0) && (newDT || TYPEOF(col) != typeSxp[type[i]] || oldIsInt64 != newIsInt64); const bool nrowChanged = (allocNrow != dtnrows); if (typeChanged || nrowChanged) { - SEXP thiscol = typeChanged ? allocVector(typeSxp[type[i]], allocNrow) : growVector(col, allocNrow); // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 - - SET_VECTOR_ELT(DT, resi, thiscol); - if (type[i] == CT_INT64) { + SEXP thiscol = typeChanged ? growable_allocate(typeSxp[type[i]], allocNrow, allocNrow) // no need to PROTECT, passed immediately to SET_VECTOR_ELT, see R-exts 5.9.1 + : growVector(col, allocNrow); + SET_VECTOR_ELT(DT,resi,thiscol); + if (type[i]==CT_INT64) { SEXP tt = PROTECT(ScalarString(char_integer64)); setAttrib(thiscol, R_ClassSymbol, tt); UNPROTECT(1); @@ -531,8 +531,7 @@ size_t allocateDT(int8_t *typeArg, int8_t *sizeArg, int ncolArg, int ndrop, size setAttrib(thiscol, sym_tzone, ScalarString(char_UTC)); // see news for v1.13.0 } - SET_TRUELENGTH(thiscol, allocNrow); - DTbytes += RTYPE_SIZEOF(thiscol) * allocNrow; + DTbytes += RTYPE_SIZEOF(thiscol)*allocNrow; } resi++; } @@ -546,11 +545,9 @@ void setFinalNrow(size_t nrow) if (length(DT)) { if (nrow == dtnrows) return; - const int ncol = LENGTH(DT); - for (int i = 0; i < ncol; i++) { - SETLENGTH(VECTOR_ELT(DT, i), nrow); - SET_TRUELENGTH(VECTOR_ELT(DT, i), dtnrows); - SET_GROWABLE_BIT(VECTOR_ELT(DT, i)); // #3292 + const int ncol=LENGTH(DT); + for (int i=0; iLENGTH(x)) error(_("Item %d of cols is %d which is outside the range [1,ncol(x)=%d]"), i+1, this, LENGTH(x)); } - int overAlloc = checkOverAlloc(GetOption1(install("datatable.alloccol"))); - SEXP ans = PROTECT(allocVector(VECSXP, LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. + int overAlloc = checkOverAlloc(GetOption(install("datatable.alloccol"), R_NilValue)); + SEXP ans = PROTECT(growable_allocate(VECSXP, LENGTH(cols), LENGTH(cols)+overAlloc)); nprotect++; // doing alloc.col directly here; eventually alloc.col can be deprecated. // user-defined and superclass attributes get copied as from v1.12.0 copyMostAttrib(x, ans); @@ -305,8 +305,6 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md // includes row.names (oddly, given other dims aren't) and "sorted" dealt with below // class is also copied here which retains superclass name in class vector as has been the case for many years; e.g. tests 1228.* for #64 - SET_TRUELENGTH(ans, LENGTH(ans)); - SETLENGTH(ans, LENGTH(cols)); int ansn; if (isNull(rows)) { ansn = nrow; @@ -329,9 +327,7 @@ SEXP subsetDT(SEXP x, SEXP rows, SEXP cols) { // API change needs update NEWS.md subsetVectorRaw(target, source, rows, anyNA); // parallel within column } } - SEXP tmp = PROTECT(allocVector(STRSXP, LENGTH(cols)+overAlloc)); nprotect++; - SET_TRUELENGTH(tmp, LENGTH(tmp)); - SETLENGTH(tmp, LENGTH(cols)); + SEXP tmp = PROTECT(growable_allocate(STRSXP, LENGTH(cols), LENGTH(cols)+overAlloc)); nprotect++; setAttrib(ans, R_NamesSymbol, tmp); subsetVectorRaw(tmp, getAttrib(x, R_NamesSymbol), cols, /*anyNA=*/false);