Skip to content

Commit f927451

Browse files
committed
Switch shallow() to use growable_allocate()
The resulting data.tables now have GROWABLE_BIT set, therefore: - the finalizer is not needed on R >= 3.4 - duplicates of data.tables (which are not over-allocated) now have TRUELENGTH of 0 instead of whatever it was before, which is detected earlier in selfrefok() As a result, assign.c only uses TRUELENGTH on R < 3.4.
1 parent 3b31850 commit f927451

File tree

2 files changed

+29
-29
lines changed

2 files changed

+29
-29
lines changed

inst/tests/tests.Rraw

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19303,12 +19303,6 @@ test(2290.3, DT[, `:=`(a, c := 3)], error="It looks like you re-used `:=` in arg
1930319303
# partially-named `:=`(...) --> different branch, same error
1930419304
test(2290.4, DT[, `:=`(a = 2, c := 3)], error="It looks like you re-used `:=` in argument 2")
1930519305

19306-
# segfault when selfref is not ok before set #6410
19307-
df = data.frame(a=1:3)
19308-
setDT(df)
19309-
attr(df, "att") = 1
19310-
test(2291.1, set(df, NULL, "new", "new"), error="attributes .* have been reassigned")
19311-
1931219306
# ns-qualified bysub error, #6493
1931319307
DT = data.table(a = 1)
1931419308
test(2292.1, DT[, .N, by=base::mget("a")], data.table(a = 1, N = 1L))

src/assign.c

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "data.table.h"
22

3+
#if R_VERSION < R_Version(3,4,0) // not needed with GROWABLE_BIT
34
static void finalizer(SEXP p)
45
{
56
SEXP x;
@@ -22,6 +23,7 @@ static void finalizer(SEXP p)
2223
UNPROTECT(1);
2324
return;
2425
}
26+
#endif
2527

2628
void setselfref(SEXP x) {
2729
if(!INHERITS(x, char_datatable)) return; // #5286
@@ -38,7 +40,9 @@ void setselfref(SEXP x) {
3840
R_NilValue
3941
))
4042
));
43+
#if R_VERSION < R_Version(3,4,0) // not needed with GROWABLE_BIT
4144
R_RegisterCFinalizerEx(p, finalizer, FALSE);
45+
#endif
4246
UNPROTECT(2);
4347

4448
/*
@@ -126,15 +130,24 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
126130
tag = R_ExternalPtrTag(v);
127131
if (!(isNull(tag) || isString(tag))) internal_error(__func__, ".internal.selfref tag is neither NULL nor a character vector"); // # nocov
128132
names = getAttrib(x, R_NamesSymbol);
129-
if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734
133+
// On R >= 3.4, either
134+
// (1) we allocate the data.table and/or its names, so it has the GROWABLE_BIT set, so copies will have zero TRUELENGTH, or
135+
// (2) someone else creates them from scratch, so (only using the API) will have zero TRUELENGTH.
136+
// We then return false and either re-create the data.table from scratch or signal an error, so the current object having a zero TRUELENGTH is fine.
137+
// R < 3.4 doesn't have the GROWABLE_BIT, so let's reset the TRUELENGTH just in case.
138+
#if R_VERSION < R_Version(3,4,0)
139+
if (names!=tag && isString(names))
130140
SET_TRUELENGTH(names, LENGTH(names));
131141
// R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated
132142
// because R copies the original vector's tl over despite allocating length.
143+
#endif
133144
prot = R_ExternalPtrProtected(v);
134145
if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")).
135146
return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r
136-
if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x))
147+
#if R_VERSION < R_Version(3,4,0)
148+
if (x!=R_ExternalPtrAddr(prot))
137149
SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated
150+
#endif
138151
return checkNames ? names==tag : x==R_ExternalPtrAddr(prot);
139152
}
140153

@@ -151,7 +164,8 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
151164
// called from alloccol where n is checked carefully, or from shallow() at R level
152165
// where n is set to truelength (i.e. a shallow copy only with no size change)
153166
int protecti=0;
154-
SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here?
167+
const int l = isNull(cols) ? length(dt) : length(cols);
168+
SEXP newdt = PROTECT(growable_allocate(VECSXP, l, n)); protecti++; // to do, use growVector here?
155169
SHALLOW_DUPLICATE_ATTRIB(newdt, dt);
156170

157171
// TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It
@@ -169,8 +183,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
169183
setAttrib(newdt, sym_sorted, duplicate(sorted));
170184

171185
SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
172-
SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++;
173-
const int l = isNull(cols) ? LENGTH(dt) : length(cols);
186+
SEXP newnames = PROTECT(growable_allocate(STRSXP, l, n)); protecti++;
174187
if (isNull(cols)) {
175188
for (int i=0; i<l; ++i) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,i));
176189
if (length(names)) {
@@ -186,13 +199,8 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
186199
for (int i=0; i<l; ++i) SET_STRING_ELT( newnames, i, STRING_ELT(names,INTEGER(cols)[i]-1) );
187200
}
188201
}
202+
// setAttrib used to change length and truelength, but as of R-3.3 no longer does that
189203
setAttrib(newdt, R_NamesSymbol, newnames);
190-
// setAttrib appears to change length and truelength, so need to do that first _then_ SET next,
191-
// otherwise (if the SET were were first) the 100 tl is assigned to length.
192-
SETLENGTH(newnames,l);
193-
SET_TRUELENGTH(newnames,n);
194-
SETLENGTH(newdt,l);
195-
SET_TRUELENGTH(newdt,n);
196204
setselfref(newdt);
197205
UNPROTECT(protecti);
198206
return(newdt);
@@ -260,10 +268,8 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
260268
return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2
261269
// added (n>l) ? ... for #970, see test 1481.
262270
// TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example.
263-
// if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl)
264-
// internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov
265271

266-
tl = TRUELENGTH(dt);
272+
tl = growable_max_size(dt);
267273
// R <= 2.13.2 and we didn't catch uninitialized tl somehow
268274
if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov
269275
if (tl>0 && tl<l) internal_error(__func__, "tl (%d) < l (%d) but tl of class is marked", tl, l); // # nocov
@@ -313,11 +319,11 @@ SEXP shallowwrapper(SEXP dt, SEXP cols) {
313319
if (!selfrefok(dt, FALSE)) {
314320
int n = isNull(cols) ? length(dt) : length(cols);
315321
return(shallow(dt, cols, n));
316-
} else return(shallow(dt, cols, TRUELENGTH(dt)));
322+
} else return(shallow(dt, cols, growable_max_size(dt)));
317323
}
318324

319325
SEXP truelength(SEXP x) {
320-
return ScalarInteger(isNull(x) ? 0 : TRUELENGTH(x));
326+
return ScalarInteger(isNull(x) ? 0 : growable_max_size(x));
321327
}
322328

323329
SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
@@ -514,7 +520,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
514520
// modify DT by reference. Other than if new columns are being added and the allocVec() fails with
515521
// out-of-memory. In that case the user will receive hard halt and know to rerun.
516522
if (length(newcolnames)) {
517-
oldtncol = TRUELENGTH(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
523+
oldtncol = growable_max_size(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
518524

519525
if (oldtncol<oldncol) {
520526
if (oldtncol==0) error(_("This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed manually (e.g. using structure()). Please run setDT() or setalloccol() on it first (to pre-allocate space for new columns) before assigning by reference to it.")); // #2996
@@ -527,13 +533,13 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
527533
error(_("It appears that at some earlier point, names of this data.table have been reassigned. Please ensure to use setnames() rather than names<- or colnames<-. Otherwise, please report to data.table issue tracker.")); // # nocov
528534
// Can growVector at this point easily enough, but it shouldn't happen in first place so leave it as
529535
// strong error message for now.
530-
else if (TRUELENGTH(names) != oldtncol)
536+
else if (growable_max_size(names) != oldtncol)
531537
// Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768, PRId64 didn't work
532-
internal_error(__func__, "selfrefnames is ok but tl names [%lld] != tl [%d]", (long long)TRUELENGTH(names), oldtncol); // # nocov
538+
internal_error(__func__, "selfrefnames is ok but tl names [%lld] != tl [%d]", (long long)growable_max_size(names), oldtncol); // # nocov
533539
if (!selfrefok(dt, verbose)) // #6410 setDT(dt) and subsequent attr<- can lead to invalid selfref
534540
error(_("It appears that at some earlier point, attributes of this data.table have been reassigned. Please use setattr(DT, name, value) rather than attr(DT, name) <- value. If that doesn't apply to you, please report your case to the data.table issue tracker."));
535-
SETLENGTH(dt, oldncol+LENGTH(newcolnames));
536-
SETLENGTH(names, oldncol+LENGTH(newcolnames));
541+
growable_resize(dt, oldncol+LENGTH(newcolnames));
542+
growable_resize(names, oldncol+LENGTH(newcolnames));
537543
for (int i=0; i<LENGTH(newcolnames); ++i)
538544
SET_STRING_ELT(names,oldncol+i,STRING_ELT(newcolnames,i));
539545
// truelengths of both already set by alloccol
@@ -730,8 +736,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
730736
SET_VECTOR_ELT(dt, i, R_NilValue);
731737
SET_STRING_ELT(names, i, NA_STRING); // release reference to the CHARSXP
732738
}
733-
SETLENGTH(dt, ndt-ndelete);
734-
SETLENGTH(names, ndt-ndelete);
739+
growable_resize(dt, ndt-ndelete);
740+
growable_resize(names, ndt-ndelete);
735741
if (LENGTH(names)==0) {
736742
// That was last column deleted, leaving NULL data.table, so we need to reset .row_names, so that it really is the NULL data.table.
737743
PROTECT(nullint=allocVector(INTSXP, 0)); protecti++;

0 commit comments

Comments
 (0)