Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 22 additions & 27 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@

static void finalizer(SEXP p)
{
SEXP x;
R_len_t n, l, tl;
if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov
p = R_ExternalPtrTag(p);
if (!isString(p)) internal_error(__func__, "ExternalPtr doesn't see names in tag"); // # nocov
l = LENGTH(p);
tl = TRUELENGTH(p);
const R_len_t l = LENGTH(p);
const R_len_t tl = TRUELENGTH(p);
if (l<0 || tl<l) internal_error(__func__, "l=%d, tl=%d", l, tl); // # nocov
n = tl-l;
const R_len_t n = tl-l;
if (n==0) {
// gc's ReleaseLargeFreeVectors() will already have reduced R_LargeVallocSize by the correct
// amount, so there is nothing to do here (though n==0 is almost never the case).
return;
}
x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
SEXP x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
// INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015)
SETLENGTH(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
UNPROTECT(1);
Expand All @@ -25,19 +23,19 @@ static void finalizer(SEXP p)

void setselfref(SEXP x) {
if(!INHERITS(x, char_datatable)) return; // #5286
SEXP p;
// Store pointer to itself so we can detect if the object has been copied. See
// ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables.
// Called from C only, not R level, so returns void.
setAttrib(x, SelfRefSymbol, p=R_MakeExternalPtr(
SEXP p=R_MakeExternalPtr(
R_NilValue, // for identical() to return TRUE. identical() doesn't look at tag and prot
PROTECT(getAttrib(x, R_NamesSymbol)), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...)
PROTECT(R_MakeExternalPtr( // to avoid an infinite loop in object.size(), if prot=x here
x, // to know if this data.table has been copied by attr<-, names<-, etc.
R_NilValue, // this tag and prot currently unused
R_NilValue
))
));
);
// Store pointer to itself so we can detect if the object has been copied. See
// ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables.
// Called from C only, not R level, so returns void.
setAttrib(x, SelfRefSymbol, p);
R_RegisterCFinalizerEx(p, finalizer, FALSE);
UNPROTECT(2);

Expand Down Expand Up @@ -107,8 +105,7 @@ Moved out of ?setkey Details section in 1.12.2 (Mar 2019). Revisit this w.r.t. t
*/

static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
SEXP v, p, tag, prot, names;
v = getAttrib(x, SelfRefSymbol);
SEXP v = getAttrib(x, SelfRefSymbol);
if (v==R_NilValue || TYPEOF(v)!=EXTPTRSXP) {
// .internal.selfref missing is expected and normal for i) a pre v1.7.8 data.table loaded
// from disk, and ii) every time a new data.table is over-allocated for the first time.
Expand All @@ -117,20 +114,20 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
// In both cases the selfref is not ok.
return 0;
}
p = R_ExternalPtrAddr(v);
SEXP p = R_ExternalPtrAddr(v);
if (p==NULL) {
if (verbose) Rprintf(_("The data.table internal attributes of this table are invalid. This is expected and normal for a data.table loaded from disk. Please remember to always setDT() immediately after loading to prevent unexpected behavior. If this table was not loaded from disk or you've already run setDT(), please report to the data.table issue tracker.\n"));
return -1;
}
if (!isNull(p)) internal_error(__func__, ".internal.selfref ptr is neither NULL nor R_NilValue"); // # nocov
tag = R_ExternalPtrTag(v);
SEXP tag = R_ExternalPtrTag(v);
if (!(isNull(tag) || isString(tag))) internal_error(__func__, ".internal.selfref tag is neither NULL nor a character vector"); // # nocov
names = getAttrib(x, R_NamesSymbol);
SEXP names = getAttrib(x, R_NamesSymbol);
if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734
SET_TRUELENGTH(names, LENGTH(names));
// R, not data.table, copied this vector, so it is not actually over-allocated. It only looks
// over-allocated because R copies the original vector's tl across despite allocating just length.
prot = R_ExternalPtrProtected(v);
SEXP prot = R_ExternalPtrProtected(v);
if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")).
return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r
if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x))
Expand Down Expand Up @@ -195,7 +192,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
SET_TRUELENGTH(newdt,n);
setselfref(newdt);
UNPROTECT(protecti);
return(newdt);
return newdt;
}

// Wrapped in a function so the same message is issued for the data.frame case at the R level
Expand Down Expand Up @@ -242,14 +239,12 @@ SEXP setdt_nrows(SEXP x)

SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
{
SEXP names, klass; // klass not class at request of pydatatable because class is reserved word in C++, PR #3129
R_len_t l, tl;
if (isNull(dt)) error(_("alloccol has been passed a NULL dt"));
if (TYPEOF(dt) != VECSXP) error(_("dt passed to alloccol isn't type VECSXP"));
klass = getAttrib(dt, R_ClassSymbol);
SEXP klass = getAttrib(dt, R_ClassSymbol);// klass not class at request of pydatatable because class is reserved word in C++, PR #3129
if (isNull(klass)) error(_("dt passed to alloccol has no class attribute. Please report result of traceback() to data.table issue tracker."));
l = LENGTH(dt);
names = getAttrib(dt,R_NamesSymbol);
const R_len_t l = LENGTH(dt);
SEXP names = getAttrib(dt,R_NamesSymbol);
// names may be NULL when null.data.table() passes list() to alloccol for example.
// So, careful to use length() on names, not LENGTH().
if (length(names)!=l) internal_error(__func__, "length of names (%d) is not length of dt (%d)", length(names),l); // # nocov
Expand All @@ -260,15 +255,15 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
// if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl)
// internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov

tl = TRUELENGTH(dt);
const R_len_t tl = TRUELENGTH(dt);
// R <= 2.13.2 and we didn't catch uninitialized tl somehow
if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov
if (tl>0 && tl<l) internal_error(__func__, "tl (%d) < l (%d) but tl of class is marked", tl, l); // # nocov
if (tl>l+10000) warning(_("tl (%d) is greater than 10,000 items over-allocated (l = %d). If you didn't set the datatable.alloccol option to be very large, please report to data.table issue tracker including the result of sessionInfo()."),tl,l);
if (n>tl) return(shallow(dt,R_NilValue,n)); // usual case (increasing alloc)
if (n<tl && verbose) Rprintf(_("Attempt to reduce allocation from %d to %d ignored. Can only increase allocation via shallow copy. Please do not use DT[...]<- or DT$someCol<-. Use := inside DT[...] instead."),tl,n);
// otherwise the finalizer can't clear up the Large Vector heap
return(dt);
return dt;
}

int checkOverAlloc(SEXP x)
Expand Down Expand Up @@ -1265,7 +1260,7 @@ SEXP allocNAVectorLike(SEXP x, R_len_t n) {
copyMostAttrib(x, v);
writeNA(v, 0, n, false);
UNPROTECT(1);
return(v);
return v;
}

static SEXP *saveds=NULL;
Expand Down
Loading