Skip to content

Commit 4ef6ab5

Browse files
committed
Data desegregation + minor formatting improvements + constness improvements
1 parent ed2f36f commit 4ef6ab5

File tree

1 file changed

+22
-27
lines changed

1 file changed

+22
-27
lines changed

src/assign.c

Lines changed: 22 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,19 @@
22

33
static void finalizer(SEXP p)
44
{
5-
SEXP x;
6-
R_len_t n, l, tl;
75
if(!R_ExternalPtrAddr(p)) internal_error(__func__, "didn't receive an ExternalPtr"); // # nocov
86
p = R_ExternalPtrTag(p);
97
if (!isString(p)) internal_error(__func__, "ExternalPtr doesn't see names in tag"); // # nocov
10-
l = LENGTH(p);
11-
tl = TRUELENGTH(p);
8+
const R_len_t l = LENGTH(p);
9+
const R_len_t tl = TRUELENGTH(p);
1210
if (l<0 || tl<l) internal_error(__func__, "l=%d, tl=%d", l, tl); // # nocov
13-
n = tl-l;
11+
const R_len_t n = tl-l;
1412
if (n==0) {
1513
// gc's ReleaseLargeFreeVectors() will have reduced R_LargeVallocSize by the correct amount
1614
// already, so nothing to do (but almost never the case).
1715
return;
1816
}
19-
x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
17+
SEXP x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
2018
// INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015)
2119
SETLENGTH(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
2220
UNPROTECT(1);
@@ -25,19 +23,19 @@ static void finalizer(SEXP p)
2523

2624
void setselfref(SEXP x) {
2725
if(!INHERITS(x, char_datatable)) return; // #5286
28-
SEXP p;
29-
// Store pointer to itself so we can detect if the object has been copied. See
30-
// ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables.
31-
// Called from C only, not R level, so returns void.
32-
setAttrib(x, SelfRefSymbol, p=R_MakeExternalPtr(
26+
SEXP p=R_MakeExternalPtr(
3327
R_NilValue, // for identical() to return TRUE. identical() doesn't look at tag and prot
3428
PROTECT(getAttrib(x, R_NamesSymbol)), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...)
3529
PROTECT(R_MakeExternalPtr( // to avoid an infinite loop in object.size(), if prot=x here
3630
x, // to know if this data.table has been copied by attr<-, names<-, etc.
3731
R_NilValue, // this tag and prot currently unused
3832
R_NilValue
3933
))
40-
));
34+
);
35+
// Store pointer to itself so we can detect if the object has been copied. See
36+
// ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables.
37+
// Called from C only, not R level, so returns void.
38+
setAttrib(x, SelfRefSymbol, p);
4139
R_RegisterCFinalizerEx(p, finalizer, FALSE);
4240
UNPROTECT(2);
4341

@@ -107,8 +105,7 @@ Moved out of ?setkey Details section in 1.12.2 (Mar 2019). Revisit this w.r.t. t
107105
*/
108106

109107
static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
110-
SEXP v, p, tag, prot, names;
111-
v = getAttrib(x, SelfRefSymbol);
108+
SEXP v = getAttrib(x, SelfRefSymbol);
112109
if (v==R_NilValue || TYPEOF(v)!=EXTPTRSXP) {
113110
// .internal.selfref missing is expected and normal for i) a pre v1.7.8 data.table loaded
114111
// from disk, and ii) every time a new data.table is over-allocated for the first time.
@@ -117,20 +114,20 @@ static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
117114
// In both cases the selfref is not ok.
118115
return 0;
119116
}
120-
p = R_ExternalPtrAddr(v);
117+
SEXP p = R_ExternalPtrAddr(v);
121118
if (p==NULL) {
122119
if (verbose) Rprintf(_("The data.table internal attributes of this table are invalid. This is expected and normal for a data.table loaded from disk. Please remember to always setDT() immediately after loading to prevent unexpected behavior. If this table was not loaded from disk or you've already run setDT(), please report to the data.table issue tracker.\n"));
123120
return -1;
124121
}
125122
if (!isNull(p)) internal_error(__func__, ".internal.selfref ptr is neither NULL nor R_NilValue"); // # nocov
126-
tag = R_ExternalPtrTag(v);
123+
SEXP tag = R_ExternalPtrTag(v);
127124
if (!(isNull(tag) || isString(tag))) internal_error(__func__, ".internal.selfref tag is neither NULL nor a character vector"); // # nocov
128-
names = getAttrib(x, R_NamesSymbol);
125+
SEXP names = getAttrib(x, R_NamesSymbol);
129126
if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734
130127
SET_TRUELENGTH(names, LENGTH(names));
131128
// R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated
132129
// because R copies the original vector's tl over despite allocating length.
133-
prot = R_ExternalPtrProtected(v);
130+
SEXP prot = R_ExternalPtrProtected(v);
134131
if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")).
135132
return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r
136133
if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x))
@@ -195,7 +192,7 @@ static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
195192
SET_TRUELENGTH(newdt,n);
196193
setselfref(newdt);
197194
UNPROTECT(protecti);
198-
return(newdt);
195+
return newdt;
199196
}
200197

201198
// Wrapped in a function so the same message is issued for the data.frame case at the R level
@@ -242,14 +239,12 @@ SEXP setdt_nrows(SEXP x)
242239

243240
SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
244241
{
245-
SEXP names, klass; // klass not class at request of pydatatable because class is reserved word in C++, PR #3129
246-
R_len_t l, tl;
247242
if (isNull(dt)) error(_("alloccol has been passed a NULL dt"));
248243
if (TYPEOF(dt) != VECSXP) error(_("dt passed to alloccol isn't type VECSXP"));
249-
klass = getAttrib(dt, R_ClassSymbol);
244+
SEXP klass = getAttrib(dt, R_ClassSymbol);// klass not class at request of pydatatable because class is reserved word in C++, PR #3129
250245
if (isNull(klass)) error(_("dt passed to alloccol has no class attribute. Please report result of traceback() to data.table issue tracker."));
251-
l = LENGTH(dt);
252-
names = getAttrib(dt,R_NamesSymbol);
246+
const R_len_t l = LENGTH(dt);
247+
SEXP names = getAttrib(dt,R_NamesSymbol);
253248
// names may be NULL when null.data.table() passes list() to alloccol for example.
254249
// So, careful to use length() on names, not LENGTH().
255250
if (length(names)!=l) internal_error(__func__, "length of names (%d) is not length of dt (%d)", length(names),l); // # nocov
@@ -260,15 +255,15 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
260255
// if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl)
261256
// internal_error(__func__, "tl of dt passes checks, but tl of names (%d) != tl of dt (%d)", tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov
262257

263-
tl = TRUELENGTH(dt);
258+
const R_len_t tl = TRUELENGTH(dt);
264259
// R <= 2.13.2 and we didn't catch uninitialized tl somehow
265260
if (tl<0) internal_error(__func__, "tl of class is marked but tl<0"); // # nocov
266261
if (tl>0 && tl<l) internal_error(__func__, "tl (%d) < l (%d) but tl of class is marked", tl, l); // # nocov
267262
if (tl>l+10000) warning(_("tl (%d) is greater than 10,000 items over-allocated (l = %d). If you didn't set the datatable.alloccol option to be very large, please report to data.table issue tracker including the result of sessionInfo()."),tl,l);
268263
if (n>tl) return(shallow(dt,R_NilValue,n)); // usual case (increasing alloc)
269264
if (n<tl && verbose) Rprintf(_("Attempt to reduce allocation from %d to %d ignored. Can only increase allocation via shallow copy. Please do not use DT[...]<- or DT$someCol<-. Use := inside DT[...] instead."),tl,n);
270265
// otherwise the finalizer can't clear up the Large Vector heap
271-
return(dt);
266+
return dt;
272267
}
273268

274269
int checkOverAlloc(SEXP x)
@@ -1265,7 +1260,7 @@ SEXP allocNAVectorLike(SEXP x, R_len_t n) {
12651260
copyMostAttrib(x, v);
12661261
writeNA(v, 0, n, false);
12671262
UNPROTECT(1);
1268-
return(v);
1263+
return v;
12691264
}
12701265

12711266
static SEXP *saveds=NULL;

0 commit comments

Comments
 (0)