Skip to content

Commit 61fdc06

Browse files
committed
ALTREP implementation of growable_*
Currently broken: - 1 errors out of 11637 - won't work with expression vectors at all
1 parent 7b9b141 commit 61fdc06

File tree

8 files changed

+280
-10
lines changed

8 files changed

+280
-10
lines changed

src/assign.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ SEXP shallowwrapper(SEXP dt, SEXP cols) {
323323
}
324324

325325
SEXP truelength(SEXP x) {
326-
return ScalarInteger(isNull(x) ? 0 : growable_max_size(x));
326+
return ScalarInteger(is_growable(x) ? growable_max_size(x) : 0);
327327
}
328328

329329
SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
@@ -520,7 +520,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
520520
// modify DT by reference. Other than if new columns are being added and the allocVec() fails with
521521
// out-of-memory. In that case the user will receive hard halt and know to rerun.
522522
if (length(newcolnames)) {
523-
oldtncol = growable_max_size(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
523+
oldtncol = is_growable(dt) ? growable_max_size(dt) : 0; // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
524524

525525
if (oldtncol<oldncol) {
526526
if (oldtncol==0) error(_("This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed manually (e.g. using structure()). Please run setDT() or setalloccol() on it first (to pre-allocate space for new columns) before assigning by reference to it.")); // #2996

src/data.table.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#if R_VERSION < R_Version(3, 4, 0)
1818
# define SET_GROWABLE_BIT(x) // #3292
1919
#endif
20+
#if R_VERSION >= R_Version(4, 3, 0)
21+
# define USE_GROWABLE_ALTREP
22+
#endif
2023
#include <Rinternals.h>
2124
#define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT
2225
#include <stdint.h> // for uint64_t rather than unsigned long long
@@ -309,6 +312,9 @@ void growable_resize(SEXP x, R_xlen_t newsize);
309312
Rboolean is_growable(SEXP x);
310313
// Transform x into a growable vector. The return value must be reprotected in place of x. What happens to x is deliberately not specified, but no copying occurs.
311314
SEXP make_growable(SEXP x);
315+
#if R_VERSION >= R_Version(4, 3, 0)
316+
void register_altrep_classes(DllInfo*);
317+
#endif
312318

313319
// functions called from R level .Call/.External and registered in init.c
314320
// these now live here to pass -Wstrict-prototypes, #5477

src/dogroups.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,10 @@ static bool anySpecialStatic(SEXP x, hashtab * specials) {
4545
// (see data.table.h), and isNewList() is true for NULL
4646
if (n==0)
4747
return false;
48+
if (hash_lookup(specials, x, 0)<0) return true; // test 2158
4849
if (isVectorAtomic(x))
49-
return ALTREP(x) || hash_lookup(specials, x, 0)<0;
50+
return ALTREP(x); // see test 2156: ALTREP is a source of sharing we can't trace reliably
5051
if (isNewList(x)) {
51-
if (hash_lookup(specials, x, 0)<0)
52-
return true; // test 2158
5352
for (int i=0; i<n; ++i) {
5453
list_el = VECTOR_ELT(x,i);
5554
if (anySpecialStatic(list_el, specials))

src/growable.c

Lines changed: 237 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include "data.table.h"
22

3+
#ifndef USE_GROWABLE_ALTREP
4+
35
SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) {
46
SEXP ret = PROTECT(allocVector(type, max_size));
57
SET_TRUELENGTH(ret, max_size);
@@ -32,9 +34,243 @@ Rboolean is_growable(SEXP x) {
3234
;
3335
}
3436

35-
// Assuming no ALTREP for now
37+
// Assuming no ALTREP columns
3638
SEXP make_growable(SEXP x) {
3739
if (TRUELENGTH(x) < XLENGTH(x)) SET_TRUELENGTH(x, XLENGTH(x));
3840
SET_GROWABLE_BIT(x);
3941
return x;
4042
}
43+
44+
#else
45+
46+
#include <R_ext/Altrep.h>
47+
48+
static R_altrep_class_t dta_grow_string, dta_grow_integer, dta_grow_logical, dta_grow_real, dta_grow_complex, dta_grow_raw, dta_grow_list;
49+
static Rcomplex NA_COMPLEX = { 0, };
50+
51+
/*
52+
ALTREP class layout:
53+
data1 = underlying vector
54+
data2 = its current length stored as a length-1 REALSXP
55+
Unless we implement an Unserialize method, this can be changed any time.
56+
Classes that have been released on CRAN with a Serialized_state/Unserialize pair will have to stay as they have been defined in order to keep *.rds files readable.
57+
*/
58+
59+
// Length method shared by every growable class: reports the current (logical)
// length, which is kept in data2 as a length-1 REALSXP, rather than the length
// of the underlying vector in data1.
static R_xlen_t altall_Length(SEXP x) {
  SEXP length_holder = R_altrep_data2(x);
  return (R_xlen_t)REAL(length_holder)[0];
}
62+
63+
#define make_inspect_method(classname) \
64+
static Rboolean alt##classname##_Inspect( \
65+
SEXP x, int pre, int deep, int pvec, \
66+
void (*inspect_subtree)(SEXP x, int pre, int deep, int pvec) \
67+
) { \
68+
(void)pre; (void)deep; (void)pvec; (void)inspect_subtree; \
69+
Rprintf("data.table::growable" #classname "_v0(truelength=%g) ", (double)XLENGTH(R_altrep_data1(x))); \
70+
return FALSE; \
71+
}
72+
make_inspect_method(string)
73+
make_inspect_method(integer)
74+
make_inspect_method(logical)
75+
make_inspect_method(real)
76+
make_inspect_method(complex)
77+
make_inspect_method(raw)
78+
make_inspect_method(list)
79+
#undef make_inspect_method
80+
81+
#define make_dataptr_method(class, accessor) \
82+
static void * alt##class##_Dataptr(SEXP x, Rboolean writable) { \
83+
(void)writable; \
84+
return (void*)accessor(R_altrep_data1(x)); \
85+
}
86+
make_dataptr_method(string, STRING_PTR_RO)
87+
make_dataptr_method(integer, INTEGER)
88+
make_dataptr_method(logical, LOGICAL)
89+
make_dataptr_method(real, REAL)
90+
make_dataptr_method(complex, COMPLEX)
91+
make_dataptr_method(raw, RAW)
92+
make_dataptr_method(list, DATAPTR_RO) // VECTOR_PTR_RO to appear in R-4.5
93+
#undef make_dataptr_method
94+
95+
// Dataptr_or_null method shared by every growable class. Hands out the data
// pointer of the underlying vector kept in data1 directly, instead of calling
// DATAPTR_RO() on the wrapper x itself, which makes R dispatch back into this
// class's own Dataptr method just to reach the same pointer.
static const void * altall_Dataptr_or_null(SEXP x) { return DATAPTR_RO(R_altrep_data1(x)); }
96+
97+
// lots of boilerplate, but R calling *_ELT one by one would be far too slow
98+
#define make_extract_subset_method(class, type, accessor, NA) \
99+
static SEXP alt##class##_Extract_subset(SEXP x, SEXP indx, SEXP call) { \
100+
(void)call; \
101+
indx = PROTECT(coerceVector(indx, REALSXP)); \
102+
double * ii = REAL(indx); \
103+
R_xlen_t rlen = XLENGTH(indx), mylen = XLENGTH(x); \
104+
SEXP ret = PROTECT(allocVector(TYPEOF(x), rlen)); \
105+
type *rdata = accessor(ret), *mydata = accessor(x); \
106+
for (R_xlen_t i = 0; i < rlen; ++i) \
107+
rdata[i] = (ii[i] >= 1 && ii[i] <= mylen) ? mydata[(R_xlen_t)ii[i]-1] : NA; \
108+
UNPROTECT(2); \
109+
return ret; \
110+
}
111+
make_extract_subset_method(integer, int, INTEGER, NA_INTEGER)
112+
make_extract_subset_method(logical, int, LOGICAL, NA_LOGICAL)
113+
make_extract_subset_method(real, double, REAL, NA_REAL)
114+
make_extract_subset_method(complex, Rcomplex, COMPLEX, NA_COMPLEX)
115+
make_extract_subset_method(raw, Rbyte, RAW, 0)
116+
// not implementing the string and list methods because those do require the write barrier and are thus no better than calling *_ELT one by one
117+
#undef make_extract_subset_method
118+
119+
#define make_elt_method(class, accessor) \
120+
static SEXP alt##class##_Elt(SEXP x, R_xlen_t i) { \
121+
return accessor(R_altrep_data1(x), i); \
122+
}
123+
make_elt_method(string, STRING_ELT)
124+
make_elt_method(list, VECTOR_ELT)
125+
#undef make_elt_method
126+
127+
#define make_set_elt_method(class, accessor) \
128+
static void alt##class##_Set_elt(SEXP x, R_xlen_t i, SEXP v) { \
129+
accessor(R_altrep_data1(x), i, v); \
130+
}
131+
make_set_elt_method(string, SET_STRING_ELT)
132+
make_set_elt_method(list, SET_VECTOR_ELT)
133+
#undef make_set_elt_method
134+
135+
// liked the Extract_subset methods? say hello to Get_region
136+
#define make_get_region_method(class, type, accessor) \
137+
static R_xlen_t alt##class##_Get_region( \
138+
SEXP x, R_xlen_t i, R_xlen_t n, type * buf \
139+
) { \
140+
R_xlen_t j = 0, mylen = XLENGTH(x); \
141+
type * data = accessor(x); \
142+
for (; j < n && i < mylen; ++i, ++j) buf[j] = data[i]; \
143+
return j; \
144+
}
145+
make_get_region_method(integer, int, INTEGER)
146+
make_get_region_method(logical, int, LOGICAL)
147+
make_get_region_method(real, double, REAL)
148+
make_get_region_method(complex, Rcomplex, COMPLEX)
149+
make_get_region_method(raw, Rbyte, RAW)
150+
#undef make_get_region_method
151+
152+
// Create the growable ALTREP classes (one per supported vector type) and attach
// their methods. Called from R_init_data_table() with the package's DllInfo.
void register_altrep_classes(DllInfo * info) {
  // Used by the altcomplex_Extract_subset method
  NA_COMPLEX = (Rcomplex){ .r = NA_REAL, .i = NA_REAL };

  // Every class: create it as "growable_<name>_v0" and attach the Length,
  // Inspect, Dataptr and Dataptr_or_null methods.
  // `name` is only ever used next to # or ##, so it is never macro-expanded.
  #define register_class(name) \
    dta_grow_##name = R_make_alt##name##_class("growable_" #name "_v0", "data.table", info); \
    R_set_altrep_Length_method(dta_grow_##name, altall_Length); \
    R_set_altrep_Inspect_method(dta_grow_##name, alt##name##_Inspect); \
    R_set_altvec_Dataptr_method(dta_grow_##name, alt##name##_Dataptr); \
    R_set_altvec_Dataptr_or_null_method(dta_grow_##name, altall_Dataptr_or_null)

  // String and list classes: element-at-a-time getter and setter.
  #define register_elt_methods(name) \
    R_set_alt##name##_Elt_method(dta_grow_##name, alt##name##_Elt); \
    R_set_alt##name##_Set_elt_method(dta_grow_##name, alt##name##_Set_elt)

  // Integer, logical, real, complex and raw classes: bulk Extract_subset and Get_region.
  #define register_bulk_methods(name) \
    R_set_altvec_Extract_subset_method(dta_grow_##name, alt##name##_Extract_subset); \
    R_set_alt##name##_Get_region_method(dta_grow_##name, alt##name##_Get_region)

  register_class(string);
  register_elt_methods(string);

  register_class(integer);
  register_bulk_methods(integer);

  register_class(logical);
  register_bulk_methods(logical);

  register_class(real);
  register_bulk_methods(real);

  register_class(complex);
  register_bulk_methods(complex);

  register_class(raw);
  register_bulk_methods(raw);

  register_class(list);
  register_elt_methods(list);

  #undef register_bulk_methods
  #undef register_elt_methods
  #undef register_class
}
206+
207+
static R_altrep_class_t dta_grow_string, dta_grow_integer, dta_grow_logical, dta_grow_real, dta_grow_complex, dta_grow_raw, dta_grow_list;
208+
209+
// Map a vector type to the growable ALTREP class used to wrap it.
// Any type without a growable class is reported through internal_error().
static R_altrep_class_t type2class(SEXPTYPE type) {
  switch(type) {
    case STRSXP:  return dta_grow_string;
    case INTSXP:  return dta_grow_integer;
    case LGLSXP:  return dta_grow_logical;
    case REALSXP: return dta_grow_real;
    case CPLXSXP: return dta_grow_complex;
    case RAWSXP:  return dta_grow_raw;
    // expression vectors are mapped to the same class as lists
    case VECSXP:
    case EXPRSXP: return dta_grow_list;
    default:
      internal_error(__func__, "Can't create a growable vector of type '%s'", type2char(type));
  }
}
230+
231+
// Allocate a new growable vector.
//   type     - element type; must be one that type2class() supports
//   size     - initial current length, 0 <= size <= max_size
//   max_size - capacity, i.e. the length of the underlying vector stored in data1
// Returns an unprotected ALTREP object whose data1 is a freshly allocated vector
// of length max_size and whose data2 holds size as a length-1 REALSXP.
SEXP growable_allocate(SEXPTYPE type, R_xlen_t size, R_xlen_t max_size) {
  // A current length outside [0, max_size] would make the Length method report
  // elements that the underlying vector does not have.
  if (size < 0 || size > max_size) internal_error(
    __func__, "size=%g is outside [0, max_size=%g]",
    (double)size, (double)max_size
  );
  SEXP ret = PROTECT(R_new_altrep(type2class(type), R_NilValue, R_NilValue));
  // ret is protected; each new allocation is attached to it straight away
  R_set_altrep_data1(ret, allocVector(type, max_size));
  R_set_altrep_data2(ret, ScalarReal((double)size));
  UNPROTECT(1);
  return ret;
}
238+
239+
// Capacity of a growable vector: the full length of the underlying vector held
// in data1. x is expected to be growable; callers check is_growable() first.
R_xlen_t growable_max_size(SEXP x) {
  SEXP underlying = R_altrep_data1(x);
  return XLENGTH(underlying);
}
242+
243+
// Change the current length of growable vector x to newsize without touching
// the underlying storage. newsize must lie within [0, growable_max_size(x)];
// anything else is reported through internal_error().
void growable_resize(SEXP x, R_xlen_t newsize) {
  const R_xlen_t max_size = growable_max_size(x);
  // a negative length can never be valid
  if (newsize < 0) internal_error(
    __func__, "newsize=%g < 0",
    (double)newsize
  );
  if (newsize > max_size) internal_error(
    __func__, "newsize=%g > max_size=%g",
    (double)newsize, (double)max_size
  );
  // the current length lives in data2 as a length-1 REALSXP
  REAL(R_altrep_data2(x))[0] = (double)newsize;
}
251+
252+
// TRUE when x is an instance of the growable ALTREP class matching its own type.
// Only the types listed below are considered; everything else (including
// EXPRSXP, although type2class() maps it to the list class) yields FALSE
// without consulting type2class().
Rboolean is_growable(SEXP x) {
  const SEXPTYPE type = TYPEOF(x);
  switch(type) {
    case STRSXP: case INTSXP: case LGLSXP: case REALSXP:
    case CPLXSXP: case RAWSXP: case VECSXP:
      break;
    default:
      return FALSE;
  }
  return R_altrep_inherits(x, type2class(type));
}
266+
267+
// Turn x into a growable vector without copying its data.
// Returns x itself when it is already growable; otherwise returns a new ALTREP
// wrapper whose data1 is x, whose current length (data2) is XLENGTH(x), and
// which carries a shallow copy of x's attributes. The caller must protect the
// return value in place of x.
SEXP make_growable(SEXP x) {
  // Wrapping an already growable vector again would hide its spare capacity:
  // the outer wrapper's max size would be the inner vector's current length.
  if (is_growable(x)) return x;
  SEXP ret = PROTECT(R_new_altrep(type2class(TYPEOF(x)), R_NilValue, R_NilValue));
  R_set_altrep_data1(ret, x);
  R_set_altrep_data2(ret, ScalarReal((double)XLENGTH(x)));
  SHALLOW_DUPLICATE_ATTRIB(ret, x);
  UNPROTECT(1);
  return ret;
}
275+
276+
#endif

src/init.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,12 @@ void attribute_visible R_init_data_table(DllInfo *info)
203203

204204
SEXP tmp = PROTECT(allocVector(INTSXP,2));
205205
if (LENGTH(tmp)!=2) error(_("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s"), LENGTH(tmp), msg);
206+
#if R_VERSION >= R_Version(4, 3, 0)
207+
register_altrep_classes(info);
208+
#else
206209
// Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768
207210
if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)TRUELENGTH(tmp), msg);
211+
#endif
208212
UNPROTECT(1);
209213

210214
// According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits.

src/reorder.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,17 @@ SEXP reorder(SEXP x, SEXP order)
2424
error(_("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table."), i+1, length(v), nrow);
2525
if (SIZEOF(v) > maxSize)
2626
maxSize=SIZEOF(v);
27+
#ifndef USE_GROWABLE_ALTREP
2728
if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v));
29+
#endif
2830
}
2931
copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768
3032
} else {
3133
if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16 && SIZEOF(x)!=1)
3234
error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%zu)"), type2char(TYPEOF(x)), SIZEOF(x));
35+
#ifndef USE_GROWABLE_ALTREP
3336
if (ALTREP(x)) internal_error(__func__, "cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4"); // # nocov
37+
#endif
3438
maxSize = SIZEOF(x);
3539
nrow = length(x);
3640
ncol = 1;
@@ -40,7 +44,9 @@ SEXP reorder(SEXP x, SEXP order)
4044
if (length(order) != nrow)
4145
error("nrow(x)[%d]!=length(order)[%d]", nrow, length(order)); // # notranslate
4246
int nprotect = 0;
47+
#ifndef USE_GROWABLE_ALTREP
4348
if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; } // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand
49+
#endif
4450

4551
const int *restrict idx = INTEGER(order);
4652
int i=0;

src/utils.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,9 @@ inline bool INHERITS(SEXP x, SEXP char_) {
202202
return false;
203203
}
204204

205+
#ifdef USE_GROWABLE_ALTREP
206+
SEXP copyAsPlain(SEXP x) { return duplicate(x); }
207+
#else
205208
SEXP copyAsPlain(SEXP x) {
206209
// v1.12.2 and before used standard R duplicate() to do this. But duplicate() is not guaranteed to not return an ALTREP.
207210
// e.g. ALTREP 'wrapper' on factor column (with materialized INTSXP) in package VIM under example(hotdeck)
@@ -256,6 +259,7 @@ SEXP copyAsPlain(SEXP x) {
256259
UNPROTECT(1);
257260
return ans;
258261
}
262+
#endif
259263

260264
void copySharedColumns(SEXP x) {
261265
const int ncol = length(x);
@@ -266,7 +270,12 @@ void copySharedColumns(SEXP x) {
266270
int nShared=0;
267271
for (int i=0; i<ncol; ++i) {
268272
SEXP thiscol = xp[i];
269-
if (ALTREP(thiscol) || hash_lookup(marks, thiscol, 0)<0) {
273+
if (
274+
hash_lookup(marks, thiscol, 0)<0
275+
#ifndef USE_GROWABLE_ALTREP
276+
|| ALTREP(thiscol)
277+
#endif
278+
) {
270279
shared[i] = true; // we mark ALTREP as 'shared' too, whereas 'tocopy' would be better word to use for ALTREP
271280
nShared++;
272281
} else {

0 commit comments

Comments
 (0)