Skip to content

Commit f26d043

Browse files
committed
copySharedColumns(): hash instead of TRUELENGTH
1 parent 486dd7a commit f26d043

File tree

1 file changed

+4
-18
lines changed

1 file changed

+4
-18
lines changed

src/utils.c

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -261,36 +261,22 @@ void copySharedColumns(SEXP x) {
261261
const int ncol = length(x);
262262
if (!isNewList(x) || ncol==1) return;
263263
bool *shared = (bool *)R_alloc(ncol, sizeof(bool)); // on R heap in case alloc fails
264-
int *savetl = (int *)R_alloc(ncol, sizeof(int)); // on R heap for convenience but could be a calloc
264+
hashtab * marks = hash_create(ncol);
265265
const SEXP *xp = SEXPPTR_RO(x);
266-
// first save the truelength, which may be negative on specials in dogroups, and set to zero; test 2157
267-
// the savetl() function elsewhere is for CHARSXP. Here, we are using truelength on atomic vectors.
268-
for (int i=0; i<ncol; ++i) {
269-
const SEXP thiscol = xp[i];
270-
savetl[i] = ALTREP(thiscol) ? 0 : TRUELENGTH(thiscol);
271-
SET_TRUELENGTH(thiscol, 0);
272-
}
273266
int nShared=0;
274267
for (int i=0; i<ncol; ++i) {
275268
SEXP thiscol = xp[i];
276-
if (ALTREP(thiscol) || TRUELENGTH(thiscol)<0) {
269+
if (ALTREP(thiscol) || hash_lookup(marks, thiscol, 0)<0) {
277270
shared[i] = true; // we mark ALTREP as 'shared' too, whereas 'tocopy' would be better word to use for ALTREP
278271
nShared++;
279-
// do not copyAsPlain() here yet, as its alloc might fail. Must restore tl first to all columns before attempting any copies.
280272
} else {
281-
shared[i] = false; // so the first column will never be shared (unless it is an altrep) even it is shared
273+
shared[i] = false; // so the first column will never be shared (unless it is an altrep) even if is shared
282274
// 'shared' means a later column shares an earlier column
283-
SET_TRUELENGTH(thiscol, -i-1); // -i-1 so that if, for example, column 3 shares column 1, in iteration 3 we'll know not
275+
hash_set(marks, thiscol, -i-1); // -i-1 so that if, for example, column 3 shares column 1, in iteration 3 we'll know not
284276
// only that the 3rd column is shared with an earlier column, but which one too. Although
285277
// we don't use that information currently, we could do in future.
286278
}
287279
}
288-
// now we know nShared and which ones they are (if any), restore original tl back to the unique set of columns
289-
for (int i=0; i<ncol; ++i) {
290-
if (!shared[i]) SET_TRUELENGTH(xp[i], savetl[i]);
291-
// ^^^^^^^^^^ important because if there are shared columns, the dup will have savetl==0 but we want the first restore to stand
292-
}
293-
// now that truelength has been restored for all columns, we can finally call copyAsPlain()
294280
if (nShared) {
295281
for (int i=0; i<ncol; ++i) {
296282
if (shared[i])

0 commit comments

Comments
 (0)