Skip to content

Commit cae6b02

Browse files
committed
combineFactorLevels(): hash instead of TRUELENGTH
1 parent f26d043 commit cae6b02

File tree

1 file changed

+10
-12
lines changed

1 file changed

+10
-12
lines changed

src/fmelt.c

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -383,9 +383,8 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna
383383
}
384384

385385
static SEXP combineFactorLevels(SEXP factorLevels, SEXP target, int * factorType, Rboolean * isRowOrdered)
386-
// Finds unique levels directly in one pass with no need to create hash tables. Creates integer factor
387-
// too in the same single pass. Previous version called factor(x, levels=unique) where x was type character
388-
// and needed hash table.
386+
// Finds unique levels directly in one pass. Creates integer factor too in the same single pass. Previous
387+
// version called factor(x, levels=unique) where x was type character.
389388
// TODO keep the original factor columns as factor and use new technique in rbindlist.c. The calling
390389
// environments are a little different, hence postponed for now (e.g. rbindlist calls writeNA which
391390
// a general purpose combiner would need to know how many to write)
@@ -404,8 +403,10 @@ static SEXP combineFactorLevels(SEXP factorLevels, SEXP target, int * factorType
404403
SEXP *levelsRaw = (SEXP *)R_alloc(maxlevels, sizeof(SEXP)); // allocate for worst-case all-unique levels
405404
int *ansd = INTEGER(ans);
406405
const SEXP *targetd = STRING_PTR_RO(target);
407-
savetl_init();
408-
// no alloc or any fail point until savetl_end()
406+
R_xlen_t hl = 0;
407+
for (R_xlen_t i = 0; i < nitem; ++i)
408+
hl += xlength(VECTOR_ELT(factorLevels, i));
409+
hashtab * marks = hash_create(hl);
409410
int nlevel=0;
410411
for (int i=0; i<nitem; ++i) {
411412
const SEXP this = VECTOR_ELT(factorLevels, i);
@@ -414,24 +415,21 @@ static SEXP combineFactorLevels(SEXP factorLevels, SEXP target, int * factorType
414415
for (int k=0; k<thisn; ++k) {
415416
SEXP s = thisd[k];
416417
if (s==NA_STRING) continue; // NA shouldn't be in levels but remove it just in case
417-
int tl = TRUELENGTH(s);
418+
int tl = hash_lookup(marks, s, 0);
418419
if (tl<0) continue; // seen this level before
419-
if (tl>0) savetl(s);
420-
SET_TRUELENGTH(s,-(++nlevel));
420+
hash_set(marks,s,-(++nlevel));
421421
levelsRaw[nlevel-1] = s;
422422
}
423423
}
424424
for (int i=0; i<nrow; ++i) {
425425
if (targetd[i]==NA_STRING) {
426426
*ansd++ = NA_INTEGER;
427427
} else {
428-
int tl = TRUELENGTH(targetd[i]);
428+
int tl = hash_lookup(marks,targetd[i],0);
429429
*ansd++ = tl<0 ? -tl : NA_INTEGER;
430430
}
431431
}
432-
for (int i=0; i<nlevel; ++i) SET_TRUELENGTH(levelsRaw[i], 0);
433-
savetl_end();
434-
// now after savetl_end, we can alloc (which might fail)
432+
// savetl_end() used to be called here; from this point on we can alloc (which might fail)
435433
SEXP levelsSxp;
436434
setAttrib(ans, R_LevelsSymbol, levelsSxp=allocVector(STRSXP, nlevel));
437435
for (int i=0; i<nlevel; ++i) SET_STRING_ELT(levelsSxp, i, levelsRaw[i]);

0 commit comments

Comments
 (0)