@@ -383,9 +383,8 @@ static void preprocess(SEXP DT, SEXP id, SEXP measure, SEXP varnames, SEXP valna
383383}
384384
385385static SEXP combineFactorLevels (SEXP factorLevels , SEXP target , int * factorType , Rboolean * isRowOrdered )
386- // Finds unique levels directly in one pass with no need to create hash tables. Creates integer factor
387- // too in the same single pass. Previous version called factor(x, levels=unique) where x was type character
388- // and needed hash table.
386+ // Finds unique levels directly in one pass. Creates integer factor too in the same single pass. Previous
387+ // version called factor(x, levels=unique) where x was type character.
389388// TODO keep the original factor columns as factor and use new technique in rbindlist.c. The calling
390389// environments are a little different hence postponed for now (e.g. rbindlist calls writeNA which
391390// a general purpose combiner would need to know how many to write)
@@ -404,8 +403,10 @@ static SEXP combineFactorLevels(SEXP factorLevels, SEXP target, int * factorType
404403 SEXP * levelsRaw = (SEXP * )R_alloc (maxlevels , sizeof (SEXP )); // allocate for worst-case all-unique levels
405404 int * ansd = INTEGER (ans );
406405 const SEXP * targetd = STRING_PTR_RO (target );
407- savetl_init ();
408- // no alloc or any fail point until savetl_end()
406+ R_xlen_t hl = 0 ;
407+ for (R_xlen_t i = 0 ; i < nitem ; ++ i )
408+ hl += xlength (VECTOR_ELT (factorLevels , i ));
409+ hashtab * marks = hash_create (hl );
409410 int nlevel = 0 ;
410411 for (int i = 0 ; i < nitem ; ++ i ) {
411412 const SEXP this = VECTOR_ELT (factorLevels , i );
@@ -414,24 +415,21 @@ static SEXP combineFactorLevels(SEXP factorLevels, SEXP target, int * factorType
414415 for (int k = 0 ; k < thisn ; ++ k ) {
415416 SEXP s = thisd [k ];
416417 if (s == NA_STRING ) continue ; // NA shouldn't be in levels but remove it just in case
417- int tl = TRUELENGTH ( s );
418+ int tl = hash_lookup ( marks , s , 0 );
418419 if (tl < 0 ) continue ; // seen this level before
419- if (tl > 0 ) savetl (s );
420- SET_TRUELENGTH (s ,- (++ nlevel ));
420+ hash_set (marks ,s ,- (++ nlevel ));
421421 levelsRaw [nlevel - 1 ] = s ;
422422 }
423423 }
424424 for (int i = 0 ; i < nrow ; ++ i ) {
425425 if (targetd [i ]== NA_STRING ) {
426426 * ansd ++ = NA_INTEGER ;
427427 } else {
428- int tl = TRUELENGTH ( targetd [i ]);
428+ int tl = hash_lookup ( marks , targetd [i ], 0 );
429429 * ansd ++ = tl < 0 ? - tl : NA_INTEGER ;
430430 }
431431 }
432- for (int i = 0 ; i < nlevel ; ++ i ) SET_TRUELENGTH (levelsRaw [i ], 0 );
433- savetl_end ();
434- // now after savetl_end, we can alloc (which might fail)
432+ // savetl_end() used to be called here; only after it was it safe to alloc (which might fail)
435433 SEXP levelsSxp ;
436434 setAttrib (ans , R_LevelsSymbol , levelsSxp = allocVector (STRSXP , nlevel ));
437435 for (int i = 0 ; i < nlevel ; ++ i ) SET_STRING_ELT (levelsSxp , i , levelsRaw [i ]);
0 commit comments