@@ -55,6 +55,37 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
5555 return ans ;
5656 }
5757 // else xlen>1; nprotect is const above since no more R allocations should occur after this point
58+ // When table >> x, hash x and scan table // ToDo tune the kick-in factor
59+ if (!chmatchdup && tablelen > 4 * xlen ) {
60+ hashtab * marks = hash_create (xlen );
61+ int nuniq = 0 ;
62+ for (int i = 0 ; i < xlen ; ++ i ) {
63+ // todo use lookup_insert?
64+ int tl = hash_lookup (marks , xd [i ], 0 );
65+ if (tl == 0 ) {
66+ hash_set (marks , xd [i ], -1 );
67+ nuniq ++ ;
68+ }
69+ }
70+
71+ for (int i = 0 ; i < tablelen ; ++ i ) {
72+ int tl = hash_lookup (marks , td [i ], 0 );
73+ if (tl == -1 ) {
74+ hash_set (marks , td [i ], chin ? 1 : i + 1 );
75+ nuniq -- ;
76+ if (nuniq == 0 ) break ; // all found, stop scanning
77+ }
78+ }
79+
80+ const int not_found = chin ? 0 : nomatch ;
81+ #pragma omp parallel for num_threads(getDTthreads(xlen, true))
82+ for (int i = 0 ; i < xlen ; ++ i ) {
83+ int tl = hash_lookup (marks , xd [i ], 0 );
84+ ansd [i ] = tl == -1 ? not_found : tl ;
85+ }
86+ UNPROTECT (nprotect );
87+ return ans ;
88+ }
5889 hashtab * marks = hash_create (tablelen );
5990 int nuniq = 0 ;
6091 for (int i = 0 ; i < tablelen ; ++ i ) {
0 commit comments