@@ -55,6 +55,43 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
5555 return ans ;
5656 }
5757 // else xlen>1; nprotect is const above since no more R allocations should occur after this point
58+ // When table >> x, hash x and scan table // ToDo tune the kick-in factor
59+ if (!chmatchdup && tablelen > 2 * xlen ) {
60+ hashtab * marks = hash_create (xlen );
61+ int nuniq = 0 ;
62+ for (int i = 0 ; i < xlen ; ++ i ) {
63+ // todo use lookup_insert?
64+ int tl = hash_lookup (marks , xd [i ], 0 );
65+ if (tl == 0 ) {
66+ hash_set (marks , xd [i ], -1 );
67+ nuniq ++ ;
68+ }
69+ }
70+
71+ for (int i = 0 ; i < tablelen ; ++ i ) {
72+ int tl = hash_lookup (marks , td [i ], 0 );
73+ if (tl == -1 ) {
74+ hash_set (marks , td [i ], i + 1 );
75+ nuniq -- ;
76+ if (nuniq == 0 ) break ; // all found, stop scanning
77+ }
78+ }
79+
80+ if (chin ) {
81+ #pragma omp parallel for num_threads(getDTthreads(xlen, true))
82+ for (int i = 0 ; i < xlen ; ++ i ) {
83+ ansd [i ] = hash_lookup (marks , xd [i ], 0 ) > 0 ;
84+ }
85+ } else {
86+ #pragma omp parallel for num_threads(getDTthreads(xlen, true))
87+ for (int i = 0 ; i < xlen ; ++ i ) {
88+ const int m = hash_lookup (marks , xd [i ], 0 );
89+ ansd [i ] = (m < 0 ) ? nomatch : m ;
90+ }
91+ }
92+ UNPROTECT (nprotect );
93+ return ans ;
94+ }
5895 hashtab * marks = hash_create (tablelen );
5996 int nuniq = 0 ;
6097 for (int i = 0 ; i < tablelen ; ++ i ) {
0 commit comments