Skip to content

Commit 73000ed

Browse files
committed
add hash x branch in chmatch
1 parent 7c1cdb2 commit 73000ed

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

src/chmatch.c

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,37 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
5555
return ans;
5656
}
5757
// else xlen>1; nprotect is const above since no more R allocations should occur after this point
58+
// When table >> x, hash x and scan table // ToDo tune the kick-in factor
59+
if (!chmatchdup && tablelen > 4 * xlen) {
60+
hashtab *marks = hash_create(xlen);
61+
int nuniq = 0;
62+
for (int i = 0; i < xlen; ++i) {
63+
// todo use lookup_insert?
64+
int tl = hash_lookup(marks, xd[i], 0);
65+
if (tl == 0) {
66+
hash_set(marks, xd[i], -1);
67+
nuniq++;
68+
}
69+
}
70+
71+
for (int i = 0; i < tablelen; ++i) {
72+
int tl = hash_lookup(marks, td[i], 0);
73+
if (tl == -1) {
74+
hash_set(marks, td[i], chin ? 1 : i + 1);
75+
nuniq--;
76+
if (nuniq == 0) break; // all found, stop scanning
77+
}
78+
}
79+
80+
const int not_found = chin ? 0 : nomatch;
81+
#pragma omp parallel for num_threads(getDTthreads(xlen, true))
82+
for (int i = 0; i < xlen; ++i) {
83+
int tl = hash_lookup(marks, xd[i], 0);
84+
ansd[i] = tl == -1 ? not_found : tl;
85+
}
86+
UNPROTECT(nprotect);
87+
return ans;
88+
}
5889
hashtab * marks = hash_create(tablelen);
5990
int nuniq=0;
6091
for (int i=0; i<tablelen; ++i) {

0 commit comments

Comments
 (0)