Skip to content

Commit e379405

Browse files
authored
chmatch: hash x instead of table (#7454)
* add hash x branch in chmatch
* adapt kick-in threshold
* make chin branch more explicit
1 parent 7c1cdb2 commit e379405

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

src/chmatch.c

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,43 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
5555
return ans;
5656
}
5757
// else xlen>1; nprotect is const above since no more R allocations should occur after this point
58+
// When table >> x, hash x and scan table // ToDo tune the kick-in factor
59+
if (!chmatchdup && tablelen > 2 * xlen) {
60+
hashtab *marks = hash_create(xlen);
61+
int nuniq = 0;
62+
for (int i = 0; i < xlen; ++i) {
63+
// todo use lookup_insert?
64+
int tl = hash_lookup(marks, xd[i], 0);
65+
if (tl == 0) {
66+
hash_set(marks, xd[i], -1);
67+
nuniq++;
68+
}
69+
}
70+
71+
for (int i = 0; i < tablelen; ++i) {
72+
int tl = hash_lookup(marks, td[i], 0);
73+
if (tl == -1) {
74+
hash_set(marks, td[i], i + 1);
75+
nuniq--;
76+
if (nuniq == 0) break; // all found, stop scanning
77+
}
78+
}
79+
80+
if (chin) {
81+
#pragma omp parallel for num_threads(getDTthreads(xlen, true))
82+
for (int i = 0; i < xlen; ++i) {
83+
ansd[i] = hash_lookup(marks, xd[i], 0) > 0;
84+
}
85+
} else {
86+
#pragma omp parallel for num_threads(getDTthreads(xlen, true))
87+
for (int i = 0; i < xlen; ++i) {
88+
const int m = hash_lookup(marks, xd[i], 0);
89+
ansd[i] = (m < 0) ? nomatch : m;
90+
}
91+
}
92+
UNPROTECT(nprotect);
93+
return ans;
94+
}
5895
hashtab * marks = hash_create(tablelen);
5996
int nuniq=0;
6097
for (int i=0; i<tablelen; ++i) {

0 commit comments

Comments
 (0)