Skip to content

Commit 1d88ad4

Browse files
committed
use double hashing
1 parent 5a474e0 commit 1d88ad4

File tree

2 files changed

+35
-28
lines changed

2 files changed

+35
-28
lines changed

src/chmatch.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,10 +101,12 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
101101
free(counts);
102102
free(map);
103103
} else if (chin) {
104+
#pragma omp parallel for if(xlen > 100000) schedule(static) num_threads(getDTthreads(xlen, false))
104105
for (int i=0; i<xlen; i++) {
105106
ansd[i] = hash_lookup(marks,xd[i],0)<0;
106107
}
107108
} else {
109+
#pragma omp parallel for if(xlen > 100000) schedule(static) num_threads(getDTthreads(xlen, false))
108110
for (int i=0; i<xlen; i++) {
109111
const int m = hash_lookup(marks,xd[i],0);
110112
ansd[i] = (m<0) ? -m : nomatch;

src/hash.c

Lines changed: 33 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ struct hash_pair {
99
struct hash_tab {
1010
size_t size, free;
1111
uintptr_t multiplier1, multiplier2;
12-
struct hash_pair *tb1, *tb2;
12+
struct hash_pair *table; // Single table for double hashing
1313
};
1414

1515
// TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, where phi is the golden ratio.
@@ -55,12 +55,10 @@ static hashtab * hash_create_(size_t n, double load_factor) {
5555
// Multiply by size to get different hash functions when rehashing
5656
ret->multiplier1 = n_full * hash_multiplier1;
5757
ret->multiplier2 = n_full * hash_multiplier2;
58-
ret->tb1 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1);
59-
ret->tb2 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1);
58+
ret->table = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1);
6059
// No valid SEXP is a null pointer, so it's a safe marker for empty cells.
6160
for (size_t i = 0; i < n_full; ++i) {
62-
ret->tb1[i].key = NULL;
63-
ret->tb2[i].key = NULL;
61+
ret->table[i].key = NULL;
6462
}
6563
return ret;
6664
}
@@ -90,48 +88,55 @@ void hash_rehash(hashtab *h) {
9088
hashtab *new_h = hash_create_(new_size, 0.5);
9189

9290
for (size_t i = 0; i < h->size; ++i) {
93-
if (h->tb1[i].key) hash_set(new_h, h->tb1[i].key, h->tb1[i].value);
94-
if (h->tb2[i].key) hash_set(new_h, h->tb2[i].key, h->tb2[i].value);
91+
if (h->table[i].key) hash_set(new_h, h->table[i].key, h->table[i].value);
9592
}
96-
*h = *new_h;
93+
*h = *new_h;
9794
}
9895

9996
void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
100-
size_t max_relocations = h->size;
10197
size_t mask = h->size - 1;
102-
struct hash_pair item = { .key = key, .value = value };
103-
for (size_t i = 0; i < max_relocations; ++i) {
104-
size_t idx1 = hash_index1(item.key, h->multiplier1) & mask;
105-
if (!h->tb1[idx1].key) {
106-
h->tb1[idx1] = item;
98+
size_t h1 = hash_index1(key, h->multiplier1) & mask;
99+
size_t h2 = hash_index2(key, h->multiplier2) & mask;
100+
101+
if (h2 == 0) h2 = 1;
102+
else if ((h2 & 1) == 0) h2 |= 1;
103+
104+
for (size_t i = 0; i < h->size; ++i) {
105+
size_t idx = (h1 + i * h2) & mask;
106+
107+
if (!h->table[idx].key) {
108+
// Empty slot found
109+
h->table[idx].key = key;
110+
h->table[idx].value = value;
111+
h->free--;
107112
return;
108113
}
109-
struct hash_pair temp = h->tb1[idx1];
110-
h->tb1[idx1] = item;
111-
item = temp;
112114

113-
size_t idx2 = hash_index2(item.key, h->multiplier2) & mask;
114-
if (!h->tb2[idx2].key) {
115-
h->tb2[idx2] = item;
115+
if (h->table[idx].key == key) {
116+
h->table[idx].value = value;
116117
return;
117118
}
118-
temp = h->tb2[idx2];
119-
h->tb2[idx2] = item;
120-
item = temp;
121119
}
120+
122121
// No empty slot and no matching key after probing h->size slots: rehash, then retry the insert.
123122
hash_rehash(h);
124123
hash_set(h, key, value);
125124
}
126125

127126
R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) {
128127
size_t mask = h->size - 1;
129-
size_t idx1 = hash_index1(key, h->multiplier1) & mask;
130-
if (h->tb1[idx1].key == key) return h->tb1[idx1].value;
128+
size_t h1 = hash_index1(key, h->multiplier1) & mask;
129+
size_t h2 = hash_index2(key, h->multiplier2) & mask;
130+
131+
if (h2 == 0) h2 = 1;
132+
else if ((h2 & 1) == 0) h2 |= 1;
133+
134+
for (size_t i = 0; i < h->size; ++i) {
135+
size_t idx = (h1 + i * h2) & mask;
136+
if (!h->table[idx].key) return ifnotfound;
137+
if (h->table[idx].key == key) return h->table[idx].value;
138+
}
131139

132-
size_t idx2 = hash_index2(key, h->multiplier2) & mask;
133-
if (h->tb2[idx2].key == key) return h->tb2[idx2].value;
134-
// Should be impossible with a load factor below 1, but just in case:
135140
return ifnotfound; // # nocov
136141
}
137142

0 commit comments

Comments
 (0)