
Commit 09fe9ae

ben-schwen and aitap authored
use double hashing instead of linear probing (#7418)
* add lookup or insert
* use lookup or insert
* use lookup_or_insert
* really use lookup or insert
* use cuckoo hashing
* add rehash
* use power of 2 and mask instead of modulo
* mix instead of multiplication
* use different mixes
* change multipliers
* use double hashing
* remove xor folding
* Fix allocation non-overflow precondition
* Set the default load factor
* Inline hash_rehash()
* update comments
* Leave overflow checking to R_alloc
* internal_error() is not covered

---------

Co-authored-by: Ivan K <[email protected]>
1 parent 2003929 commit 09fe9ae

2 files changed: +92, -53 lines

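For orientation before the per-file diffs: with double hashing, the step between probed slots is derived from the key itself rather than always being 1, so two keys that collide on their first slot usually diverge on later probes, avoiding the clustering that makes linear probing degrade at higher load factors. A minimal sketch of the probe arithmetic, assuming a power-of-two table size and an odd second hash as in this commit (the helper name probe_slot is illustrative, not part of the patch):

#include <stddef.h>

/* Sketch only: the i-th slot examined when searching for a key's cell.
 * `size` is a power of two, so `& mask` replaces `% size`, and an odd `h2`
 * is coprime with `size`, so the sequence visits every slot exactly once. */
static size_t probe_slot(size_t h1, size_t h2, size_t i, size_t size) {
  size_t mask = size - 1;
  return (h1 + i * h2) & mask;   /* linear probing would use (h1 + i) & mask */
}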

src/chmatch.c

Lines changed: 2 additions & 0 deletions
@@ -101,10 +101,12 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch
     free(counts);
     free(map);
   } else if (chin) {
+    #pragma omp parallel for num_threads(getDTthreads(xlen, true))
     for (int i=0; i<xlen; i++) {
       ansd[i] = hash_lookup(marks,xd[i],0)<0;
     }
   } else {
+    #pragma omp parallel for num_threads(getDTthreads(xlen, true))
     for (int i=0; i<xlen; i++) {
       const int m = hash_lookup(marks,xd[i],0);
       ansd[i] = (m<0) ? -m : nomatch;
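The two pragmas added above parallelise loops whose bodies only call hash_lookup() on a table that was fully built before the loop (and is taken as const), with each iteration writing its own ansd[i], so there is no shared mutable state to protect. A reduced sketch of that pattern, with generic types rather than data.table's API:

/* Sketch only (illustrative names and types, not data.table's code):
 * the table is read-only inside the loop and each iteration writes only
 * out[i], so the iterations are independent and safe to run in parallel. */
static void lookup_all(int n, const int *keys, const int *table, int *out) {
  #pragma omp parallel for
  for (int i = 0; i < n; i++) {
    out[i] = table[keys[i]];
  }
}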

src/hash.c

Lines changed: 90 additions & 53 deletions
@@ -8,90 +8,125 @@ struct hash_pair {
 };
 struct hash_tab {
   size_t size, free;
-  uintptr_t multiplier;
-  struct hash_pair tb[];
+  uintptr_t multiplier1, multiplier2;
+  struct hash_pair *table; // Single table for double hashing
 };
 
-// TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, the golden ratio.
-static const double hash_multiplier = 0.618033988749895;
+// 1/phi and sqrt(0.1)
+static const double hash_multiplier1 = 0.618033988749895;
+static const double hash_multiplier2 = 0.316227766016838;
+static const double default_load_factor = .5;
 
 static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) {
   if (load_factor <= 0 || load_factor >= 1)
     internal_error(__func__, "load_factor=%g not in (0, 1)", load_factor); // # nocov
   // precondition: n / load_factor < SIZE_MAX
   // this is implemented a bit stricter than needed and would fail some almost-too-high sizes
   // due to the size_t -> double conversion
-  if ((size_t)((double)SIZE_MAX * load_factor) <= n_elements) internal_error(
+  if ((size_t)((double)SIZE_MAX * load_factor) <= n_elements) internal_error( // # nocov
     __func__, "n=%zu / load_factor=%g would overflow size_t",
     n_elements, load_factor
   );
-  return ceil(n_elements / load_factor);
+  size_t min_size = ceil(n_elements / load_factor);
+  // Round up to next power of 2 for fast modulo using bitwise AND
+  size_t pow2 = 1;
+  while (pow2 < min_size) {
+    if (pow2 > SIZE_MAX / 2)
+      internal_error(__func__, "size %zu would overflow size_t", min_size); // # nocov
+    pow2 *= 2;
+  }
+  return pow2;
 }
 
 static hashtab * hash_create_(size_t n, double load_factor) {
   size_t n_full = get_full_size(n, load_factor);
-  // precondition: sizeof hashtab + hash_pair[n_full] < SIZE_MAX
-  // n_full * sizeof hash_pair < SIZE_MAX - sizeof hashtab
-  // sizeof hash_pair < (SIZE_MAX - sizeof hashtab) / n_full
-  // (note that sometimes n is 0)
-  if (n_full && sizeof(struct hash_pair) >= (SIZE_MAX - sizeof(hashtab)) / n_full)
-    internal_error(
-      __func__, "n=%zu with load_factor=%g would overflow total allocation size",
-      n, load_factor
-    );
-  hashtab * ret = (hashtab *)R_alloc(sizeof(hashtab) + sizeof(struct hash_pair[n_full]), 1);
+  hashtab *ret = (hashtab *)R_alloc(sizeof(hashtab), 1);
   ret->size = n_full;
   ret->free = n;
-  // To compute floor(size * (A * key % 1)) in integer arithmetic with A < 1, use ((size * A) * key) % size.
-  ret->multiplier = n_full * hash_multiplier;
+  // Multiply by size to get different hash functions when rehashing
+  ret->multiplier1 = n_full * hash_multiplier1;
+  ret->multiplier2 = n_full * hash_multiplier2;
+  ret->table = (struct hash_pair *)R_alloc(n_full, sizeof(*ret->table));
   // No valid SEXP is a null pointer, so it's a safe marker for empty cells.
-  for (size_t i = 0; i < n_full; ++i)
-    ret->tb[i].key = NULL;
+  for (size_t i = 0; i < n_full; ++i) {
+    ret->table[i].key = NULL;
+  }
   return ret;
 }
 
-hashtab * hash_create(size_t n) { return hash_create_(n, .5); }
+hashtab * hash_create(size_t n) { return hash_create_(n, default_load_factor); }
 
-// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
-// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
-static R_INLINE size_t hash_index(SEXP key, uintptr_t multiplier) {
+// double hashing
+static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) {
   // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
   // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
   // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
   return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier;
 }
 
-void hash_set(hashtab * h, SEXP key, R_xlen_t value) {
-  struct hash_pair *cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1;
-  for (size_t i = 0; i < h->size; ++i, cell = cell == end ? h->tb : cell+1) {
-    if (cell->key == key) {
-      cell->value = value;
+static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) {
+  // For double hashing, we need a different hash that's coprime with table size.
+  // We use higher-order bits that hash_index1 mostly ignores, and ensure
+  // the result is always odd (coprime with power-of-2 table sizes).
+  uintptr_t ptr = (uintptr_t)key;
+  ptr = (ptr >> 12) | (ptr << (sizeof(uintptr_t) * 8 - 12));
+  return ((ptr & 0x0fffffff) * multiplier) | 1;
+}
+
+static R_INLINE void hash_rehash(hashtab *h) {
+  size_t new_size = h->size * 2;
+  hashtab *new_h = hash_create_(new_size, default_load_factor);
+
+  for (size_t i = 0; i < h->size; ++i) {
+    if (h->table[i].key) hash_set(new_h, h->table[i].key, h->table[i].value);
+  }
+  *h = *new_h;
+}
+
+void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
+  size_t mask = h->size - 1;
+  size_t h1 = hash_index1(key, h->multiplier1) & mask;
+  size_t h2 = hash_index2(key, h->multiplier2) & mask;
+
+  if (h2 == 0) h2 = 1;
+  else if ((h2 & 1) == 0) h2 |= 1;
+
+  for (size_t i = 0; i < h->size; ++i) {
+    size_t idx = (h1 + i * h2) & mask;
+
+    if (!h->table[idx].key) {
+      // Empty slot found
+      h->table[idx].key = key;
+      h->table[idx].value = value;
+      h->free--;
       return;
-    } else if (!cell->key) {
-      if (!h->free) internal_error(
-        __func__, "no free slots left (full size=%zu)", h->size
-      );
-      --h->free;
-      *cell = (struct hash_pair){.key = key, .value = value};
+    }
+
+    if (h->table[idx].key == key) {
+      h->table[idx].value = value;
       return;
     }
   }
-  internal_error( // # nocov
-    __func__, "did not find a free slot for key %p; size=%zu, free=%zu",
-    (void*)key, h->size, h->free
-  );
+
+  // need to rehash
+  hash_rehash(h);
+  hash_set(h, key, value);
 }
 
-R_xlen_t hash_lookup(const hashtab * h, SEXP key, R_xlen_t ifnotfound) {
-  const struct hash_pair * cell = h->tb + hash_index(key, h->multiplier) % h->size, *end = h->tb + h->size - 1;
-  for (size_t i = 0; i < h->size; ++i, cell = cell == end ? h->tb : cell+1) {
-    if (cell->key == key) {
-      return cell->value;
-    } else if (!cell->key) {
-      return ifnotfound;
-    }
+R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) {
+  size_t mask = h->size - 1;
+  size_t h1 = hash_index1(key, h->multiplier1) & mask;
+  size_t h2 = hash_index2(key, h->multiplier2) & mask;
+
+  if (h2 == 0) h2 = 1;
+  else if ((h2 & 1) == 0) h2 |= 1;
+
+  for (size_t i = 0; i < h->size; ++i) {
+    size_t idx = (h1 + i * h2) & mask;
+    if (!h->table[idx].key) return ifnotfound;
+    if (h->table[idx].key == key) return h->table[idx].value;
  }
-  // Should be impossible with a load factor below 1, but just in case:
+
   return ifnotfound; // # nocov
 }
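To make the sizing arithmetic above concrete (numbers chosen here for illustration, not taken from the patch): hash_create(1000) uses the default load factor 0.5, so get_full_size() needs at least ceil(1000 / 0.5) = 2000 slots and rounds up to the next power of two, 2048. With a power-of-two size, reducing a hash value to a slot index is a single mask:

#include <stddef.h>

/* Sketch only: for a power-of-two table, `hash % size` and `hash & (size - 1)`
 * pick the same slot, but the mask avoids an integer division. */
static size_t slot_of(size_t hash, size_t size_pow2) {   /* e.g. size_pow2 == 2048 */
  return hash & (size_pow2 - 1);                         /* same as hash % 2048 */
}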

@@ -134,7 +169,7 @@ static dhashtab * dhash_create_(size_t n, double load_factor) {
   self->table = dhash_allocate(n_full);
   self->size = n_full;
   self->limit = n;
-  self->multiplier = n_full * hash_multiplier;
+  self->multiplier = n_full * hash_multiplier1;
   // this is the last time we're allowed to set the table parts piece by piece
 
   UNPROTECT(1);
@@ -147,11 +182,13 @@ static void dhash_enlarge(dhashtab_ * self) {
   if (self->size > SIZE_MAX / 2)
     internal_error(__func__, "doubling %zu elements would overflow size_t", self->size); // # nocov
   size_t new_size = self->size * 2;
+  size_t new_mask = new_size - 1;
   struct hash_pair * new = dhash_allocate(new_size);
-  uintptr_t new_multiplier = new_size * hash_multiplier;
+  uintptr_t new_multiplier = new_size * hash_multiplier1;
   for (size_t i = 0; i < self->size; ++i) {
+    if (!self->table[i].key) continue;
     for (size_t j = 0; j < new_size; ++j) {
-      size_t ii = (hash_index(self->table[i].key, new_multiplier) + j) % new_size;
+      size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask;
       if (!new[ii].key) {
         new[ii] = (struct hash_pair){
           .key = self->table[i].key,
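The hunk above also shows the shape of dhash_enlarge()'s rehash loop: every occupied slot of the old table is re-inserted into a table of twice the size by linear probing under the new mask, and the added `continue` skips empty slots instead of probing for a NULL key. A standalone sketch of that loop (the struct, names and the old_hash helper are assumptions for illustration, not the patch's code):

#include <stddef.h>

struct cell { const void *key; long value; };

/* Sketch only: re-insert occupied cells into a doubled, power-of-two table. */
static void rehash_into(const struct cell *old, size_t old_size,
                        struct cell *fresh, size_t new_size,
                        size_t (*old_hash)(const void *key)) {
  size_t new_mask = new_size - 1;            /* new_size is a power of two */
  for (size_t i = 0; i < old_size; ++i) {
    if (!old[i].key) continue;               /* nothing stored in this slot */
    for (size_t j = 0; j < new_size; ++j) {
      size_t ii = (old_hash(old[i].key) + j) & new_mask;
      if (!fresh[ii].key) { fresh[ii] = old[i]; break; }
    }
  }
}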
@@ -175,7 +212,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
   dhashtab_ * self = (dhashtab_ *)h;
   struct hash_pair *cell, *end;
 again:
-  cell = self->table + hash_index(key, self->multiplier) % self->size;
+  cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1));
   end = self->table + self->size - 1;
   for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) {
     if (cell->key == key) {
@@ -201,7 +238,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
   #pragma omp flush // no locking or atomic access! this is bad
   dhashtab_ self = *(dhashtab_ *)h;
   R_xlen_t ret = ifnotfound;
-  const struct hash_pair * cell = self.table + hash_index(key, self.multiplier) % self.size;
+  const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1));
   const struct hash_pair * end = self.table + self.size - 1;
   for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) {
     if (cell->key == key) {
