Skip to content

Commit 7d4b067

Browse files
committed
Use power-of-2 table sizes and a bitmask instead of modulo
1 parent 09b3725 commit 7d4b067

File tree

1 file changed

+24
-12
lines changed

1 file changed

+24
-12
lines changed

src/hash.c

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,15 @@ static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) {
2626
__func__, "n=%zu / load_factor=%g would overflow size_t",
2727
n_elements, load_factor
2828
);
29-
return ceil(n_elements / load_factor);
29+
size_t min_size = ceil(n_elements / load_factor);
30+
// Round up to next power of 2 for fast modulo using bitwise AND
31+
size_t pow2 = 1;
32+
while (pow2 < min_size) {
33+
if (pow2 > SIZE_MAX / 2)
34+
internal_error(__func__, "size %zu would overflow size_t", min_size); // # nocov
35+
pow2 *= 2;
36+
}
37+
return pow2;
3038
}
3139

3240
static hashtab * hash_create_(size_t n, double load_factor) {
@@ -83,19 +91,20 @@ void hash_rehash(hashtab *h) {
8391
}
8492

8593
void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
86-
size_t max_relocations = h->size;
94+
size_t max_relocations = h->size;
95+
size_t mask = h->size - 1;
8796
struct hash_pair item = { .key = key, .value = value };
8897
for (size_t i = 0; i < max_relocations; ++i) {
89-
size_t idx1 = hash_index1(item.key, h->multiplier1) % h->size;
98+
size_t idx1 = hash_index1(item.key, h->multiplier1) & mask;
9099
if (!h->tb1[idx1].key) {
91100
h->tb1[idx1] = item;
92101
return;
93102
}
94103
struct hash_pair temp = h->tb1[idx1];
95104
h->tb1[idx1] = item;
96-
item = temp;
97-
98-
size_t idx2 = hash_index2(item.key, h->multiplier2) % h->size;
105+
item = temp;
106+
107+
size_t idx2 = hash_index2(item.key, h->multiplier2) & mask;
99108
if (!h->tb2[idx2].key) {
100109
h->tb2[idx2] = item;
101110
return;
@@ -110,10 +119,11 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
110119
}
111120

112121
R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) {
113-
size_t idx1 = hash_index1(key, h->multiplier1) % h->size;
122+
size_t mask = h->size - 1;
123+
size_t idx1 = hash_index1(key, h->multiplier1) & mask;
114124
if (h->tb1[idx1].key == key) return h->tb1[idx1].value;
115-
116-
size_t idx2 = hash_index2(key, h->multiplier2) % h->size;
125+
126+
size_t idx2 = hash_index2(key, h->multiplier2) & mask;
117127
if (h->tb2[idx2].key == key) return h->tb2[idx2].value;
118128
// Should be impossible with a load factor below 1, but just in case:
119129
return ifnotfound; // # nocov
@@ -172,11 +182,13 @@ static void dhash_enlarge(dhashtab_ * self) {
172182
if (self->size > SIZE_MAX / 2)
173183
internal_error(__func__, "doubling %zu elements would overflow size_t", self->size); // # nocov
174184
size_t new_size = self->size * 2;
185+
size_t new_mask = new_size - 1;
175186
struct hash_pair * new = dhash_allocate(new_size);
176187
uintptr_t new_multiplier = new_size * hash_multiplier1;
177188
for (size_t i = 0; i < self->size; ++i) {
189+
if (!self->table[i].key) continue;
178190
for (size_t j = 0; j < new_size; ++j) {
179-
size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) % new_size;
191+
size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask;
180192
if (!new[ii].key) {
181193
new[ii] = (struct hash_pair){
182194
.key = self->table[i].key,
@@ -209,7 +221,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
209221
dhashtab_ * self = (dhashtab_ *)h;
210222
struct hash_pair *cell, *end;
211223
again:
212-
cell = self->table + hash_index1(key, self->multiplier) % self->size;
224+
cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1));
213225
end = self->table + self->size - 1;
214226
for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) {
215227
if (cell->key == key) {
@@ -235,7 +247,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
235247
#pragma omp flush // no locking or atomic access! this is bad
236248
dhashtab_ self = *(dhashtab_ *)h;
237249
R_xlen_t ret = ifnotfound;
238-
const struct hash_pair * cell = self.table + hash_index1(key, self.multiplier) % self.size;
250+
const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1));
239251
const struct hash_pair * end = self.table + self.size - 1;
240252
for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) {
241253
if (cell->key == key) {

0 commit comments

Comments
 (0)