Skip to content

Commit e169d2c

Browse files
committed
mix instead of multiplication
1 parent 7d4b067 commit e169d2c

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

src/hash.c

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -68,15 +68,19 @@ hashtab * hash_create(size_t n) { return hash_create_(n, .5); }
6868

6969
// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
7070
// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
71-
static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) {
71+
static R_INLINE size_t hash_index1(SEXP key, size_t mask) {
7272
// The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
7373
// Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
7474
// which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
75-
return ((((uintptr_t)key) >> 4) & 0x0fffffff) * multiplier;
75+
uintptr_t h = (uintptr_t)key >> 4;
76+
return h & mask;
7677
}
7778

78-
static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) {
79-
return ((((uintptr_t)key) >> 6) & 0x0fffffff) * multiplier;
79+
static R_INLINE size_t hash_index2(SEXP key, size_t mask) {
80+
// Use XOR folding to mix up the bits
81+
uintptr_t h = (uintptr_t)key >> 4;
82+
h ^= h >> 10;
83+
return h & mask;
8084
}
8185

8286
void hash_rehash(hashtab *h) {
@@ -95,7 +99,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
9599
size_t mask = h->size - 1;
96100
struct hash_pair item = { .key = key, .value = value };
97101
for (size_t i = 0; i < max_relocations; ++i) {
98-
size_t idx1 = hash_index1(item.key, h->multiplier1) & mask;
102+
size_t idx1 = hash_index1(item.key, mask);
99103
if (!h->tb1[idx1].key) {
100104
h->tb1[idx1] = item;
101105
return;
@@ -104,7 +108,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
104108
h->tb1[idx1] = item;
105109
item = temp;
106110

107-
size_t idx2 = hash_index2(item.key, h->multiplier2) & mask;
111+
size_t idx2 = hash_index2(item.key, mask);
108112
if (!h->tb2[idx2].key) {
109113
h->tb2[idx2] = item;
110114
return;
@@ -120,10 +124,10 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
120124

121125
R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) {
122126
size_t mask = h->size - 1;
123-
size_t idx1 = hash_index1(key, h->multiplier1) & mask;
127+
size_t idx1 = hash_index1(key, mask);
124128
if (h->tb1[idx1].key == key) return h->tb1[idx1].value;
125129

126-
size_t idx2 = hash_index2(key, h->multiplier2) & mask;
130+
size_t idx2 = hash_index2(key, mask);
127131
if (h->tb2[idx2].key == key) return h->tb2[idx2].value;
128132
// Should be impossible with a load factor below 1, but just in case:
129133
return ifnotfound; // # nocov
@@ -188,7 +192,7 @@ static void dhash_enlarge(dhashtab_ * self) {
188192
for (size_t i = 0; i < self->size; ++i) {
189193
if (!self->table[i].key) continue;
190194
for (size_t j = 0; j < new_size; ++j) {
191-
size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask;
195+
size_t ii = (hash_index1(self->table[i].key, new_mask) + j) & new_mask;
192196
if (!new[ii].key) {
193197
new[ii] = (struct hash_pair){
194198
.key = self->table[i].key,
@@ -221,7 +225,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
221225
dhashtab_ * self = (dhashtab_ *)h;
222226
struct hash_pair *cell, *end;
223227
again:
224-
cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1));
228+
cell = self->table + hash_index1(key, self->size - 1);
225229
end = self->table + self->size - 1;
226230
for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) {
227231
if (cell->key == key) {
@@ -247,7 +251,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
247251
#pragma omp flush // no locking or atomic access! this is bad
248252
dhashtab_ self = *(dhashtab_ *)h;
249253
R_xlen_t ret = ifnotfound;
250-
const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1));
254+
const struct hash_pair * cell = self.table + hash_index1(key, self.size - 1);
251255
const struct hash_pair * end = self.table + self.size - 1;
252256
for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) {
253257
if (cell->key == key) {

0 commit comments

Comments
 (0)