Skip to content

Commit b017013

Browse files
committed
use different mixes
1 parent e169d2c commit b017013

File tree

1 file changed

+30
-23
lines changed

1 file changed

+30
-23
lines changed

src/hash.c

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,15 @@ struct hash_tab {
1212
struct hash_pair *tb1, *tb2;
1313
};
1414

15-
// TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, the golden ratio.
16-
static const double hash_multiplier1 = 0.618033988749895;
17-
static const double hash_multiplier2 = 0.316227766016838;
15+
// Fast integer hash multipliers based on golden ratio and other constants
16+
// 0x9e3779b9 is 2^32 / phi (phi = golden ratio, ~1.618) for 32-bit mixing; 0x9e3779b97f4a7c15 is the 64-bit analogue, 2^64 / phi
17+
#if SIZE_MAX == UINT64_MAX
18+
static const uintptr_t hash_multiplier1 = 0x9e3779b97f4a7c15ULL;
19+
static const uintptr_t hash_multiplier2 = 0x85ebca77c2b2ae35ULL;
20+
#else
21+
static const uintptr_t hash_multiplier1 = 0x9e3779b9U;
22+
static const uintptr_t hash_multiplier2 = 0x85ebca77U;
23+
#endif
1824

1925
static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) {
2026
if (load_factor <= 0 || load_factor >= 1)
@@ -51,7 +57,7 @@ static hashtab * hash_create_(size_t n, double load_factor) {
5157
hashtab *ret = (hashtab *)R_alloc(sizeof(hashtab), 1);
5258
ret->size = n_full;
5359
ret->free = n;
54-
// To compute floor(size * (A * key % 1)) in integer arithmetic with A < 1, use ((size * A) * key) % size.
60+
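// Multiply the base multipliers by the table size to get different hash functions when rehashing. NOTE(review): if size is a power of two (as the `& mask` indexing suggests), this clears the low log2(size) bits of the multiplier -- confirm the masked index still covers every slot.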
5561
ret->multiplier1 = n_full * hash_multiplier1;
5662
ret->multiplier2 = n_full * hash_multiplier2;
5763
ret->tb1 = (struct hash_pair *)R_alloc(sizeof(struct hash_pair[n_full]), 1);
@@ -66,21 +72,22 @@ static hashtab * hash_create_(size_t n, double load_factor) {
6672

6773
hashtab * hash_create(size_t n) { return hash_create_(n, .5); }
6874

69-
// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
70-
// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
71-
static R_INLINE size_t hash_index1(SEXP key, size_t mask) {
72-
// The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
73-
// Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
74-
// which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
75+
// Fast hash mixing using XOR-shift and integer multiplication
76+
static R_INLINE size_t hash_index1(SEXP key, uintptr_t multiplier) {
7577
uintptr_t h = (uintptr_t)key >> 4;
76-
return h & mask;
78+
// XOR folding to mix high bits into low bits
79+
h ^= h >> 16;
80+
h *= multiplier;
81+
h ^= h >> 13;
82+
return h;
7783
}
7884

79-
static R_INLINE size_t hash_index2(SEXP key, size_t mask) {
80-
// Use XOR folding to mix up the bits
81-
uintptr_t h = (uintptr_t)key >> 4;
82-
h ^= h >> 10;
83-
return h & mask;
85+
static R_INLINE size_t hash_index2(SEXP key, uintptr_t multiplier) {
86+
uintptr_t h = (uintptr_t)key >> 6;
87+
h ^= h >> 18;
88+
h *= multiplier;
89+
h ^= h >> 15;
90+
return h;
8491
}
8592

8693
void hash_rehash(hashtab *h) {
@@ -99,7 +106,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
99106
size_t mask = h->size - 1;
100107
struct hash_pair item = { .key = key, .value = value };
101108
for (size_t i = 0; i < max_relocations; ++i) {
102-
size_t idx1 = hash_index1(item.key, mask);
109+
size_t idx1 = hash_index1(item.key, h->multiplier1) & mask;
103110
if (!h->tb1[idx1].key) {
104111
h->tb1[idx1] = item;
105112
return;
@@ -108,7 +115,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
108115
h->tb1[idx1] = item;
109116
item = temp;
110117

111-
size_t idx2 = hash_index2(item.key, mask);
118+
size_t idx2 = hash_index2(item.key, h->multiplier2) & mask;
112119
if (!h->tb2[idx2].key) {
113120
h->tb2[idx2] = item;
114121
return;
@@ -124,10 +131,10 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
124131

125132
R_xlen_t hash_lookup(const hashtab *h, SEXP key, R_xlen_t ifnotfound) {
126133
size_t mask = h->size - 1;
127-
size_t idx1 = hash_index1(key, mask);
134+
size_t idx1 = hash_index1(key, h->multiplier1) & mask;
128135
if (h->tb1[idx1].key == key) return h->tb1[idx1].value;
129136

130-
size_t idx2 = hash_index2(key, mask);
137+
size_t idx2 = hash_index2(key, h->multiplier2) & mask;
131138
if (h->tb2[idx2].key == key) return h->tb2[idx2].value;
132139
// Should be impossible with a load factor below 1, but just in case:
133140
return ifnotfound; // # nocov
@@ -192,7 +199,7 @@ static void dhash_enlarge(dhashtab_ * self) {
192199
for (size_t i = 0; i < self->size; ++i) {
193200
if (!self->table[i].key) continue;
194201
for (size_t j = 0; j < new_size; ++j) {
195-
size_t ii = (hash_index1(self->table[i].key, new_mask) + j) & new_mask;
202+
size_t ii = (hash_index1(self->table[i].key, new_multiplier) + j) & new_mask;
196203
if (!new[ii].key) {
197204
new[ii] = (struct hash_pair){
198205
.key = self->table[i].key,
@@ -225,7 +232,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
225232
dhashtab_ * self = (dhashtab_ *)h;
226233
struct hash_pair *cell, *end;
227234
again:
228-
cell = self->table + hash_index1(key, self->size - 1);
235+
cell = self->table + (hash_index1(key, self->multiplier) & (self->size - 1));
229236
end = self->table + self->size - 1;
230237
for (size_t i = 0; i < self->size; ++i, cell = cell == end ? self->table : cell+1) {
231238
if (cell->key == key) {
@@ -251,7 +258,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
251258
#pragma omp flush // no locking or atomic access! this is bad
252259
dhashtab_ self = *(dhashtab_ *)h;
253260
R_xlen_t ret = ifnotfound;
254-
const struct hash_pair * cell = self.table + hash_index1(key, self.size - 1);
261+
const struct hash_pair * cell = self.table + (hash_index1(key, self.multiplier) & (self.size - 1));
255262
const struct hash_pair * end = self.table + self.size - 1;
256263
for (size_t i = 0; i < self.size; ++i, cell = cell == end ? self.table : cell+1) {
257264
if (cell->key == key) {

0 commit comments

Comments
 (0)