@@ -12,9 +12,15 @@ struct hash_tab {
1212 struct hash_pair * tb1 , * tb2 ;
1313};
1414
15- // TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, the golden ratio.
16- static const double hash_multiplier1 = 0.618033988749895 ;
17- static const double hash_multiplier2 = 0.316227766016838 ;
15+ // Odd integer multipliers for the multiplicative step in hash_index1 / hash_index2 (scaled into multiplier1 / multiplier2 by hash_create_).
16+ // 0x9e3779b9 is 2^32 / phi and 0x9e3779b97f4a7c15 is 2^64 / phi, where phi is the golden ratio.
17+ #if SIZE_MAX == UINT64_MAX // NOTE(review): the constants are uintptr_t, so UINTPTR_MAX may be the more precise width test - confirm
18+ static const uintptr_t hash_multiplier1 = 0x9e3779b97f4a7c15ULL ; // 2^64 / phi
19+ static const uintptr_t hash_multiplier2 = 0x85ebca77c2b2ae35ULL ; // second, distinct odd constant
20+ #else
21+ static const uintptr_t hash_multiplier1 = 0x9e3779b9U ; // 2^32 / phi
22+ static const uintptr_t hash_multiplier2 = 0x85ebca77U ; // second, distinct odd constant
23+ #endif
1824
1925static R_INLINE size_t get_full_size (size_t n_elements , double load_factor ) {
2026 if (load_factor <= 0 || load_factor >= 1 )
@@ -51,7 +57,7 @@ static hashtab * hash_create_(size_t n, double load_factor) {
5157 hashtab * ret = (hashtab * )R_alloc (sizeof (hashtab ), 1 );
5258 ret -> size = n_full ;
5359 ret -> free = n ;
54- // To compute floor( size * (A * key % 1)) in integer arithmetic with A < 1, use ((size * A) * key) % size.
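60+ // Multiply by size to get different hash functions when rehashing. NOTE(review): indices are later taken as `hash & (size - 1)`, which suggests size is a power of two; if so, these products have their low log2(size) bits cleared and hash_index1/2 only refill them via `h >> 13` / `h >> 15` - verify that large tables can still reach every slot.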
5561 ret -> multiplier1 = n_full * hash_multiplier1 ;
5662 ret -> multiplier2 = n_full * hash_multiplier2 ;
5763 ret -> tb1 = (struct hash_pair * )R_alloc (sizeof (struct hash_pair [n_full ]), 1 );
@@ -66,21 +72,22 @@ static hashtab * hash_create_(size_t n, double load_factor) {
6672
6773hashtab * hash_create (size_t n ) { return hash_create_ (n , .5 ); } // delegates to hash_create_ with a load factor of 0.5
6874
69- // Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
70- // This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
71- static R_INLINE size_t hash_index1 (SEXP key , size_t mask ) {
72- // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
73- // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
74- // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
75+ // Mixes a SEXP pointer value into a full-width hash word using XOR-shift and integer multiplication; the result is NOT reduced to a table index here.
76+ static R_INLINE size_t hash_index1 (SEXP key , uintptr_t multiplier ) {
7577 uintptr_t h = (uintptr_t )key >> 4 ; // discard the 4 lowest pointer bits, which the removed comment above describes as probably zero
76- return h & mask ;
78+ // XOR folding to mix high bits into low bits
79+ h ^= h >> 16 ;
80+ h *= multiplier ; // multiplicative step; the multiplier is supplied by the caller (per-table value)
81+ h ^= h >> 13 ; // fold upper bits of the product back down
82+ return h ; // unmasked: the visible call sites AND this with a `size - 1` mask
7783}
7884
79- static R_INLINE size_t hash_index2 (SEXP key , size_t mask ) {
80- // Use XOR folding to mix up the bits
81- uintptr_t h = (uintptr_t )key >> 4 ;
82- h ^= h >> 10 ;
83- return h & mask ;
85+ static R_INLINE size_t hash_index2 (SEXP key , uintptr_t multiplier ) { // second mixer: same shape as hash_index1 but with different shift amounts
86+ uintptr_t h = (uintptr_t )key >> 6 ; // discard the 6 lowest pointer bits (hash_index1 discards 4)
87+ h ^= h >> 18 ; // XOR-fold higher bits into lower bits before multiplying
88+ h *= multiplier ; // multiplicative step; the multiplier is supplied by the caller
89+ h ^= h >> 15 ; // fold upper bits of the product back down
90+ return h ; // unmasked: the visible call sites AND this with a `size - 1` mask
8491}
8592
8693void hash_rehash (hashtab * h ) {
@@ -99,7 +106,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
99106 size_t mask = h -> size - 1 ;
100107 struct hash_pair item = { .key = key , .value = value };
101108 for (size_t i = 0 ; i < max_relocations ; ++ i ) {
102- size_t idx1 = hash_index1 (item .key , mask ) ;
109+ size_t idx1 = hash_index1 (item .key , h -> multiplier1 ) & mask ;
103110 if (!h -> tb1 [idx1 ].key ) {
104111 h -> tb1 [idx1 ] = item ;
105112 return ;
@@ -108,7 +115,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
108115 h -> tb1 [idx1 ] = item ;
109116 item = temp ;
110117
111- size_t idx2 = hash_index2 (item .key , mask ) ;
118+ size_t idx2 = hash_index2 (item .key , h -> multiplier2 ) & mask ;
112119 if (!h -> tb2 [idx2 ].key ) {
113120 h -> tb2 [idx2 ] = item ;
114121 return ;
@@ -124,10 +131,10 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
124131
125132R_xlen_t hash_lookup (const hashtab * h , SEXP key , R_xlen_t ifnotfound ) {
126133 size_t mask = h -> size - 1 ;
127- size_t idx1 = hash_index1 (key , mask ) ;
134+ size_t idx1 = hash_index1 (key , h -> multiplier1 ) & mask ;
128135 if (h -> tb1 [idx1 ].key == key ) return h -> tb1 [idx1 ].value ;
129136
130- size_t idx2 = hash_index2 (key , mask ) ;
137+ size_t idx2 = hash_index2 (key , h -> multiplier2 ) & mask ;
131138 if (h -> tb2 [idx2 ].key == key ) return h -> tb2 [idx2 ].value ;
132139 // Should be impossible with a load factor below 1, but just in case:
133140 return ifnotfound ; // # nocov
@@ -192,7 +199,7 @@ static void dhash_enlarge(dhashtab_ * self) {
192199 for (size_t i = 0 ; i < self -> size ; ++ i ) {
193200 if (!self -> table [i ].key ) continue ;
194201 for (size_t j = 0 ; j < new_size ; ++ j ) {
195- size_t ii = (hash_index1 (self -> table [i ].key , new_mask ) + j ) & new_mask ;
202+ size_t ii = (hash_index1 (self -> table [i ].key , new_multiplier ) + j ) & new_mask ;
196203 if (!new [ii ].key ) {
197204 new [ii ] = (struct hash_pair ){
198205 .key = self -> table [i ].key ,
@@ -225,7 +232,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
225232 dhashtab_ * self = (dhashtab_ * )h ;
226233 struct hash_pair * cell , * end ;
227234again :
228- cell = self -> table + hash_index1 (key , self -> size - 1 );
235+ cell = self -> table + ( hash_index1 (key , self -> multiplier ) & ( self -> size - 1 ) );
229236 end = self -> table + self -> size - 1 ;
230237 for (size_t i = 0 ; i < self -> size ; ++ i , cell = cell == end ? self -> table : cell + 1 ) {
231238 if (cell -> key == key ) {
@@ -251,7 +258,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
251258 #pragma omp flush // no locking or atomic access! this is bad
252259 dhashtab_ self = * (dhashtab_ * )h ;
253260 R_xlen_t ret = ifnotfound ;
254- const struct hash_pair * cell = self .table + hash_index1 (key , self .size - 1 );
261+ const struct hash_pair * cell = self .table + ( hash_index1 (key , self .multiplier ) & ( self . size - 1 ) );
255262 const struct hash_pair * end = self .table + self .size - 1 ;
256263 for (size_t i = 0 ; i < self .size ; ++ i , cell = cell == end ? self .table : cell + 1 ) {
257264 if (cell -> key == key ) {
0 commit comments