@@ -68,15 +68,19 @@ hashtab * hash_create(size_t n) { return hash_create_(n, .5); }
6868
6969// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
7070// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
71- static R_INLINE size_t hash_index1 (SEXP key , uintptr_t multiplier ) {
71+ static R_INLINE size_t hash_index1 (SEXP key , size_t mask ) { // First hash: maps the pointer value of `key` to an index in [0, mask].
7272 // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
7373 // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
7474 // which are at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
75- return ((((uintptr_t )key ) >> 4 ) & 0x0fffffff ) * multiplier ;
75+ uintptr_t h = (uintptr_t )key >> 4 ; // discard the low 4 bits, which are assumed above to carry no information
76+ return h & mask ; // the result never exceeds mask; callers in this file pass (table size - 1), presumably with a power-of-two size -- TODO confirm
7677}
7778
78- static R_INLINE size_t hash_index2 (SEXP key , uintptr_t multiplier ) {
79- return ((((uintptr_t )key ) >> 6 ) & 0x0fffffff ) * multiplier ;
79+ static R_INLINE size_t hash_index2 (SEXP key , size_t mask ) { // Second hash: maps the pointer value of `key` to an index in [0, mask].
80+ // Use XOR folding to mix up the bits
81+ uintptr_t h = (uintptr_t )key >> 4 ; // same starting value as hash_index1: the pointer with its low 4 bits dropped
82+ h ^= h >> 10 ; // fold bits 10 and above of h onto its low bits
83+ return h & mask ; // NOTE(review): this equals hash_index1's result XOR ((h >> 10) & mask), so both hashes coincide whenever that term is 0
8084}
8185
8286void hash_rehash (hashtab * h ) {
@@ -95,7 +99,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
9599 size_t mask = h -> size - 1 ;
96100 struct hash_pair item = { .key = key , .value = value };
97101 for (size_t i = 0 ; i < max_relocations ; ++ i ) {
98- size_t idx1 = hash_index1 (item .key , h -> multiplier1 ) & mask ;
102+ size_t idx1 = hash_index1 (item .key , mask ) ;
99103 if (!h -> tb1 [idx1 ].key ) {
100104 h -> tb1 [idx1 ] = item ;
101105 return ;
@@ -104,7 +108,7 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
104108 h -> tb1 [idx1 ] = item ;
105109 item = temp ;
106110
107- size_t idx2 = hash_index2 (item .key , h -> multiplier2 ) & mask ;
111+ size_t idx2 = hash_index2 (item .key , mask ) ;
108112 if (!h -> tb2 [idx2 ].key ) {
109113 h -> tb2 [idx2 ] = item ;
110114 return ;
@@ -120,10 +124,10 @@ void hash_set(hashtab *h, SEXP key, R_xlen_t value) {
120124
121125R_xlen_t hash_lookup (const hashtab * h , SEXP key , R_xlen_t ifnotfound ) {
122126 size_t mask = h -> size - 1 ;
123- size_t idx1 = hash_index1 (key , h -> multiplier1 ) & mask ;
127+ size_t idx1 = hash_index1 (key , mask ) ;
124128 if (h -> tb1 [idx1 ].key == key ) return h -> tb1 [idx1 ].value ;
125129
126- size_t idx2 = hash_index2 (key , h -> multiplier2 ) & mask ;
130+ size_t idx2 = hash_index2 (key , mask ) ;
127131 if (h -> tb2 [idx2 ].key == key ) return h -> tb2 [idx2 ].value ;
128132 // Should be impossible with a load factor below 1, but just in case:
129133 return ifnotfound ; // # nocov
@@ -188,7 +192,7 @@ static void dhash_enlarge(dhashtab_ * self) {
188192 for (size_t i = 0 ; i < self -> size ; ++ i ) {
189193 if (!self -> table [i ].key ) continue ;
190194 for (size_t j = 0 ; j < new_size ; ++ j ) {
191- size_t ii = (hash_index1 (self -> table [i ].key , new_multiplier ) + j ) & new_mask ;
195+ size_t ii = (hash_index1 (self -> table [i ].key , new_mask ) + j ) & new_mask ;
192196 if (!new [ii ].key ) {
193197 new [ii ] = (struct hash_pair ){
194198 .key = self -> table [i ].key ,
@@ -221,7 +225,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
221225 dhashtab_ * self = (dhashtab_ * )h ;
222226 struct hash_pair * cell , * end ;
223227again :
224- cell = self -> table + ( hash_index1 (key , self -> multiplier ) & ( self -> size - 1 ) );
228+ cell = self -> table + hash_index1 (key , self -> size - 1 );
225229 end = self -> table + self -> size - 1 ;
226230 for (size_t i = 0 ; i < self -> size ; ++ i , cell = cell == end ? self -> table : cell + 1 ) {
227231 if (cell -> key == key ) {
@@ -247,7 +251,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
247251 #pragma omp flush // no locking or atomic access! this is bad
248252 dhashtab_ self = * (dhashtab_ * )h ;
249253 R_xlen_t ret = ifnotfound ;
250- const struct hash_pair * cell = self .table + ( hash_index1 (key , self .multiplier ) & ( self . size - 1 ) );
254+ const struct hash_pair * cell = self .table + hash_index1 (key , self .size - 1 );
251255 const struct hash_pair * end = self .table + self .size - 1 ;
252256 for (size_t i = 0 ; i < self .size ; ++ i , cell = cell == end ? self .table : cell + 1 ) {
253257 if (cell -> key == key ) {
0 commit comments