@@ -8,12 +8,13 @@ struct hash_pair {
88};
99struct hash_tab {
1010 size_t size , free ;
11- uintptr_t multiplier ;
12- struct hash_pair tb [] ;
11+ uintptr_t multiplier1 , multiplier2 ;
12+ struct hash_pair * tb1 , * tb2 ;
1313};
1414
1515// TAOCP vol. 3, section 6.4: for multiplication hashing, use A ~ 1/phi, the golden ratio.
16- static const double hash_multiplier = 0.618033988749895 ;
16+ static const double hash_multiplier1 = 0.618033988749895 ;
17+ static const double hash_multiplier2 = 0.316227766016838 ;
1718
1819static R_INLINE size_t get_full_size (size_t n_elements , double load_factor ) {
1920 if (load_factor <= 0 || load_factor >= 1 )
@@ -39,85 +40,73 @@ static hashtab * hash_create_(size_t n, double load_factor) {
3940 __func__ , "n=%zu with load_factor=%g would overflow total allocation size" ,
4041 n , load_factor
4142 );
42- hashtab * ret = (hashtab * )R_alloc (sizeof (hashtab ) + sizeof ( struct hash_pair [ n_full ] ), 1 );
43+ hashtab * ret = (hashtab * )R_alloc (sizeof (hashtab ), 1 );
4344 ret -> size = n_full ;
4445 ret -> free = n ;
4546 // To compute floor(size * (A * key % 1)) in integer arithmetic with A < 1, use ((size * A) * key) % size.
46- ret -> multiplier = n_full * hash_multiplier ;
47+ ret -> multiplier1 = n_full * hash_multiplier1 ;
48+ ret -> multiplier2 = n_full * hash_multiplier2 ;
49+ ret -> tb1 = (struct hash_pair * )R_alloc (sizeof (struct hash_pair [n_full ]), 1 );
50+ ret -> tb2 = (struct hash_pair * )R_alloc (sizeof (struct hash_pair [n_full ]), 1 );
4751 // No valid SEXP is a null pointer, so it's a safe marker for empty cells.
48- for (size_t i = 0 ; i < n_full ; ++ i )
49- ret -> tb [i ].key = NULL ;
52+ for (size_t i = 0 ; i < n_full ; ++ i ) {
53+ ret -> tb1 [i ].key = NULL ;
54+ ret -> tb2 [i ].key = NULL ;
55+ }
5056 return ret ;
5157}
5258
5359hashtab * hash_create (size_t n ) { return hash_create_ (n , .5 ); }
5460
5561// Hashing for an open addressing hash table. See Cormen et al., Introduction to Algorithms, 3rd ed., section 11.4.
5662// This is far from perfect. Make size a prime or a power of two and you'll be able to use double hashing.
57- static R_INLINE size_t hash_index (SEXP key , uintptr_t multiplier ) {
63+ static R_INLINE size_t hash_index1 (SEXP key , uintptr_t multiplier ) {
5864 // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
5965 // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
6066 // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
6167 return ((((uintptr_t )key ) >> 4 ) & 0x0fffffff ) * multiplier ;
6268}
6369
64- void hash_set (hashtab * h , SEXP key , R_xlen_t value ) {
65- struct hash_pair * cell = h -> tb + hash_index (key , h -> multiplier ) % h -> size , * end = h -> tb + h -> size - 1 ;
66- for (size_t i = 0 ; i < h -> size ; ++ i , cell = cell == end ? h -> tb : cell + 1 ) {
67- if (cell -> key == key ) {
68- cell -> value = value ;
70+ static R_INLINE size_t hash_index2 (SEXP key , uintptr_t multiplier ) {
71+ return ((((uintptr_t )key ) >> 6 ) & 0x0fffffff ) * multiplier ;
72+ }
73+
74+
75+ void hash_set (hashtab * h , SEXP key , R_xlen_t value ) {
76+ size_t max_relocations = h -> size ;
77+ struct hash_pair item = { .key = key , .value = value };
78+ for (size_t i = 0 ; i < max_relocations ; ++ i ) {
79+ size_t idx1 = hash_index1 (item .key , h -> multiplier1 ) % h -> size ;
80+ if (!h -> tb1 [idx1 ].key ) {
81+ h -> tb1 [idx1 ] = item ;
6982 return ;
70- } else if (!cell -> key ) {
71- if (!h -> free ) internal_error (
72- __func__ , "no free slots left (full size=%zu)" , h -> size
73- );
74- -- h -> free ;
75- * cell = (struct hash_pair ){.key = key , .value = value };
83+ }
84+ struct hash_pair temp = h -> tb1 [idx1 ];
85+ h -> tb1 [idx1 ] = item ;
86+ item = temp ;
87+
88+ size_t idx2 = hash_index2 (item .key , h -> multiplier2 ) % h -> size ;
89+ if (!h -> tb2 [idx2 ].key ) {
90+ h -> tb2 [idx2 ] = item ;
7691 return ;
7792 }
93+ temp = h -> tb2 [idx2 ];
94+ h -> tb2 [idx2 ] = item ;
95+ item = temp ;
7896 }
79- internal_error ( // # nocov
80- __func__ , "did not find a free slot for key %p; size=%zu, free=%zu" ,
81- (void * )key , h -> size , h -> free
82- );
97+ internal_error (__func__ , "Cuckoo hashing cycle detected, rehash needed" );
8398}
8499
85- R_xlen_t hash_lookup (const hashtab * h , SEXP key , R_xlen_t ifnotfound ) {
86- const struct hash_pair * cell = h -> tb + hash_index (key , h -> multiplier ) % h -> size , * end = h -> tb + h -> size - 1 ;
87- for (size_t i = 0 ; i < h -> size ; ++ i , cell = cell == end ? h -> tb : cell + 1 ) {
88- if (cell -> key == key ) {
89- return cell -> value ;
90- } else if (!cell -> key ) {
91- return ifnotfound ;
92- }
93- }
100+ R_xlen_t hash_lookup (const hashtab * h , SEXP key , R_xlen_t ifnotfound ) {
101+ size_t idx1 = hash_index1 (key , h -> multiplier1 ) % h -> size ;
102+ if (h -> tb1 [idx1 ].key == key ) return h -> tb1 [idx1 ].value ;
103+
104+ size_t idx2 = hash_index2 (key , h -> multiplier2 ) % h -> size ;
105+ if (h -> tb2 [idx2 ].key == key ) return h -> tb2 [idx2 ].value ;
94106 // Neither candidate slot (tb1 or tb2) holds the key, so it is not in the table:
95107 return ifnotfound ; // # nocov
96108}
97109
98- R_xlen_t hash_lookup_or_insert (hashtab * h , SEXP key , R_xlen_t value ) {
99- struct hash_pair * cell = h -> tb + hash_index (key , h -> multiplier ) % h -> size , * end = h -> tb + h -> size - 1 ;
100- for (size_t i = 0 ; i < h -> size ; ++ i , cell = (cell == end ? h -> tb : cell + 1 )) {
101- if (cell -> key == key ) {
102- return cell -> value ; // found key, only lookup, no insert
103- } else if (!cell -> key ) {
104- if (!h -> free ) internal_error (
105- __func__ , "no free slots left (full size=%zu)" , h -> size
106- );
107- -- h -> free ;
108- * cell = (struct hash_pair ){.key = key , .value = value };
109- return value ; // insert here
110- }
111- }
112-
113- internal_error ( // # nocov
114- __func__ , "did not find a free slot for key %p; size=%zu, free=%zu" ,
115- (void * )key , h -> size , h -> free
116- );
117- // Should be impossible, but just in case:
118- return value ;
119- }
120-
121110typedef struct dhashtab_ {
122111 dhashtab public ; // must be at offset 0
123112 size_t size , used , limit ;
@@ -158,7 +147,7 @@ static dhashtab * dhash_create_(size_t n, double load_factor) {
158147 self -> table = dhash_allocate (n_full );
159148 self -> size = n_full ;
160149 self -> limit = n ;
161- self -> multiplier = n_full * hash_multiplier ;
150+ self -> multiplier = n_full * hash_multiplier1 ;
162151 // this is the last time we're allowed to set the table parts piece by piece
163152
164153 UNPROTECT (1 );
@@ -172,10 +161,10 @@ static void dhash_enlarge(dhashtab_ * self) {
172161 internal_error (__func__ , "doubling %zu elements would overflow size_t" , self -> size ); // # nocov
173162 size_t new_size = self -> size * 2 ;
174163 struct hash_pair * new = dhash_allocate (new_size );
175- uintptr_t new_multiplier = new_size * hash_multiplier ;
164+ uintptr_t new_multiplier = new_size * hash_multiplier1 ;
176165 for (size_t i = 0 ; i < self -> size ; ++ i ) {
177166 for (size_t j = 0 ; j < new_size ; ++ j ) {
178- size_t ii = (hash_index (self -> table [i ].key , new_multiplier ) + j ) % new_size ;
167+ size_t ii = (hash_index1 (self -> table [i ].key , new_multiplier ) + j ) % new_size ;
179168 if (!new [ii ].key ) {
180169 new [ii ] = (struct hash_pair ){
181170 .key = self -> table [i ].key ,
@@ -208,7 +197,7 @@ void dhash_set(dhashtab * h, SEXP key, R_xlen_t value) {
208197 dhashtab_ * self = (dhashtab_ * )h ;
209198 struct hash_pair * cell , * end ;
210199again :
211- cell = self -> table + hash_index (key , self -> multiplier ) % self -> size ;
200+ cell = self -> table + hash_index1 (key , self -> multiplier ) % self -> size ;
212201 end = self -> table + self -> size - 1 ;
213202 for (size_t i = 0 ; i < self -> size ; ++ i , cell = cell == end ? self -> table : cell + 1 ) {
214203 if (cell -> key == key ) {
@@ -234,7 +223,7 @@ R_xlen_t dhash_lookup(dhashtab * h, SEXP key, R_xlen_t ifnotfound) {
234223 #pragma omp flush // no locking or atomic access! this is bad
235224 dhashtab_ self = * (dhashtab_ * )h ;
236225 R_xlen_t ret = ifnotfound ;
237- const struct hash_pair * cell = self .table + hash_index (key , self .multiplier ) % self .size ;
226+ const struct hash_pair * cell = self .table + hash_index1 (key , self .multiplier ) % self .size ;
238227 const struct hash_pair * end = self .table + self .size - 1 ;
239228 for (size_t i = 0 ; i < self .size ; ++ i , cell = cell == end ? self .table : cell + 1 ) {
240229 if (cell -> key == key ) {
0 commit comments