11#include "data.table.h"
22
// Number of bits in a pointer (and therefore in the hash product).
#define HASH_BITS (8 * sizeof(void*))
// Multiplier for multiplicative hashing: 2^64 / phi with 64-bit pointers,
// 2^32 / phi otherwise (phi being the golden ratio). Both arms of the
// conditional are converted to a common type, so the expression is always
// unsigned long long; users narrow the product as needed.
#define HASH_MULTIPLIER ((sizeof(void*) == 8) ? 11400714819323198485ULL : 2654435769U)
35struct hash_pair {
46 SEXP key ;
57 R_xlen_t value ;
68};
79struct hash_tab {
810 size_t size , free ;
9- uintptr_t multiplier1 , multiplier2 ;
10- struct hash_pair * table ; // Single table for double hashing
11+ int shift ;
12+ struct hash_pair * table ;
1113};
1214
// Fraction of the cells that may be filled before the table must be rehashed.
static const double default_load_factor = .75;
16+
// Round n up to the nearest power of two; values <= 1 map to 1.
// Returns 0 when the result does not fit in size_t (n > SIZE_MAX/2 + 1):
// every bit is then set after smearing and the final +1 wraps around.
static R_INLINE size_t get_next_pow2(size_t n) {
  if (n <= 1) return 1;
  n--;
  // Copy the highest set bit into every lower position. The shift distances
  // are derived from the width of size_t, so no shift ever reaches the full
  // type width (a literal `n >> 32` does where size_t is 32 bits).
  for (unsigned s = 1; s < 8 * sizeof n; s <<= 1) {
    n |= n >> s;
  }
  return n + 1;
}
1728
1829static R_INLINE size_t get_full_size (size_t n_elements , double load_factor ) {
1930 if (load_factor <= 0 || load_factor >= 1 )
@@ -25,13 +36,11 @@ static R_INLINE size_t get_full_size(size_t n_elements, double load_factor) {
2536 __func__ , "n=%zu / load_factor=%g would overflow size_t" ,
2637 n_elements , load_factor
2738 );
28- size_t min_size = ceil ( n_elements / load_factor );
39+ size_t min_size = ( size_t )( n_elements / load_factor ) + 1 ;
2940 // Round up to next power of 2 for fast modulo using bitwise AND
30- size_t pow2 = 1 ;
31- while (pow2 < min_size ) {
32- if (pow2 > SIZE_MAX / 2 )
41+ size_t pow2 = get_next_pow2 (min_size );
42+ if (pow2 == 0 ) {
3343 internal_error (__func__ , "size %zu would overflow size_t" , min_size ); // # nocov
34- pow2 *= 2 ;
3544 }
3645 return pow2 ;
3746}
@@ -40,35 +49,21 @@ static hashtab * hash_create_(size_t n, double load_factor) {
4049 size_t n_full = get_full_size (n , load_factor );
4150 hashtab * ret = (hashtab * )R_alloc (sizeof (hashtab ), 1 );
4251 ret -> size = n_full ;
43- ret -> free = n ;
44- // Multiply by size to get different hash functions when rehashing
45- ret -> multiplier1 = n_full * hash_multiplier1 ;
46- ret -> multiplier2 = n_full * hash_multiplier2 ;
52+ ret -> free = (size_t )(n_full * load_factor );
53+
54+ int k = 0 ;
55+ while ((1ULL << k ) < n_full ) k ++ ;
56+ ret -> shift = HASH_BITS - k ;
4757 ret -> table = (struct hash_pair * )R_alloc (n_full , sizeof (* ret -> table ));
4858 // No valid SEXP is a null pointer, so it's a safe marker for empty cells.
49- for (size_t i = 0 ; i < n_full ; ++ i ) {
50- ret -> table [i ].key = NULL ;
51- }
59+ memset (ret -> table , 0 , n_full * sizeof (struct hash_pair ));
5260 return ret ;
5361}
5462
5563hashtab * hash_create (size_t n ) { return hash_create_ (n , default_load_factor ); }
5664
57- // double hashing
58- static R_INLINE size_t hash_index1 (SEXP key , uintptr_t multiplier ) {
59- // The 4 lowest bits of the pointer are probably zeroes because a typical SEXPREC exceeds 16 bytes in size.
60- // Since SEXPRECs are heap-allocated, they are subject to malloc() alignment guarantees,
61- // which is at least 4 bytes on 32-bit platforms, most likely more than 8 bytes.
62- return ((((uintptr_t )key ) >> 4 ) & 0x0fffffff ) * multiplier ;
63- }
64-
65- static R_INLINE size_t hash_index2 (SEXP key , uintptr_t multiplier ) {
66- // For double hashing, we need a different hash that's coprime with table size.
67- // We use higher-order bits that hash_index1 mostly ignores, and ensure
68- // the result is always odd (coprime with power-of-2 table sizes).
69- uintptr_t ptr = (uintptr_t )key ;
70- ptr = (ptr >> 12 ) | (ptr << (sizeof (uintptr_t ) * 8 - 12 ));
71- return ((ptr & 0x0fffffff ) * multiplier ) | 1 ;
65+ static R_INLINE size_t hash_index (SEXP key , int shift ) {
66+ return (size_t )((uintptr_t )key * HASH_MULTIPLIER ) >> shift ;
7267}
7368
7469static R_INLINE hashtab * hash_rehash (const hashtab * h ) {
@@ -83,17 +78,10 @@ static R_INLINE hashtab *hash_rehash(const hashtab *h) {
8378
8479static bool hash_set_ (hashtab * h , SEXP key , R_xlen_t value ) {
8580 size_t mask = h -> size - 1 ;
86- size_t h1 = hash_index1 (key , h -> multiplier1 ) & mask ;
87- size_t h2 = hash_index2 (key , h -> multiplier2 ) & mask ;
88-
89- if (h2 == 0 ) h2 = 1 ;
90- else if ((h2 & 1 ) == 0 ) h2 |= 1 ;
91-
92- for (size_t i = 0 ; i < h -> size ; ++ i ) {
93- size_t idx = (h1 + i * h2 ) & mask ;
94-
81+ size_t idx = hash_index (key , h -> shift );
82+ while (true) {
9583 if (!h -> table [idx ].key ) {
96- // Empty slot found
84+ if ( h -> free == 0 ) return false; // table full -> need rehash
9785 h -> table [idx ].key = key ;
9886 h -> table [idx ].value = value ;
9987 h -> free -- ;
@@ -104,10 +92,8 @@ static bool hash_set_(hashtab *h, SEXP key, R_xlen_t value) {
10492 h -> table [idx ].value = value ;
10593 return true;
10694 }
95+ idx = (idx + 1 ) & mask ;
10796 }
108-
109- // need to rehash
110- return false;
11197}
11298
11399void hash_set (hashtab * h , SEXP key , R_xlen_t value ) {
@@ -127,17 +113,10 @@ hashtab *hash_set_shared(hashtab *h, SEXP key, R_xlen_t value) {
127113
128114R_xlen_t hash_lookup (const hashtab * h , SEXP key , R_xlen_t ifnotfound ) {
129115 size_t mask = h -> size - 1 ;
130- size_t h1 = hash_index1 (key , h -> multiplier1 ) & mask ;
131- size_t h2 = hash_index2 (key , h -> multiplier2 ) & mask ;
132-
133- if (h2 == 0 ) h2 = 1 ;
134- else if ((h2 & 1 ) == 0 ) h2 |= 1 ;
135-
136- for (size_t i = 0 ; i < h -> size ; ++ i ) {
137- size_t idx = (h1 + i * h2 ) & mask ;
138- if (!h -> table [idx ].key ) return ifnotfound ;
116+ size_t idx = hash_index (key , h -> shift );
117+ while (true) {
139118 if (h -> table [idx ].key == key ) return h -> table [idx ].value ;
119+ if (h -> table [idx ].key == NULL ) return ifnotfound ;
120+ idx = (idx + 1 ) & mask ;
140121 }
141-
142- return ifnotfound ; // # nocov
143122}
0 commit comments