@@ -17,6 +17,26 @@ import dmd.root.rmem, dmd.root.hash;
1717private enum POOL_BITS = 12 ;
1818private enum POOL_SIZE = (1U << POOL_BITS );
1919
/// Candidate table capacities: a fixed list of primes, each roughly double
/// the previous one. Prime capacities reduce clustering when a slot is
/// chosen with `hash % length`.
private immutable size_t[] primeNumbers = [
    53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
    196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
    50331653, 100663319, 201326611, 402653189, 805306457, 1610612741
];

/**
Returns the first entry of `primeNumbers` that is strictly greater than `val`.

If `val` is not smaller than the largest entry of the table, the result of
`nextpow2(val)` is returned instead (so the result is then not prime).
*/
private size_t nextPrime(size_t val) @nogc nothrow pure @safe
{
    for (size_t idx = 0; idx < primeNumbers.length; ++idx)
    {
        const candidate = primeNumbers[idx];
        if (candidate > val)
            return candidate;
    }

    // Ran off the end of the prime table: fall back to a power of two.
    return nextpow2(val);
}
39+
2040/*
2141Returns the smallest integer power of 2 larger than val.
2242if val > 2^^63 on 64-bit targets or val > 2^^31 on 32-bit targets it enters an
@@ -38,8 +58,11 @@ unittest
3858 // note: nextpow2((1UL << 63) + 1) results in an endless loop
3959}
4060
41- private enum loadFactorNumerator = 8 ;
42- private enum loadFactorDenominator = 10 ; // for a load factor of 0.8
// Load factors are stored as integer numerators over a shared denominator,
// i.e. the effective factor is `numerator / loadFactorDenominator`.
// A freshly initialised table uses the lower (initial) factor; the higher
// (normal) factor trades some probe length for space efficiency.
private enum
{
    initialLoadFactorNumerator = 6, // 6 / 10 = 0.6
    normalLoadFactorNumerator = 7,  // 7 / 10 = 0.7
    loadFactorDenominator = 10,
}
4366
4467private struct StringEntry
4568{
@@ -90,18 +113,20 @@ private:
90113 size_t nfill;
91114 size_t count;
92115 size_t countTrigger; // amount which will trigger growing the table
116+ bool isInitialSize; // Is this the initial table size
93117
94118public :
/**
Sets up an empty table.

Params:
    size = requested number of entries; 0 selects the smallest capacity (53).
           A non-zero request is divided by the initial load factor and then
           rounded up with `nextPrime`.
*/
void _init(size_t size = 0) nothrow pure
{
    size_t capacity = 53;
    if (size)
        capacity = nextPrime((size * loadFactorDenominator) / initialLoadFactorNumerator);

    // Zero-initialised storage: a zero `vptr` marks a free slot.
    auto raw = cast(StringEntry*) mem.xcalloc(capacity, (table[0]).sizeof);
    table = raw[0 .. capacity];

    // Number of entries at which the table is grown.
    countTrigger = (table.length * initialLoadFactorNumerator) / loadFactorDenominator;
    isInitialSize = true;

    pools = null;
    nfill = 0;
    count = 0;
}
106131
107132 void reset (size_t size = 0 ) nothrow pure
@@ -131,7 +156,6 @@ public:
131156 {
132157 const (size_t ) hash = calcHash(str);
133158 const (size_t ) i = findSlot(hash, str);
134- // printf("lookup %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: null);
135159 return getValue (table[i].vptr);
136160 }
137161
@@ -168,7 +192,6 @@ public:
168192 }
169193 table[i].hash = hash;
170194 table[i].vptr = allocValue(str, value);
171- // printf("insert %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
172195 return getValue (table[i].vptr);
173196 }
174197
@@ -192,7 +215,6 @@ public:
192215 table[i].hash = hash;
193216 table[i].vptr = allocValue(str, T.init);
194217 }
195- // printf("update %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
196218 return getValue (table[i].vptr);
197219 }
198220
@@ -281,34 +303,73 @@ private:
281303 return cast (inout (StringValue! T)* )&pools[idx][off];
282304 }
283305
284- size_t findSlot (hash_t hash, scope const (char )[] str) const @nogc nothrow pure
/**
Computes the 32-bit FNV-1a hash of `str`.

Starting from the FNV offset basis, each character is XOR-ed into the hash,
which is then multiplied by the FNV prime (with 32-bit wrap-around).

Params:
    str = characters to hash; an empty slice yields the offset basis
Returns:
    the 32-bit hash value
*/
private uint calcHash(scope const(char)[] str) const @nogc nothrow pure
{
    enum uint offsetBasis = 0x811c9dc5;
    enum uint fnvPrime = 0x01000193;

    uint h = offsetBasis;
    for (size_t k = 0; k < str.length; ++k)
        h = (h ^ str[k]) * fnvPrime;
    return h;
}
322+
/**
Locates the slot belonging to `str`.

Probing starts at `hash % table.length` and advances one slot at a time
(linear probing), wrapping around at the end of the table.

Params:
    hash = hash of `str`, compared against the hash stored in each entry
    str = the string being searched for
Returns:
    the index of the entry holding `str`, or the index of the first free slot
    (zero `vptr`) met on the probe sequence if `str` is not present.

NOTE(review): the loop only terminates if the table has a free slot or
contains `str`; presumably `countTrigger` keeps the table from filling up -
confirm against the callers.
*/
size_t findSlot(uint hash, scope const(char)[] str) const @nogc nothrow pure
{
    // Table lengths are not powers of two any more, so the start index is
    // obtained with a modulo instead of a bit mask.
    const size_t dim = table.length;
    size_t i = hash % dim;

    for (;;)
    {
        const vptr = table[i].vptr;
        if (!vptr)
            return i; // free slot: `str` is not in the table

        // Compare the cheap stored hash first, the string contents only on a match.
        if (table[i].hash == hash)
        {
            const sv = getValue(vptr);
            if (sv.length == str.length && .memcmp(str.ptr, sv.toDchars(), str.length) == 0)
                return i;
        }

        // Next slot, wrapping around; same as (i + 1) % dim because i < dim.
        if (++i == dim)
            i = 0;
    }
}
297349
/**
Enlarges the table and re-inserts every existing entry.

The new capacity is `nextPrime` of twice the current length. The first call
after `_init` still computes `countTrigger` with the initial load factor;
every later call uses the normal one.
*/
void grow() nothrow pure
{
    auto oldTable = table;

    // Pick the factor for the new table, then leave the "initial" state for good.
    const factorNumerator = isInitialSize ? initialLoadFactorNumerator : normalLoadFactorNumerator;
    isInitialSize = false;

    const newDim = nextPrime(oldTable.length * 2);
    countTrigger = (newDim * factorNumerator) / loadFactorDenominator;

    auto raw = cast(StringEntry*) mem.xcalloc_noscan(newDim, (table[0]).sizeof);
    table = raw[0 .. newDim];

    // Rehash: findSlot now probes the new table, so every live entry lands
    // in a slot computed for the new length.
    for (size_t k = 0; k < oldTable.length; ++k)
    {
        const entry = oldTable[k];
        if (entry.vptr)
        {
            const sv = getValue(entry.vptr);
            table[findSlot(entry.hash, sv.toString())] = entry;
        }
    }

    mem.xfree(oldTable.ptr);
}
314375}
0 commit comments