Skip to content

Commit f3ec073

Browse files
committed
Test
1 parent ce15cb9 commit f3ec073

File tree

1 file changed

+78
-17
lines changed

1 file changed

+78
-17
lines changed

compiler/src/dmd/root/stringtable.d

Lines changed: 78 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,26 @@ import dmd.root.rmem, dmd.root.hash;
1717
private enum POOL_BITS = 12;
1818
private enum POOL_SIZE = (1U << POOL_BITS);
1919

20+
// Ascending table of primes, each roughly double the previous, used as hash table sizes
21+
// Using prime numbers reduces clustering in hash tables
22+
private immutable size_t[] primeNumbers = [
23+
53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
24+
196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
25+
50331653, 100663319, 201326611, 402653189, 805306457, 1610612741
26+
];
27+
28+
/**
 * Picks a table size strictly greater than `val`.
 *
 * Params:
 *     val = lower bound (exclusive) for the returned size
 * Returns:
 *     the first entry of `primeNumbers` that is greater than `val`; when
 *     `val` is at or beyond the last entry, whatever `nextpow2(val)` yields
 *     (so the result is only prime while `val` is below the last table entry)
 */
private size_t nextPrime(size_t val) @nogc nothrow pure @safe
{
    // primeNumbers is ascending, so the first entry above val is the smallest one.
    size_t idx = 0;
    while (idx < primeNumbers.length && primeNumbers[idx] <= val)
        ++idx;

    // Past the end of the table: fall back to a power-of-two size.
    return idx < primeNumbers.length ? primeNumbers[idx] : nextpow2(val);
}
39+
2040
/*
2141
Returns the smallest integer power of 2 larger than val.
2242
if val > 2^^63 on 64-bit targets or val > 2^^31 on 32-bit targets it enters an
@@ -38,8 +58,11 @@ unittest
3858
// note: nextpow2((1UL << 63) + 1) results in an endless loop
3959
}
4060

41-
private enum loadFactorNumerator = 8;
42-
private enum loadFactorDenominator = 10; // for a load factor of 0.8
61+
// Optimized load factors for better performance
62+
// The initial (lower) load factor sets the growth trigger for the starting table and for the table produced by the first grow(); every later grow() uses the normal (higher) load factor
63+
private enum initialLoadFactorNumerator = 6; // 0.6 initial load factor
64+
private enum normalLoadFactorNumerator = 7; // 0.7 normal load factor
65+
private enum loadFactorDenominator = 10;
4366

4467
private struct StringEntry
4568
{
@@ -90,18 +113,20 @@ private:
90113
size_t nfill;
91114
size_t count;
92115
size_t countTrigger; // amount which will trigger growing the table
116+
bool isInitialSize; // Is this the initial table size
93117

94118
public:
95119
/**
 * Sets up an empty table.
 *
 * Params:
 *     size = requested size; 0 selects the default of 53 slots. A non-zero
 *            value is divided by the initial load factor and passed to
 *            `nextPrime` to obtain the slot count.
 */
void _init(size_t size = 0) nothrow pure
{
    // Default to the smallest table prime; otherwise scale the request by
    // the initial load factor and pick the next size above that.
    size_t dim = 53;
    if (size)
        dim = nextPrime((size * loadFactorDenominator) / initialLoadFactorNumerator);

    // Zero-initialised slots: a zero vptr marks a slot as empty.
    auto slots = cast(StringEntry*)mem.xcalloc(dim, StringEntry.sizeof);
    table = slots[0 .. dim];

    // Entry count at which the table will be grown.
    countTrigger = (table.length * initialLoadFactorNumerator) / loadFactorDenominator;
    isInitialSize = true;

    pools = null;
    nfill = 0;
    count = 0;
}
106131

107132
void reset(size_t size = 0) nothrow pure
@@ -131,7 +156,6 @@ public:
131156
{
132157
const(size_t) hash = calcHash(str);
133158
const(size_t) i = findSlot(hash, str);
134-
// printf("lookup %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: null);
135159
return getValue(table[i].vptr);
136160
}
137161

@@ -168,7 +192,6 @@ public:
168192
}
169193
table[i].hash = hash;
170194
table[i].vptr = allocValue(str, value);
171-
// printf("insert %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
172195
return getValue(table[i].vptr);
173196
}
174197

@@ -192,7 +215,6 @@ public:
192215
table[i].hash = hash;
193216
table[i].vptr = allocValue(str, T.init);
194217
}
195-
// printf("update %.*s %p\n", cast(int)str.length, str.ptr, table[i].value ?: NULL);
196218
return getValue(table[i].vptr);
197219
}
198220

@@ -281,34 +303,73 @@ private:
281303
return cast(inout(StringValue!T)*)&pools[idx][off];
282304
}
283305

284-
size_t findSlot(hash_t hash, scope const(char)[] str) const @nogc nothrow pure
306+
/**
 * Hashes `str` with the 32-bit FNV-1a algorithm: starting from the offset
 * basis, each byte is xor-ed into the hash, which is then multiplied by the
 * FNV prime (with 32-bit wrap-around).
 *
 * Params:
 *     str = the bytes to hash
 * Returns:
 *     the 32-bit hash; the offset basis itself for an empty string
 */
private uint calcHash(scope const(char)[] str) const @nogc nothrow pure
{
    enum uint offsetBasis = 0x811c9dc5;
    enum uint fnvPrime = 0x01000193;

    uint h = offsetBasis;
    for (size_t k = 0; k < str.length; ++k)
        h = (h ^ str[k]) * fnvPrime;
    return h;
}
322+
323+
/**
 * Finds the slot that holds `str`, or the empty slot where it would be stored.
 *
 * Uses linear probing: starts at `hash % table.length` and advances one slot
 * at a time, wrapping around at the end of the table.
 *
 * Params:
 *     hash = hash of `str`, compared against each occupied slot's stored hash
 *     str  = the key to look for
 * Returns:
 *     index of the first slot on the probe path that is either empty
 *     (`vptr == 0`) or whose stored hash, length and bytes all match `str`
 */
size_t findSlot(uint hash, scope const(char)[] str) const @nogc nothrow pure
{
    // Modulo, not a bit mask: the table length is not a power of two.
    const size_t dim = table.length;
    size_t i = hash % dim;

    // NOTE(review): terminates only if the table has an empty slot or
    // contains str; presumably guaranteed by countTrigger - confirm.
    while (true)
    {
        const(StringValue!T)* sv;
        auto vptr = table[i].vptr;

        if (!vptr)
            return i;

        // Compare the cheap stored hash first; only then touch the string data.
        if (table[i].hash == hash)
        {
            sv = getValue(vptr);
            if (sv.length == str.length && .memcmp(str.ptr, sv.toDchars(), str.length) == 0)
                return i;
        }

        i = (i + 1) % dim;
    }
}
297349

298350
/**
 * Replaces the table with a larger one and re-inserts every occupied entry.
 *
 * The new size is `nextPrime` of twice the current length. `countTrigger` is
 * recomputed for the new size, and the old slot array is freed afterwards.
 */
void grow() nothrow pure
{
    auto oldTable = table;

    // The first call after _init still uses the initial load factor; all
    // later calls use the normal one.
    // NOTE(review): if the lower factor was meant for the starting table
    // only, this selection is one growth late - confirm intent.
    const factor = isInitialSize ? initialLoadFactorNumerator : normalLoadFactorNumerator;
    isInitialSize = false;

    const newDim = nextPrime(oldTable.length * 2);
    countTrigger = (newDim * factor) / loadFactorDenominator;
    table = (cast(StringEntry*)mem.xcalloc_noscan(newDim, StringEntry.sizeof))[0 .. newDim];

    // Re-insert using the stored hash, so strings are not hashed again.
    foreach (const entry; oldTable)
    {
        if (entry.vptr)
        {
            const sv = getValue(entry.vptr);
            table[findSlot(entry.hash, sv.toString())] = entry;
        }
    }

    mem.xfree(oldTable.ptr);
}
314375
}

0 commit comments

Comments
 (0)